diff --git a/Prediction Models/Research topic Prediction/README.md b/Prediction Models/Research topic Prediction/README.md
new file mode 100644
index 00000000..eaf0e79e
--- /dev/null
+++ b/Prediction Models/Research topic Prediction/README.md
@@ -0,0 +1,47 @@
+# Research-topic-Prediction
+### Problem Statement
+Researchers have access to large online archives of scientific articles. As a consequence, finding relevant articles has become more difficult. Tagging, or topic modelling, gives each research article identifying labels, which facilitates recommendation and search.
+Given the abstract and title for a set of research articles, predict the topics for each article included in the test set.
+Note that a research article can possibly have more than 1 topic. The research article abstracts and titles are sourced from the following 6 topics:
+1. Computer Science
+2. Physics
+3. Mathematics
+4. Statistics
+5. Quantitative Biology
+6. Quantitative Finance
+## Approach
+### Data Preprocessing:
+We clean the text data by replacing punctuation and other non-letter characters with spaces, converting the text to lowercase, dropping one-letter words from the abstracts, and collapsing repeated whitespace. This standardizes the text input for better performance during model training.
+### Feature Extraction:
+We extract features from the research abstracts and titles using text vectorization techniques. The two main methods used are CountVectorizer (which counts word occurrences) and TF-IDF (Term Frequency-Inverse Document Frequency), which assigns importance to words based on their frequency across documents.
+### Model Selection:
+We use a Linear Support Vector Machine (LinearSVC) with a multi-output classification approach. This allows the model to predict multiple topics for each article simultaneously.
+The model is evaluated using common classification metrics such as precision, recall, F1-score, and accuracy. These metrics give insight into how well the model performs across different research topics.
+### Prediction:
+After training, the model predicts the topics for unseen research articles, and the results are formatted for submission. Each prediction shows whether a particular article belongs to one or more of the six topics.
+## Project Highlights
+- **Multi-label Classification:** Each article can be tagged with more than one topic, so the model needs to handle multiple outputs simultaneously.
+- **Text Processing:** Effective text preprocessing and vectorization are key to extracting meaningful features from the research articles’ titles and abstracts.
+### Model Performance:
+ The model demonstrates good performance in most categories, but there is room for improvement, particularly in addressing class imbalances for topics with fewer articles.
+## Future Work
+Future improvements could include addressing the data imbalance, exploring advanced machine learning models such as deep learning techniques, and refining the feature extraction process for better prediction accuracy.
+This project provides a foundation for automating topic prediction in research articles, potentially enhancing search engines and recommendation systems in academic databases.
\ No newline at end of file
diff --git a/Prediction Models/Research topic Prediction/Research-topic-Prediction.ipynb b/Prediction Models/Research topic Prediction/Research-topic-Prediction.ipynb
new file mode 100644
index 00000000..7ffa5618
--- /dev/null
+++ b/Prediction Models/Research topic Prediction/Research-topic-Prediction.ipynb
@@ -0,0 +1,722 @@
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "OdBntg3UTJg3"
+ },
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline\n",
+ "import warnings\n",
+ "warnings.filterwarnings('ignore')\n",
+ "from sklearn.preprocessing import LabelEncoder\n",
+ "\n",
+ "train = pd.read_csv('/content/drive/My Drive/ml/train.csv')\n",
+ "test = pd.read_csv('/content/drive/My Drive/ml/test.csv')\n",
+ "subission_pd = pd.read_csv('/content/drive/My Drive/ml/sample_submission_UVKGLZE.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 68
+ },
+ "colab_type": "code",
+ "id": "c8wedvYeT4nm",
+ "outputId": "1a91f831-6556-49e3-dcba-e87c7567241e"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Train shape: (20972, 9)\n",
+ "Test shape: (8989, 3)\n",
+ "Sample shape: (8989, 7)\n"
+ ]
+ }
+ ],
+ "source": [
+ "print('Train shape:',train.shape)\n",
+ "print('Test shape:',test.shape)\n",
+ "print('Sample shape:',subission_pd.shape)\n",
+ "\n",
+ "l = ['Computer Science', 'Physics', 'Mathematics', 'Statistics', 'Quantitative Biology', 'Quantitative Finance']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 51
+ },
+ "colab_type": "code",
+ "id": "_8-A4zRtT8Ss",
+ "outputId": "91bb890b-601a-42ef-cb0c-1e2aaafcba28"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(18874, 2) (2098, 2)\n",
+ "(18874, 6) (2098, 6)\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "# Drop the ID column from the competition test set.\n",
+ "test = test.drop(columns=['ID'])\n",
+ "\n",
+ "# Features are the two text columns; targets are the topic columns listed in `l`.\n",
+ "X = train[['TITLE', 'ABSTRACT']]\n",
+ "y = train[l]\n",
+ "\n",
+ "# Hold out 10% of the labelled rows, shuffled with a fixed seed.\n",
+ "X_train, X_test, y_train, y_test = train_test_split(\n",
+ "    X, y, test_size=0.1, shuffle=True, random_state=42)\n",
+ "\n",
+ "for left, right in ((X_train, X_test), (y_train, y_test)):\n",
+ "    print(left.shape, right.shape)\n",
+ "\n",
+ "# Renumber the hold-out rows from 0.\n",
+ "X_test = X_test.reset_index(drop=True)\n",
+ "y_test = y_test.reset_index(drop=True)\n",
+ "\n",
+ "# Plain arrays of the label columns for fitting and scoring.\n",
+ "y1 = np.array(y_train)\n",
+ "y2 = np.array(y_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "ENRy0yNfUaCK"
+ },
+ "outputs": [],
+ "source": [
+ "# Replace every character that is not an ASCII letter (punctuation, digits, symbols) with a space.\n",
+ "\n",
+ "X_train.replace('[^a-zA-Z]',' ', regex=True, inplace=True)\n",
+ "X_test.replace('[^a-zA-Z]',' ', regex=True, inplace=True)\n",
+ "\n",
+ "test.replace('[^a-zA-Z]',' ', regex=True, inplace=True)\n",
+ "\n",
+ "# Convert every text column to lower case.\n",
+ "\n",
+ "for index in X_train.columns:\n",
+ "    X_train[index] = X_train[index].str.lower()\n",
+ "\n",
+ "for index in X_test.columns:\n",
+ "    X_test[index] = X_test[index].str.lower()\n",
+ "\n",
+ "for index in test.columns:\n",
+ "    test[index] = test[index].str.lower()\n",
+ "\n",
+ "# Drop one-letter words from the abstracts. regex=True is explicit because pandas >= 2.0\n",
+ "# treats str.replace patterns as literal text by default, which would silently skip this step.\n",
+ "X_train['ABSTRACT'] = X_train['ABSTRACT'].str.replace(r'\\b\\w\\b', '', regex=True).str.replace(r'\\s+', ' ', regex=True)\n",
+ "X_test['ABSTRACT'] = X_test['ABSTRACT'].str.replace(r'\\b\\w\\b', '', regex=True).str.replace(r'\\s+', ' ', regex=True)\n",
+ "\n",
+ "test['ABSTRACT'] = test['ABSTRACT'].str.replace(r'\\b\\w\\b', '', regex=True).str.replace(r'\\s+', ' ', regex=True)\n",
+ "\n",
+ "# Collapse runs of whitespace in every column; raw strings keep the regex backslash intact.\n",
+ "\n",
+ "X_train = X_train.replace(r'\\s+', ' ', regex=True)\n",
+ "X_test = X_test.replace(r'\\s+', ' ', regex=True)\n",
+ "\n",
+ "test = test.replace(r'\\s+', ' ', regex=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 374
+ },
+ "colab_type": "code",
+ "id": "VJD5hEUkUfNY",
+ "outputId": "1b4eff7d-4a42-44a3-8e4e-ae4f2ef4a533"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
+ "[nltk_data] Package punkt is already up-to-date!\n",
+ "[nltk_data] Downloading package wordnet to /root/nltk_data...\n",
+ "[nltk_data] Package wordnet is already up-to-date!\n",
+ "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+ "[nltk_data] Package stopwords is already up-to-date!\n",
+ "[nltk_data] Downloading package averaged_perceptron_tagger to\n",
+ "[nltk_data] /root/nltk_data...\n",
+ "[nltk_data] Package averaged_perceptron_tagger is already up-to-\n",
+ "[nltk_data] date!\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
+ "\n",
+ "
+ " \n",
+ " \n",
+ " | \n",
+ " combined | \n",
+ "
+ " \n",
+ " \n",
+ " \n",
+ " 13275 | \n",
+ " clustering in hilbert space of a quantum optim... | \n",
+ "
+ " \n",
+ " 19273 | \n",
+ " graph heat mixture model learning graph infer... | \n",
+ "
+ " \n",
+ " 6427 | \n",
+ " fast and unsupervised methods for multilingual... | \n",
+ "
+ " \n",
+ " 19168 | \n",
+ " natasha faster non convex stochastic optimizat... | \n",
+ "
+ " \n",
+ " 14148 | \n",
+ " kustaanheimo stiefel transformation with an ar... | \n",
+ "
+ " \n",
+ "
+ "
+ ],
+ "text/plain": [
+ " combined\n",
+ "13275 clustering in hilbert space of a quantum optim...\n",
+ "19273 graph heat mixture model learning graph infer...\n",
+ "6427 fast and unsupervised methods for multilingual...\n",
+ "19168 natasha faster non convex stochastic optimizat...\n",
+ "14148 kustaanheimo stiefel transformation with an ar..."
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import nltk\n",
+ "\n",
+ "# Download the NLTK data packages.\n",
+ "for package in ('punkt', 'wordnet', 'stopwords', 'averaged_perceptron_tagger'):\n",
+ "    nltk.download(package)\n",
+ "\n",
+ "from nltk import sent_tokenize, word_tokenize\n",
+ "from nltk.stem.snowball import SnowballStemmer\n",
+ "from nltk.stem.wordnet import WordNetLemmatizer\n",
+ "from nltk.corpus import stopwords\n",
+ "\n",
+ "# English stop words. Stop-word filtering is switched off; the disabled code is kept for reference.\n",
+ "stop_words = set(stopwords.words('english'))\n",
+ "# len(stop_words)\n",
+ "# X_train['ABSTRACT'] = X_train['ABSTRACT'].apply(lambda x: ' '.join(term for term in x.split() if term not in stop_words))\n",
+ "# X_test['ABSTRACT'] = X_test['ABSTRACT'].apply(lambda x: ' '.join(term for term in x.split() if term not in stop_words))\n",
+ "\n",
+ "# test['ABSTRACT'] = test['ABSTRACT'].apply(lambda x: ' '.join(term for term in x.split() if term not in stop_words))\n",
+ "\n",
+ "def _title_plus_abstract(frame):\n",
+ "    # Add a 'combined' column (title, one space, abstract) and remove the two source columns.\n",
+ "    merged = frame['TITLE'] + ' ' + frame['ABSTRACT']\n",
+ "    return frame.assign(combined=merged).drop(columns=['TITLE', 'ABSTRACT'])\n",
+ "\n",
+ "X_train = _title_plus_abstract(X_train)\n",
+ "X_test = _title_plus_abstract(X_test)\n",
+ "\n",
+ "test = _title_plus_abstract(test)\n",
+ "\n",
+ "X_train.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "ZZpJCI5cYCap"
+ },
+ "outputs": [],
+ "source": [
+ "# One string per labelled article: the string form of every column value in the row, space-separated.\n",
+ "X_lines = [' '.join(str(value) for value in record)\n",
+ "           for record in X.itertuples(index=False, name=None)]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "aKsTUdeRUrMd"
+ },
+ "outputs": [],
+ "source": [
+ "def _rows_to_lines(frame):\n",
+ "    # Join the string form of every cell in a row with single spaces; one list entry per row.\n",
+ "    return [' '.join(str(cell) for cell in row)\n",
+ "            for row in frame.itertuples(index=False, name=None)]\n",
+ "\n",
+ "# Documents for the training split, the hold-out split and the competition test set.\n",
+ "train_lines = _rows_to_lines(X_train)\n",
+ "\n",
+ "test_lines = _rows_to_lines(X_test)\n",
+ "\n",
+ "predtest_lines = _rows_to_lines(test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 34
+ },
+ "colab_type": "code",
+ "id": "jGDABzRWZlOi",
+ "outputId": "a9321f83-b977-43e7-9d7b-243bfeeaa754"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "18874"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(train_lines)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "O05zD3CTUu_x"
+ },
+ "outputs": [],
+ "source": [
+ "from sklearn.feature_extraction.text import CountVectorizer\n",
+ "# Unigram and bigram counts; the vocabulary is learned from the training split only.\n",
+ "countvector = CountVectorizer(ngram_range=(1,2))\n",
+ "X_train_cv = countvector.fit_transform(train_lines)\n",
+ "X_test_cv = countvector.transform(test_lines)\n",
+ "\n",
+ "test_cv = countvector.transform(predtest_lines)\n",
+ "\n",
+ "# TF-IDF re-weighting: fit the IDF statistics on the training split once and reuse them for\n",
+ "# every other matrix, so all sets share the feature scaling the classifier is trained on.\n",
+ "from sklearn.feature_extraction.text import TfidfVectorizer, TfidfTransformer\n",
+ "\n",
+ "tfidfvector = TfidfTransformer()\n",
+ "X_train_tf = tfidfvector.fit_transform(X_train_cv)\n",
+ "X_test_tf = tfidfvector.transform(X_test_cv)\n",
+ "\n",
+ "test_tf = tfidfvector.transform(test_cv)\n",
+ "\n",
+ "X_cv = countvector.transform(X_lines)\n",
+ "# Full labelled set: transformed with the training-split IDF weights, not refitted.\n",
+ "X_tf = tfidfvector.transform(X_cv) #x_tf,y"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 153
+ },
+ "colab_type": "code",
+ "id": "CcnLYLSQV3wk",
+ "outputId": "eec93633-b54c-4efb-9403-19467293f4c0"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "MultiOutputClassifier(estimator=LinearSVC(C=0.5, class_weight='balanced',\n",
+ " dual=True, fit_intercept=True,\n",
+ " intercept_scaling=1,\n",
+ " loss='squared_hinge', max_iter=1000,\n",
+ " multi_class='ovr', penalty='l2',\n",
+ " random_state=42, tol=0.0001,\n",
+ " verbose=0),\n",
+ " n_jobs=None)"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.svm import LinearSVC\n",
+ "from sklearn.multioutput import MultiOutputClassifier\n",
+ "\n",
+ "model = LinearSVC(C=0.5, class_weight='balanced', random_state=42)\n",
+ "models = MultiOutputClassifier(model)\n",
+ "\n",
+ "models.fit(X_train_tf, y1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 136
+ },
+ "colab_type": "code",
+ "id": "MITfjxaeWGNf",
+ "outputId": "b5b6e7e6-ac08-43ba-ced2-d19bd292fe39"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[1, 0, 0, 1, 0, 0],\n",
+ " [0, 0, 0, 0, 0, 0],\n",
+ " [1, 0, 0, 0, 0, 0],\n",
+ " ...,\n",
+ " [0, 1, 0, 0, 1, 0],\n",
+ " [1, 0, 0, 1, 0, 0],\n",
+ " [0, 1, 0, 0, 0, 0]])"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "preds = models.predict(X_test_tf)\n",
+ "preds\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 272
+ },
+ "colab_type": "code",
+ "id": "f09g5ZevWI6e",
+ "outputId": "a1410f03-caed-44f3-dd73-b19e1592f054"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " 0 0.81 0.91 0.85 853\n",
+ " 1 0.88 0.89 0.88 623\n",
+ " 2 0.84 0.84 0.84 580\n",
+ " 3 0.72 0.86 0.78 516\n",
+ " 4 0.53 0.40 0.46 58\n",
+ " 5 0.86 0.69 0.77 26\n",
+ "\n",
+ " micro avg 0.81 0.86 0.83 2656\n",
+ " macro avg 0.77 0.76 0.76 2656\n",
+ "weighted avg 0.81 0.86 0.83 2656\n",
+ " samples avg 0.84 0.89 0.84 2656\n",
+ "\n",
+ "0.6611058150619638\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report\n",
+ "\n",
+ "#print(confusion_matrix(y2,preds))\n",
+ "print(classification_report(y2,preds))\n",
+ "print(accuracy_score(y2,preds))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "hlszaq_ujFje"
+ },
+ "outputs": [],
+ "source": [
+ "# for i in range(100):\n",
+ "# print(str(y2[i])+str(preds[i]))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "LgQTqGrhjI7w"
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 136
+ },
+ "colab_type": "code",
+ "id": "mi9nVKeqWK-p",
+ "outputId": "c27fb35b-0f4a-410e-ecdf-1b1b0aae4dae"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "array([[0, 0, 0, 1, 0, 0],\n",
+ " [0, 1, 0, 0, 0, 0],\n",
+ " [1, 0, 0, 0, 0, 0],\n",
+ " ...,\n",
+ " [0, 0, 0, 0, 1, 0],\n",
+ " [0, 0, 0, 1, 0, 0],\n",
+ " [1, 0, 0, 0, 0, 0]])"
+ ]
+ },
+ "execution_count": 30,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "predssv = models.predict(test_tf)\n",
+ "predssv"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 204
+ },
+ "colab_type": "code",
+ "id": "EBUnVCwlWPOZ",
+ "outputId": "e99d8905-3d96-4471-8c2e-a43e2d4eef76"
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
+ " \n",
+ " \n",
+ " | \n",
+ " ID | \n",
+ " Computer Science | \n",
+ " Physics | \n",
+ " Mathematics | \n",
+ " Statistics | \n",
+ " Quantitative Biology | \n",
+ " Quantitative Finance | \n",
+ "
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 20973 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
+ " \n",
+ " 1 | \n",
+ " 20974 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
+ " \n",
+ " 2 | \n",
+ " 20975 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
+ " \n",
+ " 3 | \n",
+ " 20976 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
+ " \n",
+ " 4 | \n",
+ " 20977 | \n",
+ " 1 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
+ " \n",
+ "
+ "
+ ],
+ "text/plain": [
+ " ID Computer Science ... Quantitative Biology Quantitative Finance\n",
+ "0 20973 0 ... 0 0\n",
+ "1 20974 0 ... 0 0\n",
+ "2 20975 1 ... 0 0\n",
+ "3 20976 0 ... 0 0\n",
+ "4 20977 1 ... 0 0\n",
+ "\n",
+ "[5 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {
+ "tags": []
+ },
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Reload the test file to recover the ID column that was dropped before feature extraction.\n",
+ "test = pd.read_csv('/content/drive/My Drive/ml/test.csv')\n",
+ "\n",
+ "# Submission layout: the article ID followed by one prediction column per topic,\n",
+ "# taken from the prediction matrix in column order.\n",
+ "topic_columns = ['Computer Science', 'Physics', 'Mathematics', 'Statistics',\n",
+ "                 'Quantitative Biology', 'Quantitative Finance']\n",
+ "submission_data = {'ID': test.ID}\n",
+ "for position, topic in enumerate(topic_columns):\n",
+ "    submission_data[topic] = predssv[:, position]\n",
+ "\n",
+ "submit = pd.DataFrame(submission_data)\n",
+ "submit.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {
+ "colab": {},
+ "colab_type": "code",
+ "id": "ybS28j2AWbio"
+ },
+ "outputs": [],
+ "source": [
+ "submit.to_csv('submission2.csv', index=False)"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "authorship_tag": "ABX9TyMhmjaEvacjvO1SJCWFndxi",
+ "collapsed_sections": [],
+ "mount_file_id": "1-RAfjiGOy2brJkcnEiA6loSQDSn5oPQ1",
+ "name": "independenceday_challange",
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 1
diff --git a/Prediction Models/Research topic Prediction/submission6.csv b/Prediction Models/Research topic Prediction/submission6.csv
new file mode 100644
index 00000000..e3d7db90
--- /dev/null
+++ b/Prediction Models/Research topic Prediction/submission6.csv
@@ -0,0 +1,8990 @@
+ID,Computer Science,Physics,Mathematics,Statistics,Quantitative Biology,Quantitative Finance
diff --git a/Prediction Models/Research topic Prediction/test_8iecVfC.zip b/Prediction Models/Research topic Prediction/test_8iecVfC.zip
new file mode 100644
index 00000000..fb90e827
Binary files /dev/null and b/Prediction Models/Research topic Prediction/test_8iecVfC.zip differ
diff --git a/Prediction Models/Research topic Prediction/train_tGmol3O.zip b/Prediction Models/Research topic Prediction/train_tGmol3O.zip
new file mode 100644
index 00000000..3f9679ca
Binary files /dev/null and b/Prediction Models/Research topic Prediction/train_tGmol3O.zip differ