Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
ArmyComm authored Aug 5, 2024
1 parent d021ce3 commit b5f067b
Show file tree
Hide file tree
Showing 7 changed files with 36,999 additions and 0 deletions.
1 change: 1 addition & 0 deletions DataSplitGRU (2).ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions DataSplitLSTM (2).ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions DataSplitRNN (1).ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions GRUUsingTestData.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": {
   "name": "GRUUsingTestData.ipynb",
   "provenance": [],
   "collapsed_sections": [],
   "authorship_tag": "ABX9TyOJcCyO8ePKhtjiSZ1UphTl"
  },
  "kernelspec": {
   "name": "python3",
   "display_name": "Python 3"
  },
  "accelerator": "GPU"
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# GRU inference on `test.csv`\n",
    "\n",
    "Loads a saved Keras model (`GRUbest_model.h5`), cleans and tokenizes the articles in `test.csv`,\n",
    "and prints the predicted class for a few randomly sampled rows."
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# All imports live in this one cell so a fresh kernel can Run All top to bottom.\n",
    "import os\n",
    "import re\n",
    "from string import punctuation\n",
    "from zipfile import ZipFile\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "from sklearn.model_selection import train_test_split\n",
    "from tensorflow.keras.layers import Dense, Dropout, Embedding, GRU, LSTM, RNN, SpatialDropout1D\n",
    "from tensorflow.keras.models import Sequential, load_model\n",
    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
    "from tensorflow.keras.preprocessing.text import Tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Colab-only: mount Google Drive so the CSV and the saved model are reachable.\n",
    "from google.colab import drive\n",
    "drive.mount('/content/gdrive')"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to tune.\n",
    "PROJECT_DIR = '/content/gdrive/My Drive/ML Project'\n",
    "PATH = os.path.join(PROJECT_DIR, 'test.csv')\n",
    "MODEL_PATH = os.path.join(PROJECT_DIR, 'GRUbest_model.h5')\n",
    "\n",
    "max_features = 4500         # vocabulary size kept by the Tokenizer\n",
    "# Padded sequence length fed to the model. The original notebook reused\n",
    "# max_features for this; the value is unchanged, only the name is separate.\n",
    "# NOTE(review): presumably must equal the input length used in training - confirm.\n",
    "MAX_SEQUENCE_LENGTH = 4500\n",
    "MIN_TEXT_LENGTH = 50        # rows whose text has fewer characters are dropped\n",
    "EXAMPLE_ROW = 2000          # row whose token ids are printed as a sanity check\n",
    "N_PREDICTIONS = 5           # how many random rows to run through the model\n",
    "SEED = 42                   # makes the random row selection reproducible"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "test_raw = pd.read_csv(PATH)\n",
    "print('총 샘플의 수 :', len(test_raw))\n",
    "# Number of distinct article titles / article bodies (shows how much duplication there is).\n",
    "print(test_raw['title'].nunique(), test_raw['text'].nunique())\n",
    "# Drop rows whose 'text' duplicates an earlier row.\n",
    "test_dedup = test_raw.drop_duplicates(subset=['text'])\n",
    "print('중복 제거 후 샘플의 수 :', len(test_dedup))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Clean"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "test_indexed = test_dedup.set_index('id', drop=True)\n",
    "print(test_indexed.shape)\n",
    "test_indexed.head()"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "test_indexed.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Missing titles/authors get a placeholder; rows that are still incomplete\n",
    "# afterwards (i.e. missing text) are dropped.\n",
    "test_clean = (\n",
    "    test_indexed\n",
    "    .fillna({'title': 'Missing', 'author': 'Missing'})\n",
    "    .dropna()\n",
    ")\n",
    "test_clean.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Character count of every article body.\n",
    "test_measured = test_clean.assign(length=test_clean['text'].astype(str).str.len())\n",
    "test_measured.head()"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Keep only articles with at least MIN_TEXT_LENGTH characters.\n",
    "test_filtered = test_measured[test_measured['length'] >= MIN_TEXT_LENGTH]\n",
    "print('기사의 최대길이 : %d' % test_filtered['length'].max())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tokenize and pad"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# NOTE(review): this fits a fresh Tokenizer on the evaluation texts. Word\n",
    "# indices depend on the corpus the tokenizer was fitted on, so they only match\n",
    "# what the model saw during training if the same fitted tokenizer is used.\n",
    "# Confirm how the training notebook built its vocabulary and, if it differs,\n",
    "# load that tokenizer here instead of refitting.\n",
    "tokenizer = Tokenizer(\n",
    "    num_words=max_features,\n",
    "    filters='!\"#$%&()*+,-./:;<=>?@[\\\\]^_`{|}~\\t\\n',\n",
    "    lower=True,\n",
    "    split=' ',\n",
    ")\n",
    "tokenizer.fit_on_texts(texts=test_filtered['text'])\n",
    "sequences = tokenizer.texts_to_sequences(texts=test_filtered['text'])"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Sanity check: token ids of one article.\n",
    "print(sequences[EXAMPLE_ROW])"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "# Left-pad every sequence to one fixed length.\n",
    "X = pad_sequences(sequences=sequences, maxlen=MAX_SEQUENCE_LENGTH, padding='pre')\n",
    "assert X.shape == (len(test_filtered), MAX_SEQUENCE_LENGTH)\n",
    "print(X.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Predict"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "def predict_classes(model, x):\n",
    "    \"\"\"Return integer class labels predicted by `model` for `x`.\n",
    "\n",
    "    Stand-in for `Sequential.predict_classes`, which newer TensorFlow\n",
    "    releases no longer provide. A single-column output is thresholded at\n",
    "    0.5; a multi-column output is reduced with argmax over the last axis.\n",
    "    \"\"\"\n",
    "    proba = model.predict(x)\n",
    "    if proba.shape[-1] > 1:\n",
    "        return proba.argmax(axis=-1)\n",
    "    return (proba > 0.5).astype('int32')"
   ]
  },
  {
   "cell_type": "code",
   "metadata": {},
   "execution_count": null,
   "outputs": [],
   "source": [
    "loaded_model = load_model(MODEL_PATH)\n",
    "\n",
    "# Seeded generator + replace=False: reproducible, and no row is shown twice.\n",
    "rng = np.random.default_rng(SEED)\n",
    "sample_size = min(N_PREDICTIONS, X.shape[0])\n",
    "xhat_idx = rng.choice(X.shape[0], size=sample_size, replace=False)\n",
    "xhat = X[xhat_idx]\n",
    "yhat = predict_classes(loaded_model, xhat)\n",
    "\n",
    "# Class 0 is reported as 'True', any other class as 'False'.\n",
    "# NOTE(review): what the classes mean is defined by the training notebook - confirm.\n",
    "for pred in yhat:\n",
    "    if pred == 0:\n",
    "        print('Predict : True ' + str(pred))\n",
    "    else:\n",
    "        print('Predict : False ' + str(pred))"
   ]
  }
 ]
}
1 change: 1 addition & 0 deletions GruTest.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit b5f067b

Please sign in to comment.