diff --git a/.ipynb_checkpoints/stock_sentimental-checkpoint.ipynb b/.ipynb_checkpoints/stock_sentimental-checkpoint.ipynb new file mode 100644 index 0000000..bc8b62b --- /dev/null +++ b/.ipynb_checkpoints/stock_sentimental-checkpoint.ipynb @@ -0,0 +1,369 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Text Sentiment\n", + "0 Kickers on my watchlist XIDE TIT SOQ PNK CPW B... 1\n", + "1 user: AAP MOVIE. 55% return for the FEA/GEED i... 1\n", + "2 user I'd be afraid to short AMZN - they are lo... 1\n", + "3 MNTA Over 12.00 1\n", + "4 OI Over 21.37 1\n", + "Text 0\n", + "Sentiment 0\n", + "dtype: int64\n", + "(4632, 34) (4632,)\n", + "(1159, 34) (1159,)\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.preprocessing import LabelEncoder\n", + "from tensorflow.keras.preprocessing.text import Tokenizer\n", + "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", + "\n", + "# Load dataset\n", + "data = pd.read_csv('Stock_data.csv')\n", + "\n", + "# Display first few rows of the dataset\n", + "print(data.head())\n", + "\n", + "# Check for missing values\n", + "print(data.isnull().sum())\n", + "\n", + "# Drop missing values if any\n", + "data = data.dropna()\n", + "\n", + "# Encode sentiment labels (assuming they are in a column named 'Sentiment')\n", + "le = LabelEncoder()\n", + "data['Sentiment'] = le.fit_transform(data['Sentiment'])\n", + "\n", + "# Split dataset into features and labels\n", + "X = data['Text']\n", + "y = data['Sentiment']\n", + "\n", + "# Split data into training and testing sets\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Tokenization\n", + "tokenizer = Tokenizer(num_words=5000) # Consider top 5000 words\n", + "tokenizer.fit_on_texts(X_train)\n", + "\n", + "# Convert texts to sequences\n", + "X_train_seq = tokenizer.texts_to_sequences(X_train)\n", + "X_test_seq = tokenizer.texts_to_sequences(X_test)\n", + "\n", + "# Pad sequences to ensure uniform input size\n", + "max_length = max(len(x) for x in X_train_seq)\n", + "X_train_pad = pad_sequences(X_train_seq, maxlen=max_length, padding='post')\n", + "X_test_pad = pad_sequences(X_test_seq, maxlen=max_length, padding='post')\n", + "\n", + "# Display shapes of data\n", + "print(X_train_pad.shape, y_train.shape)\n", + "print(X_test_pad.shape, y_test.shape)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Model: \"sequential\"\n",
+ "
\n"
+ ],
+ "text/plain": [
+ "\u001b[1mModel: \"sequential\"\u001b[0m\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n", + "┃ Layer (type) ┃ Output Shape ┃ Param # ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n", + "│ embedding (Embedding) │ ? │ 0 (unbuilt) │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ lstm (LSTM) │ ? │ 0 (unbuilt) │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ dropout (Dropout) │ ? │ 0 │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ lstm_1 (LSTM) │ ? │ 0 (unbuilt) │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ dropout_1 (Dropout) │ ? │ 0 │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ dense (Dense) │ ? │ 0 (unbuilt) │\n", + "└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘\n", + "\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n", + "│ embedding (\u001b[38;5;33mEmbedding\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ lstm (\u001b[38;5;33mLSTM\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ dropout (\u001b[38;5;33mDropout\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ lstm_1 (\u001b[38;5;33mLSTM\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ dropout_1 (\u001b[38;5;33mDropout\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ dense (\u001b[38;5;33mDense\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n", + "└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Total params: 0 (0.00 B)\n", + "\n" + ], + "text/plain": [ + "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Trainable params: 0 (0.00 B)\n", + "\n" + ], + "text/plain": [ + "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
Non-trainable params: 0 (0.00 B)\n", + "\n" + ], + "text/plain": [ + "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from tensorflow.keras.models import Sequential\n", + "from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout\n", + "\n", + "# Define LSTM model\n", + "model = Sequential()\n", + "model.add(Embedding(input_dim=5000, output_dim=128)) # Embedding layer\n", + "model.add(LSTM(128, return_sequences=True)) # LSTM layer\n", + "model.add(Dropout(0.5)) # Dropout to prevent overfitting\n", + "model.add(LSTM(64)) # Second LSTM layer\n", + "model.add(Dropout(0.5)) # Dropout\n", + "model.add(Dense(1, activation='sigmoid')) # Output layer for binary classification\n", + "\n", + "# Compile the model\n", + "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n", + "\n", + "# Display model summary\n", + "model.summary()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 73ms/step - accuracy: 0.6265 - loss: 0.6656 - val_accuracy: 0.6343 - val_loss: 0.6423\n", + "Epoch 2/10\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 61ms/step - accuracy: 0.7415 - loss: 0.5434 - val_accuracy: 0.7573 - val_loss: 0.4971\n", + "Epoch 3/10\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 59ms/step - accuracy: 0.8876 - loss: 0.3093 - val_accuracy: 0.7616 - val_loss: 0.5779\n", + "Epoch 4/10\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 61ms/step - accuracy: 0.9329 - loss: 0.1984 - val_accuracy: 0.7519 - val_loss: 0.6860\n", + "Epoch 5/10\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 60ms/step - accuracy: 0.9661 - loss: 0.1211 - val_accuracy: 0.7454 - val_loss: 0.7584\n", + "Epoch 6/10\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 60ms/step - accuracy: 0.9642 - loss: 0.1280 - val_accuracy: 0.7357 - val_loss: 0.8707\n", + "Epoch 7/10\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 60ms/step - accuracy: 0.9692 - loss: 0.1043 - val_accuracy: 0.7195 - val_loss: 0.8572\n", + "Epoch 8/10\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 61ms/step - accuracy: 0.9730 - loss: 0.0922 - val_accuracy: 0.7335 - val_loss: 0.8631\n", + "Epoch 9/10\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 61ms/step - accuracy: 0.9842 - loss: 0.0609 - val_accuracy: 0.7357 - val_loss: 1.1649\n", + "Epoch 10/10\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 73ms/step - accuracy: 0.9794 - loss: 0.0758 - val_accuracy: 0.7228 - val_loss: 1.1020\n" + ] + } + ], + "source": [ + "# Train the model\n", + "history = model.fit(X_train_pad, y_train, epochs=10, batch_size=64, validation_split=0.2)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m37/37\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 14ms/step - accuracy: 0.7369 - loss: 1.0339\n", + "Test Accuracy: 74.46%\n" + ] + } + ], + "source": [ + "# Evaluate the model\n", + "loss, accuracy = model.evaluate(X_test_pad, y_test)\n", + "print(f'Test Accuracy: {accuracy * 100:.2f}%')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 500ms/step\n", + "Sentiment: Positive\n" + ] + } + ], + "source": [ + "def predict_sentiment(text):\n", + " sequence = tokenizer.texts_to_sequences([text])\n", + " padded = pad_sequences(sequence, maxlen=max_length, padding='post')\n", + " prediction = model.predict(padded)\n", + " # Assuming binary classification: 0 for Negative, 1 for Positive\n", + " if prediction[0] > 0.5:\n", + " print(\"Sentiment: Positive\")\n", + " else:\n", + " print(\"Sentiment: Negative\")\n", + "\n", + "# Example usage\n", + "new_text = \"The stock market is bad today.\"\n", + "predict_sentiment(new_text)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "model.save('sentiment_model.keras')\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Text: \"The stock market is performing well today.\"\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 36ms/step\n", + "Sentiment: Positive\n", + "Text: \"The stock market is performing bad today.\"\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 39ms/step\n", + "Sentiment: Positive\n", + "Text: \"I'm very happy with the profits I've made.\"\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 41ms/step\n", + "Sentiment: Positive\n", + "Text: \"I'm disappointed with the losses this quarter.\"\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 53ms/step\n", + "Sentiment: Negative\n", + "Text: \"It's a great time to invest in stocks!\"\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 36ms/step\n", + "Sentiment: Positive\n" + ] + } + ], + "source": [ + "test_texts = [\n", + " \"The stock market is performing well today.\",\n", + " \"The stock market is performing bad today.\",\n", + " \"I'm very happy with the profits I've made.\",\n", + " \"I'm disappointed with the losses this quarter.\",\n", + " \"It's a great time to invest in stocks!\"\n", + " \n", + "]\n", + "\n", + "for text in test_texts:\n", + " print(f'Text: \"{text}\"')\n", + " predict_sentiment(text)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/PROJECT_STRUCTURE.md b/PROJECT_STRUCTURE.md index 1ed8936..088af5f 100644 --- a/PROJECT_STRUCTURE.md +++ b/PROJECT_STRUCTURE.md @@ -15,6 +15,10 @@ │ ├── SBI Test data.csv │ ├── SBI Train data.csv │ └── SBIN.csv +├── Data Analysis/ +│ ├── SBI Stock Analysis .png +│ ├── SBI Stock Analysis Updated.pptx +│ └── SBI Stock Analysis Updated.twbx ├── Financial Environment Segmentation/ │ ├── Financial Environment Segmentation.ipynb │ ├── Financial Insights - Market Segmentation.png diff --git a/repo_structure.txt b/repo_structure.txt index 3af122d..149e9b4 100644 --- a/repo_structure.txt +++ b/repo_structure.txt @@ -11,6 +11,10 @@ │ ├── SBI Test data.csv │ ├── SBI Train data.csv │ └── SBIN.csv +├── Data Analysis/ +│ ├── SBI Stock Analysis .png +│ ├── SBI Stock Analysis Updated.pptx +│ └── SBI Stock Analysis Updated.twbx ├── Financial Environment Segmentation/ │ ├── Financial Environment Segmentation.ipynb │ ├── Financial Insights - Market Segmentation.png diff --git a/sentiment_model.h5 b/sentiment_model.h5 index 3183c43..8b6b6a8 100644 Binary files a/sentiment_model.h5 and b/sentiment_model.h5 differ diff --git a/sentiment_model.keras b/sentiment_model.keras new file mode 100644 index 0000000..457085e Binary files /dev/null and b/sentiment_model.keras differ diff --git a/stock_sentimental.ipynb b/stock_sentimental.ipynb index 746daff..bc8b62b 100644 --- a/stock_sentimental.ipynb +++ b/stock_sentimental.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 12, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -74,25 +74,17 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 3, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "c:\\Users\\sapni\\anaconda3\\lib\\site-packages\\keras\\src\\layers\\core\\embedding.py:90: UserWarning: Argument `input_length` is deprecated. Just remove it.\n", - " warnings.warn(\n" - ] - }, { "data": { "text/html": [ - "
Model: \"sequential_1\"\n", + "Model: \"sequential\"\n", "
\n" ], "text/plain": [ - "\u001b[1mModel: \"sequential_1\"\u001b[0m\n" + "\u001b[1mModel: \"sequential\"\u001b[0m\n" ] }, "metadata": {}, @@ -101,39 +93,39 @@ { "data": { "text/html": [ - "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", - "┃ Layer (type) ┃ Output Shape ┃ Param # ┃\n", - "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", - "│ embedding_1 (Embedding) │ ? │ 0 (unbuilt) │\n", - "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", - "│ lstm_2 (LSTM) │ ? │ 0 (unbuilt) │\n", - "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", - "│ dropout_2 (Dropout) │ ? │ 0 │\n", - "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", - "│ lstm_3 (LSTM) │ ? │ 0 (unbuilt) │\n", - "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", - "│ dropout_3 (Dropout) │ ? │ 0 │\n", - "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", - "│ dense_1 (Dense) │ ? │ 0 (unbuilt) │\n", - "└─────────────────────────────────┴────────────────────────┴───────────────┘\n", + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n", + "┃ Layer (type) ┃ Output Shape ┃ Param # ┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n", + "│ embedding (Embedding) │ ? │ 0 (unbuilt) │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ lstm (LSTM) │ ? │ 0 (unbuilt) │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ dropout (Dropout) │ ? │ 0 │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ lstm_1 (LSTM) │ ? │ 0 (unbuilt) │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ dropout_1 (Dropout) │ ? │ 0 │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ dense (Dense) │ ? │ 0 (unbuilt) │\n", + "└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘\n", "\n" ], "text/plain": [ - "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", - "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", - "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", - "│ embedding_1 (\u001b[38;5;33mEmbedding\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n", - "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", - "│ lstm_2 (\u001b[38;5;33mLSTM\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n", - "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", - "│ dropout_2 (\u001b[38;5;33mDropout\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m │\n", - "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", - "│ lstm_3 (\u001b[38;5;33mLSTM\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n", - "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", - "│ dropout_3 (\u001b[38;5;33mDropout\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m │\n", - "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", - "│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n", - "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩\n", + "│ embedding (\u001b[38;5;33mEmbedding\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ lstm (\u001b[38;5;33mLSTM\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ dropout (\u001b[38;5;33mDropout\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ lstm_1 (\u001b[38;5;33mLSTM\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ dropout_1 (\u001b[38;5;33mDropout\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m │\n", + "├──────────────────────────────────────┼─────────────────────────────┼─────────────────┤\n", + "│ dense (\u001b[38;5;33mDense\u001b[0m) │ ? │ \u001b[38;5;34m0\u001b[0m (unbuilt) │\n", + "└──────────────────────────────────────┴─────────────────────────────┴─────────────────┘\n" ] }, "metadata": {}, @@ -185,7 +177,7 @@ "\n", "# Define LSTM model\n", "model = Sequential()\n", - "model.add(Embedding(input_dim=5000, output_dim=128, input_length=max_length)) # Embedding layer\n", + "model.add(Embedding(input_dim=5000, output_dim=128)) # Embedding layer\n", "model.add(LSTM(128, return_sequences=True)) # LSTM layer\n", "model.add(Dropout(0.5)) # Dropout to prevent overfitting\n", "model.add(LSTM(64)) # Second LSTM layer\n", @@ -201,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -209,25 +201,25 @@ "output_type": "stream", "text": [ "Epoch 1/10\n", - "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 67ms/step - accuracy: 0.6284 - loss: 0.6606 - val_accuracy: 0.6268 - val_loss: 0.6536\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 73ms/step - accuracy: 0.6265 - loss: 0.6656 - val_accuracy: 0.6343 - val_loss: 0.6423\n", "Epoch 2/10\n", - "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 61ms/step - accuracy: 0.7146 - loss: 0.5622 - val_accuracy: 0.7745 - val_loss: 0.4920\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 61ms/step - accuracy: 0.7415 - loss: 0.5434 - val_accuracy: 0.7573 - val_loss: 0.4971\n", "Epoch 3/10\n", - "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 75ms/step - accuracy: 0.8898 - loss: 0.3168 - val_accuracy: 0.7756 - val_loss: 0.4849\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 59ms/step - accuracy: 0.8876 - loss: 0.3093 - val_accuracy: 0.7616 - val_loss: 0.5779\n", "Epoch 4/10\n", - "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 71ms/step - accuracy: 0.9278 - loss: 0.2154 - val_accuracy: 0.7433 - val_loss: 0.5643\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 61ms/step - accuracy: 0.9329 - loss: 0.1984 - val_accuracy: 0.7519 - val_loss: 0.6860\n", "Epoch 5/10\n", - "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 71ms/step - accuracy: 0.9486 - loss: 0.1725 - val_accuracy: 0.7594 - val_loss: 0.6393\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 60ms/step - accuracy: 0.9661 - loss: 0.1211 - val_accuracy: 0.7454 - val_loss: 0.7584\n", "Epoch 6/10\n", - "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 62ms/step - accuracy: 0.9671 - loss: 0.1213 - val_accuracy: 0.7530 - val_loss: 0.8493\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m3s\u001b[0m 60ms/step - accuracy: 0.9642 - loss: 0.1280 - val_accuracy: 0.7357 - val_loss: 0.8707\n", "Epoch 7/10\n", - "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 62ms/step - accuracy: 0.9738 - loss: 0.0973 - val_accuracy: 0.7357 - val_loss: 1.0902\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 60ms/step - accuracy: 0.9692 - loss: 0.1043 - val_accuracy: 0.7195 - val_loss: 0.8572\n", "Epoch 8/10\n", - "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 64ms/step - accuracy: 0.9737 - loss: 0.0950 - val_accuracy: 0.7357 - val_loss: 0.8958\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 61ms/step - accuracy: 0.9730 - loss: 0.0922 - val_accuracy: 0.7335 - val_loss: 0.8631\n", "Epoch 9/10\n", - "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 68ms/step - accuracy: 0.9826 - loss: 0.0744 - val_accuracy: 0.7443 - val_loss: 0.9316\n", + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 61ms/step - accuracy: 0.9842 - loss: 0.0609 - val_accuracy: 0.7357 - val_loss: 1.1649\n", "Epoch 10/10\n", - "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 75ms/step - accuracy: 0.9851 - loss: 0.0689 - val_accuracy: 0.7303 - val_loss: 1.0088\n" + "\u001b[1m58/58\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m4s\u001b[0m 73ms/step - accuracy: 0.9794 - loss: 0.0758 - val_accuracy: 0.7228 - val_loss: 1.1020\n" ] } ], @@ -238,15 +230,15 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[1m37/37\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 12ms/step - accuracy: 0.7407 - loss: 0.9643\n", - "Test Accuracy: 74.63%\n" + "\u001b[1m37/37\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 14ms/step - accuracy: 0.7369 - loss: 1.0339\n", + "Test Accuracy: 74.46%\n" ] } ], @@ -258,19 +250,15 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 10, "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'tokenizer' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_10044\\3289756518.py\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;31m# Example usage\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[0mnew_text\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;34m\"The stock market is bad today.\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 13\u001b[1;33m \u001b[0mpredict_sentiment\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnew_text\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_10044\\3289756518.py\u001b[0m in \u001b[0;36mpredict_sentiment\u001b[1;34m(text)\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mpredict_sentiment\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtext\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0msequence\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtokenizer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtexts_to_sequences\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mtext\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mpadded\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpad_sequences\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0msequence\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmaxlen\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmax_length\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpadding\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'post'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mprediction\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpadded\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# Assuming binary classification: 0 for Negative, 1 for Positive\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", - "\u001b[1;31mNameError\u001b[0m: name 'tokenizer' is not defined" + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 500ms/step\n", + "Sentiment: Positive\n" ] } ], @@ -292,25 +280,17 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n" - ] - } - ], + "outputs": [], "source": [ - "model.save('sentiment_model.h5')\n", + "model.save('sentiment_model.keras')\n", "\n" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -318,19 +298,19 @@ "output_type": "stream", "text": [ "Text: \"The stock market is performing well today.\"\n", - "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 47ms/step\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 36ms/step\n", "Sentiment: Positive\n", "Text: \"The stock market is performing bad today.\"\n", - "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 52ms/step\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 39ms/step\n", "Sentiment: Positive\n", "Text: \"I'm very happy with the profits I've made.\"\n", - "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 59ms/step\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 41ms/step\n", "Sentiment: Positive\n", "Text: \"I'm disappointed with the losses this quarter.\"\n", - "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 53ms/step\n", "Sentiment: Negative\n", "Text: \"It's a great time to invest in stocks!\"\n", - "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 24ms/step\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 36ms/step\n", "Sentiment: Positive\n" ] } @@ -367,7 +347,7 @@ ], "metadata": { "kernelspec": { - "display_name": "base", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -381,9 +361,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.12.4" } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 }