# Ensemble regression cells taken from the notebook patch
# "Ensemble Method with Validation code" (Stock_Price_Prediction.ipynb).
#
# The script expects the feature matrix ``X`` and the target ``y`` to be
# defined already by earlier notebook cells.

import math

from sklearn.ensemble import AdaBoostRegressor, RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------------------------
# Ensemble method using Random Forest and AdaBoost
# ---------------------------------------------------------------------------

# NOTE(review): this rebinds X_train / X_test / y_train / y_test, which the
# earlier model-evaluation cells (e.g. the CatBoost one) also use.
# NOTE(review): train_test_split shuffles by default; if the rows are
# time-ordered prices this lets the models see future data. Consider
# shuffle=False -- confirm against how the earlier cells split the data.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create the two base regressors (fixed seeds keep runs reproducible).
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
adaboost_model = AdaBoostRegressor(n_estimators=100, random_state=42)

# Train the models on the same training split.
rf_model.fit(X_train, y_train)
adaboost_model.fit(X_train, y_train)

# Predict on the held-out split.
rf_predictions = rf_model.predict(X_test)
adaboost_predictions = adaboost_model.predict(X_test)

# Combine the predictions with an unweighted average.
ensemble_predictions = (rf_predictions + adaboost_predictions) / 2

# Evaluate the ensemble.
ensemble_mse = mean_squared_error(y_test, ensemble_predictions)
print("Ensemble MSE:", ensemble_mse)

# ---------------------------------------------------------------------------
# Validation
# ---------------------------------------------------------------------------

# accuracy_score is a classification metric: with a continuous y_test and
# rounded predictions scikit-learn raises ValueError (mixed continuous /
# multiclass targets). R^2 is the regression counterpart of a single
# "how good is the fit" number, so it is reported instead.
r2 = r2_score(y_test, ensemble_predictions)
print("R^2:", r2)

# RMSE as the square root of the MSE. The ``squared=False`` keyword of
# mean_squared_error is deprecated in recent scikit-learn releases, so it is
# avoided here. For a single-output target the value is the same.
# TODO confirm y is 1-D; for multi-output targets the old keyword averaged
# per-output RMSEs instead.
rmse = math.sqrt(mean_squared_error(y_test, ensemble_predictions))
print("RMSE:", rmse)

# Mean absolute error.
mae = mean_absolute_error(y_test, ensemble_predictions)
print("MAE:", mae)