From d59dd3e91fec6816a75ae2b60452c529cfaac219 Mon Sep 17 00:00:00 2001 From: "jeffey97@gmail.com" Date: Sun, 10 Nov 2024 16:57:43 -0500 Subject: [PATCH] Comment on immediate recording of target variable income provided --- 02_activities/assignments/assignment_2.ipynb | 58 ++++++++++++++------ 1 file changed, 40 insertions(+), 18 deletions(-) diff --git a/02_activities/assignments/assignment_2.ipynb b/02_activities/assignments/assignment_2.ipynb index 5008f2cdd..13d0a2459 100644 --- a/02_activities/assignments/assignment_2.ipynb +++ b/02_activities/assignments/assignment_2.ipynb @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 58, "metadata": {}, "outputs": [], "source": [ @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 59, "metadata": {}, "outputs": [], "source": [ @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 60, "metadata": {}, "outputs": [ { @@ -151,7 +151,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ @@ -214,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 62, "metadata": {}, "outputs": [], "source": [ @@ -242,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 63, "metadata": {}, "outputs": [ { @@ -310,7 +310,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 64, "metadata": {}, "outputs": [ { @@ -320,11 +320,11 @@ "Sorted Cross-Validation Results by Test Negative Log Loss:\n", "\n", " fit_time score_time test_neg_log_loss train_neg_log_loss test_roc_auc \\\n", - "1 3.270346 0.146423 -0.331249 -0.309713 0.906679 \n", - "0 3.128752 0.165946 -0.330795 -0.311253 0.907006 \n", - "2 3.231312 0.153061 -0.329205 -0.313765 0.910617 \n", - "4 2.981757 0.148714 -0.325861 -0.313799 0.914390 \n", - "3 2.921376 0.148241 -0.323481 -0.310539 0.914494 \n", + "1 3.632418 0.175431 -0.331249 -0.309713 0.906679 \n", + "0 3.129728 0.156102 -0.330795 -0.311253 0.907006 \n", + "2 2.931507 0.144618 -0.329205 -0.313765 0.910617 \n", + "4 3.292911 0.146331 -0.325861 -0.313799 0.914390 \n", + "3 3.131725 0.149254 -0.323481 -0.310539 0.914494 \n", "\n", " train_roc_auc test_accuracy train_accuracy test_balanced_accuracy \\\n", "1 0.926736 0.855651 0.865331 0.747288 \n", @@ -362,7 +362,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 65, "metadata": {}, "outputs": [ { @@ -370,8 +370,8 @@ "output_type": "stream", "text": [ "Mean values for each metric across the folds:\n", - "fit_time 3.106709\n", - "score_time 0.152477\n", + "fit_time 3.223658\n", + "score_time 0.154347\n", "test_neg_log_loss -0.328118\n", "train_neg_log_loss -0.311814\n", "test_roc_auc 0.910637\n", @@ -403,7 +403,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 66, "metadata": {}, "outputs": [ { @@ -449,10 +449,32 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 69, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[92mThe target variable 'income' was recoded to:\n", + " - 1 for >50K \n", + " - 0 for <=50K \n", + "1.This transformation ensures compatibility with machine learning algorithms that expect numeric targets.\n", + "2.Simplifies data processing and avoids the need for later conversions.\n", + "3.Removes extra whitespace using 'str.strip()' for consistent data.\n", + "4.Makes subsequent modeling steps more efficient by treating 'income' as a clean, binary numeric variable.\u001b[0m\n" + ] + } + ], "source": [ - "(Answer here.)" + "print(\"\\033[92mThe target variable 'income' was recoded to:\")\n", + "print(\" - 1 for >50K \")\n", + "print(\" - 0 for <=50K \")\n", + "print(\"1.This transformation ensures compatibility with machine learning algorithms that expect numeric targets.\")\n", + "print(\"2.Simplifies data processing and avoids the need for later conversions.\")\n", + "print(\"3.Removes extra whitespace using 'str.strip()' for consistent data.\")\n", + "print(\"4.Makes subsequent modeling steps more efficient by treating 'income' as a clean, binary numeric variable.\\033[0m\")\n" ] }, {