diff --git a/Titanic logistic regression Tensorflow.ipynb b/Titanic logistic regression Tensorflow.ipynb deleted file mode 100644 index 803d699..0000000 --- a/Titanic logistic regression Tensorflow.ipynb +++ /dev/null @@ -1,1751 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.013006, - "end_time": "2020-08-16T00:03:03.128147", - "exception": false, - "start_time": "2020-08-16T00:03:03.115141", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# **Introduction**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.011151, - "end_time": "2020-08-16T00:03:03.152442", - "exception": false, - "start_time": "2020-08-16T00:03:03.141291", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "***Howdy, Welcome to the Titanic***" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.010739, - "end_time": "2020-08-16T00:03:03.174414", - "exception": false, - "start_time": "2020-08-16T00:03:03.163675", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**To whom does this notebook appeal to?**

\n", - "If you are just starting with WNNC's HELLO FOSS and want to do a beginner task in ML, you can try this project!\n", - "\n", - "Data reading, visualization and pre-processing have been done for you. We need you to use logistic regression using the sklearn library to predict whether or not each passenger survived the sinking of the Titanic. We have also included `gender_submission.csv`, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.\n", - "\n", - "Head over to [this cell](#main) to implement logistic regression and then add code for writing the output to file `predictions-ann.csv` [here](#main2). " - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.010834, - "end_time": "2020-08-16T00:03:03.196238", - "exception": false, - "start_time": "2020-08-16T00:03:03.185404", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "![](https://faithmag.com/sites/default/files/styles/article_full/public/2018-09/titanic2.jpg?h=6521bd5e&itok=H8td6QVv)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.010991, - "end_time": "2020-08-16T00:03:03.218357", - "exception": false, - "start_time": "2020-08-16T00:03:03.207366", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**Basic Imports**" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", - "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", - "execution": { - "iopub.execute_input": "2020-08-16T00:03:03.250148Z", - "iopub.status.busy": "2020-08-16T00:03:03.249436Z", - "iopub.status.idle": "2020-08-16T00:03:04.161970Z", - "shell.execute_reply": "2020-08-16T00:03:04.162628Z" - }, - "papermill": { - "duration": 0.933339, - "end_time": "2020-08-16T00:03:04.162859", - "exception": false, - "start_time": "2020-08-16T00:03:03.229520", - "status": "completed" - }, - "tags": [] - }, - 
"outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "gender_submission.csv\n", - "test.csv\n", - "train.csv\n" - ] - } - ], - "source": [ - "\n", - "import numpy as np # linear algebra\n", - "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", - "import seaborn as sns\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline\n", - "sns.set()\n", - "\n", - "import os\n", - "files = [f for f in os.listdir('./titanic/')]\n", - "for f in files:\n", - " print(f)\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.01215, - "end_time": "2020-08-16T00:03:04.187774", - "exception": false, - "start_time": "2020-08-16T00:03:04.175624", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# Reading In the Data" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.011264, - "end_time": "2020-08-16T00:03:04.210794", - "exception": false, - "start_time": "2020-08-16T00:03:04.199530", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "We will be first importing the data, and creating copies. 
I recommend this because it gives you access to a clean untouched base file.\n", - "Next, we will be dropping some columns that we don't need, such as the `PassengerId` and `Ticket` columns" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", - "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", - "execution": { - "iopub.execute_input": "2020-08-16T00:03:04.244520Z", - "iopub.status.busy": "2020-08-16T00:03:04.243877Z", - "iopub.status.idle": "2020-08-16T00:03:04.272874Z", - "shell.execute_reply": "2020-08-16T00:03:04.272333Z" - }, - "papermill": { - "duration": 0.050645, - "end_time": "2020-08-16T00:03:04.272984", - "exception": false, - "start_time": "2020-08-16T00:03:04.222339", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "train_data = pd.read_csv('./titanic/train.csv')\n", - "test_data = pd.read_csv('./titanic/test.csv')\n", - "#make a copy so your original data is not touched\n", - "train = train_data.copy()\n", - "test = test_data.copy()\n", - "train.shape\n", - "y_train = train['Survived']\n", - "\n", - "#We won't need the PassengerId or Ticket columns for the model! 
They do not provide much insight on the training.\n", - "Id = pd.DataFrame(test['PassengerId'])\n", - "train.drop(['PassengerId'], axis = 1, inplace=True)\n", - "test.drop(['PassengerId'], axis = 1, inplace=True)\n", - "train.drop(['Survived'], axis = 1, inplace=True)\n", - "train.drop(['Ticket'], axis = 1, inplace=True)\n", - "test.drop(['Ticket'], axis = 1, inplace=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.011318, - "end_time": "2020-08-16T00:03:04.295950", - "exception": false, - "start_time": "2020-08-16T00:03:04.284632", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**Let's take a look at our data!**" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:04.331882Z", - "iopub.status.busy": "2020-08-16T00:03:04.331062Z", - "iopub.status.idle": "2020-08-16T00:03:04.342848Z", - "shell.execute_reply": "2020-08-16T00:03:04.342359Z" - }, - "papermill": { - "duration": 0.035458, - "end_time": "2020-08-16T00:03:04.342950", - "exception": false, - "start_time": "2020-08-16T00:03:04.307492", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PclassNameSexAgeSibSpParchFareCabinEmbarked
03Braund, Mr. Owen Harrismale22.0107.2500NaNS
11Cumings, Mrs. John Bradley (Florence Briggs Th...female38.01071.2833C85C
23Heikkinen, Miss. Lainafemale26.0007.9250NaNS
31Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01053.1000C123S
43Allen, Mr. William Henrymale35.0008.0500NaNS
\n", - "
" - ], - "text/plain": [ - " Pclass Name Sex Age \\\n", - "0 3 Braund, Mr. Owen Harris male 22.0 \n", - "1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 \n", - "2 3 Heikkinen, Miss. Laina female 26.0 \n", - "3 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 \n", - "4 3 Allen, Mr. William Henry male 35.0 \n", - "\n", - " SibSp Parch Fare Cabin Embarked \n", - "0 1 0 7.2500 NaN S \n", - "1 1 0 71.2833 C85 C \n", - "2 0 0 7.9250 NaN S \n", - "3 1 0 53.1000 C123 S \n", - "4 0 0 8.0500 NaN S " - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "train.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.013019, - "end_time": "2020-08-16T00:03:04.368044", - "exception": false, - "start_time": "2020-08-16T00:03:04.355025", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**Let's see how many null values there are! We need to fill in these values later.**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.011338, - "end_time": "2020-08-16T00:03:04.392074", - "exception": false, - "start_time": "2020-08-16T00:03:04.380736", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**We see that we have some missing values from age, and a ton missing from cabin**" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:04.421999Z", - "iopub.status.busy": "2020-08-16T00:03:04.421198Z", - "iopub.status.idle": "2020-08-16T00:03:04.759985Z", - "shell.execute_reply": "2020-08-16T00:03:04.759385Z" - }, - "papermill": { - "duration": 0.356365, - "end_time": "2020-08-16T00:03:04.760097", - "exception": false, - "start_time": "2020-08-16T00:03:04.403732", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - 
"output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "sns.heatmap(train.isnull(),yticklabels=False,cbar='BuPu')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:04.797107Z", - "iopub.status.busy": "2020-08-16T00:03:04.795587Z", - "iopub.status.idle": "2020-08-16T00:03:04.799528Z", - "shell.execute_reply": "2020-08-16T00:03:04.799041Z" - }, - "papermill": { - "duration": 0.027205, - "end_time": "2020-08-16T00:03:04.799633", - "exception": false, - "start_time": "2020-08-16T00:03:04.772428", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Cabin 687\n", - "Age 177\n", - "Embarked 2\n", - "Fare 0\n", - "Parch 0\n", - "SibSp 0\n", - "Sex 0\n", - "Name 0\n", - "Pclass 0\n", - "dtype: int64" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "train.isnull().sum().sort_values(ascending=False)[0:20]\n", - "# we can see that cabin is missing a lot of values, and age is too!" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.011933, - "end_time": "2020-08-16T00:03:04.823720", - "exception": false, - "start_time": "2020-08-16T00:03:04.811787", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# **Data Cleaning**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.011829, - "end_time": "2020-08-16T00:03:04.847591", - "exception": false, - "start_time": "2020-08-16T00:03:04.835762", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**This is an awesome loop I created that preprocesses the data. 
It does the following**\n", - "\n", - "* Fills in null values based on mean or mode\n", - "* Drops columns that are missing 50 percent or more of their values\n", - "\n", - "*You guys are free to copy this for loop for your own projects!*[](http://)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:04.892575Z", - "iopub.status.busy": "2020-08-16T00:03:04.891608Z", - "iopub.status.idle": "2020-08-16T00:03:04.907126Z", - "shell.execute_reply": "2020-08-16T00:03:04.906618Z" - }, - "papermill": { - "duration": 0.047502, - "end_time": "2020-08-16T00:03:04.907233", - "exception": false, - "start_time": "2020-08-16T00:03:04.859731", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "\n", - "\n", - "#clean the train data\n", - "for i in list(train.columns):\n", - " dtype = train[i].dtype\n", - " values = 0\n", - " if(dtype == float or dtype == int):\n", - " method = 'mean'\n", - " else:\n", - " method = 'mode'\n", - " if(train[i].notnull().sum() / 891 <= .5):\n", - " train.drop(i, axis = 1, inplace=True)\n", - " elif method == 'mean':\n", - " train[i]=train[i].fillna(train[i].mean())\n", - "\n", - " else:\n", - " train[i]=train[i].fillna(train[i].mode()[0])\n", - "\n", - "# WE CAN DO THIS FOR THE TEST SET TOO!\n", - "\n", - "#clean the test data\n", - "for i in list(test.columns):\n", - " dtype = test[i].dtype\n", - " values = 0\n", - " if(dtype == float or dtype == int):\n", - " method = 'mean'\n", - " else:\n", - " method = 'mode'\n", - " if(test[i].notnull().sum() / 418 <= .5):\n", - " test.drop(i, axis = 1, inplace=True)\n", - " elif method == 'mean':\n", - " test[i]=test[i].fillna(test[i].mean())\n", - "\n", - " else:\n", - " test[i]=test[i].fillna(test[i].mode()[0])\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.011993, - "end_time": "2020-08-16T00:03:04.931751", - "exception": false, - "start_time": 
"2020-08-16T00:03:04.919758", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**As we can see, all of the missing values are gone!**" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "_kg_hide-input": true, - "execution": { - "iopub.execute_input": "2020-08-16T00:03:04.963771Z", - "iopub.status.busy": "2020-08-16T00:03:04.962852Z", - "iopub.status.idle": "2020-08-16T00:03:05.218436Z", - "shell.execute_reply": "2020-08-16T00:03:05.218946Z" - }, - "papermill": { - "duration": 0.275108, - "end_time": "2020-08-16T00:03:05.219105", - "exception": false, - "start_time": "2020-08-16T00:03:04.943997", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "sns.heatmap(train.isnull(),yticklabels=False,cbar='BuPu')\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.013345, - "end_time": "2020-08-16T00:03:05.246558", - "exception": false, - "start_time": "2020-08-16T00:03:05.233213", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# **Feature Engineering**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.014138, - "end_time": "2020-08-16T00:03:05.274477", - "exception": false, - "start_time": "2020-08-16T00:03:05.260339", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**Title**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.012818, - "end_time": "2020-08-16T00:03:05.300776", - "exception": false, - "start_time": "2020-08-16T00:03:05.287958", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Lets take out the Mr, Miss, etc from the name section, and create a new column names title!" 
- ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:05.346398Z", - "iopub.status.busy": "2020-08-16T00:03:05.345561Z", - "iopub.status.idle": "2020-08-16T00:03:05.349914Z", - "shell.execute_reply": "2020-08-16T00:03:05.349327Z" - }, - "papermill": { - "duration": 0.035659, - "end_time": "2020-08-16T00:03:05.350024", - "exception": false, - "start_time": "2020-08-16T00:03:05.314365", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#TITLE\n", - "\n", - "train_test_data = [train, test] # combining train and test dataset\n", - "\n", - "for dataset in train_test_data:\n", - " dataset['Title'] = dataset['Name'].str.extract(' ([A-Za-z]+)\\.', expand=False)\n", - "\n", - "\n", - "title_mapping = {\"Mr\": 0, \"Miss\": 1, \"Mrs\": 1, \n", - " \"Master\": 0, \"Dr\": 1, \"Rev\": 0, \"Col\": 0, \"Major\": 0, \"Mlle\": 1,\"Countess\": 1,\n", - " \"Ms\": 1, \"Lady\": 1, \"Jonkheer\": 1, \"Don\": 0, \"Dona\" : 1, \"Mme\": 0,\"Capt\": 0,\"Sir\": 0 }\n", - "for dataset in train_test_data:\n", - " dataset['Title'] = dataset['Title'].map(title_mapping)\n", - " \n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.013586, - "end_time": "2020-08-16T00:03:05.377981", - "exception": false, - "start_time": "2020-08-16T00:03:05.364395", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**Sex**\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.013191, - "end_time": "2020-08-16T00:03:05.405476", - "exception": false, - "start_time": "2020-08-16T00:03:05.392285", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "We can encode male and female as categorical values such as 0 and 1" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:05.442498Z", - "iopub.status.busy": 
"2020-08-16T00:03:05.441620Z", - "iopub.status.idle": "2020-08-16T00:03:05.444837Z", - "shell.execute_reply": "2020-08-16T00:03:05.444291Z" - }, - "papermill": { - "duration": 0.025484, - "end_time": "2020-08-16T00:03:05.444947", - "exception": false, - "start_time": "2020-08-16T00:03:05.419463", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - " \n", - "sex_mapping = {\"male\": 0, \"female\":1}\n", - "for dataset in train_test_data:\n", - " dataset['Sex'] = dataset['Sex'].map(sex_mapping)\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.013681, - "end_time": "2020-08-16T00:03:05.472676", - "exception": false, - "start_time": "2020-08-16T00:03:05.458995", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**Embarked**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.014216, - "end_time": "2020-08-16T00:03:05.500391", - "exception": false, - "start_time": "2020-08-16T00:03:05.486175", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Lets test to see if there is any correlation with Pclass and Embarked " - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:05.537924Z", - "iopub.status.busy": "2020-08-16T00:03:05.537005Z", - "iopub.status.idle": "2020-08-16T00:03:05.764053Z", - "shell.execute_reply": "2020-08-16T00:03:05.763057Z" - }, - "papermill": { - "duration": 0.249537, - "end_time": "2020-08-16T00:03:05.764169", - "exception": false, - "start_time": "2020-08-16T00:03:05.514632", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlgAAAFVCAYAAAAzEJGfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy86wFpkAAAACXBIWXMAAAsTAAALEwEAmpwYAAAjEUlEQVR4nO3dfVSUdeL38c/AMGjCatJMFvGjLJVyj889WBuUFQ8Sqehaa8mW1W1tUrKmW0KWa26u4u3muj3dvx5O6l2SaahLWCr5Oy2maQ9mP9TdCv2FLowmBQgMMHP/kc69riWSX7hmmPfrnM5xrrlm+GDfkQ/X9b2+l83n8/kEAAAAY8KsDgAAANDZULAAAAAMo2ABAAAYRsECAAAwjIIFAABgGAULAADAMLvVAf7dkSN18npZOaKjxMRE6fDhWqtjAO2KcY5QwDjvWGFhNp19drcffT7gCpbX66NgdTD+vhEKGOcIBYzzwMEpQgAAAMMoWAAAAIad1inCiRMn6ptvvpHd/v3uv//971VXV6ennnpKjY2NSktLU05OjiSprKxMubm5qqur07BhwzR79mz/6wAAAEJBq83H5/OpvLxcJSUl/qLU0NCg1NRULV26VOedd54mT56szZs3KykpSdOnT9eTTz6pQYMGaebMmSooKNCECRPOKGRLS7OOHHGrudlzRu/T0cLCwtW1a5SiorrLZrNZHQcAAHSQVgvWl19+KUmaNGmSqqurNX78ePXt21fx8fGKi4uTJGVkZKi4uFiXXHKJGhoaNGjQIElSZmamFi9efMYF68gRt7p0OUvduvUKmqLi8/nU0tKsmppqHTniVs+eLqsjAQCADtJqwfruu+80fPhwPfbYY2pqalJWVpbuueceOZ1O/z4ul0uVlZWqqqo6YbvT6VRlZWWbAsXERJ20rarqf9S9e4+gKVfHRUSEKzLSqX/+83/kdEZbHedHBXI2wBTGOUIB4zxwtFqwBg8erMGDB/sfjxs3TosXL9bQoUP923w+n2w2m7xe7wkl6Pj2tjh8uPaky0y9Xq9aWnySgvPy05YWr9zuGqtj/CCnMzpgswGmMM4RChjnHSsszPaDB4WOa7Vgbd++XU1NTRo+fLik70tTbGys3G63fx+32y2Xy6VevXqdsP3QoUNyucyfGov+WVd1iTQ/cb6hsVk139Ubf18AABBaWm0pNTU1Wrx4sV5//XU1NTVp9erVmj17tqZOnap9+/bpggsu0Lp16zR27FjFxsYqMjJSO3bs0NChQ1VYWKjExETjobtE2pUxrdD4+65dOEqn0/1LSjZo6dJX1NLSIp/Pq9TUdE2YkGU8DwAACE6tFqzrr79en376qUaPHi2v16sJEyZo8ODBmjdvnrKzs9XY2KikpCSlpqZKkvLz85WXl6fa2lr1799fWVmdq3i43VVasuRPeumlZerevYeOHj2qKVP+l/7jP+L1i18kWR0PAAAEgNM6zzZ16lRNnTr1hG3Dhw/XmjVrTto3ISFBK1euNBIuEFVXV6u5uVkNDQ3q3l0666yzlJf3hByOSKujAQAMiO4RqS4RDqtj/CTBOsm9ocmjmupGq2MYxQqgbdSnT19de22Sxo8fpb59+2nw4GG66aZUXXBBnNXRAAAGdIlwaPyK+62OEVIKbn1WNepcBYtb5fwEDz/8qFauXKvRo8epsvKgJk++S5s3b7I6FgAACBAcwWqj0tL3VV9/VDfckKz09FuUnn6L1qxZrXXrCpWUNMLqeAAAIABwBKuNunTpouee+4sOHjwg6ftlK/7+973q06efxckAAECgCMojWA2NzVq7cFS7vG9rhgwZpkmT7tWMGVPV3Pz9/ldeOVx33nmP8TwAACA4BWXBqvmu/rTWq2ovaWk3Ky3tZgsTAACAQMYpQgAAAMMoWAAAAIZRsAAAAAyjYAEAABhGwQIAADCMggUAAGBYUC7TcHZ3h+ztcHPlZk+jjnzrMf6+AAAgtARlwbI7IvXl3LHG37d37pu
SWi9YdXW1eu65v+iTT3YoPNyu6OhoTZmSo379EoxnAgAAwScoC5aVvF6vHn74IQ0ZMkwvv/x/Zbfb9dFH2/Xwww9q2bICde/ew+qIAADAYhSsNvroo+2qrPyn7r57ssLCvp/CNmTIMM2cOUter9fidAAAIBBQsNpo79496tOnr79cHTd8+C8sSgQAAAINVxG2UViYTY52mGAPAAA6DwpWGyUkXKa9e3fL5/OdsP355/+ijz7ablEqAAAQSChYbTRw4GCdfXZPvfTSC2ppaZEkbd26RUVFa3ThhRdZnA4AAASCoJyD1expPLakgvn3bY3NZtO8ef9bf/7zQmVl3Sq73a7u3XtowYKn1bNnjPFMAAAg+ARlwfp+MVDrFgTt0aOHHntsjmVfHwAABDZOEQIAABhGwQIAADCMggUAAGAYBQsAAMAwChYAAIBhQXkVYXSPSHWJcBh/34Ymj2qqW1+qAQAA4FSCsmB1iXBo/Ir7jb9vwa3PqkYULAAAcGaCsmBZrb6+Xv/5n8+qtPR9ORyRioqK0t13T9aQIcOsjgYAAAIABauNfD6fHn10muLjL9TSpQWy2+3au3e3ZszI0ezZf9DAgYOtjggAACzGJPc22rnzU+3fv0/Z2b+V3f59P+3bN0FZWZP08sv/x+J0AAAgEFCw2qisbJf69u3nL1fHDRo0RP/9359blAoAAAQSClYb+Xzf3/D533k8jfL5vBYkAgAAgYaC1UaXXdZfe/bsVnNzsyTpyJEj8vl8+vzzz9Sv36UWpwMAAIEgKCe5NzR5VHDrs+3yvq0ZMGCQ4uMv1JIlizRlSo7efnud/uu/SlRR8bUef/xJ45kAAEDwCcqCVVPdaNl6VTabTU89tVDPPbdEd9zxS9ntEYqOjtYFF1ygrVu3aMCAQXI4zC+CCgAAgkdQFiyrdenSRVOnPizpYf82r9erLVv+poiICOuCAQDOmKe5fc6S4Md5mls/gxRsKFiGhIWF6ZprrrU6BgDgDDnsDn05d6zVMUJK79w3pU52J5WgmeTu8/msjvCTBGtuAADw0wVFwQoLC1dLS7PVMX6SpiaPwsM5UAgAQCgJioLVtWuUamqqg2qdKZ/PJ4+nUdXVbkVF9bA6DgAA6EBBcWglKqq7jhxxq7Lya0nBc8otPNyu6Oiz1bVrN6ujAACADhQUBctms6lnT5fVMQAAAE7LaZ8i/OMf/6hHHnlEklRaWqqMjAwlJydr0aJF/n3KysqUmZmplJQU5ebm+lc7BwAACCWnVbC2bNmi1atXS5IaGho0c+ZMPfPMMyoqKtKuXbu0efNmSdL06dM1a9YsrV+/Xj6fTwUFBe2XHAAAIEC1WrCqq6u1aNEi3XfffZKknTt3Kj4+XnFxcbLb7crIyFBxcbEqKirU0NCgQYMGSZIyMzNVXFzcruEBAAACUatzsGbNmqWcnBwdPHhQklRVVSWn0+l/3uVyqbKy8qTtTqdTlZWVbQ4UExPV5tfgzDid0VZHANod4xwIbJ3tM3rKgvXGG2/ovPPO0/Dhw7Vq1SpJ398Sxmaz+ffx+Xyy2Ww/ur2tDh+uldcbPFcKBjunM1pud43VMYB2xThHW3S2H/TBItg+o2FhtlMeFDplwSoqKpLb7daoUaP07bff6ujRo6qoqFB4eLh/H7fbLZfLpV69esntdvu3Hzp0SC4XV/4BAIDQc8qC9fLLL/v/vGrVKm3btk2zZ89WcnKy9u3bpwsuuEDr1q3T2LFjFRsbq8jISO3YsUNDhw5VYWGhEhMT2/0bAAAACDRtXgcrMjJS8+bNU3Z2thobG5WUlKTU1FRJUn5+vvLy8lRbW6v+/fsrKyvLeGAAAIBAZ/MF2N2ImYPVsZibglDAOEdbOJ3R+nLuWKtjhJTeuW8G3We0tTlYQXEvQgAAgGBCwQIAADCMggUAAGAYBQsAAMAwChYAAIBhFCwAAADDKFgAAACGUbAAAAAMo2ABAAAYRsECAAAwjIIFAABgGAULAADAMAo
WAACAYRQsAAAAwyhYAAAAhlGwAAAADKNgAQAAGEbBAgAAMIyCBQAAYJjd6gAAAAQSb5NHvXPftDpGSPE2eayOYBwFCwCAfxEW4VDGtEKrY4SUtQtHSWq0OoZRnCIEAAAwjIIFAABgGAULAADAMAoWAACAYRQsAAAAwyhYAAAAhlGwAAAADKNgAQAAGEbBAgAAMIyCBQAAYBgFCwAAwDAKFgAAgGEULAAAAMMoWAAAAIZRsAAAAAyjYAEAABhGwQIAADCMggUAAGAYBQsAAMAwChYAAIBhFCwAAADDKFgAAACGUbAAAAAMO62C9fTTT2vkyJFKT0/Xyy+/LEkqLS1VRkaGkpOTtWjRIv++ZWVlyszMVEpKinJzc9Xc3Nw+yQEAAAJUqwVr27Zt+uCDD7RmzRq9+eabWrp0qXbv3q2ZM2fqmWeeUVFRkXbt2qXNmzdLkqZPn65Zs2Zp/fr18vl8KigoaPdvAgAAIJC0WrCuuOIKvfrqq7Lb7Tp8+LBaWlr03XffKT4+XnFxcbLb7crIyFBxcbEqKirU0NCgQYMGSZIyMzNVXFzc3t8DAABAQLGfzk4RERFavHixXnrpJaWmpqqqqkpOp9P/vMvlUmVl5UnbnU6nKisr2xQoJiaqTfvjzDmd0VZHANod4xwIbJ3tM3paBUuSHnzwQd1777267777VF5eLpvN5n/O5/PJZrPJ6/X+4Pa2OHy4Vl6vr02vwU/ndEbL7a6xOgbQrhjnaIvO9oM+WATbZzQszHbKg0KtniL84osvVFZWJknq2rWrkpOTtXXrVrndbv8+brdbLpdLvXr1OmH7oUOH5HK5ziQ/AABA0Gm1YH399dfKy8uTx+ORx+PRxo0bddttt+mrr77Svn371NLSonXr1ikxMVGxsbGKjIzUjh07JEmFhYVKTExs928CAAAgkLR6ijApKUk7d+7U6NGjFR4eruTkZKWnp6tnz57Kzs5WY2OjkpKSlJqaKknKz89XXl6eamtr1b9/f2VlZbX7NwEAABBIbD6fL6AmPDEHq2MxNwWhgHGOtnA6o5UxrdDqGCFl7cJRQfcZPeM5WAAAAGgbChYAAIBhFCwAAADDKFgAAACGUbAAAAAMo2ABAAAYRsECAAAwjIIFAABgGAULAADAMAoWAACAYRQsAAAAwyhYAAAAhlGwAAAADKNgAQAAGEbBAgAAMIyCBQAAYBgFCwAAwDAKFgAAgGEULAAAAMMoWAAAAIZRsAAAAAyjYAEAABhGwQIAADCMggUAAGAYBQsAAMAwChYAAIBhFCwAAADDKFgAAACGUbAAAAAMo2ABAAAYRsECAAAwjIIFAABgGAULAADAMAoWAACAYRQsAAAAwyhYAAAAhlGwAAAADKNgAQAAGEbBAgAAMMxudYDOIrpHpLpEOKyO8ZM4ndFWR/hJGpo8qqlutDoGAAAn4QiWIWE2qxOEHv7OAQCBiiNYhjjsDn05d6zVMUJK79w3JXEECwAQeDiCBQAAYBgFCwAAwLDTKlhLlixRenq60tPTNX/+fElSaWmpMjIylJycrEWLFvn3LSsrU2ZmplJSUpSbm6vm5ub2SQ4AABCgWi1YpaWlev/997V69Wq99dZb+vzzz7Vu3TrNnDlTzzzzjIqKirRr1y5t3rxZkjR9+nTNmjVL69evl8/nU0FBQbt/EwAAAIGk1YLldDr1yCOPyOFwKCIiQhdffLHKy8sVHx+vuLg42e12ZWRkqLi4WBUVFWpoaNCgQYMkSZmZmSouLm7v7wEAACCgtHoVYZ8+ffx/Li8v19tvv6077rhDTqfTv93lcqmyslJVVVUnbHc6naqsrGxToJiYqDbtj9AWrGt4oeMxVoDA1tk+o6e9TMPf//53TZ48WTNmzFB4eLjKy8v9z/l8PtlsNnm9XtlstpO2t8Xhw7Xyen1tek0g6GwDI1i43TVWR0AQcDqjGSs4bfx7bo1g+4yGhdlOeVDotCa579ixQ3feeaemTZumMWPGqFevXnK
73f7n3W63XC7XSdsPHTokl8t1BvEBAACCT6sF6+DBg3rggQeUn5+v9PR0SdLAgQP11Vdfad++fWppadG6deuUmJio2NhYRUZGaseOHZKkwsJCJSYmtu93AAAAEGBaPUX44osvqrGxUfPmzfNvu+222zRv3jxlZ2ersbFRSUlJSk1NlSTl5+crLy9PtbW16t+/v7KystovPQAAQABqtWDl5eUpLy/vB59bs2bNSdsSEhK0cuXKM08GAAAQpFjJHQAAwDAKFgAAgGGnvUwDAET3iFSXCIfVMX6SYL30vqHJo5rqRqtjAGgjChaA09YlwqHxK+63OkZIKbj1WdWIggUEGwoWgNPmafao4NZnrY4RUjzNHqsjAPgJKFgATpvD7tCXc8daHSOk9M59U+IIFhB0mOQOAABgGAULAADAMAoWAACAYRQsAAAAwyhYAAAAhlGwAAAADKNgAQAAGMY6WABOm7fJc2xdJnQUbxMLjQLBiIIF4LSFRTiUMa3Q6hghZe3CUWKhUSD4cIoQAADAMAoWAACAYRQsAAAAwyhYAAAAhlGwAAAADKNgAQAAGEbBAgAAMIyCBQAAYBgFCwAAwDAKFgAAgGEULAAAAMMoWAAAAIZRsAAAAAyjYAEAABhGwQIAADCMggUAAGAYBQsAAMAwu9UBOgtvk0e9c9+0OkZI8TZ5rI4AAMAPomAZEhbhUMa0QqtjhJS1C0dJarQ6BgAAJ+EUIQAAgGEULAAAAMMoWAAAAIZRsAAAAAyjYAEAABhGwQIAADCMggUAAGAYBQsAAMAwChYAAIBhp1WwamtrdfPNN+vrr7+WJJWWliojI0PJyclatGiRf7+ysjJlZmYqJSVFubm5am5ubp/UAAAAAazVgvXpp5/qV7/6lcrLyyVJDQ0Nmjlzpp555hkVFRVp165d2rx5syRp+vTpmjVrltavXy+fz6eCgoJ2DQ8AABCIWi1YBQUFevzxx+VyuSRJO3fuVHx8vOLi4mS325WRkaHi4mJVVFSooaFBgwYNkiRlZmaquLi4XcMDAAAEolZv9jx37twTHldVVcnpdPofu1wuVVZWnrTd6XSqsrKyzYFiYqLa/BqELqcz2uoIQLtjnCMUdLZx3mrB+nder1c2m83/2OfzyWaz/ej2tjp8uFZer6/Nr7NaZxsYwcLtrrE6QkhhnFuDcd6xGOfWCLZxHhZmO+VBoTZfRdirVy+53W7/Y7fbLZfLddL2Q4cO+U8rAgAAhJI2F6yBAwfqq6++0r59+9TS0qJ169YpMTFRsbGxioyM1I4dOyRJhYWFSkxMNB4YAAAg0LX5FGFkZKTmzZun7OxsNTY2KikpSampqZKk/Px85eXlqba2Vv3791dWVpbxwAAAAIHutAvWpk2b/H8ePny41qxZc9I+CQkJWrlypZlkAAAAQYqV3AEAAAyjYAEAABhGwQIAADCMggUAAGAYBQsAAMAwChYAAIBhFCwAAADDKFgAAACGUbAAAAAMo2ABAAAYRsECAAAwjIIFAABgGAULAADAMAoWAACAYRQsAAAAwyhYAAAAhlGwAAAADKNgAQAAGEbBAgAAMIyCBQAAYBgFCwAAwDAKFgAAgGEULAAAAMMoWAAAAIZRsAAAAAyjYAEAABhGwQIAADCMggUAAGAYBQsAAMAwChYAAIBhFCwAAADDKFgAAACGUbAAAAAMo2ABAAAYRsECAAAwjIIFAABgGAULAADAMAoWAACAYRQsAAAAwyhYAAAAhlGwAAAADKNgAQAAGEbBAgAAMIyCBQAAYFi7FKy1a9dq5MiRSk5O1vLly9vjSwAAAAQsu+k3rKys1KJFi7Rq1So5HA7ddtttuvLKK3XJJZeY/lIAAAAByXjBKi0t1VVXXaUePXpIklJSUlRcXKwpU6ac1uvDwmymI3UY19ldrY4QcoJ5vAQrxnnHY5x3PMZ5xwu2cd5aXuMFq6qqSk6n0//Y5XJp586dp/36s8/uZjpSh3kxL9nqCCEnJibK6gg
hh3He8RjnHY9x3vE62zg3PgfL6/XKZvv/rc7n853wGAAAoLMzXrB69eolt9vtf+x2u+VyuUx/GQAAgIBlvGBdffXV2rJli7755hvV19frnXfeUWJioukvAwAAELCMz8E699xzlZOTo6ysLDU1NWncuHEaMGCA6S8DAAAQsGw+n89ndQgAAIDOhJXcAQAADKNgAQAAGEbBAgAAMIyCBQAAYBgFCwAAwDAKVoirra21OgIA4CeqqqqSJG3fvl3Lly9XQ0ODxYlwHAUrxJSUlGjBggWqq6tTWlqabrjhBq1atcrqWIBR+/fv15o1a+Tz+fTYY49p7Nix+uyzz6yOBRj1+OOP609/+pP+8Y9/aNq0afr888+Vl5dndSwcQ8EKMUuWLFFGRoaKioo0YMAAbdq0ScuWLbM6FmDUo48+Kq/Xq40bN6q8vFyPPvqo5s6da3UswKjPPvtMc+fO1dtvv61x48bpD3/4g7766iurY+EYClYISkhI0HvvvacRI0aoW7duampqsjoSYFRjY6NGjx6tkpISZWRkaNiwYfJ4PFbHAoxqaWnx/yKRmJio+vp61dfXWx0Lx1CwQsw555yjOXPmaNeuXbr22ms1b948nX/++VbHAowKDw/X+vXr9d577+m6667Thg0bFBbGP3foXEaPHq1f/OIXio2N1cCBAzV27FjdeuutVsfCMdwqJ8TU1tZqw4YNGjx4sOLj47V8+XKNGjVKUVFRVkcDjNmzZ49eeeUVXXfddUpJSVFOTo4mT56shIQEq6MBRnm9Xv8vD0eOHNHZZ59tcSIcx690Iaa5uVkul0vx8fF6/vnntXXrVn3zzTdWxwKM6tevn3JycpSSkqLt27dr2LBhuvDCC62OBRhVUlKihQsX+i9aSk1N5aKlAELBCjHTpk1TWVmZSktLVVxcrBEjRig3N9fqWIBRXF2FUMBFS4GNghVivv32W919993auHGjxowZo9GjR6uurs7qWIBRXF2FUMFFS4GLghVivF6vdu3apQ0bNuj6669XWVmZWlparI4FGMXVVQgFXLQU2OxWB0DHmj59uubPn6+77rpLcXFxGj9+vB599FGrYwFGHb+6asiQIRo4cKBGjhyp8ePHWx0LMGrhwoXasGGDsrKydNZZZykuLk7Z2dlWx8IxXEUY4rxeryoqKhQXF2d1FMCof7266ptvvlHPnj0tTgSY5fF4tHnzZv80j5aWFn399dd66KGHLE4GiSNYIef111/X/PnzTzhdEhsbqw0bNliYCjDrk08+0fPPP6+jR4/K5/PJ6/XqwIED2rRpk9XRAGN++9vf6ttvv9X+/fs1bNgwbd26VUOGDLE6Fo5hDlaIeeGFF1RYWKiRI0fq3XffVV5engYOHGh1LMComTNn6sYbb1RLS4tuv/12nXvuubrxxhutjgUYtWfPHr366qu66aabdM899+i1115TRUWF1bFwDAUrxMTExCguLk79+vXT3r17dfvtt2vPnj1WxwKMcjgcGjt2rK644gr97Gc/0/z58/X+++9bHQswKiYmRjabTRdddJH27NmjuLg4riIMIBSsENO1a1d98MEH6tevn0pKSuR2u9XQ0GB1LMCoyMhIVVdX66KLLtKnn36q8PBwrpZFp9OnTx/NmTNHV155pV555RW98MILYlp14KBghZi8vDxt2rRJ1157raqrq5Wamqo77rjD6liAUXfeeadycnJ0/fXXq7CwUOnp6fr5z39udSzAqCeeeEJpaWm65JJL9OCDD6qqqkoLFy60OhaO4SpCAJ2Sz+eTzWbT0aNHVV5erksvvVQ2m83qWMAZ+/DDD0/5/OWXX95BSXAqFKwQMWLEiFP+cNm4cWMHpgHaR2truj311FMdlARoPxMnTvzR52w2m1599dUOTIMfQ8EKEa1dWRIbG9tBSYD2s3r16lM+P2bMmA5KAnSMw4cPKyYmRvX19aqqqlJ8fLzVkXAMc7BCRGxsrGJjY1VXV6f8/HzFxsaqoaFBM2b
MUGNjo9XxACPGjBmjMWPG6KabbtLRo0c1ZswYXX311dq/f79SU1OtjgcYtXTpUt1zzz2Svl9M97777tOKFSssToXjKFghJi8vT6NHj5YkXXzxxfrNb36j3Nxca0MBhj388MOqqqqSJHXr1k1er1czZsywOBVg1ooVK7R8+XJJ3/8SvWrVKi1btsziVDiOghVi6uvrlZSU5H98zTXXcBNcdDoHDhxQTk6OJCkqKko5OTnav3+/xakAs5qamuRwOPyPIyIiLEyDf8etckJMz5499dprr+mWW26RJBUVFSkmJsbiVIBZNptNe/bsUb9+/SRJX3zxhex2/rlD53LjjTfq17/+tdLS0mSz2bR+/XrdcMMNVsfCMUxyDzEHDhzQ7NmztW3bNjkcDg0bNkyPPfaYevXqZXU0wJjS0lJNnz5d5557riTpyJEjWrBggYYNG2ZxMsCs4uJiffjhh7Lb7br88su5JVQAoWAB6JQ8Ho/27t0ru92u3r17n3AqBQDaGwULAADAMCa5AwAAGMaszxDzt7/9Tddcc80J29555x0lJydblAgA0BbcKic4cIowRBQVFcnj8Wjx4sV68MEH/dubmpr0wgsv6N1337UwHWAGt8pBKDh+q5zq6mrt379fQ4YMUVhYmD7++GP17dtXr7/+usUJIXEEK2TU1dXpo48+Ul1dnbZu3erfHh4e7l8vCAh2V1xxhSSppKREdXV1uuWWW2S321VUVKTo6GiL0wFmLF26VJJ07733asmSJf7b41RUVGjWrFlWRsO/4AhWiNmyZYuGDx/uf1xbW6uoqCgLEwHm/fKXv9SKFSsUFvb9NFOv16vx48dr5cqVFicDzElPT9df//pX/2Ofz6eRI0fq7bfftjAVjmOSe4ipr6/XggULVFdXp7S0NN1www1atWqV1bEAo2pqalRdXe1/fOjQIR09etS6QEA7uOyyy/S73/1O7733nkpKSjRt2jTWegsgHMEKMWPHjtXcuXP12Wefafv27Zo1a5YmTpxIyUKn8tZbbyk/P19DhgyRz+fTJ598ory8PKWkpFgdDTDG4/Fo2bJl2rZtmyTp6quv1oQJE7hrQYDg/0IISkhI0J///Gfdcsst6tatm5qamqyOBBg1evRoXX311fr4449ls9n0xBNPcEsodDr333+/XnzxRU2aNMnqKPgBFKwQc84552jOnDnatWuXFixYoHnz5un888+3OhZg1Hfffad3331X1dXV8vl82rt3ryRpypQpFicDzKmvr9fBgwd13nnnWR0FP4CCFWIWLlyoDRs2KCsrS2eddZbi4uL4oYNO56GHHlJ0dLT69Okjm81mdRygXRw5ckQjRoxQTEyMIiMj5fP5ZLPZtHHjRqujQczBAtAJZWRkaO3atVbHANpVRUXFD26PjY3t4CT4IRzBChEHDhw45fOcJkRncumll2r37t1KSEiwOgrQLsrLy9W1a1ede+65euONN7Rnzx4NHTpUaWlpVkfDMRzBChEZGRkqLy+Xy+XSv/8v55AyOpsxY8Zo9+7dnDpBp/TKK69o6dKl8nq9uuqqq3Tw4EHddNNN2rRpk4YMGaIHHnjA6ogQBStk1NbWasKECXr88cc1dOhQq+MA7YpTJ+jMMjIytHLlSh06dEg333yzPvjgA0VGRsrj8WjcuHFas2aN1REhFhoNGVFRUXryySf11ltvWR0FaFcbNmzQpk2b5PV6FRsb6//v/ffftzoaYITX65XD4VBsbKwmTZqkyMhI/3MtLS0WJsO/omCFkAEDBmjOnDlWxwDaTX5+vpYtW6by8nLddtttKiws9D/HDXDRWSQnJ+uOO+5QS0uLsrOzJUm7d+/WhAkTmIMVQJjkDqDT2Lx5s1avXi273a6JEydq0qRJcjgcSktLO2nuIRCsHnroIX344YcKDw/3b3M4HMrOzlZSUpKFyfCvKFgAOo3jk9kl6cILL9Tzzz+vu+66Sz179mQ9LHQql19++QmPe/furd69e1uUBj+
EU4QAOo3U1FRNnDhRO3fulCT16dNHTz/9tKZOnar9+/dbnA5AKOEqQgCdypYtW+RyuXTxxRf7tx08eFAvvfSScnNzLUwGIJRQsAAAAAzjFCEAAIBhFCwAAADDKFgAAACGUbAAAAAM+391xyr7fBVrugAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "Pclass1 = train_data[train_data['Pclass'] == 1]['Embarked'].value_counts()\n", - "Pclass2 = train_data[train_data['Pclass'] == 2]['Embarked'].value_counts()\n", - "Pclass3 = train_data[train_data['Pclass'] == 3]['Embarked'].value_counts()\n", - "\n", - "df = pd.DataFrame([Pclass1, Pclass2, Pclass3])\n", - "df.index = ['1st class','2nd class', '3rd class']\n", - "df.plot(kind='bar',stacked=True, figsize=(10,5))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.014366, - "end_time": "2020-08-16T00:03:05.793175", - "exception": false, - "start_time": "2020-08-16T00:03:05.778809", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "We can assign each embarked value to a numberical value for training later!" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:05.830003Z", - "iopub.status.busy": "2020-08-16T00:03:05.829030Z", - "iopub.status.idle": "2020-08-16T00:03:05.835622Z", - "shell.execute_reply": "2020-08-16T00:03:05.835037Z" - }, - "papermill": { - "duration": 0.027899, - "end_time": "2020-08-16T00:03:05.835744", - "exception": false, - "start_time": "2020-08-16T00:03:05.807845", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "for data in train_test_data:\n", - " data['Embarked'] = data['Embarked'].fillna(\"S\")\n", - " \n", - "embarked_mapping = {\"S\": 0, \"C\": 1, \"Q\": 2}\n", - "for dataset in train_test_data:\n", - " dataset['Embarked'] = dataset['Embarked'].map(embarked_mapping)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.013917, - "end_time": "2020-08-16T00:03:05.863782", - "exception": false, - "start_time": "2020-08-16T00:03:05.849865", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**Family Size**" - ] - }, - { - "cell_type": "markdown", 
- "metadata": { - "papermill": { - "duration": 0.014221, - "end_time": "2020-08-16T00:03:05.892815", - "exception": false, - "start_time": "2020-08-16T00:03:05.878594", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "Parch ( Parent & child ) and Sibsp( Sibling & Spouse ) are both contributing factors to family size, so lets make a new column called family size, and drop the other ones." - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:05.931503Z", - "iopub.status.busy": "2020-08-16T00:03:05.930571Z", - "iopub.status.idle": "2020-08-16T00:03:05.933769Z", - "shell.execute_reply": "2020-08-16T00:03:05.933193Z" - }, - "papermill": { - "duration": 0.025993, - "end_time": "2020-08-16T00:03:05.933872", - "exception": false, - "start_time": "2020-08-16T00:03:05.907879", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "train[\"FamilySize\"] = train['SibSp'] + train['Parch'] + 1\n", - "test[\"FamilySize\"] = test['SibSp'] + test['Parch'] + 1" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.014325, - "end_time": "2020-08-16T00:03:05.962742", - "exception": false, - "start_time": "2020-08-16T00:03:05.948417", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# **Correlations**" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:05.997135Z", - "iopub.status.busy": "2020-08-16T00:03:05.996451Z", - "iopub.status.idle": "2020-08-16T00:03:06.312682Z", - "shell.execute_reply": "2020-08-16T00:03:06.312109Z" - }, - "papermill": { - "duration": 0.334816, - "end_time": "2020-08-16T00:03:06.312796", - "exception": false, - "start_time": "2020-08-16T00:03:05.977980", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 13, - "metadata": {}, 
- "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "sns.heatmap(train.corr(),cbar='plasma')\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:06.354102Z", - "iopub.status.busy": "2020-08-16T00:03:06.352041Z", - "iopub.status.idle": "2020-08-16T00:03:06.354808Z", - "shell.execute_reply": "2020-08-16T00:03:06.355312Z" - }, - "papermill": { - "duration": 0.027178, - "end_time": "2020-08-16T00:03:06.355462", - "exception": false, - "start_time": "2020-08-16T00:03:06.328284", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "train.drop(['Name'], axis = 1, inplace=True)\n", - "test.drop(['Name'], axis = 1, inplace=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:06.398320Z", - "iopub.status.busy": "2020-08-16T00:03:06.397342Z", - "iopub.status.idle": "2020-08-16T00:03:06.401654Z", - "shell.execute_reply": "2020-08-16T00:03:06.401144Z" - }, - "papermill": { - "duration": 0.031322, - "end_time": "2020-08-16T00:03:06.401771", - "exception": false, - "start_time": "2020-08-16T00:03:06.370449", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PclassSexAgeSibSpParchFareEmbarkedTitleFamilySize
03022.0107.2500002
11138.01071.2833112
23126.0007.9250011
31135.01053.1000012
43035.0008.0500001
\n", - "
" - ], - "text/plain": [ - " Pclass Sex Age SibSp Parch Fare Embarked Title FamilySize\n", - "0 3 0 22.0 1 0 7.2500 0 0 2\n", - "1 1 1 38.0 1 0 71.2833 1 1 2\n", - "2 3 1 26.0 0 0 7.9250 0 1 1\n", - "3 1 1 35.0 1 0 53.1000 0 1 2\n", - "4 3 0 35.0 0 0 8.0500 0 0 1" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "train.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:06.444039Z", - "iopub.status.busy": "2020-08-16T00:03:06.443083Z", - "iopub.status.idle": "2020-08-16T00:03:06.447367Z", - "shell.execute_reply": "2020-08-16T00:03:06.446894Z" - }, - "papermill": { - "duration": 0.030995, - "end_time": "2020-08-16T00:03:06.447465", - "exception": false, - "start_time": "2020-08-16T00:03:06.416470", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
PclassSexAgeSibSpParchFareEmbarkedTitleFamilySize
03034.5007.8292201
13147.0107.0000012
22062.0009.6875201
33027.0008.6625001
43122.01112.2875013
\n", - "
" - ], - "text/plain": [ - " Pclass Sex Age SibSp Parch Fare Embarked Title FamilySize\n", - "0 3 0 34.5 0 0 7.8292 2 0 1\n", - "1 3 1 47.0 1 0 7.0000 0 1 2\n", - "2 2 0 62.0 0 0 9.6875 2 0 1\n", - "3 3 0 27.0 0 0 8.6625 0 0 1\n", - "4 3 1 22.0 1 1 12.2875 0 1 3" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.01354, - "end_time": "2020-08-16T00:03:06.476026", - "exception": false, - "start_time": "2020-08-16T00:03:06.462486", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# **PreProcessing**" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.014099, - "end_time": "2020-08-16T00:03:06.504426", - "exception": false, - "start_time": "2020-08-16T00:03:06.490327", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**Now that our data looks good, lets get ready to build our models!**" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:06.538376Z", - "iopub.status.busy": "2020-08-16T00:03:06.537612Z", - "iopub.status.idle": "2020-08-16T00:03:12.103848Z", - "shell.execute_reply": "2020-08-16T00:03:12.102607Z" - }, - "papermill": { - "duration": 5.585812, - "end_time": "2020-08-16T00:03:12.103987", - "exception": false, - "start_time": "2020-08-16T00:03:06.518175", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#imports\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.model_selection import GridSearchCV" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.01503, - "end_time": "2020-08-16T00:03:12.134200", - "exception": false, - "start_time": "2020-08-16T00:03:12.119170", - "status": "completed" - }, - 
"tags": [] - }, - "source": [ - "We should scale the values in the data, so that the neural network can train better!" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:12.174580Z", - "iopub.status.busy": "2020-08-16T00:03:12.173590Z", - "iopub.status.idle": "2020-08-16T00:03:12.188156Z", - "shell.execute_reply": "2020-08-16T00:03:12.187674Z" - }, - "papermill": { - "duration": 0.038245, - "end_time": "2020-08-16T00:03:12.188266", - "exception": false, - "start_time": "2020-08-16T00:03:12.150021", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "continuous = ['Age', 'Fare', 'Parch', 'Pclass', 'SibSp', 'FamilySize']\n", - "\n", - "scaler = StandardScaler()\n", - "\n", - "for var in continuous:\n", - " train[var] = train[var].astype('float64')\n", - " train[var] = scaler.fit_transform(train[var].values.reshape(-1, 1))\n", - "for var in continuous:\n", - " test[var] = test[var].astype('float64')\n", - " test[var] = scaler.fit_transform(test[var].values.reshape(-1, 1))" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:12.223938Z", - "iopub.status.busy": "2020-08-16T00:03:12.223053Z", - "iopub.status.idle": "2020-08-16T00:03:12.259925Z", - "shell.execute_reply": "2020-08-16T00:03:12.260500Z" - }, - "papermill": { - "duration": 0.057722, - "end_time": "2020-08-16T00:03:12.260637", - "exception": false, - "start_time": "2020-08-16T00:03:12.202915", - "status": "completed" - }, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
countmeanstdmin25%50%75%max
Pclass891.0-8.772133e-171.000562-1.566107-0.3693650.8273770.8273770.827377
Sex891.03.524130e-010.4779900.0000000.0000000.0000001.0000001.000000
Age891.02.232906e-161.000562-2.253155-0.5924810.0000000.4079263.870872
SibSp891.04.386066e-171.000562-0.474545-0.474545-0.4745450.4327936.784163
Parch891.05.382900e-171.000562-0.473674-0.473674-0.473674-0.4736746.974147
Fare891.03.987333e-181.000562-0.648422-0.489148-0.357391-0.0242469.667167
Embarked891.03.613917e-010.6356730.0000000.0000000.0000001.0000002.000000
Title891.03.591470e-010.4800200.0000000.0000000.0000001.0000001.000000
FamilySize891.0-2.392400e-171.000562-0.560975-0.560975-0.5609750.0591605.640372
\n", - "
" - ], - "text/plain": [ - " count mean std min 25% 50% \\\n", - "Pclass 891.0 -8.772133e-17 1.000562 -1.566107 -0.369365 0.827377 \n", - "Sex 891.0 3.524130e-01 0.477990 0.000000 0.000000 0.000000 \n", - "Age 891.0 2.232906e-16 1.000562 -2.253155 -0.592481 0.000000 \n", - "SibSp 891.0 4.386066e-17 1.000562 -0.474545 -0.474545 -0.474545 \n", - "Parch 891.0 5.382900e-17 1.000562 -0.473674 -0.473674 -0.473674 \n", - "Fare 891.0 3.987333e-18 1.000562 -0.648422 -0.489148 -0.357391 \n", - "Embarked 891.0 3.613917e-01 0.635673 0.000000 0.000000 0.000000 \n", - "Title 891.0 3.591470e-01 0.480020 0.000000 0.000000 0.000000 \n", - "FamilySize 891.0 -2.392400e-17 1.000562 -0.560975 -0.560975 -0.560975 \n", - "\n", - " 75% max \n", - "Pclass 0.827377 0.827377 \n", - "Sex 1.000000 1.000000 \n", - "Age 0.407926 3.870872 \n", - "SibSp 0.432793 6.784163 \n", - "Parch -0.473674 6.974147 \n", - "Fare -0.024246 9.667167 \n", - "Embarked 1.000000 2.000000 \n", - "Title 1.000000 1.000000 \n", - "FamilySize 0.059160 5.640372 " - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "train.describe(include='all').T\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.014497, - "end_time": "2020-08-16T00:03:12.290830", - "exception": false, - "start_time": "2020-08-16T00:03:12.276333", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "# **Logistic Regression**\n", - "\n", - "* `train` - stores training set data\n", - "* `y_train` - stores training set results\n", - "* `test` - stores test set data on which predictions have to be made\n", - "\n", - "First, fit the model using `train` and `y_train` and then make prediction on `test`\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:12.359244Z", - "iopub.status.busy": "2020-08-16T00:03:12.358425Z", - "iopub.status.idle": 
"2020-08-16T00:03:15.147061Z", - "shell.execute_reply": "2020-08-16T00:03:15.146465Z" - }, - "papermill": { - "duration": 2.812936, - "end_time": "2020-08-16T00:03:15.147176", - "exception": false, - "start_time": "2020-08-16T00:03:12.334240", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "from sklearn.linear_model import LogisticRegression\n", - "#Write code for logistic regression here" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "papermill": { - "duration": 0.026701, - "end_time": "2020-08-16T00:03:18.692199", - "exception": false, - "start_time": "2020-08-16T00:03:18.665498", - "status": "completed" - }, - "tags": [] - }, - "source": [ - "**Create Predictions**\n", - "" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": { - "execution": { - "iopub.execute_input": "2020-08-16T00:03:18.751021Z", - "iopub.status.busy": "2020-08-16T00:03:18.750047Z", - "iopub.status.idle": "2020-08-16T00:03:19.062102Z", - "shell.execute_reply": "2020-08-16T00:03:19.061231Z" - }, - "papermill": { - "duration": 0.350469, - "end_time": "2020-08-16T00:03:19.062253", - "exception": false, - "start_time": "2020-08-16T00:03:18.711784", - "status": "completed" - }, - "tags": [] - }, - "outputs": [], - "source": [ - "#Write code to generate predictions file" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.8" - }, - "papermill": { - "duration": 22.836091, - "end_time": "2020-08-16T00:03:21.705944", - "environment_variables": {}, - "exception": null, - "input_path": "__notebook__.ipynb", - "output_path": "__notebook__.ipynb", - "parameters": {}, - "start_time": "2020-08-16T00:02:58.869853", - "version": 
"2.1.0" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/Titanic logistic regression Tensorflow_Omkar.ipynb b/Titanic logistic regression Tensorflow_Omkar.ipynb new file mode 100644 index 0000000..1cf7e6d --- /dev/null +++ b/Titanic logistic regression Tensorflow_Omkar.ipynb @@ -0,0 +1,1765 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.013006, + "end_time": "2020-08-16T00:03:03.128147", + "exception": false, + "start_time": "2020-08-16T00:03:03.115141", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# **Introduction**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.011151, + "end_time": "2020-08-16T00:03:03.152442", + "exception": false, + "start_time": "2020-08-16T00:03:03.141291", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "***Howdy, Welcome to the Titanic***" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.010739, + "end_time": "2020-08-16T00:03:03.174414", + "exception": false, + "start_time": "2020-08-16T00:03:03.163675", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**To whom does this notebook appeal to?**

\n", + "If you are just starting with WNNC's HELLO FOSS and want to do a beginner task in ML, you can try this project!\n", + "\n", + "Data reading, visualization and pre-processing have been done for you. We need you to use logistic regression from the sklearn library to predict whether or not each passenger survived the sinking of the Titanic. We have also included `gender_submission.csv`, a set of predictions that assume all and only female passengers survive, as an example of what a submission file should look like.\n", + "\n", + "Head over to [this cell](#main) to implement logistic regression and then add code for writing the output to file `predictions-ann.csv` [here](#main2). " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.010834, + "end_time": "2020-08-16T00:03:03.196238", + "exception": false, + "start_time": "2020-08-16T00:03:03.185404", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "![](https://faithmag.com/sites/default/files/styles/article_full/public/2018-09/titanic2.jpg?h=6521bd5e&itok=H8td6QVv)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.010991, + "end_time": "2020-08-16T00:03:03.218357", + "exception": false, + "start_time": "2020-08-16T00:03:03.207366", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**Basic Imports**" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", + "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5", + "execution": { + "iopub.execute_input": "2020-08-16T00:03:03.250148Z", + "iopub.status.busy": "2020-08-16T00:03:03.249436Z", + "iopub.status.idle": "2020-08-16T00:03:04.161970Z", + "shell.execute_reply": "2020-08-16T00:03:04.162628Z" + }, + "papermill": { + "duration": 0.933339, + "end_time": "2020-08-16T00:03:04.162859", + "exception": false, + "start_time": "2020-08-16T00:03:03.229520", + "status": "completed" + }, + "tags": [] + }, + 
"outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "gender_submission.csv\n", + "ouput.csv\n", + "test.csv\n", + "train.csv\n" + ] + } + ], + "source": [ + "\n", + "import numpy as np # linear algebra\n", + "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "sns.set()\n", + "\n", + "import os\n", + "files = [f for f in os.listdir('./titanic/')]\n", + "for f in files:\n", + " print(f)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.01215, + "end_time": "2020-08-16T00:03:04.187774", + "exception": false, + "start_time": "2020-08-16T00:03:04.175624", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Reading In the Data" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.011264, + "end_time": "2020-08-16T00:03:04.210794", + "exception": false, + "start_time": "2020-08-16T00:03:04.199530", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "We will be first importing the data, and creating copies. 
I recommend this because it gives you access to a clean, untouched base file.\n", + "Next, we will be dropping some things that we don't need, such as the passenger Id and the Ticket column." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", + "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a", + "execution": { + "iopub.execute_input": "2020-08-16T00:03:04.244520Z", + "iopub.status.busy": "2020-08-16T00:03:04.243877Z", + "iopub.status.idle": "2020-08-16T00:03:04.272874Z", + "shell.execute_reply": "2020-08-16T00:03:04.272333Z" + }, + "papermill": { + "duration": 0.050645, + "end_time": "2020-08-16T00:03:04.272984", + "exception": false, + "start_time": "2020-08-16T00:03:04.222339", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "train_data = pd.read_csv('./titanic/train.csv')\n", + "test_data = pd.read_csv('./titanic/test.csv')\n", + "#make a copy so your original data is not touched\n", + "train = train_data.copy()\n", + "test = test_data.copy()\n", + "train.shape\n", + "y_train = train['Survived']\n", + "\n", + "#We won't need the passenger ID or the Ticket column for the model! 
They do not provide much insight on the training.\n", + "Id = pd.DataFrame(test['PassengerId'])\n", + "train.drop(['PassengerId'], axis = 1, inplace=True)\n", + "test.drop(['PassengerId'], axis = 1, inplace=True)\n", + "train.drop(['Survived'], axis = 1, inplace=True)\n", + "train.drop(['Ticket'], axis = 1, inplace=True)\n", + "test.drop(['Ticket'], axis = 1, inplace=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.011318, + "end_time": "2020-08-16T00:03:04.295950", + "exception": false, + "start_time": "2020-08-16T00:03:04.284632", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**Let's take a look at our data!**" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:04.331882Z", + "iopub.status.busy": "2020-08-16T00:03:04.331062Z", + "iopub.status.idle": "2020-08-16T00:03:04.342848Z", + "shell.execute_reply": "2020-08-16T00:03:04.342359Z" + }, + "papermill": { + "duration": 0.035458, + "end_time": "2020-08-16T00:03:04.342950", + "exception": false, + "start_time": "2020-08-16T00:03:04.307492", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassNameSexAgeSibSpParchFareCabinEmbarked
03Braund, Mr. Owen Harrismale22.0107.2500NaNS
11Cumings, Mrs. John Bradley (Florence Briggs Th...female38.01071.2833C85C
23Heikkinen, Miss. Lainafemale26.0007.9250NaNS
31Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01053.1000C123S
43Allen, Mr. William Henrymale35.0008.0500NaNS
\n", + "
" + ], + "text/plain": [ + " Pclass Name Sex Age \\\n", + "0 3 Braund, Mr. Owen Harris male 22.0 \n", + "1 1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 \n", + "2 3 Heikkinen, Miss. Laina female 26.0 \n", + "3 1 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 \n", + "4 3 Allen, Mr. William Henry male 35.0 \n", + "\n", + " SibSp Parch Fare Cabin Embarked \n", + "0 1 0 7.2500 NaN S \n", + "1 1 0 71.2833 C85 C \n", + "2 0 0 7.9250 NaN S \n", + "3 1 0 53.1000 C123 S \n", + "4 0 0 8.0500 NaN S " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.013019, + "end_time": "2020-08-16T00:03:04.368044", + "exception": false, + "start_time": "2020-08-16T00:03:04.355025", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**Let's see how many null values there are! We need to fill in these values later.**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.011338, + "end_time": "2020-08-16T00:03:04.392074", + "exception": false, + "start_time": "2020-08-16T00:03:04.380736", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**We see that we have some missing values from age, and a ton missing from cabin**" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:04.421999Z", + "iopub.status.busy": "2020-08-16T00:03:04.421198Z", + "iopub.status.idle": "2020-08-16T00:03:04.759985Z", + "shell.execute_reply": "2020-08-16T00:03:04.759385Z" + }, + "papermill": { + "duration": 0.356365, + "end_time": "2020-08-16T00:03:04.760097", + "exception": false, + "start_time": "2020-08-16T00:03:04.403732", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + 
"output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAU4AAAEnCAYAAADGqKr7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAkw0lEQVR4nO3de1hUdf4H8PcooZg7ki4zbWammZoKalki+jOzDG8lI6wlJGo+mLkDRRdJAfMCUl5SA9fNXSsKKEzxgu2ioaSZV6wVViE11gjMGYx0VCAH5vv7w2UAuTgH58wwzPv1PPM8nnOG8xlh5jOf7+V8j0IIIUBERBZrY+8XQETkaJg4iYgkYuIkIpKIiZOISCImTiIiiZg4iYgkYuIkolbv6tWrmDhxIoqKiuody8vLg7+/P3x9fREZGYnKyspbno+Jk4hatRMnTmDq1Kk4d+5cg8fffPNNREdHY9euXRBCYNOmTbc8JxMnETkkg8GAoqKieg+DwVDneZs2bcLbb78NlUpV7xzFxcWoqKjAoEGDAACTJ09GRkbGLWO7NHnQtauE/wYR1VZ+/hu7xHW75//sErfyevFtn8N4scDi5yZ+9iUSEhLq7ddqtQgNDTVvx8bGNnoOvV4PDw8P87aHhwd0Ot0tYzeZOImo+eyVwJzF9OnTodFo6u1XKpUWn6OhK84VCsUtf46Jk4haDlOVxU9VKpWSkmRD1Go1Ll68aN4uKSlpsEl/MyZOIpk4W1PdKoTJpuG6du2Kdu3a4fjx43jkkUewbds2jBw58pY/x8EhImoxRFWlxY/bERISgtzcXADAypUrERcXh3HjxqG8vBzBwcG3/HlFU8vKcXCIqPmcreK0xuDQ9aJci5/req/nbcdrLjbV7czZPlxETbJxU725mDjtjAmMqBYJg0P2xMRJRC0HK04iImlud9DHVpg4iajlMLHiJAtwcIioFjbVyRJMYES1cHCILMGKs/Xi77gZWHGSJfjhar34pdgM7OMkIpKIo+pERNIIwT5OIiJp2MdJ5Nwcuq/RXtjHSeTcODjUDKw4iYgk4jxOIufm0JWfvXBUnYhIIjbViYgk4uAQkXPj4FAzMHESEUnDCfBERFJxcIiISCI21YmIJOKoOhGRRKw4iYgkYsVJRCQRK04iIok4qk7k3Bx6Irq9sOIkcm68cqgZ2MdJRCQRK04iIolYcRIRScSKk4hIoiou8kFEJA0rTrKUPUZfHXrk1YHw9ywREydZih+u1ol/12aQaXAoPT0d69evh9FoxIwZMxAUFFTn+MmTJ7Fw4UIYjUb86U9/wooVK6BUKhs9XxtZXiURUXOYTJY/LKTT6bB69WqkpKRg+/btSE1NxdmzZ+s8JzY2FmFhYdixYwd69OiBjRs3NnlOVpxEMuEE+GaQMDhkMBhgMBjq7VcqlXWqxYMHD8Lb2xvu7u4AAF9fX2RkZECr1ZqfYzKZcO3aNQBAeXk5OnXq1GRsJk4iajkkVJKJiYlISEiot1+r1SI0NNS8rdfr4eHhYd5WqVTIycmp8zNvvfUWZs6ciWXLlsHNzQ2bNm1qMjYTp52xKiGqRUIf5/Tp06HRaOrtv7lvUghR7zkKhcL874qKCkRGRiIxMRFeXl746KOPEBERgQ0bNjQam4nTzpjAiGoIU/0k15ibm+SNUavVyM7ONm/r9XqoVCrz9unTp9GuXTt4eXkBAJ577jmsXbu2yXMycRLJhF+KzSDDdCQfHx/Ex8ejtLQUbm5u2L17N5YuXWo+3r17d1y4cAEFBQXo2bMn9uzZA09PzybPycRpZ2yqt1782zaDDNOR1Go1wsPDERwcDKPRiICAAHh5eSEkJARhYWHw9PREXFwcXn31VQgh0KVLFyxbtqzJcypEQx0A/+Pi2tXq/wkiZ+FsibPyevFtn6Msfq7Fz+0Q+tfbjtdcrDi
JqOXglUNERBI13gBuUZg4iWTi0H2N9sKKk8i5OVsfp1VImI5kT0ycdsYPF1EtXI+TLMEERlRDsKlORCQRm+pkCTbViWrhzdrIEkxgRLWw4iQikoh9nEREEnFUnci5sRumGdhUJ0twcKj14t9WOk5HIiKSihUnEZFETJxkCUduVlHT+LdtBs7jJHJu7OOUTlQycRIRScOmOhGRRBxVJ0uwOdd68XfcDKw4yRL8cLVe/FJsBiZOIiJpRBWb6kROzaErP3thxUmWYHOu9eLfVjrBxEmWcOQ3OTWNf9tmYOIkIpLIMbo4mTiJ5MKmunRsqhMRSVXJxElEJAkrTiIn58hNZrthHycRkTSsOMli9hhEYDUkPw4ONQMrTrKUQ7/RiazIQdYxZuIkopZDVNr7FVimjb1fABGRmUnCQ4L09HSMHz8eY8aMQXJycr3jBQUFmDZtGp599lnMmjULly9fbvJ8TJxE1GIIk+UPS+l0OqxevRopKSnYvn07UlNTcfbs2ZqYQuDll19GSEgIduzYgYceeggbNmxo8pxsqtsZBxCIakhJiAaDAQaDod5+pVIJpVJp3j548CC8vb3h7u4OAPD19UVGRga0Wi0A4OTJk+jQoQNGjhwJAJgzZ06D562NiZOIWgwpiTMxMREJCQn19mu1WoSGhpq39Xo9PDw8zNsqlQo5OTnm7cLCQvzxj39EREQETp06hd69eyM6OrrJ2EycdsbKj6iGqFJY/Nzp06dDo9HU21+72gRuNMVvplDUxKmsrMTRo0eRlJQET09PrFmzBu+88w7eeeedRmMzcRLJhF+K0gmT5Ynz5iZ5Y9RqNbKzs83ber0eKpXKvO3h4YHu3bvD09MTADBx4kSEhYU1eU4ODhFRiyHH4JCPjw8OHTqE0tJSlJeXY/fu3eb+TAAYPHgwSktLkZ+fDwDYu3cv+vfv3+Q5WXESyYQDf9IJYXnFaSm1Wo3w8HAEBwfDaDQiICAAXl5eCAkJQVhYGDw9PbFu3TpERUWhvLwcd999N5YvX97kORWioQ6A/3Fx7Wr1/wSRs3C2xFl5vfi2z1E0dLTFz733yN7bjtdcrDiJqMWQ0sdpT0ycRNRimCSMqtsTEycRtRisOImIJGp8xKVlYeIkohaDFScRkURyTEeSAxMnEbUYXMiYLOJsc/2ImlJlcoyLGZk4iajFYB8nEZFEHFUni7DJTFSDFSdZhH2cRDVMHFUnSzCBEdUwseIkIpKGFScRkUScAE8WYR8nUQ2OqpNFmMCIarCpTuTk+KUoHZvqREQSVTFxEjk39l9Lx6Y6EZFEbKoTEUnkIKvKMXESycWRm8z2IsCKk8ipsY9Tuko21ckS/HAR1WDFSUQkEfs4ySKs/IhqsOIkIpKIFSdZhH2cRDUcJXE6xi3liBwQv5ykq1IoLH7YEytOO+OHq/WyV2vCkZnYx0lEJI2DLMfJxElELYej9HEycRJRi2Gyc9+lpZg47Yyj6kQ12FQnizCBEdWolKngTE9Px/r162E0GjFjxgwEBQU1+Lyvv/4aS5Yswd69e5s8HxMnEbUYcoyq63Q6rF69GmlpaXB1dcXzzz+PoUOHolevXnWed/HiRbz77rsWnZPzOImoxRASHgaDAUVFRfUeBoOhzjkPHjwIb29vuLu7o0OHDvD19UVGRka92FFRUdBqtRa9TlacdsY+TqIaJgkFZ2JiIhISEurt12q1CA0NNW/r9Xp4eHiYt1UqFXJycur8zCeffIJ+/fph4MCBFsVm4rQzJjCiGlKmI02fPh0ajabefqVSWWdbNHCzdkWt0fvTp09j9+7d+Pjjj3HhwgWLYjNx2hkrTqIaVRIqTqVSWS9JNkStViM7O9u8rdfroVKpzNsZGRkoKSmBv78/jEYj9Ho9AgMDkZKS0ug5mTjtjAmMqIYcE+B9fHwQHx+P0tJSuLm5Yffu3Vi6dKn5eFhYGMLCwgAARUVFCA4ObjJpAhwcIqIWxCThYSm1Wo3w8HAEBwfDz88
PEydOhJeXF0JCQpCbm9us16kQDXUA/I+La9dmnZSInK8bpvJ68W2f42/dXrD4uXN+TrrteM3FprqdOduHi6gpvFadLMIERlSDiZPIyfFLUTopo+r2xMRJJBN2w0jHipOISCImTiIn58iVn71wWTkiJ8emunRSrlW3JyZOImox2FQni7AqIapR5SCNdSZOO2MCI6rBipOISCLHqDeZOImoBWHFSRZhHydRDY6qk0WYwIhqcHCIiEgiNtWJiCQyseIkS7CPk6iGY6RNJk4iakHYVCeLsPIjqsGmOpGT45eidFX2fgEWYuIkohZDsOIkcm4c+JOOfZxkEX64iGqwj5MswgRGVMMx0iYTJxG1IKw4iYgkcpRr1dvY+wUQtVbshpHOJOFhT6w47YyDQ62Xvf62jozTkcgiTGBENexdSVqKiZOIWgyTYMVJRCSJowwOMXESyYTdMNKxj5PIyXHgTzr2cRI5OUdOYPbCCfBETo4Vp3RyNdXT09Oxfv16GI1GzJgxA0FBQXWOZ2ZmIj4+HkII3HvvvYiLi0OnTp0aPR8nwBNRiyHHBHidTofVq1cjJSUF27dvR2pqKs6ePWs+fvXqVSxatAgbNmzAjh070KdPH8THxzd5TlacLYA9KhNHrkocBX/H0lUJy1OiwWCAwWCot1+pVEKpVJq3Dx48CG9vb7i7uwMAfH19kZGRAa1WCwAwGo1YtGgR1Go1AKBPnz5IT09vMjYTZwvADxjRDVIqycTERCQkJNTbr9VqERoaat7W6/Xw8PAwb6tUKuTk5Ji377rrLjz11FMAgIqKCmzYsAHTpk1rMjYTJ5FM2McpnZQ+zunTp0Oj0dTbX7vaBADRwKR6hUJRb9+VK1cwd+5c9O3bt8Hz1sbESUQthpRR9Zub5I1Rq9XIzs42b+v1eqhUqjrP0ev1mDVrFry9vbFgwYJbnpOJ085YlRDVaKg6vF0+Pj6Ij49HaWkp3NzcsHv3bixdutR8vKqqCnPmzMG4ceMwd+5ci87JxGlnTGBENeSYAK9WqxEeHo7g4GAYjUYEBATAy8sLISEhCAsLw4ULF3Dq1ClUVVVh165dAIABAwYgNja20XMqRBMp3sW1q/X/F0ROwtlaE5XXi2/7HE9187X4uZk/77rteM3FitPOnO3DRdQUOZrqcmDitDMmMKIavOSSiEgiro5E5OTYmpCOCxkTOTn2X0vHhYzJIvxwEdVgHydZhAmMqAZH1ckirDiJarDiJIswgbVe/NtKx1F1sggrTqIabKqTRZjAWi9+KUonZSFje2LiJKIWg32cREQSsY+TLMLmHFENXjlEFmECI6rBipOISCIODhERScSmOpGTYzeMdGyqEzk5DvxJx4qTiEgiVpxERBIJDg6RJdicI6rBUXWyCBNY62Wvv629voytgZdcEjk5Z2tNWOO+6o6yOlIbe78AotaKrQnpTEJY/LAnVpxEMnHkJrO9cFSdLOJszTmipjhKU52J086YwIhqcFSdiEgie/ddWoqJk4haDDbViZwcu2Gk4zxOIifHgT/pWHESEUnEwSEiIok4OEQWKT//jUM3rahp/NtKw6Y6WYQfrNbNHv2cjvyekuvKofT0dKxfvx5GoxEzZsxAUFBQneN5eXmIiorC1atXMWTIECxevBguLo2nR4VoIsW7uHa13isnolbNGot8uLa71+LnXv+9yKLn6XQ6TJ06FWlpaXB1dcXzzz+P9957D7169TI/Z+LEiYiJicGgQYOwYMECDBgwAIGBgY2ekxUnkUw4qi6dlKa6wWCAwWCot1+pVEKpVJq3Dx48CG9vb7i7uwMAfH19kZGRAa1WCwAoLi5GRUUFBg0aBACYPHky3n///eYnTmt8gxCRbTny59Yo4bXHx8cjISGh3n6tVovQ0FDztl6vh4eHh3lbpVIhJyen0eMeHh7Q6XRNxmbFSUQOafr06dBoNPX21642gYarWIVCYfHxhjBxEpFDurlJ3hi1Wo3s7Gzztl6vh0qlqnP
84sWL5u2SkpI6xxvChYyJqFXz8fHBoUOHUFpaivLycuzevRsjR440H+/atSvatWuH48ePAwC2bdtW53hDmhxVJyJqDdLT0/HBBx/AaDQiICAAISEhCAkJQVhYGDw9PZGfn4+oqChcu3YN/fr1Q1xcHFxdXRs9HxMnEZFEbKoTEUnExElEJBETJxGRREycREQSMXESEUnExOmALl++bO+XQOTUrJo4CwsLsWPHDgghEB0dDX9//zoz9luTzz77rM52RUUFlixZImvMvLw8jB07FpMmTYJOp8OYMWNw8uRJWWPWVllZiZMnTyI/P9+m6yaWlZXhl19+wfnz580PW7H1l9SZM2eQnZ2NY8eOmR/U8lj1ksv58+fjhRdewJ49e3Du3DnMnz8fy5cvx6ZNm6wZpp7i4mJERUWhuLgYSUlJeOONN7Bs2TLce6/lS1RJlZmZiaysLMTFxaGgoABRUVH4v/+Td1WamJgYrFu3Dq+//jrUajUWLVqEt99+G5s3b5Y1LgB8++23iIiIgEqlgslkgsFgwJo1a+Dl5SVr3ISEBGzcuBF33XWXeZ9CocCePXtkjZuXl4fw8HBUVFQgNTUVL7zwAtasWYP+/fvLFnPx4sXIyspCt27dzPsUCgU++eQTWeLNnz+/yeNxcXGyxG0VhBX5+/sLIYRYsGCBSE1NFUIIodForBmiQS+++KL45ptvhJ+fnzCZTCI1NVUEBgbKHjcpKUkMGTJEjBgxQuTk5Mger/p3OWnSJPO+Z555Rva4QggxYcIEkZeXZ97Oycmxyd/2iSeeEKWlpbLHuVlgYKA4e/as+Xd94MAB8/tbLmPGjBHl5eWyxqgtLS1NpKWlidDQUPHiiy+Kbdu2iZ07d4q5c+eKiIgIm70OR2TVpnrbtm2xa9cufP311xg1ahQyMzPRpo383ai//fYbRowYASEEFAoFpkyZgqtXr8oa8/Dhw/j0008xYcIE9OzZE+vXr7/lUlS3y93dHfn5+eaVW3bs2IFOnTrJGrOaq6sr+vbta9729PS0SVyVSoU//OEPNolVW3l5OR544AHz9vDhw3H9+nVZY3br1s2mXSAajQYajQa//PIL/v73v2PSpEmYMGEC4uPjcfbsWZu9Dkdk1ab6kiVL8PHHH2PhwoVQqVT48ssvERMTY80QDWrfvj0uXLhgTijZ2dlNXmdqDQsWLMCyZcvg7e0NAEhOTkZAQAC++Ua+xWsXLVqEiIgInDlzBkOGDEH37t2xYsUK2eLV5uXlhcjISEyZMgVt27bFl19+ia5du5r74B599FGrxqteZ1GpVOK5557DyJEj0bZtW/Px6kVo5WKPL6lOnTphwoQJGDx4cJ33r9xN5itXruDSpUvo3LkzAODixYsoKyuTNaajs/q16tVLNmVnZ+OHH36ARqNBhw4drBmintzcXERFRaGwsBD33XcfLl++jDVr1phXdJbDtWvXcOedd9bZV1RUJGu/arWysjKYTCZ07NhR9ljVpk2b1ugxOfrhGlqgtja5E2dhYSEiIiKQm5uL9u3bo3v37li5ciV69OghW8ytW7c2uL+hNSetadu2bVi5ciUefvhhmEwmnDhxAtHR0Xj66adljevIrJo43377bbRp0wZBQUGYNWsWhg8fjitXriA+Pt5aIRplNBpx7tw5VFVVoWfPnrJXnPYYkJo2bVqdBVYVCgXat2+Pnj17Ys6cOTZrtttSZWUl9u3bhyeffBKlpaXYu3cv/P39b7nQ7O367LPPMHXqVJt8SZWUlMDDw6PR2QL33HOPbLGr6fV6fP/991AoFHjkkUfQpUsX2WM6Mqs21XNzc7FlyxYkJCQgICAAoaGhmDx5sjVDNKigoACbNm2qN3VEzibOwoULMWvWLKxatQoeHh6YOHEiIiIikJycLFvMXr16wcXFBf7+/gCAnTt34sKFC1Cr1YiMjLxlldYcJpMJKSkpeOyxx9C7d2988skn+OKLL9CvXz9ER0fLXvVGR0fDZDLhySe
fBAAcOXIEOTk5sk/9Sk5OxtSpU2VvLQFAVFQUPvjgA7zwwgtQKBR1+jltMYPg+vXrSEtLQ0FBAaKjo5GYmIjZs2fLXnw4MqsmzqqqKphMJuzZsweLFy9GeXk5KioqrBmiQVqtFuPHj0efPn1kj1WtekBq5cqV5gEpOZMmAJw4cQJpaWnm7b59+8Lf3x8rV67Etm3bZIm5atUqFBQUYNSoUTh+/DjWrl2L+Ph4nDlzBjExMXjnnXdkiVvtP//5D9LT0wEAnTt3xooVK/DMM8/IGhMA7r77bgQHB2PgwIFo166deb8cXQQffPABAGDv3r1WP7cllixZgs6dO+PUqVNwcXFBYWEhIiMjbdZ/7oismjj9/PwwYsQIPPzwwxg4cCDGjRuH5557zpohGqRUKmXv87qZPQakjEYjzpw5gwcffBAAcPr0aZhMJlRUVMBoNMoSc//+/di6dStcXFyQmJgIX19f+Pj4wMfHB+PGjZMlZm0mk6nOrQ5+/fVXm8zUkLN/vDFXrlzBunXrcPToUbi4uMDHxwcvvfQS3NzcZI178uRJbN26Ffv374ebmxveffddm3w5OTKrJs6ZM2ciODjYPPqZnJxsHqmTk0ajwerVq+Ht7V3nJvLWHumtbf78+XjppZdQWFiISZMm4fLly1i7dq1s8YAbTbqQkBB06dIFQghcvnwZK1asQHx8PCZNmiRLzDZt2ph/p0ePHsVLL71kPmYymWSJWducOXOg0WjwyCOPQAiBnJwcREZGyh735i9iIQSKiiy7j3dzRUZGolu3boiLi4MQAlu2bEF0dDRWrlwpa1yFQoHr16+bi4DffvtN9j5kR2fVxJmdnY2NGzeirKwMQgiYTCacP39e9ibI0aNHkZubi++++868T84rLrKystCrVy9s3rwZGzZswJEjRzBq1ChZryoBgKFDhyIzMxOnTp3C/v37ceDAAcyaNQvff/+9bDHd3Nxw/vx5XLt2DT/++CN8fHwAAPn5+TYZ1X/wwQeRlpaGf//733BxcUF0dPQtb6RlDUlJSXjvvfdQXl5u3nfvvffiq6++ki3mTz/9hPfff9+8HRkZaZPKLzg4GDNnzkRJSQliY2ORmZmJuXPnyh7XoVlzNr2vr6/YvHmzCAoKEhkZGWLBggUiNjbWmiEa9PTTT8seo9o//vEPMXnyZHHmzBmRl5cnBg4cKDZt2iSWLVsmYmJiZI1dWFgoVqxYIYYNGyb69+8v4uPjxa+//iprzMOHD4sRI0aIQYMGifXr1wshhEhOThY+Pj5i3759ssYWQoixY8fKHqMhTzzxhCgsLBSvvfaa+Pnnn8WWLVvEa6+9JmvMV155RXz33Xfm7by8PKHVamWNWe3MmTMiKSlJJCYmiry8PGEymWwS11FZteJs3749/P39UVxcDKVSiZiYGJuMqvfu3Rv5+fl1rmyRy/bt25Gamgo3NzesXLkSo0ePxp///GcIITB+/HhZYn711Vf4/PPPcfLkSYwZMwYrVqxAdHS0Tfp1hw4dij179qCiosJ8K9b+/fsjOTkZ999/v+zxe/XqhYSEBAwcOBDt27c375ezGwYAunTpgm7duqFPnz44ffo0Jk+ejKSkJFlijR49GgqFAr///jt27dqFnj17ok2bNigoKED37t1liVlbSkoKAgMD0atXLwA3WhNTpkzBF198IXtsR2XVxNmuXTtcunQJPXr0wIkTJzBs2DCbXIHw888/Q6PRwMPDA3fccYf50ks5pnEoFApzZ/2RI0cQGBho3i+X0NBQjB07FqmpqeYPki37oFxdXeHq6oq9e/fWGbiwReK8dOkSjhw5giNHjpj3ydkNU83NzQ2HDx9Gnz59kJmZCU9PTxgMBlliffrpp7Kc11I7d+5EVVUVpkyZgrVr1yI9PR2vv/66XV9TS2fVxDljxgyEh4cjPj4eAQEBSE9Px4ABA6wZokHr1q2TPUa1tm3bwmAwoKysDHl5eRg+fDiAGxPiaw9MWdOOHTuwdet
WBAYGomvXrpgwYQKqqqpkidWYVatW4fjx4xg3bhyEEFi7di1yc3PrDBbJwdZJRafTQa1WIzo6Gps3b0ZERAQ2b96McePGyVbhd+3aFcCN+ZT79u3DtWvXANyY3ldUVIRXXnlFlrjVPvzwQ2i1WmzYsAGjRo3Czp07W+XFFNZk9Usuq6u9srIynDt3Dg899JDs1ZEt33AZGRlYvnw5KisrMXr0aCxatAj//Oc/sXr1avzlL3+Bn5+f1WNWq6qqQlZWFrZu3Yp9+/bBx8cHQUFBePzxx2WLWe2ZZ54xT0sCgN9//x3+/v7YuXOnrHFtPeCo0WjMlz5++OGHePHFF2WJ05DZs2ejvLwchYWFGDJkCI4dO4ZBgwbVGTCyptpzf69fv461a9di4sSJeOihhwBA1veyo7NKiWTvdf20Wm2Dbzg5jB07FoMHD8Zvv/1m7lO98847ERMTg6FDh8oSs1rbtm3x1FNP4amnnkJpaSm2b9+OVatW2SRxdurUCVevXoW7uzuAG3NKbTGqXj0Fa+vWrZg2bRr279+Pfv36yRavdh2Rnp5u08T53//+F7t370ZsbCz8/f0xb948WavN2t0fADBy5EgYDAbzfibOxlklcT722GPWOE2z2foNp1aroVarzdu2SFw369y5M2bOnImZM2fKGqf6S9FkMmHSpEkYPXo02rZti/3796Nnz56yxgZsP+BYu3Vk5cbYLXXp0gUKhQI9evTADz/8AD8/P1mXsqsuaFavXo3w8HDZ4rRGVkmc1au3XL16Fdu3b0dQUBB0Oh0+//xzzJ492xohmlT7DZefnw+NRiP72onOovpL8eYvR7nnrFaz14AjYNsBOODGnNWlS5di6tSpeOONN6DX62W7Iqy2rKwsvPrqq5z0LoFVRzPeeOMN8/Xid955J0wmE+bNmyf76kg3v+FKSkps8oZzBiNGjGhy5R65zZw506YDjmfOnDEvKKLT6cz/lnOmBnDj3kbh4eEoKChAr169EBoaigMHDmDVqlWyxKvN3d0dY8eORf/+/etcl89bZzTOqonz/Pnz+Nvf/gYA6NixI8LDw2W7FLA6HnCjU12n06FDhw7w9/fHoUOH8N5778kW15ncvHLPzeRKJDqdDkuXLsVPP/2EwYMHw2QyIS0tDefOnZN1vu6uXbtkO3djTp06hdmzZ2PZsmUYOXIkACAnJweZmZk2WetB7vU+WyOrjqpPmjQJy5cvN1edP/74I+bNm4ctW7ZYK0Qd1ROHgZr+KIVCAZ1Oh6qqKuTl5ckS19lkZWXhgQcewH333YevvvoKmzdvRr9+/TB37lzccccdssScNWsW+vfvjyFDhuBf//oXgNZbAU2fPh1z586tN7j4zTffYOPGjfj4449lfw2XLl1CeXk5hBDmWSnDhg2TPa7DsuZlSAcPHhQ+Pj5Co9EIjUYjRo0aJY4dO2bNEE26evWqiI6OFk888YQ4cOCAzeK2Zhs3bmz0ElM5L6edMGGC+d/Xr18X48ePly2Wvfn5+TV67Nlnn5U9/qpVq8TDDz8svLy8xOOPPy769u0rAgICZI/ryKzSVK/drHr88cfx/PPPw9XV1SYrsVc7dOgQoqKiMHz4cOzYscOmt5VozbZt22bzS0wB1Klk77jjDtkq25agsrISJpOp3nJ5JpPJJn31O3fuxL59+xAbG4uXX34Z58+fx0cffSR7XEdmlYUNFyxYgJ49e+LNN9+EEAKfffYZ+vbta5OkWVZWhoULFyIyMhJLlizBkiVLmDSt6OZLTKvvHW/rEdjWPOL76KOPNrh6/1//+lebXHmnUqnQsWNHPPjgg8jPz4e3tzcuXrwoe1xHZrWKc+PGjQCAYcOG2WzibO0qMz09vd7N0+j22eMSU6Du6DZQM8ItZB7dtofXXnsNs2fPRnp6Ojw9PSGEwKlTp9C5c2esX79e9vgdO3bEtm3b0L9/fyQlJUGlUsl2XX5rYZV3vr2aVTNnzoSLiwsOHDiAb7/91ry/NX6
47GX27Nnw8/NDZWUlAgICoFKp6lxiKhd7jG7bS8eOHZGcnIzDhw8jLy/PfMPDIUOG2CR+bGwsvvzyS/j5+SErKwsLFy7Eq6++apPYjsoqo+q1r+9taFsuxcXFTR6vXjyBbo9Op6tziem+ffvQvn172S8xJdsxGo0oKCiAi4sL7r///jr3sKf6rJI4BwwYUOcSxOoVZlj5EbV8R48exbx589C5c2cIIXDt2jWsWrUKnp6e9n5pLZZVEicrPyLHNXnyZMTFxZnnX+fm5mLx4sXYvHmznV9Zy2WVPk4mRiLHVvvW2p6enjZf79XRyDcsSkQt2rFjxwAAPXr0wMKFCxEQEAAXFxfz6D41zuoLGRORY5g2bVqjx2xxexJHxsRJRCQRm+pETi47OxuJiYm4fPlynf2sOBvHxEnk5N566y1otVrcc8899n4pDoOJk8jJqdVq3l9IIvZxEjm5jIwMZGZmwtvbu876A0ymjWPFSeTkUlJSAADHjx+vs5+Js3FMnEROrqSkxLzKPlnGKutxEpHjGjJkCLKyslBZWWnvl+Iw2MdJ5ORGjBiBixcv1rl/l0Kh4D27msCmOpGTSklJQWBgIA4cOIDTp0+jd+/e5mMxMTF2fGUtH5vqRE7qiy++MP87IiKizrGbB4qoLiZOIidVu5fu5h479uA1jYmTiOrdDK813xzPGpg4iZwUk2PzcVSdyEnVvuVN9e1ugBvN9JKSEuTm5trz5bVoTJxEToq3vGk+Jk4iIonYx0lEJBETJxGRREycREQSMXESEUn0/zN9+CtYjGEIAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.heatmap(train.isnull(),yticklabels=False,cbar='BuPu')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:04.797107Z", + "iopub.status.busy": "2020-08-16T00:03:04.795587Z", + "iopub.status.idle": "2020-08-16T00:03:04.799528Z", + "shell.execute_reply": "2020-08-16T00:03:04.799041Z" + }, + "papermill": { + "duration": 0.027205, + "end_time": "2020-08-16T00:03:04.799633", + "exception": false, + "start_time": "2020-08-16T00:03:04.772428", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Cabin 687\n", + "Age 177\n", + "Embarked 2\n", + "Fare 0\n", + "Parch 0\n", + "SibSp 0\n", + "Sex 0\n", + "Name 0\n", + "Pclass 0\n", + "dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.isnull().sum().sort_values(ascending=False)[0:20]\n", + "# we can see that cabin is midding a lot of values, and age is tooi!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.011933, + "end_time": "2020-08-16T00:03:04.823720", + "exception": false, + "start_time": "2020-08-16T00:03:04.811787", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# **Data Cleaning**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.011829, + "end_time": "2020-08-16T00:03:04.847591", + "exception": false, + "start_time": "2020-08-16T00:03:04.835762", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**This is an awesome function I created that preprocesses the data. 
It does the following**\n", + "\n", + "* Fills in null values based on mean or mode\n", + "* Drops columns that are missing at least 50 percent of the data\n", + "\n", + "*You guys are free to copy this for loop for your own projects!*" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:04.892575Z", + "iopub.status.busy": "2020-08-16T00:03:04.891608Z", + "iopub.status.idle": "2020-08-16T00:03:04.907126Z", + "shell.execute_reply": "2020-08-16T00:03:04.906618Z" + }, + "papermill": { + "duration": 0.047502, + "end_time": "2020-08-16T00:03:04.907233", + "exception": false, + "start_time": "2020-08-16T00:03:04.859731", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "\n", + "\n", + "#clean the train data\n", + "for i in list(train.columns):\n", + " dtype = train[i].dtype\n", + " values = 0\n", + " if(dtype == float or dtype == int):\n", + " method = 'mean'\n", + " else:\n", + " method = 'mode'\n", + " if(train[i].notnull().sum() / 891 <= .5):\n", + " train.drop(i, axis = 1, inplace=True)\n", + " elif method == 'mean':\n", + " train[i]=train[i].fillna(train[i].mean())\n", + "\n", + " else:\n", + " train[i]=train[i].fillna(train[i].mode()[0])\n", + "\n", + "# WE CAN DO THIS FOR THE TEST SET TOO!\n", + "\n", + "#clean the test data\n", + "for i in list(test.columns):\n", + " dtype = test[i].dtype\n", + " values = 0\n", + " if(dtype == float or dtype == int):\n", + " method = 'mean'\n", + " else:\n", + " method = 'mode'\n", + " if(test[i].notnull().sum() / 418 <= .5):\n", + " test.drop(i, axis = 1, inplace=True)\n", + " elif method == 'mean':\n", + " test[i]=test[i].fillna(test[i].mean())\n", + "\n", + " else:\n", + " test[i]=test[i].fillna(test[i].mode()[0])\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.011993, + "end_time": "2020-08-16T00:03:04.931751", + "exception": false, + "start_time": 
"2020-08-16T00:03:04.919758", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**As we can see, all of the missing values are gone!**" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "_kg_hide-input": true, + "execution": { + "iopub.execute_input": "2020-08-16T00:03:04.963771Z", + "iopub.status.busy": "2020-08-16T00:03:04.962852Z", + "iopub.status.idle": "2020-08-16T00:03:05.218436Z", + "shell.execute_reply": "2020-08-16T00:03:05.218946Z" + }, + "papermill": { + "duration": 0.275108, + "end_time": "2020-08-16T00:03:05.219105", + "exception": false, + "start_time": "2020-08-16T00:03:04.943997", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.heatmap(train.isnull(),yticklabels=False,cbar='BuPu')\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.013345, + "end_time": "2020-08-16T00:03:05.246558", + "exception": false, + "start_time": "2020-08-16T00:03:05.233213", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# **Feature Engineering**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.014138, + "end_time": "2020-08-16T00:03:05.274477", + "exception": false, + "start_time": "2020-08-16T00:03:05.260339", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**Title**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.012818, + "end_time": "2020-08-16T00:03:05.300776", + "exception": false, + "start_time": "2020-08-16T00:03:05.287958", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Lets take out the Mr, Miss, etc from the name section, and create a new column names title!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:05.346398Z", + "iopub.status.busy": "2020-08-16T00:03:05.345561Z", + "iopub.status.idle": "2020-08-16T00:03:05.349914Z", + "shell.execute_reply": "2020-08-16T00:03:05.349327Z" + }, + "papermill": { + "duration": 0.035659, + "end_time": "2020-08-16T00:03:05.350024", + "exception": false, + "start_time": "2020-08-16T00:03:05.314365", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "#TITLE\n", + "\n", + "train_test_data = [train, test] # combining train and test dataset\n", + "\n", + "for dataset in train_test_data:\n", + " dataset['Title'] = dataset['Name'].str.extract(' ([A-Za-z]+)\\.', expand=False)\n", + "\n", + "\n", + "title_mapping = {\"Mr\": 0, \"Miss\": 1, \"Mrs\": 1, \n", + " \"Master\": 0, \"Dr\": 1, \"Rev\": 0, \"Col\": 0, \"Major\": 0, \"Mlle\": 1,\"Countess\": 1,\n", + " \"Ms\": 1, \"Lady\": 1, \"Jonkheer\": 1, \"Don\": 0, \"Dona\" : 1, \"Mme\": 0,\"Capt\": 0,\"Sir\": 0 }\n", + "for dataset in train_test_data:\n", + " dataset['Title'] = dataset['Title'].map(title_mapping)\n", + " \n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.013586, + "end_time": "2020-08-16T00:03:05.377981", + "exception": false, + "start_time": "2020-08-16T00:03:05.364395", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**Sex**\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.013191, + "end_time": "2020-08-16T00:03:05.405476", + "exception": false, + "start_time": "2020-08-16T00:03:05.392285", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "We can make male and female into categorical variables such as 0 and 1" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:05.442498Z", + "iopub.status.busy": 
"2020-08-16T00:03:05.441620Z", + "iopub.status.idle": "2020-08-16T00:03:05.444837Z", + "shell.execute_reply": "2020-08-16T00:03:05.444291Z" + }, + "papermill": { + "duration": 0.025484, + "end_time": "2020-08-16T00:03:05.444947", + "exception": false, + "start_time": "2020-08-16T00:03:05.419463", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + " \n", + "sex_mapping = {\"male\": 0, \"female\":1}\n", + "for dataset in train_test_data:\n", + " dataset['Sex'] = dataset['Sex'].map(sex_mapping)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.013681, + "end_time": "2020-08-16T00:03:05.472676", + "exception": false, + "start_time": "2020-08-16T00:03:05.458995", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**Embarked**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.014216, + "end_time": "2020-08-16T00:03:05.500391", + "exception": false, + "start_time": "2020-08-16T00:03:05.486175", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Lets test to see if there is any correlation with Pclass and Embarked " + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:05.537924Z", + "iopub.status.busy": "2020-08-16T00:03:05.537005Z", + "iopub.status.idle": "2020-08-16T00:03:05.764053Z", + "shell.execute_reply": "2020-08-16T00:03:05.763057Z" + }, + "papermill": { + "duration": 0.249537, + "end_time": "2020-08-16T00:03:05.764169", + "exception": false, + "start_time": "2020-08-16T00:03:05.514632", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "Pclass1 = train_data[train_data['Pclass'] == 1]['Embarked'].value_counts()\n", + "Pclass2 = train_data[train_data['Pclass'] == 2]['Embarked'].value_counts()\n", + "Pclass3 = train_data[train_data['Pclass'] == 3]['Embarked'].value_counts()\n", + "\n", + "df = pd.DataFrame([Pclass1, Pclass2, Pclass3])\n", + "df.index = ['1st class','2nd class', '3rd class']\n", + "df.plot(kind='bar',stacked=True, figsize=(10,5))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.014366, + "end_time": "2020-08-16T00:03:05.793175", + "exception": false, + "start_time": "2020-08-16T00:03:05.778809", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "We can assign each embarked value to a numberical value for training later!" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:05.830003Z", + "iopub.status.busy": "2020-08-16T00:03:05.829030Z", + "iopub.status.idle": "2020-08-16T00:03:05.835622Z", + "shell.execute_reply": "2020-08-16T00:03:05.835037Z" + }, + "papermill": { + "duration": 0.027899, + "end_time": "2020-08-16T00:03:05.835744", + "exception": false, + "start_time": "2020-08-16T00:03:05.807845", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "for data in train_test_data:\n", + " data['Embarked'] = data['Embarked'].fillna(\"S\")\n", + " \n", + "embarked_mapping = {\"S\": 0, \"C\": 1, \"Q\": 2}\n", + "for dataset in train_test_data:\n", + " dataset['Embarked'] = dataset['Embarked'].map(embarked_mapping)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.013917, + "end_time": "2020-08-16T00:03:05.863782", + "exception": false, + "start_time": "2020-08-16T00:03:05.849865", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**Family Size**" + ] + }, + { + "cell_type": "markdown", 
+ "metadata": { + "papermill": { + "duration": 0.014221, + "end_time": "2020-08-16T00:03:05.892815", + "exception": false, + "start_time": "2020-08-16T00:03:05.878594", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "Parch ( Parent & child ) and Sibsp( Sibling & Spouse ) are both contributing factors to family size, so lets make a new column called family size, and drop the other ones." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:05.931503Z", + "iopub.status.busy": "2020-08-16T00:03:05.930571Z", + "iopub.status.idle": "2020-08-16T00:03:05.933769Z", + "shell.execute_reply": "2020-08-16T00:03:05.933193Z" + }, + "papermill": { + "duration": 0.025993, + "end_time": "2020-08-16T00:03:05.933872", + "exception": false, + "start_time": "2020-08-16T00:03:05.907879", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "train[\"FamilySize\"] = train['SibSp'] + train['Parch'] + 1\n", + "test[\"FamilySize\"] = test['SibSp'] + test['Parch'] + 1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.014325, + "end_time": "2020-08-16T00:03:05.962742", + "exception": false, + "start_time": "2020-08-16T00:03:05.948417", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# **Correlations**" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:05.997135Z", + "iopub.status.busy": "2020-08-16T00:03:05.996451Z", + "iopub.status.idle": "2020-08-16T00:03:06.312682Z", + "shell.execute_reply": "2020-08-16T00:03:06.312109Z" + }, + "papermill": { + "duration": 0.334816, + "end_time": "2020-08-16T00:03:06.312796", + "exception": false, + "start_time": "2020-08-16T00:03:05.977980", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, 
+ "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.heatmap(train.corr(),cbar='plasma')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:06.354102Z", + "iopub.status.busy": "2020-08-16T00:03:06.352041Z", + "iopub.status.idle": "2020-08-16T00:03:06.354808Z", + "shell.execute_reply": "2020-08-16T00:03:06.355312Z" + }, + "papermill": { + "duration": 0.027178, + "end_time": "2020-08-16T00:03:06.355462", + "exception": false, + "start_time": "2020-08-16T00:03:06.328284", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "train.drop(['Name'], axis = 1, inplace=True)\n", + "test.drop(['Name'], axis = 1, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:06.398320Z", + "iopub.status.busy": "2020-08-16T00:03:06.397342Z", + "iopub.status.idle": "2020-08-16T00:03:06.401654Z", + "shell.execute_reply": "2020-08-16T00:03:06.401144Z" + }, + "papermill": { + "duration": 0.031322, + "end_time": "2020-08-16T00:03:06.401771", + "exception": false, + "start_time": "2020-08-16T00:03:06.370449", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassSexAgeSibSpParchFareEmbarkedTitleFamilySize
03022.0107.2500002
11138.01071.2833112
23126.0007.9250011
31135.01053.1000012
43035.0008.0500001
\n", + "
" + ], + "text/plain": [ + " Pclass Sex Age SibSp Parch Fare Embarked Title FamilySize\n", + "0 3 0 22.0 1 0 7.2500 0 0 2\n", + "1 1 1 38.0 1 0 71.2833 1 1 2\n", + "2 3 1 26.0 0 0 7.9250 0 1 1\n", + "3 1 1 35.0 1 0 53.1000 0 1 2\n", + "4 3 0 35.0 0 0 8.0500 0 0 1" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:06.444039Z", + "iopub.status.busy": "2020-08-16T00:03:06.443083Z", + "iopub.status.idle": "2020-08-16T00:03:06.447367Z", + "shell.execute_reply": "2020-08-16T00:03:06.446894Z" + }, + "papermill": { + "duration": 0.030995, + "end_time": "2020-08-16T00:03:06.447465", + "exception": false, + "start_time": "2020-08-16T00:03:06.416470", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassSexAgeSibSpParchFareEmbarkedTitleFamilySize
03034.5007.8292201
13147.0107.0000012
22062.0009.6875201
33027.0008.6625001
43122.01112.2875013
\n", + "
" + ], + "text/plain": [ + " Pclass Sex Age SibSp Parch Fare Embarked Title FamilySize\n", + "0 3 0 34.5 0 0 7.8292 2 0 1\n", + "1 3 1 47.0 1 0 7.0000 0 1 2\n", + "2 2 0 62.0 0 0 9.6875 2 0 1\n", + "3 3 0 27.0 0 0 8.6625 0 0 1\n", + "4 3 1 22.0 1 1 12.2875 0 1 3" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.01354, + "end_time": "2020-08-16T00:03:06.476026", + "exception": false, + "start_time": "2020-08-16T00:03:06.462486", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# **PreProcessing**" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.014099, + "end_time": "2020-08-16T00:03:06.504426", + "exception": false, + "start_time": "2020-08-16T00:03:06.490327", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**Now that our data looks good, lets get ready to build our models!**" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:06.538376Z", + "iopub.status.busy": "2020-08-16T00:03:06.537612Z", + "iopub.status.idle": "2020-08-16T00:03:12.103848Z", + "shell.execute_reply": "2020-08-16T00:03:12.102607Z" + }, + "papermill": { + "duration": 5.585812, + "end_time": "2020-08-16T00:03:12.103987", + "exception": false, + "start_time": "2020-08-16T00:03:06.518175", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "#imports\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.model_selection import GridSearchCV" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.01503, + "end_time": "2020-08-16T00:03:12.134200", + "exception": false, + "start_time": "2020-08-16T00:03:12.119170", + "status": "completed" + }, + 
"tags": [] + }, + "source": [ + "We should scale the values in the data, so that the neural network can train better!" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:12.174580Z", + "iopub.status.busy": "2020-08-16T00:03:12.173590Z", + "iopub.status.idle": "2020-08-16T00:03:12.188156Z", + "shell.execute_reply": "2020-08-16T00:03:12.187674Z" + }, + "papermill": { + "duration": 0.038245, + "end_time": "2020-08-16T00:03:12.188266", + "exception": false, + "start_time": "2020-08-16T00:03:12.150021", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "continuous = ['Age', 'Fare', 'Parch', 'Pclass', 'SibSp', 'FamilySize']\n", + "\n", + "scaler = StandardScaler()\n", + "\n", + "for var in continuous:\n", + " train[var] = train[var].astype('float64')\n", + " train[var] = scaler.fit_transform(train[var].values.reshape(-1, 1))\n", + "for var in continuous:\n", + " test[var] = test[var].astype('float64')\n", + " test[var] = scaler.fit_transform(test[var].values.reshape(-1, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:12.223938Z", + "iopub.status.busy": "2020-08-16T00:03:12.223053Z", + "iopub.status.idle": "2020-08-16T00:03:12.259925Z", + "shell.execute_reply": "2020-08-16T00:03:12.260500Z" + }, + "papermill": { + "duration": 0.057722, + "end_time": "2020-08-16T00:03:12.260637", + "exception": false, + "start_time": "2020-08-16T00:03:12.202915", + "status": "completed" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
Pclass891.0-8.772133e-171.000562-1.566107-0.3693650.8273770.8273770.827377
Sex891.03.524130e-010.4779900.0000000.0000000.0000001.0000001.000000
Age891.02.232906e-161.000562-2.253155-0.5924810.0000000.4079263.870872
SibSp891.04.386066e-171.000562-0.474545-0.474545-0.4745450.4327936.784163
Parch891.05.382900e-171.000562-0.473674-0.473674-0.473674-0.4736746.974147
Fare891.03.987333e-181.000562-0.648422-0.489148-0.357391-0.0242469.667167
Embarked891.03.613917e-010.6356730.0000000.0000000.0000001.0000002.000000
Title891.03.591470e-010.4800200.0000000.0000000.0000001.0000001.000000
FamilySize891.0-2.392400e-171.000562-0.560975-0.560975-0.5609750.0591605.640372
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% \\\n", + "Pclass 891.0 -8.772133e-17 1.000562 -1.566107 -0.369365 0.827377 \n", + "Sex 891.0 3.524130e-01 0.477990 0.000000 0.000000 0.000000 \n", + "Age 891.0 2.232906e-16 1.000562 -2.253155 -0.592481 0.000000 \n", + "SibSp 891.0 4.386066e-17 1.000562 -0.474545 -0.474545 -0.474545 \n", + "Parch 891.0 5.382900e-17 1.000562 -0.473674 -0.473674 -0.473674 \n", + "Fare 891.0 3.987333e-18 1.000562 -0.648422 -0.489148 -0.357391 \n", + "Embarked 891.0 3.613917e-01 0.635673 0.000000 0.000000 0.000000 \n", + "Title 891.0 3.591470e-01 0.480020 0.000000 0.000000 0.000000 \n", + "FamilySize 891.0 -2.392400e-17 1.000562 -0.560975 -0.560975 -0.560975 \n", + "\n", + " 75% max \n", + "Pclass 0.827377 0.827377 \n", + "Sex 1.000000 1.000000 \n", + "Age 0.407926 3.870872 \n", + "SibSp 0.432793 6.784163 \n", + "Parch -0.473674 6.974147 \n", + "Fare -0.024246 9.667167 \n", + "Embarked 1.000000 2.000000 \n", + "Title 1.000000 1.000000 \n", + "FamilySize 0.059160 5.640372 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.describe(include='all').T\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.014497, + "end_time": "2020-08-16T00:03:12.290830", + "exception": false, + "start_time": "2020-08-16T00:03:12.276333", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# **Logistic Regression**\n", + "\n", + "* `train` - stores training set data\n", + "* `y_train` - stores training set results\n", + "* `test` - stores test set data on which predictions have to be made\n", + "\n", + "First, fit the model using `train` and `y_train` and then make prediction on `test`\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:12.359244Z", + "iopub.status.busy": "2020-08-16T00:03:12.358425Z", + "iopub.status.idle": 
"2020-08-16T00:03:15.147061Z", + "shell.execute_reply": "2020-08-16T00:03:15.146465Z" + }, + "papermill": { + "duration": 2.812936, + "end_time": "2020-08-16T00:03:15.147176", + "exception": false, + "start_time": "2020-08-16T00:03:12.334240", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "#Write code for logistic regression here\n", + "clf = LogisticRegression(random_state=0)\n", + "model = clf.fit(train,y_train)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "papermill": { + "duration": 0.026701, + "end_time": "2020-08-16T00:03:18.692199", + "exception": false, + "start_time": "2020-08-16T00:03:18.665498", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "**Create Predictions**\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "execution": { + "iopub.execute_input": "2020-08-16T00:03:18.751021Z", + "iopub.status.busy": "2020-08-16T00:03:18.750047Z", + "iopub.status.idle": "2020-08-16T00:03:19.062102Z", + "shell.execute_reply": "2020-08-16T00:03:19.061231Z" + }, + "papermill": { + "duration": 0.350469, + "end_time": "2020-08-16T00:03:19.062253", + "exception": false, + "start_time": "2020-08-16T00:03:18.711784", + "status": "completed" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "#Write code to generate predictions file\n", + "y_final = model.predict(test)\n", + "output = pd.DataFrame({'PassengerId': test_data['PassengerId'], 'Survived': y_final})" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "output.to_csv('prediction-ann-logistic-regression.csv', index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + 
"name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.6" + }, + "papermill": { + "duration": 22.836091, + "end_time": "2020-08-16T00:03:21.705944", + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + "output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": "2020-08-16T00:02:58.869853", + "version": "2.1.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/prediction-ann-logistic-regression.csv b/prediction-ann-logistic-regression.csv new file mode 100644 index 0000000..154257a --- /dev/null +++ b/prediction-ann-logistic-regression.csv @@ -0,0 +1,419 @@ +PassengerId,Survived +892,0 +893,0 +894,0 +895,0 +896,1 +897,0 +898,1 +899,0 +900,1 +901,0 +902,0 +903,0 +904,1 +905,0 +906,1 +907,1 +908,0 +909,0 +910,1 +911,0 +912,0 +913,0 +914,1 +915,1 +916,1 +917,0 +918,1 +919,0 +920,0 +921,0 +922,0 +923,0 +924,0 +925,0 +926,0 +927,0 +928,1 +929,1 +930,0 +931,0 +932,0 +933,0 +934,0 +935,1 +936,1 +937,0 +938,0 +939,0 +940,1 +941,0 +942,0 +943,0 +944,1 +945,1 +946,0 +947,0 +948,0 +949,0 +950,0 +951,1 +952,0 +953,0 +954,0 +955,1 +956,1 +957,1 +958,1 +959,0 +960,1 +961,1 +962,1 +963,0 +964,1 +965,1 +966,1 +967,1 +968,0 +969,1 +970,0 +971,1 +972,0 +973,0 +974,0 +975,0 +976,0 +977,0 +978,1 +979,1 +980,1 +981,0 +982,1 +983,0 +984,1 +985,0 +986,1 +987,0 +988,1 +989,0 +990,1 +991,0 +992,1 +993,0 +994,0 +995,0 +996,1 +997,0 +998,0 +999,0 +1000,0 +1001,0 +1002,0 +1003,1 +1004,1 +1005,1 +1006,1 +1007,0 +1008,0 +1009,1 +1010,1 +1011,1 +1012,1 +1013,0 +1014,1 +1015,0 +1016,0 +1017,1 +1018,0 +1019,1 +1020,0 +1021,0 +1022,0 +1023,0 +1024,1 +1025,0 +1026,0 +1027,0 +1028,0 +1029,0 +1030,1 +1031,0 +1032,0 +1033,1 +1034,0 +1035,0 +1036,0 +1037,0 +1038,0 +1039,0 +1040,0 +1041,0 +1042,1 +1043,0 +1044,0 +1045,0 +1046,0 +1047,0 +1048,1 +1049,1 +1050,0 +1051,1 +1052,1 +1053,0 +1054,1 +1055,0 +1056,0 +1057,1 +1058,0 +1059,0 +1060,1 +1061,1 +1062,0 +1063,0 +1064,0 +1065,0 +1066,0 +1067,1 
+1068,1 +1069,0 +1070,1 +1071,1 +1072,0 +1073,0 +1074,1 +1075,0 +1076,1 +1077,0 +1078,1 +1079,0 +1080,0 +1081,0 +1082,0 +1083,0 +1084,0 +1085,0 +1086,0 +1087,0 +1088,1 +1089,1 +1090,0 +1091,1 +1092,1 +1093,0 +1094,0 +1095,1 +1096,0 +1097,1 +1098,1 +1099,0 +1100,1 +1101,0 +1102,0 +1103,0 +1104,0 +1105,0 +1106,0 +1107,0 +1108,1 +1109,0 +1110,1 +1111,0 +1112,1 +1113,0 +1114,1 +1115,0 +1116,1 +1117,1 +1118,0 +1119,1 +1120,0 +1121,0 +1122,0 +1123,1 +1124,0 +1125,0 +1126,0 +1127,0 +1128,0 +1129,0 +1130,1 +1131,1 +1132,1 +1133,1 +1134,0 +1135,0 +1136,0 +1137,0 +1138,1 +1139,0 +1140,1 +1141,1 +1142,1 +1143,0 +1144,1 +1145,0 +1146,0 +1147,0 +1148,0 +1149,0 +1150,1 +1151,0 +1152,0 +1153,0 +1154,1 +1155,1 +1156,0 +1157,0 +1158,0 +1159,0 +1160,1 +1161,0 +1162,0 +1163,0 +1164,1 +1165,1 +1166,0 +1167,1 +1168,0 +1169,0 +1170,0 +1171,0 +1172,1 +1173,0 +1174,1 +1175,1 +1176,1 +1177,0 +1178,0 +1179,0 +1180,0 +1181,0 +1182,0 +1183,1 +1184,0 +1185,0 +1186,0 +1187,0 +1188,1 +1189,0 +1190,0 +1191,0 +1192,0 +1193,0 +1194,0 +1195,0 +1196,1 +1197,1 +1198,0 +1199,0 +1200,0 +1201,0 +1202,0 +1203,0 +1204,0 +1205,1 +1206,1 +1207,1 +1208,0 +1209,0 +1210,0 +1211,0 +1212,0 +1213,0 +1214,0 +1215,0 +1216,1 +1217,0 +1218,1 +1219,0 +1220,0 +1221,0 +1222,1 +1223,0 +1224,0 +1225,1 +1226,0 +1227,0 +1228,0 +1229,0 +1230,0 +1231,0 +1232,0 +1233,0 +1234,0 +1235,1 +1236,0 +1237,1 +1238,0 +1239,1 +1240,0 +1241,1 +1242,1 +1243,0 +1244,0 +1245,0 +1246,1 +1247,0 +1248,1 +1249,0 +1250,0 +1251,0 +1252,0 +1253,1 +1254,1 +1255,0 +1256,1 +1257,0 +1258,0 +1259,1 +1260,1 +1261,0 +1262,0 +1263,1 +1264,0 +1265,0 +1266,1 +1267,1 +1268,0 +1269,0 +1270,0 +1271,0 +1272,0 +1273,0 +1274,1 +1275,1 +1276,0 +1277,1 +1278,0 +1279,0 +1280,0 +1281,0 +1282,1 +1283,1 +1284,0 +1285,0 +1286,0 +1287,1 +1288,0 +1289,1 +1290,0 +1291,0 +1292,1 +1293,0 +1294,1 +1295,1 +1296,0 +1297,0 +1298,0 +1299,0 +1300,1 +1301,1 +1302,1 +1303,1 +1304,1 +1305,0 +1306,1 +1307,0 +1308,0 +1309,0