Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
AnandInguva authored Apr 2, 2019
1 parent df149fe commit 577d9af
Show file tree
Hide file tree
Showing 4 changed files with 1,821 additions and 0 deletions.
339 changes: 339 additions & 0 deletions code.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,339 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"from matplotlib import pyplot as plt\n",
"from matplotlib import style\n",
"import sklearn\n",
"from sklearn.linear_model import LogisticRegression"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"\n",
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"from sklearn.linear_model import Perceptron\n",
"\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"\n",
"from sklearn.svm import LinearSVC\n",
"from sklearn.naive_bayes import GaussianNB"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# Read the train and test CSV files from the current working directory.\n",
"train_df, test_df = (pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv'))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Remove the PassengerId and Cabin columns from both splits.\n",
"unused_columns = ['PassengerId', 'Cabin']\n",
"train_df = train_df.drop(columns=unused_columns)\n",
"test_df = test_df.drop(columns=unused_columns)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Both splits, grouped so the cleaning loop in the next cell can process them together.\n",
"data = [train_df, test_df]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Fill missing ages with random whole numbers within one standard deviation of the\n",
"# training mean, then store Age as integers.\n",
"# The statistics come from the training split only and are computed once, before any\n",
"# value is filled in, so both splits are imputed from the same range.\n",
"age_mean = train_df['Age'].mean()\n",
"age_std = train_df['Age'].std()\n",
"age_low, age_high = int(age_mean - age_std), int(age_mean + age_std)\n",
"\n",
"# Fixed seed so that Restart & Run All reproduces the same imputed values.\n",
"rng = np.random.RandomState(42)\n",
"\n",
"for dataset in data:\n",
"    missing_mask = dataset['Age'].isnull()\n",
"    dataset.loc[missing_mask, 'Age'] = rng.randint(age_low, age_high, size=int(missing_mask.sum()))\n",
"    # Cast this split's own column (not train_df's) so test_df keeps its own ages.\n",
"    dataset['Age'] = dataset['Age'].astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: number of ages still missing in the test split.\n",
"test_df['Age'].isna().sum()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# Replace missing Embarked entries in both splits with the port code 'S'.\n",
"common_value = 'S'\n",
"data = [train_df, test_df]\n",
"for dataset in data:\n",
"    embarked_missing = dataset['Embarked'].isnull()\n",
"    dataset.loc[embarked_missing, 'Embarked'] = common_value"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Derive a numeric Title feature from the honorific found in Name, then drop Name.\n",
"titles = {'Mr': 1, 'Miss': 2, 'Mrs': 3, 'Master': 4, 'Rare': 5}\n",
"rare_titles = ['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr',\n",
"               'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona']\n",
"title_aliases = {'Mlle': 'Miss', 'Ms': 'Miss', 'Mme': 'Mrs'}\n",
"data = [train_df, test_df]\n",
"\n",
"for dataset in data:\n",
"    # Raw string: a backslash-dot is an invalid escape sequence in an ordinary string literal.\n",
"    # The pattern captures the letters between a space and the first following period.\n",
"    extracted = dataset['Name'].str.extract(r' ([A-Za-z]+)\\.', expand=False)\n",
"    normalized = extracted.replace(rare_titles, 'Rare').replace(title_aliases)\n",
"    # Titles absent from the mapping (or names without a match) become 0.\n",
"    dataset['Title'] = normalized.map(titles).fillna(0)\n",
"\n",
"train_df = train_df.drop(['Name'], axis=1)\n",
"test_df = test_df.drop(['Name'], axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Treat a missing fare as 0 and store Fare as whole numbers.\n",
"# NOTE(review): this truncates fares before the later binning cell compares them with\n",
"# fractional cut points (7.91, 14.454) - confirm the truncation is intended.\n",
"data = [train_df, test_df]\n",
"for dataset in data:\n",
"    dataset['Fare'] = dataset['Fare'].fillna(0).astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Encode Sex as a number: male -> 0, female -> 1 (any other value becomes NaN).\n",
"genders = dict(male=0, female=1)\n",
"data = [train_df, test_df]\n",
"for dataset in data:\n",
"    sex_codes = dataset['Sex'].map(genders)\n",
"    dataset['Sex'] = sex_codes"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# Remove the Ticket column from both splits.\n",
"train_df = train_df.drop(columns=['Ticket'])\n",
"test_df = test_df.drop(columns=['Ticket'])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Encode the embarkation port as a number: S -> 0, C -> 1, Q -> 2.\n",
"ports = dict(S=0, C=1, Q=2)\n",
"data = [train_df, test_df]\n",
"for dataset in data:\n",
"    port_codes = dataset['Embarked'].map(ports)\n",
"    dataset['Embarked'] = port_codes"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Replace each fare with the index (0-5) of the price band it falls into.\n",
"# Bands are (low, high] intervals: <= 7.91, (7.91, 14.454], (14.454, 31], (31, 99],\n",
"# (99, 250] and > 250.\n",
"fare_edges = [-np.inf, 7.91, 14.454, 31, 99, 250, np.inf]\n",
"data = [train_df, test_df]\n",
"\n",
"for dataset in data:\n",
"    fare_band = pd.cut(dataset['Fare'], bins=fare_edges, labels=False, right=True)\n",
"    dataset['Fare'] = fare_band.astype(int)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Replace each age (truncated to a whole number) with the index of its age band.\n",
"# Bands are (low, high] intervals: <= 11, (11, 18], (18, 22], (22, 27], (27, 33],\n",
"# (33, 40] and everything above 40, giving codes 0-6.\n",
"# NOTE(review): ages above 66 share code 6 with the (40, 66] range - confirm whether a\n",
"# separate seventh band was intended.\n",
"age_edges = [-np.inf, 11, 18, 22, 27, 33, 40, np.inf]\n",
"data = [train_df, test_df]\n",
"for dataset in data:\n",
"    whole_ages = dataset['Age'].astype(int)\n",
"    age_band = pd.cut(whole_ages, bins=age_edges, labels=False, right=True)\n",
"    dataset['Age'] = age_band.astype(int)\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Add two family features to both splits:\n",
"#   relatives - the sum of the SibSp and Parch columns\n",
"#   not_alone - 1 when relatives > 0, otherwise 0\n",
"data = [train_df, test_df]\n",
"for dataset in data:\n",
"    dataset['relatives'] = dataset['SibSp'] + dataset['Parch']\n",
"    # The flag now matches its name: passengers with at least one relative get 1.\n",
"    dataset['not_alone'] = (dataset['relatives'] > 0).astype(int)\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Separate the Survived target from the remaining training columns.\n",
"y_train = train_df['Survived']\n",
"x_train = train_df.drop(columns=['Survived'])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# x_test refers to the same object as test_df; no copy is made.\n",
"x_test = test_df"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Logistic regression: fit on the training split and predict the test split.\n",
"l = LogisticRegression().fit(x_train, y_train)\n",
"y_pred = l.predict(x_test)\n",
"# Scored on the same rows used for fitting, so this is training accuracy (percent).\n",
"log_accuracy = round(100 * l.score(x_train, y_train), 2)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# Decision tree: fit on the training split and predict the test split.\n",
"decision_tree = DecisionTreeClassifier().fit(x_train, y_train)\n",
"y_pred = decision_tree.predict(x_test)\n",
"# Scored on the same rows used for fitting, so this is training accuracy (percent).\n",
"acc_decision_tree = round(100 * decision_tree.score(x_train, y_train), 2)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Linear support vector classifier: fit on the training split and predict the test split.\n",
"svm = LinearSVC()\n",
"svm.fit(x_train, y_train)\n",
"y_pred = svm.predict(x_test)\n",
"# Training accuracy in percent, rounded to two decimals like the other models' figures.\n",
"svm_accuracy = round(svm.score(x_train, y_train) * 100, 2)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Perceptron limited to 5 passes: fit on the training split and predict the test split.\n",
"perceptron = Perceptron(max_iter=5).fit(x_train, y_train)\n",
"y_pred = perceptron.predict(x_test)\n",
"# Scored on the same rows used for fitting, so this is training accuracy (percent).\n",
"acc_perceptron = round(100 * perceptron.score(x_train, y_train), 2)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# Gaussian naive Bayes: fit on the training split and predict the test split.\n",
"gaussian = GaussianNB().fit(x_train, y_train)\n",
"y_pred = gaussian.predict(x_test)\n",
"# Scored on the same rows used for fitting, so this is training accuracy (percent).\n",
"acc_gaussian = round(100 * gaussian.score(x_train, y_train), 2)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Loading

0 comments on commit 577d9af

Please sign in to comment.