-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathday414_00.py
executable file
·50 lines (45 loc) · 1.41 KB
/
day414_00.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 14 13:54:55 2019
@author: amanv
"""
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Load the churn dataset.
# NOTE(review): absolute, machine-specific path -- the script only runs where
# this exact file exists.
data = pd.read_csv(r"C:\Users\amanv\Documents\ai&mlclass\Datasets\Churn_Modelling.csv")

# Drop the row-number / customer-id / surname columns before analysis.
data2 = data.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)

# Pearson correlation is computed on numeric (and boolean) columns only.
# data2 still holds the columns that are label-encoded further down
# (presumably text); pandas >= 2.0 raises on such columns instead of silently
# skipping them, so they are filtered out explicitly here.
corr = data2.select_dtypes(include=['number', 'bool']).corr(method='pearson')
sns.heatmap(corr, cmap='coolwarm', annot=True)
plt.show()

# Split the frame into predictors (ip) and the target column (op).
ip = data2.drop(['Exited'], axis=1)
op = data2['Exited']

# A bare `ip.head()` shows nothing when the file is run as a script.
print(ip.head())
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder

# Replace the Gender and Geography values with integer codes.
le1 = LabelEncoder()
ip['Gender'] = le1.fit_transform(ip['Gender'])
le2 = LabelEncoder()
ip['Geography'] = le2.fit_transform(ip['Geography'])

# A bare `ip.tail()` shows nothing when the file is run as a script.
print(ip.tail())

from sklearn.preprocessing import OneHotEncoder

# One-hot encode the columns at positions 1 and 2 and pass every other column
# through unchanged. `OneHotEncoder(categorical_features=...)` was removed in
# scikit-learn 0.22; ColumnTransformer is the supported replacement and, like
# the old option, places the encoded columns first and the rest after them.
# NOTE(review): positions 1 and 2 are presumably Geography and Gender (the
# original note says Gender expands to 2 columns and Geography to 3) --
# confirm against the CSV column order.
ohe = OneHotEncoder()
column_encoder = ColumnTransformer(
    [('onehot', ohe, [1, 2])],
    remainder='passthrough',
    sparse_threshold=0,  # always return a dense array (replaces .toarray())
)
# ColumnTransformer fits a clone of `ohe`; the fitted encoder is available as
# column_encoder.named_transformers_['onehot'].
ip = column_encoder.fit_transform(ip)
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing. The seed is fixed so the split -- and
# every metric computed from it -- is the same on each run.
xtr, xts, ytr, yts = train_test_split(ip, op, test_size=0.2, random_state=42)

# Feature distributions before scaling.
plt.boxplot(xtr)
plt.show()

from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply the same scaling to the
# test rows so no test-set statistics leak into the transformation.
sc = StandardScaler()
xtr = sc.fit_transform(xtr)
xts = sc.transform(xts)

# Feature distributions after scaling.
plt.boxplot(xtr)
plt.show()
from sklearn.linear_model import LogisticRegression

# Fit a logistic-regression classifier on the scaled training data.
model = LogisticRegression()
model.fit(xtr, ytr)

from sklearn.metrics import confusion_matrix

# Evaluate on the held-out rows: confusion matrix first (rows are the true
# labels, columns the predicted labels), then the mean accuracy.
y_pred = model.predict(xts)
print(confusion_matrix(yts, y_pred))

# The score was previously computed and discarded, so it never appeared when
# the file was run as a script.
print(model.score(xts, yts))