-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
143 lines (81 loc) · 3.5 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# -*- coding: utf-8 -*-
"""python sklearn.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1VQQuE7zemTSAUntTrIrPMGhThj4cBH8N
"""
import sklearn

# sklearn documentation database
from sklearn import datasets

# Example dataset shipped with scikit-learn, kept from the notebook for
# exploration only: the name `diabetes` is rebound to the CSV data just below.
diabetes = datasets.load_diabetes()
diabetes

import pandas as pd

diabetes = pd.read_csv('https://gist.githubusercontent.com/davidneves11/944edb5ecb7bf6d1770eae91cb20d049/raw/50d3d054185815b0c49561f94badedc06ef3c313/diabetes.csv')
diabetes.head()

# First experiment: a single feature column ('idade') against the target.
x = diabetes['idade']
y = diabetes['resultado']

from sklearn.model_selection import train_test_split

# Defaults are used for now: train_test_split keeps 75% of the rows for
# training and holds out 25% for testing.
x_treino, x_teste, y_treino, y_teste = train_test_split(x, y)

# Second experiment: every column except the target is used as a feature.
# These assignments replace the single-feature split above.
x = diabetes.drop('resultado', axis=1)
x
y = diabetes['resultado']
y

SEED = 4121988
# Pass the seed to the split as well; otherwise the rows are shuffled
# differently on every run and the scores below are not reproducible.
x_treino, x_teste, y_treino, y_teste = train_test_split(x, y, random_state=SEED)
from sklearn.dummy import DummyClassifier
from sklearn.tree import DecisionTreeClassifier

# Depth-limited decision tree, seeded with SEED; `fit` returns the estimator
# itself, so construction and training are chained into one statement.
clf_arvore = DecisionTreeClassifier(max_depth=3, random_state=SEED).fit(x_treino, y_treino)
# Score on the held-out split (the value is only displayed in a notebook).
clf_arvore.score(x_teste, y_teste)

# Baseline for comparison: always predicts the most frequent training class.
clf_dummy = DummyClassifier(strategy='most_frequent').fit(x_treino, y_treino)
clf_dummy.score(x_teste, y_teste)
from sklearn import tree
import matplotlib.pyplot as plt

# Draw the fitted decision tree on a dedicated 15x10 inch figure.
fig, ax = plt.subplots(figsize=(15, 10))
tree.plot_tree(
    clf_arvore,
    ax=ax,
    fontsize=10,
    rounded=True,
    filled=True,
    # A plain list of column names; some scikit-learn releases reject a
    # pandas Index for this parameter.
    feature_names=list(x_treino.columns),
    class_names=['Não Ativado', 'Ativado'],
)
# `plt.show` without parentheses only references the function and renders
# nothing; it has to be called.
plt.show()
import pandas as pd

batimentos = pd.read_csv('https://gist.githubusercontent.com/davidneves11/d72e7f49ab01c856acc5d07be4b1a9dd/raw/37631e3a40da92e6261c00fffdf0fb9b869b35dd/batimentos%2520cardiacos.csv')
batimentos.head()

import matplotlib.pyplot as plt
import seaborn as sns

sns.set()
# lmplot is a figure-level function: it creates its own figure, so a preceding
# plt.figure(figsize=(100, 50)) only opened a second, enormous empty figure.
# The intended wide 2:1 layout is requested through height/aspect instead.
sns.lmplot(
    x='Peso',
    y='Batimentos cardiacos',
    data=batimentos,
    line_kws={'color': 'red'},  # draw the regression line in red
    height=5,
    aspect=2,
)
plt.show()
# Features and target for the heart-rate regression.
x = batimentos[['Peso', 'Idade']]
y = batimentos['Batimentos cardiacos']

from sklearn.model_selection import train_test_split

SEED = 4500
x_treino, x_teste, y_treino, y_teste = train_test_split(x, y, random_state=SEED)

from sklearn import linear_model
# StandardScaler was used without being imported, which raised NameError.
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only and reuse its statistics for the
# test rows, so no information from the test set leaks into preprocessing.
scaler = StandardScaler()
x_treino_scaled = scaler.fit_transform(x_treino)
x_teste_scaled = scaler.transform(x_teste)

# Keep the intercept: the standardized training features have zero mean, so a
# model without an intercept would predict values centred on 0 instead of on
# the target's mean. The fitted coefficients are the same either way.
rgs = linear_model.LinearRegression()
rgs.fit(x_treino_scaled, y_treino)
y_pred = rgs.predict(x_teste_scaled)
print("Coeficientes do modelo:", rgs.coef_)
import pandas as pd

colesterol = pd.read_csv('https://gist.githubusercontent.com/davidneves11/01b2963f7a8dfd87d79010fbf847b221/raw/685870f4365bcda4e5bb9e342285e0aac37dd556/colesterol.csv')
colesterol.head()

import matplotlib.pyplot as plt
import seaborn as sns

# Each scatterplot gets its own figure and is shown explicitly; in a plain
# script both plots would otherwise be drawn on the same implicit axes and
# never displayed.
plt.figure()
sns.scatterplot(x='pressao_sanguinea_repouso', y='colesterol', data=colesterol)
plt.show()

from sklearn.cluster import KMeans

kmeans = KMeans(n_clusters=2, random_state=9)
# Cluster on the int64 columns only.
x = colesterol.select_dtypes('int64')
colesterol.info()
kmeans.fit(x)
kmeans.labels_  # one label per row, splitting the data into two groups (1 and 0)

# Same scatterplot, now coloured by the cluster each row was assigned to.
plt.figure()
sns.scatterplot(x='pressao_sanguinea_repouso', y='colesterol', data=colesterol, hue=kmeans.labels_)
plt.show()

# Store the cluster label of every row next to the original data.
colesterol['clusters'] = kmeans.labels_
colesterol.head()
# Mean cholesterol within each cluster.
colesterol.groupby('clusters')['colesterol'].mean()