-
Notifications
You must be signed in to change notification settings - Fork 2
/
code.py
160 lines (150 loc) · 6.79 KB
/
code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
from itertools import combinations
from warnings import filterwarnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import seaborn as sbn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
filterwarnings("ignore")
# Load the raw dataset once; `stress` is kept untouched so later sections can
# start again from a clean copy.
stress = pd.read_csv("/var/SaYoPillow.csv")

# dropna() returns a new frame instead of modifying in place, so its result
# has to be assigned -- a bare `df.dropna()` leaves rows with missing values
# in `df`.
df = stress.copy()
df = df.dropna()

# Replace the file's header with descriptive names. The assignment is
# positional, so it assumes the CSV has exactly these nine columns in this
# order, with the label last.
df.columns = [
    'snoring_rate', 'respiration_rate', 'body_temperature', 'limb_movement',
    'blood_oxygen', 'eye_movement', 'sleeping_hours', 'heart_rate',
    'stress_level',
]
df.head()
snoring_rate respiration_rate body_temperature limb_movement blood_oxygen eye_movement sleeping_hours heart_rate stress_level
0 93.80 25.680 91.840 16.600 89.840 99.60 1.840 74.20 3
1 91.64 25.104 91.552 15.880 89.552 98.88 1.552 72.76 3
2 60.00 20.000 96.000 10.000 95.000 85.00 7.000 60.00 1
3 85.76 23.536 90.768 13.920 88.768 96.92 0.768 68.84 3
4 48.12 17.248 97.872 6.496 96.248 72.48 8.248 53.12 0
# Summary statistics for every column, transposed so each column is one row.
df.describe().T
count mean std min 25% 50% 75% max
snoring_rate 630.0 71.6 19.372833 45.0 52.50 70.0 91.25 100.0
respiration_rate 630.0 21.8 3.966111 16.0 18.50 21.0 25.00 30.0
body_temperature 630.0 92.8 3.529690 85.0 90.50 93.0 95.50 99.0
limb_movement 630.0 11.7 4.299629 4.0 8.50 11.0 15.75 19.0
blood_oxygen 630.0 90.9 3.902483 82.0 88.50 91.0 94.25 97.0
eye_movement 630.0 88.5 11.893747 60.0 81.25 90.0 98.75 105.0
sleeping_hours 630.0 3.7 3.054572 0.0 0.50 3.5 6.50 9.0
heart_rate 630.0 64.5 9.915277 50.0 56.25 62.5 72.50 85.0
stress_level 630.0 2.0 1.415337 0.0 1.00 2.0 3.00 4.0
# Print each column's dtype and non-null count, plus the frame's memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 630 entries, 0 to 629
Data columns (total 9 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 snoring_rate 630 non-null float64
1 respiration_rate 630 non-null float64
2 body_temperature 630 non-null float64
3 limb_movement 630 non-null float64
4 blood_oxygen 630 non-null float64
5 eye_movement 630 non-null float64
6 sleeping_hours 630 non-null float64
7 heart_rate 630 non-null float64
8 stress_level 630 non-null int64
dtypes: float64(8), int64(1)
memory usage: 44.4 KB
# True if at least one cell anywhere in the frame is missing.
df.isnull().values.any()
False
# Number of rows per stress level, to check how balanced the classes are.
df["stress_level"].value_counts()
3 126
1 126
0 126
2 126
4 126
Name: stress_level, dtype: int64
# Bar chart of the row count for each stress level.
sbn.countplot(data = df, x = "stress_level")
# Mean of every feature within each stress level.
df.groupby('stress_level').mean()
snoring_rate respiration_rate body_temperature limb_movement blood_oxygen eye_movement sleeping_hours heart_rate
stress_level
0 47.5 17.0 97.5 6.0 96.0 70.0 8.0 52.5
1 55.0 19.0 95.0 9.0 93.5 82.5 6.0 57.5
2 70.0 21.0 93.0 11.0 91.0 90.0 3.5 62.5
3 87.5 24.0 91.0 14.5 89.0 97.5 1.0 70.0
4 98.0 28.0 87.5 18.0 85.0 102.5 0.0 80.0
# Pairwise correlation between all columns (features and label), rendered as
# a table whose cells are shaded with the 'coolwarm' colormap.
corr = df.corr()
corr.style.background_gradient(cmap='coolwarm')
snoring_rate respiration_rate body_temperature limb_movement blood_oxygen eye_movement sleeping_hours heart_rate stress_level
snoring_rate 1.000000 0.976268 -0.902475 0.981078 -0.903140 0.950600 -0.920554 0.976268 0.975322
respiration_rate 0.976268 1.000000 -0.889237 0.991738 -0.889210 0.935572 -0.891855 1.000000 0.963516
body_temperature -0.902475 -0.889237 1.000000 -0.896412 0.998108 -0.857299 0.954860 -0.889237 -0.962354
limb_movement 0.981078 0.991738 -0.896412 1.000000 -0.898527 0.964703 -0.901102 0.991738 0.971071
blood_oxygen -0.903140 -0.889210 0.998108 -0.898527 1.000000 -0.862136 0.950189 -0.889210 -0.961092
eye_movement 0.950600 0.935572 -0.857299 0.964703 -0.862136 1.000000 -0.893952 0.935572 0.951988
sleeping_hours -0.920554 -0.891855 0.954860 -0.901102 0.950189 -0.893952 1.000000 -0.891855 -0.973036
heart_rate 0.976268 1.000000 -0.889237 0.991738 -0.889210 0.935572 -0.891855 1.000000 0.963516
stress_level 0.975322 0.963516 -0.962354 0.971071 -0.961092 0.951988 -0.973036 0.963516 1.000000
def comparisonplot(data, column1, column2, target):
    """Display a Plotly scatter plot of one column against another.

    Args:
        data: Data frame passed to ``px.scatter`` as ``data_frame``.
        column1: Name of the column drawn on the x axis.
        column2: Name of the column drawn on the y axis.
        target: Name of the column used to color the points.

    Returns:
        None. The figure is shown as a side effect.
    """
    px.scatter(data_frame=data, x=column1, y=column2, color=target).show()
# Draw one scatter plot for every unordered pair of columns, colored by the
# stress level. combinations() yields each pair once, in column order, which
# is the same sequence the previous "skip columns already seen" bookkeeping
# produced (column names are unique), without the list-membership test on
# every inner iteration. Pairs that have 'stress_level' itself on an axis are
# included, as before.
for col1, col2 in combinations(df.columns, 2):
    comparisonplot(df, col1, col2, 'stress_level')
# Separate the feature columns from the label the models will predict.
X = df.drop(columns=["stress_level"])
y = df["stress_level"]
# Peek at the first five labels.
y.head(5)
0 3
1 3
2 1
3 3
4 0
Name: stress_level, dtype: int64
# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# K-nearest-neighbours classifier with the library's default settings.
# fit() trains in place and returns the estimator itself, so `knn_model`
# is the same object as `knn`.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn_model = knn
knn_model
KNeighborsClassifier()
# Predict the held-out rows and compute the fraction classified correctly.
y_pred = knn_model.predict(X_test)
accuracy_score(y_test, y_pred)
1.0
# Per-class precision, recall, F1 and support for the predictions in y_pred.
print(classification_report(y_test, y_pred))
precision recall f1-score support
0 1.00 1.00 1.00 34
1 1.00 1.00 1.00 31
2 1.00 1.00 1.00 33
3 1.00 1.00 1.00 31
4 1.00 1.00 1.00 29
accuracy 1.00 158
macro avg 1.00 1.00 1.00 158
weighted avg 1.00 1.00 1.00 158
# Confusion matrix for the current predictions, drawn as an annotated heatmap
# ('g' formatting prints the counts as plain numbers).
mat = confusion_matrix(y_test, y_pred)
p = sbn.heatmap(
    data=pd.DataFrame(mat),
    annot=True,
    fmt='g',
    cmap="YlGnBu",
)
# --- Random forest section ---------------------------------------------------
# Rebuild the cleaned frame from the raw data so this section does not depend
# on whatever state `df` was left in above.
df = stress.copy()
df = df.dropna()
df.columns = [
    'snoring_rate', 'respiration_rate', 'body_temperature', 'limb_movement',
    'blood_oxygen', 'eye_movement', 'sleeping_hours', 'heart_rate',
    'stress_level',
]

y = df["stress_level"]
X = df.drop(["stress_level"], axis=1)
# Same test_size and random_state as the KNN split above.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Seed the forest: without random_state each run grows different trees, so the
# accuracy, report and feature importances below would change between runs.
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred = rf_model.predict(X_test)
# Print the score; a bare expression in the middle of a block is discarded.
print(accuracy_score(y_test, y_pred))

# Classify one hand-entered sample (one value per feature, in column order).
# The model was fitted on a DataFrame, so the sample is wrapped in a frame
# with the same column names instead of being passed as an unnamed list.
newData = [[93.80, 25.680, 91.840, 16.600, 89.840, 99.60, 1.840, 74.20]]
new_sample = pd.DataFrame(newData, columns=X_train.columns)
print(rf_model.predict(new_sample))

# Feature importances scaled by 100, plotted smallest-to-largest as a
# horizontal bar chart.
Importance = pd.DataFrame(
    {"Importance": rf_model.feature_importances_ * 100},
    index=X_train.columns,
)
Importance.sort_values(by="Importance", ascending=True).plot(kind="barh", color="r")
plt.xlabel("Variables Importance Ratio")

print(classification_report(y_test, y_pred))
precision recall f1-score support
0 1.00 1.00 1.00 34
1 1.00 0.97 0.98 31
2 0.97 1.00 0.99 33
3 1.00 1.00 1.00 31
4 1.00 1.00 1.00 29
accuracy 0.99 158
macro avg 0.99 0.99 0.99 158
weighted avg 0.99 0.99 0.99 158
# Confusion matrix for the current predictions, drawn as an annotated heatmap
# ('g' formatting prints the counts as plain numbers).
mat = confusion_matrix(y_test, y_pred)
p = sbn.heatmap(
    data=pd.DataFrame(mat),
    annot=True,
    fmt='g',
    cmap="YlGnBu",
)