-
Notifications
You must be signed in to change notification settings - Fork 1
/
Training_Test_Data.py
27 lines (24 loc) · 1.03 KB
/
Training_Test_Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import csv
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas
from sklearn.impute import SimpleImputer
from sklearn.model_selection import StratifiedKFold
# Split the regularized dataset into stratified train/test folds and write
# every fold to its own pair of CSV files (train_<k>.csv / test_<k>.csv).
INPUT_CSV = "dataRegularizada.csv"
OUTPUT_DIR = "dataRegularizada"
TARGET_COLUMN = "hospital_death"
N_SPLITS = 10

data = pandas.read_csv(INPUT_CSV, decimal=".")
data_total = data
# NOTE: an earlier, now disabled, variant of the line above dropped the
# "patient_id", "encounter_id" and "readmission_status" columns; they are
# currently kept in the exported folds.

# Feature frame handed to the splitter: every column except the target.
data_training_total = data_total.drop(columns=[TARGET_COLUMN])

skf = StratifiedKFold(n_splits=N_SPLITS)
print(skf)

# to_csv does not create missing parent directories, so make sure the output
# folder exists before the first fold is written.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Stratify on the target column so each fold keeps its class proportions.
for fold, (train_index, test_index) in enumerate(
    skf.split(data_training_total, data_total[TARGET_COLUMN])
):
    # Rows are taken from data_total (not data_training_total) so the target
    # column is written to the fold files together with the features.
    train_x = data_total.iloc[train_index]
    test_x = data_total.iloc[test_index]
    train_x.to_csv(
        os.path.join(OUTPUT_DIR, "train_{}.csv".format(fold)),
        sep=",",
        header=True,
        index=False,
    )
    test_x.to_csv(
        os.path.join(OUTPUT_DIR, "test_{}.csv".format(fold)),
        sep=",",
        header=True,
        index=False,
    )