-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathData_Split.py
123 lines (94 loc) · 3.65 KB
/
Data_Split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import csv
import os

import pandas as pd
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18 only ships the legacy module
    from sklearn.cross_validation import train_test_split
def split_combine():
    """Split the combined normalised data set into train and test CSV files.

    Reads ``../Data/Normalised-Data/met_normalised_combine.csv``, randomly
    splits its rows 80/20 with ``train_test_split`` and writes the two parts,
    each preceded by the fixed column header, to
    ``../Data/Train/Train_Combine.csv`` and ``../Data/Test/Test_Combine.csv``.
    Missing output directories are created; existing output files are
    overwritten. The split is not seeded, so it differs between runs.
    """
    # Read the whole file in one go so that every row takes part in the
    # split, whatever the length of the input.
    mylist = pd.read_csv(
        '../Data/Normalised-Data/met_normalised_combine.csv').values.tolist()
    mylist_train, mylist_test = train_test_split(mylist, test_size=0.2)

    header = ['SNO', 'T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM', 'PM 2.5']
    outputs = (
        ("../Data/Train", '../Data/Train/Train_Combine.csv', mylist_train),
        ("../Data/Test", '../Data/Test/Test_Combine.csv', mylist_test),
    )
    for directory, path, rows in outputs:
        if not os.path.exists(directory):
            os.makedirs(directory)
        # NOTE(review): on Python 3 the csv docs recommend opening the file
        # with newline=''; the plain 'w' mode is kept so the script still
        # runs on Python 2 -- confirm the target interpreter.
        with open(path, 'w') as csvfile:
            wr = csv.writer(csvfile, dialect='excel')
            wr.writerow(header)
            wr.writerows(rows)
def split(year):
    """Split one year's normalised data set into train and test CSV files.

    Reads ``../Data/Normalised-Data/met_normalised_<year>.csv``, randomly
    splits its rows 70/30 with ``train_test_split`` and writes the two parts,
    each preceded by the fixed column header, to
    ``../Data/Train/Train_<year>.csv`` and ``../Data/Test/Test_<year>.csv``.
    Missing output directories are created; existing output files are
    overwritten. The split is not seeded, so it differs between runs.

    Args:
        year: Year used to build the input and output file names. Any value
            accepted by ``str()`` works; no per-year row count is needed.
    """
    # Read the whole file in one go so that every row takes part in the
    # split, independent of how many rows a particular year happens to have.
    mylist = pd.read_csv(
        '../Data/Normalised-Data/met_normalised_' + str(year) + '.csv'
    ).values.tolist()
    mylist_train, mylist_test = train_test_split(mylist, test_size=0.3)

    header = ['SNO', 'T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM', 'PM 2.5']
    outputs = (
        ("../Data/Train",
         '../Data/Train/Train_' + str(year) + '.csv', mylist_train),
        ("../Data/Test",
         '../Data/Test/Test_' + str(year) + '.csv', mylist_test),
    )
    for directory, path, rows in outputs:
        if not os.path.exists(directory):
            os.makedirs(directory)
        # NOTE(review): on Python 3 the csv docs recommend opening the file
        # with newline=''; the plain 'w' mode is kept so the script still
        # runs on Python 2 -- confirm the target interpreter.
        with open(path, 'w') as csvfile:
            wr = csv.writer(csvfile, dialect='excel')
            wr.writerow(header)
            wr.writerows(rows)
def combine_train(year, cs):
    """Return every data row of one year's training CSV as a list of lists.

    Args:
        year: Year whose ``Train/Train_<year>.csv`` file (relative to the
            current working directory) is read.
        cs: Number of rows pandas parses per chunk. It only bounds how much
            is read at once; it does not limit how many rows are returned.

    Returns:
        One list of column values per CSV data row, in file order.
    """
    # NOTE(review): split() writes its output under '../Data/Train/', while
    # this function reads from 'Train/' -- confirm the intended working
    # directory before relying on it.
    mylist = []
    for chunk in pd.read_csv('Train/Train_' + str(year) + '.csv', chunksize=cs):
        # Accumulate rather than overwrite, so rows beyond the first ``cs``
        # are kept as well.
        mylist.extend(chunk.values.tolist())
    return mylist
def combine_test(year, cs):
    """Return every data row of one year's test CSV as a list of lists.

    Args:
        year: Year whose ``Test/Test_<year>.csv`` file (relative to the
            current working directory) is read.
        cs: Number of rows pandas parses per chunk. It only bounds how much
            is read at once; it does not limit how many rows are returned.

    Returns:
        One list of column values per CSV data row, in file order.
    """
    # NOTE(review): split() writes its output under '../Data/Test/', while
    # this function reads from 'Test/' -- confirm the intended working
    # directory before relying on it.
    mylist = []
    for chunk in pd.read_csv('Test/Test_' + str(year) + '.csv', chunksize=cs):
        # Accumulate rather than overwrite, so rows beyond the first ``cs``
        # are kept as well.
        mylist.extend(chunk.values.tolist())
    return mylist
if __name__ == "__main__":
    # Active workflow: split the combined, normalised data set in one step.
    #
    # Disabled per-year workflow, kept for reference only:
    #   1. Split every year on its own:
    #          for year in xrange(2013, 2017):
    #              split(year)
    #   2. Load the per-year training files and concatenate them:
    #          final_train = (combine_train(2013, 343) + combine_train(2014, 346)
    #                         + combine_train(2015, 349) + combine_train(2016, 59))
    #      then write the header row
    #          ['SNO', 'T', 'TM', 'Tm', 'SLP', 'H', 'VV', 'V', 'VM', 'PM 2.5']
    #      followed by final_train to 'Train/Train_Combine.csv'.
    #   3. Do the same with combine_test(...) and the same year/size pairs,
    #      writing the result to 'Test/Test_Combine.csv'.
    split_combine()