-
Notifications
You must be signed in to change notification settings - Fork 0
/
task_three.py
173 lines (134 loc) · 8.35 KB
/
task_three.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# to be run each day: collects problems from task two and aggregates them into daily issue areas.
import datetime
import sqlalchemy
import pandas as pd
import sqlite3
import numpy as np
import csv
PATH = 'my_file'
# Reads in databases from tasks 1 and 2
engine = sqlalchemy.create_engine('sqlite:///' + PATH)
cold_data = pd.read_sql_table("ColdDatabase", engine)
warm_data = pd.read_sql_table("WarmDatabase", engine)
carbon_data = pd.read_sql_table("CarbonDatabase", engine)
low_carbon_data = pd.read_sql_table("LowCarbonDatabase", engine)
# Convert times to integers so that they compare accurately
for x in range(0, len(cold_data['Timestamp'])):
cold_data['Timestamp'].loc[x] = (pd.to_datetime(cold_data['Timestamp'].loc[x]) - datetime.timedelta(0))
for x in range(0, len(warm_data['Timestamp'])):
warm_data['Timestamp'].loc[x] = (pd.to_datetime(warm_data['Timestamp'].loc[x]) - datetime.timedelta(0))
for x in range(0, len(carbon_data['Timestamp'])):
carbon_data['Timestamp'].loc[x] = (pd.to_datetime(carbon_data['Timestamp'].loc[x]) - datetime.timedelta(0))
for x in range(0, len(low_carbon_data['Timestamp'])):
low_carbon_data['Timestamp'].loc[x] = (pd.to_datetime(low_carbon_data['Timestamp'].loc[x]) - datetime.timedelta(0))
# Creates copies such that we can do different kinds of analysis on different copies
cold_data_copy = cold_data.copy()
warm_data_copy = warm_data.copy()
carbon_data_copy = carbon_data.copy()
low_carbon_data_copy = low_carbon_data.copy()
# Sets up timestamp piece -- first/last time too cold/warm
cold_data_copy['First Time Too Cold'] = cold_data_copy['Timestamp']
cold_data_copy['Last Time Too Cold'] = cold_data_copy['Timestamp']
warm_data_copy['First Time Too Warm'] = warm_data_copy['Timestamp']
warm_data_copy['Last Time Too Warm'] = warm_data_copy['Timestamp']
carbon_data_copy['First Time Too Much CO2'] = carbon_data_copy['Timestamp']
carbon_data_copy['Last Time Too Much CO2'] = carbon_data_copy['Timestamp']
low_carbon_data_copy['First Time Too Little CO2'] = low_carbon_data_copy['Timestamp']
low_carbon_data_copy['Last Time Too Little CO2'] = low_carbon_data_copy['Timestamp']
# Time Testing
# print(cold_data_copy['First Time Too Cold'])
# print(np.min(cold_data_copy['First Time Too Cold'])) # prints incorrectly across days - nov 9 is min
# print(np.max(cold_data_copy['First Time Too Cold'])) # prints incorrectly across days - nov 5 is max on same trial
# print(np.min(["Sun Nov 10 17:00:00 2019", "Wed Nov 6 16:00:00 2019"]))
# but this only happens when i put it in the pandas format -- in other formats it was calculating correctly
cold_with_times = cold_data_copy.groupby("Room #").agg({'First Time Too Cold': np.min, 'Last Time Too Cold': np.max})
warm_with_times = warm_data_copy.groupby("Room #").agg({'First Time Too Warm': np.min, 'Last Time Too Warm': np.max})
carbon_with_times = carbon_data_copy.groupby('Room #').agg({'First Time Too Much CO2': np.min, 'Last Time Too Much CO2': np.max})
low_carbon_with_times = low_carbon_data_copy.groupby('Room #').agg({'First Time Too Little CO2': np.min, 'Last Time Too Little CO2': np.max})
time_temp_vals = pd.merge(cold_with_times, warm_with_times, how='outer', on=['Room #']).fillna("N/A")
time_carbon_vals = pd.merge(carbon_with_times, low_carbon_with_times, how='outer', on=['Room #']).fillna("N/A")
# all_times = pd.merge(time_temp_vals, time_carbon_vals, how='outer', on=['Room #']).fillna("N/A")
'''
coldest = cold_data.sort_values(by='Temperature', ascending=True).reset_index().iloc[0]
warmest = warm_data.sort_values(by='Temperature', ascending=False).reset_index().iloc[0]
carbonest = carbon_data.sort_values(by='CO2', ascending=False).reset_index().iloc[0]
print(coldest)
print(warmest)
print(carbonest)
nice data to have, but NOT part of the required weekly report.
'''
# ORIGINAL VERSION (used for interval counting):
warm_data['Intervals Too Warm'] = None
cold_data['Intervals Too Cold'] = None
carbon_data['Intervals Too Much CO2'] = None
low_carbon_data['Intervals Too Little CO2'] = None
warm_data = warm_data.groupby("Room #").agg({'Intervals Too Warm': np.size})
cold_data = cold_data.groupby("Room #").agg({'Intervals Too Cold': np.size})
carbon_data = carbon_data.groupby("Room #").agg({'Intervals Too Much CO2': np.size})
low_carbon_data = low_carbon_data.groupby("Room #").agg({'Intervals Too Little CO2': np.size})
temp_vals = pd.merge(warm_data, cold_data, how='outer', on=['Room #'])
carbon_vals = pd.merge(carbon_data, low_carbon_data, how='outer', on=['Room #'])
# COPY VERSION (used for high/low temps):
cold_data_copy = cold_data_copy.drop('index', axis=1)
warm_data_copy = warm_data_copy.drop('index', axis=1)
temp_vals_copy = pd.merge(warm_data_copy, cold_data_copy, how='outer', on=['Room #', 'Temperature', 'CO2'])
temp_vals_copy['Highest Temperature'] = temp_vals_copy['Temperature']
temp_vals_copy['Lowest Temperature'] = temp_vals_copy['Temperature']
all_temps = temp_vals_copy.set_index("Room #")['Temperature']
carbon_vals_copy = pd.merge(carbon_data_copy, low_carbon_data_copy, how='outer', on=['Room #', 'Temperature', 'CO2'])
carbon_vals_copy['Highest CO2'] = carbon_vals_copy['CO2']
carbon_vals_copy['Lowest CO2'] = carbon_vals_copy['CO2']
all_carbon = carbon_vals_copy.set_index("Room #")['CO2']
temp_vals_copy = temp_vals_copy.groupby("Room #").agg({'Highest Temperature': np.max, 'Lowest Temperature': np.min})
carbon_vals_copy = carbon_vals_copy.groupby("Room #").agg({'Highest CO2': np.max, "Lowest CO2": np.min})
most_temp_data = pd.merge(temp_vals, temp_vals_copy, on=['Room #']).fillna(0)
# print(most_temp_data)
most_carbon_data = pd.merge(carbon_vals, carbon_vals_copy, on=['Room #']).fillna(0)
print(most_carbon_data)
all_temp_data = pd.merge(most_temp_data, time_temp_vals, on=['Room #']).fillna(0)
# print(all_temp_data)
all_carbon_data = pd.merge(most_carbon_data, time_carbon_vals, on=['Room #']).fillna(0)
print(all_carbon_data)
all_data = pd.merge(all_temp_data, all_carbon_data, how='outer', on=['Room #']).fillna(0)
# print(all_data.T.index) # range of timestamps is 4-7
def convert_datetime(z):
if type(z) == str:
return z
elif type(z) == pd.Timestamp:
print(datetime.datetime.strftime(z.to_pydatetime(), '%Y-%m-%d %H:%M:%S'))
return datetime.datetime.strftime(z.to_pydatetime(), '%Y-%m-%d %H:%M:%S')
for x in range(0, len(all_data['First Time Too Cold'])):
all_data['First Time Too Cold'].iloc[x] = convert_datetime(all_data['First Time Too Cold'].iloc[x])
all_data['Last Time Too Cold'].iloc[x] = convert_datetime(all_data['Last Time Too Cold'].iloc[x])
all_data['First Time Too Warm'].iloc[x] = convert_datetime(all_data['First Time Too Warm'].iloc[x])
all_data['Last Time Too Warm'].iloc[x] = convert_datetime(all_data['Last Time Too Warm'].iloc[x])
all_data['First Time Too Much CO2'].iloc[x] = convert_datetime(all_data['First Time Too Much CO2'].iloc[x])
all_data['Last Time Too Much CO2'].iloc[x] = convert_datetime(all_data['Last Time Too Much CO2'].iloc[x])
all_data['First Time Too Little CO2'].iloc[x] = convert_datetime(all_data['First Time Too Little CO2'].iloc[x])
all_data['Last Time Too Little CO2'].iloc[x] = convert_datetime(all_data['Last Time Too Little CO2'].iloc[x])
print(all_data['First Time Too Cold'].iloc[x])
conn = sqlite3.connect(PATH)
all_data.to_sql("DailyDatabase", conn, if_exists='append')
all_temps.to_sql("DailyTempDatabase", conn, if_exists='append')
all_carbon.to_sql("DailyCarbonDatabase", conn, if_exists='append')
'''
with open('basic_weekly.csv', 'w') as weekly_df:
csv_writer = csv.writer(weekly_df, delimiter=";")
for index, row in all_data.iterrows():
my_index = index
temp_warm = row['Intervals Too Warm']
temp_cold = row['Intervals Too Cold']
temp_carbon = row['Intervals Too Much CO2']
csv_writer.writerow(["{0}, {1}, {2}, {3}".format(my_index, temp_warm, temp_cold, temp_carbon)])
'''
# Daily Clear
cursor = conn.cursor()
drop = "DROP TABLE ColdDatabase"
drop2 = "DROP TABLE WarmDatabase"
drop3 = "DROP TABLE CarbonDatabase"
drop4 = "DROP TABLE LowCarbonDatabase"
cursor.execute(drop)
cursor.execute(drop2)
cursor.execute(drop3)
cursor.execute(drop4)
conn.close()