-
Notifications
You must be signed in to change notification settings - Fork 0
/
predict_daily_growth_deaths.py
97 lines (77 loc) · 4.26 KB
/
predict_daily_growth_deaths.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
import math
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
def exponential(x, a, k):
    """Evaluate the exponential model ``a * exp(x * k)``.

    Used both as the model function handed to the curve fitter and to
    evaluate the fitted curve afterwards.

    Args:
        x: Point(s) at which to evaluate the model; anything ``np.exp``
            accepts after multiplication by ``k`` (scalar, array, Series).
        a: Scale factor, i.e. the model value at ``x == 0``.
        k: Rate constant; negative values give a decaying curve.

    Returns:
        The model value(s), with the same shape as ``x``.
    """
    exponent = x * k
    return a * np.exp(exponent)
# Cumulative death totals, one column per country. The first column is skipped
# when building the country list below (presumably a day counter / index
# column -- confirm against the CSV).
dataset = pd.read_csv('data/country_deaths_comparison.csv')

# Number of future days to project, and how many of the most recent daily
# growth-rate observations feed the regression.
days_to_predict = 10
days_for_regression = 90

countries = dataset.columns.values[1:]

# Per-country results, filled in by the processing loop:
#   countries_collection -> projected totals, keyed by country name
#   curve_fit_collection -> observed rates plus fitted curve, keyed by a
#                           display label that includes R squared
countries_collection = {}
curve_fit_collection = {}

# Get the latest date in the report: it is the name of the last column of the
# raw confirmed-cases file. Only the header row is needed, so nrows=0 parses
# the column names without loading any data rows (nothing to free afterwards).
raw_data = pd.read_csv('data/raw_data_confirmed_latest.csv', nrows=0)
as_of_date = raw_data.columns.values[-1]
# For every country: derive daily growth rates, fit an exponential trend to the
# most recent rates, record the fit (with R squared) for plotting, and project
# the cumulative total forward by compounding the predicted rates.
for c in countries:
    # Work on a NaN-free copy of this country's column so `dataset` itself is
    # never mutated. reset_index() moves the previous row labels into an
    # 'index' column, which is used as the x axis of the regression.
    country_ds = dataset[c].dropna().reset_index()

    # Daily growth rate = today's total / previous total - 1, computed on whole
    # columns by label. (Element-wise chained assignment such as
    # frame['rate'].at[i] = ... does not write through under pandas
    # Copy-on-Write, and positional Series[int] lookups are deprecated.)
    country_ds['rate'] = country_ds[c] / country_ds[c].shift(1) - 1
    # The first row has no predecessor, so its rate is undefined; drop it.
    country_ds = country_ds.loc[1:].copy()

    # Only the most recent `days_for_regression` observations drive the fit.
    X_regression = country_ds['index'].iloc[-days_for_regression:].values.flatten()
    y_regression = country_ds['rate'].iloc[-days_for_regression:].values.flatten()
    # A previous total of 0 yields an inf/NaN rate, and curve_fit rejects
    # non-finite input, so those observations are excluded from the fit.
    finite = np.isfinite(y_regression)
    X_regression = X_regression[finite]
    y_regression = y_regression[finite]

    popt_exponential, pcov_exponential = scipy.optimize.curve_fit(
        exponential, X_regression, y_regression, p0=[0.5, -0.1], maxfev=2000)
    # Evaluate the fitted curve over every observed day (not just the fit window).
    country_ds['curve_fit'] = exponential(country_ds['index'], *popt_exponential)

    # Calculate R squared of the regression line
    residuals = y_regression - exponential(X_regression, *popt_exponential)
    ss_res = np.sum(residuals ** 2)
    ss_tot = np.sum((y_regression - np.mean(y_regression)) ** 2)
    r_squared = round(1 - (ss_res / ss_tot), 3)

    # Copy plot data and the fit line into the collection and set the name to
    # include R squared, to be displayed later
    curve_fit_collection[c + ' (R^2=' + str(r_squared) + ')'] = country_ds

    # Predicted growth rates for the days immediately following the last
    # observation, on the same x axis the model was fitted on.
    next_x = country_ds['index'].iloc[-1] + 1
    X_future = np.arange(next_x, next_x + days_to_predict)
    y_pred = exponential(X_future, *popt_exponential)

    # Compound the predicted rates onto the last observed total. Entry 0 is the
    # last actual total; entry k is the projection k days ahead. Building the
    # series directly avoids positional column access on a concatenated frame,
    # whose column order depends on whether concat sorts the columns.
    projected_totals = [country_ds[c].iloc[-1]]
    for growth in y_pred:
        projected_totals.append(projected_totals[-1] * (1 + growth))
    countries_collection[c] = pd.Series(projected_totals, name=c)
# Put every country's projection side by side (one column per country, rows are
# day offsets), round to whole numbers, and persist the table.
aligned_countries = pd.concat(countries_collection, axis=1, sort=True).round(0)
aligned_countries.to_csv('data/predict_daily_growth_deaths_comparison.csv')

# Line chart of the projected totals, saved to disk.
projection_title = 'Next ' + str(days_to_predict) + ' Days Death Totals (As of ' + as_of_date + ')'
projection_credit = "Data source: CSSE at JHU // Data calculations: Dmitri Prigojev"

graph = aligned_countries.plot()
graph.minorticks_on()
graph.set_title(projection_title)
graph.grid(True)
graph.set_xlabel('Days in the future')
graph.set_ylabel('Total Deaths')
# Small grey attribution note placed in figure coordinates.
graph.figure.text(
    0.15,
    0.115,
    projection_credit,
    verticalalignment='bottom',
    horizontalalignment='left',
    color='grey',
    fontsize=7,
)
graph.figure.savefig('graphs/predicting_deaths.png', dpi=200)
# Grid of small panels, three per row, one per country: observed daily growth
# rate with the fitted exponential trend drawn over it.
num_graph_rows = math.ceil(len(curve_fit_collection) / 3)
plt.figure(dpi=200)
plt.suptitle('Daily Growth Rates of Deaths with Regression (As of ' + as_of_date + ')')

for panel, (label, fitted) in enumerate(curve_fit_collection.items(), start=1):
    day_axis = fitted['index'].values
    # Ticks and tick labels are suppressed; each panel only shows curve shapes.
    plt.subplot(num_graph_rows, 3, panel, yticklabels='', xticklabels='', xticks=[], yticks=[])
    plt.plot(day_axis, fitted['rate'].values)
    plt.plot(day_axis, fitted['curve_fit'].values)
    # The label already carries the country name and its R squared.
    plt.title(label, fontsize=8)

regression_note = (
    "Regression trend based on last " + str(days_for_regression) + " days only\n"
    "Data source: CSSE at JHU // Data calculations: Dmitri Prigojev"
)
plt.figtext(
    0.05,
    0.05,
    regression_note,
    verticalalignment='bottom',
    horizontalalignment='left',
    color='grey',
    fontsize=7,
)
plt.savefig('graphs/rates_w_regression_deaths.png')
print("Done")