# pandasTest.py
import numpy as np
from netCDF4 import Dataset, num2date
import statsmodels.api as sm
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import pandas as pd
def R2(data, model):
    """Coefficient of determination between observations and model values."""
    data_mean = np.mean(data)
    SS_tot = np.sum((data - data_mean)**2.)
    SS_res = np.sum((data - model)**2.)
    r2 = 1 - SS_res/SS_tot
    return r2
# load the mass-balance data (column 0: year, column 1: annual balance)
mb_taku = np.loadtxt('taku_mb.csv', delimiter=',')
M_dot_taku = mb_taku[:, 1]
years_taku = mb_taku[:, 0]
# load the climate data (open read-only; append mode is not needed here)
clim = Dataset('taku_climate.nc', 'r')
# clim = Dataset('RGI60-01.01390/climate_monthly.nc', 'a')
# read the variables
clim_time = clim.variables['time'][:] # days since 1801-01-01 00:00:00
clim_time_units = clim.variables['time'].units
clim_temp = clim.variables['temp'][:] # in degC
clim_prec = clim.variables['prcp'][:]  # in kg m-2
# convert to dates
clim_dates = num2date(clim_time[:], units=clim_time_units, calendar='standard')
# Note: the Taku mass-balance data cover 1946 - 2015,
# while the climate data cover Oct 1901 to Sep 2016,
# so use climate data from index 531 to 1370
# filter the temperature and precipitation data for the mass-balance model
TMP_melt = 0.0    # degC, melt threshold
P_TMP_snow = 7.0  # degC, threshold below which precipitation counts as snow
# clim_temp_f = np.maximum(clim_temp-TMP_melt, 0)
clim_temp_f = np.copy(clim_temp)
clim_temp_f[clim_temp < TMP_melt] = 0
clim_prec_f = np.copy(clim_prec)
clim_prec_f[clim_temp > P_TMP_snow] = 0
# reshape and sum climate variables
index_calendar = np.arange(531, 1371)
index_hydro = np.arange(528, 1368)
index_used = index_hydro
len_index_used = len(index_used)
clim_dates_reshape = np.reshape(clim_dates[index_used], (70, 12))
clim_temp_reshape = np.reshape(clim_temp_f[index_used], (70, 12))
clim_prec_reshape = np.reshape(clim_prec_f[index_used], (70, 12))
len_t_used = np.count_nonzero(clim_temp_f[index_used])
len_p_used = np.count_nonzero(clim_prec_f[index_used])
print("Only %.02f percent of tmp data used" % (len_t_used/len_index_used*100))
print("Only %.02f percent of pre data used" % (len_p_used/len_index_used*100))
clim_temp_ysum = np.sum(clim_temp_reshape, axis=1)
clim_prec_ysum = np.sum(clim_prec_reshape, axis=1)
# up to 70 years
N1 = 0
N2 = 70
print(mb_taku[N1:N2, 0])
clim_temp_ysum_use = clim_temp_ysum[N1:N2]
clim_prec_ysum_use = clim_prec_ysum[N1:N2]
M_dot_taku_use = M_dot_taku[N1:N2]
years_taku_use = years_taku[N1:N2]
# the independent variables, namely precipitation and temperature
X0 = np.column_stack((clim_prec_ysum_use, clim_temp_ysum_use))
# add constant for intercept
X = sm.add_constant(X0)
sm_massbal = sm.OLS(M_dot_taku_use, X).fit()
# print(sm_massbal.summary())
beta_vals = sm_massbal.params
m_model_dd = beta_vals[0] + beta_vals[1]*clim_prec_ysum_use + beta_vals[2]*clim_temp_ysum_use
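# sanity check: score the fitted degree-day regression with the
# R2 helper defined above
print("R2 of the OLS degree-day model: %.3f" % R2(M_dot_taku_use, m_model_dd))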
# now comes the neural network
# convert to pandas dataframe
dataset = pd.DataFrame({'MB': M_dot_taku_use,
                        'TMP': clim_temp_ysum_use,
                        'PRE': clim_prec_ysum_use})
# dataset = pd.DataFrame({'MB': M_dot_taku_use,
#                         'TMP': clim_temp_reshape,
#                         'PRE': clim_prec_reshape})
t_cols = ['T_JAN', 'T_FEB', 'T_MAR', 'T_APR', 'T_MAY', 'T_JUN', 'T_JUL', 'T_AUG', 'T_SEP', 'T_OCT', 'T_NOV', 'T_DEZ',
          'P_JAN', 'P_FEB', 'P_MAR', 'P_APR', 'P_MAY', 'P_JUN', 'P_JUL', 'P_AUG', 'P_SEP', 'P_OCT', 'P_NOV', 'P_DEZ']
data = np.column_stack([clim_temp_reshape, clim_prec_reshape])
dataset2 = pd.DataFrame(data=data, columns=t_cols)
dataset2['MB'] = M_dot_taku_use
print(dataset2)
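# A possible continuation (sketch): the monthly frame 'dataset2' could be
# split and normalized exactly like 'dataset' below, using the twelve
# monthly temperature and precipitation columns as predictors instead of
# the annual sums; the variable names here are illustrative.
# train_dataset2 = dataset2.sample(frac=0.4, random_state=0)
# test_dataset2 = dataset2.drop(train_dataset2.index)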
# split the dataset into training and test sets (40% used for training)
train_dataset = dataset.sample(frac=0.4, random_state=0)
test_dataset = dataset.drop(train_dataset.index)
#
# pop off the label, i.e. the target value
train_labels = train_dataset.pop('MB')
test_labels = test_dataset.pop('MB')
# normalize the dataset
train_stats = train_dataset.describe()
train_stats = train_stats.transpose()
print(len(train_dataset.keys()))
def norm(x):
    return (x - train_stats['mean']) / train_stats['std']
# this is what the ANN understands
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)
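# ----------------------------------------------------------------------
# A minimal sketch of the intended neural network, assuming a small fully
# connected regression model; the layer width (8 units), the 'adam'
# optimizer and the epoch count are illustrative choices, not values
# taken from this script.
model = keras.Sequential([
    layers.Dense(8, activation='relu', input_shape=[len(train_dataset.keys())]),
    layers.Dense(8, activation='relu'),
    layers.Dense(1)
])
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
model.fit(normed_train_data, train_labels,
          epochs=200, validation_split=0.2, verbose=0)
# evaluate the ANN on the held-out test set with the R2 helper from above
test_pred = model.predict(normed_test_data).flatten()
print("ANN R2 on test data: %.3f" % R2(test_labels.values, test_pred))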