# train: DQN training script.
# (Web-page chrome and the line-number gutter that preceded the code in the
# page extract were not part of the program and have been dropped.)
from collections import deque

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import preprocessing

import agent
import choose_action
import env
# Build the environment, the agent and the action-decoding helper.
#
# The file imports env, agent and choose_action as *modules*. A module object
# is not callable and the bare name DQNagent was never bound, so the objects
# are looked up on their modules here instead.
# NOTE(review): assumes each module exposes an attribute with the name used
# below (env.env, agent.DQNagent, choose_action.choose_action) -- confirm
# against those modules.
env = env.env()
agent = agent.DQNagent()
choose_action = choose_action.choose_action

# Kept from the original script; the name is rebound to the CSV path on the
# next statement, so only the module's import-time side effects (if any) matter.
import dataset

# Load the CSV, index it by its 'date' column and drop rows with missing values.
dataset = 'soilwat_f_4days.csv'
df = pd.read_csv(dataset)
df.set_index('date', inplace=True)
df.dropna(inplace=True)
# Preprocess the dataset using min-max scaling.
def data_preprocessing(df):
    """Rescale the values of ``df`` with scikit-learn's ``MinMaxScaler``.

    Args:
        df: DataFrame whose ``.values`` are handed to
            ``MinMaxScaler.fit_transform`` (scaler built with default settings).

    Returns:
        A ``(scaled_df, scaler)`` tuple: a new DataFrame holding the scaled
        values under the same columns and index as ``df``, and the fitted
        scaler instance.
    """
    min_max = preprocessing.MinMaxScaler()
    scaled = pd.DataFrame(
        min_max.fit_transform(df.values),
        columns=df.columns,
        index=df.index,
    )
    return scaled, min_max
# Scale the dataset once up front and keep the fitted scaler.
df, scaler = data_preprocessing(df)

# Number of training episodes run by the training loop.
n_games = 10_000

# Score threshold that the training loop compares against before saving models.
best_score = -100

# Per-episode scores, appended to by the training loop.
score_history = []

# Declared for average rewards; not written to anywhere in the visible code.
avg_rewards = []

# Length of the rolling windows, and the row cursor into df (starts one
# window in; the training loop advances it).
seq_len = 4
steps = seq_len
# Concatenate the climate data and the soil water content.
def concat(x, s):
    """Join ``x`` and ``s`` column-wise into one float32 tensor.

    Args:
        x: Climate-data window, converted to a float32 tensor.
        s: Soil-water-content window, converted to float32 and given an
            extra axis at position 1 before joining.
            (assumes a 1-D sequence as long as ``x`` -- TODO confirm)

    Returns:
        The result of ``tf.concat`` along axis 1, with the ``x`` columns
        first and the expanded ``s`` column last.
    """
    # Same call order as before: convert s, add the axis, then convert x.
    soil_column = np.expand_dims(tf.convert_to_tensor(s, tf.float32), axis=1)
    climate = tf.convert_to_tensor(x, tf.float32)
    return tf.concat((climate, soil_column), axis=1)
# Train the model.

# Exploration schedule passed to agent.get_qs and decayed once per episode.
# These names were read and updated by the loop but never defined in this file.
# NOTE(review): the values below are placeholders so the script can run --
# confirm them against the intended hyper-parameters.
epsilon = 1.0
EPSILON_DECAY = 0.99975
MIN_EPSILON = 0.001

# Number of most recent episodes summarised in the statistics, and how often
# (in episodes) those statistics are pushed to the agent's tensorboard.
STATS_WINDOW = 50

# df is not modified by this loop, so read its array once instead of on every
# row access.
data = df.values

for i in range(n_games):
    agent.tensorboard.step = i

    # Seed the rolling windows from the seq_len rows that end just before the
    # cursor: column -2 feeds the state window, columns [:-3] the feature
    # window. The next-state window starts filled with row 0's column -2.
    window_rows = range(steps - seq_len, steps)
    states = deque((data[row][-2] for row in window_rows), maxlen=seq_len)
    actions = deque([0] * seq_len, maxlen=seq_len)
    new_states = deque([data[0][-2]] * seq_len, maxlen=seq_len)
    X_train = deque((data[row][:-3] for row in window_rows), maxlen=seq_len)
    st = concat(X_train, states)
    current_state = st  # first state at the beginning of the season
    done = False
    score = 0

    while not done:
        # An action is chosen only on the first day of each seq_len-day
        # window; the remaining days of the window record action 0.
        # NOTE(review): rows up to steps + seq_len - 1 are indexed here and
        # nothing in this loop bounds that against len(df) -- confirm that
        # env.step ends the episode before the data runs out.
        for k in range(steps, steps + seq_len):
            if k == steps:
                # Ask the agent for an action index, then decode it.
                action_num, _ran = agent.get_qs(st, epsilon, i)
                action = choose_action(action_num)
                actions.append(action)
            else:
                actions.append(0)
            new_state, reward, y, done, s = env.step(states, actions, X_train, k)
            new_states.append(new_state[0, 0])
            X_train.append(data[k][:-3])
            states.append(new_state[0, 0])
            st = concat(X_train, states)
            score += reward

        # Store one transition per window (it carries the reward of the
        # window's last day) and let the agent train on it.
        new_st = concat(X_train, new_states)
        agent.update_replay_memory(
            (current_state, action_num, reward, new_st, done), done
        )
        agent.train(done)
        current_state = new_st
        steps += seq_len

    # Wrap the cursor for the next episode, keeping one full window before it.
    steps = steps % len(df) + seq_len

    # Record the score first so the reported average includes this episode.
    score_history.append(score)
    recent_scores = score_history[-STATS_WINDOW:]
    average_reward = np.mean(recent_scores)
    print(f'episode={ i}, score= {score}, avg score= {average_reward}')
    print(y, s)

    if not i % STATS_WINDOW:
        agent.tensorboard.update_stats(
            reward_avg=average_reward,
            reward_min=min(recent_scores),
            reward_max=max(recent_scores),
            epsilon=epsilon,
        )
        # Save the models whenever the windowed average beats the best so far.
        # NOTE(review): placed inside the periodic-stats branch; confirm that
        # a per-episode check was not intended.
        if average_reward > best_score:
            best_score = average_reward
            agent.save_models()

    # Decay epsilon, never letting it fall below MIN_EPSILON.
    if epsilon > MIN_EPSILON:
        epsilon *= EPSILON_DECAY
        epsilon = max(MIN_EPSILON, epsilon)