-
Notifications
You must be signed in to change notification settings - Fork 0
/
3machine.py
116 lines (104 loc) · 3.24 KB
/
3machine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
from entities import Policy, Machine, MaintenanceTask, EpochResult
from engine import IndustrySim
from tools import e_greedy, NN
import numpy as np
import time
from industry import epoch_length, max_epochs, max_labor, wages, num_machines, job_demand
from industry import delay_penalty, mt_fixed_cost, mt_RF, mt_ttr, mt_labor, beta, age
from industry import compatible_jobs, machine_names3, mt_task1, mt_task2, mt_task3
from industry import machine1, machine2, machine3
from industry import state_size, action_size, nn_arch
from multiprocessing.dummy import Pool as ThreadPool
def thread_task(t):
    """Run one epoch on simulator *t* and return that epoch's objective value.

    Parameters
    ----------
    t : object with a ``run_epoch()`` method returning ``(epoch_result, state)``.

    Returns
    -------
    The value of ``epoch_result.get_objfun()``.  The post-epoch state is
    deliberately discarded — only the scalar objective is needed by the
    pool ``map`` in ``simulateParallel``.
    """
    epoch_result, _ = t.run_epoch()
    return epoch_result.get_objfun()
def simulateParallel(tasks, threads=4):
    """Run one epoch on every simulator in *tasks* concurrently and return
    the mean objective value.

    Parameters
    ----------
    tasks : iterable of simulator objects accepted by ``thread_task``.
    threads : number of worker threads in the pool (default 4).

    Returns
    -------
    numpy scalar — the mean of the per-task objective values.
    """
    pool = ThreadPool(threads)
    try:
        # ``thread_task`` is the function mapped over each simulator.
        results = pool.map(thread_task, tasks)
    finally:
        # Always release the pool's threads, even if a worker raised.
        pool.close()
        pool.join()
    return np.mean(results)
"""
State consists of (in order):
machine params:
pending jobs in last epoch
machine age
last epoch params:
free labour
next epoch demand:
avg due after
delay penalty
reward = objfun, cost
"""
env = IndustrySim(machines=[machine1,machine2, machine3], epoch_length=epoch_length, max_labor=max_labor,
wages=wages, job_demand=job_demand, delay_penalty=delay_penalty, state_size=state_size)
start = time.time()
env.reset()
res = EpochResult(None, None, None)
nn = NN(dim_input=state_size, dim_hidden_layers=nn_arch, dim_output=action_size, do_dropout=True)
states = np.zeros((max_epochs, state_size))
actions = np.zeros((max_epochs, action_size))
rewards = np.zeros(max_epochs)
state = np.zeros(state_size)
# hyper params
e = 0.2
training_passes = 10000
start = time.time()
#par = [NN.clone(env), NN.clone(env)]#, (NN.clone(env), pm_plan)]
for exp in range(training_passes):
print('Training Pass: {}'.format(exp))
for i in range(max_epochs):
pm_probs = nn.run_forward(state)
pm_plan, action_vector = e_greedy(machine_names3, pm_probs, e=e)
#print(pm_plan)
states[i] = state
actions[i] = action_vector
pol = Policy('SJF', pm_plan)
# for p in par:
# p.set(env)
# p.set_policy(pol)
#par_objfun = simulateParallel(par)
epoch_result, state = env.run_epoch(pol)#{}))#{'FnC1':'HIGH', 'Lathe':'HIGH'}))#pm_plan))
# print(epoch_result)
rewards[i] = (epoch_result.get_objfun())#+par_objfun)/2.0
returns = nn.get_returns(rewards, actions)
nn.backprop(states, returns)
res = env.get_result()
#print(res)
env.reset()
print('Training took '+str(time.time()-start)+'s')
# Validation: run the trained policy greedily (no exploration) and
# average the objective over `validation` independent experiments.
# NOTE(review): `state` carries over from the end of training rather than
# being re-zeroed — confirm this is intentional.
validation = 200
avg_obj = 0
# Per-machine tallies of completed jobs by priority class, accumulated
# over all validation experiments.
high_jobs = {
    'FnC1':0,
    'Lathe1':0,
    'Milling1':0
}
low_jobs = {
    'FnC1':0,
    'Lathe1':0,
    'Milling1':0
}
for exp in range(validation):
    for i in range(max_epochs):
        # NOTE(review): testing=True presumably disables dropout — confirm
        # against tools.NN.run_forward.
        pm_probs = nn.run_forward(state, testing=True)
        # e=None: act greedily on the network's probabilities.
        pm_plan, action_vector = e_greedy(machine_names3, pm_probs,e=None)
        #print(pm_plan)
        states[i] = state
        actions[i] = action_vector
        epoch_result, state = env.run_epoch(Policy('SJF', pm_plan))
    res = env.get_result()
    print(res)
    avg_obj += res.objfun
    # Accumulate each machine's completed-job counts for this experiment.
    for mr in res.machine_results:
        high_jobs[mr.name] += mr.mt_jobs_done['high']
        low_jobs[mr.name] += mr.mt_jobs_done['low']
    env.reset()
avg_obj/=validation
print('Avg obj: '+ str(avg_obj))
print('Total high: '+str(high_jobs))
print('Total low: '+ str(low_jobs))
print('Took '+str(time.time()-start)+'s')
# Persist the trained network weights for later reuse.
nn.save(filename='NN.pickle')