-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathmain3.py
568 lines (510 loc) · 29.4 KB
/
main3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
import random
import numpy as np
import agent
import dqnvianumpy
from collections import deque
from dqnvianumpy.q_learning import train_main
from dqnvianumpy.q_learning import test_main
from colors import MyColors
from kivywidgets import Widgets, FlyScatter, FlyScatterV3, AsyncConsoleScatter
from kivy.app import App
from kivy.properties import NumericProperty
from kivy.uix.label import Label
from kivy.uix.popup import Popup
from kivy.uix.button import Button
from kivy.core.window import Window
from kivy.uix.spinner import Spinner
from kivy.uix.slider import Slider
from kivy.uix.checkbox import CheckBox
from kivy.uix.scatter import Scatter
from kivy.uix.screenmanager import ScreenManager, Screen
from kivy.uix.tabbedpanel import TabbedPanel, TabbedPanelHeader
from kivy.uix.boxlayout import BoxLayout
from kivy.uix.textinput import TextInput
from kivy.graphics import Color, Rectangle
from kivy.clock import Clock
from kivy.config import Config
from kivy_garden.graph import LinePlot
from kivy.garden.matplotlib.backend_kivyagg import FigureCanvasKivyAgg
import matplotlib.pyplot as plt
import os
# White-background flag: passed to MyColors.get_textcolor and Widgets.get_graph_widget,
# and gates the white Rectangle drawn behind the ScreenManager in MainApp.build.
WhiteBackColor = True
# Application version string; appended to the window title in MainApp.__init__.
__version__ = '0.0.3.4'
# Kivy application for the MARL mobile-UI experiment. build() assembles two screens
# ('mainscreen' and 'settings'); the attributes below are class-level defaults, so the
# mutable ones (lists, the ScreenManager) are shared by every instance of the class.
class MainApp(App):
# Screen manager returned from build() as the root widget.
sm = ScreenManager()
# Scatter widgets currently placed on the main screen (refilled on every rebuild).
FlyScatters = []
# Icon ids 1..40; shuffled in mainscreen_rebuild_btn_click and handed out to the scatters.
IdsPngs = [j for j in range(1, 41)]
# True while an adaptation run is active (toggled by adapt_ui).
AdaptUiOnOff = False
total_reward = 0
rewards_count = 0
# Per-widget metric buffers read by _update_clock (re-created with 40 entries on rebuild).
reward_data = []
reward_ik_data = []
cumulative_reward_data = []
loss_data = []
m_loss_data = []
# Series plotted by matplot_output: graph1_points for IQL, graph2_points for MADDPG.
graph_points_for_mean = []
graph1_points = []
graph2_points = []
# 40 rows of 4 values each: target layout and the layout read back from the scatters.
target_ui_vect = [[0. for j in range(4)] for i in range(40)]
current_ui_vect = [[0. for j in range(4)] for i in range(40)]
# Reward sliders created in build().
sliders_reward = []
# NOTE(review): never assigned in the visible code, yet read by MADDPG_init and IQL_init.
strategy = None
# IQL hyperparameters
batch_size = 128
gamma = 0.99
eps_start = 0.9
eps_end = 0.05
eps_decay = 0.001
eps_decay_steps = 1000
target_update = 10
TAU = 0.01 # TAU is the update rate of the target network (soft-replacement parameter)
memory_size = 10000
lr = 1e-3
steps_learning = 1
hidden_layer = 64
# MADDPG hyperparameters
batch_size_MADDPG = 32
start_steps = 100 # start training after 1000 steps (original note; the value here is 100 - TODO confirm)
steps_train = 4 # once training has started, keep training every 4th step
alpha_actor = 0.01 # Actor learning rate
alpha_critic = 0.01 # Critic learning rate
noise_rate = 0.01 # Random-noise level
noise_rate_max = 0.9 # Initial value of the random noise
noise_rate_min = 0.01 # Final value of the random noise
noise_decay_steps = 15000 # Decay steps of the random-noise level
buffer_len = 10000 # Replay buffer capacity
def __init__(self, **kwargs):
    """Set the window title, text colour, spinner choices and window metrics.

    Args:
        **kwargs: Forwarded unchanged to the base ``App`` constructor.
    """
    super(MainApp, self).__init__(**kwargs)
    self.title = 'MARL Mobile User Interface v.'+__version__
    self.text_color = MyColors.get_textcolor(WhiteBackColor)

    # Value lists offered by the spinners created in build().
    self.modes = ('IQL', 'MADDPG', 'Fly', 'Size', 'Rotate', 'Fly+Size+Rotate')
    self.cols_rows = ('1х1', '2х2', '3х3', '4х4', '5х5', '6х6', '8x5')
    self.objects = ('Apps', 'Foods', 'Widgets')
    self.kitchen = ('rus', 'eur', 'asia', 'ui vect')
    self.episodes = ('200', '2000', '20000', '200000')
    self.modeargs = ('train', 'test')
    self.r_modeargs = ('map', 'weights', 'stats')
    self.usability_metrics = ['DM', 'TS', 'BL', 'Tr', 'Tp', 'Tl', 'LA', 'TV', 'BH', 'BV']

    # Snapshot of the window geometry at construction time.
    width = Window.width
    height = Window.height
    self.window_width = width
    self.window_height = height
    self.frame_area = (width * height)
    self.frame_diagonal = np.sqrt(np.power(width, 2) + np.power(height, 2))
# Build the whole widget tree and return the ScreenManager (self.sm) as the app root.
# Two screens are registered through add_screen:
#   'mainscreen' - head panel, the scatter grid (self.mainscreen_widgets) and foot panel;
#   'settings'   - a TabbedPanel whose tabs are LOSS (root2), Reward (root3),
#                  Params (root5) and Frozen (root4), plus a button back to the main screen.
# NOTE(review): indentation was lost in this copy of the file; the bodies of the `with`
# statements, the `for i in range(17)` loop and the final `if WhiteBackColor:` must be
# re-indented before this can run.
def build(self):
# MAIN SCREEN
self.root = BoxLayout(orientation='vertical', padding=10)
#HEAD PANEL
# Changing the grid size or the object type triggers a full rebuild of the grid.
self.colrowspinner = Spinner(text=self.cols_rows[6], values=self.cols_rows, background_color=(0.527, 0.154, 0.861, 1))
self.colrowspinner.bind(text=self.colrowspinner_selected_value)
self.objectspinner = Spinner(text=self.objects[1], values=self.objects, background_color=(0.027, 0.954, 0.061, 1))
self.objectspinner.bind(text=self.colrowspinner_selected_value)
lbl = Label(text='Size/Objs:', color=(0, 0, 1, 1))
btn = Button(text='rebuild', size_hint_y=None, height='30dp', on_press=self.mainscreen_rebuild_btn_click)
# When checked, mainscreen_rebuild_btn_click adds each scatter's grid rectangle.
self.grid_flag = CheckBox(active=False, color=(0, 0, 1, 1), size_hint_x=None, width='30dp')
self.headpanel = self.ihbl([lbl, self.colrowspinner, self.objectspinner, btn, self.grid_flag])
self.root.add_widget(self.headpanel)
# Grey background rectangle kept in sync with the panel by _update_rect_headpanel.
self.headpanel.bind(size=self._update_rect_headpanel, pos=self._update_rect_headpanel)
with self.headpanel.canvas.before:
Color(0.827, 0.827, 0.827, 1.)
self.rect_headpanel = Rectangle()
#FOOT PANEL
self.episodespinner = Spinner(text=self.episodes[0], values=self.episodes, size_hint_x=None, width='50dp', background_color=(0.225, 0.155, 0.564, 1))
self.modespinner = Spinner(text="MADDPG", values=self.modes, background_color=(0.127,0.854,0.561,1))
# Changing the mode also rebuilds the grid.
self.modespinner.bind(text=self.colrowspinner_selected_value)
self.adapt_btn = Button(text='ADAPT UI', size_hint_y=None, height='30dp', background_color=(1, 0, 0, 1), on_press=self.adapt_ui)
# TEST UI calls adapt_ui with learning=False and test=True.
test_btn = Button(text='TEST UI', size_hint_y=None, height='30dp', size_hint_x=None, width='100dp', background_color=(0, 0, 1, 1), on_press=lambda null: self.adapt_ui(self, False, True))
quit_btn = Button(text='QUIT', size_hint_y=None, height='30dp', background_color=(0.9, 0.9, 0.9, 1), on_press=lambda null: self.get_running_app().stop())
sett_btn = Button(text='SETTINGS', size_hint_y=None, height='30dp', background_color=(0.2, 0.2, 0.2, 1), on_press=lambda null: self.to_screen('settings', 'left'))
self.footpanel = self.ihbl([quit_btn, self.modespinner, self.episodespinner, self.adapt_btn, test_btn, sett_btn])
self.root.add_widget(self.footpanel)
self.footpanel.bind(size=self._update_rect_footpanel, pos=self._update_rect_footpanel)
with self.footpanel.canvas.before:
Color(0.827, 0.827, 0.827, 1.)
self.rect_footpanel = Rectangle()
# SETTINGS SCREEN, params
# These inputs are created before the first rebuild below because
# mainscreen_rebuild_btn_click -> set_hyperparams reads their text.
self.root5 = BoxLayout(orientation='vertical', padding=10) # params
self.text_hidden_layer = TextInput(text='64', password=False)
self.root5.add_widget(self.ihbl([Label(text='Hidden layer:', color=(0, 0, 0, 1)), self.text_hidden_layer]))
self.text_memory_size = TextInput(text='2000', password=False)
self.root5.add_widget(self.ihbl([Label(text='Memory Size:', color=(0, 0, 0, 1)), self.text_memory_size]))
self.text_batch_size = TextInput(text='128', password=False)
self.root5.add_widget(self.ihbl([Label(text='Batch Size:', color=(0, 0, 0, 1)), self.text_batch_size]))
self.text_gamma = TextInput(text='0.99', password=False)
self.root5.add_widget(self.ihbl([Label(text='Gamma:', color=(0, 0, 0, 1)), self.text_gamma]))
self.text_lr = TextInput(text='1e-3', password=False)
self.root5.add_widget(self.ihbl([Label(text='Learning rate:', color=(0, 0, 0, 1)), self.text_lr]))
self.text_target_update = TextInput(text='30', password=False)
self.root5.add_widget(self.ihbl([Label(text='Target update:', color=(0, 0, 0, 1)), self.text_target_update]))
self.text_eps_start = TextInput(text='0.9', password=False)
self.root5.add_widget(self.ihbl([Label(text='Eps Start:', color=(0, 0, 0, 1)), self.text_eps_start]))
self.text_eps_end = TextInput(text='0.05', password=False)
self.root5.add_widget(self.ihbl([Label(text='Eps End:', color=(0, 0, 0, 1)), self.text_eps_end]))
self.text_eps_decay_steps = TextInput(text='1000', password=False)
self.root5.add_widget(self.ihbl([Label(text='Eps Decay Steps:', color=(0, 0, 0, 1)), self.text_eps_decay_steps]))
self.root5.add_widget(Label(text='REBUILD after modify!', color=(1, 0, 0, 1)))
# MAIN CONTENT
self.mainscreen_widgets = BoxLayout(orientation='vertical', padding=0, spacing=0)
# Populate the grid once (this also creates self.env).
self.mainscreen_rebuild_btn_click(self)
self.root.add_widget(self.mainscreen_widgets)
# SWAP: the main content is the top layer and center of the screen.
swap = self.root.children[0]
self.root.children[0] = self.root.children[1]
self.root.children[1] = swap
self.add_screen('mainscreen', self.root)
# SETTINGS SCREEN
self.root1 = BoxLayout(orientation='vertical', padding=10)
self.root2 = BoxLayout(orientation='vertical', padding=10) #graph tab
self.root3 = BoxLayout(orientation='vertical', padding=10) #reward func tab
self.root4 = BoxLayout(orientation='vertical', padding=10) #dqn test
# SETTINGS SCREEN, reward func tab
self.targetUiVect = TextInput(password=False, multiline=True)
cur_sl_label = Label(text='REWARDS: _._', color=(1, 0, 0, 1))
temp_sliders = [cur_sl_label]; sliders = []; checkboxes = []; values = []
# 17 reward sliders: indices 0-6 are nx, ny, ns, nr, nt, p+, p-; indices 7-16 take
# their names from self.usability_metrics.
for i in range(17):
param = 'nx' if i==0 else 'ny' if i==1 else 'ns' if i==2 else 'nr' if i==3 else 'nt' if i==4 else 'p+'if i==5 else 'p-' if i==6 else self.usability_metrics[i-7]
sl_label = Label(text=f'{param}', color=(0, 0, 0, 1), size_hint_x=None, width='20dp', halign='center')
# Default slider value per index; also remembered in `values` so that
# reward_flag_change can restore it when the checkbox is re-enabled.
value = 0.01 if i == 5 else .95 if i == 4 or i == 3 else 0.55 if i < 5 else -.95 if i<7 else .95
values.append(value)
slider = Widgets.get_random_widget('Slider', -2., 2., value, 0.01)
# Every slider reports its value into the single shared label.
labelcallback = lambda instance, value: self.OnSliderRewardChangeValue(cur_sl_label, value)
slider.bind(value=labelcallback)
sliders.append(slider)
reward_flag = CheckBox(active=True, color=(0, 0, 1, 1), size_hint_x=None, width='20dp')
checkboxes.append(reward_flag)
reward_flagcallback = lambda instance, active: self.reward_flag_change(instance, sliders, checkboxes, values)
reward_flag.bind(active=reward_flagcallback)
temp_sliders.append(self.ihbl([reward_flag, sl_label, slider], my_height=False))
self.sliders_reward.append(slider)
btn_get_vect_state = Button(text='get', size_hint_y=None, height='30dp', background_color=(0, 0, 1, 1), on_press=self.get_current_vect_state)
btn_set_vect_state = Button(text='set', size_hint_y=None, height='30dp', background_color=(1, 0, 0, 1), on_press=self.set_current_vect_state)
# Left column: the sliders; right column: get/set buttons above the UI-vector text box.
self.root3.add_widget(self.ihbl([self.ivbl(temp_sliders),self.ivbl([self.ihbl([Label(text='UI vect:',color=(1, 0, 0, 1),size_hint_y=None,height='30dp'),btn_get_vect_state,btn_set_vect_state]),self.targetUiVect], my_width=False)], my_height=False))
self.kitchenspinner = Spinner(text=self.kitchen[0], values=self.kitchen, background_color=(0.027, 0.954, 0.061, 1))
self.kitchenspinner.bind(text=self.target_ui_selected_value)
self.root3.add_widget(self.ihbl([Label(text='Kitchen:', color=(0, 0, 1, 1)),self.kitchenspinner]))
# SETTINGS SCREEN, graph tab
# NOTE(review): the entry 'MEAN REWARD ik ' carries a trailing space, while
# _update_clock compares against 'MEAN REWARD ik'.
self.graph_values_mode = Spinner(text='MEAN LOSS', values=['MEAN LOSS', 'LOSS', 'TOTAL REWARD', 'REWARD', 'MEAN REWARD ik ', 'MOV.AVER. REWARD ik', 'DISC.CUMM. REWARD ik'], background_color=(0.127, 0.234, 0.761, 1))
self.root2.add_widget(self.ihbl([Label(text='Graph values:', color=(0, 0, 0, 1)), self.graph_values_mode]))
self.graph_widget_id = Spinner(text='1', values=[str(j) for j in range(1, 41)], background_color=(0.327, 0.634, 0.161, 1))
self.root2.add_widget(self.ihbl([Label(text='Widget ID:', color=(0, 0, 0, 1)), self.graph_widget_id]))
# NOTE(review): reward_graph is created here but not added to any layout in the visible code.
self.reward_graph = Widgets.get_graph_widget(.5, .5, 0, .1, 0, .1, 'Time, [sec]', WhiteBackColor)
self.root2.add_widget(self.ihbl([Button(text='Save graph', size_hint_y=None, height='30dp', on_press=lambda null: self.save_plot()),
Button(text='Reset points', size_hint_y=None, height='30dp', color=(1, 0, 0, 1), on_press=lambda null: self.reset_graph_points(True))]))
# Container that matplot_output fills with the rendered matplotlib canvas.
self.graph_layout2 = BoxLayout(orientation='vertical')
self.root2.add_widget(self.graph_layout2)
# SETTINGS SCREEN, dqn test
# Read-only text console written to by cwrite / cwriteline / cclear.
self.console = TextInput(password=False, multiline=True, readonly=True)
self.root4.add_widget(self.console)
self.dqnmodespinner = Spinner(text=self.modeargs[0], values=self.modeargs, background_color=(0.027, 0.954, 0.061, 1))
self.dqnr_modespinner = Spinner(text=self.r_modeargs[0], values=self.r_modeargs, background_color=(0.027, 0.125, 0.061, 1))
self.test_dqn_btn = Button(text='TEST', size_hint_y=None, height='30dp', background_color=(1, 0, 0, 1), on_press=self.test_dqn)
self.root4.add_widget(self.ihbl([Label(text='Mode:', color=(1, 0, 1, 1)), self.dqnmodespinner, self.dqnr_modespinner, self.test_dqn_btn]))
# Assemble the tabs: LOSS -> root2, Reward -> root3, Params -> root5, Frozen -> root4.
tp = TabbedPanel(do_default_tab=False, background_color=(0,0,0,0))
reward_th = TabbedPanelHeader(text='LOSS')
tp.add_widget(reward_th)
reward_th.content = self.root2
uivect_th = TabbedPanelHeader(text='Reward')
tp.add_widget(uivect_th)
uivect_th.content = self.root3
params_th = TabbedPanelHeader(text='Params')
tp.add_widget(params_th)
params_th.content = self.root5
self.root1.add_widget(tp)
dqnvect_th = TabbedPanelHeader(text='Frozen')
tp.add_widget(dqnvect_th)
dqnvect_th.content = self.root4
btn2 = Button(text='MAIN SCREEN', size_hint_y=None, height='30dp', background_color=(0.2, 0.2, 0.2, 1),
on_press=lambda null: self.to_screen('mainscreen', 'right'))
self.root1.add_widget(btn2)
# Initialise target_ui_vect and the text box from the default kitchen selection.
self.target_ui_selected_value(self.kitchenspinner, self.kitchenspinner.text)
self.add_screen('settings', self.root1)
# Optional white background behind all screens, kept in sync by _update_rect.
if WhiteBackColor:
self.sm.bind(size=self._update_rect, pos=self._update_rect)
with self.sm.canvas.before:
Color(1, 1, 1, 1)
self.rect = Rectangle(size=self.root.size, pos=self.sm.pos)
return self.sm
# Toggle an adaptation run on or off. Bound to the ADAPT UI button; the TEST UI button
# calls it with learning=False, test=True.
#   instance - the widget that fired the event (not used in the body).
#   learning - when False, env.steps_learning is set to 0.
#   test     - in IQL mode, gives every emulating scatter's agent an EpsilonGreedyStrategy
#              whose start and end values are both eps_end.
# NOTE(review): indentation was lost in this copy, so which statements belong to
# `if self.AdaptUiOnOff:` and `if s.emulation:` cannot be confirmed from here.
def adapt_ui(self, instance, learning=True, test=False):
m = self.modespinner.text
self.AdaptUiOnOff = not self.AdaptUiOnOff
# Button colour mirrors the run state: green-ish while running, red when stopped.
self.adapt_btn.background_color = (0.127, 0.854, 0.561, 1) if self.AdaptUiOnOff else (1, 0, 0, 1)
if self.AdaptUiOnOff:
# Reset the run counters and the environment's step budget and history lists.
self.total_reward = 0
self.rewards_count = 0
self.env.steps_left = int(self.episodespinner.text) * len(self.FlyScatters)
self.env.done = False
self.env.steps_learning = int(self.env.steps_left * self.steps_learning) if learning else 0
self.env.Loss_History = [0]
self.env.Reward_History = [0]
self.env.winrate_history = [0]
self.env.total_loss = [0]
self.env.m_loss = [0]
self.env.Loss_History_actor = [0]
self.env.total_loss_actor = [0]
self.env.m_loss_actor = [0]
# IQL calls change_emulation on every scatter; MADDPG calls it once on the environment.
if m == 'IQL':
for s in self.FlyScatters:
s.mode = m
s.change_emulation()
if s.emulation:
s.agent.total_reward = 0
s.agent.current_step = 0
s.loss_data = [0]
s.total_loss = [0]
s.m_loss = [0]
if test: s.agent.strategy = agent.EpsilonGreedyStrategy(self.eps_end, self.eps_end, self.eps_decay, self.eps_decay_steps)
elif m == 'MADDPG':
self.env.change_emulation()
if self.AdaptUiOnOff:
# Sample graph data through _update_clock every 1/8 second while the run is active.
Clock.schedule_interval(self._update_clock, 1 / 8.)
self.env.start_emulation()
self.total_reward = 0
self.reset_graph_points()
else:
Clock.unschedule(self._update_clock)
self.env.stop_emulation()
print('- adapt ui stopped -')
# Stop a running adaptation: clear the run flag, unschedule the graph sampler, stop the
# environment, turn the ADAPT UI button red and render the collected graph.
# Text, Header and par are accepted but not used in the body.
# NOTE(review): indentation was lost in this copy; it is presumed that all five
# statements after the `if` belong to it - confirm against the original file.
def stop_emulation_async(self,Text='Stop emulation!', Header='Adapt', par=0):
if self.AdaptUiOnOff:
self.AdaptUiOnOff = False
Clock.unschedule(self._update_clock)
self.env.stop_emulation()
self.adapt_btn.background_color = (1, 0, 0, 1)
self.matplot_output()
def matplot_output(self):
    """Render the collected IQL / MADDPG series into the settings-screen graph area.

    Each non-empty series is drawn as a line with a shaded band of half-width
    ``ci`` (see ``get_plot_params``). The resulting figure is wrapped in a
    ``FigureCanvasKivyAgg`` widget that replaces the contents of
    ``self.graph_layout2``. The figure stays pyplot's current figure, so
    ``save_plot`` can still save it afterwards.
    """
    # pyplot keeps every figure alive until it is closed; drop the ones left over
    # from earlier calls so repeated runs do not accumulate open figures.
    plt.close('all')
    plt.figure(figsize=(5, 5))

    series = (
        (self.graph1_points, "IQL", 'r'),
        (self.graph2_points, "MADDPG", 'b'),
    )
    drew_any = False
    for points, label, colour in series:
        if len(points) > 0:
            plt.plot(points, '-', label=label, color=colour)
            x, y, ci = self.get_plot_params(points)
            plt.fill_between(x, (y - ci), (y + ci), color=colour, alpha=.1)
            drew_any = True

    plt.xlabel("Steps")
    plt.ylabel(self.graph_values_mode.text)
    # A legend without labelled artists only produces a warning, so skip it.
    if drew_any:
        plt.legend(loc="best")

    graph_widget = FigureCanvasKivyAgg(plt.gcf())
    self.graph_layout2.clear_widgets()
    self.graph_layout2.add_widget(graph_widget)
def get_plot_params(self, graph_points):
    """Return plotting helpers for one series.

    Args:
        graph_points: Sequence of numeric samples.

    Returns:
        Tuple ``(x, y, ci)``: ``x`` is the list of sample indices, ``y`` the
        samples as a NumPy array, and ``ci`` the scalar
        ``1.96 * std(samples) / sqrt(number of samples)``.
    """
    count = len(graph_points)
    indices = list(range(count))
    samples = np.asarray(graph_points)
    half_width = 1.96 * np.std(graph_points) / np.sqrt(count)
    return indices, samples, half_width
def save_plot(self):
    """Save pyplot's current figure as ``Graphs/graph2.png`` next to this script.

    The ``Graphs`` directory is created first when it does not exist yet.
    """
    graphs_dir = os.path.join(os.path.dirname(__file__), 'Graphs/')
    if not os.path.isdir(graphs_dir):
        os.makedirs(graphs_dir)
    target = graphs_dir + 'graph2.png'
    plt.savefig(target, format='png', pad_inches=0.1)
def _update_clock(self, dt):
m = self.modespinner.text
vm = self.graph_values_mode.text
if m=='IQL':
graph1 = 0
widget_id = int(self.graph_widget_id.text)-1
if vm == 'MEAN LOSS': graph1 = self.m_loss_data[widget_id]
elif vm=='TOTAL REWARD': graph1 = self.cumulative_reward_data[widget_id]
elif vm == 'REWARD': graph1 = self.reward_data[widget_id]
elif vm == 'MEAN REWARD ik': graph1 = self.reward_ik_data[widget_id]
elif vm == 'MOV.AVER. REWARD ik':
self.graph_points_for_mean.append(np.mean(self.reward_ik_data))
self.graph1_points.append(np.sum(self.graph_points_for_mean[-1000:]))
elif vm == 'DISC.CUMM. REWARD ik':
self.total_reward += np.sum(self.reward_ik_data) * self.gamma**self.FlyScatters[widget_id].agent.current_step
self.graph1_points.append(self.total_reward)
else: graph1 = self.loss_data[widget_id]
if graph1 != 0: self.graph1_points.append(graph1)
elif m=='MADDPG':
graph2 = 0
if vm == 'MEAN LOSS': graph2 = self.env.m_loss_actor[-1]
elif vm=='TOTAL REWARD': graph2 = self.env.Reward_History[-1]
elif vm == 'REWARD': graph2 = self.env.reward_data[-1]
else: graph2 = self.env.total_loss_actor[-1]
if graph2!=0: self.graph2_points.append(graph2)
# Rebuild the main-screen grid from the current spinner selections: clear the old
# scatters, reset the 40-slot metric buffers, re-read the hyperparameters, create a new
# agent.Environment and fill a rows x cols grid with FlyScatterV3 widgets, each
# initialised for the selected mode (IQL or MADDPG).
#   instance - the widget that fired the event (not used in the body).
# NOTE(review): indentation was lost in this copy; in particular it cannot be confirmed
# whether the device / replay-buffer / critic setup belongs to the `elif mode == 'MADDPG'`
# branch or runs for every mode.
def mainscreen_rebuild_btn_click(self, instance):
self.mainscreen_widgets.clear_widgets()
self.FlyScatters.clear()
TextSize = self.colrowspinner.text
Objects = self.objectspinner.text
mode = self.modespinner.text
# Grid size comes from characters 0 and 2 of the spinner text (e.g. '8x5'), so only
# single-digit row/column counts are parsed.
rows = int(TextSize[0])
cols = int(TextSize[2])
self.reward_data = [0. for i in range(40)]
self.reward_ik_data = [0. for i in range(40)]
self.cumulative_reward_data = [0. for i in range(40)]
self.loss_data = [0. for i in range(40)]
self.m_loss_data = [0. for i in range(40)]
# Shuffle the icon ids so every rebuild assigns them to grid cells in a new order.
random.shuffle(self.IdsPngs)
self.set_hyperparams()
n_agents = rows * cols
steps_left = int(self.episodespinner.text)
steps_learning = int((int(self.episodespinner.text) - self.batch_size) * self.steps_learning)
self.env = agent.Environment(steps_left*n_agents, steps_learning*n_agents, mode, self)
if mode == 'IQL':
self.env.strategy = agent.EpsilonGreedyStrategy(self.eps_start, self.eps_end, self.eps_decay, self.eps_decay_steps)
elif mode == 'MADDPG':
self.env.strategy = agent.NoiseRateStrategy(self.noise_rate_min, self.noise_rate_max, self.noise_decay_steps)
self.device = self.env.device = agent.get_torch_device()
self.env.experience_buffer = deque(maxlen=self.buffer_len)
obs_size = 5 #e.num_state_available(s.agent) - 1
# Create the main critic neural network
self.env.critic_network = agent.MADDPG_Critic(obs_size * n_agents, n_agents).to(self.device)
# Create the target critic neural network
self.env.tgtCritic_network = agent.MADDPG_Critic(obs_size * n_agents, n_agents).to(self.device)
# Synchronise the weights of the critic networks
self.env.tgtCritic_network.load_state_dict(self.env.critic_network.state_dict())
# Create the optimizer for the critic network
self.env.optimizerCritic = agent.get_optimizer_Adam(self.env.critic_network, self.alpha_critic)
# Create the critic loss function
self.env.objectiveCritic = agent.get_MSELoss_func()
# One horizontal BoxLayout per row, one scatter per cell.
for i in range(rows):
hor = BoxLayout(orientation='horizontal', padding=0, spacing=0)
for j in range(cols):
s = FlyScatterV3(do_rotation=True, do_scale=True, auto_bring_to_front=False, do_collide_after_children=False)
s.app = self
if mode=='IQL': self.IQL_init_numpy(s, self.env)
elif mode=='MADDPG': self.MADDPG_init(s, self.env, self.device)
hor.add_widget(s)
# The scatter id is the shuffled icon id for this cell; it is also shown as a label.
s.id = ids = self.IdsPngs[i*cols+j]
s.grid_rect = Widgets.get_random_widget('LineRectangle', 0, 0, Window.width // cols, Window.height // (rows + 1), f'S{i*cols+j}')
if self.grid_flag.active: s.add_widget(s.grid_rect)
w = Widgets.get_app_icon(ids) if Objects=='Apps' else Widgets.get_food_icon(ids) if Objects=='Foods' else Widgets.get_random_widget()
w.width = f'{360 // cols}dp'
w.height = f'{800 // rows}dp'
s.add_widget(w)
wi = Widgets.get_random_widget('Label')
s.add_widget(wi)
wi.text = str(ids)
# Remember the initial size and rotation on the scatter.
s.raw_width = w.width
s.raw_height = w.height
s.raw_rotate = s.rotation
self.FlyScatters.append(s)
self.mainscreen_widgets.add_widget(hor)
def MADDPG_init(self, s, e, device):
    """Attach the MADDPG agent, actor networks, optimizer and loss to scatter ``s``.

    Args:
        s: Scatter widget to initialise; receives ``env``, ``agent``,
            ``actor_network``, ``target_net``, ``optimizer`` and ``objective``.
        e: Environment object shared by all scatters.
        device: Device the actor networks are moved to via ``.to(device)``.
    """
    def make_actor():
        # Input size is the reported state size minus one.
        return agent.MADDPG_Actor(e.num_state_available(s.agent)-1, e.num_actions_available()).to(device)

    s.env = e
    s.set_vect_state()
    # NOTE(review): self.strategy is the class-level attribute (None unless assigned
    # elsewhere), whereas the IQL path passes e.strategy - confirm this is intended.
    s.agent = agent.Agent4(self.strategy, e.num_actions_available(), s, device)
    # Main actor network
    s.actor_network = make_actor()
    # Target actor network
    s.target_net = make_actor()
    # Start the target network from the main network's weights.
    s.target_net.load_state_dict(s.actor_network.state_dict())
    s.optimizer = agent.get_optimizer_Adam(s.actor_network, self.alpha_actor)
    s.objective = agent.get_MSELoss_func()
def IQL_init_numpy(self, s, e):
    """Attach an IQL agent and two ``dqnvianumpy`` networks to scatter ``s``.

    Args:
        s: Scatter widget to initialise; receives ``env``, ``agent``,
            ``policy_net`` and ``target_net``.
        e: Environment object; supplies the strategy and the state/action sizes.
    """
    def make_net():
        # Input size is the reported state size minus one.
        return dqnvianumpy.model.neural_network(
            e.num_state_available(s.agent)-1,
            self.hidden_layer,
            e.num_actions_available(),
            self.lr,
        )

    s.env = e
    s.set_vect_state()
    replay_memory = deque(maxlen=self.memory_size)
    s.agent = agent.Agent3(e.strategy, replay_memory, e.num_actions_available(), s)
    s.policy_net = make_net()
    s.target_net = make_net()
    # The policy network object itself is handed to load_state_dict here.
    s.target_net.load_state_dict(s.policy_net)
def IQL_init(self, s, e):
    """Attach an IQL agent, policy/target networks and an AdamW optimizer to ``s``.

    Not called anywhere in the visible part of this file (the rebuild path uses
    ``IQL_init_numpy``).

    Args:
        s: Scatter widget to initialise; its ``app.strategy`` is passed to the agent.
        e: Environment object; supplies the state/action sizes.
    """
    def make_net():
        # Input size is the reported state size minus two.
        return agent.get_nn_module(e.num_state_available(s.agent) - 2, s.agent.device)

    s.env = e
    s.set_vect_state()
    strategy = s.app.strategy
    memory = agent.ReplayMemoryPyTorch(self.memory_size)
    s.agent = agent.Agent(strategy, memory, e.num_actions_available(), s)
    s.policy_net = make_net()
    s.target_net = make_net()
    # Copy the policy weights into the target network and switch it to eval mode.
    s.target_net.load_state_dict(s.policy_net.state_dict())
    s.target_net.eval()
    s.optimizer = agent.get_optimizer_AdamW(s.policy_net, self.lr)
def set_hyperparams(self):
    """Copy the values typed on the Params tab into the hyperparameter attributes.

    Each attribute ``X`` is parsed from the text of ``self.text_X``.

    Raises:
        ValueError: If a field does not parse as the expected number type;
            attributes processed before the failing field are already updated.
    """
    # (attribute name, converter), in the order the fields are processed.
    fields = (
        ('hidden_layer', int),
        ('memory_size', int),
        ('batch_size', int),
        ('gamma', float),
        ('lr', float),
        ('target_update', int),
        ('eps_start', float),
        ('eps_end', float),
        ('eps_decay_steps', int),
    )
    for attr, convert in fields:
        raw = getattr(self, 'text_' + attr).text
        setattr(self, attr, convert(raw))
def OnSliderRewardChangeValue(self, label, value):
    """Show ``value`` with two decimals in ``label`` as ``REWARDS: <value>``.

    Args:
        label: Object whose ``text`` attribute is overwritten.
        value: Number to display.
    """
    label.text = "REWARDS: " + format(value, ".2f")
def reward_flag_change(self, instance, sliders, checkboxes, values):
    """Restore or zero the slider paired with a reward checkbox.

    Args:
        instance: The checkbox that changed; its position in ``checkboxes``
            selects the slider.
        sliders: Sliders, index-aligned with ``checkboxes``.
        checkboxes: All reward checkboxes.
        values: Default slider values, index-aligned with ``sliders``.
    """
    position = checkboxes.index(instance)
    if instance.active:
        sliders[position].value = values[position]
    else:
        sliders[position].value = 0
def expand_graph_axes(self, graph, new_ymax=1.):
    """Grow the graph's y-range and coarsen its ticks so a new value fits.

    Args:
        graph: Object with ``ymin``, ``ymax``, ``xmax``, ``x_ticks_major`` and
            ``y_ticks_major`` attributes.
        new_ymax: The value that has to fit; a value of 0 leaves the graph untouched.
    """
    if new_ymax == 0:
        return
    level = float(new_ymax)

    if graph.ymax < level:
        # Leave 20 % headroom above the value.
        graph.ymax = level*1.2
    elif graph.ymin > level:
        # Scale towards zero for positive values, away from zero otherwise.
        if level > 0:
            graph.ymin = level*0.8
        else:
            graph.ymin = level*1.2

    # Keep the number of major ticks bounded on both axes.
    if abs(level) > graph.y_ticks_major * 20:
        graph.y_ticks_major *= 4
    if graph.xmax > graph.x_ticks_major * 20:
        graph.x_ticks_major *= 2
def reset_graph_points(self, allpoint=False):
    """Clear the plotted series that belongs to the currently selected mode.

    'IQL' resets ``graph1_points`` and 'MADDPG' resets ``graph2_points``; any
    other mode leaves both untouched.

    Args:
        allpoint: Accepted for callers that pass it; not used by the body.
    """
    series_by_mode = {'IQL': 'graph1_points', 'MADDPG': 'graph2_points'}
    attr = series_by_mode.get(self.modespinner.text)
    if attr is not None:
        setattr(self, attr, [])
def do_current_ui_vect(self, vect):
    """Store elements 2..5 of ``vect`` in the ``current_ui_vect`` row for its widget.

    Args:
        vect: Indexable state vector; ``vect[0]`` is the 1-based row number and
            ``vect[2]`` .. ``vect[5]`` are the four values that are stored.
    """
    row = vect[0] - 1
    self.current_ui_vect[row] = [vect[k] for k in (2, 3, 4, 5)]
def get_current_vect_state(self, instance):
    """Read every scatter's state vector into ``current_ui_vect`` and show the result.

    For each scatter the vector returned by ``set_vect_state()`` is stored, from
    element 2 onwards, in the row given by its first element (1-based). The
    whole table is then written to the ``targetUiVect`` text box.

    Args:
        instance: The widget that fired the event (unused).
    """
    for scatter in self.FlyScatters:
        state = scatter.set_vect_state()
        row = state[0] - 1
        self.current_ui_vect[row] = state[2:]
    self.targetUiVect.text = str(self.current_ui_vect)
def set_current_vect_state(self, instance):
    """Push the target UI vector onto every scatter, then notify the user.

    Each scatter receives the ``target_ui_vect`` row selected by its 1-based
    ``id``. A console line and a popup confirm the update.

    Args:
        instance: The widget that fired the event (unused).
    """
    for scatter in self.FlyScatters:
        scatter.update_vect_state_from(self.target_ui_vect[scatter.id-1])
    print('-- vect state updated from target UI --')
    self.show_popup('Vect state updated from target UI!', 'New design')
def target_ui_selected_value(self, spinner, text):
    """Load the target UI vector for the selected kitchen and display it.

    With ``'ui vect'`` the current scatter layout is captured and copied as the
    target; any other value is looked up through ``Widgets.target_ui``. The
    target is shown in the ``targetUiVect`` text box with all spaces removed.

    Args:
        spinner: The spinner that fired the event; forwarded to
            ``get_current_vect_state``.
        text: The selected spinner value.
    """
    if text != 'ui vect':
        self.target_ui_vect = Widgets.target_ui(text)
    else:
        self.get_current_vect_state(spinner)
        # Shallow copy: a new outer list holding the same row objects.
        self.target_ui_vect = list(self.current_ui_vect)
    self.targetUiVect.text = str(self.target_ui_vect).replace(' ','')
def test_dqn(self, instance):
    """Run the ``dqnvianumpy`` demo selected on the 'Frozen' tab.

    'train' calls ``train_main`` with the selected render mode; any other mode
    calls ``test_main`` on ``'model.pkl'``. The app itself is passed as the
    second argument in both cases.

    Args:
        instance: The widget that fired the event (unused).
    """
    if self.dqnmodespinner.text != "train":
        test_main('model.pkl', self)
        return
    train_main(self.dqnr_modespinner.text, self)
def cwriteline(self, string=''):
    """Append a newline followed by ``string`` to the console text box."""
    console = self.console
    console.text = console.text + '\n' + string
def cwrite(self, string=''):
    """Append ``string`` to the console text box without a line break."""
    console = self.console
    console.text = console.text + string
def cclear(self):
    """Empty the console text box."""
    console = self.console
    console.text = ''
def add_screen(self, name, widget):
    """Wrap ``widget`` in a new ``Screen`` called ``name`` and register it.

    Args:
        name: Screen name, later used with ``to_screen``.
        widget: Root widget of the new screen.
    """
    screen = Screen(name=name)
    screen.add_widget(widget)
    self.sm.add_widget(screen)
def to_screen(self, namescreen='mainscreen', direction='right'):
    """Switch the screen manager to ``namescreen``.

    Args:
        namescreen: Name of the target screen.
        direction: Transition direction, set before the switch.
    """
    manager = self.sm
    manager.transition.direction = direction
    manager.current = namescreen
def colrowspinner_selected_value(self, spinner, text):
    """Spinner callback: rebuild the main-screen grid after any selection change.

    Args:
        spinner: The spinner that changed (unused).
        text: Its new value (unused; the rebuild re-reads every spinner).
    """
    rebuild = self.mainscreen_rebuild_btn_click
    rebuild(self)
def ihbl(self, widjets, my_height=True):
    """Return a horizontal ``BoxLayout`` containing ``widjets`` in order.

    Args:
        widjets: Iterable of widgets to add.
        my_height: When true, the row gets a fixed height of ``'30dp'``;
            otherwise it keeps the layout's default sizing.
    """
    row = BoxLayout(orientation='horizontal', padding=0, spacing=0)
    if my_height:
        row.size_hint_y = None
        row.height = '30dp'
    for child in widjets:
        row.add_widget(child)
    return row
def ivbl(self, widjets, my_width=False):
    """Return a vertical ``BoxLayout`` containing ``widjets`` in order.

    Args:
        widjets: Iterable of widgets to add.
        my_width: When true, the column gets a fixed width of
            ``Window.width // 2`` dp; otherwise it keeps the default sizing.
    """
    column = BoxLayout(orientation='vertical', padding=0, spacing=0)
    if my_width:
        column.size_hint_x = None
        column.width = f'{Window.width//2}dp'
    for child in widjets:
        column.add_widget(child)
    return column
def _update_rect(self, instance, value):
self.rect.pos = instance.pos
self.rect.size = instance.size
def _update_rect_headpanel(self, instance, value):
self.rect_headpanel.pos = (instance.pos[0]-5, instance.pos[1]-5)
self.rect_headpanel.size = (instance.size[0]+10, instance.size[1]+10)
def _update_rect_footpanel(self, instance, value):
self.rect_footpanel.pos = (instance.pos[0]-5, instance.pos[1]-5)
self.rect_footpanel.size = (instance.size[0]+10, instance.size[1]+10)
def show_popup(self, text='', title='Popup Window'):
    """Open a popup showing ``text`` and an OK button that dismisses it.

    Args:
        text: Message displayed in the popup body.
        title: Popup title.
    """
    popup_size = (7*Window.width / 8, Window.height / 4)
    popup = Popup(title=title, size_hint=(None, None), size=popup_size)

    body = BoxLayout(orientation='vertical', padding=10)
    message = Label(text=text)
    ok_button = Button(text='OK', on_press=popup.dismiss,size_hint_y=None,height='30dp')
    body.add_widget(message)
    body.add_widget(ok_button)

    popup.content = body
    popup.open()
# Script entry point: configure the window, then create and run the application.
# NOTE(review): these Config.set calls run after kivy.core.window.Window has already
# been imported at the top of the file; Kivy documents that configuration should be
# applied before other Kivy modules are imported, so they may have no effect - confirm.
# The explicit Window.size assignment below is what the window is set to here.
if __name__ == "__main__":
Config.set('graphics', 'width', '300')
Config.set('graphics', 'height', '800')
Window.size = (435, 940)
app = MainApp()
app.run()