'''
COMP9418 Assignment 2
This file is the model builder: it learns from the given initial data, then stores the model in model.pkl.
Note: some of the functions are based on tutorial code.
Name: Kan-Lin Lu zID: z3417618
'''
from __future__ import division
from __future__ import print_function
# Allowed libraries
import numpy as np
import pandas as pd
from itertools import product, combinations
from collections import OrderedDict as odict
from graphviz import Digraph
from tabulate import tabulate
#Global functions to be used ------------------------------------------------------------------
def printFactor(f):
"""
argument
`f`, a factor to print on screen
"""
# Create an empty list that we will fill in with the probability table entries
table = list()
# Iterate over all keys and probability values in the table
for key, item in f['table'].items():
# Convert the tuple to a list to be able to manipulate it
k = list(key)
# Append the probability value to the list with key values
k.append(item)
# Append an entire row to the table
table.append(k)
# dom is used as table header. We need it converted to list
dom = list(f['dom'])
# Append a 'Pr' to indicate the probability column
dom.append('Pr')
print(tabulate(table,headers=dom,tablefmt='orgtbl'))
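# A minimal usage sketch of printFactor, with a hypothetical two-entry factor
# in the {'dom': ..., 'table': odict} format used throughout this file:
"""
toy_factor = {'dom': ('r1_t-1', 'r1_t'),
'table': odict([(('true', 'true'), 0.7),
(('true', 'false'), 0.3)])}
printFactor(toy_factor)
# Prints an org-mode table with header columns r1_t-1, r1_t and Pr,
# one row per (key, probability) entry.
"""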
# Start-up notice
data_filename = 'data.csv'
print(f"Begin building model based on {data_filename}...")
# Dataframe set up ---------------------------------------------------------------------------
# First we read the ground truth data:
raw_df = pd.read_csv(data_filename, index_col=0)
# We split it into motion_df and training_df; the former has the same format as the per-round input, the latter is the ground truth
# Dataframe for storing data (note this is different from the ground-truth training data):
# Motion
motion_df = pd.DataFrame(columns=['reliable_sensor1','reliable_sensor2','reliable_sensor3','reliable_sensor4',
'unreliable_sensor1','unreliable_sensor2','unreliable_sensor3','unreliable_sensor4',
'robot1','robot2','door_sensor1','door_sensor2','door_sensor3','door_sensor4','time',
'electricity_price'])
# Copying the dataframe
motion_df = raw_df.loc[:, motion_df.columns]
motion_df.set_index('time', inplace=True)
# Training:
training_df = pd.DataFrame(columns=['time', 'r1','r2','r3','r4','r5','r6','r7','r8','r9','r10','r11','r12','r13','r14','r15',
'r16','r17','r18','r19','r20','r21','r22','r23','r24','r25','r26','r27','r28','r29',
'r30','r31','r32','r33','r34','r35','c1','c2','c3','c4','o1','outside'])
# Copying the dataframe
training_df = raw_df.loc[:, training_df.columns]
training_df.set_index('time', inplace=True)
# Area Dataframe:
# For neighbours we restrict ourselves to direct door neighbours, unless specified otherwise; see report
# Influence is the designated room(s) based on the DAG created
area_df = pd.DataFrame(columns=['type','number', 'door_sensors', 'motion_sensors', 'robot_sensors', 'neighbour_room',
'corridor', 'open_area', 'outside', 'influence'])
#Rooms:
area_df.loc[len(area_df)] = ['Room', 1, [], ['unreliable_sensor3'], [], [2, 3], [], [],[], ['r7']]
area_df.loc[len(area_df)] = ['Room', 2, [], [], [], [1, 4], [], [],[], ['r4', 'r1']]
area_df.loc[len(area_df)] = ['Room', 3, [], [], [], [1, 7], [], [],[], ['r1','r7']]
area_df.loc[len(area_df)] = ['Room', 4, [], [], [], [2, 8], [], [],[], ['r1','r8']]
area_df.loc[len(area_df)] = ['Room', 5, [], ['reliable_sensor2'], [], [9, 6], [3], [],[], []]
area_df.loc[len(area_df)] = ['Room', 6, [], [], [], [5], [3], [],[], ['c3','r5']]
area_df.loc[len(area_df)] = ['Room', 7, [], [], [], [3], [1], [],[], ['c2','c1']]
area_df.loc[len(area_df)] = ['Room', 8, ['door_sensor1'], [], [], [4, 9], [], [],[], ['r9','r5']]
area_df.loc[len(area_df)] = ['Room', 9, ['door_sensor1'], [], [], [5, 8, 13], [], [],[], ['r5']]
area_df.loc[len(area_df)] = ['Room', 10, [], [], [], [], [3], [],[], ['c3','r16']]
area_df.loc[len(area_df)] = ['Room', 11, [], [], [], [], [3], [],[], ['c3','r16']]
area_df.loc[len(area_df)] = ['Room', 12, [], [], [], [22], [], [],[1], ['r22','r25']]
area_df.loc[len(area_df)] = ['Room', 13, [], [], [], [9,24], [], [],[], ['r8','r9']]
area_df.loc[len(area_df)] = ['Room', 14, [], [], [], [24], [], [],[], ['r24','r13']]
area_df.loc[len(area_df)] = ['Room', 15, [], [], [], [], [3], [],[], ['c3','r16']]
area_df.loc[len(area_df)] = ['Room', 16, [], ['reliable_sensor1'], [], [], [3], [],[], []]
area_df.loc[len(area_df)] = ['Room', 17, [], [], [], [], [3], [],[], ['c3', 'r16']]
area_df.loc[len(area_df)] = ['Room', 18, [], [], [], [], [3], [],[], ['c3', 'r16']]
area_df.loc[len(area_df)] = ['Room', 19, [], [], [], [], [3], [],[], ['c3', 'r16']]
area_df.loc[len(area_df)] = ['Room', 20, [], [], [], [], [3], [],[], ['c3', 'r16']]
area_df.loc[len(area_df)] = ['Room', 21, [], [], [], [], [3], [],[], ['c3', 'r16']]
area_df.loc[len(area_df)] = ['Room', 22, [], [], [], [12,25], [], [],[], ['r26','c1']]
area_df.loc[len(area_df)] = ['Room', 23, [], [], [], [24], [], [],[], ['r24','r13']]
area_df.loc[len(area_df)] = ['Room', 24, [], ['unreliable_sensor4'], [], [23,13,14], [], [],[], ['r13','r9']]
area_df.loc[len(area_df)] = ['Room', 25, [], ['reliable_sensor3'], [], [22,26], [1], [],[], []]
area_df.loc[len(area_df)] = ['Room', 26, ['door_sensor3'], [], [], [27,25], [], [],[], ['r25']]
area_df.loc[len(area_df)] = ['Room', 27, ['door_sensor3'], [], [], [26,32], [], [],[], ['r26','r25']]
area_df.loc[len(area_df)] = ['Room', 28, [], [], [], [], [4], [],[], ['r35','c4']]
area_df.loc[len(area_df)] = ['Room', 29, [], [], [], [30], [4], [],[], ['o1','c4']]
area_df.loc[len(area_df)] = ['Room', 30, [], [], [], [29], [], [],[], ['r29','c4']]
area_df.loc[len(area_df)] = ['Room', 31, [], ['reliable_sensor4'], [], [32], [], [],[], []]
area_df.loc[len(area_df)] = ['Room', 32, [], [], [], [33,27,31], [], [],[], ['r27','r31']]
area_df.loc[len(area_df)] = ['Room', 33, [], [], [], [32], [], [],[], ['r32','r31','r27']]
area_df.loc[len(area_df)] = ['Room', 34, [], [], [], [], [2], [],[], ['c4','c2']]
area_df.loc[len(area_df)] = ['Room', 35, ['door_sensor4'], [], [], [], [4], [],[], ['o1','c4']]
#Corridors:
area_df.loc[len(area_df)] = ['Corridor', 1, ['door_sensor2'], [], [], [7,25], [2], [],[], ['r25']]
area_df.loc[len(area_df)] = ['Corridor', 2, ['door_sensor2'], [], [], [34], [1,4], [],[], ['c1']]
area_df.loc[len(area_df)] = ['Corridor', 3, [], ['unreliable_sensor2'], [], [5,6,10,11,15,16,17,18,19,20,21], [], [1],[], ['o1','r5']]
area_df.loc[len(area_df)] = ['Corridor', 4, ['door_sensor4'], [], [], [28,29,35], [2], [1],[], ['c2','c1']]
#Open-Space:
area_df.loc[len(area_df)] = ['Open-Area', 1, [], ['unreliable_sensor1'], [], [], [3,4], [],[], ['c4','c2']]
#Outside:
area_df.loc[len(area_df)] = ['Outside', 1, [], [], [], [12], [], [],[], ['r12','r22']]
#Iteration Order
iteration_order = ['r5', 'r25', 'r31', 'r16', # First Layer
'r9', 'r26', 'c1', # Second Layer
'r8', 'r27', 'r22', 'c2', # Third Layer
'r13', 'r32', 'r12', 'c4', 'r7', # Fourth Layer
'r24', 'r33', 'outside', 'r34', 'o1','r1', # Fifth Layer
'r14', 'r23', 'c3', 'r29', 'r35', 'r3', 'r4', # Sixth Layer
'r6', 'r30', 'r28', 'r2', 'r10', 'r11', 'r15', 'r17', 'r18', 'r19', 'r20', 'r21' # Seventh Layer
]
# HMM dataframe: consists of transition, emission and outcome-space tables
# Note we introduced a node called influence (see report), which mainly looks at the neighbouring rooms and checks
# whether their counts increase or decrease. It is highly dependent on the given structure; therefore, the iteration order is essential
hmm_df = pd.DataFrame(columns=['area', 'influence', 'transition_table', 'emission_table', 'outComeSpace'])
# Write the area name into hmm_df
def area_name(x):
if x.type == 'Outside' :
return 'outside'
else:
return x.type[0].lower() + str(x.number)
# Apply to every row of the dataframe
hmm_df['area'] = area_df.apply(lambda x: area_name(x), axis=1)
# Write the influence based on graph
hmm_df['influence'] = area_df['influence']
# Write transition table
def transition_table(x,ground_truth_df):
# First find the column
if x.type == 'Outside' :
column_name = 'outside'
else:
column_name = x.type[0].lower() + str(x.number)
# Locate the stats
column_stats = ground_truth_df[column_name].to_list()
# Raw counts for incrementing
# Note: p here represents people, where p stands for True, and not_p stands for False
# Hence p_to_not_p = from people to no people
transition_raw = {
'p_to_not_p' : 0,
'p_to_p' : 0,
'not_p_to_p' : 0,
'not_p_to_not_p' : 0,
}
# Iterate over consecutive pairs of data points to count the transitions
for index in range(1, len(column_stats)):
# Write states: a non-zero count means the area is occupied
# Previous
previous_state = column_stats[index-1] != 0
# Current
current_state = column_stats[index] != 0
# Increment the matching transition count
if previous_state and current_state:
transition_raw['p_to_p'] += 1
elif previous_state and not current_state:
transition_raw['p_to_not_p'] += 1
elif not previous_state and not current_state:
transition_raw['not_p_to_not_p'] += 1
else:
transition_raw['not_p_to_p'] += 1
# Define the smoothed probabilities
# We add additive smoothing
transition_prob = {
'p_to_not_p' : (transition_raw['p_to_not_p'] +5000) / ((transition_raw['p_to_not_p'] + transition_raw['p_to_p']) + 5000*4),
'p_to_p' : (transition_raw['p_to_p'] +5000) / ((transition_raw['p_to_not_p'] + transition_raw['p_to_p']) + 5000*4),
'not_p_to_p' : (transition_raw['not_p_to_p'] +5000) / ((transition_raw['not_p_to_not_p'] + transition_raw['not_p_to_p']) +5000*4),
'not_p_to_not_p' : (transition_raw['not_p_to_not_p'] +5000) / ((transition_raw['not_p_to_not_p'] + transition_raw['not_p_to_p']) +5000*4),
}
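# A worked example of the smoothing above, with hypothetical counts:
# if p_to_p = 300 and p_to_not_p = 100, then with Alpha = 5000
# p_to_p = (300 + 5000) / (400 + 5000*4) = 5300/20400 ~= 0.260
# p_to_not_p = (100 + 5000) / (400 + 5000*4) = 5100/20400 ~= 0.250
# The heavy smoothing pulls both values towards 0.25; note the pair does not
# sum to 1 because the denominator adds 4 pseudo-counts rather than 2.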
"""
# We explore the effect of smoothing strength:
# Weak
Weak smoothing with Alpha = 1:
| r5_t-1 | r5_t | Pr |
|----------+--------+-----------|
| true | true | 0.616314 |
| true | false | 0.383686 |
| false | true | 0.0613823 |
| false | false | 0.938618 |
Given:
{'reliable_sensor2_t': 'motion', 'r5_t-1': 'false'}
Inference:
| r5_t | Pr |
|--------+---------|
| true | 0.82533 |
| false | 0.17467 |
# Medium
Medium smoothing with Alpha = 1000:
| r5_t-1 | r5_t | Pr |
|----------+--------+----------|
| true | true | 0.277996 |
| true | false | 0.260217 |
| false | true | 0.185698 |
| false | false | 0.484759 |
Given:
{'reliable_sensor2_t': 'motion', 'r5_t-1': 'false'}
Inference:
| r5_t | Pr |
|--------+----------|
| true | 0.446487 |
| false | 0.553513 |
# Strong
Strong smoothing with Alpha = 5000:
| r5_t-1 | r5_t | Pr |
|----------+--------+----------|
| true | true | 0.255964 |
| true | false | 0.252176 |
| false | true | 0.232317 |
| false | false | 0.314559 |
Given:
{'reliable_sensor2_t': 'motion', 'r5_t-1': 'false'}
Inference:
| r5_t | Pr |
|--------+-----------|
| true | 0.901085 |
| false | 0.0989146 |
"""
# Convert it to a table in the same format as the tutorial
if x.type != 'Outside':
area_name = x.type[0].lower() + str(x.number)
else:
area_name = 'outside'
# Transition Table
transition_table = {
'dom': (area_name+'_t-1', area_name+'_t'),
'table': odict([
(('true', 'true'), transition_prob['p_to_p']),
(('true', 'false'), transition_prob['p_to_not_p']),
(('false', 'true'), transition_prob['not_p_to_p']),
(('false', 'false'), transition_prob['not_p_to_not_p']),
])
}
# Return the transition Table
return transition_table
# Apply to every row of the dataframe
hmm_df['transition_table'] = area_df.apply(lambda x: transition_table(x, training_df), axis=1)
# Write the outcomeSpace
def outcomeSpace_construct(x):
# First find the area name, and store it for easier use later
if x.type != 'Outside':
area_name = x.type[0].lower() + str(x.number)
else:
area_name = 'outside'
# Initialize:
outComeSpace = {
area_name+'_t-1': ('true', 'false'),
area_name+ '_t': ('true', 'false'),
'robot1_t' : ('motion', 'no motion'), # Assuming the robot sensors visit every area
'robot2_t' : ('motion', 'no motion'), # Assuming the robot sensors visit every area
}
# Sensor List:
for _ in x['door_sensors']:
outComeSpace[_ + '_t'] = ('motion', 'no motion')
for _ in x['motion_sensors']:
outComeSpace[_+'_t'] = ('motion', 'no motion')
# Influence List
for _ in x['influence']:
outComeSpace[_+'_state_t'] = ('decrease', 'increase', 'consistent')
# Return
return outComeSpace
# Apply to every row of the dataframe
hmm_df['outComeSpace'] = area_df.apply(lambda x: outcomeSpace_construct(x), axis=1)
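# For example, the row for r5 (one motion sensor, no door sensors, empty
# influence list) yields an outcome space along the lines of:
# {'r5_t-1': ('true', 'false'), 'r5_t': ('true', 'false'),
# 'robot1_t': ('motion', 'no motion'), 'robot2_t': ('motion', 'no motion'),
# 'reliable_sensor2_t': ('motion', 'no motion')}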
# Write emission table
def emission_table_construct(x, ground_truth, data):
# We want to construct:
emission_table = {}
# Tutorial Functions:
def prob(factor, *entry):
return factor['table'][entry]
def transposeGraph(G):
GT = dict((v, []) for v in G)
for v in G:
for w in G[v]:
GT[w].append(v)
return GT
def allEqualThisIndex(dict_of_arrays, **fixed_vars):
# base index is a boolean vector, everywhere true
first_array = dict_of_arrays[list(dict_of_arrays.keys())[0]]
index = np.ones_like(first_array, dtype=np.bool_)
for var_name, var_val in fixed_vars.items():
index = index & (np.asarray(dict_of_arrays[var_name])==var_val)
return index
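# e.g. allEqualThisIndex({'a': [1, 2, 1], 'b': [0, 0, 1]}, a=1, b=0)
# returns array([ True, False, False])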
def estProbTable(data, var_name, parent_names, outcomeSpace):
var_outcomes = outcomeSpace[var_name]
parent_outcomes = [outcomeSpace[var] for var in (parent_names)]
# cartesian product to generate a table of all possible outcomes
all_parent_combinations = product(*parent_outcomes)
prob_table = odict()
for i, parent_combination in enumerate(all_parent_combinations):
parent_vars = dict(zip(parent_names, parent_combination))
parent_index = allEqualThisIndex(data, **parent_vars)
possibilities = sum([len(x) for x in outcomeSpace.values()])
for var_outcome in var_outcomes:
var_index = (np.asarray(data[var_name])==var_outcome)
# Apply additive smoothing
prob_table[tuple(list(parent_combination)+[var_outcome])] = ((var_index & parent_index).sum()+1)/ (parent_index.sum() + possibilities)
return {'dom': tuple(list(parent_names)+[var_name]), 'table': prob_table}
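# Sketch of the returned factor: estProbTable(data, 's', ['r'], outcomeSpace)
# for a hypothetical sensor 's' with parent 'r' yields
# {'dom': ('r', 's'), 'table': odict({('true', 'motion'): ..., ...})},
# i.e. the rows of P(s | r) with additive smoothing applied.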
def join(f1, f2, outcomeSpace):
# First, we need to determine the domain of the new factor. It will be the union of the domains of f1 and f2
# But it is important to eliminate the repetitions
common_vars = list(f1['dom']) + list(set(f2['dom']) - set(f1['dom']))
# We will build a table from scratch, starting with an empty list. Later on, we will transform the list into an odict
table = list()
# Here is where the magic happens. The product iterator will generate all combinations of variable values
# as specified in outcomeSpace. Therefore, it will naturally respect observed values
for entries in product(*[outcomeSpace[node] for node in common_vars]):
# We need to map the entries to the domain of the factors f1 and f2
entryDict = dict(zip(common_vars, entries))
f1_entry = (entryDict[var] for var in f1['dom'])
f2_entry = (entryDict[var] for var in f2['dom'])
p1 = prob(f1, *f1_entry) # Use the function prob to look up the probability in factor f1 for entry f1_entry
p2 = prob(f2, *f2_entry) # Use the function prob to look up the probability in factor f2 for entry f2_entry
# Create a new table entry with the multiplication of p1 and p2
table.append((entries, p1 * p2))
return {'dom': tuple(common_vars), 'table': odict(table)}
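# e.g. joining a prior with dom ('r5',) and a CPT with dom ('r5', 'robot1')
# yields a factor with dom ('r5', 'robot1') whose entries are
# P(r5) * P(robot1 | r5).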
def p_joint(outcomeSpace, cond_tables):
node_list = list(outcomeSpace.keys())
for index in range(1, len(node_list)):
if index == 1:
p = join(cond_tables[node_list[index-1]], cond_tables[node_list[index]], outcomeSpace)
else:
p = join(p, cond_tables[node_list[index]], outcomeSpace)
return p
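# p_joint multiplies the conditional tables in outcomeSpace key order,
# implementing the chain rule P(x1,...,xn) = prod_i P(xi | parents(xi));
# e.g. with nodes ['r5', 'robot1'] it returns join(P(r5), P(robot1 | r5)).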
# Graph Checking:
def graph_check(input_graph, output_name):
dot = Digraph(engine="neato", comment='Direct graph example')
dot.attr(overlap="false", splines="true")
for v in input_graph.keys():
dot.node(str(v))
for v in input_graph.keys():
for w in input_graph[v]:
dot.edge(str(v), str(w))
dot.format = 'png'
dot.render(filename = output_name)
# Robot Sensor Data Pre-Processing functions:
def robot_sensor_room_data(x):
return x.split(',')[0].replace('(', '').replace("'", "")
def robot_sensor_count_data(x):
if int(x.split(',')[1].replace(')', '')) != 0:
return 'motion'
else:
return 'no motion'
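# Example: for a robot reading formatted like "('r5', 2)" (the string format
# these helpers assume), robot_sensor_room_data returns 'r5' and
# robot_sensor_count_data returns 'motion' (non-zero count).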
# Pre Processing ---------------------------------------------------------------------------------------
# First we convert ground truth into true or false
outcome_map_truth = {True: 'true', False: 'false'}
# We map the ground truth to 'true'/'false'; note the ground truth is a dataframe of people counts
new_groundTruth = ground_truth.astype('bool').replace(outcome_map_truth)
# As we are considering influence, we also create a state dataframe
# It holds, for each area, a column appended onto the ground truth that provides 'increase', 'decrease' or 'consistent'
tmp_state_df = pd.DataFrame(index=ground_truth.index,columns=[x + '_state' for x in ground_truth.columns])
# Iterate over the dataframe; we cannot use apply, since we compare consecutive pairs of rows:
first_row_indicator = True # Special case for first row
for row in ground_truth.itertuples():
if first_row_indicator:
previous_row = [0 for x in ground_truth.columns]
first_row_indicator = False
# Locate current row
current_row = [_ for _ in row[1:]]
# Subtract Current row and Previous Row
subtraction = list(map(int.__sub__, current_row, previous_row))
# Now we build the updated state row
updated_row = []
for _ in subtraction:
# A positive difference means the number of people increased
if _ > 0 :
updated_row.append('increase')
# A zero difference means no change in the number of people
elif _ == 0:
updated_row.append('consistent')
# A negative difference means the number of people decreased
elif _ < 0:
updated_row.append('decrease')
# Now we store the state row into the temporary dataframe
tmp_state_df.loc[row[0]] = updated_row
# For next row
previous_row = current_row
# We merge the two dataframes
new_groundTruth = new_groundTruth.join(tmp_state_df)
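# Example: a room whose ground-truth counts read 0, 3, 3, 1 produces the
# state sequence 'consistent', 'increase', 'consistent', 'decrease'
# (the first row is compared against an assumed all-zero previous row).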
# We also split into non-robot, and robot data:
outcome_map_motion = {True: 'motion', False: 'no motion'} # 'motion' means people present, 'no motion' means none
# We clean the robot data, extracting the visited room and a motion flag from each reading
robot_data_overall = data[['robot1', 'robot2']]
# Robot1
robot_data_overall['robot1_room'] = robot_data_overall['robot1'].apply(lambda x: robot_sensor_room_data(x))
robot_data_overall['robot1_count'] = robot_data_overall['robot1'].apply(lambda x: robot_sensor_count_data(x))
# Robot2
robot_data_overall['robot2_room'] = robot_data_overall['robot2'].apply(lambda x: robot_sensor_room_data(x))
robot_data_overall['robot2_count'] = robot_data_overall['robot2'].apply(lambda x: robot_sensor_count_data(x))
# Non Robot Data:
non_robot_data = data[[x for x in data.columns if x not in ['robot1', 'robot2', 'electricity_price']]]
non_robot_data['door_sensor1'] = data['door_sensor1'].astype('bool').replace(outcome_map_motion)
non_robot_data['door_sensor2'] = data['door_sensor2'].astype('bool').replace(outcome_map_motion)
non_robot_data['door_sensor3'] = data['door_sensor3'].astype('bool').replace(outcome_map_motion)
non_robot_data['door_sensor4'] = data['door_sensor4'].astype('bool').replace(outcome_map_motion)
# Then we establish the outcomeSpace and graph; note the emission does not rely on t-1:
# We create a new outcome space so we don't overwrite the original
reduced_outcomeSpace = {}
for _, val in x.outComeSpace.items():
if '-1' in _:
continue #Ignore
elif '_t' in _:
reduced_outcomeSpace[_[:-2]] = val # We remove '_t' here for later operations
else:
reduced_outcomeSpace[_] = val
# Now we are getting somewhere, we store the domain
emission_table['dom'] = tuple([x + '_t' for x in reduced_outcomeSpace.keys()])
# Now we establish the graph
# In theory this is a DAG where the room node is the single parent of the sensor and influence nodes
graph = {}
motion_sensor, door_sensor = '',''
influence = []
for _ in reduced_outcomeSpace.keys():
if _ == x.area:
graph[_] = [y for y in reduced_outcomeSpace.keys() if y!=_]
continue # Go to next
elif 'reliable' in _ :
motion_sensor = _
elif 'door' in _:
door_sensor = _
elif 'robot' not in _:
influence.append(_)
graph[_] = []
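# The result is a star-shaped (naive-Bayes-like) graph: the room node points
# to every sensor and influence node, and those have no children, e.g. for r5:
# {'r5': ['robot1', 'robot2', 'reliable_sensor2'],
# 'robot1': [], 'robot2': [], 'reliable_sensor2': []}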
# Plot for checking
if x.area == 'r9':
graph_check(graph, 'emission_construct_eval')
# Transpose the graph and store it for easier operation
graphT = transposeGraph(graph)
# Now we construct the CPT for each node given the data
conditional_probs = {}
# First we find prior:
# P(People)
conditional_probs[x.area] = estProbTable(new_groundTruth, x.area, [], reduced_outcomeSpace)
# P( Motion Sensor | People )
# Find the sensor conditional probabilities in the room:
if motion_sensor != '':
conditional_probs[motion_sensor] = estProbTable(new_groundTruth.join(non_robot_data), motion_sensor, graphT[motion_sensor], reduced_outcomeSpace)
# P ( Door Sensor | People)
# Find the Sensor conditional probabilities in the room:
if door_sensor != '':
conditional_probs[door_sensor] = estProbTable(new_groundTruth.join(non_robot_data), door_sensor, graphT[door_sensor], reduced_outcomeSpace)
# P ( Robot Sensor | People)
# We need to consider both robot 1 and robot 2
# Robot 1: we take a smaller dataframe
rb1_tmp_df = robot_data_overall.loc[robot_data_overall['robot1_room'] == x.area ]['robot1_count']
rb1_tmp_df = rb1_tmp_df.to_frame().rename({'robot1_count':'robot1'}, axis='columns')
# Then we take the join of rb1 with the new ground truth
conditional_probs['robot1'] = estProbTable(rb1_tmp_df.join(new_groundTruth), 'robot1', graphT['robot1'], reduced_outcomeSpace)
# Robot 2: we take a smaller dataframe
rb2_tmp_df = robot_data_overall.loc[robot_data_overall['robot2_room'] == x.area ]['robot2_count']
rb2_tmp_df = rb2_tmp_df.to_frame().rename({'robot2_count':'robot2'}, axis='columns')
# Then we take the join of rb2 with the new ground truth
conditional_probs['robot2'] = estProbTable(rb2_tmp_df.join(new_groundTruth), 'robot2', graphT['robot2'], reduced_outcomeSpace)
# P( Influences | People )
# Influences:
for _ in influence:
conditional_probs[_] = estProbTable(new_groundTruth.join(non_robot_data), _, graphT[_], reduced_outcomeSpace)
# Find Joint Probability:
joint_p = p_joint(reduced_outcomeSpace, conditional_probs)
# Now convert it to the emission table, conditioning on the room state:
# P( Joint ) / P( People )
# We make a copy:
tmp_table = joint_p.copy()
# Now we iterate over joint probability
for _, val in joint_p['table'].items():
# Find the entry of P( People )
area_val = (_[0],)
# Divide the entry
tmp_table['table'][_] = tmp_table['table'][_] / conditional_probs[x.area]['table'][area_val]
emission_table['table'] = tmp_table['table']
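# i.e. dividing the joint by the prior turns P(people, sensors, influences)
# into P(sensors, influences | people), the emission distribution the HMM needs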
"""
# For testing purposes we look at a single node
# (note: query and sys would need to be imported/defined to run this block)
if x.area == 'r5':
print("Joint")
printFactor(joint_p)
print("Emission")
printFactor(emission_table)
print("Conditional")
for _, factor in conditional_probs.items():
printFactor(factor)
evi_dict = {'reliable_sensor2':'motion'}
test = emission_table.copy()
test['dom'] = joint_p['dom']
printFactor(query(test, reduced_outcomeSpace, x.area, evi_dict))
sys.exit()
"""
return emission_table
hmm_df['emission_table'] = hmm_df.apply(lambda x: emission_table_construct(x, training_df, motion_df), axis=1)
# Let's merge the per-area tables into one column, to reduce lookup time at run time:
def merge_columns(x):
return {0: x.outComeSpace, 1: x.transition_table, 2: x.emission_table}
hmm_df['merged'] = hmm_df.apply(lambda x: merge_columns(x), axis=1)
# Save Model as Pickle
hmm_df.to_pickle('model.pkl')
print("Model Has finished been built and stored in model.pkl. Execute solution.py for further stage.")