-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTwoRiverSwimDomain.py
62 lines (47 loc) · 1.94 KB
/
TwoRiverSwimDomain.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import numpy as np
from FactoredStruct import FactoredStruct
from FactoredMDP import FactoredMDP
class TwoRiverSwimDomain(FactoredMDP):
    """Factored MDP built from two identical chain ("river swim") components.

    The factor sizes handed to the base class are
    ``[nlocations, nlocations, 2, 2]``.  Judging by how the decoded values
    are used below, factors 0 and 1 are the positions on the two chains and
    factors 2 and 3 are the binary actions applied to chain 0 and chain 1
    respectively.
    """

    def __init__(self, nlocations):
        """Build the reward and transition structures of the domain.

        Args:
            nlocations: Number of positions on each chain.  Must be at least
                2, because the transition table always writes probability
                mass to a neighbouring position.

        Raises:
            ValueError: If ``nlocations`` is smaller than 2.
        """
        # Fail early with a clear message; otherwise the transition loop
        # below indexes column 1 of a one-column table and dies with an
        # opaque IndexError.
        if nlocations < 2:
            raise ValueError(
                f"nlocations must be at least 2, got {nlocations!r}"
            )

        # NOTE(review): presumably "2 state factors, 1 reward factor" --
        # confirm against FactoredMDP.__init__.
        super().__init__(2, 1, [nlocations, nlocations, 2, 2])
        self.nlocations = nlocations
        last = nlocations - 1

        # --- reward structure: a single factor whose scope is all 4 factors
        scope = np.arange(4)
        params = np.zeros(self.nelements(scope))
        for sa in range(params.shape[0]):
            state = self.decode(sa, scope)

            # Chain 0: small reward at position 0 under action 0,
            # large reward at the last position under action 1.
            if state[0] == 0 and state[2] == 0:
                params[sa] += 0.05
            elif state[0] == last and state[2] == 1:
                params[sa] += 1

            # Chain 1: same rule, using position factor 1 / action factor 3.
            if state[1] == 0 and state[3] == 0:
                params[sa] += 0.05
            elif state[1] == last and state[3] == 1:
                params[sa] += 1

            # Extra bonus when both chains are at the last position and both
            # actions are 1; this is what couples the two chains.
            if np.array_equal(state, [last, last, 1, 1]):
                params[sa] += 2

        n_state_actions = self.nstates * self.nactions
        self.rewardstruct[0] = FactoredStruct(
            scope, params, np.zeros((n_state_actions, params.shape[0]))
        )
        # normalize in [0,1]: the largest attainable value is 1 + 1 + 2 = 4
        self.rewardstruct[0].params = self.rewardstruct[0].params / 4

        # --- transition structure: one table, indexed by (position, action)
        scope = np.array([0, 2])
        params = np.zeros((self.nelements(scope), nlocations))
        for sa in range(params.shape[0]):
            state = self.decode(sa, scope)
            loc = state[0]
            if state[1] == 0:
                # Action 0: deterministic step towards position 0
                # (stays put when already there).
                params[sa, max(0, loc - 1)] = 1
            elif loc == 0:
                # Action 1 at position 0: stay or advance.
                params[sa, 0] = 0.4
                params[sa, 1] = 0.6
            elif loc == last:
                # Action 1 at the last position: fall back or stay.
                params[sa, nlocations - 2] = 0.4
                params[sa, nlocations - 1] = 0.6
            else:
                # Action 1 in the interior: fall back, stay, or advance.
                params[sa, loc - 1] = 0.05
                params[sa, loc] = 0.6
                params[sa, loc + 1] = 0.35

        # Both chains use the same dynamics; only the scope differs.
        # NOTE(review): the two structs are handed the *same* params array
        # object, exactly as before -- confirm FactoredStruct never mutates
        # it in place.
        self.transitionstruct[0] = FactoredStruct(
            np.array([0, 2]),
            params,
            np.zeros((n_state_actions, params.shape[0])),
        )
        self.transitionstruct[1] = FactoredStruct(
            np.array([1, 3]),
            params,
            np.zeros((n_state_actions, params.shape[0])),
        )

        self.assignmappings()

    def resetstate(self):
        """Return the encoded start state with both positions set to 0."""
        return self.encode(np.array([0, 0]), range(self.nstatefactors))