-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrollout.m
68 lines (55 loc) · 1.93 KB
/
rollout.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
function [total,tree] = rollout(state,nodeTree,d,currentNode)
%ROLLOUT Random-action rollout that records every step in a node tree.
%   [total,tree] = ROLLOUT(state,nodeTree,d,currentNode) simulates d steps
%   starting from STATE. At each step an action is drawn at random from
%   1..4 and passed to forwardSimulate together with the current state.
%   Three nodes are then chained below CURRENTNODE, in this order:
%       action node (N = 1) -> observation node -> new-state node
%   and the recursion continues from the new-state node.
%
%   Inputs
%     state       - value handed to forwardSimulate as the current state.
%     nodeTree    - graph object supporting addnode/addedge whose Nodes
%                   table has the variables M, N, actionObs, Q and free.
%     d           - number of steps left; d == 0 ends the recursion.
%     currentNode - ID of the node the next action node is attached to.
%
%   Outputs
%     total - discounted return reward + gamma*(return of remaining steps),
%             with gamma = 0.95; 0 when d == 0.
%     tree  - nodeTree including every node and edge added by the rollout.

gamma = 0.95;

% Depth exhausted: nothing to simulate, hand the tree back untouched.
if d == 0
    total = 0;
    tree = nodeTree;
    return;
end

action = randsample([1 2 3 4],1);
[observation,reward,newState] = forwardSimulate(state,action);

% Each call attaches one node under the previous one and moves down to it.
% Only the action node starts with a visit count (N) of 1.
[nodeTree,currentNode] = attachChildNode(nodeTree,currentNode,1,action);
[nodeTree,currentNode] = attachChildNode(nodeTree,currentNode,0,observation);
[nodeTree,currentNode] = attachChildNode(nodeTree,currentNode,0,newState);

[total,tree] = rollout(newState,nodeTree,d-1,currentNode);
total = reward + gamma*total;
end

function [nodeTree,nodeID] = attachChildNode(nodeTree,parentNode,visitCount,payload)
%ATTACHCHILDNODE Store one node row and link it below parentNode.
%   The first row whose 'free' flag is set is reused; if there is none, a
%   new node is appended to the graph. The row is overwritten with
%   M = 0, N = visitCount, actionObs = {payload}, Q = 0, free = false, and
%   an edge parentNode -> nodeID is added.
%
%   NOTE(review): a reused row keeps whatever edges it already has;
%   presumably whoever sets 'free' also removes those edges - confirm.

nodeRow = table(0,visitCount,{payload},0,false, ...
    'VariableNames',{'M','N','actionObs','Q','free'});

nodeID = find(nodeTree.Nodes.free,1,'first');
if isempty(nodeID)
    % No recyclable slot: grow the graph by one node and use the new last row.
    nodeTree = addnode(nodeTree,1);
    nodeID = numnodes(nodeTree);
end

nodeTree.Nodes(nodeID,:) = nodeRow;
nodeTree = addedge(nodeTree,parentNode,nodeID);
end