// search.hpp
#ifndef SEARCH_HPP
#define SEARCH_HPP

#include <unordered_map>

#include "agent.hpp"
#include "main.hpp"

// Hash functor for (observation, reward) pairs so they can be used as keys
// in an unordered_map. A simple linear mix of the two components (cheap,
// though collision-prone for large values).
class or_hasher {
public:
    size_t operator()(const obsrew_t &p) const {
        return p.first * 100 + p.second * 10000;
    }
};

class ChanceNode;
class DecisionNode;
class Agent;

typedef std::unordered_map<obsrew_t, DecisionNode *, or_hasher> decision_map_t;
typedef std::unordered_map<action_t, ChanceNode *> chance_map_t;
typedef unsigned long long visits_t;

class SearchNode {
public:
    SearchNode(void);

    // determine the expected reward from this node
    reward_t expectation(void) const;

    // number of times the search node has been visited
    visits_t visits(void) const;

protected:
    double m_mean;     // the expected reward of this node
    visits_t m_visits; // number of times the search node has been visited
};
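
// A minimal sketch (an assumption, not this project's code) of the running
// mean update that expectation() and visits() support: when sample() backs a
// playout reward up through a node, the statistics are typically refreshed
// like this.
inline void sketch_update_stats(double &mean, visits_t &visits, double reward) {
    visits += 1;
    mean += (reward - mean) / double(visits); // incremental average of all rewards seen
}
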
class DecisionNode : public SearchNode {
public:
    // constructor
    DecisionNode(obsrew_t obsrew);
    ~DecisionNode();

    // print node data for debugging purposes
    void print() const;

    // get the (observation, reward) label of this node
    obsrew_t obsRew(void) const;

    // discard every child chance node except the one labelled by 'action'
    void pruneAllBut(action_t action);

    int getDecisionNodeInfo(void);

    // add a new child chance node
    bool addChild(ChanceNode *child);
    ChanceNode *getChild(action_t a);

    // perform a sample run through this node and its children,
    // returning the accumulated reward from this sample run
    reward_t sample(Agent &agent, unsigned int dfr);

    // determine the next action to play
    action_t selectAction(Agent &agent);

    // return the best action for a decision node
    action_t bestAction(Agent &agent) const;

private:
    obsrew_t m_obsrew;       // observation/reward pair
    chance_map_t m_children; // map of child chance nodes, keyed by action
};
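
// Sketch of the UCB-style score that selectAction() presumably maximises over
// the child chance nodes, as in UCT-based Monte Carlo tree search. The
// exploration constant and the reward normalisation are assumptions, not
// taken from this header.
#include <cmath>
#include <limits>

inline double sketch_ucb_score(double child_mean, visits_t parent_visits,
                               visits_t child_visits) {
    if (child_visits == 0) {
        // untried actions are explored before any action is revisited
        return std::numeric_limits<double>::infinity();
    }
    const double c = 1.41421356; // exploration constant, sqrt(2)
    return child_mean + c * std::sqrt(std::log(double(parent_visits)) / double(child_visits));
}
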
class ChanceNode : public SearchNode {
public:
    // constructor
    ChanceNode(action_t action);
    ~ChanceNode();

    // add a new child decision node
    bool addChild(DecisionNode *child);

    // discard every child decision node except the one labelled by 'obsrew'
    void pruneAllBut(obsrew_t obsrew);

    DecisionNode *getChild(obsrew_t o_r);

    int getChanceNodeInfo(void);

    // get the action label of this node
    action_t action(void) const;

    // perform a sample run through this node and its children,
    // returning the accumulated reward from this sample run
    reward_t sample(Agent &agent, unsigned int dfr);

private:
    action_t m_action;
    decision_map_t m_children; // map of child decision nodes, keyed by (observation, reward)
};
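
// Illustrative sketch (not from the source) of the lookup-or-create pattern
// that getChild()/addChild() support: sample() draws an (observation, reward)
// pair from the agent's environment model and descends into the matching
// child decision node, creating it on first visit.
#include <utility>

inline DecisionNode *sketch_child_for(decision_map_t &children, obsrew_t obsrew) {
    decision_map_t::iterator it = children.find(obsrew);
    if (it == children.end()) {
        it = children.insert(std::make_pair(obsrew, new DecisionNode(obsrew))).first;
    }
    return it->second;
}
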
// determine the best action by searching ahead
extern action_t search(Agent &agent);
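
// The loop inside search() typically looks like the following sketch
// (pseudostructure only: the simulation budget, the copyability of Agent and
// the horizon value are assumptions, not declarations from this header):
//
//   DecisionNode root(last (observation, reward));
//   while the time/simulation budget is not exhausted:
//       Agent copy = agent;          // simulate on a scratch copy
//       root.sample(copy, horizon);  // one Monte Carlo sample run
//   return root.bestAction(agent);
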
// simulate a path through a hypothetical future for the agent within its
// internal model of the world, returning the accumulated reward.
static reward_t playout(Agent &agent, unsigned int playout_len);
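
// Sketch of a typical playout: actions are chosen uniformly at random and
// percepts are sampled from the agent's internal model for playout_len
// cycles, accumulating the reward. (The model calls below are illustrative
// names, not declarations from this project.)
//
//   reward_t total = 0.0;
//   for (unsigned int i = 0; i < playout_len; ++i) {
//       choose a uniformly random action a;
//       sample (observation, reward) for a from the agent's model;
//       total += reward;
//   }
//   return total;
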
#endif // SEARCH_HPP