menace.c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "menace.h"

// temperature schedule for simulated annealing: decays exponentially with the
// episode index towards a floor of 0.02
float getTemperature(int iteration, int NumberIterations, float tempScale) {
  return 0.02 + expf(-(float)iteration * (16 * tempScale / (float)NumberIterations));
}
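
/*
 * Sanity check of the schedule (illustrative values, assuming tempScale = 1):
 * at iteration 0 the temperature is 0.02 + exp(0) = 1.02, and by the last
 * episode it has fallen to roughly 0.02 + exp(-16) ~= 0.02, so the annealing
 * policy moves from near-random towards near-greedy over the run.
 */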
// update the heuristic of the last SIZE_LAST_POSITIONS states visited before
// the solved state was reached
void updateHeuristics(Library lib, State s, float lambda, float epsilon) {
  float new, n, current;
  for (int i = 0; i < SIZE_LAST_POSITIONS; i++) {
    // walk backwards through the ring buffer of recently visited states
    s->indexOldestPos = s->indexOldestPos == 0 ? SIZE_LAST_POSITIONS - 1
                                               : s->indexOldestPos - 1;
    if (s->lastPositions[s->indexOldestPos] != NULL) {
      // reward for this state, discounted by its distance from the solved state
      new = pow(lambda, i) * epsilon;
      current = s->lastPositions[s->indexOldestPos]->heuristic;
      s->lastPositions[s->indexOldestPos]->timesReevaluate += 1;
      n = s->lastPositions[s->indexOldestPos]->timesReevaluate;
      // incremental average of all rewards this state has received
      s->lastPositions[s->indexOldestPos]->heuristic += (new - current) / n;
    }
  }
}
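
/*
 * The update above is the standard running-average rule
 *   h_n = h_{n-1} + (r - h_{n-1}) / n,  with r = lambda^i * epsilon,
 * so a state visited i moves before the solved position receives a reward
 * that decays geometrically in i and is averaged over every episode in
 * which that state appeared among the last positions.
 */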
// get the heuristic of all neighbouring states of cube c
void getAllHeuristics(Cube c, Library lib, float *heuristics,
                      int action[NACTION][SWAP]) {
  Tree leave;
  for (int i = 0; i < NACTION; i++) {
    turn(c, action[i]);
    getNode(lib, c, &leave);
    // small random noise breaks ties between equal heuristics
    heuristics[i] = leave->heuristic + (float)(rand() % 100) / 100000;
    // applying the same action again reverts it (actions are encoded as swaps)
    turn(c, action[i]);
  }
}
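
/*
 * Note on the tie-breaking noise: (rand() % 100) / 100000 lies in
 * [0, 0.00099], so it only reorders actions whose stored heuristics are
 * essentially equal and leaves clearly better actions on top.
 */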
// run a single episode: keep applying actions until the cube is solved
State runEpisode(State s, Library lib, float temperature,
                 int action[NACTION][SWAP], float param, int policy) {
  int a = 0;
  Tree leave;
  float *heuristics = safeMalloc(NACTION * sizeof(float));
  while (!isSolved(s->c)) {
    if (policy == 2) {
      // policy 2: pick the next action by simulated annealing
      a = simulated_annealing(lib, s, temperature, action);
    } else {
      // otherwise score all neighbours and let the chosen policy decide
      getAllHeuristics(s->c, lib, heuristics, action);
      a = actionSelection(heuristics, NACTION, param, policy);
    }
    turn(s->c, action[a]);
    getNode(lib, s->c, &leave);
    s->numberMoves += 1;
    // remember the new state in the ring buffer of recent positions
    s->lastPositions[s->indexOldestPos] = leave;
    s->indexOldestPos = (s->indexOldestPos + 1) % SIZE_LAST_POSITIONS;
  }
  free(heuristics);
  return s;
}
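
/*
 * actionSelection() and simulated_annealing() are defined elsewhere in the
 * project. Purely as an illustration of the kind of selector that gets
 * plugged in above, the hypothetical helper below implements a simple
 * epsilon-greedy choice over the heuristic scores; it is a sketch, not the
 * project's actionSelection.
 */
int exampleEpsilonGreedy(const float *heuristics, int n, float eps) {
  // with probability eps explore a random action, otherwise exploit the best one
  if ((float)rand() / (float)RAND_MAX < eps)
    return rand() % n;
  int best = 0;
  for (int i = 1; i < n; i++)
    if (heuristics[i] > heuristics[best])
      best = i;
  return best;
}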
// run the MENACE approach over multiple episodes
void menace_approach(int policy, int nEpisodes, float lambda, float reward,
                     float param, long *out) {
  Library lib;
  Cube c;
  State s;
  int action[NACTION][SWAP];
  float temperature;
  initCube(&c);
  initActions(action);
  initLibrary(&lib);
  for (int i = 0; i < nEpisodes; i++) {
    scrambleCube(c, 25, action);
    s = initState(c);
    temperature = getTemperature(i, nEpisodes, param);
    runEpisode(s, lib, temperature, action, param, policy);
    // propagate the reward back through the states visited this episode
    updateHeuristics(lib, s, lambda, reward);
    // record how many moves this episode needed to reach the solved state
    out[i] = s->numberMoves;
    freeState(s);
  }
  freeCube(c);
  freeLibrary(lib);
}
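
/*
 * Minimal usage sketch. The MENACE_DEMO guard, the parameter values and the
 * 1000-episode run are illustrative assumptions, not part of the original
 * program; it relies only on menace_approach() and safeMalloc(), which are
 * declared elsewhere in the project.
 */
#ifdef MENACE_DEMO
int main(void) {
  int nEpisodes = 1000;
  long *moves = safeMalloc(nEpisodes * sizeof(long));
  // policy 2 selects actions by simulated annealing (see runEpisode)
  menace_approach(2, nEpisodes, 0.95f, 1.0f, 1.0f, moves);
  for (int i = 0; i < nEpisodes; i++)
    printf("episode %d solved in %ld moves\n", i, moves[i]);
  free(moves);
  return 0;
}
#endif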