main-run.py
'''
Deep Reinforcement Learning for mini-greenhouse
Author: Efraim Manurung
MSc Thesis in Information Technology Group, Wageningen University
efraim.manurung@gmail.com
Read more information in the MiniGreenhouse class.
'''
# Import tensorflow
import tensorflow as tf
# Force tf.function-decorated code to run eagerly (useful for debugging)
tf.config.run_functions_eagerly(True)
# Import Ray Reinforcement Learning Library
from ray.rllib.algorithms.algorithm import Algorithm
# Import supporting libraries
import time
# The MiniGreenhouse environment class is assumed to be defined as provided
from MiniGreenhouse import MiniGreenhouse
# Use the Algorithm's `from_checkpoint` utility to get a new algo instance
# that has the exact same state as the old one, from which the checkpoint was
# created in the first place:
ppo_model_from_checkpoint = Algorithm.from_checkpoint('trained-drl-models/model-calibrator-config-4-checkpoint')
# Create the MiniGreenhouse environment instance
env = MiniGreenhouse({"flag_run": True,
                      "first_day_gl": 1,
                      "first_day_dnn": 0,
                      "season_length_gl": 1/72,
                      "season_length_dnn": 0,
                      "online_measurements": True,
                      "action_from_drl": True,
                      "flag_run_dnn": True,
                      "flag_run_gl": True,
                      "flag_run_combined_models": True,
                      "is_mature": True,
                      "max_steps": 18  # 5 * 72; 3 steps = 1 hour, so 24 hours = 24 * 3 = 72 steps
                      })
# Get the initial observation from the environment.
obs, info = env.reset()
terminated = truncated = False
total_rewards = 0.0
total_rewards_list = []  # Collect the cumulative reward after each step
# Play one episode
while not terminated and not truncated:
    # Add a short delay so the run is easier to follow.
    time.sleep(1.0)

    # Compute a single action, given the current observation
    # from the environment.
    action = ppo_model_from_checkpoint.compute_single_action(obs)

    # Apply the computed action in the environment.
    # TO-DO: the server determines the actions again based on the observation.
    obs, reward, terminated, truncated, info = env.step(action)

    # Print the new observation.
    print(obs)

    # Sum up rewards for reporting purposes.
    total_rewards += reward
    total_rewards_list.append(total_rewards)  # Append the cumulative reward to the list
# Report results.
print(f"Played 1 episode; total-reward={total_rewards}")