Skip to content

Commit

Permalink
Fix relu inplace errors
Browse files Browse the repository at this point in the history
  • Loading branch information
saidinesh_pola committed Dec 9, 2023
1 parent 396ec35 commit 9d37261
Show file tree
Hide file tree
Showing 8 changed files with 488 additions and 23 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -142,3 +142,4 @@ dmypy.json
rllib-env/
runs/
wandb/
maps_old/
31 changes: 21 additions & 10 deletions environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def reward_done_info(self, reward, done, info):
attack_bonus = 0
gold_bonus = 0 # GoldBonus
harvest_bonus = 0
give_bonus = 0
level_bonus = 0
if self.agent_id in self.env.realm.players:
log = self.env.realm.event_log.get_data(agents=[self.agent_id],
Expand All @@ -119,16 +120,25 @@ def reward_done_info(self, reward, done, info):
llog = self.env.realm.event_log.get_data(agents=[self.agent_id],
event_code=EventCode.LEVEL_UP,
tick=self.env.realm.tick)
hlog = self.env.realm.event_log.get_data(agents=[self.agent_id],
event_code=EventCode.HARVEST_ITEM,
tick=self.env.realm.tick)
# if llog.shape[0] > 0 and llog[0][-4] > 0:
# # print('\033[93m' + 'level up log', log,
# # 'agent_id', self.agent_id, '\033[0m')
# level_bonus = 0.01
if hlog.shape[0] > 0 and hlog[0][-3] > 0:
# print('\033[94m' + 'harvest log', log,
harvest_log = self.env.realm.event_log.get_data(agents=[self.agent_id],
event_code=EventCode.HARVEST_ITEM,
tick=self.env.realm.tick)
give_log = self.env.realm.event_log.get_data(agents=[self.agent_id],
event_code=EventCode.GIVE_ITEM,
tick=self.env.realm.tick)
give_gold_log = self.env.realm.event_log.get_data(agents=[self.agent_id],
event_code=EventCode.GIVE_GOLD,
tick=self.env.realm.tick)
# Combat, Fishing Skills
if llog.shape[0] > 0 and llog[0][-4] > 0 and (llog[0][-5] in range(1, 5)):
# print('\033[93m' + 'level up log', llog,
# 'agent_id', self.agent_id, '\033[0m')
level_bonus = 0.01
if give_log.shape[0] > 0 and give_log[0][-3] > 0:
give_bonus = 0.03
if give_gold_log.shape[0] > 0 and give_gold_log[0][-3] > 0:
give_bonus = 0.03
if harvest_log.shape[0] > 0 and harvest_log[0][-3] > 0:
harvest_bonus = 0.01

# Add meandering bonus to encourage moving to various directions
Expand All @@ -149,7 +159,8 @@ def reward_done_info(self, reward, done, info):
explore_bonus *= self.explore_bonus_weight

reward = reward + explore_bonus + healing_bonus + \
meander_bonus + attack_bonus + gold_bonus + harvest_bonus+level_bonus
meander_bonus + attack_bonus + gold_bonus + \
harvest_bonus+level_bonus + give_bonus

return reward, done, info

Expand Down
9 changes: 9 additions & 0 deletions evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,15 @@ def make_policy(envs):
avg_completed_tick = np.mean([task.progress_info["completed_tick"]
for task in nmmo_env.tasks if task.completed])
print(f"Average completed tick: {avg_completed_tick:.1f}")
    # Append the four task-summary metrics above as one CSV row.
print("--------------------------------------------------")
df = pd.DataFrame({'Task': [nmmo_env.tasks[0].spec_name],
'Number of agents completed the task': [num_completed],
'Average maximum progress': [avg_progress],
'Average completed tick': [avg_completed_tick]})
# df.to_csv(os.path.join('results', 'task.csv'), index=False)
with open(os.path.join('results', 'task.csv'), 'a') as f:
df.to_csv(f, header=False, index=False)

# Save the replay file
replay_file = os.path.join(
Expand Down
4 changes: 2 additions & 2 deletions reinforcement_learning/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class Config:
# record_loss = False # log all minibatch loss and actions, for debugging

# Trainer Args
seed = 17
seed = 9
num_cores = None # Number of cores to use for training
num_envs = 6 # Number of environments to use for training
num_buffers = 2 # Number of buffers to use for training
Expand All @@ -23,7 +23,7 @@ class Config:
eval_num_steps = 1_000_000 # 1_000_000 # Number of steps to evaluate
checkpoint_interval = 5_000_000 # Interval to save models
# f"nmmo_{time.strftime('%Y%m%d_%H%M%S')}_{seed}" # Run name
run_name = f"test_nmmo_all_attn_hv_bonus_seed{seed}_exp15"
run_name = f"nmmo_rp_cr_attn_skill_bonus_seed{seed}_exp17"
runs_dir = "./runs" # Directory for runs
policy_store_dir = None # Policy store directory
use_serial_vecenv = False # Use serial vecenv implementation
Expand Down
Loading

0 comments on commit 9d37261

Please sign in to comment.