
Commit

Working Advice Exchange
saidinesh_pola committed Dec 8, 2023
1 parent e5af681 commit cfef276
Showing 4 changed files with 465 additions and 25 deletions.
8 changes: 4 additions & 4 deletions environment.py
@@ -122,10 +122,10 @@ def reward_done_info(self, reward, done, info):
hlog = self.env.realm.event_log.get_data(agents=[self.agent_id],
event_code=EventCode.HARVEST_ITEM,
tick=self.env.realm.tick)
if llog.shape[0] > 0 and llog[0][-4] > 0:
# print('\033[93m' + 'level up log', log,
# 'agent_id', self.agent_id, '\033[0m')
level_bonus = 0.01
# if llog.shape[0] > 0 and llog[0][-4] > 0:
# # print('\033[93m' + 'level up log', log,
# # 'agent_id', self.agent_id, '\033[0m')
# level_bonus = 0.01
if hlog.shape[0] > 0 and hlog[0][-3] > 0:
# print('\033[94m' + 'harvest log', log,
# 'agent_id', self.agent_id, '\033[0m')
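For readers skimming the hunk above: after this change the level-up bonus is commented out and only the harvest check remains. A minimal, self-contained sketch of that check follows; the 0.01 magnitude and the meaning of column -3 are assumptions, not taken from the diff.

import numpy as np

def harvest_bonus_from_log(hlog: np.ndarray, bonus: float = 0.01) -> float:
    # Return the bonus when the first HARVEST_ITEM row has a positive value in
    # its third-from-last column, mirroring the check in reward_done_info above.
    if hlog.shape[0] > 0 and hlog[0][-3] > 0:
        return bonus
    return 0.0

# Example with a single fabricated event row (seven placeholder columns).
fake_log = np.array([[1, 2, 3, 4, 5, 6, 7]])
print(harvest_bonus_from_log(fake_log))  # 0.01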
6 changes: 3 additions & 3 deletions reinforcement_learning/config.py
Expand Up @@ -13,7 +13,7 @@ class Config:
# record_loss = False # log all minibatch loss and actions, for debugging

# Trainer Args
seed = 45
seed = 17
num_cores = None # Number of cores to use for training
num_envs = 6 # Number of environments to use for training
num_buffers = 2 # Number of buffers to use for training
@@ -23,7 +23,7 @@ class Config:
eval_num_steps = 1_000_000 # 1_000_000 # Number of steps to evaluate
checkpoint_interval = 5_000_000 # Interval to save models
# f"nmmo_{time.strftime('%Y%m%d_%H%M%S')}_{seed}" # Run name
run_name = f"nmmo_c3_hv_lu_bonus_seed{seed}_exp13"
run_name = f"test_nmmo_all_attn_hv_bonus_seed{seed}_exp15"
runs_dir = "./runs" # Directory for runs
policy_store_dir = None # Policy store directory
use_serial_vecenv = False # Use serial vecenv implementation
@@ -60,7 +60,7 @@ class Config:
heal_bonus_weight = 0.03
meander_bonus_weight = 0.02
explore_bonus_weight = 0.01
gold_bonus_weight = 0 # 0.005
gold_bonus_weight = 0 # 0.002
attack_bonus_weight = 0 # 0.03 added
spawn_immunity = 20

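For context, bonus weights like the ones above typically enter the shaped reward as a weighted sum of per-step bonus terms. A hypothetical illustration with placeholder bonus values; the actual combination lives in environment.py, not here.

# Hypothetical illustration only; the real reward shaping is implemented in environment.py.
heal_bonus_weight, meander_bonus_weight, explore_bonus_weight = 0.03, 0.02, 0.01
gold_bonus_weight, attack_bonus_weight = 0, 0  # zero weights disable these terms

base_reward = 0.0
heal_bonus = meander_bonus = explore_bonus = gold_bonus = attack_bonus = 1.0  # placeholders

shaped_reward = (
    base_reward
    + heal_bonus_weight * heal_bonus
    + meander_bonus_weight * meander_bonus
    + explore_bonus_weight * explore_bonus
    + gold_bonus_weight * gold_bonus
    + attack_bonus_weight * attack_bonus
)
print(shaped_reward)  # roughly 0.06 with these placeholder values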
96 changes: 78 additions & 18 deletions reinforcement_learning/rl_policy.py
@@ -70,6 +70,54 @@ def forward(self, env_outputs):
return actions, value


class SelfAttention(torch.nn.Module):
def __init__(self, input_size, heads=4, embed_size=32):
super().__init__()
self.input_size = input_size
self.heads = heads
self.emb_size = embed_size

self.tokeys = torch.nn.Linear(
self.input_size, self.emb_size * heads, bias=False)
self.toqueries = torch.nn.Linear(
self.input_size, self.emb_size * heads, bias=False)
self.tovalues = torch.nn.Linear(
self.input_size, self.emb_size * heads, bias=False)

def forward(self, x):
b, t, hin = x.size()
assert hin == self.input_size, f'Input size {hin} should match {self.input_size}'

h = self.heads
e = self.emb_size

keys = self.tokeys(x).view(b, t, h, e)
queries = self.toqueries(x).view(b, t, h, e)
values = self.tovalues(x).view(b, t, h, e)

# dot-product attention
# fold the heads into the batch dimension
keys = keys.transpose(1, 2).contiguous().view(b * h, t, e)
queries = queries.transpose(1, 2).contiguous().view(b * h, t, e)
values = values.transpose(1, 2).contiguous().view(b * h, t, e)

# scale queries and keys so their dot product is effectively divided by sqrt(e)
queries = queries / (e ** (1/4))
keys = keys / (e ** (1/4))

dot = torch.bmm(queries, keys.transpose(1, 2))
assert dot.size() == (b*h, t, t)

# row-wise self-attention probabilities
dot = F.softmax(dot, dim=2)
self.dot = dot  # keep the attention weights around as an attribute
out = torch.bmm(dot, values).view(b, h, t, e)
out = out.transpose(1, 2).contiguous().view(b, t, h * e)
values = values.view(b, h, t, e)
values = values.transpose(1, 2).contiguous().view(b, t, h * e)
self.values = values  # reshaped values stored alongside self.dot for later use
return out
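
A quick shape check for the new SelfAttention module above; the batch and token sizes here are arbitrary, and the output feature dimension is heads * embed_size, so choosing 4 * 64 == 256 preserves the input shape.

import torch

attn = SelfAttention(input_size=256, heads=4, embed_size=64)  # 4 * 64 == 256
x = torch.randn(8, 12, 256)   # (batch, tokens, features), e.g. inventory embeddings
out = attn(x)
print(out.shape)              # torch.Size([8, 12, 256])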


class TileEncoder(torch.nn.Module):
def __init__(self, input_size):
super().__init__()
Expand All @@ -82,10 +130,6 @@ def __init__(self, input_size):
self.tile_conv_1 = torch.nn.Conv2d(96, 64, 3)
self.tile_conv_2 = torch.nn.Conv2d(64, 32, 3)
self.tile_conv_3 = torch.nn.Conv2d(32, 16, 3)
# Tile Shape before conv: torch.Size([768, 96, 15, 15])
# Tile Shape after conv1: torch.Size([768, 32, 13, 13])
# Tile Shape after conv2: torch.Size([768, 8, 11, 11])
# Tile Shape after conv3: torch.Size([768, 4, 9, 9])
self.tile_fc = torch.nn.Linear(16 * 9 * 9, input_size)
self.activation = torch.nn.ReLU()

@@ -223,19 +267,6 @@ def forward(self, market):
return self.fc(market).mean(-2)


# class TaskEncoder(torch.nn.Module):
# def __init__(self, input_size, hidden_size, task_size):
# super().__init__()
# self.fc1 = torch.nn.Linear(task_size, hidden_size)
# self.fc2 = torch.nn.Linear(hidden_size, input_size)
# self.relu = torch.nn.ReLU()
# self.dropout = torch.nn.Dropout(0.2)

# def forward(self, task):
# x = self.relu(self.fc1(task))
# # x = self.dropout(x)
# encoded_task = self.fc2(x)
# return encoded_task
class TaskEncoder(torch.nn.Module):
def __init__(self, input_size, hidden_size, task_size):
super().__init__()
@@ -264,6 +295,9 @@ def __init__(self, input_size, hidden_size):
"inventory_use": torch.nn.Linear(hidden_size, hidden_size),
}
)
self.attn = SelfAttention(hidden_size, 4, hidden_size//4)
self.fc = torch.nn.Linear(hidden_size * 2, hidden_size)
self.activation = torch.nn.ReLU()

def apply_layer(self, layer, embeddings, mask, hidden):
hidden = layer(hidden)
@@ -282,7 +316,33 @@ def forward(self, hidden, lookup):
market_embeddings,
action_targets,
) = lookup

# player_embeddings.shape: torch.Size([768, 100, 256])
# inventory_embeddings.shape: torch.Size([768, 12, 256])
# market_embeddings.shape: torch.Size([768, 1024, 256])
# hidden.shape: torch.Size([768, 256])

player_embeddings_before = player_embeddings.clone()
inventory_embeddings_before = inventory_embeddings.clone()
hidden_before = hidden.clone()

player_embeddings = self.attn(player_embeddings)
inventory_embeddings = self.attn(inventory_embeddings)
hidden = hidden.unsqueeze(1)
hidden = self.attn(hidden)
hidden = hidden.squeeze(1)

player_embeddings = torch.cat(
[player_embeddings_before, player_embeddings], dim=-1)
inventory_embeddings = torch.cat(
[inventory_embeddings_before, inventory_embeddings], dim=-1)
hidden = torch.cat([hidden_before, hidden], dim=-1)
# print("Afterplayer_embeddings.shape: ", player_embeddings.shape)
# print("After inventory_embeddings.shape: ", inventory_embeddings.shape)
# print("After hidden.shape: ", hidden.shape)
# now can you use MLP to get the same shape as before as concat increases the shapes
player_embeddings = self.activation(self.fc(player_embeddings))
inventory_embeddings = self.activation(self.fc(inventory_embeddings))
hidden = self.activation(self.fc(hidden))
embeddings = {
"attack_target": player_embeddings,
"market_buy": market_embeddings,
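Taken in isolation, the new pattern in this forward pass is: run each embedding set through the shared SelfAttention, concatenate the result with the pre-attention embeddings, and project back to hidden_size. A minimal sketch with assumed sizes, using the classes and dimensions shown in the diff above:

import torch

hidden_size = 256
attn = SelfAttention(hidden_size, heads=4, embed_size=hidden_size // 4)  # output dim == hidden_size
fc = torch.nn.Linear(hidden_size * 2, hidden_size)
act = torch.nn.ReLU()

player_embeddings = torch.randn(768, 100, hidden_size)  # (batch, agents, features)
residual = player_embeddings.clone()                    # keep the pre-attention features
attended = attn(player_embeddings)                      # same shape: (768, 100, 256)
merged = torch.cat([residual, attended], dim=-1)        # (768, 100, 512)
player_embeddings = act(fc(merged))                     # projected back to (768, 100, 256)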