Commit 902386b: Final changes

saidinesh_pola committed Dec 14, 2023 · 1 parent 8651cdf

Showing 11 changed files with 2,092 additions and 182 deletions.
32 changes: 21 additions & 11 deletions environment.py
@@ -96,19 +96,24 @@ def reward_done_info(self, reward, done, info):
if self.agent_id in self.env.realm.players:
if self.env.realm.players[self.agent_id].resources.health_restore > 0:
healing_bonus = self.heal_bonus_weight
# TODO Add equip/harvest weapon bonus based on level improvement
# if self.env.realm.players[self.agent_id].inventory:
# print('\033[93m' + 'inventory log', self.env.realm.players[self.agent_id].ration_consumed,
# 'agent_id', self.agent_id, '\033[0m')

# Attacking bonus
attack_bonus = 0
gold_bonus = 0 # GoldBonus
attack_bonus = 0 # 0.0004
gold_bonus = 0
harvest_bonus = 0
give_bonus = 0
level_bonus = 0
equip_bonus = 0
if self.agent_id in self.env.realm.players:
log = self.env.realm.event_log.get_data(agents=[self.agent_id],
event_code=EventCode.PLAYER_KILL,
tick=self.env.realm.tick)
if log.shape[0] > 0 and log[0][-1] > 0:
attack_bonus = self.attack_bonus_weight
attack_log = self.env.realm.event_log.get_data(agents=[self.agent_id],
event_code=EventCode.PLAYER_KILL,
tick=self.env.realm.tick)
if attack_log.shape[0] > 0 and attack_log[0][-1] > 0:
attack_bonus = 0.00056

# log = self.env.realm.event_log.get_data(agents=[self.agent_id],
# event_code=EventCode.EARN_GOLD,
@@ -129,11 +134,11 @@ def reward_done_info(self, reward, done, info):
give_gold_log = self.env.realm.event_log.get_data(agents=[self.agent_id],
event_code=EventCode.GIVE_GOLD,
tick=self.env.realm.tick)
# equipment_log = self.env.realm.event_log.get_data(agents=[self.agent_id],
# event_code=EventCode.EQUIP_ITEM,
# tick=self.env.realm.tick)
# Combat, Fishing Skills
if llog.shape[0] > 0 and llog[0][-4] > 0 and (llog[0][-5] in range(1, 5)):
# print('\033[93m' + 'level up log', llog,
# 'agent_id', self.agent_id, '\033[0m')
# use switch case for llog[0][-4] >5 and <=5
if llog[0][-4] <= 5:
level_bonus = 0.01
else:
@@ -144,6 +149,11 @@ def reward_done_info(self, reward, done, info):
give_bonus = 0.03
if harvest_log.shape[0] > 0 and harvest_log[0][-3] > 0:
harvest_bonus = 0.01
# if equipment_log.shape[0] > 0 and equipment_log[0][-3] > 0:
# if equipment_log[0][-3] in range(1, 3):
# equip_bonus = 0.005*equipment_log[0][-3]
# else:
# equip_bonus = 0.01*equipment_log[0][-3]

# Add meandering bonus to encourage moving to various directions
meander_bonus = 0
@@ -164,7 +174,7 @@ def reward_done_info(self, reward, done, info):

reward = reward + explore_bonus + healing_bonus + \
meander_bonus + attack_bonus + gold_bonus + \
harvest_bonus+level_bonus + give_bonus
harvest_bonus+level_bonus + give_bonus + equip_bonus

return reward, done, info

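The reward shaping above follows a single pattern: query the realm's event log for this agent at the current tick, and add a small fixed bonus if the relevant event fired. A minimal sketch of that pattern (the helper name is hypothetical; the get_data call and the 0.00056 attack weight are taken from the diff above):

def event_bonus(realm, agent_id, event_code, weight, col=-1):
    # Rows for this agent and event at the current tick; pay the bonus
    # once if the logged value in the chosen column is positive.
    log = realm.event_log.get_data(agents=[agent_id],
                                   event_code=event_code,
                                   tick=realm.tick)
    if log.shape[0] > 0 and log[0][col] > 0:
        return weight
    return 0.0

# e.g. attack_bonus = event_bonus(self.env.realm, self.agent_id,
#                                 EventCode.PLAYER_KILL, 0.00056)
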
12 changes: 6 additions & 6 deletions reinforcement_learning/config.py
@@ -13,17 +13,17 @@ class Config:
# record_loss = False # log all minibatch loss and actions, for debugging

# Trainer Args
seed = 9
seed = 768
num_cores = None # Number of cores to use for training
num_envs = 6 # Number of environments to use for training
num_buffers = 2 # Number of buffers to use for training
rollout_batch_size = 2**15 # Number of steps to rollout
eval_batch_size = 2**15 # Number of steps to rollout for eval
train_num_steps = 10_000_000 # 10_000_000 # Number of steps to train
train_num_steps = 12_000_000 # 10_000_000 # Number of steps to train
eval_num_steps = 1_000_000 # 1_000_000 # Number of steps to evaluate
checkpoint_interval = 5_000_000 # Interval to save models
checkpoint_interval = 100 # epochs # Interval to save models
# f"nmmo_{time.strftime('%Y%m%d_%H%M%S')}_{seed}" # Run name
run_name = f"nmmo_rp_cr_attn_lstm_seed{seed}_exp17"
run_name = f"nmmo_cr_ls_si_att_EQUIP_seed{seed}_exp21"
runs_dir = "./runs" # Directory for runs
policy_store_dir = None # Policy store directory
use_serial_vecenv = False # Use serial vecenv implementation
@@ -60,8 +60,8 @@ class Config:
heal_bonus_weight = 0.03
meander_bonus_weight = 0.02
explore_bonus_weight = 0.01
gold_bonus_weight = 0 # 0.002
attack_bonus_weight = 0 # 0.03 added
gold_bonus_weight = 0 # 0.0002
attack_bonus_weight = 0 # 0.0003 added
spawn_immunity = 20

# Policy Args
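For scale (a rough estimate, assuming one training epoch per rollout batch): with rollout_batch_size = 2**15 = 32,768, train_num_steps = 12_000_000 corresponds to roughly 12_000_000 / 32_768 ≈ 366 epochs, so checkpoint_interval = 100 saves on the order of three intermediate checkpoints per run.
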
210 changes: 210 additions & 0 deletions reinforcement_learning/model_util.py
@@ -2,6 +2,8 @@
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import math
import numpy as np


class MultiHeadSelfAttention(nn.Module):
@@ -40,6 +42,109 @@ def forward(self, inputs, mask=None):
return x


class ConvCNN(nn.Module):
def __init__(self, insize, outsize, kernel_size=7, padding=2, pool=2, avg=True):
super(ConvCNN, self).__init__()
self.avg = avg
self.math = torch.nn.Sequential(
torch.nn.Conv2d(insize, outsize,
kernel_size=kernel_size, padding=padding),
torch.nn.LeakyReLU(),
torch.nn.MaxPool2d(pool, pool),
)
self.avgpool = torch.nn.AvgPool2d(pool, pool)

def forward(self, x):
x = self.math(x)
if self.avg is True:
x = self.avgpool(x)
return x


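A quick shape check for ConvCNN (input size chosen arbitrarily for illustration): the 7x7 convolution with padding 2 shrinks each spatial side by 2, and the max and average pools then halve it twice.

import torch

block = ConvCNN(insize=96, outsize=32)  # defaults: kernel 7, padding 2, pool 2, avg=True
x = torch.randn(1, 96, 18, 18)          # hypothetical 96-channel feature map
print(block(x).shape)                   # torch.Size([1, 32, 4, 4]): 18 -> 16 -> 8 -> 4
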
class Block(nn.Module):
'''Grouped convolution block.'''
expansion = 2

def __init__(self, in_planes, cardinality=32, bottleneck_width=4, stride=1):
super(Block, self).__init__()
group_width = cardinality * bottleneck_width
self.conv1 = nn.Conv2d(in_planes, group_width,
kernel_size=1, bias=False)

self.conv2 = nn.Conv2d(group_width, group_width, kernel_size=3, stride=stride, padding=1, groups=cardinality,
bias=False)

self.conv3 = nn.Conv2d(
group_width, self.expansion * group_width, kernel_size=1, bias=False)

self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion * group_width:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion * group_width,
kernel_size=1, stride=stride, bias=False)
)

def forward(self, x):
out = F.relu((self.conv1(x)))
out = F.relu((self.conv2(out)))
out = (self.conv3(out))
out += self.shortcut(x)
out = F.relu(out)
return out


class ResNeXt(nn.Module):
def __init__(self, num_blocks, cardinality, bottleneck_width, num_classes=256):
super(ResNeXt, self).__init__()
self.cardinality = cardinality
self.bottleneck_width = bottleneck_width
self.in_planes = 64

self.conv1 = nn.Conv2d(96, 64, kernel_size=1, bias=False)

self.layer1 = self._make_layer(num_blocks[0], 1)
self.layer2 = self._make_layer(num_blocks[1], 2)
self.layer3 = self._make_layer(num_blocks[2], 2)
# self.layer4 = self._make_layer(num_blocks[3], 2)

# self.linear = nn.Linear(cardinality*bottleneck_width*8, num_classes)
# self.linear = nn.Linear(3840, num_classes)
self.activation = torch.nn.ReLU()
self.sig = nn.Sigmoid()

def _make_layer(self, num_blocks, stride):
strides = [stride] + [1] * (num_blocks - 1)
layers = []
for stride in strides:
layers.append(Block(self.in_planes, self.cardinality,
self.bottleneck_width, stride))
self.in_planes = Block.expansion * self.cardinality * self.bottleneck_width
# Increase bottleneck_width by 2 after each stage.
self.bottleneck_width *= 2
return nn.Sequential(*layers)

def forward(self, x):
out = F.relu((self.conv1(x)))
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
# out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(x.size(0), -1)
# print (out.data.shape)
out = self.activation(out)
# out = F.log_softmax(out)
# out = self.sig(out)
return out


def ResNeXt29_2x64d():
"""
https://www.kaggle.com/code/solomonk/pytorch-resnext-cnn-end-to-end-lb-0-65?scriptVersionId=1872910&cellId=2
"""
return ResNeXt(num_blocks=[1, 1, 1], cardinality=4, bottleneck_width=8)
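
A shape sanity check for the ResNeXt trunk (the 16x16 spatial size is an assumption for illustration; conv1 expects 96 input channels): layer2 and layer3 each halve the spatial size, the 4x4 average pool collapses it, and the flattened embedding is 256-dimensional, matching the num_classes default.

import torch

net = ResNeXt29_2x64d()
x = torch.randn(2, 96, 16, 16)  # hypothetical batch of 96-channel feature maps
print(net(x).shape)             # torch.Size([2, 256]): 16 -> 16 -> 8 -> 4 -> pooled -> flat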


class TransformerBlock(nn.Module):
def __init__(self, dim=81, num_heads=3, expand=1, activation=F.relu):
super().__init__()
@@ -70,3 +175,108 @@ def forward(self, inputs):
x = self.fc2(x)
x = x + attn_out
return x


class PopArt(torch.nn.Module):

def __init__(self, input_shape, output_shape, norm_axes=1, beta=0.99999, epsilon=1e-5, device=torch.device("cpu")):

super(PopArt, self).__init__()

self.beta = beta
self.epsilon = epsilon
self.norm_axes = norm_axes
self.tpdv = dict(dtype=torch.float32, device=device)

self.input_shape = input_shape
self.output_shape = output_shape

self.weight = nn.Parameter(torch.Tensor(
output_shape, input_shape)).to(**self.tpdv)
self.bias = nn.Parameter(torch.Tensor(output_shape)).to(**self.tpdv)

self.stddev = nn.Parameter(torch.ones(
output_shape), requires_grad=False).to(**self.tpdv)
self.mean = nn.Parameter(torch.zeros(
output_shape), requires_grad=False).to(**self.tpdv)
self.mean_sq = nn.Parameter(torch.zeros(
output_shape), requires_grad=False).to(**self.tpdv)
self.debiasing_term = nn.Parameter(torch.tensor(
0.0), requires_grad=False).to(**self.tpdv)

self.reset_parameters()

def reset_parameters(self):
torch.nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
if self.bias is not None:
fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(
self.weight)
bound = 1 / math.sqrt(fan_in)
torch.nn.init.uniform_(self.bias, -bound, bound)
self.mean.zero_()
self.mean_sq.zero_()
self.debiasing_term.zero_()

def forward(self, input_vector):
if type(input_vector) == np.ndarray:
input_vector = torch.from_numpy(input_vector)
input_vector = input_vector.to(**self.tpdv)

return F.linear(input_vector, self.weight, self.bias)

@torch.no_grad()
def update(self, input_vector):
if type(input_vector) == np.ndarray:
input_vector = torch.from_numpy(input_vector)
input_vector = input_vector.to(**self.tpdv)

old_mean, old_var = self.debiased_mean_var()
old_stddev = torch.sqrt(old_var)

batch_mean = input_vector.mean(dim=tuple(range(self.norm_axes)))
batch_sq_mean = (
input_vector ** 2).mean(dim=tuple(range(self.norm_axes)))

self.mean.mul_(self.beta).add_(batch_mean * (1.0 - self.beta))
self.mean_sq.mul_(self.beta).add_(batch_sq_mean * (1.0 - self.beta))
self.debiasing_term.mul_(self.beta).add_(1.0 * (1.0 - self.beta))

self.stddev = (self.mean_sq - self.mean ** 2).sqrt().clamp(min=1e-4)

new_mean, new_var = self.debiased_mean_var()
new_stddev = torch.sqrt(new_var)

self.weight = self.weight * old_stddev / new_stddev
self.bias = (old_stddev * self.bias + old_mean - new_mean) / new_stddev

def debiased_mean_var(self):
debiased_mean = self.mean / self.debiasing_term.clamp(min=self.epsilon)
debiased_mean_sq = self.mean_sq / \
self.debiasing_term.clamp(min=self.epsilon)
debiased_var = (debiased_mean_sq - debiased_mean ** 2).clamp(min=1e-2)
return debiased_mean, debiased_var

def normalize(self, input_vector):
if type(input_vector) == np.ndarray:
input_vector = torch.from_numpy(input_vector)
input_vector = input_vector.to(**self.tpdv)

mean, var = self.debiased_mean_var()
out = (input_vector - mean[(None,) * self.norm_axes]
) / torch.sqrt(var)[(None,) * self.norm_axes]

return out

def denormalize(self, input_vector):
if type(input_vector) == np.ndarray:
input_vector = torch.from_numpy(input_vector)
input_vector = input_vector.to(**self.tpdv)

mean, var = self.debiased_mean_var()
out = input_vector * \
torch.sqrt(var)[(None,) * self.norm_axes] + \
mean[(None,) * self.norm_axes]

out = out.cpu().numpy()

return out
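
A minimal usage sketch for PopArt as a value head (shapes and variable names are assumptions, not taken from this repo): update the running statistics with raw return targets, train the critic against the normalized targets, and denormalize predictions when values in the original scale are needed.

import torch

value_head = PopArt(input_shape=256, output_shape=1)

features = torch.randn(32, 256)         # critic features for a batch
returns = torch.randn(32, 1) * 50 + 10  # raw, unnormalized value targets

value_head.update(returns)              # refresh running mean/std and rescale weight/bias
norm_targets = value_head.normalize(returns)
values = value_head(features)           # linear output on the normalized scale
value_loss = ((values - norm_targets) ** 2).mean()

raw_values = value_head.denormalize(values.detach())  # NumPy array, back in the original scale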