-
Notifications
You must be signed in to change notification settings - Fork 1
/
pac_trainer.py
135 lines (113 loc) · 4.62 KB
/
pac_trainer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
"""
Introductory Deep Learning exercise for training agents to navigate
small Pacman Mazes
"""
import time
import random
import re
import pandas as pd
import torch
import torch.nn.functional as F
from torch import nn
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from constants import *
from maze_gen import MazeGen
class PacmanMazeDataset(Dataset):
    """
    PyTorch Dataset extension used to vectorize Pacman mazes consisting of the
    entities listed in Constants.ENTITIES so that they can be used for neural
    network training.
    See: https://pytorch.org/tutorials/beginner/basics/data_tutorial.html
    """

    # [!] Class maps that may be useful for vectorizing the maze: each one
    # maps an entity / move to its position in the corresponding Constants list
    maze_entity_indexes = {entity: index for index, entity in enumerate(Constants.ENTITIES)}
    move_indexes = {move: index for index, move in enumerate(Constants.MOVES)}

    def __init__(self, training_data):
        """
        Stores the raw training data and, when it is non-empty, prints the
        first vectorized sample as a quick sanity check.

        :training_data: Table of samples that supports len() and positional
            row access through `.iloc`, with an "X" column (raw maze) and a
            "y" column (move label). Presumably a pandas DataFrame; confirm
            against the caller.
        """
        self.training_data = training_data
        # Guarded so that an empty dataset can still be constructed instead of
        # failing on the lookup of row 0. len() is used rather than truthiness
        # because table-like containers may not define an unambiguous bool().
        if len(self.training_data) > 0:
            print(self[0])

    def __len__(self):
        """
        :returns: Number of samples (rows) in the training data
        """
        return len(self.training_data)

    def __getitem__(self, idx):
        """
        Fetches the sample at positional index idx and vectorizes it.

        :idx: Positional row index into the training data
        :returns: Tuple of (vectorized maze, vectorized move)
        """
        row = self.training_data.iloc[idx]
        maze, move = row["X"], row["y"]
        return PacmanMazeDataset.vectorize_maze(maze), PacmanMazeDataset.vectorize_move(move)

    @staticmethod
    def vectorize_maze(maze):
        '''
        Converts the raw input maze (some Strings representing the Maze
        entities as specified in Constants.ENTITIES) into the vectorized
        input layer for the PacNet.
        [!] Indices of maze entities should always correspond to their
        order in Constants.ENTITIES; see maze_entity_indexes map as
        a convenient tool for ensuring this.
        [!] Used in both training and deployment
        [!] Declared as a static method so that it behaves the same whether
        it is called on the class or on an instance.
        :maze: String grid representation of the maze and its entities
        :returns: 1-D numerical pytorch tensor representing the maze
            (currently a placeholder empty tensor until Task 1 is completed)
        '''
        # TODO: Task 1 - Part 1
        return torch.tensor([])

    @staticmethod
    def vectorize_move(move):
        '''
        Converts the given move from the possibilities of Constants.MOVES to
        the one-hot pytorch tensor representation.
        [!] Indices of moves should always correspond to their
        order in Constants.MOVES; see move_indexes map as a convenient
        tool for ensuring this.
        [!] Used in both training and deployment
        [!] Declared as a static method so that it behaves the same whether
        it is called on the class or on an instance.
        :move: String representing an action to be taken
        :returns: One-hot vector representation of that action
            (currently a placeholder empty tensor until Task 1 is completed)
        '''
        # TODO: Task 1 - Part 2
        return torch.tensor([])
class PacNet(nn.Module):
    """
    Neural network (PyTorch nn.Module subclass) for the Pacman gridworld. Each
    network is fit to one particular maze configuration, in which walls, and
    optionally pellets, sit in fixed spots.
    See: https://pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html
    """

    def __init__(self, maze):
        """
        Builds a PacNet for the given maze. The maze determines configuration
        details such as the number of rows and cols. The network is meant for
        imitation learning: it picks one of the 4 actions in Constants.MOVES in
        response to the positional maze entities in Constants.ENTITIES.
        :maze: The Pacman Maze structure on which this PacNet will be trained
        """
        super().__init__()
        # TODO: Task 3 Here

    def forward(self, x):
        """
        Runs input maze x through the network and returns the result.
        [!] Relies on self.linear_relu_stack, which is not assigned anywhere
        in this class yet (see the Task 3 TODO in __init__).
        :x: Raw input vector at the first layer of the neural network
        :returns: Output activations
        """
        return self.linear_relu_stack(x)
def train_loop(dataloader, model, loss_fn, optimizer):
    """
    Runs one pass of the PyTorch optimization loop over every batch yielded by
    the dataloader, reporting the loss on every 100th batch. Need not be
    modified unless tweaks are desired.
    See: https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html

    :dataloader: Iterable of (X, y) batches that exposes a sized `.dataset`
    :model: Callable producing predictions for a batch X
    :loss_fn: Callable producing a loss object from (prediction, y)
    :optimizer: Object providing zero_grad() and step()
    """
    total_samples = len(dataloader.dataset)
    for batch_idx, (inputs, targets) in enumerate(dataloader):
        # Forward pass: prediction and its loss
        batch_loss = loss_fn(model(inputs), targets)

        # Backward pass: clear stale gradients, backpropagate, update weights
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Progress is only reported on every 100th batch
        if batch_idx % 100 != 0:
            continue
        loss_value = batch_loss.item()
        samples_seen = batch_idx * len(inputs)
        print(f"loss: {loss_value:>7f} [{samples_seen:>5d}/{total_samples:>5d}]")
if __name__ == "__main__":
    # Script entry point: load the training data, construct the PacNet, train
    # it, and finally save the network's parameters for use by the pacman
    # agent.
    # See: https://pytorch.org/tutorials/beginner/basics/optimization_tutorial.html

    # TODO: Task 2 Here
    # TODO: Task 4 Here
    # TODO: Task 5 Here
    # TODO: Task 6 Here
    pass