-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
212 lines (163 loc) · 7.62 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from transformers import BatchEncoding
import matplotlib.pyplot as plt
from matplotlib.patches import Circle, RegularPolygon
from matplotlib.path import Path
from matplotlib.projections.polar import PolarAxes
from matplotlib.projections import register_projection
from matplotlib.spines import Spine
from matplotlib.transforms import Affine2D
def get_device():
    """Return the torch device string to run on: "cuda" when a GPU is
    available, otherwise "cpu"."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"
def score_text(model, tokenizer, story):
    """Compute per-word surprisal (in bits) for a pre-tokenized story.

    Parameters
    ----------
    model : causal language model (HF-style); assumed to expose
        ``config.max_length`` — TODO confirm for the models used here.
    tokenizer : matching tokenizer; the "Ġ" stripping below assumes a
        byte-level BPE tokenizer (e.g. GPT-2) — confirm for other tokenizers.
    story : list[str]
        The text split into words. NOTE: mutated destructively — entries are
        emptied in place while aligning subword tokens to words.

    Returns
    -------
    pandas.DataFrame
        Columns "word" (reassembled from subword tokens) and "surprisal"
        (subword surprisals summed per word, rounded to 4 decimals).
    """
    device = get_device()
    tokenized = tokenizer(story, is_split_into_words=True)
    # drop the first special token; an EOS token is re-added as a per-row
    # prefix when the batch is assembled below
    input_ids = tokenized["input_ids"][1:]
    batch_size = int(np.ceil(len(input_ids)/model.config.max_length))
    to_pad = model.config.max_length * batch_size - len(input_ids)
    # batched input ids
    # pad to batch_size*(max_length-1) tokens so each row has room for the
    # EOS prefix; NOTE(review): if len(input_ids) exceeds
    # batch_size*(max_length-1), range(to_pad-batch_size) is empty/negative
    # and the reshape below would fail — confirm inputs never hit this edge
    input_ids = input_ids + [tokenizer.pad_token_id for _ in range(to_pad-batch_size)]
    input_ids = torch.LongTensor(input_ids).reshape(batch_size, model.config.max_length-1)
    # batched attention mask (padded positions masked out with 0)
    attention_mask = tokenized["attention_mask"][1:]
    attention_mask = attention_mask + [0 for _ in range(to_pad-batch_size)]
    attention_mask = torch.LongTensor(attention_mask).reshape(batch_size, model.config.max_length-1)
    # each row becomes [EOS] + (max_length-1) tokens; labels mirror input_ids
    # for standard LM scoring
    batch = BatchEncoding({
        "input_ids": torch.stack(
            [torch.concat([torch.LongTensor([tokenizer.eos_token_id]), t]).to(device) for t in input_ids]
        ),
        "labels": torch.stack(
            [torch.concat([torch.LongTensor([tokenizer.eos_token_id]), t]).to(device) for t in input_ids]
        ),
        "attention_mask": torch.stack(
            [torch.concat([torch.LongTensor([1]), t]).to(device) for t in input_ids]
        )
    })
    assert batch["input_ids"].shape == (batch_size, model.config.max_length), batch["input_ids"].shape
    # inference
    with torch.no_grad():
        output = model(**batch)
    # calculate word-level surprisal
    words, word_surprisal = [], []
    curr_word_ix = 0
    curr_word_surp = []
    curr_toks = ""
    # NOTE: `input_ids` is rebound here to each batch row, shadowing the
    # batched tensor built above
    for logits, input_ids in zip(output.logits, batch["input_ids"]):
        # targets are the inputs shifted left by one position
        output_ids = input_ids[1:]
        tokens = [tok for tok in tokenizer.convert_ids_to_tokens(output_ids) if tok != tokenizer.pad_token]
        indices = torch.arange(0, output_ids.shape[0]).to(device)
        # surprisal = -log2 P(token | context), gathered at each target id
        surprisal = -1*torch.log2(F.softmax(logits, dim=-1)).squeeze(0)[indices, output_ids]
        for i in range(0, len(tokens)):
            # necessary for diacritics in Dundee
            cleaned_tok = tokens[i].replace("Ġ", "", 1).encode("latin-1").decode("utf-8")
            # for word-level surprisal
            curr_word_surp.append(surprisal[i].item())
            curr_toks += cleaned_tok
            # summing subword token surprisal ("rolling"): strip the matched
            # subword from the current word; when the word string empties,
            # the word is complete
            story[curr_word_ix] = story[curr_word_ix].replace(cleaned_tok, "", 1)
            if story[curr_word_ix] == "":
                words.append(curr_toks)
                word_surprisal.append(np.round(sum(curr_word_surp),4))
                curr_word_surp = []
                curr_toks = ""
                curr_word_ix += 1
    assert len(words) == len(story), f"len(story)={len(story)} != len(words)={len(words)}"
    return pd.DataFrame({
        "word": words,
        "surprisal": word_surprisal
    })
def print_latex_table(latex_table_content):
    """Print *latex_table_content* as a LaTeX ``tabular`` environment on stdout.

    Parameters
    ----------
    latex_table_content : dict[str, dict[str, str]]
        Maps task name -> {config name -> pre-formatted cell string}. Must
        contain an "average" entry, which is printed last as a bold summary
        row. All rows are assumed to share the key order of the first row.

    Returns
    -------
    None (output goes to stdout).
    """
    first_task = next(iter(latex_table_content))
    configs_short = list(latex_table_content[first_task])
    # Fix: column spec now matches the actual number of config columns
    # (was hard-coded to 24 extra columns regardless of the data).
    print("\\begin{tabular}{l|c " + " c " * len(configs_short) + "}")
    # Fix: "\\hline" instead of "\hline" — "\h" is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+); the emitted text is unchanged.
    print("\\hline")
    header = "\\textbf{Task} & " + " & ".join(f"\\textbf{{{config}}}" for config in configs_short) + " \\\\"
    print(header)
    print("\\hline\\hline")
    for task, row in latex_table_content.items():
        if task != "average":
            # underscores in task names become hyphens for the LaTeX output
            print(f"{task.replace('_', '-')} & " + " & ".join(row.values()) + " \\\\")
            print("\\hline")
    avg_row = latex_table_content["average"]
    print("\\textbf{Average} & " + " & ".join(avg_row.values()) + " \\\\")
    print("\\hline")
    print("\\end{tabular}")
# code for radar chart taken from: https://matplotlib.org/stable/gallery/specialty_plots/radar_chart.html
def radar_factory(num_vars, frame='circle'):
    """
    Create a radar chart with `num_vars` axes.

    This function creates a RadarAxes projection and registers it.

    Parameters
    ----------
    num_vars : int
        Number of variables for radar chart.
    frame : {'circle', 'polygon'}
        Shape of frame surrounding axes.

    Returns
    -------
    numpy.ndarray
        The evenly-spaced axis angles (radians); pass these as the theta
        values when plotting on an axes created with projection='radar'.
    """
    # calculate evenly-spaced axis angles
    theta = np.linspace(0, 2*np.pi, num_vars, endpoint=False)

    class RadarTransform(PolarAxes.PolarTransform):

        def transform_path_non_affine(self, path):
            # Paths with non-unit interpolation steps correspond to gridlines,
            # in which case we force interpolation (to defeat PolarTransform's
            # autoconversion to circular arcs).
            if path._interpolation_steps > 1:
                path = path.interpolated(num_vars)
            return Path(self.transform(path.vertices), path.codes)

    class RadarAxes(PolarAxes):
        # registered projection name: use projection='radar' in add_subplot
        name = 'radar'
        PolarTransform = RadarTransform

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # rotate plot such that the first axis is at the top
            self.set_theta_zero_location('N')

        def fill(self, *args, closed=True, **kwargs):
            """Override fill so that line is closed by default"""
            return super().fill(closed=closed, *args, **kwargs)

        def plot(self, *args, **kwargs):
            """Override plot so that line is closed by default"""
            # NOTE(review): unlike Axes.plot, this returns None rather than
            # the line list — callers relying on the return value get nothing
            lines = super().plot(*args, **kwargs)
            for line in lines:
                self._close_line(line)

        def _close_line(self, line):
            # append the first point to the end so the polygon closes
            x, y = line.get_data()
            # FIXME: markers at x[0], y[0] get doubled-up
            if x[0] != x[-1]:
                x = np.append(x, x[0])
                y = np.append(y, y[0])
                line.set_data(x, y)

        def set_varlabels(self, labels):
            # place one label at each axis angle
            self.set_thetagrids(np.degrees(theta), labels)

        def _gen_axes_patch(self):
            # The Axes patch must be centered at (0.5, 0.5) and of radius 0.5
            # in axes coordinates.
            if frame == 'circle':
                return Circle((0.5, 0.5), 0.5)
            elif frame == 'polygon':
                return RegularPolygon((0.5, 0.5), num_vars,
                                      radius=.5, edgecolor="k")
            else:
                raise ValueError("Unknown value for 'frame': %s" % frame)

        def _gen_axes_spines(self):
            if frame == 'circle':
                return super()._gen_axes_spines()
            elif frame == 'polygon':
                # spine_type must be 'left'/'right'/'top'/'bottom'/'circle'.
                spine = Spine(axes=self,
                              spine_type='circle',
                              path=Path.unit_regular_polygon(num_vars))
                # unit_regular_polygon gives a polygon of radius 1 centered at
                # (0, 0) but we want a polygon of radius 0.5 centered at (0.5,
                # 0.5) in axes coordinates.
                spine.set_transform(Affine2D().scale(.5).translate(.5, .5)
                                    + self.transAxes)
                return {'polar': spine}
            else:
                raise ValueError("Unknown value for 'frame': %s" % frame)

    register_projection(RadarAxes)
    return theta