forked from jbrew/stereotype
-
Notifications
You must be signed in to change notification settings - Fork 0
/
voicebox.py
379 lines (341 loc) · 15.7 KB
/
voicebox.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
from six.moves import range
from six.moves import input
__author__ = 'jamiebrew'
import os
import corpus
import operator
import textwrap
import random
import re
# import urllib2
# from bs4 import BeautifulSoup
# import unicodedata
"""
input loop for writing with a corpus or set of corpora
WRITING CONTROLS:
[option #]: choose option
x: delete
. or ? or !: end sentence
z: move cursor left
c: move cursor right
r: choose random word (weighted)
v[voice #]: change voice
rand[#]: randomly choose # words (weighted)
o[#]: set the number of options
add: add voice
set: set corpus weights for this voice
info: toggle extra info
dynamic: toggle dynamic writing
save: save session
load: load session
[other word]: insert word
0: yield output
"""
class Voicebox(object):
    """Interactive console session for writing text with word suggestions.

    A session holds a set of named voices, the currently active voice, a log
    of everything written so far, and the cursor used while editing the
    current sentence.

    Instance attributes set by ``__init__``:
        more_info (bool): show per-source score details next to suggestions.
        dynamic (bool): adjust source weights after each accepted suggestion.
        mode_list (list), mode (str): available / selected mode names.
        num_options (int): number of options requested with the ``o#`` command.
        cursor (str): marker printed at the cursor position.
        cursor_position (int): index in the current sentence where the next
            word is inserted (index 0 is the start token).
        voices (dict): voice name -> voice object.
        active_voice: the voice currently providing suggestions.
        log (list): strings written so far, joined with spaces for display.

    NOTE(review): this class references the names ``voice`` and ``pickler``,
    but no import for either is visible in this file -- confirm they are
    brought into scope somewhere before relying on add/save/load.
    """

    # Token kept at index 0 of the sentence being edited. It is passed to the
    # voice as context but never displayed or logged.
    START_TOKEN = 'START_SENTENCE'

    # Commands made of a prefix followed by a number: v3, rand10, o5.
    # Anchored so that ordinary words that merely contain e.g. "v1" are not
    # mistaken for commands.
    _NUMBERED_COMMAND = re.compile(r'(v|rand|o)([0-9]+)$')

    def __init__(self):
        """Prompt to load a saved session; otherwise build voices from scratch."""
        self.more_info = False
        self.dynamic = False
        self.mode_list = ['frequency', 'sigscore', 'count']
        self.mode = 'frequency'
        self.num_options = 20

        load_prev = input('Load previous session? y/n\n')
        if load_prev != 'n':
            # Any answer other than 'n' loads a previously saved object.
            self._restore_from(self.load_session())
        else:
            self.cursor = "|"
            self.cursor_position = 0
            self.voices = {}
            self.load_voices()
            self.active_voice = self.choose_voice()
            self.log = []

    def _restore_from(self, loaded_voicebox):
        """Copy the session state of ``loaded_voicebox`` onto this instance."""
        self.cursor = loaded_voicebox.cursor
        self.cursor_position = loaded_voicebox.cursor_position
        self.voices = loaded_voicebox.voices
        self.active_voice = loaded_voicebox.active_voice
        self.log = loaded_voicebox.log

    def header(self):
        """Return the banner listing every voice (and, for voices with more
        than one source, each source and its weight)."""
        parts = ["\nVOICES\n"]
        for vnum, voice_key in enumerate(sorted(self.voices), 1):
            v = self.voices[voice_key]
            parts.append('v%s. %s\n' % (vnum, v.name))
            # Only print the component sources of voices with more than one.
            if len(v.weighted_corpora) > 1:
                for source_num, corpus_key in enumerate(sorted(v.weighted_corpora), 1):
                    c, wt = v.weighted_corpora[corpus_key]
                    parts.append('\ts%s. %s, weight %s\n' % (source_num, c.name, wt))
        parts.append("\n____________________")
        return "".join(parts)

    def write(self):
        """Run the interactive writing loop until the user enters ``0``.

        Each pass shows the header, the text so far with the cursor, and the
        active voice's suggestions, then reads one command:

        * a number in ``1..len(suggestions)`` inserts that suggestion;
        * ``0`` appends the current sentence to the log, prints it, returns;
        * ``z`` / ``c`` move the cursor, ``x`` deletes the word before it;
        * ``r`` / ``rand#`` insert weighted-random suggestions;
        * ``v#`` switches voice, ``o#`` sets the number of options;
        * ``.``, ``?``, ``!`` finish the sentence;
        * ``info``, ``dynamic``, ``add``, ``set``, ``save``, ``load``;
        * any other non-blank text is inserted as a word.
        """
        sentence = [self.START_TOKEN]
        self.cursor_position = 1
        voice_name = self.active_voice.name.upper()
        self.log += [voice_name + ':']

        while True:
            words_before = sentence[0:self.cursor_position]
            words_after = sentence[self.cursor_position:]
            suggestions = self.active_voice.suggest(words_before[-2:], words_after[0:2])

            print(self.header())
            print(textwrap.fill(" ".join(self.log + words_before[1:] + [self.cursor] + words_after), 80))
            self.display_suggestions(suggestions)

            user_input = input('What now?\n')

            # Only a failed integer parse means "not a number"; any other
            # error raised while handling a number must not be swallowed.
            try:
                number = int(user_input)
            except ValueError:
                number = None

            if number is not None:
                if 1 <= number <= len(suggestions):
                    choice = self.take_suggestion(suggestions, number)
                    next_word = choice[0]
                    score_tree = choice[1][1]
                    words_before.append(next_word)
                    sentence = words_before + words_after
                    if self.dynamic:
                        self.update_weights(self.active_voice, score_tree, .1)
                    self.cursor_position += 1
                elif number == 0:
                    # sentence[0] is always the start token; leave it out.
                    self.log = self.log + sentence[1:]
                    print(" ".join(self.log))
                    return
                else:
                    print("That's out of range!")
                continue

            numbered = self._NUMBERED_COMMAND.match(user_input)
            if numbered:
                command, digits = numbered.groups()
            else:
                command, digits = None, None

            if user_input == 'z':
                # Never move in front of the start token.
                self.cursor_position = max(1, self.cursor_position - 1)
            elif user_input == 'c':
                # Never move past the end of the sentence.
                self.cursor_position = min(len(sentence), self.cursor_position + 1)
            elif user_input == 'x':
                # Only step back when a word was actually removed.
                if self.delete_word(words_before):
                    self.cursor_position -= 1
                    sentence = words_before + words_after
            elif user_input == 'r':
                if suggestions:
                    next_word = self.weighted_random_choice(suggestions)
                    words_before.append(next_word)
                    sentence = words_before + words_after
                    self.cursor_position += 1
                else:
                    print("No suggestions to choose from.")
            elif user_input == 'info':
                self.toggle_info()
            elif user_input == 'dynamic':
                self.toggle_dynamic()
            elif user_input == 'add':
                self.add_voice()
            elif user_input == 'set':
                self.set_weights(self.active_voice)
            elif command == 'v':  # switch to a different voice
                voice_keys = sorted(self.voices.keys())
                voice_index = int(digits) - 1
                if 0 <= voice_index < len(voice_keys):
                    chosen_voice_name = voice_keys[voice_index]
                    self.active_voice = self.voices[chosen_voice_name]
                    print('%s chosen!' % chosen_voice_name)
                    finished_sentence = self.finish_sentence(words_before, words_after, '.', '\n\n')
                    self.log = self.log + [finished_sentence] + [chosen_voice_name.upper() + ':']
                    sentence = [self.START_TOKEN]
                    # A fresh sentence starts right after the start token.
                    self.cursor_position = 1
                else:
                    print("That's out of range!")
            elif command == 'rand':  # insert several weighted-random words
                for _ in range(int(digits)):
                    if not suggestions:
                        print("No suggestions to choose from.")
                        break
                    next_word = self.weighted_random_choice(suggestions)
                    words_before.append(next_word)
                    sentence = words_before + words_after
                    self.cursor_position += 1
                    # Re-derive context so the next pick follows the new word.
                    words_before = sentence[0:self.cursor_position]
                    words_after = sentence[self.cursor_position:]
                    suggestions = self.active_voice.suggest(words_before[-2:], words_after[0:2])
            elif command == 'o':  # change number of options
                self.num_options = int(digits)
                print('Now writing with %s options!' % digits)
            elif user_input in ['.', '?', '!']:
                finished_sentence = self.finish_sentence(words_before, words_after, user_input)
                self.log = self.log + [finished_sentence]
                sentence = [self.START_TOKEN]
                self.cursor_position = 1
            elif user_input == 'save':
                self.save_session()
            elif user_input == 'load':
                # Adopt the loaded session's voices and log, but keep the
                # cursor where it is in the sentence currently being edited.
                current_position = self.cursor_position
                self._restore_from(self.load_session())
                self.cursor_position = current_position
            elif user_input.strip():
                words_before.append(user_input)
                sentence = words_before + words_after
                self.cursor_position += 1
            else:
                print("Invalid input.")

    def toggle_dynamic(self):
        """Toggle whether source weights in the current voice adjust automatically."""
        self.dynamic = not self.dynamic
        if self.dynamic:
            print("Dynamic weight adjustment on!")
        else:
            print("Dynamic weight adjustment off!")

    def toggle_info(self):
        """Toggle whether scores (and their decomposition by source) are shown."""
        self.more_info = not self.more_info
        if self.more_info:
            print("More info on!")
        else:
            print("More info off!")

    def set_mode(self):
        """List the entries of ``mode_list`` and set ``mode`` to the one chosen."""
        for i, mode_name in enumerate(self.mode_list):
            print("%s %s" % (i + 1, mode_name))
        choice = input('Enter the number of the mode you want to use:\n')
        self.mode = self.mode_list[int(choice) - 1]

    def save_session(self):
        """Prompt for a name and save this object under ``saved/<name>.pkl``."""
        path = 'saved/%s.pkl' % input("Choose save name:\n")
        pickler.save_object(self, path)
        print("Saved voicebox to %s!" % path)

    def load_session(self):
        """List the files in ``saved``, prompt for one, and return the loaded object.

        NOTE(review): the helper is spelled ``loadobject`` here but
        ``save_object`` in ``save_session`` -- confirm against the pickler
        module. Presumably this unpickles the file, so only load session
        files from a trusted source.
        """
        sessions = os.listdir('saved')
        for i, session in enumerate(sessions):
            print("%s %s" % (i + 1, session))
        choice = input('Enter the number of the session you want to load:\n')
        session_name = sessions[int(choice) - 1]
        path = 'saved/%s' % session_name
        return pickler.loadobject(path)

    def update_weights(self, v, score_tree, delta):
        """Nudge each source weight of ``v`` toward its share of a chosen word's score.

        ``score_tree`` maps source keys to the score each source gave the
        chosen word. A source's weight moves by ``delta`` times the difference
        between its actual share of the total score and its current weight.
        Sources missing from ``score_tree`` count as contributing 0. When the
        total score is 0 there are no shares to compare, so nothing changes.
        """
        total_score = sum(score_tree.values())
        if not total_score:
            return
        for key in v.weighted_corpora:
            expected_share = v.weighted_corpora[key][1]
            # float() keeps this a true division under Python 2 as well.
            actual_share = float(score_tree.get(key, 0)) / total_score
            performance_relative_to_expectation = actual_share - expected_share
            # Write back through the same key that was read, so the update
            # does not depend on the key matching the corpus name.
            v.weighted_corpora[key][1] = expected_share + performance_relative_to_expectation * delta

    def set_weights(self, v):
        """Prompt for a weight for each corpus in ``v``, then normalize the weights."""
        for key in v.weighted_corpora:
            corpus_name = v.weighted_corpora[key][0].name
            corpus_weight_prompt = 'Enter the weight for %s:\n' % corpus_name
            corpus_weight = float(input(corpus_weight_prompt))
            v.weighted_corpora[key][1] = corpus_weight
        v.normalize_weights()

    def flat_random_choice(self, suggestions):
        """Return a uniformly random 1-based position within ``suggestions``."""
        return random.randint(1, len(suggestions))

    def weighted_random_choice(self, suggestions):
        """Return a word from ``suggestions``, chosen with probability
        proportional to its score.

        Each suggestion is a ``(word, score_info)`` pair where
        ``score_info[0]`` is the word's score.

        Raises:
            ValueError: if ``suggestions`` is empty.
        """
        if not suggestions:
            raise ValueError("cannot choose from an empty suggestion list")
        total = sum(score_info[0] for word, score_info in suggestions)
        r = random.uniform(0, total)
        upto = 0
        for word, score_info in suggestions:
            if upto + score_info[0] >= r:
                return word
            upto += score_info[0]
        # Only reachable through floating-point rounding; the last suggestion
        # is the one whose interval ends at ``total``.
        return suggestions[-1][0]

    def delete_word(self, before):
        """Remove the last word of ``before`` in place.

        The first element (the start token) is never removed. Returns True if
        a word was deleted, False otherwise.
        """
        if len(before) == 1:
            print("Cannot delete the start of the sentence!")
            return False
        del before[-1]  # remove the word just before the cursor
        return True

    def finish_sentence(self, before, after, delimiter, line_break=''):
        """Join the words around the cursor into one string ending in ``delimiter``.

        The first element of ``before`` (the start token) is dropped. When no
        words remain, no delimiter is added.
        """
        sentence = before[1:] + after
        if sentence:
            sentence[-1] += delimiter
        # NOTE(review): extending a list with a string appends each character
        # as its own token, so '\n\n' ends up as "... \n \n" after the join.
        # Kept as-is; confirm whether a single trailing break was intended.
        sentence.extend(line_break)
        return " ".join(sentence)

    def load_voices(self):
        """Add voices interactively until the user answers 'n'."""
        # Loading from a transcript is switched off: the prompt that used to
        # set this value is disabled, so the transcript branch never runs.
        load_from_transcript = 'n'
        if load_from_transcript in ['y', 'yes']:
            self.load_voices_from_transcript()
        else:
            add_another_voice = ''
            while add_another_voice != 'n':
                self.add_voice()
                add_another_voice = input('Add more? y/n\n')

    def add_voice(self):
        """Prompt for corpora from the files in ``texts`` and register a new
        voice built from them under a user-chosen name."""
        new_voice = voice.Voice({})  # new voice with no name and no corpora yet
        texts = os.listdir('texts')
        add_another_corpus = ''
        while add_another_corpus != 'n':
            for i, text_name in enumerate(texts):
                print("%s %s" % (i + 1, text_name))
            choice = input('Enter the number of the corpus you want to load:\n')
            corpus_name = texts[int(choice) - 1]
            path = 'texts/%s' % corpus_name
            with open(path, 'r') as f:
                text = f.read()
            corpus_weight_prompt = 'Enter the weight for %s:\n' % corpus_name
            corpus_weight = float(input(corpus_weight_prompt))
            new_voice.add_corpus(corpus.Corpus(text, corpus_name), corpus_weight)
            # A corpus can be added to a voice only once.
            texts.remove(corpus_name)
            if not texts:
                break  # nothing left to offer
            add_another_corpus = input('Add another corpus to this voice? y/n\n')
        voicename = input('Name this voice:\n')
        new_voice.name = voicename
        new_voice.normalize_weights()
        self.voices[voicename] = new_voice

    def load_voices_from_transcript(self):
        """Prompt for a transcript folder and a count, then create one voice
        for each of that many largest character files in the folder."""
        transcripts = os.listdir('texts/transcripts')
        for i, transcript in enumerate(transcripts):
            print("%s %s" % (i + 1, transcript))
        choice = input('Enter the number of the transcript you want to load:\n')
        transcript_name = transcripts[int(choice) - 1]
        number = int(input('Enter the number of voices to load:\n'))
        for charname, size in self.biggest_characters(transcript_name, number):
            print(charname)
            path = 'texts/transcripts/%s/%s' % (transcript_name, charname)
            with open(path) as f:
                source_text = f.read()
            corpus_name = charname
            weighted_corpora = {}
            weighted_corpora[charname] = [corpus.Corpus(source_text, corpus_name), 1]
            self.voices[charname] = voice.Voice(weighted_corpora, charname)

    def biggest_characters(self, tname, number):
        """Return ``(file name, word count)`` pairs for the ``number`` largest
        files in ``texts/transcripts/<tname>``, largest first."""
        size_by_name = {}
        tpath = 'texts/transcripts/%s' % tname
        for cname in os.listdir(tpath):
            cpath = '%s/%s' % (tpath, cname)
            with open(cpath) as f:
                size_by_name[cname] = len(f.read().split())
        sorted_chars = sorted(size_by_name.items(), key=operator.itemgetter(1))
        sorted_chars.reverse()
        return sorted_chars[0:number]

    def choose_voice(self):
        """List the voices, prompt for one, make it active, and return it."""
        voice_keys = sorted(self.voices.keys())
        print("VOICES:")
        for i, voice_key in enumerate(voice_keys):
            print("%s: %s" % (i + 1, voice_key))
        choice = input('Choose a voice by entering a number...\n')
        self.active_voice = self.voices[voice_keys[int(choice) - 1]]
        return self.active_voice

    def display_suggestions(self, suggestions):
        """Print the numbered suggestions, one per line.

        With ``more_info`` on, each line also shows the summed score and the
        score contributed by each source.
        """
        pieces = ['\n']
        for number, suggestion in enumerate(suggestions, 1):
            score_tree = suggestion[1][1]
            pieces.append("%s: %s" % (number, str(suggestion[0])))
            if self.more_info:
                total_score = format(sum(score_tree.values()), 'g')
                pieces.append('\t' + str(total_score))
                pieces.append('\t\t')
                for key in score_tree:
                    score = format(score_tree[key], 'g')
                    pieces.append('\t%s: %s' % (key, score))
            pieces.append('\n')
        print("".join(pieces))

    def take_suggestion(self, suggestions, user_input):
        """Return the suggestion at 1-based position ``user_input``."""
        return suggestions[int(user_input) - 1]
def main():
    """Create a Voicebox (which prompts for its setup) and run its writing loop."""
    vb = Voicebox()
    vb.write()


# Only start the interactive session when run as a script, so importing this
# module does not block on console input.
if __name__ == '__main__':
    main()