-
Notifications
You must be signed in to change notification settings - Fork 3
/
pop_prose_splits.py
52 lines (45 loc) · 1.5 KB
/
pop_prose_splits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import common
import pickle
import os
# Default number of distinct song keys that get_recent_keys() collects.
N_SONGS = 100
# Default number of leading entries taken from each chart (chart[:MIN_CHART_POS]);
# presumably these are the top chart positions -- confirm against common.get_chartdata().
MIN_CHART_POS = 10
# Names of the prose corpora to split; each one is read from prose/<name>.txt.
PROSE_SOURCES = ['poynton', 'comments', 'hansard']
def get_recent_keys(n=N_SONGS, min_pos=MIN_CHART_POS):
    """Collect up to *n* distinct song keys for charted songs that have lyrics.

    Charts are walked in the order returned by ``common.get_chartdata()``
    (presumably most recent first -- confirm against that helper), and only
    the first *min_pos* entries of each chart are considered.

    Args:
        n: Maximum number of distinct song keys to return.
        min_pos: How many leading entries of each chart to examine.

    Returns:
        A set of at most *n* song keys (as produced by ``common.song_key``).
        Fewer are returned when the chart data runs out first.
    """
    found = set()
    if n <= 0:
        return found
    for chart in common.get_chartdata():
        for song in chart[:min_pos]:
            key = common.song_key(song)
            # Skip songs already collected, and songs with no lyrics on hand.
            if key in found or not common.have_lyrics(song):
                continue
            found.add(key)
            # Honour the caller's limit; the previous code always compared
            # against the global N_SONGS and silently ignored ``n``.
            if len(found) >= n:
                return found
    return found
def _ensure_dir(path):
    """Create directory *path*, tolerating only the case where it already exists."""
    try:
        os.mkdir(path)
    except OSError:
        # An existing directory is fine; anything else (permissions, missing
        # parent, a regular file in the way) is a real error and must surface.
        if not os.path.isdir(path):
            raise


def _read_fragment(prosefile, size, tolerance=5):
    """Read whole lines from *prosefile* until roughly *size* long.

    Lines are appended until the accumulated text is within *tolerance* of
    *size* (or longer). Reading stops early at end of file, so the result
    may be shorter than requested.
    """
    pieces = []
    total = 0
    while size - total > tolerance:
        line = prosefile.readline()
        if not line:
            # readline() returns '' at EOF and keeps doing so; without this
            # check the loop would spin forever once the prose runs out.
            break
        pieces.append(line)
        total += len(line)
    return ''.join(pieces)


if __name__ == '__main__':
    # For each selected song, carve a fragment of about the same size as its
    # lyrics file out of every prose source, and write it to
    # prose/fragments/<source>/<index>.
    song_keys = get_recent_keys()
    print("Loaded song keys to match against")
    prosedir = 'prose'
    fragdir = os.path.join(prosedir, 'fragments')
    tolerance = 5
    prosefiles = {}
    try:
        for src in PROSE_SOURCES:
            prosefiles[src] = open(os.path.join(prosedir, src + '.txt'))
        _ensure_dir(fragdir)
        for prose_src in prosefiles:
            _ensure_dir(os.path.join(fragdir, prose_src))
        # NOTE(review): song_keys is a set, so the index-to-song mapping
        # follows set iteration order -- confirm nothing downstream relies on it.
        for i, song_key in enumerate(song_keys):
            fname = str(i)
            size = os.path.getsize(
                os.path.join(common.LYRICS_DIR, song_key + '.txt'))
            for prose_src, prosefile in prosefiles.items():
                # Each prose file is read sequentially, so successive songs
                # get successive, non-overlapping fragments.
                fragment = _read_fragment(prosefile, size, tolerance)
                if size - len(fragment) > tolerance:
                    print("Warning: %s ran out of text at fragment %s"
                          % (prose_src, fname))
                with open(os.path.join(fragdir, prose_src, fname), 'w') as f:
                    f.write(fragment)
    finally:
        # Close every prose file that was opened, even if something failed.
        for prosefile in prosefiles.values():
            prosefile.close()