-
Notifications
You must be signed in to change notification settings - Fork 1
/
dance.py
57 lines (41 loc) · 1.44 KB
/
dance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#!/usr/bin/env python
import pytesseract as tess
# Skyler's path
# tess.pytesseract.tesseract_cmd = r'/usr/local/Cellar/tesseract/4.1.1/bin/tesseract'
# Aaron's path
# tess.pytesseract.tesseract_cmd = r'C:\Users\aaron\AppData\Local\Tesseract-OCR\tesseract.exe'
import re
from PIL import Image
import glob
# Patterns used to turn the OCR text of one card into CSV rows.  They are raw
# strings so that regex escapes such as \d and \s are not treated as (invalid)
# Python string escapes.
#
# Name on the "Gentleman: " line.  "|" inside a character class is a literal
# pipe, so the name ends at the first digit, pipe, or newline.
_GENTLEMAN_NAME_RE = re.compile(r'Gentleman: ([^\d|\n]+)')
# The header plus everything after it up to (not including) the first digit.
_GENTLEMAN_HEADER_RE = re.compile(r'Gentleman: ([^\d]+)')
# A "<digit>-<digit>" pair with at most one whitespace character on each side
# of the dash and one after the pair.
# NOTE(review): \d matches a single digit, so multi-digit numbers are not
# treated as a unit -- confirm the cards never contain them.
_NUMBER_PAIR_RE = re.compile(r'(\d)\s?-\s?(\d)\s?')
# A "<digit>-<digit>" pair preceded by any non-newline character, i.e. every
# pair except one at the very start of the text.
_ROW_START_RE = re.compile(r'(.)(\d-\d)')


def dance_card_rows(text):
    """Convert the OCR text of one card into CSV row strings.

    The gentleman's name is taken from the first ``Gentleman: <name>``
    occurrence.  The header is removed, every ``<digit>-<digit>`` pair is
    normalized to ``<digit>-<digit>,``, all newlines are dropped, and the
    text is then split so that each pair starts a new entry.  Each entry is
    prefixed with ``<name>,``.

    Args:
        text: OCR output for a single image.

    Returns:
        A list of row strings without trailing newlines.  The list is empty
        when ``text`` contains no ``Gentleman: `` header.
    """
    match = _GENTLEMAN_NAME_RE.search(text)
    if match is None:
        # re.search returns None when the header is missing (e.g. an
        # unreadable scan); signal "nothing found" instead of failing on
        # .group().
        return []
    name = match.group(1)

    # get rid of gentleman's name in text
    body = _GENTLEMAN_HEADER_RE.sub('', text)
    # cleanup and format text: one entry per line, pair and partner
    # separated by a comma
    body = _NUMBER_PAIR_RE.sub(r'\1-\2,', body)
    body = body.replace('\n', '')
    body = _ROW_START_RE.sub(r'\1\n\2', body)

    # add gentleman's name to each line
    return [name + ',' + entry for entry in body.split('\n')]


rows = []
# glob returns files in filesystem-dependent order; sorting makes the row
# order in the CSV reproducible between runs and machines.
for i2, filename in enumerate(sorted(glob.glob('./*.png')), 1):
    # The context manager releases the image's file handle once OCR is done.
    with Image.open(filename) as img:
        print('image is ' + str(img))
        text = tess.image_to_string(img)
    print('round ' + str(i2) + ', text is: ' + text)

    cardRows = dance_card_rows(text)
    if not cardRows:
        # One bad scan should not abort the batch and lose the other cards.
        print('no "Gentleman: " header found in ' + filename + ', skipping')
        continue
    rows.extend(cardRows)

# create the CSV: every row is terminated by a newline
outputCsv = ''.join(row + '\n' for row in rows)
with open('dance_list.csv', 'w') as csvFile:
    csvFile.write(outputCsv)