-
Notifications
You must be signed in to change notification settings - Fork 1
/
clean.py
executable file
·77 lines (59 loc) · 2.03 KB
/
clean.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import sys
import re
# Runs of three or more hyphens.
hyphen_re = re.compile(r'---+')

# A LaTeX command followed either by a braced argument or by whitespace,
# unless the command sits directly after an opening brace.  Group 1 is the
# command, group 2 is the argument (or the whitespace).
accent_re = re.compile(r'''
(?<!\{) # Not immediately inside a latex group
( # Start a regular expression grouping
\\[A-Za-z]+ # A command made up of letters
| # ... or ...
\\[^A-Za-z] # A command made up of one non-letter
)
( # The argument
\{ # The start of a grouped argument
[^\}]* # The (possibly empty) argument
\} # The end of the argument
| # ... or ...
\s+ # No argument -- a space
)
''', re.VERBOSE)

# The particle "d'" at a word boundary, in either case.
lowercase_re = re.compile(r'\bd\'', re.IGNORECASE)

# The first word of a line, after optional leading whitespace (group 1).
firstword_re = re.compile(r'\s*(\w+)')

# The ASCII letters, lowercase first, then uppercase.
letters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
def accent_replace(match):
    """Wrap a matched LaTeX command in braces.

    ``match`` must expose two groups: the command (a backslash followed by
    letters, or by a single non-letter) and its "argument", which is either
    a braced group or a run of whitespace.

    * Braced argument: the command and argument are wrapped together,
      e.g. ``\\'{e}`` becomes ``{\\'{e}}``.
    * Whitespace after a control word (``\\ss ``): the whitespace is
      dropped, because TeX skips spaces after a control word anyway.
    * Whitespace after a control symbol (``\\& ``): the whitespace is kept
      after the closing brace, because TeX does *not* skip spaces after a
      single non-letter command; dropping it would glue the neighbouring
      words together.

    Returns the replacement string.
    """
    command, argument = match.groups()
    if argument.strip():
        return '{%s%s}' % (command, argument)

    # command[0] is the backslash; the character after it decides whether
    # this is a control word (ASCII letters) or a control symbol.
    first = command[1]
    is_control_word = 'a' <= first <= 'z' or 'A' <= first <= 'Z'
    if is_control_word:
        return '{%s}' % command
    return '{%s}%s' % (command, argument)
def lowercase_replace(match):
    """Return the whole matched text, lowercased and enclosed in braces."""
    lowered = match.group(0).lower()
    return '{%s}' % lowered
# Field names whose lines are dropped from the output entirely.
ignore_keywords = ('abstract', 'file', 'keywords', 'annote')

# Net number of '{' still open inside a field that is being dropped.  While
# it is positive, the following lines belong to that field and are dropped.
opened_braces = 0

# Usage: clean.py INPUT [OUTPUT|-]
if len(sys.argv) < 2:
    # Fail with a usage message rather than an IndexError on sys.argv[1].
    sys.exit('usage: %s INPUT [OUTPUT|-]' % sys.argv[0])

# open() works on both Python 2 and 3 (file() exists only on Python 2).
# The input is opened first so that an unreadable input does not leave a
# freshly truncated output file behind.
with open(sys.argv[1]) as source:
    # A missing or '-' second argument means "write to standard output".
    if len(sys.argv) < 3 or sys.argv[2] == '-':
        output = sys.stdout
    else:
        output = open(sys.argv[2], 'w')
    try:
        for line in source:
            line = line.rstrip('\n')

            if opened_braces > 0:
                # Still inside a dropped multi-line field: keep tracking
                # the brace balance and emit nothing.
                opened_braces += line.count('{') - line.count('}')
                continue

            match = firstword_re.match(line)
            if match is not None:
                first_word = match.group(1)
                if first_word in ignore_keywords:
                    # Drop this line.  If the number of { and } is the
                    # same, assume the field is closed; a surplus of {
                    # means the field continues on the following lines.
                    opened_braces = line.count('{') - line.count('}')
                    continue

            line = hyphen_re.sub('--', line)
            line = accent_re.sub(accent_replace, line)
            line = lowercase_re.sub(lowercase_replace, line)
            # Same output as the Python 2 statement `print >> output, line`.
            output.write(line + '\n')
    finally:
        # Only close a file opened here; never close sys.stdout.
        if output is not sys.stdout:
            output.close()