forked from udieckmann/Kielipankki-utilities
-
Notifications
You must be signed in to change notification settings - Fork 0
/
libvrtspecial.py
36 lines (32 loc) · 1.39 KB
/
libvrtspecial.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import html, sys
# Department names do contain spaces and ampersands:
# 'Frågor & svar'
# 'Sex & sånt'
# 'Jord- & skogsbruk'
#
# At least one contains a vertical bar (not a main department):
# 'X3M | De bästa intervjuerna'
# Eventually decided to replace that with /.
# Have reconsidered and now replace with ¦ instead.
# Incidentally, any replacement breaks a URL or similar value.
#
# No " ' < > seen so far.
def finish_av(value):
    '''Return an attribute value made safe for output, HTML-escaped.

    Values containing a single or double quote are reported on
    stderr (once per kind of quote present) but otherwise kept; the
    final html.escape call, with quote = True, turns them into
    character references along with &, < and >.

    A vertical bar is replaced with a broken bar, and the value is
    reported on stderr both before and after that replacement.

    '''
    # One report per kind of quote found, single quote first.
    for mark in ("'", '"'):
        if mark in value:
            print('vrt-special attr', repr(value), file = sys.stderr)

    # Common case: nothing to replace, just escape.
    if '|' not in value:
        return html.escape(value, quote = True)

    # finish_avs in this file uses '|' as the delimiter of its
    # "set-valued" output, so a literal bar cannot stay in a value.
    print('vrt-special attr in', repr(value), file = sys.stderr)
    replaced = value.replace('|', '\N{BROKEN BAR}')
    print('vrt-special attr out', repr(replaced), file = sys.stderr)
    return html.escape(replaced, quote = True)
def finish_avs(values):
    '''Build the "set-valued" attribute string from department records.

    Each item of values is indexed with the key 'name'; that name is
    passed through finish_av and followed by a vertical bar.  The
    result always starts with a vertical bar, so an empty values
    gives just '|'.

    Still too specific to the department records; needs refactoring.

    '''
    pieces = ['|']
    for dep in values:
        pieces.append(finish_av(dep['name']) + '|')
    return ''.join(pieces)
def finish_t(token):
    '''Return a token made safe for output.

    Every space is replaced with a no-break space (U+00A0), then &,
    < and > are escaped with html.escape; quotes are left alone
    (quote = False).  A token containing a vertical bar is reported
    on stderr but is not otherwise changed.

    '''
    # Only the vertical bar is reported; <, > and & are escaped
    # below without any report.
    has_bar = '|' in token
    if has_bar:
        print('vrt-special token', repr(token), file = sys.stderr)

    unspaced = token.replace(' ', '\xa0')
    return html.escape(unspaced, quote = False)