-
Notifications
You must be signed in to change notification settings - Fork 2
/
processor.py
82 lines (73 loc) · 3.01 KB
/
processor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import html
import re

import identify
def html_ESC_decode(text):
    """Decode HTML character references in *text*.

    Named references (``&quot;``, ``&amp;``, ``&lt;``, ``&gt;``, ...) and
    numeric references (``&#43;``, ``&#x2b;``) are decoded in one pass, so
    an escaped ampersand is never decoded twice: ``&amp;lt;`` becomes the
    literal text ``&lt;``, not ``<``.  Non-breaking spaces (``&nbsp;`` /
    U+00A0) are converted to ordinary spaces.

    Args:
        text: String that may contain HTML character references.

    Returns:
        The decoded string.
    """
    # html.unescape maps &nbsp; to U+00A0; normalise it to a plain space so
    # extracted source code only contains ordinary whitespace.
    return html.unescape(text).replace('\xa0', ' ')
# Captures the inner HTML of every <div class="crayon-line ..."> element.
pattern_process_crayon_content = re.compile(r'(?s)<div class="crayon-line.*?>(.*?)</div>')
# Opening and closing <span> tags found inside a crayon line.
_pattern_crayon_span_tag = re.compile(r'<span.*?>|</span>')


def process_crayon_content(content):
    """Rebuild plain text from the line ``<div>`` elements of a crayon block.

    Every ``<div class="crayon-line...">`` element found in *content*
    contributes one output line: its inner HTML with all ``<span ...>`` and
    ``</span>`` tags removed, followed by a newline.  HTML entities are left
    untouched.

    Args:
        content: HTML fragment containing crayon line elements.

    Returns:
        The concatenated lines, each terminated by ``'\\n'``; an empty
        string when no crayon line is present.
    """
    lines = pattern_process_crayon_content.findall(content)
    # join() builds the result in one pass instead of repeated concatenation.
    return ''.join(_pattern_crayon_span_tag.sub('', line) + '\n' for line in lines)
# Captures the inner HTML of every <td class="crayon-code"> cell.
pattern_get_crayon_codes = re.compile('(?s)<td class="crayon-code">(.*?)</td>')


def get_crayon_codes(page):
    """Collect the crayon code blocks of *page* accepted by ``identify.iscpp``.

    Each ``<td class="crayon-code">`` cell is flattened to text with
    ``process_crayon_content``, entity-decoded with ``html_ESC_decode`` and
    kept only when ``identify.iscpp`` returns a truthy value for it.

    Args:
        page: HTML source of a page.

    Returns:
        List of the accepted snippets, in page order.
    """
    decoded = (
        html_ESC_decode(process_crayon_content(cell))
        for cell in pattern_get_crayon_codes.findall(page)
    )
    return [snippet for snippet in decoded if identify.iscpp(snippet)]
# Raw strings keep the regex escapes (``\d``) from being read as (invalid)
# Python string escapes.
pattern_process_intag_code_content_1 = re.compile(r'</div>')
pattern_process_intag_code_content_2 = re.compile(r'<.*?>')
pattern_process_intag_code_content_3 = re.compile(r'(?sm)^[ ]{,2}[\d]*')


def process_intag_code_content(code):
    """Strip markup from the body of a ``<pre>``/``<code>`` element.

    The steps run in this order:

    1. every ``</div>`` becomes a newline;
    2. every remaining ``<...>`` tag that fits on one line is removed;
    3. at the start of each line, up to two spaces followed by any run of
       digits are removed (presumably a line-number gutter -- note this also
       strips digits that genuinely start a code line).

    Args:
        code: Inner HTML of the code element.

    Returns:
        The cleaned text; HTML entities are left untouched.
    """
    code = pattern_process_intag_code_content_1.sub('\n', code)
    code = pattern_process_intag_code_content_2.sub('', code)
    return pattern_process_intag_code_content_3.sub('', code)
# Two alternatives, one capture group each: group 1 is the body of a <pre>
# element, group 2 the body of a <code> element.
pattern_get_intag_codes = re.compile('(?s)<pre.*?>(.*?)</pre>|<code.*?>(.*?)</code>')


def get_intag_codes(page):
    """Collect ``<pre>``/``<code>`` bodies of *page* accepted by ``identify.iscpp``.

    Every match yields a ``(pre_body, code_body)`` pair in which the
    alternative that did not match is the empty string.  Both members are
    cleaned with ``process_intag_code_content``, entity-decoded with
    ``html_ESC_decode`` and kept when ``identify.iscpp`` returns a truthy
    value.

    Args:
        page: HTML source of a page.

    Returns:
        List of the accepted snippets, in page order.
    """
    accepted = []
    for groups in pattern_get_intag_codes.findall(page):
        # Visit the <pre> capture first, then the <code> capture.
        for raw in groups:
            snippet = html_ESC_decode(process_intag_code_content(raw))
            if identify.iscpp(snippet):
                accepted.append(snippet)
    return accepted
def getcodes(page):
    """Return every accepted code snippet found in *page*.

    Snippets from crayon blocks come first, followed by those taken from
    ``<pre>``/``<code>`` elements.

    Args:
        page: HTML source of a page.

    Returns:
        A new list: ``get_crayon_codes(page)`` followed by
        ``get_intag_codes(page)``.
    """
    crayon_snippets = get_crayon_codes(page)
    intag_snippets = get_intag_codes(page)
    return crayon_snippets + intag_snippets
# Matches the whole <title> element, tags included, when it fits on one line.
pattern_gettitle = re.compile(r'<title>.*?</title>')
ojs = 'bzoj|poj|pku|hdu|zoj|uoj|lydsy|spoj|la|uva|codevs|vijos|tyvj|luogu'
# <judge name> + up to three separator characters + optional "p" + four digits.
# The hyphen is escaped so the class holds exactly space, '-', ']' and ')'.
# Unescaped, " -\]" is a range from space to ']' that also matches digits and
# letters, which let the separator swallow part of the problem number.
texpr = '(?i)(' + ojs + r')[ \-\]\)]{,3}p?([\d]{4})'
pattern_gettitle_t = re.compile(texpr)


def gettitle(page):
    """Extract (judge, problem id) pairs from the ``<title>`` of *page*.

    The first ``<title>...</title>`` element is entity-decoded with
    ``html_ESC_decode`` and scanned, case-insensitively, for a judge name
    from ``ojs`` followed by a four-digit number.

    Args:
        page: HTML source of a page.

    Returns:
        A list of ``(judge, digits)`` tuples, possibly empty, when a title
        element is found.  When there is no title element the empty string
        ``""`` is returned instead (kept for backward compatibility), so
        callers should test the result for truthiness.
    """
    ttitle = pattern_gettitle.search(page)
    if not ttitle:
        return ""
    title = html_ESC_decode(ttitle.group())
    return pattern_gettitle_t.findall(title)
# First <table align=center> ... </table> element of the page.
pattern_getfirstresult = re.compile(r'(?s)<table align=center>.*?</table>')
# Alternation is tried left to right, so "Pending_Rejudging" must come before
# its prefix "Pending" or it could never be returned.
results = 'Accepted|Presentation_Error|Wrong_Answer|Time_Limit_Exceed|Memory_Limit_Exceed|Output_Limit_Exceed|Runtime_Error|Compile_Error|Pending_Rejudging|Pending|Compiling|Running_&_Judging'
pattern_getfirstresult_results = re.compile(results)


def getfirstresult(page):
    """Return the first verdict string inside the page's centred table.

    Only the first ``<table align=center>`` element is searched; the
    left-most occurrence of any verdict listed in ``results`` wins.

    Args:
        page: HTML source of a page.

    Returns:
        The verdict text (e.g. ``'Accepted'``), or ``''`` when the page has
        no such table or the table contains no known verdict.
    """
    table = pattern_getfirstresult.search(page)
    if table is None:
        # No status table at all: report "no result" instead of raising.
        return ''
    verdict = pattern_getfirstresult_results.search(table.group())
    return verdict.group() if verdict else ''
# Captures the href of the first <a> inside the <h3 class="t"> heading that
# directly follows a <div class="result c-container ..."> opening tag.
pattern_getsolutions = re.compile(r'(?s)<div class="result c-container .*?><h3 class="t"><a.*?href="(.*?)"')


def getsolutions(page):
    """Return the link targets of the result entries found in *page*.

    Args:
        page: HTML source of a page.

    Returns:
        A new list with the captured ``href`` value of every match of
        ``pattern_getsolutions``, in page order; empty when nothing matches.
    """
    # findall already returns a fresh list of the captured hrefs.
    return pattern_getsolutions.findall(page)