-
Notifications
You must be signed in to change notification settings - Fork 12
/
parse.py
121 lines (96 loc) · 3.42 KB
/
parse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import javalang
import javalang.tree
import pathlib
import os
import shutil
import subprocess
import json
def get_name(obj):
    """Return ``obj.name``, or ``None`` when ``obj`` itself is ``None``."""
    return None if obj is None else obj.name
def parse_file(p):
    """Parse one Java source file and summarize its top-level types.

    Only the types listed in ``tree.types`` are visited. Class, interface and
    enum declarations are summarized; annotation declarations are skipped;
    anything else is reported on stdout and skipped.

    Args:
        p: path object for the ``.java`` file. It must provide ``open()`` and
            a ``name`` attribute (e.g. ``pathlib.Path``).

    Returns:
        ``None`` if javalang cannot tokenize or parse the file (a message is
        printed). Otherwise a dict with keys:

        * ``'package_name'``: declared package name, or ``None`` when the
          file has no ``package`` declaration.
        * ``'filename'``: ``p.name``.
        * ``'classes'``: list of ``{'name', 'methods', 'fields'}`` dicts,
          where each method is ``{'type', 'name', 'params'}`` (``'type'`` is
          ``None`` when the method has no return type node) and each field
          or parameter is ``{'type', 'name'}``.
    """
    # errors='ignore' drops undecodable bytes instead of aborting on them.
    with p.open(encoding='utf-8', errors='ignore') as f:
        src_code = f.read()

    try:
        tree = javalang.parse.parse(src_code)
    except (javalang.parser.JavaSyntaxError, javalang.tokenizer.LexerError):
        # Tokenizer failures are treated like syntax errors: skip the file
        # rather than abort the whole run.
        print('!! syntax error in file', p)
        return None

    # A compilation unit in the default package has ``tree.package`` set to
    # None; get_name() maps that to None instead of raising AttributeError.
    pkg_name = get_name(tree.package)

    summarized_types = (
        javalang.tree.ClassDeclaration,
        javalang.tree.InterfaceDeclaration,
        javalang.tree.EnumDeclaration,
    )
    ignored_types = (javalang.tree.AnnotationDeclaration,)

    clses = []
    for sub in tree.types:
        if isinstance(sub, summarized_types):
            methods = []
            for method in sub.methods:
                params = [
                    {
                        'type': get_name(param.type),
                        'name': param.name,
                    }
                    for param in method.parameters
                ]
                methods.append({
                    'type': get_name(method.return_type),
                    'name': method.name,
                    'params': params,
                })

            fields = []
            for field in sub.fields:
                # One field declaration may declare several variables that
                # all share the same type (``int a, b;``).
                vartype = field.type.name
                for var in field.declarators:
                    fields.append({
                        'type': vartype,
                        'name': var.name,
                    })

            clses.append({
                'name': sub.name,
                'methods': methods,
                'fields': fields,
            })
        elif isinstance(sub, ignored_types):
            pass  # ignore these types
        else:
            print('!! unknown', type(sub), 'in', p)

    return {
        'package_name': pkg_name,
        'filename': p.name,
        'classes': clses,
    }
def parse_package(pkg_name, ver_tag):
    """Check out one Defects4J buggy version, parse it and save the result.

    The project is checked out into ``var/code_root`` (version id
    ``<ver_tag>b``), every ``*.java`` file under the exported
    ``dir.src.classes`` directory is passed to ``parse_file``, and the
    non-``None`` results are written as JSON to
    ``result/<pkg_name>-<ver_tag>.json``. The ``var`` scratch directory is
    removed afterwards, whether or not the steps succeeded.

    Args:
        pkg_name: Defects4J project id, passed to ``defects4j -p``.
        ver_tag: bug id; ``b`` is appended to form the version id.

    Raises:
        subprocess.CalledProcessError: if a ``defects4j`` command exits with
            a non-zero status.
    """
    pathlib.Path('var').mkdir(exist_ok=True)
    print('== package %s ver %s' % (pkg_name, ver_tag))

    # Output of the defects4j commands is discarded.
    quiet = {'stdout': subprocess.DEVNULL, 'stderr': subprocess.DEVNULL}

    try:
        # checkout
        print(' checking out')
        # Argument lists avoid the shell; check_call raises on failure, and
        # unlike an assert it is not stripped when running with -O.
        subprocess.check_call(
            ['defects4j', 'checkout', '-p', str(pkg_name),
             '-v', '%sb' % ver_tag, '-w', 'var/code_root'],
            **quiet
        )

        # get class root
        subprocess.check_call(
            ['defects4j', 'export', '-p', 'dir.src.classes',
             '-w', 'var/code_root', '-o', 'var/class_root_path.txt'],
            **quiet
        )
        with open('var/class_root_path.txt') as f:
            # Strip stray whitespace/newlines so the joined path stays valid.
            cls_root = f.read().strip()

        # run
        print(' running')
        ret = []
        for p in (pathlib.Path('var/code_root') / cls_root).glob('**/*.java'):
            r = parse_file(p)
            if r is not None:
                ret.append(r)

        # save and cleanup
        print(' saving and cleaning up')
        pathlib.Path('result').mkdir(exist_ok=True)
        out_path = pathlib.Path('result/%s-%s.json' % (pkg_name, ver_tag))
        with out_path.open('w', encoding='utf-8') as f:
            json.dump(ret, f, indent=1)
    finally:
        # Remove the scratch checkout even if a step above failed.
        shutil.rmtree('var')
#for p in (pathlib.Path('var/code_root')).glob('**/*.java'):
# parse_file(p)
def enum_package_and_ver():
    """Yield ``(project_id, bug_id)`` pairs reported by the ``defects4j`` CLI.

    Project ids come from ``defects4j pids``; any id whose first character
    is ``C``, ``G`` or ``J`` is skipped. For each remaining project, bug ids
    come from ``defects4j bids -p <project>`` and are converted to ``int``.
    """
    skipped_initials = ('C', 'G', 'J')
    pids_output = subprocess.check_output(['defects4j', 'pids'])
    for raw_pid in pids_output.splitlines():
        project = raw_pid.decode()
        if project[0] not in skipped_initials:
            bids_output = subprocess.check_output(
                ['defects4j', 'bids', '-p', project]
            )
            for raw_bid in bids_output.splitlines():
                bug_id = int(raw_bid.decode())
                yield project, bug_id
# Script driver: process every (project, bug id) pair produced by
# enum_package_and_ver(), one checkout at a time.
for pr, ver in enum_package_and_ver():
    parse_package(pr, ver)