#!/usr/bin/env python3.6

# Merge all the .xml rulesets into a single "default.rulesets" file -- this
# avoids wasting disk space on inodes and, more importantly, works around the
# fact that zip does not perform well on a pile of small files.
# The rulesets are merged into a single JSON structure (an array of ruleset
# objects) for minimal overhead in both storage and parsing speed.
import argparse
import glob
import json
import os
import unicodedata
import xml.etree.ElementTree
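
# For illustration only (a hypothetical ruleset, not taken from the repository):
# each entry of the merged library built below has roughly this shape:
#
#   {"name": "Example",
#    "target": ["example.com", "www.example.com"],
#    "rule": [{"from": "^http:", "to": "https:"}],
#    "securecookie": [{"host": ".+", "name": ".+"}],
#    "exclusion": ["^http://example\\.com/foo/"]}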

def normalize(f):
    """
    OSX and Linux filesystems encode composite characters differently in
    filenames. We should normalize to NFC: http://unicode.org/reports/tr15/
    """
    f = unicodedata.normalize("NFC", f)
    return f
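
# Illustrative example: a filename containing "e" followed by U+0301 (combining
# acute accent), as macOS filesystems may report it, and one containing the
# single code point U+00E9 both normalize to the same NFC string, so
# sorted(files) below is stable across platforms.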

# Command-line argument parsing (the default source_dir is rarely overridden)
parser = argparse.ArgumentParser(description="Merge rulesets")
parser.add_argument("--source_dir", default="src/chrome/content/rules")
args = parser.parse_args()

# Output filenames for the merged ruleset library
ofn = os.path.join(args.source_dir, "default.rulesets")
ojson = os.path.join(args.source_dir, "default.rulesets.json")

# XML Ruleset Files
files = map(normalize, glob.glob(os.path.join(args.source_dir, "*.xml")))

# Under git bash, sed -i issues errors and sets the file "read-only".
if os.path.isfile(ofn):
    os.system("chmod u+w " + ofn)
if os.path.isfile(ojson):
    os.system("chmod u+w " + ojson)

# Library (a list of ruleset objects, serialized as one JSON array)
library = []

# Parse the XML rulesets and construct the JSON library
print(" * Parsing XML ruleset and constructing JSON library...")

for filename in sorted(files):
    tree = xml.etree.ElementTree.parse(filename)
    root = tree.getroot()

    ruleset = {}
    # Collects every <securecookie name=".+"> of this ruleset into a single
    # entry whose host is a regex alternation of all such hosts.
    trivialNameSecureCookie = None

    # Copy the <ruleset> element's attributes (e.g. name, platform) verbatim.
    for attr in root.attrib:
        ruleset[attr] = root.attrib[attr]

    for child in root:
        # Only these child elements are merged; any other tag is skipped.
        if child.tag in ["target", "rule", "securecookie", "exclusion"]:
            if child.tag not in ruleset:
                ruleset[child.tag] = []
        else:
            continue

        if child.tag == "target":
            ruleset["target"].append(child.attrib["host"])
        elif child.tag == "rule":
            ru = {}
            ru["from"] = child.attrib["from"]
            ru["to"] = child.attrib["to"]
            ruleset["rule"].append(ru)
        elif child.tag == "securecookie":
            if child.attrib["name"] == ".+":
                # Trivial-name securecookies are merged into one entry by
                # OR-ing their host patterns together.
                if not trivialNameSecureCookie:
                    trivialNameSecureCookie = {}
                    trivialNameSecureCookie["host"] = child.attrib["host"]
                    trivialNameSecureCookie["name"] = ".+"
                else:
                    trivialNameSecureCookie["host"] = (
                        trivialNameSecureCookie["host"] + "|" + child.attrib["host"])
            else:
                sc = {}
                sc["host"] = child.attrib["host"]
                sc["name"] = child.attrib["name"]
                ruleset["securecookie"].append(sc)
        elif child.tag == "exclusion":
            # All exclusion patterns are likewise OR-ed into a single regex.
            if len(ruleset["exclusion"]) == 0:
                ruleset["exclusion"].append(child.attrib["pattern"])
            else:
                ruleset["exclusion"][0] = (
                    ruleset["exclusion"][0] + "|" + child.attrib["pattern"])

    if trivialNameSecureCookie:
        ruleset["securecookie"].insert(0, trivialNameSecureCookie)

    library.append(ruleset)

# Write the merged library to default.rulesets and default.rulesets.json
print(" * Writing JSON library to %s and %s" % (ofn, ojson))
outfile = open(ofn, "w")
jsonout = open(ojson, "w")
outfile.write(json.dumps(library, separators=(",", ":")))
jsonout.write(json.dumps(library, separators=(",", ":")))
outfile.close()
jsonout.close()

# Everything is okay.
print(" * Everything is okay.")