forked from getify/JSON.minify
-
Notifications
You must be signed in to change notification settings - Fork 1
/
minify_json.py
130 lines (105 loc) · 4.03 KB
/
minify_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
'''
Created on 20/01/2011
v0.2 (C) Gerald Storer
MIT License
Based on JSON.minify.js:
https://github.com/getify/JSON.minify
Contributors:
- Pradyun S. Gedam (conditions and variable names changed)
'''
import re
# Tokens that can change the scanner state: a double quote, the comment
# delimiters, and line breaks (which terminate ``//`` comments).
_TOKENIZER = re.compile(r'"|/\*|\*/|//|\n|\r')

# Insignificant white space as defined in the JSON standard.
_WHITESPACE = re.compile('[ \t\n\r]+')


def _quote_is_escaped(string, quote_pos):
    """Return True if the quote at *quote_pos* follows an odd run of backslashes."""
    backslashes = 0
    pos = quote_pos - 1
    # Walk backwards over the backslashes directly in front of the quote;
    # this is linear in the run length instead of rescanning from offset 0.
    while pos >= 0 and string[pos] == '\\':
        backslashes += 1
        pos -= 1
    return backslashes % 2 == 1


def json_minify(string, strip_space=True):
    """Strip ``//`` and ``/* */`` comments (and optionally white space) from JSON text.

    The input is scanned token by token while tracking whether the scanner is
    inside a string literal, a multi-line comment or a single-line comment.
    Comment delimiters that appear inside string literals are left alone.

    Args:
        string: JSON text, possibly containing JavaScript-style comments.
        strip_space: When true (the default), spaces, tabs and line breaks
            outside string literals are removed. When false, white space
            outside comments is kept, including line breaks and the line
            break that ends a ``//`` comment.

    Returns:
        The text with comments removed. A comment that is still open when the
        input ends (for example a ``//`` comment on a last line without a
        trailing newline) is discarded as well.
    """
    in_string = False
    in_multi = False
    in_single = False

    new_str = []
    index = 0

    for match in _TOKENIZER.finditer(string):
        # Text between the previous token and this one is kept unless it is
        # comment content.
        if not (in_multi or in_single):
            tmp = string[index:match.start()]
            if strip_space and not in_string:
                tmp = _WHITESPACE.sub('', tmp)
            new_str.append(tmp)

        index = match.end()
        val = match.group()
        is_newline = val in ('\n', '\r')

        if in_multi:
            # Only the closing delimiter matters inside /* ... */.
            if val == '*/':
                in_multi = False
        elif in_single:
            # A line break ends a // comment; everything else is comment text.
            if is_newline:
                in_single = False
                if not strip_space:
                    new_str.append(val)
        elif val == '"':
            # A quote opens a string, or closes it unless it is escaped.
            if not in_string or not _quote_is_escaped(string, match.start()):
                in_string = not in_string
            index -= 1  # include the " character in the next chunk
        elif in_string:
            # Comment delimiters inside a string literal are ordinary text.
            # Raw line breaks inside a string are dropped when stripping
            # space (they are not valid in a JSON string anyway).
            if not (is_newline and strip_space):
                new_str.append(val)
        elif val == '/*':
            in_multi = True
        elif val == '//':
            in_single = True
        elif is_newline and not strip_space:
            # Keep line breaks between tokens when white space is preserved.
            new_str.append(val)
        # A stray '*/' outside any comment is dropped.

    # Whatever follows the last token is output only when it is not comment
    # content, and it gets the same white-space treatment as any other chunk.
    if not (in_multi or in_single):
        tail = string[index:]
        if strip_space and not in_string:
            tail = _WHITESPACE.sub('', tail)
        new_str.append(tail)

    return ''.join(new_str)
if __name__ == '__main__':
    # Self-test entry point: running this file directly executes the unit
    # tests below. Python 2.6+ needed to run tests.
    import json
    import textwrap
    import unittest

    class JsonMinifyTestCase(unittest.TestCase):
        """Checks json_minify against commented JSON fixtures."""

        def template(self, in_string, expected):
            """Minify *in_string*, parse it, and compare with parsed *expected*."""
            minified = json_minify(in_string)
            self.assertEqual(json.loads(minified), json.loads(expected))

        def test_1(self):
            # Mixed // and /* */ comments around and inside an object.
            commented = textwrap.dedent('''
// this is a JSON file with comments
{
"foo": "bar", // this is cool
"bar": [
"baz", "bum"
],
/* the rest of this document is just fluff
in case you are interested. */
"something": 10,
"else": 20
}
/* NOTE: You can easily strip the whitespace and comments
from such a file with the JSON.minify() project hosted
here on github at http://github.com/getify/JSON.minify
*/''')
            wanted = '{"foo":"bar","bar":["baz","bum"],"something":10,"else":20}'
            self.template(commented, wanted)

        def test_2(self):
            # Comment delimiters used as keys and values inside strings.
            commented = textwrap.dedent('''
{"/*":"*/","//":"",/*"//"*/"/*/"://
"//"}''')
            wanted = '{"/*":"*/","//":"","/*/":"//"}'
            self.template(commented, wanted)

        def test_3(self):
            # Escaped quote in a key, comments between tokens.
            commented = textwrap.dedent(r'''
/*
this is a
multi line comment */{
"foo"
:
"bar/*"// something
, "b\"az":/*
something else */"blah"
}
''')
            wanted = r'{"foo":"bar/*","b\"az":"blah"}'
            self.template(commented, wanted)

        def test_4(self):
            # Runs of backslashes directly before quotes.
            commented = textwrap.dedent(r'''
{"foo": "ba\"r//", "bar\\": "b\\\"a/*z",
"baz\\\\": /* yay */ "fo\\\\\"*/o"
}
''')
            wanted = r'{"foo":"ba\"r//","bar\\":"b\\\"a/*z","baz\\\\":"fo\\\\\"*/o"}'
            self.template(commented, wanted)

    unittest.main()