-
Notifications
You must be signed in to change notification settings - Fork 2
/
json_lexer.ml
97 lines (79 loc) · 2.27 KB
/
json_lexer.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
open Sedlexing
open Astring
type token = Json_parser.token
open Json_parser
exception LexError of Lexing.position * string
let digit = [%sedlex.regexp? '0' .. '9']
let number = [%sedlex.regexp? Plus digit]
let blank = [%sedlex.regexp? ' ' | '\t']
let newline = [%sedlex.regexp? '\r' | '\n' | "\r\n"]
let any_blank = [%sedlex.regexp? blank | newline]
let letter = [%sedlex.regexp? 'a' .. 'z' | 'A' .. 'Z']
let decimal_ascii = [%sedlex.regexp? Plus ('0' .. '9')]
let octal_ascii = [%sedlex.regexp? "0o", Plus ('0' .. '7')]
let hex_ascii = [%sedlex.regexp? "0x", Plus (('0' .. '9' | 'a' .. 'f' | 'A' .. 'F'))]
let rec nom buf =
match%sedlex buf with
| Plus any_blank -> nom buf
| _ -> ()
let string buf =
let buffer = Buffer.create 10 in
let rec read_string buf =
match%sedlex buf with
| {|\"|} ->
Buffer.add_char buffer '"';
read_string buf
| '"' -> STRING (Buffer.contents buffer)
| Star (Compl '"') ->
Buffer.add_string buffer (Utf8.lexeme buf);
read_string buf
| _ -> assert false
in
read_string buf
let digit_value c =
let open Stdlib in
match c with
| 'a' .. 'f' -> 10 + Char.code c - Char.code 'a'
| 'A' .. 'F' -> 10 + Char.code c - Char.code 'A'
| '0' .. '9' -> Char.code c - Char.code '0'
| _ -> assert false
let num_value buffer ~base ~first =
let buf = Utf8.lexeme buffer in
let c = ref 0 in
for i = first to String.length buf - 1 do
let v = digit_value buf.[i] in
assert (v < base);
c := (base * !c) + v
done;
!c
let token buf =
nom buf;
match%sedlex buf with
| eof -> EOF
| "" -> EOF
| '-' -> MINUS
| '+' -> PLUS
| '"' -> string buf
| ':' -> COLON
| '[' -> LSQUARE
| ']' -> RSQUARE
| '{' -> LBRACKET
| '}' -> RBRACKET
| ',' -> COMMA
| "true" -> BOOL true
| "false" -> BOOL false
| hex_ascii ->
let number = num_value ~base:16 ~first:2 buf in
INT number
| octal_ascii ->
let number = num_value ~base:8 ~first:2 buf in
INT number
| decimal_ascii ->
let number = num_value ~base:10 ~first:0 buf in
INT number
| _ ->
let position = fst @@ lexing_positions buf in
let tok = Utf8.lexeme buf in
raise @@ LexError (position, Printf.sprintf "unexpected character %S" tok)
let lexer buf =
Sedlexing.with_tokenizer token buf