-
Notifications
You must be signed in to change notification settings - Fork 0
/
regexp_lexer.mll
87 lines (73 loc) · 1.66 KB
/
regexp_lexer.mll
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
open Regular_expr;;
open Regexp_parser;;
open Lexing;;
(* [add_inter a b l] adds the closed interval from [a] to [b] to the
   interval list [l] and returns the resulting list.

   - An empty interval ([a > b]) leaves [l] unchanged.
   - Every interval of [l] that intersects the new one is fused with it.
   - Intervals that are only adjacent, e.g. (1,2) and (3,4), stay separate,
     because the tests compare the endpoints themselves.

   NOTE(review): the walk stops at the first interval that starts after
   [b], so [l] is presumably expected to be sorted in increasing order
   with pairwise disjoint intervals; this is not checked here. *)
let add_inter a b l =
  let rec insert lo hi = function
    | [] -> [ (lo, hi) ]
    | ((head_lo, head_hi) as head) :: rest as whole ->
        if hi < head_lo then
          (* new interval ends before the head starts: it goes in front *)
          (lo, hi) :: whole
        else if head_hi < lo then
          (* head ends before the new interval starts: keep it, move on *)
          head :: insert lo hi rest
        else
          (* the two intervals intersect: absorb the head and continue *)
          insert (min lo head_lo) (max hi head_hi) rest
  in
  if a > b then l else insert a b l
(* [complement l] returns the intervals of the code range 0..255 that are
   not covered by the interval list [l].

   The list is read from left to right while [next] tracks the smallest
   code not yet accounted for; a gap is emitted whenever the current
   interval starts after [next].

   NOTE(review): presumably [l] is sorted and its intervals are disjoint
   (as produced by [add_inter]); the function does not verify this. *)
let complement l =
  let rec gaps next = function
    | [] ->
        (* whatever remains up to 255 is outside every interval *)
        if next < 256 then [ (next, 255) ] else []
    | (lo, hi) :: rest ->
        let after = gaps (hi + 1) rest in
        if next < lo then (next, lo - 1) :: after else after
  in
  gaps 0 l
(* [interv a b] converts the two integer codes [a] and [b] to characters
   with [Char.chr] and passes them to [char_interv] (brought into scope by
   one of the opened modules, presumably Regular_expr).
   [Char.chr] raises [Invalid_argument] for a code outside 0..255. *)
let interv a b =
  let hi = Char.chr b in
  let lo = Char.chr a in
  char_interv lo hi
(* [make_charset l] turns a list of (low, high) code intervals into a
   single regular expression: each interval is converted with [interv] and
   the results are combined with [alt], ending with [empty] for the empty
   list.  [alt] and [empty] come from an opened module (presumably
   Regular_expr).  The combination is right-nested:
   alt i1 (alt i2 (... empty)). *)
let make_charset l =
  List.fold_right (fun (lo, hi) acc -> alt (interv lo hi) acc) l empty
}
(* Main entry point: reads one token of a regular expression.

   - A backslash followed by any character gives that character literally.
   - A dot gives the set of all codes 0..255.
   - The star, plus, question mark, bar and parenthesis characters give
     their operator tokens.
   - An opening bracket hands the input over to [charset], which reads up
     to the closing bracket; with a leading caret the resulting interval
     list is complemented first.
   - Any other character stands for itself, and end of input gives EOF.

   The token constructors are presumably the ones declared by
   Regexp_parser.  The catch-all rule must stay after the single-character
   rules, since ocamllex picks the first rule among equally long matches. *)
rule token = parse
  (* escaped character: the character after the backslash, taken literally *)
  | '\\' (_ as c)   { CHAR c }

  (* any character *)
  | '.'             { CHARSET (interv 0 255) }

  (* postfix and infix operators *)
  | '*'             { STAR }
  | '+'             { PLUS }
  | '?'             { QUESTION }
  | '|'             { ALT }

  (* grouping *)
  | '('             { OPENPAR }
  | ')'             { CLOSEPAR }

  (* bracketed sets, negated or not *)
  | "[^"            { CHARSET (make_charset (complement (charset lexbuf))) }
  | '['             { CHARSET (make_charset (charset lexbuf)) }

  (* everything else is an ordinary character *)
  | (_ as c)        { CHAR c }
  | eof             { EOF }
(* Body of a bracketed set, entered from [token] just after the opening
   bracket (and the caret, if any).  Returns the list of (low, high)
   character-code intervals of the set, accumulated with [add_inter], and
   consumes the input up to and including the closing bracket.

   - A closing bracket ends the set.
   - A backslash followed by any character adds that character literally;
     this is the way to put a closing bracket or a backslash in a set.
   - x-y adds the range from x to y (an empty range when x > y).
   - Any other character adds itself.

   Neither endpoint of a range may be an unescaped closing bracket.
   ocamllex prefers the longest match, so a three-character range pattern
   that accepts a closing bracket wins over the one-character rule that
   ends the set.  The set in [0-9]-[0-9] would then not stop at its
   bracket, and [+-] would run on past its end.  With the exclusions
   below, a dash right before the closing bracket is an ordinary
   character.

   There is no eof case: an unterminated set is rejected by the generated
   lexer because no rule matches. *)
and charset = parse
  | ']'
      { [] }
  | '\\' _
      { let c = Char.code (lexeme_char lexbuf 1) in
        add_inter c c (charset lexbuf) }
  | [^ '\\' ']'] '-' [^ ']']
      { (* read both endpoints before the recursive call moves the lexbuf *)
        let c1 = Char.code (lexeme_char lexbuf 0)
        and c2 = Char.code (lexeme_char lexbuf 2) in
        add_inter c1 c2 (charset lexbuf) }
  | _
      { let c = Char.code (lexeme_char lexbuf 0) in
        add_inter c c (charset lexbuf) }