-
Notifications
You must be signed in to change notification settings - Fork 4
/
parse.lisp
420 lines (318 loc) · 11.3 KB
/
parse.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
;;;; Monadic parsing package for Common Lisp
;;;;
;;;; Copyright (c) Jeffrey Massung
;;;;
;;;; This file is provided to you under the Apache License,
;;;; Version 2.0 (the "License"); you may not use this file
;;;; except in compliance with the License. You may obtain
;;;; a copy of the License at
;;;;
;;;; http://www.apache.org/licenses/LICENSE-2.0
;;;;
;;;; Unless required by applicable law or agreed to in writing,
;;;; software distributed under the License is distributed on an
;;;; "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
;;;; KIND, either express or implied. See the License for the
;;;; specific language governing permissions and limitations
;;;; under the License.
;;;;
(defpackage :parse
(:use :cl)
(:export
#:parse
;; declare a parse combinator
#:define-parser
;; monadic bind functions
#:>>=
#:>>
;; combinator macros
#:.prog1
#:.progn
#:.let
#:.let*
#:.do
#:.or
;; monadic functions
#:.ret
#:.fail
#:.get
#:.put
#:.modify
#:.push
#:.pop
;; parse combinators
#:.any
#:.eof
#:.is
#:.either
#:.opt
#:.ignore
#:.all
#:.maybe
#:.many
#:.many1
#:.many-until
#:.sep-by
#:.sep-by1
#:.skip-many
#:.skip-many1
#:.between))
(in-package :parse)
;;; ----------------------------------------------------
(defstruct parse-state read-token tokens token-last data)
;;; ----------------------------------------------------
(defun parse-state-next-token (st)
"Returns the next token in the token list as a cons pair."
(cadr (parse-state-tokens st)))
;;; ----------------------------------------------------
(defun parse-state-token-class (st)
"Returns the class of the current token."
(car (parse-state-next-token st)))
;;; ----------------------------------------------------
(defun parse-state-token-value (st)
"Returns the value of the current token."
(cdr (parse-state-next-token st)))
;;; ----------------------------------------------------
(defun parse (p next-token &key initial-state (errorp t) error-value)
"Create a parse-state and pass it through a parse combinator."
(let* ((token-cache (list nil))
;; create the initial parse state
(st (make-parse-state :tokens token-cache
:token-last token-cache
:data initial-state)))
;; create a function that will read into the shared token list
(setf (parse-state-read-token st)
#'(lambda ()
(multiple-value-bind (class value)
(funcall next-token)
(car (setf (parse-state-token-last st)
(cdr (rplacd (parse-state-token-last st)
(list (cons class value)))))))))
;; read the first token as the current token
(funcall (parse-state-read-token st))
;; parse the token stream
(multiple-value-bind (x okp)
(funcall p st)
(cond (okp (values x t))
;; should we error out?
(errorp (error "Parse failure"))
;; return the error result and parse failure
(t (values error-value nil))))))
;;; ----------------------------------------------------
(defun satisfy (st pred)
"Read the next token if necesary, test class, return value."
(destructuring-bind (class . value)
(let ((token (parse-state-next-token st)))
(if token
token
(funcall (parse-state-read-token st))))
(when (funcall pred class)
(let ((nst (copy-parse-state st)))
(multiple-value-prog1
(values value nst)
(pop (parse-state-tokens nst)))))))
;;; ----------------------------------------------------
(defmacro define-parser (name &body ps)
"Create a parse combinator."
(let ((st (gensym)))
`(defun ,name (,st)
;; add a documentation string to the parser if provided
,(when (stringp (first ps)) (pop ps))
;; parse the combinators, return the final result
(funcall (.do ,@ps) ,st))))
;;; ----------------------------------------------------
(defun >>= (p f)
"Monadic bind combinator."
#'(lambda (st)
(multiple-value-bind (x nst)
(funcall p st)
(when nst
(funcall (funcall f x) nst)))))
;;; ----------------------------------------------------
(defun >> (p m)
"Monadic bind, ignore intermediate result."
#'(lambda (st)
(let ((nst (nth-value 1 (funcall p st))))
(when nst
(funcall m nst)))))
;;; ----------------------------------------------------
(defmacro .prog1 (form &body rest)
"Macro to execute Lisp expressions, returning the first result."
`(.ret (prog1 ,form ,@rest)))
;;; ----------------------------------------------------
(defmacro .progn (&body rest)
"Macro to execute Lisp expressions, returning the last result."
`(.ret (progn ,@rest)))
;;; ----------------------------------------------------
(defmacro .let ((var p) &body body)
"Macro for >>= to make it more readable."
`(>>= ,p #'(lambda (,var) (declare (ignorable ,var)) ,@body)))
;;; ----------------------------------------------------
(defmacro .let* ((binding &rest bindings) &body body)
"Macro for making multiple .let bindings more readable."
(if (null bindings)
`(.let ,binding ,@body)
`(.let ,binding
(.let* ,bindings ,@body))))
;;; ----------------------------------------------------
(defmacro .do (p &rest ps)
"Chained together >> combinators."
(labels ((chain (p ps)
(if (null ps)
p
`(>> ,p ,(chain (first ps) (rest ps))))))
(chain p ps)))
;;; ----------------------------------------------------
(defmacro .or (p &rest ps)
"Chained together or combinators."
(labels ((try (p ps)
(if (null ps)
p
`(.either ,p ,(try (first ps) (rest ps))))))
(try p ps)))
;;; ----------------------------------------------------
(defun .ret (x)
"Convert X into a monadic value."
#'(lambda (st) (values x st)))
;;; ----------------------------------------------------
(defun .fail (datum &rest arguments)
"Ensures that the parse combinator fails."
#'(lambda (st)
(declare (ignore st))
(apply #'error datum arguments)))
;;; ----------------------------------------------------
(defun .get ()
"Always succeeds, returns the current parse state data."
#'(lambda (st)
(values (parse-state-data st) st)))
;;; ----------------------------------------------------
(defun .put (x)
"Always succeeds, puts data into the parse state."
#'(lambda (st)
(let ((nst (copy-parse-state st)))
(values (setf (parse-state-data nst) x) nst))))
;;; ----------------------------------------------------
(defun .modify (f)
"Always succeeds, applys f with the parse state data."
(.let (x (.get))
(.put (funcall f x))))
;;; ----------------------------------------------------
(defun .push (x)
"Always succeeds, assumes data is a list and pushes x onto it."
(.let (xs (.get))
(.put (cons x xs))))
;;; ----------------------------------------------------
(defun .pop ()
"Always succeeds, assumes data is a list an pops it."
(.let (xs (.get))
(.do (.put (cdr xs))
(.ret (car xs)))))
;;; ----------------------------------------------------
(defun .any ()
"Succeeds if not at the end of the token stream."
#'(lambda (st) (satisfy st #'identity)))
;;; ----------------------------------------------------
(defun .eof ()
"Succeeds if at the end of the token stream."
#'(lambda (st) (satisfy st #'null)))
;;; ----------------------------------------------------
(defun .is (class &key (test #'eql))
"Checks if the current token is of a given class."
#'(lambda (st) (satisfy st #'(lambda (c) (funcall test c class)))))
;;; ----------------------------------------------------
(defun .either (p1 p2)
"Attempt to parse p1, if that fails, try p2."
#'(lambda (st)
(multiple-value-bind (x nst)
(funcall p1 st)
(if nst
(values x nst)
(funcall p2 st)))))
;;; ----------------------------------------------------
(defun .opt (x p)
"Optionally match a parse combinator or return x."
(.either p (.ret x)))
;;; ----------------------------------------------------
(defun .ignore (p)
"Parse p, ignore the result."
(.do p (.ret nil)))
;;; ----------------------------------------------------
(defun .all (p &rest ps)
"Parse each combinator in order and return all as a list."
(.let (first p)
#'(lambda (st)
(loop
for p in ps
;; try the next combinator
for (x nst) = (multiple-value-list (funcall p st))
while nst
;; update the parse state
do (setf st nst)
;; keep all the matches in a list
collect x into rest
;; return the matches and final state
finally (return (values (cons first rest) st))))))
;;; ----------------------------------------------------
(defun .maybe (p)
"Try and parse p, ignore it if there."
(.opt nil (.ignore p)))
;;; ----------------------------------------------------
(defun .many (p)
"Try and parse a combinator zero or more times."
(.opt nil (.many1 p)))
;;; ----------------------------------------------------
(defun .many1 (p)
"Try and parse a combinator one or more times."
(.let (first p)
#'(lambda (st)
(loop
;; keep repeating the parse combinator until it fails
for (x nst) = (multiple-value-list (funcall p st))
while nst
;; update the parse state
do (setf st nst)
;; keep all the matches in a list
collect x into rest
;; return the matches and final state
finally (return (values (cons first rest) st))))))
;;; ----------------------------------------------------
(defun .many-until (p end)
"Parse zero or more combinators until an end combinator is reached."
#'(lambda (st)
(loop
;; try and parse the end
for nst = (nth-value 1 (funcall end st))
collect (if nst
(loop-finish)
(multiple-value-bind (x xst)
(funcall p st)
(if (null xst)
(return nil)
(prog1 x
(setf st xst)))))
;; join all the results together
into xs
;; return the results
finally (return (values xs nst)))))
;;; ----------------------------------------------------
(defun .sep-by (p sep)
"Zero or more occurances of p separated by sep."
(.opt nil (.sep-by1 p sep)))
;;; ----------------------------------------------------
(defun .sep-by1 (p sep)
"One or more occurances of p separated by sep."
(.let (x p)
(.let (xs (.many (.do sep p)))
(.ret (cons x xs)))))
;;; ----------------------------------------------------
(defun .skip-many (p)
"Optionally skip a parse combinator zero or more times."
(.opt nil (.skip-many1 p)))
;;; ----------------------------------------------------
(defun .skip-many1 (p)
"Try and parse a combinator one or more times, ignore it."
(.maybe (.many1 p)))
;;; ----------------------------------------------------
(defun .between (open-guard close-guard p)
"Capture a combinator between guards."
(.do open-guard (.let (x p) (.do close-guard (.ret x)))))