-
Notifications
You must be signed in to change notification settings - Fork 37
/
Copy pathParser.fs
508 lines (395 loc) · 18.8 KB
/
Parser.fs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
module Marksman.Parser
open System
open System.Collections.Generic
open Ionide.LanguageServerProtocol.Types
open Markdig.Syntax
open Marksman.Config
open Marksman.Misc
open Marksman.Names
open Marksman.Text
open Marksman.Structure
module Markdown =
open Markdig
open Markdig.Syntax.Inlines
open Markdig.Parsers
open Markdig.Helpers
open Markdig.Extensions.Yaml
open Marksman.Cst
/// Inline syntax node for a wiki-link.
///
/// `text` is the full source text of the link. Each of `doc`, `heading` and `title` is an
/// optional pair of (content, source span); the pairs are exposed below as separate
/// content and span properties.
type WikiLinkInline
    (
        text: string,
        doc: (string * SourceSpan) option,
        heading: (string * SourceSpan) option,
        title: (string * SourceSpan) option
    ) =
    inherit LeafInline()

    /// Full text of the link as given to the constructor.
    member val Text = text

    /// Content of the document part, if any.
    member val Doc = doc |> Option.map fst
    /// Source span of the document part, if any.
    member val DocSpan = doc |> Option.map snd

    /// Content of the heading part, if any.
    member val Heading = heading |> Option.map fst
    /// Source span of the heading part, if any.
    member val HeadingSpan = heading |> Option.map snd

    /// Content of the title part, if any.
    member val Title = title |> Option.map fst
    /// Source span of the title part, if any.
    member val TitleSpan = title |> Option.map snd
/// Inline syntax node for a tag; carries the tag text it was constructed with.
type TagInline(text: string) =
    inherit LeafInline()

    /// Tag text exactly as passed to the constructor.
    member val Text: string = text
/// Inline parser triggered by `#`. It consumes the following run of alphanumeric, `-`, `_`
/// and `/` characters and, when that run is non-empty, emits a `TagInline` whose text and
/// span cover the `#` and the run.
type TagsParser() as this =
    inherit InlineParser()

    do this.OpeningCharacters <- [| '#' |]

    override this.Match(processor, slice) =
        // Characters allowed after the opening '#'.
        let isTagChar (c: char) =
            match c with
            | '-'
            | '_'
            | '/' -> true
            | _ -> c.IsAlphaNumeric()

        // tags should not be placed inside words, URLs etc.
        let precededByWordChar = slice.PeekCharExtra(-1).IsAlphaNumeric()

        if precededByWordChar then
            false
        else
            let firstIdx = slice.Start
            let firstOffset = processor.GetSourcePosition(slice.Start)

            // Step forward for as long as the *next* character still belongs to the tag,
            // so the slice ends up positioned on the last accepted character.
            while isTagChar (slice.PeekChar()) do
                slice.NextChar() |> ignore

            // Number of characters accepted after the '#'.
            let consumed = slice.Start - firstIdx

            if consumed > 0 then
                let tag = TagInline(slice.Text.Substring(firstIdx, consumed + 1))
                tag.Span <- SourceSpan(firstOffset, firstOffset + consumed)
                processor.Inline <- tag
                true
            else
                // A lone '#' is not a tag.
                false
/// <summary>
/// Match wiki-links of the form `[[doc#heading|title]]`. As far as this parser is concerned
/// every part is optional: `[[doc]]`, `[[#heading]]`, `[[|title]]` and `[[]]` all match.
/// A link must be closed by `]]` and must not contain a line break.
/// </summary>
type WikiLinkParser() as this =
    inherit InlineParser()

    do this.OpeningCharacters <- [| '[' |]

    override this.Match(processor, slice_) =
        // Source spans of the individual parts. A span is created (with a placeholder end)
        // when its state is entered and gets its real end when the state is left.
        let mutable docSpan: option<SourceSpan> = None
        let mutable headingSpan: option<SourceSpan> = None
        let mutable titleSpan: option<SourceSpan> = None
        let mutable slice = slice_ // this copy is necessary because byrefs cannot be captured

        // helper functions for finite state machine

        // Close a span right before `offset` (the offset of the delimiter that ended it).
        let updateSpanEnd offset (span: option<SourceSpan>) =
            span |> Option.map (fun s -> SourceSpan(s.Start, offset - 1))

        // A delimiter only counts when it is not directly preceded by a backslash.
        let isNotEscaped () = slice.PeekCharExtra(-1) <> '\\'

        // Step to the next character; returns it together with its source position.
        let advance () = (slice.NextChar(), processor.GetSourcePosition(slice.Start))

        // state transition functions

        // After the first closing ']' the very next character must be the second ']'.
        let parseEnd () =
            let c, _ = advance ()
            c = ']'

        // Inside `|title`: runs until an unescaped ']'.
        let rec parseTitle () =
            let c, offset = advance ()

            if titleSpan.IsNone then
                titleSpan <- SourceSpan(offset, -1) |> Some

            match c with
            | ']' when isNotEscaped () ->
                titleSpan <- updateSpanEnd offset titleSpan
                parseEnd ()
            | c when c.IsNewLineOrLineFeed() || c.IsZero() -> false
            | _ -> parseTitle ()

        // Inside `#heading`: runs until an unescaped '|' or ']'.
        let rec parseHeading () =
            let c, offset = advance ()

            if headingSpan.IsNone then
                headingSpan <- SourceSpan(offset, -1) |> Some

            match c with
            | '|' when isNotEscaped () ->
                headingSpan <- updateSpanEnd offset headingSpan
                parseTitle ()
            | ']' when isNotEscaped () ->
                headingSpan <- updateSpanEnd offset headingSpan
                parseEnd ()
            | c when c.IsNewLineOrLineFeed() || c.IsZero() -> false
            | _ -> parseHeading ()

        // Inside `doc`: `offset` is the position of the character already consumed by the
        // caller; runs until an unescaped '#', '|' or ']'.
        let rec parseDoc offset =
            if docSpan.IsNone then
                docSpan <- SourceSpan(offset, -1) |> Some

            let c, offset = advance ()

            match c with
            | '#' when isNotEscaped () ->
                docSpan <- updateSpanEnd offset docSpan
                parseHeading ()
            | '|' when isNotEscaped () ->
                docSpan <- updateSpanEnd offset docSpan
                parseTitle ()
            | ']' when isNotEscaped () ->
                docSpan <- updateSpanEnd offset docSpan
                parseEnd ()
            | c when c.IsNewLineOrLineFeed() || c.IsZero() -> false
            | _ -> parseDoc offset

        // Entry state: the slice is on the first '['; a second '[' must follow directly.
        let parse () =
            let c, _ = advance ()

            if c <> '[' then
                false
            else
                let c, offset = advance ()

                match c with
                | '#' -> parseHeading ()
                | '|' -> parseTitle ()
                | ']' -> parseEnd ()
                // Apply the same line-break / end-of-input rule as every other state.
                // Without this case the first character after "[[" reached parseDoc
                // unchecked, so a link whose doc *starts* with a newline was accepted
                // while a newline anywhere else was rejected.
                | c when c.IsNewLineOrLineFeed() || c.IsZero() -> false
                | _ -> parseDoc offset

        // do the parsing (run the finite state machine)
        let start = slice.Start
        let offsetStart = processor.GetSourcePosition(start)
        let hasParsedLink = parse ()
        slice_ <- slice // update output parameter to modified slice state

        if hasParsedLink then
            // The slice now sits on the final ']', so both the text and the span are
            // inclusive of it.
            let offsetEnd = processor.GetSourcePosition(slice.Start)
            let text = slice.Text.Substring(start, offsetEnd - offsetStart + 1)

            // Translate a source span back into slice coordinates and cut out its content.
            let contentAndSpan (span: SourceSpan) =
                let contentSliceStart = start + (span.Start - offsetStart)
                let contentSliceLen = span.End - span.Start + 1
                let content = slice.Text.Substring(contentSliceStart, contentSliceLen)
                (content, span)

            let link =
                WikiLinkInline(
                    text,
                    Option.map contentAndSpan docSpan,
                    Option.map contentAndSpan headingSpan,
                    Option.map contentAndSpan titleSpan
                )

            link.Span <- SourceSpan(offsetStart, offsetEnd)
            processor.Inline <- link

        hasParsedLink
/// Markdig pipeline used for scraping documents: precise source locations and YAML front
/// matter are enabled, and the custom inline parsers are registered.
let markdigPipeline =
    let builder =
        MarkdownPipelineBuilder().UsePreciseSourceLocation().UseYamlFrontMatter()

    let inlineParsers = builder.InlineParsers
    // Both parsers are inserted at index 0, so the wiki-link parser (inserted last) ends up
    // in front of the patched link parser.
    inlineParsers.Insert(0, MarkdigPatches.PatchedLinkInlineParser())
    inlineParsers.Insert(0, WikiLinkParser())
    // The tags parser goes to the end of the list.
    inlineParsers.Add(TagsParser())

    builder.Build()
/// Convert a Markdig source span (whose `End` is the offset of the last character) into a
/// range whose `End` lies just past that character. An empty span yields an empty range
/// located at the span's start.
let sourceSpanToRange (text: Text) (span: SourceSpan) : Range =
    let first = text.lineMap.FindPosition(span.Start)

    // Position immediately after the span's last character.
    let pastLast () =
        let last = text.lineMap.FindPosition(span.End)
        // Step over two code units when the last character is a surrogate, one otherwise.
        let width = if Char.IsSurrogate(text.content, span.End) then 2 else 1
        { last with Character = last.Character + width }

    { Start = first; End = (if span.IsEmpty then first else pastLast ()) }
/// Parse `text` with `markdigPipeline` and collect, in traversal order, a flat array of
/// the elements found: YAML front matter, headings, wiki-links, markdown links, link
/// reference definitions and tags. Every other syntax node is ignored.
let scrapeText (parserSettings: ParserSettings) (text: Text) : array<Element> =
    let document: MarkdownObject = Markdown.Parse(text.content, markdigPipeline)
    let collected = ResizeArray<Element>()
    // For every heading slug seen so far: zero-based count of its latest occurrence.
    let occurrenceBySlug = Dictionary<Slug, int>()

    let rangeOf (span: SourceSpan) = sourceSpanToRange text span
    let sourceOf (span: SourceSpan) = text.content.Substring(span.Start, span.Length)

    // YAML front matter: one text node covering the whole block.
    let addFrontMatter (block: YamlFrontMatterBlock) =
        let blockText = sourceOf block.Span
        let node: TextNode = Node.mkText blockText (rangeOf block.Span)
        collected.Add(YML node)

    // Heading: strip leading '#'/spaces and trailing spaces to get the title, and number
    // repeated slugs so that they can be disambiguated.
    let addHeading (block: HeadingBlock) =
        let source = sourceOf block.Span
        let withoutPrefix = source.TrimStart(' ', '#')
        let trimmedTitle = withoutPrefix.TrimEnd(' ')
        let prefixLen = source.Length - withoutPrefix.Length
        let suffixLen = withoutPrefix.Length - trimmedTitle.Length

        let titleRange =
            rangeOf (SourceSpan(block.Span.Start + prefixLen, block.Span.End - suffixLen))

        let scope = rangeOf block.Span

        let draft = {
            level = block.Level
            isTitle = parserSettings.titleFromHeading && block.Level <= 1
            title = Node.mkText trimmedTitle titleRange
            disambiguation = None
            scope = scope
        }

        // The slug is computed before any disambiguation is attached.
        let slug = Heading.slug draft

        let occurrence =
            match occurrenceBySlug.TryGetValue(slug) with
            | true, previous -> previous + 1
            | false, _ -> 0

        occurrenceBySlug.[slug] <- occurrence

        // Only repeated headings get a suffix, and only when the setting is on.
        let disambiguation =
            if parserSettings.glfmHeadingIds && occurrence <> 0 then
                Some $"{occurrence}"
            else
                None

        collected.Add(H(Node.mk source scope { draft with disambiguation = disambiguation }))

    // Wiki-link: doc and heading become nodes only when both content and span are present.
    let addWikiLink (link: WikiLinkInline) =
        let encodedNode (content: string) (span: SourceSpan) =
            Node.mk content (rangeOf span) (WikiEncoded.mkUnchecked content)

        let doc = Option.map2 encodedNode link.Doc link.DocSpan
        let heading = Option.map2 encodedNode link.Heading link.HeadingSpan
        let wikiLink: WikiLink = { doc = doc; heading = heading }
        collected.Add(WL(Node.mk link.Text (rangeOf link.Span) wikiLink))

    // Markdown link: classified as shortcut reference, inline link, collapsed reference or
    // full reference.
    let addMdLink (inl: LinkInline) =
        let wholeRange = rangeOf inl.Span
        let wholeText = sourceOf inl.Span
        let labelSpan = inl.LabelSpan
        let urlSpan = inl.UrlSpan
        let titleSpan = inl.TitleSpan

        let labelText =
            if labelSpan.IsEmpty then String.Empty else sourceOf labelSpan

        // Built on demand because one path below emits nothing at all.
        let labelNode () = Node.mkText labelText (rangeOf labelSpan)
        let emit link = collected.Add(ML(Node.mk wholeText wholeRange link))

        if inl.IsShortcut then
            emit (MdLink.RS(labelNode ()))
        // Hack: a link whose source ends with ')' is treated as a regular (inline) link
        elif wholeText.EndsWith(')') then
            let label = labelNode ()

            let url =
                if urlSpan.IsEmpty then
                    None
                else
                    Some(Node.mk inl.Url (rangeOf urlSpan) (UrlEncoded.mkUnchecked inl.Url))

            let title =
                if titleSpan.IsEmpty then
                    None
                else
                    Some(Node.mkText inl.Title (rangeOf titleSpan))

            emit (MdLink.IL(text = label, url = url, title = title))
        // Another hack: url span = label span => collapsed ref
        elif urlSpan = labelSpan then
            emit (MdLink.RC(labelNode ()))
        // The last remaining option is full reference
        //
        // NOTE: there's something off in Markdig's LinkHelper.TryParseLabelTrivia
        // This is called from PatchedInlineLinkParser:341.
        // TODO: Figure out why there's an incorrect urlSpan (-1 -- 0) returned.
        elif not (String.IsNullOrEmpty inl.Url) then
            let textNode = labelNode ()
            let refLabel = Node.mkText inl.Url (rangeOf urlSpan)
            emit (MdLink.RF(textNode, refLabel))

    // Link reference definition: label and url always, title only when its span is non-empty.
    let addLinkDef (linkDef: LinkReferenceDefinition) =
        let defRange = rangeOf linkDef.Span
        let defText = sourceOf linkDef.Span
        let label = Node.mkText linkDef.Label (rangeOf linkDef.LabelSpan)

        let url =
            Node.mk linkDef.Url (rangeOf linkDef.UrlSpan) (UrlEncoded.mkUnchecked linkDef.Url)

        let title =
            if linkDef.TitleSpan.IsEmpty then
                None
            else
                Some(Node.mkText linkDef.Title (rangeOf linkDef.TitleSpan))

        collected.Add(MLD(MdLinkDef.mk label url title |> Node.mk defText defRange))

    // Tag: the name node excludes a leading '#', the tag node covers the full text.
    let addTag (inl: TagInline) =
        let fullText = inl.Text
        let fullRange = rangeOf inl.Span

        let nameText, nameRange =
            if fullText.StartsWith('#') then
                fullText.Substring(1),
                { Start = fullRange.Start.NextChar(1); End = fullRange.End }
            else
                fullText, fullRange

        let tag = { name = Node.mkText nameText nameRange }
        collected.Add(T(Node.mk fullText fullRange tag))

    for node in document.Descendants() do
        match node with
        | :? YamlFrontMatterBlock as block -> addFrontMatter block
        | :? HeadingBlock as block -> addHeading block
        | :? WikiLinkInline as link -> addWikiLink link
        | :? LinkInline as link -> addMdLink link
        | :? LinkReferenceDefinition as linkDef -> addLinkDef linkDef
        | :? TagInline as tag -> addTag tag
        | _ -> ()

    collected.ToArray()
/// Sort `elements` in place, ordering them by the start offset of their range and then by
/// the end offset.
let private sortElements (text: Text) (elements: array<Element>) : unit =
    let offsetsOf el =
        let range = Element.range el
        text.lineMap.FindOffset(range.Start), text.lineMap.FindOffset(range.End)

    elements |> Array.sortInPlaceBy offsetsOf
/// Build the concrete syntax tree from a flat array of elements.
///
/// Walks the elements in input order while keeping a stack of currently open headings.
/// Each heading's scope is extended up to the start of the heading that closes it (or to
/// the end of the text when none does), every element is recorded as a child of the
/// innermost heading enclosing it, and finally elements and children are sorted by offset.
let buildCst (text: Text) (inputElements: Element[]) : Cst =
    // True when `otherHeader` sits at the same level as `baseHeader` or deeper, i.e. when
    // `baseHeader` closes `otherHeader`.
    let nestedDeeperThan (_, baseHeader) (_, otherHeader) =
        otherHeader.data.level >= baseHeader.data.level

    // heading index -> position where that heading's scope ends
    let scopeEnds = Dictionary()
    // heading index -> indices of the elements directly below that heading
    let childIdxsByParent = Dictionary<int, ResizeArray<int>>()
    let outputElements = ResizeArray<Element>()

    // Fold step: `openHeadings` is the stack of open headings, innermost first.
    let step openHeadings (idx: int, el: Element) =
        outputElements.Add(el)

        let ancestors, stillOpen =
            match el with
            | H current ->
                let closedByCurrent entry = nestedDeeperThan (idx, current) entry

                // Close headings nested deeper than the current one
                for (closedIdx, _) in List.takeWhile closedByCurrent openHeadings do
                    scopeEnds.Add(closedIdx, current.data.scope.Start)

                let ancestors = List.skipWhile closedByCurrent openHeadings
                ancestors, (idx, current) :: ancestors
            | _ -> openHeadings, openHeadings

        // Register the element with its innermost enclosing heading, if there is one.
        match ancestors with
        | [] -> ()
        | (parentIdx, _) :: _ ->
            let siblings =
                match childIdxsByParent.TryGetValue(parentIdx) with
                | true, existing -> existing
                | false, _ ->
                    let fresh = ResizeArray()
                    childIdxsByParent.Add(parentIdx, fresh)
                    fresh

            siblings.Add(idx)

        stillOpen

    let unclosed = Array.indexed inputElements |> Array.fold step []

    // Add unclosed headings to the scope map with 'text end' scope
    for (idx, _) in unclosed do
        scopeEnds.Add(idx, text.EndRange().Start)

    // Update header scopes
    for KeyValue(headerIdx, scopeEnd) in scopeEnds do
        match outputElements[headerIdx] with
        | H header ->
            let widened =
                { header.data with
                    scope = { Start = header.data.scope.Start; End = scopeEnd }
                }

            outputElements[headerIdx] <- H { header with data = widened }
        | other -> failwith $"Unexpected non-heading element at idx {headerIdx}: {other}"

    // Resolve indices to elements only now, so that children and parents carry the
    // updated heading scopes.
    let childMap =
        childIdxsByParent
        |> Seq.map (fun (KeyValue(parentId, childIds)) ->
            let children = childIds.ToArray() |> Array.map (fun i -> outputElements[i])
            sortElements text children
            outputElements[parentId], children)
        |> Map.ofSeq

    let elements = outputElements.ToArray()
    sortElements text elements
    { elements = elements; childMap = childMap }
/// Parse `text` into a `Structure`. Null or empty content produces the structure of an
/// empty CST without invoking the markdown parser.
let parse (parserSettings: ParserSettings) (text: Text) : Structure =
    let cst =
        if String.IsNullOrEmpty text.content then
            ({ elements = [||]; childMap = Map.empty }: Cst.Cst)
        else
            text
            |> Markdown.scrapeText parserSettings
            |> Markdown.buildCst text

    Structure.ofCst parserSettings cst