Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Wikilink titles support #221

Merged
merged 3 commits into from
Jun 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 110 additions & 89 deletions Marksman/Parser.fs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ module Markdown =
type WikiLinkInline
(
text: string,
doc: Option<string * SourceSpan>,
heading: option<string * SourceSpan>
doc: option<string * SourceSpan>,
heading: option<string * SourceSpan>,
title: option<string * SourceSpan>
) =
inherit LeafInline()
member val Text = text
Expand All @@ -31,6 +32,9 @@ module Markdown =
member val Heading = Option.map fst heading
member val HeadingSpan = Option.map snd heading

member val Title = Option.map fst title
member val TitleSpan = Option.map snd title

type TagInline(text: string) =
inherit LeafInline()

Expand Down Expand Up @@ -63,105 +67,122 @@ module Markdown =
else
false


/// <summary>Match links of the form `[[doc#heading|title]]`. Each of `doc`, `#heading` and `|title` may be omitted; the parser itself also accepts the degenerate forms `[[]]` and `[[|]]`.</summary>
type WikiLinkParser() as this =
inherit InlineParser()

do this.OpeningCharacters <- [| '[' |]

override this.Match(processor, slice) =
let nextChar = slice.PeekCharExtra(1)
override this.Match(processor, slice_) =
let mutable docSpan: option<SourceSpan> = None
let mutable headingSpan: option<SourceSpan> = None
let mutable titleSpan: option<SourceSpan> = None

let mutable slice = slice_ // this copy is necessary because byrefs cannot be captured

// helper functions for finite state machine
artempyanykh marked this conversation as resolved.
Show resolved Hide resolved
// Closes an open span: keeps its start and sets its end to the position just before
// `offset`. An absent span stays absent.
let updateSpanEnd offset (span: option<SourceSpan>) =
    match span with
    | Some opened -> Some(SourceSpan(opened.Start, offset - 1))
    | None -> None

// True when the character just before the current slice position is not a backslash,
// i.e. the current character is not preceded by an escape.
let isNotEscaped () =
    let previous = slice.PeekCharExtra(-1)
    previous <> '\\'
// Steps the slice to its next character and returns that character paired with the
// source position of the slice's new start. The step happens before the position is read.
let advance () =
    let next = slice.NextChar()
    let position = processor.GetSourcePosition(slice.Start)
    (next, position)

// state transition functions
// Final state: entered after the first closing `]`. Consumes one more character and
// succeeds only if it is the second `]`.
let parseEnd () =
    match advance () with
    | ']', _ -> true
    | _ -> false

// Title state: consumes characters one at a time until an unescaped `]` is found.
// Fails (returns false) on a line break or the zero character.
let rec parseTitle () =
    let ch, pos = advance ()

    // the first character seen in this state opens the title span; its end is patched later
    match titleSpan with
    | None -> titleSpan <- Some(SourceSpan(pos, -1))
    | Some _ -> ()

    if ch = ']' && isNotEscaped () then
        titleSpan <- updateSpanEnd pos titleSpan
        parseEnd ()
    elif ch.IsNewLineOrLineFeed() || ch.IsZero() then
        false
    else
        parseTitle ()

// Heading state: consumes characters until an unescaped `|` (switch to the title state)
// or an unescaped `]` (switch to the end state). Fails on a line break or the zero character.
let rec parseHeading () =
    let ch, pos = advance ()

    // the first character seen in this state opens the heading span; its end is patched later
    match headingSpan with
    | None -> headingSpan <- Some(SourceSpan(pos, -1))
    | Some _ -> ()

    if ch = '|' && isNotEscaped () then
        headingSpan <- updateSpanEnd pos headingSpan
        parseTitle ()
    elif ch = ']' && isNotEscaped () then
        headingSpan <- updateSpanEnd pos headingSpan
        parseEnd ()
    elif ch.IsNewLineOrLineFeed() || ch.IsZero() then
        false
    else
        parseHeading ()

// Document state. `offset` is the source position of the character the slice currently
// points at; it is only used to open the document span on the first call.
// Consumes characters until an unescaped `#`, `|` or `]` hands over to the heading,
// title or end state. Fails on a line break or the zero character.
let rec parseDoc offset =
    match docSpan with
    | None -> docSpan <- Some(SourceSpan(offset, -1))
    | Some _ -> ()

    let ch, pos = advance ()
    let isDelimiter = ch = '#' || ch = '|' || ch = ']'

    if isDelimiter && isNotEscaped () then
        // the span ends on the character just before the delimiter
        docSpan <- updateSpanEnd pos docSpan

        match ch with
        | '#' -> parseHeading ()
        | '|' -> parseTitle ()
        | _ -> parseEnd ()
    elif ch.IsNewLineOrLineFeed() || ch.IsZero() then
        false
    else
        // the span is already open, so the argument is not used to set it again
        parseDoc pos

// Entry point of the state machine. The slice starts on the first `[`; the link is only
// considered if the following character is a second `[`. The first character after `[[`
// then selects the initial state.
let parse () =
// step onto what must be the second opening bracket
let c, _ = advance ()

if c <> '[' then
false
else
// first character inside the brackets, with its source position
let c, offset = advance ()

// NOTE(review): the next line looks like a removed-line remnant of the diff view, not part of the new parser — confirm
let isRef = nextChar = '['
// NOTE(review): no escape check is made here, and the first document character is not
// tested for a line break; parseDoc advances before matching, so a link whose first
// inner character is a newline appears to be accepted — confirm whether that is intended
match c with
| '#' -> parseHeading ()
| '|' -> parseTitle ()
| ']' -> parseEnd ()
| _ -> parseDoc offset

if isRef then
let start = slice.Start
let offsetStart = processor.GetSourcePosition(start)
let offsetInnerStart = offsetStart + 2
// do the parsing (run the finite state machine)
let start = slice.Start
let offsetStart = processor.GetSourcePosition(start)
let hasParsedLink = parse ()
slice_ <- slice // update output parameter to modified slice state

let mutable offsetHashDelim: option<int> = None
let mutable found = false
let mutable current = slice.NextChar()
if hasParsedLink then
let offsetEnd = processor.GetSourcePosition(slice.Start)
let text = slice.Text.Substring(start, offsetEnd - offsetStart + 1)

let shouldStop (c: char) = c.IsNewLineOrLineFeed() || c.IsZero() || found
let contentAndSpan (span: SourceSpan) =
let contentSliceStart = start + (span.Start - offsetStart)
let contentSliceLen = span.End - span.Start + 1
let content = slice.Text.Substring(contentSliceStart, contentSliceLen)

while not (shouldStop current) do
if
current = '#'
&& slice.PeekCharExtra(-1) <> '\\'
&& offsetHashDelim.IsNone
then
offsetHashDelim <- Some(processor.GetSourcePosition(slice.Start))
(content, span)

if current = ']' then
let prev = slice.PeekCharExtra(-1)
let link =
WikiLinkInline(
text,
Option.map contentAndSpan docSpan,
Option.map contentAndSpan headingSpan,
Option.map contentAndSpan titleSpan
)

if prev = ']' then found <- true else current <- slice.NextChar()
else
current <- slice.NextChar()

if found then
let end_ = slice.Start
let offsetEnd = offsetStart + (end_ - start)
let offsetInnerEnd = offsetEnd - 2

let text = slice.Text.Substring(start, end_ - start + 1)

let doc, heading =
match offsetHashDelim with
| Some offsetHashDelim ->
let offsetDocStart = offsetInnerStart
let offsetDocEnd = offsetHashDelim - 1
let offsetHeadingStart = offsetHashDelim + 1
let offsetHeadingEnd = offsetInnerEnd

let docText =
if offsetDocEnd >= offsetDocStart then
slice.Text.Substring(
start + 2,
offsetDocEnd - offsetDocStart + 1
)
else
String.Empty

let headingText =
if offsetHeadingEnd >= offsetHeadingStart then
slice.Text.Substring(
start + 2 + (offsetDocEnd - offsetDocStart + 1) + 1,
offsetHeadingEnd - offsetHeadingStart + 1
)
else
String.Empty

let doc =
if String.IsNullOrEmpty docText then
None
else
(docText, SourceSpan(offsetDocStart, offsetDocEnd)) |> Some

let heading =
(headingText, SourceSpan(offsetHeadingStart, offsetHeadingEnd))
|> Some

doc, heading

| None ->
let offsetDocStart = offsetStart + 2
let offsetDocEnd = offsetEnd - 2

let docText =
slice.Text.Substring(start + 2, offsetDocEnd - offsetDocStart + 1)

Some(docText, SourceSpan(offsetDocStart, offsetDocEnd)), None


let link = WikiLinkInline(text, doc, heading)
link.Span <- SourceSpan(offsetStart, offsetEnd)
processor.Inline <- link

found
else
false
link.Span <- SourceSpan(offsetStart, offsetEnd)
processor.Inline <- link

hasParsedLink

let markdigPipeline =
let pipelineBuilder =
Expand Down
37 changes: 36 additions & 1 deletion Tests/ParserTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ module WikiLinkTests =
checkSnapshot document

[<Fact>]
let parser_wiki_empty_heading () =
let parse_wiki_empty_heading () =
// 0123456
let text = "[[T#]]"
let doc = scrapeString text
Expand Down Expand Up @@ -141,6 +141,41 @@ module WikiLinkTests =
let doc = scrapeString text
checkSnapshot doc

[<Fact>]
// Snapshot test: wiki link with document, heading and title parts all present.
let parse_wiki_with_title () =
// 0123456789012345
let text = "[[T#head|title]]"
let doc = scrapeString text
checkSnapshot doc

[<Fact>]
// Snapshot test: wiki link with document and heading, and a `|` followed by an empty title.
let parse_wiki_empty_title () =
// 01234567890
let text = "[[T#head|]]"
let doc = scrapeString text
checkSnapshot doc

[<Fact>]
// Snapshot test: wiki link with no document part, only a heading and a title.
let parse_wiki_no_doc_and_title () =
// 012345678901234
let text = "[[#head|title]]"
let doc = scrapeString text
checkSnapshot doc

[<Fact>]
// Snapshot test: wiki link containing only the title delimiter, with every part empty.
let parse_wiki_no_doc_and_no_title () =
// 01234
let text = "[[|]]"
let doc = scrapeString text
checkSnapshot doc

[<Fact>]
// Snapshot test: empty wiki link with nothing between the brackets.
let parse_wiki_all_empty () =
// 0123
let text = "[[]]"
let doc = scrapeString text
checkSnapshot doc

[<Fact>]
let complex_example_1 () =
let text =
Expand Down
32 changes: 32 additions & 0 deletions Tests/_snapshots/WikiLinkTests.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,37 @@
" doc=F\\#; (0,2)-(0,5)",
" head=Section #3; (0,6)-(0,16)"
]
},
"parse_wiki_all_empty": {
"AutoGenerated": [
"WL: [[]]; (0,0)-(0,4)",
" "
]
},
"parse_wiki_no_doc_and_title": {
"AutoGenerated": [
"WL: [[#head|title]]; (0,0)-(0,15)",
" head=head; (0,3)-(0,7)"
]
},
"parse_wiki_with_title": {
"AutoGenerated": [
"WL: [[T#head|title]]; (0,0)-(0,16)",
" doc=T; (0,2)-(0,3)",
" head=head; (0,4)-(0,8)"
]
},
"parse_wiki_no_doc_and_no_title": {
"AutoGenerated": [
"WL: [[|]]; (0,0)-(0,5)",
" "
]
},
"parse_wiki_empty_title": {
"AutoGenerated": [
"WL: [[T#head|]]; (0,0)-(0,11)",
" doc=T; (0,2)-(0,3)",
" head=head; (0,4)-(0,8)"
]
}
}