From 900f6f1a89f52c17ba6c80a569bbcd9db2d3a95a Mon Sep 17 00:00:00 2001 From: Georg Makowski Date: Tue, 26 Mar 2024 21:09:21 +0100 Subject: [PATCH] Added a working version with **different** NodeKinds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a preliminary version. The goal is to create one NodeKind "inlineTag" for all simple inline tags. But I am not sure if it’s going to work out, so I want to leave this on record. --- extras/_test/insert.txt | 57 +++++++++++ extras/_test/mark.txt | 57 +++++++++++ extras/_test/subscript.txt | 62 ++++++++++++ extras/_test/superscript.txt | 69 +++++++++++++ extras/ast/inline.go | 106 ++++++++++++++++++++ extras/go.mod | 5 + extras/go.sum | 2 + extras/inline.go | 150 ++++++++++++++++++++++++++++ extras/inline_test.go | 187 +++++++++++++++++++++++++++++++++++ extras_old/go.mod | 2 +- extras_old/insert.go | 2 +- extras_old/subscript.go | 2 +- extras_old/superscript.go | 2 +- 13 files changed, 699 insertions(+), 4 deletions(-) create mode 100644 extras/_test/insert.txt create mode 100644 extras/_test/mark.txt create mode 100644 extras/_test/subscript.txt create mode 100644 extras/_test/superscript.txt create mode 100644 extras/ast/inline.go create mode 100644 extras/go.mod create mode 100644 extras/go.sum create mode 100644 extras/inline.go create mode 100644 extras/inline_test.go diff --git a/extras/_test/insert.txt b/extras/_test/insert.txt new file mode 100644 index 0000000..c3d50ef --- /dev/null +++ b/extras/_test/insert.txt @@ -0,0 +1,57 @@ +1 +//- - - - - - - - -// +++Hi++ Hello, world! +//- - - - - - - - -// +

<p><ins>Hi</ins> Hello, world!</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +2 +//- - - - - - - - -// +This ++has a + +new paragraph++. +//- - - - - - - - -// +

<p>This ++has a</p>

+

<p>new paragraph++.</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +3 +//- - - - - - - - -// +x ++++foo++ bar++ +//- - - - - - - - -// +

<p>x <ins><ins>foo</ins> bar</ins></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +4 +//- - - - - - - - -// +x ++foo ++bar++++ +//- - - - - - - - -// +

<p>x <ins>foo <ins>bar</ins></ins></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +5 +//- - - - - - - - -// +x ++++foo++++ +//- - - - - - - - -// +

<p>x <ins><ins>foo</ins></ins></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +6 +//- - - - - - - - -// +**++test**++ + +++**test++** +//- - - - - - - - -// +

<p><strong>++test</strong>++</p>

+

<p><ins>**test</ins>**</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +7 +//- - - - - - - - -// +[++link]()++ + +++[link++]() +//- - - - - - - - -// +

<p><a href="">++link</a>++</p>

+

<p>++<a href="">link++</a></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// diff --git a/extras/_test/mark.txt b/extras/_test/mark.txt new file mode 100644 index 0000000..a3261ad --- /dev/null +++ b/extras/_test/mark.txt @@ -0,0 +1,57 @@ +1 +//- - - - - - - - -// +==Hello==, world! +//- - - - - - - - -// +

<p><mark>Hello</mark>, world!</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +2 +//- - - - - - - - -// +This mark ==has a + +new paragraph==. +//- - - - - - - - -// +

<p>This mark ==has a</p>

+

<p>new paragraph==.</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +3 +//- - - - - - - - -// +x ====foo== bar== +//- - - - - - - - -// +

<p>x <mark><mark>foo</mark> bar</mark></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +4 +//- - - - - - - - -// +x ==foo ==bar==== +//- - - - - - - - -// +

<p>x <mark>foo <mark>bar</mark></mark></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +5 +//- - - - - - - - -// +x ====foo==== +//- - - - - - - - -// +

<p>x <mark><mark>foo</mark></mark></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +6 +//- - - - - - - - -// +**==test**== + +==**test==** +//- - - - - - - - -// +

<p><strong>==test</strong>==</p>

+

<p><mark>**test</mark>**</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +7 +//- - - - - - - - -// +[==link]()== + +==[link==]() +//- - - - - - - - -// +

<p><a href="">==link</a>==</p>

+

<p>==<a href="">link==</a></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// diff --git a/extras/_test/subscript.txt b/extras/_test/subscript.txt new file mode 100644 index 0000000..59f3ebb --- /dev/null +++ b/extras/_test/subscript.txt @@ -0,0 +1,62 @@ +1: Surrounded by tildes +//- - - - - - - - -// +~foo~ +//- - - - - - - - -// +

<p><sub>foo</sub></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +2: Formula with tildes in the middle +//- - - - - - - - -// +H~2~O +//- - - - - - - - -// +

<p>H<sub>2</sub>O</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +3: Indices +//- - - - - - - - -// +x~i~ + x~j~ +//- - - - - - - - -// +

<p>x<sub>i</sub> + x<sub>j</sub></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +4: Escaped tilde +//- - - - - - - - -// +~foo\~ +//- - - - - - - - -// +

<p>~foo~</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +5: Non-breaking space entity +//- - - - - - - - -// +~foo&nbsp;bar~ +//- - - - - - - - -// +

<p><sub>foo bar</sub></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +6: Non-breaking space UTF-8 +//- - - - - - - - -// +~foo bar~ +//- - - - - - - - -// +

<p><sub>foo bar</sub></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +7: Mixed subscript and strikethrough +//- - - - - - - - -// +~~x~foobar~~~ +//- - - - - - - - -// +

<p><del>x<sub>foobar</sub></del></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +8: Tildes in the middle and text before +//- - - - - - - - -// +text H~2~O +//- - - - - - - - -// +

<p>text H<sub>2</sub>O</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +9: Tildes in the middle and text after +//- - - - - - - - -// +H~2~O text +//- - - - - - - - -// +

<p>H<sub>2</sub>O text</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// diff --git a/extras/_test/superscript.txt b/extras/_test/superscript.txt new file mode 100644 index 0000000..7bf06bf --- /dev/null +++ b/extras/_test/superscript.txt @@ -0,0 +1,69 @@ +1: Surrounded by carets +//- - - - - - - - -// +^foo^ +//- - - - - - - - -// +

<p><sup>foo</sup></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +2: Ordinal indicator +//- - - - - - - - -// +2^nd^ +//- - - - - - - - -// +

<p>2<sup>nd</sup></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +3: Powers +//- - - - - - - - -// +x^2^ + x^5^ +//- - - - - - - - -// +

<p>x<sup>2</sup> + x<sup>5</sup></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +4: Escaped caret +//- - - - - - - - -// +^foo\^ +//- - - - - - - - -// +

<p>^foo^</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +5: Carets surround text with a non-breaking space entity +//- - - - - - - - -// +^foo&nbsp;bar^ +//- - - - - - - - -// +

<p><sup>foo bar</sup></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +6: Carets surround text with a non-breaking space (UTF-8) +//- - - - - - - - -// +^foo bar^ +//- - - - - - - - -// +

<p><sup>foo bar</sup></p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +7: Carets in the middle and text before +//- - - - - - - - -// +text C^foo^C +//- - - - - - - - -// +

<p>text C<sup>foo</sup>C</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +8: Carets in the middle and text after +//- - - - - - - - -// +C^foo^C text +//- - - - - - - - -// +

<p>C<sup>foo</sup>C text</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +9: Wrong markers in LaTeX style notation should be left untouched +//- - - - - - - - -// +x^2 + x^3 +//- - - - - - - - -// +

<p>x^2 + x^3</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// + +10: Footnote markers should be left untouched +//- - - - - - - - -// +text[^1] text[^2] +//- - - - - - - - -// +

<p>text[^1] text[^2]</p>

+//= = = = = = = = = = = = = = = = = = = = = = = =// diff --git a/extras/ast/inline.go b/extras/ast/inline.go new file mode 100644 index 0000000..74e2413 --- /dev/null +++ b/extras/ast/inline.go @@ -0,0 +1,106 @@ +package ast + +import ( + gast "github.com/yuin/goldmark/ast" +) + +type TagType int + +const ( + Superscript TagType = iota + 1 + Subscript + Insert + Mark +) + +type Tag struct { + TagType TagType + Char byte + Number int + Html string + WhitespaceAllowed bool + ParsePriority int + RenderPriority int +} + +var SuperscriptTag = Tag{ + TagType: Superscript, + Char: '^', + Number: 1, + Html: "sup", + WhitespaceAllowed: false, + ParsePriority: 600, + RenderPriority: 600, +} + +var SubscriptTag = Tag{ + TagType: Subscript, + Char: '~', + Number: 1, + Html: "sub", + WhitespaceAllowed: false, + ParsePriority: 602, + RenderPriority: 602, +} + +var InsertTag = Tag{ + TagType: Insert, + Char: '+', + Number: 2, + Html: "ins", + WhitespaceAllowed: true, + ParsePriority: 501, + RenderPriority: 501, +} + +var MarkTag = Tag{ + TagType: Mark, + Char: '=', + Number: 2, + Html: "mark", + WhitespaceAllowed: true, + ParsePriority: 550, + RenderPriority: 550, +} + +type InlineTag struct { + gast.BaseInline + + Tag +} + +func NewInlineTag(tag Tag) *InlineTag { + return &InlineTag{ + BaseInline: gast.BaseInline{}, + + Tag: tag, + } +} + +var KindSuperscript = gast.NewNodeKind("Superscript") +var KindSubscript = gast.NewNodeKind("Subscript") +var KindInsert = gast.NewNodeKind("Insert") +var KindMark = gast.NewNodeKind("Mark") + +func NewInlineTagKind(t TagType) gast.NodeKind { + var kind gast.NodeKind + switch t { + case Superscript: + kind = KindSuperscript + case Subscript: + kind = KindSubscript + case Insert: + kind = KindInsert + case Mark: + kind = KindMark + } + return kind +} + +func (n *InlineTag) Kind() gast.NodeKind { + return NewInlineTagKind(n.TagType) +} + +func (n *InlineTag) Dump(source []byte, level int) { + gast.DumpHelper(n, source, level, nil, nil) +} diff 
--git a/extras/go.mod b/extras/go.mod new file mode 100644 index 0000000..e70ff64 --- /dev/null +++ b/extras/go.mod @@ -0,0 +1,5 @@ +module github.com/gohugoio/hugo-goldmark-extensions/extras + +go 1.22.1 + +require github.com/yuin/goldmark v1.7.0 diff --git a/extras/go.sum b/extras/go.sum new file mode 100644 index 0000000..dd24077 --- /dev/null +++ b/extras/go.sum @@ -0,0 +1,2 @@ +github.com/yuin/goldmark v1.7.0 h1:EfOIvIMZIzHdB/R/zVrikYLPPwJlfMcNczJFMs1m6sA= +github.com/yuin/goldmark v1.7.0/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E= diff --git a/extras/inline.go b/extras/inline.go new file mode 100644 index 0000000..ba17a98 --- /dev/null +++ b/extras/inline.go @@ -0,0 +1,150 @@ +package extras + +import ( + "github.com/gohugoio/hugo-goldmark-extensions/extras/ast" + "github.com/yuin/goldmark" + gast "github.com/yuin/goldmark/ast" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/text" + "github.com/yuin/goldmark/util" +) + +type inlineTagDelimiterProcessor struct { + ast.Tag +} + +func newInlineTagDelimiterProcessor(tag ast.Tag) parser.DelimiterProcessor { + return &inlineTagDelimiterProcessor{tag} +} + +func (p *inlineTagDelimiterProcessor) IsDelimiter(b byte) bool { + return b == p.Char +} + +func (p *inlineTagDelimiterProcessor) CanOpenCloser(opener, closer *parser.Delimiter) bool { + return opener.Char == closer.Char +} + +func (p *inlineTagDelimiterProcessor) OnMatch(_ int) gast.Node { + return ast.NewInlineTag(p.Tag) +} + +type inlineTagParser struct { + ast.Tag +} + +func newInlineTagParser(tag ast.Tag) parser.InlineParser { + return &inlineTagParser{Tag: tag} +} + +// Trigger implements parser.InlineParser. +func (s *inlineTagParser) Trigger() []byte { + return []byte{s.Char} +} + +// Parse implements the parser.InlineParser for all types of InlineTags. 
+func (s *inlineTagParser) Parse(_ gast.Node, block text.Reader, pc parser.Context) gast.Node { + before := block.PrecendingCharacter() + line, segment := block.PeekLine() + node := parser.ScanDelimiter(line, before, s.Number, newInlineTagDelimiterProcessor(s.Tag)) + if node == nil { + return nil + } + if !s.WhitespaceAllowed && node.CanOpen && hasSpace(line) { + if !(node.CanClose && pc.LastDelimiter() != nil && pc.LastDelimiter().Char == node.Char) { + return nil + } + } + node.Segment = segment.WithStop(segment.Start + node.OriginalLength) + block.Advance(node.OriginalLength) + pc.PushDelimiter(node) + return node +} + +// Check if there is a space in the line before the next marker or the end. +func hasSpace(line []byte) bool { + marker := line[0] + for i := 1; i < len(line); i++ { + c := line[i] + if c == marker { + break + } + if util.IsSpace(c) { + return true + } + } + return false +} + +type inlineTagHTMLRenderer struct { + htmlTag string + tagType ast.TagType + html.Config +} + +// newInlineTagHTMLRenderer returns a new NodeRenderer that renders InlineTag nodes to HTML. +func newInlineTagHTMLRenderer(tag ast.Tag, opts ...html.Option) renderer.NodeRenderer { + r := &inlineTagHTMLRenderer{ + htmlTag: tag.Html, + tagType: tag.TagType, + Config: html.NewConfig(), + } + for _, opt := range opts { + opt.SetHTMLOption(&r.Config) + } + return r +} + +// RegisterFuncs registers rendering functions to the given NodeRendererFuncRegisterer. +func (r *inlineTagHTMLRenderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) { + reg.Register(ast.NewInlineTagKind(r.tagType), r.renderInlineTag) +} + +// inlineTagAttributeFilter is a global filter for attributes. +var inlineTagAttributeFilter = html.GlobalAttributeFilter + +// renderInlineTag renders an inline tag. 
+func (r *inlineTagHTMLRenderer) renderInlineTag( + w util.BufWriter, _ []byte, n gast.Node, entering bool) (gast.WalkStatus, error) { + if entering { + _ = w.WriteByte('<') + _, _ = w.WriteString(r.htmlTag) + if n.Attributes() != nil { + html.RenderAttributes(w, n, inlineTagAttributeFilter) + } + } else { + _, _ = w.WriteString("</") + _, _ = w.WriteString(r.htmlTag) + } + _ = w.WriteByte('>') + return gast.WalkContinue, nil +} + +// inlineTag is a general inline tag parser and renderer. +type inlineTag struct { + ast.Tag +} + +// Superscript is an inline tag parser and renderer for superscript text. +var Superscript = &inlineTag{ast.SuperscriptTag} + +// Subscript is an inline tag parser and renderer for subscript text. +var Subscript = &inlineTag{ast.SubscriptTag} + +// Insert is an inline tag parser and renderer for inserted text. +var Insert = &inlineTag{ast.InsertTag} + +// Mark is an inline tag parser and renderer for marked text. +var Mark = &inlineTag{ast.MarkTag} + +// Extend adds inline tags to the Markdown parser and renderer. +func (n *inlineTag) Extend(m goldmark.Markdown) { + m.Parser().AddOptions(parser.WithInlineParsers( + util.Prioritized(newInlineTagParser(n.Tag), n.ParsePriority), + )) + m.Renderer().AddOptions(renderer.WithNodeRenderers( + util.Prioritized(newInlineTagHTMLRenderer(n.Tag), n.RenderPriority), + )) +} diff --git a/extras/inline_test.go b/extras/inline_test.go new file mode 100644 index 0000000..ddebf20 --- /dev/null +++ b/extras/inline_test.go @@ -0,0 +1,187 @@ +package extras + +import ( + "bytes" + "github.com/yuin/goldmark/extension" + "github.com/yuin/goldmark/text" + "testing" + + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/testutil" +) + +func TestSuperscript(t *testing.T) { + markdown := goldmark.New( + goldmark.WithExtensions(Superscript), + ) + testutil.DoTestCaseFile(markdown, "_test/superscript.txt", t, testutil.ParseCliCaseArg()...) +} + +func TestSuperscriptDump(t *testing.T) { + input := "Parabola: f(x) = x^2^. 
Amazing" + markdown := goldmark.New(goldmark.WithExtensions(Superscript)) + root := markdown.Parser().Parse(text.NewReader([]byte(input))) + root.Dump([]byte(input), 0) + // Prints to stdout, so just test that it doesn't crash +} + +func BenchmarkWithAndWithoutOneSuperscript(b *testing.B) { + const input = ` +## Parabola + +This formula contains one superscript: f(x) = x^2^ .` + + b.Run("without superscript", func(b *testing.B) { + markdown := goldmark.New() + b.ResetTimer() + for i := 0; i < b.N; i++ { + var buf bytes.Buffer + if err := markdown.Convert([]byte(input), &buf); err != nil { + b.Fatal(err) + } + } + }) + + b.Run("with superscript", func(b *testing.B) { + markdown := goldmark.New(goldmark.WithExtensions(Superscript)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + var buf bytes.Buffer + if err := markdown.Convert([]byte(input), &buf); err != nil { + b.Fatal(err) + } + } + }) +} + +func TestSubscript(t *testing.T) { + markdown := goldmark.New( + goldmark.WithExtensions( + Subscript, extension.Strikethrough, + ), + ) + testutil.DoTestCaseFile(markdown, "_test/subscript.txt", t, testutil.ParseCliCaseArg()...) 
+} + +func TestSubscriptDump(t *testing.T) { + input := "The H~2~O molecule" + markdown := goldmark.New( + goldmark.WithExtensions(Subscript), + ) + root := markdown.Parser().Parse(text.NewReader([]byte(input))) + root.Dump([]byte(input), 0) + // Prints to stdout, so just test that it doesn't crash +} + +func BenchmarkWithAndWithoutOneSubscript(b *testing.B) { + const input = ` +## Water formula + +The chemical formula for water H~2~O contains one subscript.` + + b.Run("without subscript", func(b *testing.B) { + markdown := goldmark.New() + b.ResetTimer() + for i := 0; i < b.N; i++ { + var buf bytes.Buffer + if err := markdown.Convert([]byte(input), &buf); err != nil { + b.Fatal(err) + } + } + }) + + b.Run("with subscript", func(b *testing.B) { + markdown := goldmark.New(goldmark.WithExtensions(Subscript)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + var buf bytes.Buffer + if err := markdown.Convert([]byte(input), &buf); err != nil { + b.Fatal(err) + } + } + }) +} + +func TestInsert(t *testing.T) { + markdown := goldmark.New(goldmark.WithExtensions(Insert)) + testutil.DoTestCaseFile(markdown, "_test/insert.txt", t, testutil.ParseCliCaseArg()...) +} + +func TestInsertDump(t *testing.T) { + input := "Add some text: ++insertion++. Amazing." + markdown := goldmark.New(goldmark.WithExtensions(Insert)) + root := markdown.Parser().Parse(text.NewReader([]byte(input))) + root.Dump([]byte(input), 0) + // Prints to stdout, so just test that it doesn't crash +} + +func BenchmarkWithAndWithoutInsert(b *testing.B) { + const input = ` +## Insert text explicitly + +Add some text: ++insertion++. 
Amazing.` + + b.Run("without insert", func(b *testing.B) { + markdown := goldmark.New() + b.ResetTimer() + for i := 0; i < b.N; i++ { + var buf bytes.Buffer + if err := markdown.Convert([]byte(input), &buf); err != nil { + b.Fatal(err) + } + } + }) + + b.Run("with insert", func(b *testing.B) { + markdown := goldmark.New(goldmark.WithExtensions(Insert)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + var buf bytes.Buffer + if err := markdown.Convert([]byte(input), &buf); err != nil { + b.Fatal(err) + } + } + }) +} + +func TestMark(t *testing.T) { + markdown := goldmark.New(goldmark.WithExtensions(Mark)) + testutil.DoTestCaseFile(markdown, "_test/mark.txt", t, testutil.ParseCliCaseArg()...) +} + +func TestMarkDump(t *testing.T) { + input := "Add some marked text: ==marked==. Amazing." + markdown := goldmark.New(goldmark.WithExtensions(Mark)) + root := markdown.Parser().Parse(text.NewReader([]byte(input))) + root.Dump([]byte(input), 0) + // Prints to stdout, so just test that it doesn't crash +} + +func BenchmarkWithAndWithoutMark(b *testing.B) { + const input = ` +## Mark text + +Add some marked text: ==marked==. 
Amazing.` + + b.Run("without mark extension", func(b *testing.B) { + markdown := goldmark.New() + b.ResetTimer() + for i := 0; i < b.N; i++ { + var buf bytes.Buffer + if err := markdown.Convert([]byte(input), &buf); err != nil { + b.Fatal(err) + } + } + }) + + b.Run("with mark extension", func(b *testing.B) { + markdown := goldmark.New(goldmark.WithExtensions(Mark)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + var buf bytes.Buffer + if err := markdown.Convert([]byte(input), &buf); err != nil { + b.Fatal(err) + } + } + }) +} diff --git a/extras_old/go.mod b/extras_old/go.mod index e70ff64..8db9eea 100644 --- a/extras_old/go.mod +++ b/extras_old/go.mod @@ -1,4 +1,4 @@ -module github.com/gohugoio/hugo-goldmark-extensions/extras +module github.com/gohugoio/hugo-goldmark-extensions/extras_old go 1.22.1 diff --git a/extras_old/insert.go b/extras_old/insert.go index 76fae9d..905315b 100644 --- a/extras_old/insert.go +++ b/extras_old/insert.go @@ -1,7 +1,7 @@ package extras_old import ( - "github.com/gohugoio/hugo-goldmark-extensions/extras/ast" + "github.com/gohugoio/hugo-goldmark-extensions/extras_old/ast" "github.com/yuin/goldmark" gast "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/parser" diff --git a/extras_old/subscript.go b/extras_old/subscript.go index c2b12a4..514a0bc 100644 --- a/extras_old/subscript.go +++ b/extras_old/subscript.go @@ -1,7 +1,7 @@ package extras_old import ( - "github.com/gohugoio/hugo-goldmark-extensions/extras/ast" + "github.com/gohugoio/hugo-goldmark-extensions/extras_old/ast" "github.com/yuin/goldmark" gast "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/parser" diff --git a/extras_old/superscript.go b/extras_old/superscript.go index 12ae65b..803f096 100644 --- a/extras_old/superscript.go +++ b/extras_old/superscript.go @@ -1,7 +1,7 @@ package extras_old import ( - "github.com/gohugoio/hugo-goldmark-extensions/extras/ast" + "github.com/gohugoio/hugo-goldmark-extensions/extras_old/ast" "github.com/yuin/goldmark" gast 
"github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/parser"