Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement a custom parser for <ac:*/> tags #273

Merged
merged 2 commits into from
Mar 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,8 @@ See task MYJIRA-123.
This is a [link to an existing confluence page](ac:Pagetitle)

And this is how to link when the linktext is the same as the [Pagetitle](ac:)

Link to a [page title with space](<ac:With Space>)
```

### Add width for an image
Expand Down
112 changes: 112 additions & 0 deletions pkg/mark/ac_tag_parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package mark

import (
"bytes"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
"regexp"
)

// NewACTagParser builds the inline parser responsible for Confluence-specific
// <ac:* /> tags. Matched tags become raw HTML nodes (ast.KindRawHTML), so they
// are not escaped at render time.
//
// Register it with a higher priority than goldmark's linkParser; otherwise the
// linkParser would parse the <ac:* /> tags.
func NewACTagParser() parser.InlineParser {
	return new(acTagParser)
}

// acTagParser is the stateless inline parser returned by NewACTagParser. It is
// a stripped down version of goldmark's rawHTMLParser.
// See: https://github.com/yuin/goldmark/blob/master/parser/raw_html.go
type acTagParser struct{}

// Compile-time check that *acTagParser satisfies parser.InlineParser.
var _ parser.InlineParser = (*acTagParser)(nil)

// Trigger returns the single byte '<', the character on which this parser
// asks to be invoked.
func (s *acTagParser) Trigger() []byte {
	return []byte("<")
}

// Parse inspects the current line and tries, in order, to read one of:
//
//   - an opening tag ("<" followed by an alphanumeric byte), matched against
//     openTagRegexp;
//   - a closing tag ("</" followed by an alphanumeric byte), matched against
//     closeTagRegexp;
//   - a declaration ("<!" followed by an upper-case ASCII letter), consumed up
//     to the next ">";
//   - a CDATA section, consumed up to the next "]]>".
//
// It returns the resulting node, or nil when none of the forms applies or the
// selected form fails to match.
func (s *acTagParser) Parse(_ ast.Node, block text.Reader, pc parser.Context) ast.Node {
	line, _ := block.PeekLine()
	// Index 0 is presumably the '<' trigger byte, so classification starts at index 1.
	switch {
	case len(line) > 1 && util.IsAlphaNumeric(line[1]):
		return s.parseMultiLineRegexp(openTagRegexp, block, pc)
	case len(line) > 2 && line[1] == '/' && util.IsAlphaNumeric(line[2]):
		return s.parseMultiLineRegexp(closeTagRegexp, block, pc)
	case len(line) > 2 && line[1] == '!' && 'A' <= line[2] && line[2] <= 'Z':
		return s.parseUntil(block, closeDecl, pc)
	case bytes.HasPrefix(line, openCDATA):
		return s.parseUntil(block, closeCDATA, pc)
	default:
		return nil
	}
}

// Building blocks for the tag regular expressions below.
var (
	// tagnamePattern matches the tag name that follows the "ac:" prefix: a
	// letter followed by any number of letters, digits or hyphens.
	tagnamePattern = `([A-Za-z][A-Za-z0-9-]*)`

	// attributePattern matches one whitespace-separated attribute whose value,
	// if present, is unquoted, single-quoted or double-quoted.
	attributePattern = `(?:[\r\n \t]+[a-zA-Z_:][a-zA-Z0-9:._-]*(?:[\r\n \t]*=[\r\n \t]*(?:[^\"'=<>` + "`" + `\x00-\x20]+|'[^']*'|"[^"]*"))?)`
)

// Anchored expressions for opening and closing tags. Only match <ac:*/> tags.
var (
	openTagRegexp  = regexp.MustCompile("^<ac:" + tagnamePattern + attributePattern + `*[ \t]*/?>`)
	closeTagRegexp = regexp.MustCompile("^</ac:" + tagnamePattern + `\s*>`)
)

// Literal delimiters: the CDATA opener, and the byte sequences that end a
// CDATA section and a declaration.
var (
	openCDATA  = []byte("<![CDATA[")
	closeCDATA = []byte("]]>")
	closeDecl  = []byte(">")
)

// parseUntil consumes input from the reader's current position up to and
// including the first occurrence of closer, which may be on a later line.
// Everything consumed is recorded as segments of a raw HTML node.
//
// If the reader runs out of lines before closer is found, the reader is moved
// back to where it started and nil is returned.
func (s *acTagParser) parseUntil(block text.Reader, closer []byte, _ parser.Context) ast.Node {
	startLine, startSegment := block.Position()
	raw := ast.NewRawHTML()

	for line, segment := block.PeekLine(); line != nil; line, segment = block.PeekLine() {
		pos := bytes.Index(line, closer)
		if pos < 0 {
			// No closer on this line: keep the whole line and move on.
			raw.Segments.Append(segment)
			block.AdvanceLine()
			continue
		}

		// Closer found: keep the line only up to the end of the closer.
		consumed := pos + len(closer)
		raw.Segments.Append(segment.WithStop(segment.Start + consumed))
		block.Advance(consumed)
		return raw
	}

	// Closer never appeared; undo all advancing done above.
	block.SetPosition(startLine, startSegment)
	return nil
}

// parseMultiLineRegexp matches reg against the reader at its current position.
// If it matches, the matched text, which may span several lines, is returned
// as a raw HTML node with one segment per line, and the reader is left just
// after the match. It returns nil when reg does not match.
func (s *acTagParser) parseMultiLineRegexp(reg *regexp.Regexp, block text.Reader, _ parser.Context) ast.Node {
	startLine, startSeg := block.Position()
	if !block.Match(reg) {
		return nil
	}

	// Match moved the reader to the end of the match. Remember that position,
	// then rewind and walk the matched lines again to collect their segments.
	endLine, endSeg := block.Position()
	block.SetPosition(startLine, startSeg)

	raw := ast.NewRawHTML()
	for {
		line, seg := block.PeekLine()
		if line == nil {
			return raw
		}

		current, _ := block.Position()
		onLastLine := current == endLine

		// Clip the first line to where the match began and the last line to
		// where it ended; lines in between are taken whole.
		from, to := seg.Start, seg.Stop
		if current == startLine {
			from = startSeg.Start
		}
		if onLastLine {
			to = endSeg.Start
		}
		raw.Segments.Append(text.NewSegment(from, to))

		if onLastLine {
			block.Advance(to - from)
			return raw
		}
		block.AdvanceLine()
	}
}
25 changes: 8 additions & 17 deletions pkg/mark/markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,8 @@ func (r *ConfluenceRenderer) renderLink(writer util.BufWriter, source []byte, no
if err != nil {
return ast.WalkStop, err
}

return ast.WalkSkipChildren, nil
}
return ast.WalkSkipChildren, nil
}
return r.goldmarkRenderLink(writer, source, node, entering)
}
Expand Down Expand Up @@ -430,22 +429,9 @@ func (r *ConfluenceRenderer) renderCodeBlock(writer util.BufWriter, source []byt
return ast.WalkContinue, nil
}

// CompileMarkdown will replace tags like <ac:rich-text-body> with an escaped
// equivalent, because the goldmark markdown parser replaces those tags with
// <a href="ac:rich-text-body">ac:rich-text-body</a> because of the autolink
// rule.
func CompileMarkdown(markdown []byte, stdlib *stdlib.Lib) string {
log.Tracef(nil, "rendering markdown:\n%s", string(markdown))

colon := []byte("---bf-COLON---")

tags := regexp.MustCompile(`</?ac:[^>]+>`)

for _, match := range tags.FindAll(markdown, -1) {
// Replace the colon in all "<ac:*>" tags with the colon bytes to avoid having Goldmark escape the HTML output.
markdown = bytes.ReplaceAll(markdown, match, bytes.ReplaceAll(match, []byte(":"), colon))
}

converter := goldmark.New(
goldmark.WithExtensions(
extension.GFM,
Expand All @@ -461,6 +447,12 @@ func CompileMarkdown(markdown []byte, stdlib *stdlib.Lib) string {
html.WithUnsafe(),
))

converter.Parser().AddOptions(parser.WithInlineParsers(
// Must be registered with a higher priority than goldmark's linkParser to make sure goldmark doesn't parse
// the <ac:*/> tags.
util.Prioritized(NewACTagParser(), 199),
))

converter.Renderer().AddOptions(renderer.WithNodeRenderers(
util.Prioritized(NewConfluenceRenderer(stdlib), 100),
))
Expand All @@ -472,8 +464,7 @@ func CompileMarkdown(markdown []byte, stdlib *stdlib.Lib) string {
panic(err)
}

// Restore all the colons we previously replaced.
html := bytes.ReplaceAll(buf.Bytes(), colon, []byte(":"))
html := buf.Bytes()

log.Tracef(nil, "rendered markdown to html:\n%s", string(html))

Expand Down
4 changes: 4 additions & 0 deletions pkg/mark/testdata/links.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
<p>Use <a href="https://example.com">https://example.com</a></p>
<p>Use <ac:rich-text-body>aaa</ac:rich-text-body></p>
<p>Use <ac:link><ri:page ri:content-title="Page"/><ac:plain-text-link-body><![CDATA[page link]]></ac:plain-text-link-body></ac:link></p>
<p>Use <ac:link><ri:page ri:content-title="AnotherPage"/><ac:plain-text-link-body><![CDATA[AnotherPage]]></ac:plain-text-link-body></ac:link></p>
<p>Use <ac:link><ri:page ri:content-title="Another Page"/><ac:plain-text-link-body><![CDATA[Another Page]]></ac:plain-text-link-body></ac:link></p>
<p>Use <ac:link><ri:page ri:content-title="Page With Space"/><ac:plain-text-link-body><![CDATA[page link with spaces]]></ac:plain-text-link-body></ac:link></p>
<p>Use footnotes link <sup id="fnref:1"><a href="#fn:1" class="footnote-ref" role="doc-noteref">1</a></sup></p>
<div class="footnotes" role="doc-endnotes">
<hr />
Expand Down
8 changes: 8 additions & 0 deletions pkg/mark/testdata/links.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,13 @@ Use <https://example.com>

Use <ac:rich-text-body>aaa</ac:rich-text-body>

Use [page link](ac:Page)

Use [AnotherPage](ac:)

Use [Another Page](ac:)

Use [page link with spaces](<ac:Page With Space>)

Use footnotes link [^1]
[^1]: a footnote link
4 changes: 2 additions & 2 deletions pkg/mark/testdata/macro-include.html
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<p><foo>bar</foo></p>
<ac:structured-macro ac:name="info">
<p><ac:structured-macro ac:name="info">
<ac:parameter ac:name="icon">true</ac:parameter>
<ac:parameter ac:name="title">Attention</ac:parameter>
<ac:rich-text-body>This is an info!</ac:rich-text-body>
</ac:structured-macro>
</ac:structured-macro></p>