Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix several render issues highlighted during fuzzing #14986

Merged
merged 12 commits into from
Mar 15, 2021
75 changes: 48 additions & 27 deletions modules/emoji/emoji.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ var (
// aliasMap provides a map of the alias to its emoji data.
aliasMap map[string]int

// emptyReplacer is the string replacer for emoji codes.
emptyReplacer *strings.Replacer

// codeReplacer is the string replacer for emoji codes.
codeReplacer *strings.Replacer

Expand All @@ -49,6 +52,7 @@ func loadMap() {

// process emoji codes and aliases
codePairs := make([]string, 0)
emptyPairs := make([]string, 0)
aliasPairs := make([]string, 0)

// sort from largest to small so we match combined emoji first
Expand All @@ -64,6 +68,7 @@ func loadMap() {
// setup codes
codeMap[e.Emoji] = i
codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)

// setup aliases
for _, a := range e.Aliases {
Expand All @@ -77,6 +82,7 @@ func loadMap() {
}

// create replacers
emptyReplacer = strings.NewReplacer(emptyPairs...)
codeReplacer = strings.NewReplacer(codePairs...)
aliasReplacer = strings.NewReplacer(aliasPairs...)
})
Expand Down Expand Up @@ -127,38 +133,53 @@ func ReplaceAliases(s string) string {
return aliasReplacer.Replace(s)
}

// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
func FindEmojiSubmatchIndex(s string) []int {
loadMap()
found := make(map[int]int)
keys := make([]int, 0)
type rememberSecondWriteWriter struct {
pos int
idx int
end int
writecount int
}

//see if there are any emoji in string before looking for position of specific ones
//no performance difference when there is a match but 10x faster when there are not
if s == ReplaceCodes(s) {
return nil
func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
n.writecount++
if n.writecount == 2 {
n.idx = n.pos
n.end = n.pos + len(p)
}
n.pos += len(p)
return len(p), nil
}

// get index of first emoji occurrence while also checking for longest combination
for j := range GemojiData {
i := strings.Index(s, GemojiData[j].Emoji)
if i != -1 {
if _, ok := found[i]; !ok {
if len(keys) == 0 || i < keys[0] {
found[i] = j
keys = []int{i}
}
if i == 0 {
break
}
}
}
func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
n.writecount++
if n.writecount == 2 {
n.idx = n.pos
n.end = n.pos + len(s)
}
n.pos += len(s)
return len(s), nil
}

if len(keys) > 0 {
index := keys[0]
return []int{index, index + len(GemojiData[found[index]].Emoji)}
// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
func FindEmojiSubmatchIndex(s string) []int {
loadMap()
secondWriteWriter := rememberSecondWriteWriter{}

// A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
// we can be lazy here.
//
// The implementation of strings.Replacer.WriteString is such that the first index of the emoji
// submatch is simply the second thing that is written to WriteString in the writer.
//
// Therefore we can simply take the index of the second write as our first emoji
//
// FIXME: just copy the trie implementation from strings.NewReplacer
_, _ = emptyReplacer.WriteString(&secondWriteWriter, s)

// if we wrote less than twice then we never "replaced"
if secondWriteWriter.writecount < 2 {
return nil
}

return nil
return []int{secondWriteWriter.idx, secondWriteWriter.end}
}
33 changes: 33 additions & 0 deletions modules/emoji/emoji_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ package emoji
import (
"reflect"
"testing"

"github.com/stretchr/testify/assert"
)

func TestDumpInfo(t *testing.T) {
Expand Down Expand Up @@ -65,3 +67,34 @@ func TestReplacers(t *testing.T) {
}
}
}

func TestFindEmojiSubmatchIndex(t *testing.T) {
type testcase struct {
teststring string
expected []int
}

testcases := []testcase{
{
"\U0001f44d",
[]int{0, len("\U0001f44d")},
},
{
"\U0001f44d +1 \U0001f44d \U0001f37a",
[]int{0, 4},
},
{
" \U0001f44d",
[]int{1, 1 + len("\U0001f44d")},
},
{
string([]byte{'\u0001'}) + "\U0001f44d",
[]int{1, 1 + len("\U0001f44d")},
},
}

for _, kase := range testcases {
actual := FindEmojiSubmatchIndex(kase.teststring)
assert.Equal(t, kase.expected, actual)
}
}
6 changes: 6 additions & 0 deletions modules/markup/markdown/goldmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
header.ID = util.BytesToReadOnlyString(id.([]byte))
}
toc = append(toc, header)
} else {
for _, attr := range v.Attributes() {
if _, ok := attr.Value.([]byte); !ok {
v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value)))
}
}
}
case *ast.Image:
// Images need two things:
Expand Down
22 changes: 19 additions & 3 deletions modules/markup/markdown/markdown.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (

chromahtml "github.com/alecthomas/chroma/formatters/html"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark-highlighting"
highlighting "github.com/yuin/goldmark-highlighting"
meta "github.com/yuin/goldmark-meta"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
Expand All @@ -43,8 +43,8 @@ func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool
return pc
}

// render renders Markdown to HTML without handling special links.
func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
// actual renders Markdown to HTML without handling special links.
zeripath marked this conversation as resolved.
Show resolved Hide resolved
func actualRender(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
once.Do(func() {
converter = goldmark.New(
goldmark.WithExtensions(extension.Table,
Expand Down Expand Up @@ -127,6 +127,22 @@ func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown
return markup.SanitizeReader(&buf).Bytes()
}

func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) (ret []byte) {
defer func() {
err := recover()
if err == nil {
return
}

log.Warn("Unable to render markdown due to panic in goldmark - will return sanitized raw bytes")
if log.IsDebug() {
log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
}
ret = markup.SanitizeBytes(body)
}()
return actualRender(body, urlPrefix, metas, wikiMarkdown)
}

var (
// MarkupName describes markup's name
MarkupName = "markdown"
Expand Down
20 changes: 20 additions & 0 deletions modules/markup/markdown/markdown_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,23 @@ func TestRender_RenderParagraphs(t *testing.T) {
test(t, "A\n\nB\nC\n", 2)
test(t, "A\n\n\nB\nC\n", 2)
}

func TestMarkdownRenderRaw(t *testing.T) {
testcases := [][]byte{
{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6267570554535936
0x2a, 0x20, 0x2d, 0x0a, 0x09, 0x20, 0x60, 0x5b, 0x0a, 0x09, 0x20, 0x60,
0x5b,
},
{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6278827345051648
0x2d, 0x20, 0x2d, 0x0d, 0x09, 0x60, 0x0d, 0x09, 0x60,
},
{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6016973788020736[] = {
0x7b, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x35, 0x7d, 0x0a, 0x3d,
},
}

for _, testcase := range testcases {
_ = RenderRaw(testcase, "", false)
}

}