go-gitea · 6543 · Mar 15, 2021 · Mar 13, 2021 · Mar 13, 2021 · Mar 13, 2021
diff --git a/modules/emoji/emoji.go b/modules/emoji/emoji.go
@@ -30,6 +30,9 @@ var (
 	// aliasMap provides a map of the alias to its emoji data.
 	aliasMap map[string]int
 
+	// emptyReplacer is the string replacer for emoji codes.
+	emptyReplacer *strings.Replacer
+
 	// codeReplacer is the string replacer for emoji codes.
 	codeReplacer *strings.Replacer
 
@@ -49,6 +52,7 @@ func loadMap() {
 
 		// process emoji codes and aliases
 		codePairs := make([]string, 0)
+		emptyPairs := make([]string, 0)
 		aliasPairs := make([]string, 0)
 
 		// sort from largest to small so we match combined emoji first
@@ -64,6 +68,7 @@ func loadMap() {
 			// setup codes
 			codeMap[e.Emoji] = i
 			codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
+			emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)
 
 			// setup aliases
 			for _, a := range e.Aliases {
@@ -77,6 +82,7 @@ func loadMap() {
 		}
 
 		// create replacers
+		emptyReplacer = strings.NewReplacer(emptyPairs...)
 		codeReplacer = strings.NewReplacer(codePairs...)
 		aliasReplacer = strings.NewReplacer(aliasPairs...)
 	})
@@ -127,38 +133,53 @@ func ReplaceAliases(s string) string {
 	return aliasReplacer.Replace(s)
 }
 
-// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
-func FindEmojiSubmatchIndex(s string) []int {
-	loadMap()
-	found := make(map[int]int)
-	keys := make([]int, 0)
+type rememberSecondWriteWriter struct {
+	pos        int
+	idx        int
+	end        int
+	writecount int
+}
 
-	//see if there are any emoji in string before looking for position of specific ones
-	//no performance difference when there is a match but 10x faster when there are not
-	if s == ReplaceCodes(s) {
-		return nil
+func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
+	n.writecount++
+	if n.writecount == 2 {
+		n.idx = n.pos
+		n.end = n.pos + len(p)
 	}
+	n.pos += len(p)
+	return len(p), nil
+}
 
-	// get index of first emoji occurrence while also checking for longest combination
-	for j := range GemojiData {
-		i := strings.Index(s, GemojiData[j].Emoji)
-		if i != -1 {
-			if _, ok := found[i]; !ok {
-				if len(keys) == 0 || i < keys[0] {
-					found[i] = j
-					keys = []int{i}
-				}
-				if i == 0 {
-					break
-				}
-			}
-		}
+func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
+	n.writecount++
+	if n.writecount == 2 {
+		n.idx = n.pos
+		n.end = n.pos + len(s)
 	}
+	n.pos += len(s)
+	return len(s), nil
+}
 
-	if len(keys) > 0 {
-		index := keys[0]
-		return []int{index, index + len(GemojiData[found[index]].Emoji)}
+// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
+func FindEmojiSubmatchIndex(s string) []int {
+	loadMap()
+	secondWriteWriter := rememberSecondWriteWriter{}
+
+	// A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
+	// we can be lazy here.
+	//
+	// The implementation of strings.Replacer.WriteString is such that the first index of the emoji
+	// submatch is simply the second thing that is written to WriteString in the writer.
+	//
+	// Therefore we can simply take the index of the second write as our first emoji
+	//
+	// FIXME: just copy the trie implementation from strings.NewReplacer
+	_, _ = emptyReplacer.WriteString(&secondWriteWriter, s)
+
+	// if we wrote less than twice then we never "replaced"
+	if secondWriteWriter.writecount < 2 {
+		return nil
 	}
 
-	return nil
+	return []int{secondWriteWriter.idx, secondWriteWriter.end}
 }
diff --git a/modules/emoji/emoji_test.go b/modules/emoji/emoji_test.go
@@ -8,6 +8,8 @@ package emoji
 import (
 	"reflect"
 	"testing"
+
+	"github.com/stretchr/testify/assert"
 )
 
 func TestDumpInfo(t *testing.T) {
@@ -65,3 +67,34 @@ func TestReplacers(t *testing.T) {
 		}
 	}
 }
+
+func TestFindEmojiSubmatchIndex(t *testing.T) {
+	type testcase struct {
+		teststring string
+		expected   []int
+	}
+
+	testcases := []testcase{
+		{
+			"\U0001f44d",
+			[]int{0, len("\U0001f44d")},
+		},
+		{
+			"\U0001f44d +1 \U0001f44d \U0001f37a",
+			[]int{0, 4},
+		},
+		{
+			" \U0001f44d",
+			[]int{1, 1 + len("\U0001f44d")},
+		},
+		{
+			string([]byte{'\u0001'}) + "\U0001f44d",
+			[]int{1, 1 + len("\U0001f44d")},
+		},
+	}
+
+	for _, kase := range testcases {
+		actual := FindEmojiSubmatchIndex(kase.teststring)
+		assert.Equal(t, kase.expected, actual)
+	}
+}
diff --git a/modules/markup/markdown/goldmark.go b/modules/markup/markdown/goldmark.go
@@ -76,6 +76,12 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
 					header.ID = util.BytesToReadOnlyString(id.([]byte))
 				}
 				toc = append(toc, header)
+			} else {
+				for _, attr := range v.Attributes() {
+					if _, ok := attr.Value.([]byte); !ok {
+						v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value)))
+					}
+				}
 			}
 		case *ast.Image:
 			// Images need two things:

diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go
@@ -18,7 +18,7 @@ import (
 
 	chromahtml "github.com/alecthomas/chroma/formatters/html"
 	"github.com/yuin/goldmark"
-	"github.com/yuin/goldmark-highlighting"
+	highlighting "github.com/yuin/goldmark-highlighting"
 	meta "github.com/yuin/goldmark-meta"
 	"github.com/yuin/goldmark/extension"
 	"github.com/yuin/goldmark/parser"
@@ -43,8 +43,8 @@ func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool
 	return pc
 }
 
-// render renders Markdown to HTML without handling special links.
-func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
+// actual renders Markdown to HTML without handling special links.
+func actualRender(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
 	once.Do(func() {
 		converter = goldmark.New(
 			goldmark.WithExtensions(extension.Table,
@@ -127,6 +127,22 @@ func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown
 	return markup.SanitizeReader(&buf).Bytes()
 }
 
+func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) (ret []byte) {
+	defer func() {
+		err := recover()
+		if err == nil {
+			return
+		}
+
+		log.Warn("Unable to render markdown due to panic in goldmark - will return sanitized raw bytes")
+		if log.IsDebug() {
+			log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
+		}
+		ret = markup.SanitizeBytes(body)
+	}()
+	return actualRender(body, urlPrefix, metas, wikiMarkdown)
+}
+
 var (
 	// MarkupName describes markup's name
 	MarkupName = "markdown"

diff --git a/modules/markup/markdown/markdown_test.go b/modules/markup/markdown/markdown_test.go
@@ -308,3 +308,23 @@ func TestRender_RenderParagraphs(t *testing.T) {
 	test(t, "A\n\nB\nC\n", 2)
 	test(t, "A\n\n\nB\nC\n", 2)
 }
+
+func TestMarkdownRenderRaw(t *testing.T) {
+	testcases := [][]byte{
+		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6267570554535936
+			0x2a, 0x20, 0x2d, 0x0a, 0x09, 0x20, 0x60, 0x5b, 0x0a, 0x09, 0x20, 0x60,
+			0x5b,
+		},
+		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6278827345051648
+			0x2d, 0x20, 0x2d, 0x0d, 0x09, 0x60, 0x0d, 0x09, 0x60,
+		},
+		{ // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6016973788020736[] = {
+			0x7b, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x35, 0x7d, 0x0a, 0x3d,
+		},
+	}
+
+	for _, testcase := range testcases {
+		_ = RenderRaw(testcase, "", false)
+	}
+
+}