Fix several render issues (#14986)
* Fix an issue with panics related to attributes
* Wrap goldmark render in a recovery function
* Reduce memory use in render emoji
* Use a pipe for rendering goldmark - still needs more work and a limiter

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: Lauris BH <lauris@nix.lv>
zeripath and lafriks committed Mar 15, 2021
1 parent 044cd4d commit ed31ddc
Showing 6 changed files with 211 additions and 61 deletions.
75 changes: 48 additions & 27 deletions modules/emoji/emoji.go
@@ -30,6 +30,9 @@ var (
 	// aliasMap provides a map of the alias to its emoji data.
 	aliasMap map[string]int
 
+	// emptyReplacer is the string replacer that maps each emoji to itself.
+	emptyReplacer *strings.Replacer
+
 	// codeReplacer is the string replacer for emoji codes.
 	codeReplacer *strings.Replacer
 
@@ -49,6 +52,7 @@ func loadMap() {
 
 		// process emoji codes and aliases
 		codePairs := make([]string, 0)
+		emptyPairs := make([]string, 0)
 		aliasPairs := make([]string, 0)
 
 		// sort from largest to small so we match combined emoji first
@@ -64,6 +68,7 @@
 			// setup codes
 			codeMap[e.Emoji] = i
 			codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
+			emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)
 
 			// setup aliases
 			for _, a := range e.Aliases {
@@ -77,6 +82,7 @@
 		}
 
 		// create replacers
+		emptyReplacer = strings.NewReplacer(emptyPairs...)
 		codeReplacer = strings.NewReplacer(codePairs...)
 		aliasReplacer = strings.NewReplacer(aliasPairs...)
 	})
@@ -127,38 +133,53 @@ func ReplaceAliases(s string) string {
 	return aliasReplacer.Replace(s)
 }
 
-// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
-func FindEmojiSubmatchIndex(s string) []int {
-	loadMap()
-	found := make(map[int]int)
-	keys := make([]int, 0)
+type rememberSecondWriteWriter struct {
+	pos        int
+	idx        int
+	end        int
+	writecount int
+}
 
-	//see if there are any emoji in string before looking for position of specific ones
-	//no performance difference when there is a match but 10x faster when there are not
-	if s == ReplaceCodes(s) {
-		return nil
+func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
+	n.writecount++
+	if n.writecount == 2 {
+		n.idx = n.pos
+		n.end = n.pos + len(p)
 	}
+	n.pos += len(p)
+	return len(p), nil
+}
 
-	// get index of first emoji occurrence while also checking for longest combination
-	for j := range GemojiData {
-		i := strings.Index(s, GemojiData[j].Emoji)
-		if i != -1 {
-			if _, ok := found[i]; !ok {
-				if len(keys) == 0 || i < keys[0] {
-					found[i] = j
-					keys = []int{i}
-				}
-				if i == 0 {
-					break
-				}
-			}
-		}
+func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
+	n.writecount++
+	if n.writecount == 2 {
+		n.idx = n.pos
+		n.end = n.pos + len(s)
 	}
+	n.pos += len(s)
+	return len(s), nil
+}
 
-	if len(keys) > 0 {
-		index := keys[0]
-		return []int{index, index + len(GemojiData[found[index]].Emoji)}
+// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
+func FindEmojiSubmatchIndex(s string) []int {
+	loadMap()
+	secondWriteWriter := rememberSecondWriteWriter{}
+
+	// A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
+	// we can be lazy here.
+	//
+	// The implementation of strings.Replacer.WriteString is such that the first index of the emoji
+	// submatch is simply the second thing that is written to WriteString in the writer.
+	//
+	// Therefore we can simply take the index of the second write as our first emoji
+	//
+	// FIXME: just copy the trie implementation from strings.NewReplacer
+	_, _ = emptyReplacer.WriteString(&secondWriteWriter, s)
+
+	// if we wrote less than twice then we never "replaced"
+	if secondWriteWriter.writecount < 2 {
+		return nil
	}
 
-	return nil
+	return []int{secondWriteWriter.idx, secondWriteWriter.end}
 }
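
The replacement above leans on an implementation detail of the standard library, which the FIXME in the diff acknowledges: strings.Replacer.WriteString emits the unmatched prefix first (write #1, possibly empty) and the first replacement second (write #2), so a replacer that maps every emoji to itself makes the second write span the first emoji in the string. A minimal self-contained sketch of that trick (countingWriter and the sample string are illustrative, not part of the commit):

```go
package main

import (
	"fmt"
	"strings"
)

// countingWriter mirrors rememberSecondWriteWriter above: it discards the
// bytes and only records where the second write landed.
type countingWriter struct {
	pos, idx, end, writes int
}

func (c *countingWriter) Write(p []byte) (int, error) {
	c.writes++
	if c.writes == 2 {
		c.idx, c.end = c.pos, c.pos+len(p)
	}
	c.pos += len(p)
	return len(p), nil
}

func main() {
	// A replacer that maps the pattern to itself, like emptyReplacer above.
	r := strings.NewReplacer("\U0001f44d", "\U0001f44d")
	w := &countingWriter{}
	_, _ = r.WriteString(w, "ok \U0001f44d!")
	if w.writes >= 2 {
		fmt.Println(w.idx, w.end) // 3 7: the emoji spans bytes [3, 7)
	}
}
```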
33 changes: 33 additions & 0 deletions modules/emoji/emoji_test.go
@@ -8,6 +8,8 @@ package emoji
 import (
 	"reflect"
 	"testing"
+
+	"github.com/stretchr/testify/assert"
 )
 
 func TestDumpInfo(t *testing.T) {
@@ -65,3 +67,34 @@ func TestReplacers(t *testing.T) {
 		}
 	}
 }
+
+func TestFindEmojiSubmatchIndex(t *testing.T) {
+	type testcase struct {
+		teststring string
+		expected   []int
+	}
+
+	testcases := []testcase{
+		{
+			"\U0001f44d",
+			[]int{0, len("\U0001f44d")},
+		},
+		{
+			"\U0001f44d +1 \U0001f44d \U0001f37a",
+			[]int{0, 4},
+		},
+		{
+			" \U0001f44d",
+			[]int{1, 1 + len("\U0001f44d")},
+		},
+		{
+			string([]byte{'\u0001'}) + "\U0001f44d",
+			[]int{1, 1 + len("\U0001f44d")},
+		},
+	}
+
+	for _, kase := range testcases {
+		actual := FindEmojiSubmatchIndex(kase.teststring)
+		assert.Equal(t, kase.expected, actual)
+	}
+}
36 changes: 11 additions & 25 deletions modules/markup/html.go
@@ -313,41 +313,27 @@ func RenderEmoji(
 	return ctx.postProcess(rawHTML)
 }
 
+var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL][ />]))`)
+var nulCleaner = strings.NewReplacer("\000", "")
+
 func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 	if ctx.procs == nil {
 		ctx.procs = defaultProcessors
 	}
 
 	// give a generous extra 50 bytes
-	res := make([]byte, 0, len(rawHTML)+50)
-
+	res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50))
 	// prepend "<html><body>"
-	res = append(res, "<html><body>"...)
+	_, _ = res.WriteString("<html><body>")
 
 	// Strip out nuls - they're always invalid
-	start := bytes.IndexByte(rawHTML, '\000')
-	if start >= 0 {
-		res = append(res, rawHTML[:start]...)
-		start++
-		for start < len(rawHTML) {
-			end := bytes.IndexByte(rawHTML[start:], '\000')
-			if end < 0 {
-				res = append(res, rawHTML[start:]...)
-				break
-			} else if end > 0 {
-				res = append(res, rawHTML[start:start+end]...)
-			}
-			start += end + 1
-		}
-	} else {
-		res = append(res, rawHTML...)
-	}
+	_, _ = nulCleaner.WriteString(res, string(tagCleaner.ReplaceAll(rawHTML, []byte("&lt;$1"))))
 
 	// close the tags
-	res = append(res, "</body></html>"...)
+	_, _ = res.WriteString("</body></html>")
 
 	// parse the HTML
-	nodes, err := html.ParseFragment(bytes.NewReader(res), nil)
+	nodes, err := html.ParseFragment(res, nil)
 	if err != nil {
 		return nil, &postProcessError{"invalid HTML", err}
 	}
@@ -384,17 +370,17 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 	// Create buffer in which the data will be placed again. We know that the
 	// length will be at least that of res; to spare a few alloc+copy, we
 	// reuse res, resetting its length to 0.
-	buf := bytes.NewBuffer(res[:0])
+	res.Reset()
 	// Render everything to buf.
 	for _, node := range nodes {
-		err = html.Render(buf, node)
+		err = html.Render(res, node)
 		if err != nil {
 			return nil, &postProcessError{"error rendering processed HTML", err}
 		}
 	}
 
 	// Everything done successfully, return parsed data.
-	return buf.Bytes(), nil
+	return res.Bytes(), nil
 }
 
 func (ctx *postProcessCtx) visitNode(node *html.Node, visitText bool) {
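
The two new package-level cleaners replace the hand-rolled NUL-stripping loop: tagCleaner escapes tag-like runs (paths in angle brackets, stray html markers) before they reach the fragment parser, and nulCleaner drops NUL bytes in the same streaming pass. A rough illustration of what they catch, with an invented input ($1 re-emits the captured pseudo-tag after the escaped &lt;):

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL][ />]))`)
var nulCleaner = strings.NewReplacer("\000", "")

func main() {
	raw := []byte("see <path/to/file> and <html>\000")
	cleaned := nulCleaner.Replace(string(tagCleaner.ReplaceAll(raw, []byte("&lt;$1"))))
	fmt.Println(cleaned) // see &lt;path/to/file> and &lt;html>
}
```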
6 changes: 6 additions & 0 deletions modules/markup/markdown/goldmark.go
@@ -77,6 +77,12 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
 			header.ID = util.BytesToReadOnlyString(id.([]byte))
 		}
 		toc = append(toc, header)
+	} else {
+		for _, attr := range v.Attributes() {
+			if _, ok := attr.Value.([]byte); !ok {
+				v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value)))
+			}
+		}
 	}
 case *ast.Image:
 	// Images need two things:
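
The else branch guards against the panic named in the first bullet of the commit message: goldmark's HTML renderer expects attribute values to be []byte and can panic on other types, so every non-[]byte value is coerced through fmt.Sprintf. A standalone sketch of the same normalization (the heading and its data-count attribute are made up for illustration):

```go
package main

import (
	"fmt"

	"github.com/yuin/goldmark/ast"
)

func main() {
	h := ast.NewHeading(2)
	// goldmark stores attribute values as interface{}; a non-[]byte value
	// like this int can blow up later when the renderer asserts []byte.
	h.SetAttribute([]byte("data-count"), 42)

	// Same normalization as the transformer above applies.
	for _, attr := range h.Attributes() {
		if _, ok := attr.Value.([]byte); !ok {
			h.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value)))
		}
	}

	v, _ := h.Attribute([]byte("data-count"))
	fmt.Printf("%s\n", v.([]byte)) // 42
}
```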
102 changes: 93 additions & 9 deletions modules/markup/markdown/markdown.go
@@ -6,7 +6,8 @@
 package markdown
 
 import (
-	"bytes"
+	"fmt"
+	"io"
 	"strings"
 	"sync"
 
@@ -18,7 +19,7 @@ import (
 
 	chromahtml "github.com/alecthomas/chroma/formatters/html"
 	"github.com/yuin/goldmark"
-	"github.com/yuin/goldmark-highlighting"
+	highlighting "github.com/yuin/goldmark-highlighting"
 	meta "github.com/yuin/goldmark-meta"
 	"github.com/yuin/goldmark/extension"
 	"github.com/yuin/goldmark/parser"
@@ -34,6 +35,44 @@ var urlPrefixKey = parser.NewContextKey()
 var isWikiKey = parser.NewContextKey()
 var renderMetasKey = parser.NewContextKey()
 
+type closesWithError interface {
+	io.WriteCloser
+	CloseWithError(err error) error
+}
+
+type limitWriter struct {
+	w     closesWithError
+	sum   int64
+	limit int64
+}
+
+// Write implements the standard Write interface:
+func (l *limitWriter) Write(data []byte) (int, error) {
+	leftToWrite := l.limit - l.sum
+	if leftToWrite < int64(len(data)) {
+		n, err := l.w.Write(data[:leftToWrite])
+		l.sum += int64(n)
+		if err != nil {
+			return n, err
+		}
+		_ = l.w.Close()
+		return n, fmt.Errorf("Rendered content too large - truncating render")
+	}
+	n, err := l.w.Write(data)
+	l.sum += int64(n)
+	return n, err
+}
+
+// Close closes the writer
+func (l *limitWriter) Close() error {
+	return l.w.Close()
+}
+
+// CloseWithError closes the writer
+func (l *limitWriter) CloseWithError(err error) error {
+	return l.w.CloseWithError(err)
+}
+
 // NewGiteaParseContext creates a parser.Context with the gitea context set
 func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool) parser.Context {
 	pc := parser.NewContext(parser.WithIDs(newPrefixedIDs()))
@@ -43,8 +82,8 @@ func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool
 	return pc
 }
 
-// render renders Markdown to HTML without handling special links.
-func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
+// actualRender renders Markdown to HTML without handling special links.
+func actualRender(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
 	once.Do(func() {
 		converter = goldmark.New(
 			goldmark.WithExtensions(extension.Table,
@@ -119,12 +158,57 @@ func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown
 
 	})
 
-	pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
-	var buf bytes.Buffer
-	if err := converter.Convert(giteautil.NormalizeEOL(body), &buf, parser.WithContext(pc)); err != nil {
-		log.Error("Unable to render: %v", err)
+	rd, wr := io.Pipe()
+	defer func() {
+		_ = rd.Close()
+		_ = wr.Close()
+	}()
+
+	lw := &limitWriter{
+		w:     wr,
+		limit: setting.UI.MaxDisplayFileSize * 3,
 	}
-	return markup.SanitizeReader(&buf).Bytes()
+
+	// FIXME: should we include a timeout that closes the pipe to abort the parser and sanitizer if it takes too long?
+	go func() {
+		defer func() {
+			err := recover()
+			if err == nil {
+				return
+			}
+
+			log.Warn("Unable to render markdown due to panic in goldmark: %v", err)
+			if log.IsDebug() {
+				log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
+			}
+			_ = lw.CloseWithError(fmt.Errorf("%v", err))
+		}()
+
+		pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
+		if err := converter.Convert(giteautil.NormalizeEOL(body), lw, parser.WithContext(pc)); err != nil {
+			log.Error("Unable to render: %v", err)
+			_ = lw.CloseWithError(err)
+			return
+		}
+		_ = lw.Close()
+	}()
+	return markup.SanitizeReader(rd).Bytes()
 }
+
+func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) (ret []byte) {
+	defer func() {
+		err := recover()
+		if err == nil {
+			return
+		}
+
+		log.Warn("Unable to render markdown due to panic in goldmark - will return sanitized raw bytes")
+		if log.IsDebug() {
+			log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
+		}
+		ret = markup.SanitizeBytes(body)
+	}()
+	return actualRender(body, urlPrefix, metas, wikiMarkdown)
+}
 
 var (
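
Taken together, the markdown.go changes give rendering this shape: actualRender streams the converter's output through limitWriter into one end of an io.Pipe while the sanitizer consumes the other end, so output stays bounded and a converter failure or panic can be pushed to the reader via CloseWithError; the new render wrapper then recovers anything that still escapes. A condensed sketch of the pipe-plus-limiter mechanics (the 10-byte limit and sample string are invented here; the real budget is setting.UI.MaxDisplayFileSize * 3):

```go
package main

import (
	"fmt"
	"io"
)

// limitWriter trims a write that would exceed the budget and closes the
// pipe, following the shape of the type added above.
type limitWriter struct {
	w          *io.PipeWriter
	sum, limit int64
}

func (l *limitWriter) Write(data []byte) (int, error) {
	if left := l.limit - l.sum; left < int64(len(data)) {
		n, _ := l.w.Write(data[:left])
		l.sum += int64(n)
		_ = l.w.Close() // the reader sees EOF after the truncated prefix
		return n, fmt.Errorf("rendered content too large - truncating render")
	}
	n, err := l.w.Write(data)
	l.sum += int64(n)
	return n, err
}

func main() {
	rd, wr := io.Pipe()
	lw := &limitWriter{w: wr, limit: 10}

	// Producer goroutine: the goldmark converter in the real code.
	go func() {
		defer wr.Close()
		_, _ = io.WriteString(lw, "0123456789abcdef") // errors once the limit is hit
	}()

	// Consumer: the sanitizer in the real code reads the bounded stream.
	out, _ := io.ReadAll(rd)
	fmt.Printf("%q\n", out) // "0123456789"
}
```

Closing the pipe inside Write is what lets the consumer finish cleanly with the truncated prefix instead of blocking on a stalled producer.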