diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go index 146264c7244b5..8838176eabd32 100644 --- a/modules/highlight/highlight.go +++ b/modules/highlight/highlight.go @@ -10,6 +10,7 @@ import ( "bytes" "fmt" gohtml "html" + "io" "path/filepath" "strings" "sync" @@ -26,7 +27,13 @@ import ( ) // don't index files larger than this many bytes for performance purposes -const sizeLimit = 1000000 +const sizeLimit = 1024 * 1024 + +// newLineInHTML is the HTML entity to be used for newline in HTML content, if it's empty then the original "\n" is kept +// this option is here for 2 purposes: +// (1) make it easier to switch back to the original "\n" if there is any compatibility issue in the future +// (2) make it clear to do tests: " " is the real newline for rendering, '\n' is ignorable/trim-able and could be ignored +var newLineInHTML = " " var ( // For custom user mapping @@ -40,11 +47,12 @@ var ( // NewContext loads custom highlight map from local config func NewContext() { once.Do(func() { - keys := setting.Cfg.Section("highlight.mapping").Keys() - for i := range keys { - highlightMapping[keys[i].Name()] = keys[i].Value() + if setting.Cfg != nil { + keys := setting.Cfg.Section("highlight.mapping").Keys() + for i := range keys { + highlightMapping[keys[i].Name()] = keys[i].Value() + } } - // The size 512 is simply a conservative rule of thumb c, err := lru.New2Q(512) if err != nil { @@ -58,7 +66,7 @@ func NewContext() { func Code(fileName, language, code string) string { NewContext() - // diff view newline will be passed as empty, change to literal \n so it can be copied + // diff view newline will be passed as empty, change to literal '\n' so it can be copied // preserve literal newline in blame view if code == "" || code == "\n" { return "\n" @@ -114,7 +122,7 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string { htmlbuf := bytes.Buffer{} htmlw := bufio.NewWriter(&htmlbuf) - iterator, err := lexer.Tokenise(nil, string(code)) + iterator, err := lexer.Tokenise(nil, code) if err != nil { log.Error("Can't tokenize code: %v", err) return code @@ -126,36 +134,32 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string { return code } - htmlw.Flush() + _ = htmlw.Flush() // Chroma will add newlines for certain lexers in order to highlight them properly - // Once highlighted, strip them here so they don't cause copy/paste trouble in HTML output + // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output return strings.TrimSuffix(htmlbuf.String(), "\n") } -// File returns a slice of chroma syntax highlighted lines of code -func File(numLines int, fileName, language string, code []byte) []string { +// File returns a slice of chroma syntax highlighted HTML lines of code +func File(fileName, language string, code []byte) ([]string, error) { NewContext() if len(code) > sizeLimit { - return plainText(string(code), numLines) + return PlainText(code), nil } + formatter := html.New(html.WithClasses(true), html.WithLineNumbers(false), html.PreventSurroundingPre(true), ) - if formatter == nil { - log.Error("Couldn't create chroma formatter") - return plainText(string(code), numLines) - } - - htmlbuf := bytes.Buffer{} - htmlw := bufio.NewWriter(&htmlbuf) + htmlBuf := bytes.Buffer{} + htmlWriter := bufio.NewWriter(&htmlBuf) var lexer chroma.Lexer // provided language overrides everything - if len(language) > 0 { + if language != "" { lexer = lexers.Get(language) } @@ -166,9 +170,9 @@ func File(numLines int, fileName, language string, code []byte) []string { } if lexer == nil { - language := analyze.GetCodeLanguage(fileName, code) + guessLanguage := analyze.GetCodeLanguage(fileName, code) - lexer = lexers.Get(language) + lexer = lexers.Get(guessLanguage) if lexer == nil { lexer = lexers.Match(fileName) if lexer == nil { @@ -179,61 +183,92 @@ func File(numLines int, fileName, language string, code []byte) []string { iterator, err := lexer.Tokenise(nil, string(code)) if err != nil { - log.Error("Can't tokenize code: %v", err) - return plainText(string(code), numLines) + return nil, fmt.Errorf("can't tokenize code: %w", err) } - err = formatter.Format(htmlw, styles.GitHub, iterator) + err = formatter.Format(htmlWriter, styles.GitHub, iterator) if err != nil { - log.Error("Can't format code: %v", err) - return plainText(string(code), numLines) - } - - htmlw.Flush() - finalNewLine := false - if len(code) > 0 { - finalNewLine = code[len(code)-1] == '\n' + return nil, fmt.Errorf("can't format code: %w", err) } - m := make([]string, 0, numLines) - for i, v := range strings.SplitN(htmlbuf.String(), "\n", numLines) { - content := string(v) + _ = htmlWriter.Flush() - // remove useless wrapper nodes that are always present - content = strings.Replace(content, "", "", 1) - content = strings.TrimPrefix(content, ``) + m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1) - // if there's no final newline, closing tags will be on last line - if !finalNewLine && i == numLines-1 { - content = strings.TrimSuffix(content, ``) + htmlStr := htmlBuf.String() + line := strings.Builder{} + insideLine := 0 // every makes it increase one level, every closed makes it decrease one level + tagStack := make([]string, 0, 4) + for len(htmlStr) > 0 { + pos1 := strings.IndexByte(htmlStr, '<') + pos2 := strings.IndexByte(htmlStr, '>') + if pos1 == -1 || pos2 == -1 || pos1 > pos2 { + break } - - // need to keep lines that are only \n so copy/paste works properly in browser - if content == "" { - content = "\n" - } else if content == `` { - content += "\n" + tag := htmlStr[pos1 : pos2+1] + if insideLine > 0 { + line.WriteString(htmlStr[:pos1]) } - - m = append(m, content) + if tag[1] == '/' { + if len(tagStack) == 0 { + return nil, fmt.Errorf("can't find matched tag: %q", tag) + } + popped := tagStack[len(tagStack)-1] + tagStack = tagStack[:len(tagStack)-1] + if popped == `` { + insideLine-- + lineStr := line.String() + if newLineInHTML != "" && lineStr != "" && lineStr[len(lineStr)-1] == '\n' { + lineStr = lineStr[:len(lineStr)-1] + newLineInHTML + } + m = append(m, lineStr) + line = strings.Builder{} + } + if insideLine > 0 { + line.WriteString(tag) + } + } else { + tagStack = append(tagStack, tag) + if insideLine > 0 { + line.WriteString(tag) + } + if tag == `` { + insideLine++ + } + } + htmlStr = htmlStr[pos2+1:] } - if finalNewLine { - m = append(m, "\n") + + if len(m) == 0 { + m = append(m, "") // maybe we do not want to return 0 lines } - return m + return m, nil } -// return unhiglighted map -func plainText(code string, numLines int) []string { - m := make([]string, 0, numLines) - for _, v := range strings.SplitN(string(code), "\n", numLines) { - content := string(v) - // need to keep lines that are only \n so copy/paste works properly in browser - if content == "" { - content = "\n" +// PlainText returns non-highlighted HTML for code +func PlainText(code []byte) []string { + r := bufio.NewReader(bytes.NewReader(code)) + m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1) + for { + content, err := r.ReadString('\n') + if err != nil && err != io.EOF { + log.Error("failed to read string from buffer: %v", err) + break + } + if content == "" && err == io.EOF { + break + } + s := gohtml.EscapeString(content) + if newLineInHTML != "" && s != "" && s[len(s)-1] == '\n' { + s = s[:len(s)-1] + newLineInHTML } - m = append(m, gohtml.EscapeString(content)) + m = append(m, s) } + + if len(m) == 0 { + m = append(m, "") // maybe we do not want to return 0 lines + } + return m } diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go index bbe1b716b83ab..d37d81ff6a313 100644 --- a/modules/highlight/highlight_test.go +++ b/modules/highlight/highlight_test.go @@ -8,100 +8,170 @@ import ( "strings" "testing" - "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/util" - "github.com/stretchr/testify/assert" - "gopkg.in/ini.v1" ) +func lines(s string) []string { + return strings.Split(strings.TrimSpace(s), "\n") +} + func TestFile(t *testing.T) { - setting.Cfg = ini.Empty() + defaultNewLineInHTML := newLineInHTML + defer func() { + newLineInHTML = defaultNewLineInHTML + }() + + newLineInHTML = " " tests := []struct { - name string - numLines int - fileName string - code string - want string + name string + code string + want []string }{ { - name: ".drone.yml", - numLines: 12, - fileName: ".drone.yml", - code: util.Dedent(` - kind: pipeline - name: default + name: "empty.py", + code: "", + want: []string{""}, + }, + { + name: "tags.txt", + code: "<>", + want: []string{"<>"}, + }, + { + name: "tags.py", + code: "<>", + want: []string{`<>`}, + }, + { + name: "eol-no.py", + code: "a=1", + want: []string{`a=1`}, + }, + { + name: "eol-newline1.py", + code: "a=1\n", + want: []string{ + `a=1 `, + }, + }, + { + name: "eol-newline2.py", + code: "a=1\n\n", + want: []string{ + `a=1 `, + ` `, + }, + }, + { + name: "empty-line-with-space.py", + code: strings.ReplaceAll(strings.TrimSpace(` +def: + a=1 - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - `), - want: util.Dedent(` - kind: pipeline - name: default - +b='' +{space} +c=2 + `), "{space}", " "), + want: lines(` +def: + a=1 + +b='' + +c=2`, + ), + }, + } - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - `), + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + out, err := File(tt.name, "", []byte(tt.code)) + assert.NoError(t, err) + expected := strings.Join(tt.want, "\n") + actual := strings.Join(out, "\n") + assert.Equal(t, strings.Count(actual, "")) + assert.EqualValues(t, expected, actual) + }) + } + + newLineInHTML = "" + out, err := File("test-original-newline.py", "", []byte("a=1\n")) + assert.NoError(t, err) + assert.EqualValues(t, `a=1`+"\n", strings.Join(out, "")) +} + +func TestPlainText(t *testing.T) { + defaultNewLineInHTML := newLineInHTML + defer func() { + newLineInHTML = defaultNewLineInHTML + }() + + newLineInHTML = " " + tests := []struct { + name string + code string + want []string + }{ + { + name: "empty.py", + code: "", + want: []string{""}, }, { - name: ".drone.yml - trailing space", - numLines: 13, - fileName: ".drone.yml", - code: strings.Replace(util.Dedent(` - kind: pipeline - name: default - - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - `)+"\n", "name: default", "name: default ", 1), - want: util.Dedent(` - kind: pipeline - name: default - + name: "tags.py", + code: "<>", + want: []string{"<>"}, + }, + { + name: "eol-no.py", + code: "a=1", + want: []string{`a=1`}, + }, + { + name: "eol-newline1.py", + code: "a=1\n", + want: []string{ + `a=1 `, + }, + }, + { + name: "eol-newline2.py", + code: "a=1\n\n", + want: []string{ + `a=1 `, + ` `, + }, + }, + { + name: "empty-line-with-space.py", + code: strings.ReplaceAll(strings.TrimSpace(` +def: + a=1 - steps: - - name: test - image: golang:1.13 - environment: - GOPROXY: https://goproxy.cn - commands: - - go get -u - - go build -v - - go test -v -race -coverprofile=coverage.txt -covermode=atomic - - - - `), +b='' +{space} +c=2 + `), "{space}", " "), + want: lines(` +def: + a=1 + +b='' + +c=2`), }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := strings.Join(File(tt.numLines, tt.fileName, "", []byte(tt.code)), "\n") - assert.Equal(t, tt.want, got) - assert.Equal(t, strings.Count(got, "")) + out := PlainText([]byte(tt.code)) + expected := strings.Join(tt.want, "\n") + actual := strings.Join(out, "\n") + assert.EqualValues(t, expected, actual) }) } + + newLineInHTML = "" + out := PlainText([]byte("a=1\n")) + assert.EqualValues(t, "a=1\n", strings.Join(out, "")) } diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 01bd2d89234f5..1586dd4bf58db 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -15,7 +15,6 @@ import ( "net/http" "net/url" "path" - "strconv" "strings" "time" @@ -58,15 +57,6 @@ type namedBlob struct { blob *git.Blob } -func linesBytesCount(s []byte) int { - nl := []byte{'\n'} - n := bytes.Count(s, nl) - if len(s) > 0 && !bytes.HasSuffix(s, nl) { - n++ - } - return n -} - // FIXME: There has to be a more efficient way of doing this func getReadmeFileFromPath(commit *git.Commit, treePath string) (*namedBlob, error) { tree, err := commit.SubTree(treePath) @@ -552,8 +542,14 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st ) } else { buf, _ := io.ReadAll(rd) - lineNums := linesBytesCount(buf) - ctx.Data["NumLines"] = strconv.Itoa(lineNums) + + // empty: 0 line; "a": one line; "a\n": two lines; "a\nb": two lines; + // the NumLines is only used for the display on the UI: "xxx lines" + if len(buf) == 0 { + ctx.Data["NumLines"] = 0 + } else { + ctx.Data["NumLines"] = bytes.Count(buf, []byte{'\n'}) + 1 + } ctx.Data["NumLinesSet"] = true language := "" @@ -581,7 +577,11 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st language = "" } } - fileContent := highlight.File(lineNums, blob.Name(), language, buf) + fileContent, err := highlight.File(blob.Name(), language, buf) + if err != nil { + log.Error("highlight.File failed, fallback to plain text: %v", err) + fileContent = highlight.PlainText(buf) + } status, _ := charset.EscapeControlReader(bytes.NewReader(buf), io.Discard) ctx.Data["EscapeStatus"] = status statuses := make([]charset.EscapeStatus, len(fileContent))