diff --git a/modules/highlight/highlight.go b/modules/highlight/highlight.go
index 146264c7244b5..8838176eabd32 100644
--- a/modules/highlight/highlight.go
+++ b/modules/highlight/highlight.go
@@ -10,6 +10,7 @@ import (
"bytes"
"fmt"
gohtml "html"
+ "io"
"path/filepath"
"strings"
"sync"
@@ -26,7 +27,13 @@ import (
)
// don't index files larger than this many bytes for performance purposes
-const sizeLimit = 1000000
+const sizeLimit = 1024 * 1024
+
+// newLineInHTML is the HTML entity to be used for newline in HTML content, if it's empty then the original "\n" is kept
+// this option is here for 2 purposes:
+// (1) make it easier to switch back to the original "\n" if there is any compatibility issue in the future
+// (2) make it clear to do tests: "&#10;" is the real newline for rendering, '\n' is ignorable/trim-able and could be ignored
+var newLineInHTML = "&#10;"
var (
// For custom user mapping
@@ -40,11 +47,12 @@ var (
// NewContext loads custom highlight map from local config
func NewContext() {
once.Do(func() {
- keys := setting.Cfg.Section("highlight.mapping").Keys()
- for i := range keys {
- highlightMapping[keys[i].Name()] = keys[i].Value()
+ if setting.Cfg != nil {
+ keys := setting.Cfg.Section("highlight.mapping").Keys()
+ for i := range keys {
+ highlightMapping[keys[i].Name()] = keys[i].Value()
+ }
}
-
// The size 512 is simply a conservative rule of thumb
c, err := lru.New2Q(512)
if err != nil {
@@ -58,7 +66,7 @@ func NewContext() {
func Code(fileName, language, code string) string {
NewContext()
- // diff view newline will be passed as empty, change to literal \n so it can be copied
+ // diff view newline will be passed as empty, change to literal '\n' so it can be copied
// preserve literal newline in blame view
if code == "" || code == "\n" {
return "\n"
@@ -114,7 +122,7 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string {
htmlbuf := bytes.Buffer{}
htmlw := bufio.NewWriter(&htmlbuf)
- iterator, err := lexer.Tokenise(nil, string(code))
+ iterator, err := lexer.Tokenise(nil, code)
if err != nil {
log.Error("Can't tokenize code: %v", err)
return code
@@ -126,36 +134,32 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string {
return code
}
- htmlw.Flush()
+ _ = htmlw.Flush()
// Chroma will add newlines for certain lexers in order to highlight them properly
- // Once highlighted, strip them here so they don't cause copy/paste trouble in HTML output
+ // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
return strings.TrimSuffix(htmlbuf.String(), "\n")
}
-// File returns a slice of chroma syntax highlighted lines of code
-func File(numLines int, fileName, language string, code []byte) []string {
+// File returns a slice of chroma syntax highlighted HTML lines of code
+func File(fileName, language string, code []byte) ([]string, error) {
NewContext()
if len(code) > sizeLimit {
- return plainText(string(code), numLines)
+ return PlainText(code), nil
}
+
formatter := html.New(html.WithClasses(true),
html.WithLineNumbers(false),
html.PreventSurroundingPre(true),
)
- if formatter == nil {
- log.Error("Couldn't create chroma formatter")
- return plainText(string(code), numLines)
- }
-
- htmlbuf := bytes.Buffer{}
- htmlw := bufio.NewWriter(&htmlbuf)
+ htmlBuf := bytes.Buffer{}
+ htmlWriter := bufio.NewWriter(&htmlBuf)
var lexer chroma.Lexer
// provided language overrides everything
- if len(language) > 0 {
+ if language != "" {
lexer = lexers.Get(language)
}
@@ -166,9 +170,9 @@ func File(numLines int, fileName, language string, code []byte) []string {
}
if lexer == nil {
- language := analyze.GetCodeLanguage(fileName, code)
+ guessLanguage := analyze.GetCodeLanguage(fileName, code)
- lexer = lexers.Get(language)
+ lexer = lexers.Get(guessLanguage)
if lexer == nil {
lexer = lexers.Match(fileName)
if lexer == nil {
@@ -179,61 +183,92 @@ func File(numLines int, fileName, language string, code []byte) []string {
iterator, err := lexer.Tokenise(nil, string(code))
if err != nil {
- log.Error("Can't tokenize code: %v", err)
- return plainText(string(code), numLines)
+ return nil, fmt.Errorf("can't tokenize code: %w", err)
}
- err = formatter.Format(htmlw, styles.GitHub, iterator)
+ err = formatter.Format(htmlWriter, styles.GitHub, iterator)
if err != nil {
- log.Error("Can't format code: %v", err)
- return plainText(string(code), numLines)
- }
-
- htmlw.Flush()
- finalNewLine := false
- if len(code) > 0 {
- finalNewLine = code[len(code)-1] == '\n'
+ return nil, fmt.Errorf("can't format code: %w", err)
}
- m := make([]string, 0, numLines)
- for i, v := range strings.SplitN(htmlbuf.String(), "\n", numLines) {
- content := string(v)
+ _ = htmlWriter.Flush()
- // remove useless wrapper nodes that are always present
- content = strings.Replace(content, "", "", 1)
- content = strings.TrimPrefix(content, ``)
+ m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1)
- // if there's no final newline, closing tags will be on last line
- if !finalNewLine && i == numLines-1 {
- content = strings.TrimSuffix(content, ``)
+ htmlStr := htmlBuf.String()
+ line := strings.Builder{}
+ insideLine := 0 // nesting depth of line-wrapper tags: every opening line-wrapper tag increases it by one level, every matching closing tag decreases it by one level
+ tagStack := make([]string, 0, 4)
+ for len(htmlStr) > 0 {
+ pos1 := strings.IndexByte(htmlStr, '<')
+ pos2 := strings.IndexByte(htmlStr, '>')
+ if pos1 == -1 || pos2 == -1 || pos1 > pos2 {
+ break
}
-
- // need to keep lines that are only \n so copy/paste works properly in browser
- if content == "" {
- content = "\n"
- } else if content == `` {
- content += "\n"
+ tag := htmlStr[pos1 : pos2+1]
+ if insideLine > 0 {
+ line.WriteString(htmlStr[:pos1])
}
-
- m = append(m, content)
+ if tag[1] == '/' {
+ if len(tagStack) == 0 {
+ return nil, fmt.Errorf("can't find matched tag: %q", tag)
+ }
+ popped := tagStack[len(tagStack)-1]
+ tagStack = tagStack[:len(tagStack)-1]
+ if popped == `` {
+ insideLine--
+ lineStr := line.String()
+ if newLineInHTML != "" && lineStr != "" && lineStr[len(lineStr)-1] == '\n' {
+ lineStr = lineStr[:len(lineStr)-1] + newLineInHTML
+ }
+ m = append(m, lineStr)
+ line = strings.Builder{}
+ }
+ if insideLine > 0 {
+ line.WriteString(tag)
+ }
+ } else {
+ tagStack = append(tagStack, tag)
+ if insideLine > 0 {
+ line.WriteString(tag)
+ }
+ if tag == `` {
+ insideLine++
+ }
+ }
+ htmlStr = htmlStr[pos2+1:]
}
- if finalNewLine {
- m = append(m, "\n")
+
+ if len(m) == 0 {
+ m = append(m, "") // maybe we do not want to return 0 lines
}
- return m
+ return m, nil
}
-// return unhiglighted map
-func plainText(code string, numLines int) []string {
- m := make([]string, 0, numLines)
- for _, v := range strings.SplitN(string(code), "\n", numLines) {
- content := string(v)
- // need to keep lines that are only \n so copy/paste works properly in browser
- if content == "" {
- content = "\n"
+// PlainText returns non-highlighted HTML for code
+func PlainText(code []byte) []string {
+ r := bufio.NewReader(bytes.NewReader(code))
+ m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1)
+ for {
+ content, err := r.ReadString('\n')
+ if err != nil && err != io.EOF {
+ log.Error("failed to read string from buffer: %v", err)
+ break
+ }
+ if content == "" && err == io.EOF {
+ break
+ }
+ s := gohtml.EscapeString(content)
+ if newLineInHTML != "" && s != "" && s[len(s)-1] == '\n' {
+ s = s[:len(s)-1] + newLineInHTML
}
- m = append(m, gohtml.EscapeString(content))
+ m = append(m, s)
}
+
+ if len(m) == 0 {
+ m = append(m, "") // maybe we do not want to return 0 lines
+ }
+
return m
}
diff --git a/modules/highlight/highlight_test.go b/modules/highlight/highlight_test.go
index bbe1b716b83ab..d37d81ff6a313 100644
--- a/modules/highlight/highlight_test.go
+++ b/modules/highlight/highlight_test.go
@@ -8,100 +8,170 @@ import (
"strings"
"testing"
- "code.gitea.io/gitea/modules/setting"
- "code.gitea.io/gitea/modules/util"
-
"github.com/stretchr/testify/assert"
- "gopkg.in/ini.v1"
)
+func lines(s string) []string {
+ return strings.Split(strings.TrimSpace(s), "\n")
+}
+
func TestFile(t *testing.T) {
- setting.Cfg = ini.Empty()
+ defaultNewLineInHTML := newLineInHTML
+ defer func() {
+ newLineInHTML = defaultNewLineInHTML
+ }()
+
+ newLineInHTML = "&#10;"
tests := []struct {
- name string
- numLines int
- fileName string
- code string
- want string
+ name string
+ code string
+ want []string
}{
{
- name: ".drone.yml",
- numLines: 12,
- fileName: ".drone.yml",
- code: util.Dedent(`
- kind: pipeline
- name: default
+ name: "empty.py",
+ code: "",
+ want: []string{""},
+ },
+ {
+ name: "tags.txt",
+ code: "<>",
+ want: []string{"<>"},
+ },
+ {
+ name: "tags.py",
+ code: "<>",
+ want: []string{`<>`},
+ },
+ {
+ name: "eol-no.py",
+ code: "a=1",
+ want: []string{`a=1`},
+ },
+ {
+ name: "eol-newline1.py",
+ code: "a=1\n",
+ want: []string{
+ `a=1
`,
+ },
+ },
+ {
+ name: "eol-newline2.py",
+ code: "a=1\n\n",
+ want: []string{
+ `a=1
`,
+ `
`,
+ },
+ },
+ {
+ name: "empty-line-with-space.py",
+ code: strings.ReplaceAll(strings.TrimSpace(`
+def:
+ a=1
- steps:
- - name: test
- image: golang:1.13
- environment:
- GOPROXY: https://goproxy.cn
- commands:
- - go get -u
- - go build -v
- - go test -v -race -coverprofile=coverage.txt -covermode=atomic
- `),
- want: util.Dedent(`
- kind: pipeline
- name: default
-
+b=''
+{space}
+c=2
+ `), "{space}", " "),
+ want: lines(`
+def:
+ a=1
+
+b=''
+
+c=2`,
+ ),
+ },
+ }
- steps:
- - name: test
- image: golang:1.13
- environment:
- GOPROXY: https://goproxy.cn
- commands:
- - go get -u
- - go build -v
- - go test -v -race -coverprofile=coverage.txt -covermode=atomic
- `),
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ out, err := File(tt.name, "", []byte(tt.code))
+ assert.NoError(t, err)
+ expected := strings.Join(tt.want, "\n")
+ actual := strings.Join(out, "\n")
+ assert.Equal(t, strings.Count(actual, ""))
+ assert.EqualValues(t, expected, actual)
+ })
+ }
+
+ newLineInHTML = ""
+ out, err := File("test-original-newline.py", "", []byte("a=1\n"))
+ assert.NoError(t, err)
+ assert.EqualValues(t, `a=1`+"\n", strings.Join(out, ""))
+}
+
+func TestPlainText(t *testing.T) {
+ defaultNewLineInHTML := newLineInHTML
+ defer func() {
+ newLineInHTML = defaultNewLineInHTML
+ }()
+
+ newLineInHTML = "&#10;"
+ tests := []struct {
+ name string
+ code string
+ want []string
+ }{
+ {
+ name: "empty.py",
+ code: "",
+ want: []string{""},
},
{
- name: ".drone.yml - trailing space",
- numLines: 13,
- fileName: ".drone.yml",
- code: strings.Replace(util.Dedent(`
- kind: pipeline
- name: default
-
- steps:
- - name: test
- image: golang:1.13
- environment:
- GOPROXY: https://goproxy.cn
- commands:
- - go get -u
- - go build -v
- - go test -v -race -coverprofile=coverage.txt -covermode=atomic
- `)+"\n", "name: default", "name: default ", 1),
- want: util.Dedent(`
- kind: pipeline
- name: default
-
+ name: "tags.py",
+ code: "<>",
+ want: []string{"&lt;&gt;"},
+ },
+ {
+ name: "eol-no.py",
+ code: "a=1",
+ want: []string{`a=1`},
+ },
+ {
+ name: "eol-newline1.py",
+ code: "a=1\n",
+ want: []string{
+ `a=1&#10;`,
+ },
+ },
+ {
+ name: "eol-newline2.py",
+ code: "a=1\n\n",
+ want: []string{
+ `a=1&#10;`,
+ `&#10;`,
+ },
+ },
+ {
+ name: "empty-line-with-space.py",
+ code: strings.ReplaceAll(strings.TrimSpace(`
+def:
+ a=1
- steps:
- - name: test
- image: golang:1.13
- environment:
- GOPROXY: https://goproxy.cn
- commands:
- - go get -u
- - go build -v
- - go test -v -race -coverprofile=coverage.txt -covermode=atomic
-
-
-
- `),
+b=''
+{space}
+c=2
+ `), "{space}", " "),
+ want: lines(`
+def:
+ a=1
+
+b=''
+
+c=2`),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- got := strings.Join(File(tt.numLines, tt.fileName, "", []byte(tt.code)), "\n")
- assert.Equal(t, tt.want, got)
- assert.Equal(t, strings.Count(got, ""))
+ out := PlainText([]byte(tt.code))
+ expected := strings.Join(tt.want, "\n")
+ actual := strings.Join(out, "\n")
+ assert.EqualValues(t, expected, actual)
})
}
+
+ newLineInHTML = ""
+ out := PlainText([]byte("a=1\n"))
+ assert.EqualValues(t, "a=1\n", strings.Join(out, ""))
}
diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go
index 01bd2d89234f5..1586dd4bf58db 100644
--- a/routers/web/repo/view.go
+++ b/routers/web/repo/view.go
@@ -15,7 +15,6 @@ import (
"net/http"
"net/url"
"path"
- "strconv"
"strings"
"time"
@@ -58,15 +57,6 @@ type namedBlob struct {
blob *git.Blob
}
-func linesBytesCount(s []byte) int {
- nl := []byte{'\n'}
- n := bytes.Count(s, nl)
- if len(s) > 0 && !bytes.HasSuffix(s, nl) {
- n++
- }
- return n
-}
-
// FIXME: There has to be a more efficient way of doing this
func getReadmeFileFromPath(commit *git.Commit, treePath string) (*namedBlob, error) {
tree, err := commit.SubTree(treePath)
@@ -552,8 +542,14 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
)
} else {
buf, _ := io.ReadAll(rd)
- lineNums := linesBytesCount(buf)
- ctx.Data["NumLines"] = strconv.Itoa(lineNums)
+
+ // empty: 0 line; "a": one line; "a\n": two lines; "a\nb": two lines;
+ // the NumLines is only used for the display on the UI: "xxx lines"
+ if len(buf) == 0 {
+ ctx.Data["NumLines"] = 0
+ } else {
+ ctx.Data["NumLines"] = bytes.Count(buf, []byte{'\n'}) + 1
+ }
ctx.Data["NumLinesSet"] = true
language := ""
@@ -581,7 +577,11 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
language = ""
}
}
- fileContent := highlight.File(lineNums, blob.Name(), language, buf)
+ fileContent, err := highlight.File(blob.Name(), language, buf)
+ if err != nil {
+ log.Error("highlight.File failed, fallback to plain text: %v", err)
+ fileContent = highlight.PlainText(buf)
+ }
status, _ := charset.EscapeControlReader(bytes.NewReader(buf), io.Discard)
ctx.Data["EscapeStatus"] = status
statuses := make([]charset.EscapeStatus, len(fileContent))