Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch Unicode Escaping to a VSCode-like system #19990

Merged
merged 26 commits into from
Aug 13, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
2e935d3
Switch Unicode Escaping to a VSCode-like system
zeripath Jun 10, 2022
0f00129
placate lint
zeripath Jun 17, 2022
1b626e2
Merge remote-tracking branch 'origin/main' into vscode-escape
zeripath Jun 19, 2022
5d0aaf1
fix template issue
zeripath Jun 19, 2022
0fb4df2
Merge remote-tracking branch 'origin' into vscode-escape
zeripath Jul 4, 2022
7826c68
placate yet another linter
zeripath Jul 4, 2022
872844f
more placation
zeripath Jul 4, 2022
a638e64
Merge branch 'main' into vscode-escape
zeripath Jul 21, 2022
8a448aa
Use var colors
zeripath Jul 22, 2022
f49a102
Merge remote-tracking branch 'origin/main' into vscode-escape
zeripath Jul 31, 2022
3eddae1
add missing fix
zeripath Jul 31, 2022
d7b03b0
permit raw nbsps in rendered markdown
zeripath Jul 31, 2022
bb36c71
Merge branch 'main' into vscode-escape
lafriks Jul 31, 2022
c74f7bf
Update modules/charset/ambiguous.go
zeripath Aug 1, 2022
a3702f2
Merge branch 'main' into vscode-escape
wxiaoguang Aug 2, 2022
739c4ba
Merge branch 'main' into vscode-escape
zeripath Aug 7, 2022
9b83725
Merge remote-tracking branch 'origin/main' into vscode-escape
zeripath Aug 12, 2022
a16e264
as per review
zeripath Aug 12, 2022
39f15b9
as per review
zeripath Aug 12, 2022
c73d810
fix test
zeripath Aug 12, 2022
bf0d9dc
placate lint
zeripath Aug 13, 2022
cd27248
Merge remote-tracking branch 'origin/main' into vscode-escape
zeripath Aug 13, 2022
bd1336b
Make it clearer where ambiguous.json comes from
zeripath Aug 13, 2022
31954cc
use template for code declarations in diff
zeripath Aug 13, 2022
9c336a6
furhter subtemplating
zeripath Aug 13, 2022
afc0064
Merge branch 'main' into vscode-escape
zeripath Aug 13, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions modules/charset/breakwriter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package charset

import (
"bytes"
"io"
)

// brTag is the HTML line break emitted in place of every '\n'. It lives at
// package level so Write does not allocate a new slice for each newline.
var brTag = []byte("<br>")

// BreakWriter wraps an io.Writer to always write '\n' as '<br>'.
type BreakWriter struct {
	io.Writer
}

// Write writes the provided bs to the wrapped writer, transparently replacing
// every '\n' with '<br>'.
//
// The returned n counts bytes of bs that were consumed, so each replaced '\n'
// counts as one byte even though four bytes are written downstream. On success
// n == len(bs). On failure the error from the wrapped writer is returned with
// the number of input bytes handled before it occurred.
func (b *BreakWriter) Write(bs []byte) (n int, err error) {
	for len(bs) > 0 {
		idx := bytes.IndexByte(bs, '\n')
		if idx < 0 {
			// No newline left: pass the remainder straight through.
			wn, werr := b.Writer.Write(bs)
			return n + wn, werr
		}

		if idx > 0 {
			// Emit the text that precedes the newline unchanged.
			wn, werr := b.Writer.Write(bs[:idx])
			n += wn
			if werr != nil {
				return n, werr
			}
		}

		if _, err = b.Writer.Write(brTag); err != nil {
			return n, err
		}
		// The newline itself is one consumed input byte.
		n++
		bs = bs[idx+1:]
	}

	return n, nil
}
69 changes: 69 additions & 0 deletions modules/charset/breakwriter_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

package charset

import (
"strings"
"testing"
)

// TestBreakWriter_Write feeds a table of inputs through a BreakWriter backed by
// a strings.Builder and checks the reported byte count and the text written.
func TestBreakWriter_Write(t *testing.T) {
	type testCase struct {
		name    string
		kase    string
		expect  string
		wantErr bool
	}

	cases := []testCase{
		{name: "noline", kase: "abcdefghijklmnopqrstuvwxyz", expect: "abcdefghijklmnopqrstuvwxyz"},
		{name: "endline", kase: "abcdefghijklmnopqrstuvwxyz\n", expect: "abcdefghijklmnopqrstuvwxyz<br>"},
		{name: "startline", kase: "\nabcdefghijklmnopqrstuvwxyz", expect: "<br>abcdefghijklmnopqrstuvwxyz"},
		{name: "onlyline", kase: "\n\n\n", expect: "<br><br><br>"},
		{name: "empty", kase: "", expect: ""},
		{name: "midline", kase: "\nabc\ndefghijkl\nmnopqrstuvwxy\nz", expect: "<br>abc<br>defghijkl<br>mnopqrstuvwxy<br>z"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			out := &strings.Builder{}
			w := &BreakWriter{Writer: out}

			written, err := w.Write([]byte(tc.kase))
			if gotErr := err != nil; gotErr != tc.wantErr {
				t.Errorf("BreakWriter.Write() error = %v, wantErr %v", err, tc.wantErr)
				return
			}
			// The count must reflect consumed input bytes, not expanded output.
			if want := len(tc.kase); written != want {
				t.Errorf("BreakWriter.Write() = %v, want %v", written, want)
			}
			if got := out.String(); got != tc.expect {
				t.Errorf("BreakWriter.Write() wrote %q, want %v", got, tc.expect)
			}
		})
	}
}
9 changes: 6 additions & 3 deletions modules/charset/escape.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@ import (
"code.gitea.io/gitea/modules/translation"
)

// RuneNBSP is the codepoint for NBSP
const RuneNBSP = 0xa0

// EscapeControlHTML escapes the unicode control sequences in a provided html document
func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped EscapeStatus, output string) {
func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
sb := &strings.Builder{}
outputStream := &HTMLStreamerWriter{Writer: sb}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
Expand All @@ -30,7 +33,7 @@ func EscapeControlHTML(text string, locale translation.Locale, allowed ...rune)
}

// EscapeControlReader escapes the unicode control sequences in a provided reader, writing the result to the provided writer in a locale, and returns the findings as an EscapeStatus along with any error
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped EscapeStatus, err error) {
func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, err error) {
outputStream := &HTMLStreamerWriter{Writer: writer}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)

Expand All @@ -42,7 +45,7 @@ func EscapeControlReader(reader io.Reader, writer io.Writer, locale translation.
}

// EscapeControlString escapes the unicode control sequences in a provided string and returns the findings as an EscapeStatus and the escaped string
func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped EscapeStatus, output string) {
func EscapeControlString(text string, locale translation.Locale, allowed ...rune) (escaped *EscapeStatus, output string) {
sb := &strings.Builder{}
outputStream := &HTMLStreamerWriter{Writer: sb}
streamer := NewEscapeStreamer(locale, outputStream, allowed...).(*escapeStreamer)
Expand Down
2 changes: 1 addition & 1 deletion modules/charset/escape_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ type EscapeStatus struct {
}

// Or combines two EscapeStatus structs into one representing the disjunction of the two
func (status EscapeStatus) Or(other EscapeStatus) EscapeStatus {
func (status *EscapeStatus) Or(other *EscapeStatus) *EscapeStatus {
st := status
st.Escaped = st.Escaped || other.Escaped
st.HasError = st.HasError || other.HasError
Expand Down
15 changes: 8 additions & 7 deletions modules/charset/escape_stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ var defaultWordRegexp = regexp.MustCompile(`(-?\d*\.\d\w*)|([^\` + "`" + `\~\!\@

func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer, allowed ...rune) HTMLStreamer {
return &escapeStreamer{
escaped: &EscapeStatus{},
PassthroughHTMLStreamer: *NewPassthroughStreamer(next),
locale: locale,
ambiguousTables: AmbiguousTablesForLocale(locale),
Expand All @@ -31,13 +32,13 @@ func NewEscapeStreamer(locale translation.Locale, next HTMLStreamer, allowed ...

type escapeStreamer struct {
PassthroughHTMLStreamer
escaped EscapeStatus
escaped *EscapeStatus
locale translation.Locale
ambiguousTables []*AmbiguousTable
allowed []rune
}

func (e *escapeStreamer) EscapeStatus() EscapeStatus {
func (e *escapeStreamer) EscapeStatus() *EscapeStatus {
return e.escaped
}

Expand Down Expand Up @@ -177,7 +178,7 @@ func (e *escapeStreamer) ambiguousRune(r, c rune) error {
}); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("%c", r)); err != nil {
if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil {
Expand Down Expand Up @@ -206,7 +207,7 @@ func (e *escapeStreamer) invisibleRune(r rune) error {
}); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.Text(fmt.Sprintf("%c", r)); err != nil {
if err := e.PassthroughHTMLStreamer.Text(string(r)); err != nil {
return err
}
if err := e.PassthroughHTMLStreamer.EndTag("span"); err != nil {
Expand Down Expand Up @@ -286,11 +287,11 @@ func (e *escapeStreamer) isAllowed(r rune) bool {
if len(e.allowed) == 0 {
return false
}
if len(e.allowed) == 1 && e.allowed[0] == r {
return true
if len(e.allowed) == 1 {
return e.allowed[0] == r
}

return sort.Search(len(e.allowed), func(i int) bool {
return e.allowed[i] <= r
return e.allowed[i] >= r
}) >= 0
}
4 changes: 2 additions & 2 deletions routers/web/repo/blame.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ type blameRow struct {
CommitMessage string
CommitSince gotemplate.HTML
Code gotemplate.HTML
EscapeStatus charset.EscapeStatus
EscapeStatus *charset.EscapeStatus
}

// RefBlame render blame page
Expand Down Expand Up @@ -235,7 +235,7 @@ func renderBlame(ctx *context.Context, blameParts []git.BlamePart, commitNames m
}
lines := make([]string, 0)
rows := make([]*blameRow, 0)
escapeStatus := charset.EscapeStatus{}
escapeStatus := &charset.EscapeStatus{}

i := 0
commitCnt := 0
Expand Down
48 changes: 29 additions & 19 deletions routers/web/repo/view.go
Original file line number Diff line number Diff line change
Expand Up @@ -328,33 +328,31 @@ func renderReadmeFile(ctx *context.Context, readmeFile *namedBlob, readmeTreelin
if markupType := markup.Type(readmeFile.name); markupType != "" {
ctx.Data["IsMarkup"] = true
ctx.Data["MarkupType"] = markupType
var result strings.Builder
err := markup.Render(&markup.RenderContext{

ctx.Data["EscapeStatus"], ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{
Ctx: ctx,
RelativePath: path.Join(ctx.Repo.TreePath, readmeFile.name), // ctx.Repo.TreePath is the directory not the Readme so we must append the Readme filename (and path).
URLPrefix: readmeTreelink,
Metas: ctx.Repo.Repository.ComposeDocumentMetas(),
GitRepo: ctx.Repo.GitRepo,
}, rd, &result)
}, rd)
if err != nil {
log.Error("Render failed: %v then fallback", err)
log.Error("Render failed for %s in %-v: %v Falling back to rendering source", readmeFile.name, ctx.Repo.Repository, err)
buf := &bytes.Buffer{}
ctx.Data["EscapeStatus"], _ = charset.EscapeControlReader(rd, buf, ctx.Locale)
ctx.Data["FileContent"] = strings.ReplaceAll(
gotemplate.HTMLEscapeString(buf.String()), "\n", `<br>`,
)
} else {
ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale, 0xa0)
}
} else {
ctx.Data["IsRenderedHTML"] = true
buf := &bytes.Buffer{}
ctx.Data["EscapeStatus"], err = charset.EscapeControlReader(rd, buf, ctx.Locale)
ctx.Data["EscapeStatus"], err = charset.EscapeControlReader(rd, &charset.BreakWriter{Writer: buf}, ctx.Locale, charset.RuneNBSP)
if err != nil {
log.Error("Read failed: %v", err)
}

ctx.Data["FileContent"] = strings.ReplaceAll(buf.String(), "\n", `<br>`)
ctx.Data["FileContent"] = buf.String()
}
}

Expand Down Expand Up @@ -496,27 +494,25 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
if markupType != "" && !shouldRenderSource {
ctx.Data["IsMarkup"] = true
ctx.Data["MarkupType"] = markupType
var result strings.Builder
if !detected {
markupType = ""
}
metas := ctx.Repo.Repository.ComposeDocumentMetas()
metas["BranchNameSubURL"] = ctx.Repo.BranchNameSubURL()
err := markup.Render(&markup.RenderContext{
ctx.Data["EscapeStatus"], ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{
Ctx: ctx,
Type: markupType,
RelativePath: ctx.Repo.TreePath,
URLPrefix: path.Dir(treeLink),
Metas: metas,
GitRepo: ctx.Repo.GitRepo,
}, rd, &result)
}, rd)
if err != nil {
ctx.ServerError("Render", err)
return
}
// to prevent iframe load third-party url
ctx.Resp.Header().Add("Content-Security-Policy", "frame-src 'self'")
ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale, 0xa0)
} else if readmeExist && !shouldRenderSource {
buf := &bytes.Buffer{}
ctx.Data["IsRenderedHTML"] = true
Expand Down Expand Up @@ -568,8 +564,8 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
log.Error("highlight.File failed, fallback to plain text: %v", err)
fileContent = highlight.PlainText(buf)
}
status := charset.EscapeStatus{}
statuses := make([]charset.EscapeStatus, len(fileContent))
status := &charset.EscapeStatus{}
statuses := make([]*charset.EscapeStatus, len(fileContent))
for i, line := range fileContent {
statuses[i], fileContent[i] = charset.EscapeControlHTML(line, ctx.Locale)
status = status.Or(statuses[i])
Expand Down Expand Up @@ -612,20 +608,17 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
rd := io.MultiReader(bytes.NewReader(buf), dataRc)
ctx.Data["IsMarkup"] = true
ctx.Data["MarkupType"] = markupType
var result strings.Builder
err := markup.Render(&markup.RenderContext{
ctx.Data["EscapeStatus"], ctx.Data["FileContent"], err = markupRender(ctx, &markup.RenderContext{
Ctx: ctx,
RelativePath: ctx.Repo.TreePath,
URLPrefix: path.Dir(treeLink),
Metas: ctx.Repo.Repository.ComposeDocumentMetas(),
GitRepo: ctx.Repo.GitRepo,
}, rd, &result)
}, rd)
if err != nil {
ctx.ServerError("Render", err)
return
}

ctx.Data["EscapeStatus"], ctx.Data["FileContent"] = charset.EscapeControlHTML(result.String(), ctx.Locale, 0xa0)
}
}

Expand All @@ -644,6 +637,23 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st
}
}

// markupRender renders input with markup.Render and pipes the rendered output
// through charset.EscapeControlReader, returning the escape findings, the
// escaped output and the error reported by markup.Render.
//
// Rendering and escaping run concurrently, connected by an io.Pipe: the
// renderer writes to the pipe in the calling goroutine while a helper
// goroutine reads from it and escapes into a strings.Builder.
func markupRender(ctx *context.Context, renderCtx *markup.RenderContext, input io.Reader) (escaped *charset.EscapeStatus, output string, err error) {
	markupRd, markupWr := io.Pipe()
	defer markupWr.Close()

	done := make(chan struct{})
	go func() {
		// Signal completion last so escaped/output are fully assigned
		// before the caller reads them after <-done.
		defer close(done)

		sb := &strings.Builder{}
		// NBSP is allowed here because this is rendered output.
		var escapeErr error
		escaped, escapeErr = charset.EscapeControlReader(markupRd, sb, ctx.Locale, charset.RuneNBSP)
		output = sb.String()

		// Close the read half once escaping has stopped. If the escaper
		// returned before draining the pipe, this makes the renderer's next
		// write fail instead of blocking forever on a pipe nobody reads.
		// After a normal EOF the write half is already closed and this is
		// a no-op. The escape error itself is otherwise still best-effort
		// and is not returned to the caller directly.
		_ = markupRd.CloseWithError(escapeErr)
	}()

	err = markup.Render(renderCtx, input, markupWr)
	// Propagate the render result to the reader: nil yields a plain EOF.
	_ = markupWr.CloseWithError(err)
	<-done
	return escaped, output, err
}

func safeURL(address string) string {
u, err := url.Parse(address)
if err != nil {
Expand Down
Loading