Skip to content

Commit

Permalink
placeholder for html entity
Browse files Browse the repository at this point in the history
  • Loading branch information
wxiaoguang committed Jul 9, 2022
1 parent 3c2aa66 commit 90c6440
Show file tree
Hide file tree
Showing 4 changed files with 313 additions and 246 deletions.
181 changes: 0 additions & 181 deletions services/gitdiff/gitdiff.go
Original file line number Diff line number Diff line change
Expand Up @@ -283,187 +283,6 @@ func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline {
return DiffInline{EscapeStatus: status, Content: template.HTML(content)}
}

// highlightCodeDiff is used to do diff with highlighted HTML code.
// It totally depends on Chroma's valid HTML output and its structure, do not use these functions for other purposes.
// The HTML tags are replaced by Unicode placeholders before the diff: "<span>{TEXT}</span>" => "\uE000{TEXT}\uE001"
// These single-rune placeholders are friendly to the diff algorithm.
// After the diff, the placeholders in the diff result are recovered to the HTML tags.
// It's guaranteed that the tags in the final diff result are paired correctly.
type highlightCodeDiff struct {
// placeholderBegin is the first rune of the range that placeholders are taken from.
placeholderBegin rune
// placeholderMaxCount is the size of the placeholder range, counted from placeholderBegin.
placeholderMaxCount int
// placeholderIndex is the offset (from placeholderBegin) of the next candidate rune to try.
placeholderIndex int
// placeholderTagMap maps a placeholder rune to the tag it stands for.
// A rune that already occurs in the code is stored with an empty string, which reserves it so it is never handed out.
placeholderTagMap map[rune]string
// tagPlaceholderMap is the reverse lookup: tag => placeholder rune.
// Closing tags are keyed together with a trailing comment that names their opening tag.
tagPlaceholderMap map[string]rune

// placeholderOverflowCount counts the tags that were dropped because no placeholder rune was left.
placeholderOverflowCount int

// lineWrapperTags holds the line wrapper tags (`<span class="line ...">`, `<span class="cl">`) that the first
// conversion stripped from the highlighted code, so that they can be added back later.
lineWrapperTags []string
}

// newHighlightCodeDiff returns a highlightCodeDiff whose placeholders are taken from the
// BMP Private Use Area (U+E000..U+F8FF, 6400 code points), with both lookup maps ready for use.
func newHighlightCodeDiff() *highlightCodeDiff {
	hcd := new(highlightCodeDiff)
	hcd.placeholderBegin = '\uE000'
	hcd.placeholderMaxCount = 0xF8FF - 0xE000 + 1 // 6400 runes in the private use block
	hcd.placeholderTagMap = make(map[rune]string)
	hcd.tagPlaceholderMap = make(map[string]rune)
	return hcd
}

// nextPlaceholder hands out the next unused rune of the placeholder range, or 0 when the range is exhausted.
// Runes that already have an entry in placeholderTagMap (assigned to a tag, or reserved because the code
// itself contains them) are skipped. Every candidate that is examined advances placeholderIndex.
// The diff is done line by line and a line usually needs only a few (no more than 10) placeholders,
// so exhausting placeholderMaxCount is not expected in real cases.
func (hcd *highlightCodeDiff) nextPlaceholder() rune {
	for {
		if hcd.placeholderIndex >= hcd.placeholderMaxCount {
			return 0 // no more available placeholder
		}
		candidate := hcd.placeholderBegin + rune(hcd.placeholderIndex)
		hcd.placeholderIndex++
		if _, taken := hcd.placeholderTagMap[candidate]; taken {
			continue // already in use (by a tag or by the code itself), try the next rune
		}
		return candidate
	}
}

// isInPlaceholderRange reports whether r lies in the half-open rune range
// [placeholderBegin, placeholderBegin+placeholderMaxCount).
func (hcd *highlightCodeDiff) isInPlaceholderRange(r rune) bool {
	first := hcd.placeholderBegin
	end := first + rune(hcd.placeholderMaxCount) // exclusive upper bound
	return r >= first && r < end
}

// collectUsedRunes scans code for runes that fall inside the placeholder range and records each of them
// in placeholderTagMap with an empty tag. Such a rune is used by the code itself, so it won't be used
// as a placeholder anymore.
func (hcd *highlightCodeDiff) collectUsedRunes(code string) {
	for _, used := range code {
		if !hcd.isInPlaceholderRange(used) {
			continue
		}
		// the empty tag marks the rune as "taken by the code"
		hcd.placeholderTagMap[used] = ""
	}
}

// diffWithHighlight diffs codeA against codeB on their highlighted form and returns the diff segments
// with the HTML tags restored.
// Steps, in order:
//  1. reserve every placeholder-range rune that occurs in either input, so it is never handed out;
//  2. highlight both inputs with highlight.Code;
//  3. replace the tags of both highlighted strings with placeholders (codeA first, then codeB);
//  4. run DiffMain followed by DiffCleanupEfficiency on the placeholder strings;
//  5. turn the placeholders of every diff segment back into tags.
func (hcd *highlightCodeDiff) diffWithHighlight(filename, language, codeA, codeB string) []diffmatchpatch.Diff {
	for _, code := range [...]string{codeA, codeB} {
		hcd.collectUsedRunes(code)
	}

	htmlA := highlight.Code(filename, language, codeA)
	htmlB := highlight.Code(filename, language, codeB)

	textA := hcd.convertToPlaceholders(htmlA)
	textB := hcd.convertToPlaceholders(htmlB)

	result := diffMatchPatch.DiffCleanupEfficiency(diffMatchPatch.DiffMain(textA, textB, true))
	for idx := range result {
		// recover in place, the slice elements are modified through the pointer
		hcd.recoverOneDiff(&result[idx])
	}
	return result
}

// convertToPlaceholders replaces the HTML tags in htmlCode with single-rune placeholders and returns
// the converted string, which is suitable as input for the diff algorithm.
//
// Behavior:
//   - The line wrapper tags (`<span class="line ...">` and `<span class="cl">`) are removed together with one
//     trailing "</span>" each. If no wrapper tags have been saved yet, they are saved in hcd.lineWrapperTags.
//   - Every other tag is mapped to a placeholder rune. The same tag always gets the same placeholder.
//     A closing tag is keyed together with its opening tag, so "</span>" of different opening tags get
//     different placeholders and the diff algorithm can match open/closed tags.
//   - If no placeholder is left, the tag is dropped and hcd.placeholderOverflowCount is increased.
//   - Conversion stops at a closing tag that has no opening tag, or at a "<" that is never closed by ">".
//     The rest of the input is then appended unchanged.
//
// The closing ">" of a tag is searched after its "<", so a stray ">" in the text before a tag
// does not stop the conversion.
func (hcd *highlightCodeDiff) convertToPlaceholders(htmlCode string) string {
	var tagStack []string
	var res strings.Builder

	// only a conversion that starts without saved wrapper tags records them
	firstRunForLineTags := hcd.lineWrapperTags == nil

	// the standard chroma highlight HTML is "<span class="line [hl]"><span class="cl"> ... </span></span>"
	for {
		// find the next HTML tag: the first '<' and the first '>' that follows it
		pos1 := strings.IndexByte(htmlCode, '<')
		if pos1 == -1 {
			break
		}
		tagLen := strings.IndexByte(htmlCode[pos1:], '>')
		if tagLen == -1 {
			break // unterminated tag, keep the remaining text as it is
		}
		pos2 := pos1 + tagLen
		tag := htmlCode[pos1 : pos2+1]

		// write the content before the tag into result string, and consume the tag in the string
		res.WriteString(htmlCode[:pos1])
		htmlCode = htmlCode[pos2+1:]

		// the line wrapper tags should be removed before diff
		if strings.HasPrefix(tag, `<span class="line`) || strings.HasPrefix(tag, `<span class="cl"`) {
			if firstRunForLineTags {
				// if this is the first run for converting, save the line wrapper tags for later use, they should be added back
				hcd.lineWrapperTags = append(hcd.lineWrapperTags, tag)
			}
			// the wrapper's closing tag is the last one of the string
			htmlCode = strings.TrimSuffix(htmlCode, "</span>")
			continue
		}

		var tagInMap string
		if tag[1] == '/' { // for closed tag
			if len(tagStack) == 0 {
				break // invalid input, no open tag but see close tag
			}
			// make sure the closed tag in map is related to the open tag, so that the diff algorithm can match the open/closed tags
			// the closed tag will be recorded in the map by key "</span><!-- <span the-open>-->" for "<span the-open>"
			tagInMap = tag + "<!-- " + tagStack[len(tagStack)-1] + "-->"
			tagStack = tagStack[:len(tagStack)-1]
		} else { // for open tag
			tagInMap = tag
			tagStack = append(tagStack, tag)
		}

		// remember the placeholder and tag in the map
		placeholder, ok := hcd.tagPlaceholderMap[tagInMap]
		if !ok {
			placeholder = hcd.nextPlaceholder()
			if placeholder != 0 {
				hcd.tagPlaceholderMap[tagInMap] = placeholder
				hcd.placeholderTagMap[placeholder] = tagInMap
			}
		}

		if placeholder != 0 {
			res.WriteRune(placeholder) // use the placeholder to replace the tag
		} else {
			// unfortunately, all private use runes have been exhausted, no more placeholder could be used, the tag is dropped.
			// usually, the exhausting won't occur in real cases, the magnitude of used placeholders is not larger than that of the CSS classes outputted by chroma.
			hcd.placeholderOverflowCount++
		}
	}
	// write the remaining string
	res.WriteString(htmlCode)
	return res.String()
}

// recoverOneDiff rewrites diff.Text in place, turning every placeholder rune back into its HTML tag.
//   - A rune that is not a placeholder (unknown, or reserved with an empty tag) is copied as it is.
//   - An opening tag is written and remembered as open.
//   - A closing tag is written without the trailing comment that convertToPlaceholders appended to its key,
//     and closes the most recent open tag. A closing tag with no open tag in this segment is dropped.
//   - Tags that are still open at the end of the segment are closed, innermost first.
//
// So the tags of one recovered segment are always paired.
func (hcd *highlightCodeDiff) recoverOneDiff(diff *diffmatchpatch.Diff) {
	var (
		out      strings.Builder
		openTags []string
	)

	for _, r := range diff.Text {
		tag := hcd.placeholderTagMap[r] // empty for unknown runes and for runes reserved by the code
		switch {
		case tag == "":
			out.WriteRune(r)
		case tag[1] != '/': // opening tag
			openTags = append(openTags, tag)
			out.WriteString(tag)
		case len(openTags) > 0: // closing tag that has a matching open tag in this segment
			openTags = openTags[:len(openTags)-1]
			// only the tag itself, the map key carries a trailing "<!-- ... -->" comment
			out.WriteString(tag[:strings.IndexByte(tag, '>')+1])
		}
		// remaining case: closing tag without any open tag, it is skipped
	}

	// close all tags which are still open, in reverse order
	for i := len(openTags) - 1; i >= 0; i-- {
		open := openTags[i]
		// the tag name ends at the first space or '>': "<span class=...>" or "<span>" => "</span>"
		if nameEnd := strings.IndexAny(open, " >"); nameEnd != -1 {
			out.WriteString("</" + open[1:nameEnd] + ">")
		}
	}

	diff.Text = out.String()
}

// GetComputedInlineDiffFor computes inline diff for the given line.
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) DiffInline {
if setting.Git.DisableDiffHighlight {
Expand Down
65 changes: 0 additions & 65 deletions services/gitdiff/gitdiff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -644,71 +644,6 @@ func TestGetDiffRangeWithWhitespaceBehavior(t *testing.T) {
}
}

// TestDiffWithHighlight diffs two highlighted lines and checks the HTML rendered for the deleted and
// for the added side: the changed part must be wrapped in "removed-code" / "added-code" and the
// escaped entities of the old code must stay intact.
func TestDiffWithHighlight(t *testing.T) {
	const (
		oldLine = " run('<>')\n"
		newLine = " run(db)\n"
	)
	diffs := newHighlightCodeDiff().diffWithHighlight("main.v", "", oldLine, newLine)

	wantDel := ` <span class="n">run</span><span class="o">(</span><span class="removed-code"><span class="k">&#39;</span><span class="o">&lt;</span><span class="o">&gt;</span><span class="k">&#39;</span></span><span class="o">)</span>` + "\n"
	assert.Equal(t, wantDel, diffToHTML(nil, diffs, DiffLineDel))

	wantAdd := ` <span class="n">run</span><span class="o">(</span><span class="added-code"><span class="n">db</span></span><span class="o">)</span>` + "\n"
	assert.Equal(t, wantAdd, diffToHTML(nil, diffs, DiffLineAdd))
}

// TestDiffWithHighlightPlaceholder covers the placeholder handling of highlightCodeDiff:
//   - private-use runes that occur in the code are reserved (empty tag) and survive the diff unchanged,
//     with and without the line wrapper tags;
//   - for every placeholder limit from 0 to 99 the rendered HTML has as many "<span" as "</span",
//     and at least one of these runs overflows the placeholder range.
func TestDiffWithHighlightPlaceholder(t *testing.T) {
	const (
		oldCode = "a='\uE000'"
		newCode = "a='\uF8FF'"
	)

	// deleted side, rendered with the line wrapper tags
	delDiffer := newHighlightCodeDiff()
	delDiffs := delDiffer.diffWithHighlight("main.js", "", oldCode, newCode)
	assert.Equal(t, "", delDiffer.placeholderTagMap[0xE000])
	assert.Equal(t, "", delDiffer.placeholderTagMap[0xF8FF])

	wantDel := fmt.Sprintf(`<span class="line"><span class="cl"><span class="nx">a</span><span class="o">=</span><span class="s1">&#39;</span><span class="removed-code">%s</span>&#39;</span></span>`, "\uE000")
	assert.Equal(t, wantDel, diffToHTML(delDiffer.lineWrapperTags, delDiffs, DiffLineDel))

	// added side, rendered without the line wrapper tags
	addDiffer := newHighlightCodeDiff()
	addDiffs := addDiffer.diffWithHighlight("main.js", "", oldCode, newCode)
	wantAdd := fmt.Sprintf(`<span class="nx">a</span><span class="o">=</span><span class="s1">&#39;</span><span class="added-code">%s</span>&#39;`, "\uF8FF")
	assert.Equal(t, wantAdd, diffToHTML(nil, addDiffs, DiffLineAdd))

	// the number of opening and closing span tags must match
	assertBalanced := func(html string) {
		assert.Equal(t, strings.Count(html, "<span"), strings.Count(html, "</span"))
	}

	overflowTotal := 0
	for limit := 0; limit < 100; limit++ {
		differ := newHighlightCodeDiff()
		differ.placeholderMaxCount = limit
		diffs := differ.diffWithHighlight("main.js", "", "a='1'", "b='2'")
		overflowTotal += differ.placeholderOverflowCount

		assertBalanced(diffToHTML(nil, diffs, DiffLineDel))
		assertBalanced(diffToHTML(nil, diffs, DiffLineAdd))
	}
	assert.NotZero(t, overflowTotal)
}

func TestNoCrashes(t *testing.T) {
type testcase struct {
gitdiff string
Expand Down
Loading

0 comments on commit 90c6440

Please sign in to comment.