Fix bug when viewing the commit diff page with non-ANSI files (#36149) (#36150)

Backport #36149
This commit is contained in:
wxiaoguang
2025-12-14 01:14:06 +08:00
committed by GitHub
parent 2dd8ef8368
commit c97b89a662
5 changed files with 147 additions and 183 deletions

View File

@@ -1233,10 +1233,10 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
shouldFullFileHighlight := !setting.Git.DisableDiffHighlight && attrDiff.Value() == ""
if shouldFullFileHighlight {
if limitedContent.LeftContent != nil && limitedContent.LeftContent.buf.Len() < MaxDiffHighlightEntireFileSize {
diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.String())
diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.Bytes())
}
if limitedContent.RightContent != nil && limitedContent.RightContent.buf.Len() < MaxDiffHighlightEntireFileSize {
diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.String())
diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.Bytes())
}
}
}
@@ -1244,9 +1244,35 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
return diff, nil
}
func highlightCodeLines(diffFile *DiffFile, isLeft bool, content string) map[int]template.HTML {
func splitHighlightLines(buf []byte) (ret [][]byte) {
lineCount := bytes.Count(buf, []byte("\n")) + 1
ret = make([][]byte, 0, lineCount)
nlTagClose := []byte("\n</")
for {
pos := bytes.IndexByte(buf, '\n')
if pos == -1 {
ret = append(ret, buf)
return ret
}
// Chroma highlighting output sometimes have "</span>" right after \n, sometimes before.
// * "<span>text\n</span>"
// * "<span>text</span>\n"
if bytes.HasPrefix(buf[pos:], nlTagClose) {
pos1 := bytes.IndexByte(buf[pos:], '>')
if pos1 != -1 {
pos += pos1
}
}
ret = append(ret, buf[:pos+1])
buf = buf[pos+1:]
}
}
func highlightCodeLines(diffFile *DiffFile, isLeft bool, rawContent []byte) map[int]template.HTML {
contentUTF8, _ := charset.ToUTF8(rawContent, charset.ConvertOpts{})
content := util.UnsafeBytesToString(contentUTF8)
highlightedNewContent, _ := highlight.Code(diffFile.Name, diffFile.Language, content)
splitLines := strings.Split(string(highlightedNewContent), "\n")
splitLines := splitHighlightLines([]byte(highlightedNewContent))
lines := make(map[int]template.HTML, len(splitLines))
// only save the highlighted lines we need, but not the whole file, to save memory
for _, sec := range diffFile.Sections {

View File

@@ -5,6 +5,7 @@
package gitdiff
import (
"html/template"
"strconv"
"strings"
"testing"
@@ -640,3 +641,41 @@ func TestNoCrashes(t *testing.T) {
ParsePatch(t.Context(), setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff), "")
}
}
func TestHighlightCodeLines(t *testing.T) {
t.Run("CharsetDetecting", func(t *testing.T) {
diffFile := &DiffFile{
Name: "a.c",
Language: "c",
Sections: []*DiffSection{
{
Lines: []*DiffLine{{LeftIdx: 1}},
},
},
}
ret := highlightCodeLines(diffFile, true, []byte("// abc\xcc def\xcd")) // ISO-8859-1 bytes
assert.Equal(t, "<span class=\"c1\">// abcÌ defÍ\n</span>", string(ret[0]))
})
t.Run("LeftLines", func(t *testing.T) {
diffFile := &DiffFile{
Name: "a.c",
Language: "c",
Sections: []*DiffSection{
{
Lines: []*DiffLine{
{LeftIdx: 1},
{LeftIdx: 2},
{LeftIdx: 3},
},
},
},
}
const nl = "\n"
ret := highlightCodeLines(diffFile, true, []byte("a\nb\n"))
assert.Equal(t, map[int]template.HTML{
0: `<span class="n">a</span>` + nl,
1: `<span class="n">b</span>`,
}, ret)
})
}