Fix bug when viewing the commit diff page with non-ANSI files (#36149) (#36150)

Backport #36149
This commit is contained in:
wxiaoguang
2025-12-14 01:14:06 +08:00
committed by GitHub
parent 2dd8ef8368
commit c97b89a662
5 changed files with 147 additions and 183 deletions

View File

@@ -26,9 +26,11 @@ type ConvertOpts struct {
KeepBOM bool
}
var ToUTF8WithFallbackReaderPrefetchSize = 16 * 1024
// ToUTF8WithFallbackReader detects the encoding of content and converts to UTF-8 reader if possible
func ToUTF8WithFallbackReader(rd io.Reader, opts ConvertOpts) io.Reader {
buf := make([]byte, 2048)
buf := make([]byte, ToUTF8WithFallbackReaderPrefetchSize)
n, err := util.ReadAtMost(rd, buf)
if err != nil {
return io.MultiReader(bytes.NewReader(MaybeRemoveBOM(buf[:n], opts)), rd)
@@ -41,6 +43,7 @@ func ToUTF8WithFallbackReader(rd io.Reader, opts ConvertOpts) io.Reader {
encoding, _ := charset.Lookup(charsetLabel)
if encoding == nil {
log.Error("Unknown encoding: %s", charsetLabel)
return io.MultiReader(bytes.NewReader(buf[:n]), rd)
}
@@ -54,17 +57,18 @@ func ToUTF8WithFallbackReader(rd io.Reader, opts ConvertOpts) io.Reader {
}
// ToUTF8 converts content to UTF8 encoding
func ToUTF8(content []byte, opts ConvertOpts) (string, error) {
func ToUTF8(content []byte, opts ConvertOpts) ([]byte, error) {
charsetLabel, err := DetectEncoding(content)
if err != nil {
return "", err
return content, err
} else if charsetLabel == "UTF-8" {
return string(MaybeRemoveBOM(content, opts)), nil
return MaybeRemoveBOM(content, opts), nil
}
encoding, _ := charset.Lookup(charsetLabel)
if encoding == nil {
return string(content), fmt.Errorf("Unknown encoding: %s", charsetLabel)
log.Error("Unknown encoding: %s", charsetLabel)
return content, fmt.Errorf("unknown encoding: %s", charsetLabel)
}
// If there is an error, we concatenate the nicely decoded part and the
@@ -76,7 +80,7 @@ func ToUTF8(content []byte, opts ConvertOpts) (string, error) {
result = MaybeRemoveBOM(result, opts)
return string(result), err
return result, err
}
// ToUTF8WithFallback detects the encoding of content and converts to UTF-8 if possible
@@ -94,6 +98,7 @@ func ToUTF8DropErrors(content []byte, opts ConvertOpts) []byte {
encoding, _ := charset.Lookup(charsetLabel)
if encoding == nil {
log.Error("Unknown encoding: %s", charsetLabel)
return content
}
@@ -130,28 +135,37 @@ func MaybeRemoveBOM(content []byte, opts ConvertOpts) []byte {
}
// DetectEncoding detect the encoding of content
func DetectEncoding(content []byte) (string, error) {
// it always returns a detected or guessed "encoding" string, no matter error happens or not
func DetectEncoding(content []byte) (encoding string, _ error) {
// First we check if the content represents valid utf8 content excepting a truncated character at the end.
// Now we could decode all the runes in turn but this is not necessarily the cheapest thing to do
// instead we walk backwards from the end to trim off a the incomplete character
// instead we walk backwards from the end to trim off the incomplete character
toValidate := content
end := len(toValidate) - 1
if end < 0 {
// no-op
} else if toValidate[end]>>5 == 0b110 {
// Incomplete 1 byte extension e.g. © <c2><a9> which has been truncated to <c2>
toValidate = toValidate[:end]
} else if end > 0 && toValidate[end]>>6 == 0b10 && toValidate[end-1]>>4 == 0b1110 {
// Incomplete 2 byte extension e.g. ⛔ <e2><9b><94> which has been truncated to <e2><9b>
toValidate = toValidate[:end-1]
} else if end > 1 && toValidate[end]>>6 == 0b10 && toValidate[end-1]>>6 == 0b10 && toValidate[end-2]>>3 == 0b11110 {
// Incomplete 3 byte extension e.g. 💩 <f0><9f><92><a9> which has been truncated to <f0><9f><92>
toValidate = toValidate[:end-2]
// U+0000 U+007F 0yyyzzzz
// U+0080 U+07FF 110xxxyy 10yyzzzz
// U+0800 U+FFFF 1110wwww 10xxxxyy 10yyzzzz
// U+010000 U+10FFFF 11110uvv 10vvwwww 10xxxxyy 10yyzzzz
cnt := 0
for end >= 0 && cnt < 4 {
c := toValidate[end]
if c>>5 == 0b110 || c>>4 == 0b1110 || c>>3 == 0b11110 {
// a leading byte
toValidate = toValidate[:end]
break
} else if c>>6 == 0b10 {
// a continuation byte
end--
} else {
// not an utf-8 byte
break
}
cnt++
}
if utf8.Valid(toValidate) {
log.Debug("Detected encoding: utf-8 (fast)")
return "UTF-8", nil
}
@@ -160,7 +174,7 @@ func DetectEncoding(content []byte) (string, error) {
if len(content) < 1024 {
// Check if original content is valid
if _, err := textDetector.DetectBest(content); err != nil {
return "", err
return util.IfZero(setting.Repository.AnsiCharset, "UTF-8"), err
}
times := 1024 / len(content)
detectContent = make([]byte, 0, times*len(content))
@@ -171,14 +185,10 @@ func DetectEncoding(content []byte) (string, error) {
detectContent = content
}
// Now we can't use DetectBest or just results[0] because the result isn't stable - so we need a tie break
// Now we can't use DetectBest or just results[0] because the result isn't stable - so we need a tie-break
results, err := textDetector.DetectAll(detectContent)
if err != nil {
if err == chardet.NotDetectedError && len(setting.Repository.AnsiCharset) > 0 {
log.Debug("Using default AnsiCharset: %s", setting.Repository.AnsiCharset)
return setting.Repository.AnsiCharset, nil
}
return "", err
return util.IfZero(setting.Repository.AnsiCharset, "UTF-8"), err
}
topConfidence := results[0].Confidence
@@ -201,11 +211,9 @@ func DetectEncoding(content []byte) (string, error) {
}
// FIXME: to properly decouple this function the fallback ANSI charset should be passed as an argument
if topResult.Charset != "UTF-8" && len(setting.Repository.AnsiCharset) > 0 {
log.Debug("Using default AnsiCharset: %s", setting.Repository.AnsiCharset)
if topResult.Charset != "UTF-8" && setting.Repository.AnsiCharset != "" {
return setting.Repository.AnsiCharset, err
}
log.Debug("Detected encoding: %s", topResult.Charset)
return topResult.Charset, err
return topResult.Charset, nil
}

View File

@@ -4,12 +4,12 @@
package charset
import (
"bytes"
"io"
"strings"
"testing"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/test"
"github.com/stretchr/testify/assert"
)
@@ -47,12 +47,12 @@ func TestToUTF8(t *testing.T) {
res, err := ToUTF8([]byte{0x41, 0x42, 0x43}, ConvertOpts{})
assert.NoError(t, err)
assert.Equal(t, "ABC", res)
assert.Equal(t, "ABC", string(res))
// "áéíóú"
res, err = ToUTF8([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
assert.NoError(t, err)
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, []byte(res))
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
// "áéíóú"
res, err = ToUTF8([]byte{
@@ -60,7 +60,7 @@ func TestToUTF8(t *testing.T) {
0xc3, 0xba,
}, ConvertOpts{})
assert.NoError(t, err)
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, []byte(res))
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
res, err = ToUTF8([]byte{
0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
@@ -96,12 +96,11 @@ func TestToUTF8(t *testing.T) {
assert.Equal(t, []byte{
0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3,
0x81, 0x9E, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0xA1, 0xE3, 0x82, 0x85, 0xE3, 0x80, 0x82,
},
[]byte(res))
}, res)
res, err = ToUTF8([]byte{0x00, 0x00, 0x00, 0x00}, ConvertOpts{})
assert.NoError(t, err)
assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, []byte(res))
assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, res)
}
func TestToUTF8WithFallback(t *testing.T) {
@@ -231,152 +230,42 @@ func TestDetectEncoding(t *testing.T) {
assert.Error(t, err)
}
func stringMustStartWith(t *testing.T, expected, value string) {
assert.Equal(t, expected, value[:len(expected)])
func stringMustStartWith(t *testing.T, expected string, value []byte) {
assert.Equal(t, expected, string(value[:len(expected)]))
}
func stringMustEndWith(t *testing.T, expected, value string) {
assert.Equal(t, expected, value[len(value)-len(expected):])
func stringMustEndWith(t *testing.T, expected string, value []byte) {
assert.Equal(t, expected, string(value[len(value)-len(expected):]))
}
func TestToUTF8WithFallbackReader(t *testing.T) {
resetDefaultCharsetsOrder()
test.MockVariableValue(&ToUTF8WithFallbackReaderPrefetchSize)
for testLen := range 2048 {
pattern := " test { () }\n"
input := ""
for len(input) < testLen {
input += pattern
}
input = input[:testLen]
input += "// Выключаем"
rd := ToUTF8WithFallbackReader(bytes.NewReader([]byte(input)), ConvertOpts{})
block := "aá啊🤔"
runes := []rune(block)
assert.Len(t, string(runes[0]), 1)
assert.Len(t, string(runes[1]), 2)
assert.Len(t, string(runes[2]), 3)
assert.Len(t, string(runes[3]), 4)
content := strings.Repeat(block, 2)
for i := 1; i < len(content); i++ {
encoding, err := DetectEncoding([]byte(content[:i]))
assert.NoError(t, err)
assert.Equal(t, "UTF-8", encoding)
ToUTF8WithFallbackReaderPrefetchSize = i
rd := ToUTF8WithFallbackReader(strings.NewReader(content), ConvertOpts{})
r, _ := io.ReadAll(rd)
assert.Equalf(t, input, string(r), "testing string len=%d", testLen)
assert.Equal(t, content, string(r))
}
for _, r := range runes {
content = "abc abc " + string(r) + string(r) + string(r)
for i := 0; i < len(content); i++ {
encoding, err := DetectEncoding([]byte(content[:i]))
assert.NoError(t, err)
assert.Equal(t, "UTF-8", encoding)
}
}
truncatedOneByteExtension := failFastBytes
encoding, _ := DetectEncoding(truncatedOneByteExtension)
assert.Equal(t, "UTF-8", encoding)
truncatedTwoByteExtension := failFastBytes
truncatedTwoByteExtension[len(failFastBytes)-1] = 0x9b
truncatedTwoByteExtension[len(failFastBytes)-2] = 0xe2
encoding, _ = DetectEncoding(truncatedTwoByteExtension)
assert.Equal(t, "UTF-8", encoding)
truncatedThreeByteExtension := failFastBytes
truncatedThreeByteExtension[len(failFastBytes)-1] = 0x92
truncatedThreeByteExtension[len(failFastBytes)-2] = 0x9f
truncatedThreeByteExtension[len(failFastBytes)-3] = 0xf0
encoding, _ = DetectEncoding(truncatedThreeByteExtension)
assert.Equal(t, "UTF-8", encoding)
}
var failFastBytes = []byte{
0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x74, 0x6f,
0x6f, 0x6c, 0x73, 0x2e, 0x61, 0x6e, 0x74, 0x2e, 0x74, 0x61, 0x73, 0x6b, 0x64, 0x65, 0x66, 0x73, 0x2e, 0x63, 0x6f, 0x6e,
0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x4f, 0x73, 0x0a, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x6f, 0x72, 0x67,
0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x62, 0x6f, 0x6f,
0x74, 0x2e, 0x67, 0x72, 0x61, 0x64, 0x6c, 0x65, 0x2e, 0x74, 0x61, 0x73, 0x6b, 0x73, 0x2e, 0x72, 0x75, 0x6e, 0x2e, 0x42,
0x6f, 0x6f, 0x74, 0x52, 0x75, 0x6e, 0x0a, 0x0a, 0x70, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x73, 0x20, 0x7b, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x69, 0x64, 0x28, 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d,
0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x62, 0x6f, 0x6f, 0x74, 0x22, 0x29, 0x0a, 0x7d, 0x0a, 0x0a, 0x64, 0x65, 0x70, 0x65,
0x6e, 0x64, 0x65, 0x6e, 0x63, 0x69, 0x65, 0x73, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65,
0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a,
0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, 0x61, 0x70, 0x69, 0x22, 0x29, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d,
0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74,
0x28, 0x22, 0x3a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, 0x61, 0x70, 0x69, 0x2d, 0x64, 0x6f, 0x63, 0x73, 0x22, 0x29,
0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e,
0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, 0x64, 0x62,
0x22, 0x29, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69,
0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a,
0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x22, 0x29, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65,
0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a,
0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x66,
0x73, 0x22, 0x29, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74,
0x69, 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72,
0x3a, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x6d, 0x71, 0x22, 0x29, 0x29, 0x0a, 0x0a,
0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22,
0x6a, 0x66, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x65, 0x3a, 0x70, 0x65, 0x2d, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e,
0x2d, 0x61, 0x75, 0x74, 0x68, 0x2d, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x2d, 0x73, 0x74, 0x61, 0x72, 0x74,
0x65, 0x72, 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74,
0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6a, 0x66, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x65, 0x3a, 0x70, 0x65, 0x2d, 0x63,
0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2d, 0x68, 0x61, 0x6c, 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c,
0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6a, 0x66, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x2e,
0x70, 0x65, 0x3a, 0x70, 0x65, 0x2d, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2d, 0x63, 0x6f, 0x72, 0x65, 0x22, 0x29, 0x0a,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28,
0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b,
0x2e, 0x62, 0x6f, 0x6f, 0x74, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x62, 0x6f, 0x6f, 0x74, 0x2d, 0x73, 0x74,
0x61, 0x72, 0x74, 0x65, 0x72, 0x2d, 0x77, 0x65, 0x62, 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c,
0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69,
0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x62, 0x6f, 0x6f, 0x74, 0x3a, 0x73, 0x70, 0x72,
0x69, 0x6e, 0x67, 0x2d, 0x62, 0x6f, 0x6f, 0x74, 0x2d, 0x73, 0x74, 0x61, 0x72, 0x74, 0x65, 0x72, 0x2d, 0x61, 0x6f, 0x70,
0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f,
0x6e, 0x28, 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f,
0x72, 0x6b, 0x2e, 0x62, 0x6f, 0x6f, 0x74, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x62, 0x6f, 0x6f, 0x74, 0x2d,
0x73, 0x74, 0x61, 0x72, 0x74, 0x65, 0x72, 0x2d, 0x61, 0x63, 0x74, 0x75, 0x61, 0x74, 0x6f, 0x72, 0x22, 0x29, 0x0a, 0x20,
0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f,
0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x63,
0x6c, 0x6f, 0x75, 0x64, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x2d, 0x73, 0x74,
0x61, 0x72, 0x74, 0x65, 0x72, 0x2d, 0x62, 0x6f, 0x6f, 0x74, 0x73, 0x74, 0x72, 0x61, 0x70, 0x22, 0x29, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, 0x72,
0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x63, 0x6c,
0x6f, 0x75, 0x64, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x2d, 0x73, 0x74, 0x61,
0x72, 0x74, 0x65, 0x72, 0x2d, 0x63, 0x6f, 0x6e, 0x73, 0x75, 0x6c, 0x2d, 0x61, 0x6c, 0x6c, 0x22, 0x29, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, 0x72,
0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x63, 0x6c,
0x6f, 0x75, 0x64, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x2d, 0x73, 0x74, 0x61,
0x72, 0x74, 0x65, 0x72, 0x2d, 0x73, 0x6c, 0x65, 0x75, 0x74, 0x68, 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d,
0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70,
0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x72, 0x65, 0x74, 0x72, 0x79, 0x3a,
0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x72, 0x65, 0x74, 0x72, 0x79, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x63, 0x68, 0x2e, 0x71,
0x6f, 0x73, 0x2e, 0x6c, 0x6f, 0x67, 0x62, 0x61, 0x63, 0x6b, 0x3a, 0x6c, 0x6f, 0x67, 0x62, 0x61, 0x63, 0x6b, 0x2d, 0x63,
0x6c, 0x61, 0x73, 0x73, 0x69, 0x63, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d,
0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x69, 0x6f, 0x2e, 0x6d, 0x69, 0x63, 0x72, 0x6f, 0x6d, 0x65,
0x74, 0x65, 0x72, 0x3a, 0x6d, 0x69, 0x63, 0x72, 0x6f, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x2d, 0x72, 0x65, 0x67, 0x69, 0x73,
0x74, 0x72, 0x79, 0x2d, 0x70, 0x72, 0x6f, 0x6d, 0x65, 0x74, 0x68, 0x65, 0x75, 0x73, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x6b, 0x6f, 0x74,
0x6c, 0x69, 0x6e, 0x28, 0x22, 0x73, 0x74, 0x64, 0x6c, 0x69, 0x62, 0x22, 0x29, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f,
0x2f, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x54, 0x65, 0x73, 0x74, 0x20, 0x64, 0x65, 0x70, 0x65, 0x6e, 0x64,
0x65, 0x6e, 0x63, 0x69, 0x65, 0x73, 0x2e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f,
0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x74,
0x65, 0x73, 0x74, 0x49, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6a,
0x66, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x65, 0x3a, 0x70, 0x65, 0x2d, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2d,
0x74, 0x65, 0x73, 0x74, 0x22, 0x29, 0x0a, 0x7d, 0x0a, 0x0a, 0x76, 0x61, 0x6c, 0x20, 0x70, 0x61, 0x74, 0x63, 0x68, 0x4a,
0x61, 0x72, 0x20, 0x62, 0x79, 0x20, 0x74, 0x61, 0x73, 0x6b, 0x73, 0x2e, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72,
0x69, 0x6e, 0x67, 0x28, 0x4a, 0x61, 0x72, 0x3a, 0x3a, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
0x20, 0x20, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x69, 0x66, 0x69, 0x65, 0x72, 0x2e,
0x73, 0x65, 0x74, 0x28, 0x22, 0x70, 0x61, 0x74, 0x63, 0x68, 0x65, 0x64, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
0x76, 0x61, 0x6c, 0x20, 0x72, 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x70, 0x61, 0x74, 0x68,
0x20, 0x62, 0x79, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x67,
0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6d, 0x61, 0x6e, 0x69, 0x66, 0x65, 0x73, 0x74,
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65,
0x73, 0x28, 0x22, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x2d, 0x50, 0x61, 0x74, 0x68, 0x22, 0x20, 0x74, 0x6f, 0x20, 0x6f, 0x62,
0x6a, 0x65, 0x63, 0x74, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70,
0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x20, 0x70, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x20, 0x3d,
0x20, 0x22, 0x66, 0x69, 0x6c, 0x65, 0x3a, 0x2f, 0x2b, 0x22, 0x2e, 0x74, 0x6f, 0x52, 0x65, 0x67, 0x65, 0x78, 0x28, 0x29,
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x72, 0x69, 0x64,
0x65, 0x20, 0x66, 0x75, 0x6e, 0x20, 0x74, 0x6f, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x28, 0x29, 0x3a, 0x20, 0x53, 0x74,
0x72, 0x69, 0x6e, 0x67, 0x20, 0x3d, 0x20, 0x72, 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x70,
0x61, 0x74, 0x68, 0x2e, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x2e, 0x6a, 0x6f, 0x69, 0x6e, 0x54, 0x6f, 0x53, 0x74, 0x72, 0x69,
0x6e, 0x67, 0x28, 0x22, 0x20, 0x22, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x74, 0x2e, 0x74, 0x6f, 0x55, 0x52, 0x49, 0x28, 0x29, 0x2e, 0x74, 0x6f, 0x55,
0x52, 0x4c, 0x28, 0x29, 0x2e, 0x74, 0x6f, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x28, 0x29, 0x2e, 0x72, 0x65, 0x70, 0x6c,
0x61, 0x63, 0x65, 0x46, 0x69, 0x72, 0x73, 0x74, 0x28, 0x70, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x2c, 0x20, 0x22, 0x2f,
0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20,
0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x74, 0x61, 0x73,
0x6b, 0x73, 0x2e, 0x6e, 0x61, 0x6d, 0x65, 0x64, 0x3c, 0x42, 0x6f, 0x6f, 0x74, 0x52, 0x75, 0x6e, 0x3e, 0x28, 0x22, 0x62,
0x6f, 0x6f, 0x74, 0x52, 0x75, 0x6e, 0x22, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x4f,
0x73, 0x2e, 0x69, 0x73, 0x46, 0x61, 0x6d, 0x69, 0x6c, 0x79, 0x28, 0x4f, 0x73, 0x2e, 0x46, 0x41, 0x4d, 0x49, 0x4c, 0x59,
0x5f, 0x57, 0x49, 0x4e, 0x44, 0x4f, 0x57, 0x53, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
0x20, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x70, 0x61, 0x74, 0x68, 0x20, 0x3d, 0x20, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x28, 0x73,
0x6f, 0x75, 0x72, 0x63, 0x65, 0x53, 0x65, 0x74, 0x73, 0x2e, 0x6e, 0x61, 0x6d, 0x65, 0x64, 0x28, 0x22, 0x6d, 0x61, 0x69,
0x6e, 0x22, 0x29, 0x2e, 0x6d, 0x61, 0x70, 0x20, 0x7b, 0x20, 0x69, 0x74, 0x2e, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x20,
0x7d, 0x2c, 0x20, 0x70, 0x61, 0x74, 0x63, 0x68, 0x4a, 0x61, 0x72, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a,
0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0xd0,
}

View File

@@ -312,11 +312,13 @@ func EditFile(ctx *context.Context) {
ctx.ServerError("ReadAll", err)
return
}
var fileContent string
if content, err := charset.ToUTF8(buf, charset.ConvertOpts{KeepBOM: true}); err != nil {
ctx.Data["FileContent"] = string(buf)
fileContent = string(buf)
} else {
ctx.Data["FileContent"] = content
fileContent = string(content)
}
ctx.Data["FileContent"] = fileContent
}
}

View File

@@ -1233,10 +1233,10 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
shouldFullFileHighlight := !setting.Git.DisableDiffHighlight && attrDiff.Value() == ""
if shouldFullFileHighlight {
if limitedContent.LeftContent != nil && limitedContent.LeftContent.buf.Len() < MaxDiffHighlightEntireFileSize {
diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.String())
diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.Bytes())
}
if limitedContent.RightContent != nil && limitedContent.RightContent.buf.Len() < MaxDiffHighlightEntireFileSize {
diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.String())
diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.Bytes())
}
}
}
@@ -1244,9 +1244,35 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
return diff, nil
}
func highlightCodeLines(diffFile *DiffFile, isLeft bool, content string) map[int]template.HTML {
func splitHighlightLines(buf []byte) (ret [][]byte) {
lineCount := bytes.Count(buf, []byte("\n")) + 1
ret = make([][]byte, 0, lineCount)
nlTagClose := []byte("\n</")
for {
pos := bytes.IndexByte(buf, '\n')
if pos == -1 {
ret = append(ret, buf)
return ret
}
// Chroma highlighting output sometimes have "</span>" right after \n, sometimes before.
// * "<span>text\n</span>"
// * "<span>text</span>\n"
if bytes.HasPrefix(buf[pos:], nlTagClose) {
pos1 := bytes.IndexByte(buf[pos:], '>')
if pos1 != -1 {
pos += pos1
}
}
ret = append(ret, buf[:pos+1])
buf = buf[pos+1:]
}
}
func highlightCodeLines(diffFile *DiffFile, isLeft bool, rawContent []byte) map[int]template.HTML {
contentUTF8, _ := charset.ToUTF8(rawContent, charset.ConvertOpts{})
content := util.UnsafeBytesToString(contentUTF8)
highlightedNewContent, _ := highlight.Code(diffFile.Name, diffFile.Language, content)
splitLines := strings.Split(string(highlightedNewContent), "\n")
splitLines := splitHighlightLines([]byte(highlightedNewContent))
lines := make(map[int]template.HTML, len(splitLines))
// only save the highlighted lines we need, but not the whole file, to save memory
for _, sec := range diffFile.Sections {

View File

@@ -5,6 +5,7 @@
package gitdiff
import (
"html/template"
"strconv"
"strings"
"testing"
@@ -640,3 +641,41 @@ func TestNoCrashes(t *testing.T) {
ParsePatch(t.Context(), setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(testcase.gitdiff), "")
}
}
func TestHighlightCodeLines(t *testing.T) {
t.Run("CharsetDetecting", func(t *testing.T) {
diffFile := &DiffFile{
Name: "a.c",
Language: "c",
Sections: []*DiffSection{
{
Lines: []*DiffLine{{LeftIdx: 1}},
},
},
}
ret := highlightCodeLines(diffFile, true, []byte("// abc\xcc def\xcd")) // ISO-8859-1 bytes
assert.Equal(t, "<span class=\"c1\">// abcÌ defÍ\n</span>", string(ret[0]))
})
t.Run("LeftLines", func(t *testing.T) {
diffFile := &DiffFile{
Name: "a.c",
Language: "c",
Sections: []*DiffSection{
{
Lines: []*DiffLine{
{LeftIdx: 1},
{LeftIdx: 2},
{LeftIdx: 3},
},
},
},
}
const nl = "\n"
ret := highlightCodeLines(diffFile, true, []byte("a\nb\n"))
assert.Equal(t, map[int]template.HTML{
0: `<span class="n">a</span>` + nl,
1: `<span class="n">b</span>`,
}, ret)
})
}