mirror of
https://github.com/go-gitea/gitea.git
synced 2025-12-14 21:15:18 +08:00
Merge branch 'main' into lunny/refactor_compare
This commit is contained in:
commit
a92efc1e40
2
.github/workflows/pull-compliance.yml
vendored
2
.github/workflows/pull-compliance.yml
vendored
@ -10,6 +10,8 @@ concurrency:
|
||||
jobs:
|
||||
files-changed:
|
||||
uses: ./.github/workflows/files-changed.yml
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
lint-backend:
|
||||
if: needs.files-changed.outputs.backend == 'true' || needs.files-changed.outputs.actions == 'true'
|
||||
|
||||
2
.github/workflows/pull-db-tests.yml
vendored
2
.github/workflows/pull-db-tests.yml
vendored
@ -10,6 +10,8 @@ concurrency:
|
||||
jobs:
|
||||
files-changed:
|
||||
uses: ./.github/workflows/files-changed.yml
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
test-pgsql:
|
||||
if: needs.files-changed.outputs.backend == 'true' || needs.files-changed.outputs.actions == 'true'
|
||||
|
||||
2
.github/workflows/pull-docker-dryrun.yml
vendored
2
.github/workflows/pull-docker-dryrun.yml
vendored
@ -10,6 +10,8 @@ concurrency:
|
||||
jobs:
|
||||
files-changed:
|
||||
uses: ./.github/workflows/files-changed.yml
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
container:
|
||||
if: needs.files-changed.outputs.docker == 'true' || needs.files-changed.outputs.actions == 'true'
|
||||
|
||||
@ -5,12 +5,10 @@ package charset
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
|
||||
@ -23,60 +21,39 @@ import (
|
||||
var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'}
|
||||
|
||||
type ConvertOpts struct {
|
||||
KeepBOM bool
|
||||
KeepBOM bool
|
||||
ErrorReplacement []byte
|
||||
ErrorReturnOrigin bool
|
||||
}
|
||||
|
||||
var ToUTF8WithFallbackReaderPrefetchSize = 16 * 1024
|
||||
|
||||
// ToUTF8WithFallbackReader detects the encoding of content and converts to UTF-8 reader if possible
|
||||
func ToUTF8WithFallbackReader(rd io.Reader, opts ConvertOpts) io.Reader {
|
||||
buf := make([]byte, 2048)
|
||||
buf := make([]byte, ToUTF8WithFallbackReaderPrefetchSize)
|
||||
n, err := util.ReadAtMost(rd, buf)
|
||||
if err != nil {
|
||||
return io.MultiReader(bytes.NewReader(MaybeRemoveBOM(buf[:n], opts)), rd)
|
||||
}
|
||||
|
||||
charsetLabel, err := DetectEncoding(buf[:n])
|
||||
if err != nil || charsetLabel == "UTF-8" {
|
||||
return io.MultiReader(bytes.NewReader(MaybeRemoveBOM(buf[:n], opts)), rd)
|
||||
}
|
||||
|
||||
encoding, _ := charset.Lookup(charsetLabel)
|
||||
if encoding == nil {
|
||||
// read error occurs, don't do any processing
|
||||
return io.MultiReader(bytes.NewReader(buf[:n]), rd)
|
||||
}
|
||||
|
||||
return transform.NewReader(
|
||||
io.MultiReader(
|
||||
bytes.NewReader(MaybeRemoveBOM(buf[:n], opts)),
|
||||
rd,
|
||||
),
|
||||
encoding.NewDecoder(),
|
||||
)
|
||||
}
|
||||
|
||||
// ToUTF8 converts content to UTF8 encoding
|
||||
func ToUTF8(content []byte, opts ConvertOpts) (string, error) {
|
||||
charsetLabel, err := DetectEncoding(content)
|
||||
if err != nil {
|
||||
return "", err
|
||||
} else if charsetLabel == "UTF-8" {
|
||||
return string(MaybeRemoveBOM(content, opts)), nil
|
||||
charsetLabel, _ := DetectEncoding(buf[:n])
|
||||
if charsetLabel == "UTF-8" {
|
||||
// is utf-8, try to remove BOM and read it as-is
|
||||
return io.MultiReader(bytes.NewReader(maybeRemoveBOM(buf[:n], opts)), rd)
|
||||
}
|
||||
|
||||
encoding, _ := charset.Lookup(charsetLabel)
|
||||
if encoding == nil {
|
||||
return string(content), fmt.Errorf("Unknown encoding: %s", charsetLabel)
|
||||
// unknown charset, don't do any processing
|
||||
return io.MultiReader(bytes.NewReader(buf[:n]), rd)
|
||||
}
|
||||
|
||||
// If there is an error, we concatenate the nicely decoded part and the
|
||||
// original left over. This way we won't lose much data.
|
||||
result, n, err := transform.Bytes(encoding.NewDecoder(), content)
|
||||
if err != nil {
|
||||
result = append(result, content[n:]...)
|
||||
}
|
||||
|
||||
result = MaybeRemoveBOM(result, opts)
|
||||
|
||||
return string(result), err
|
||||
// convert from charset to utf-8
|
||||
return transform.NewReader(
|
||||
io.MultiReader(bytes.NewReader(buf[:n]), rd),
|
||||
encoding.NewDecoder(),
|
||||
)
|
||||
}
|
||||
|
||||
// ToUTF8WithFallback detects the encoding of content and converts to UTF-8 if possible
|
||||
@ -85,73 +62,84 @@ func ToUTF8WithFallback(content []byte, opts ConvertOpts) []byte {
|
||||
return bs
|
||||
}
|
||||
|
||||
// ToUTF8DropErrors makes sure the return string is valid utf-8; attempts conversion if possible
|
||||
func ToUTF8DropErrors(content []byte, opts ConvertOpts) []byte {
|
||||
charsetLabel, err := DetectEncoding(content)
|
||||
if err != nil || charsetLabel == "UTF-8" {
|
||||
return MaybeRemoveBOM(content, opts)
|
||||
func ToUTF8DropErrors(content []byte) []byte {
|
||||
return ToUTF8(content, ConvertOpts{ErrorReplacement: []byte{' '}})
|
||||
}
|
||||
|
||||
func ToUTF8(content []byte, opts ConvertOpts) []byte {
|
||||
charsetLabel, _ := DetectEncoding(content)
|
||||
if charsetLabel == "UTF-8" {
|
||||
return maybeRemoveBOM(content, opts)
|
||||
}
|
||||
|
||||
encoding, _ := charset.Lookup(charsetLabel)
|
||||
if encoding == nil {
|
||||
setting.PanicInDevOrTesting("unsupported detected charset %q, it shouldn't happen", charsetLabel)
|
||||
return content
|
||||
}
|
||||
|
||||
// We ignore any non-decodable parts from the file.
|
||||
// Some parts might be lost
|
||||
var decoded []byte
|
||||
decoder := encoding.NewDecoder()
|
||||
idx := 0
|
||||
for {
|
||||
for idx < len(content) {
|
||||
result, n, err := transform.Bytes(decoder, content[idx:])
|
||||
decoded = append(decoded, result...)
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
decoded = append(decoded, ' ')
|
||||
idx = idx + n + 1
|
||||
if idx >= len(content) {
|
||||
break
|
||||
if opts.ErrorReturnOrigin {
|
||||
return content
|
||||
}
|
||||
if opts.ErrorReplacement == nil {
|
||||
decoded = append(decoded, content[idx+n])
|
||||
} else {
|
||||
decoded = append(decoded, opts.ErrorReplacement...)
|
||||
}
|
||||
idx += n + 1
|
||||
}
|
||||
|
||||
return MaybeRemoveBOM(decoded, opts)
|
||||
return maybeRemoveBOM(decoded, opts)
|
||||
}
|
||||
|
||||
// MaybeRemoveBOM removes a UTF-8 BOM from a []byte when opts.KeepBOM is false
|
||||
func MaybeRemoveBOM(content []byte, opts ConvertOpts) []byte {
|
||||
// maybeRemoveBOM removes a UTF-8 BOM from a []byte when opts.KeepBOM is false
|
||||
func maybeRemoveBOM(content []byte, opts ConvertOpts) []byte {
|
||||
if opts.KeepBOM {
|
||||
return content
|
||||
}
|
||||
if len(content) > 2 && bytes.Equal(content[0:3], UTF8BOM) {
|
||||
return content[3:]
|
||||
}
|
||||
return content
|
||||
return bytes.TrimPrefix(content, UTF8BOM)
|
||||
}
|
||||
|
||||
// DetectEncoding detect the encoding of content
|
||||
func DetectEncoding(content []byte) (string, error) {
|
||||
// it always returns a detected or guessed "encoding" string, no matter error happens or not
|
||||
func DetectEncoding(content []byte) (encoding string, _ error) {
|
||||
// First we check if the content represents valid utf8 content excepting a truncated character at the end.
|
||||
|
||||
// Now we could decode all the runes in turn but this is not necessarily the cheapest thing to do
|
||||
// instead we walk backwards from the end to trim off a the incomplete character
|
||||
// instead we walk backwards from the end to trim off the incomplete character
|
||||
toValidate := content
|
||||
end := len(toValidate) - 1
|
||||
|
||||
if end < 0 {
|
||||
// no-op
|
||||
} else if toValidate[end]>>5 == 0b110 {
|
||||
// Incomplete 1 byte extension e.g. © <c2><a9> which has been truncated to <c2>
|
||||
toValidate = toValidate[:end]
|
||||
} else if end > 0 && toValidate[end]>>6 == 0b10 && toValidate[end-1]>>4 == 0b1110 {
|
||||
// Incomplete 2 byte extension e.g. ⛔ <e2><9b><94> which has been truncated to <e2><9b>
|
||||
toValidate = toValidate[:end-1]
|
||||
} else if end > 1 && toValidate[end]>>6 == 0b10 && toValidate[end-1]>>6 == 0b10 && toValidate[end-2]>>3 == 0b11110 {
|
||||
// Incomplete 3 byte extension e.g. 💩 <f0><9f><92><a9> which has been truncated to <f0><9f><92>
|
||||
toValidate = toValidate[:end-2]
|
||||
// U+0000 U+007F 0yyyzzzz
|
||||
// U+0080 U+07FF 110xxxyy 10yyzzzz
|
||||
// U+0800 U+FFFF 1110wwww 10xxxxyy 10yyzzzz
|
||||
// U+010000 U+10FFFF 11110uvv 10vvwwww 10xxxxyy 10yyzzzz
|
||||
cnt := 0
|
||||
for end >= 0 && cnt < 4 {
|
||||
c := toValidate[end]
|
||||
if c>>5 == 0b110 || c>>4 == 0b1110 || c>>3 == 0b11110 {
|
||||
// a leading byte
|
||||
toValidate = toValidate[:end]
|
||||
break
|
||||
} else if c>>6 == 0b10 {
|
||||
// a continuation byte
|
||||
end--
|
||||
} else {
|
||||
// not an utf-8 byte
|
||||
break
|
||||
}
|
||||
cnt++
|
||||
}
|
||||
|
||||
if utf8.Valid(toValidate) {
|
||||
log.Debug("Detected encoding: utf-8 (fast)")
|
||||
return "UTF-8", nil
|
||||
}
|
||||
|
||||
@ -160,7 +148,7 @@ func DetectEncoding(content []byte) (string, error) {
|
||||
if len(content) < 1024 {
|
||||
// Check if original content is valid
|
||||
if _, err := textDetector.DetectBest(content); err != nil {
|
||||
return "", err
|
||||
return util.IfZero(setting.Repository.AnsiCharset, "UTF-8"), err
|
||||
}
|
||||
times := 1024 / len(content)
|
||||
detectContent = make([]byte, 0, times*len(content))
|
||||
@ -171,14 +159,10 @@ func DetectEncoding(content []byte) (string, error) {
|
||||
detectContent = content
|
||||
}
|
||||
|
||||
// Now we can't use DetectBest or just results[0] because the result isn't stable - so we need a tie break
|
||||
// Now we can't use DetectBest or just results[0] because the result isn't stable - so we need a tie-break
|
||||
results, err := textDetector.DetectAll(detectContent)
|
||||
if err != nil {
|
||||
if err == chardet.NotDetectedError && len(setting.Repository.AnsiCharset) > 0 {
|
||||
log.Debug("Using default AnsiCharset: %s", setting.Repository.AnsiCharset)
|
||||
return setting.Repository.AnsiCharset, nil
|
||||
}
|
||||
return "", err
|
||||
return util.IfZero(setting.Repository.AnsiCharset, "UTF-8"), err
|
||||
}
|
||||
|
||||
topConfidence := results[0].Confidence
|
||||
@ -201,11 +185,9 @@ func DetectEncoding(content []byte) (string, error) {
|
||||
}
|
||||
|
||||
// FIXME: to properly decouple this function the fallback ANSI charset should be passed as an argument
|
||||
if topResult.Charset != "UTF-8" && len(setting.Repository.AnsiCharset) > 0 {
|
||||
log.Debug("Using default AnsiCharset: %s", setting.Repository.AnsiCharset)
|
||||
if topResult.Charset != "UTF-8" && setting.Repository.AnsiCharset != "" {
|
||||
return setting.Repository.AnsiCharset, err
|
||||
}
|
||||
|
||||
log.Debug("Detected encoding: %s", topResult.Charset)
|
||||
return topResult.Charset, err
|
||||
return topResult.Charset, nil
|
||||
}
|
||||
|
||||
@ -4,108 +4,89 @@
|
||||
package charset
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/test"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func resetDefaultCharsetsOrder() {
|
||||
defaultDetectedCharsetsOrder := make([]string, 0, len(setting.Repository.DetectedCharsetsOrder))
|
||||
for _, charset := range setting.Repository.DetectedCharsetsOrder {
|
||||
defaultDetectedCharsetsOrder = append(defaultDetectedCharsetsOrder, strings.ToLower(strings.TrimSpace(charset)))
|
||||
}
|
||||
func TestMain(m *testing.M) {
|
||||
setting.Repository.DetectedCharsetScore = map[string]int{}
|
||||
i := 0
|
||||
for _, charset := range defaultDetectedCharsetsOrder {
|
||||
canonicalCharset := strings.ToLower(strings.TrimSpace(charset))
|
||||
if _, has := setting.Repository.DetectedCharsetScore[canonicalCharset]; !has {
|
||||
setting.Repository.DetectedCharsetScore[canonicalCharset] = i
|
||||
i++
|
||||
}
|
||||
for i, charset := range setting.Repository.DetectedCharsetsOrder {
|
||||
setting.Repository.DetectedCharsetScore[strings.ToLower(charset)] = i
|
||||
}
|
||||
os.Exit(m.Run())
|
||||
}
|
||||
|
||||
func TestMaybeRemoveBOM(t *testing.T) {
|
||||
res := MaybeRemoveBOM([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
|
||||
res := maybeRemoveBOM([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
|
||||
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
|
||||
|
||||
res = MaybeRemoveBOM([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
|
||||
res = maybeRemoveBOM([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
|
||||
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
|
||||
}
|
||||
|
||||
func TestToUTF8(t *testing.T) {
|
||||
resetDefaultCharsetsOrder()
|
||||
|
||||
// Note: golang compiler seems so behave differently depending on the current
|
||||
// locale, so some conversions might behave differently. For that reason, we don't
|
||||
// depend on particular conversions but in expected behaviors.
|
||||
|
||||
res, err := ToUTF8([]byte{0x41, 0x42, 0x43}, ConvertOpts{})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "ABC", res)
|
||||
res := ToUTF8([]byte{0x41, 0x42, 0x43}, ConvertOpts{})
|
||||
assert.Equal(t, "ABC", string(res))
|
||||
|
||||
// "áéíóú"
|
||||
res, err = ToUTF8([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, []byte(res))
|
||||
res = ToUTF8([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
|
||||
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
|
||||
|
||||
// "áéíóú"
|
||||
res, err = ToUTF8([]byte{
|
||||
res = ToUTF8([]byte{
|
||||
0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3,
|
||||
0xc3, 0xba,
|
||||
}, ConvertOpts{})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, []byte(res))
|
||||
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
|
||||
|
||||
res, err = ToUTF8([]byte{
|
||||
res = ToUTF8([]byte{
|
||||
0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
|
||||
0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73, 0x41, 0x41, 0x41, 0x2e,
|
||||
}, ConvertOpts{})
|
||||
assert.NoError(t, err)
|
||||
stringMustStartWith(t, "Hola,", res)
|
||||
stringMustEndWith(t, "AAA.", res)
|
||||
|
||||
res, err = ToUTF8([]byte{
|
||||
res = ToUTF8([]byte{
|
||||
0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
|
||||
0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73, 0x41, 0x41, 0x41, 0x2e,
|
||||
}, ConvertOpts{})
|
||||
assert.NoError(t, err)
|
||||
stringMustStartWith(t, "Hola,", res)
|
||||
stringMustEndWith(t, "AAA.", res)
|
||||
|
||||
res, err = ToUTF8([]byte{
|
||||
res = ToUTF8([]byte{
|
||||
0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63,
|
||||
0xF3, 0x6D, 0x6F, 0x20, 0x81, 0xA4, 0x6F, 0x73, 0x41, 0x41, 0x41, 0x2e,
|
||||
}, ConvertOpts{})
|
||||
assert.NoError(t, err)
|
||||
stringMustStartWith(t, "Hola,", res)
|
||||
stringMustEndWith(t, "AAA.", res)
|
||||
|
||||
// Japanese (Shift-JIS)
|
||||
// 日属秘ぞしちゅ。
|
||||
res, err = ToUTF8([]byte{
|
||||
res = ToUTF8([]byte{
|
||||
0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82,
|
||||
0xBF, 0x82, 0xE3, 0x81, 0x42,
|
||||
}, ConvertOpts{})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, []byte{
|
||||
0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3,
|
||||
0x81, 0x9E, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0xA1, 0xE3, 0x82, 0x85, 0xE3, 0x80, 0x82,
|
||||
},
|
||||
[]byte(res))
|
||||
}, res)
|
||||
|
||||
res, err = ToUTF8([]byte{0x00, 0x00, 0x00, 0x00}, ConvertOpts{})
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, []byte(res))
|
||||
res = ToUTF8([]byte{0x00, 0x00, 0x00, 0x00}, ConvertOpts{})
|
||||
assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, res)
|
||||
}
|
||||
|
||||
func TestToUTF8WithFallback(t *testing.T) {
|
||||
resetDefaultCharsetsOrder()
|
||||
// "ABC"
|
||||
res := ToUTF8WithFallback([]byte{0x41, 0x42, 0x43}, ConvertOpts{})
|
||||
assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
|
||||
@ -152,54 +133,58 @@ func TestToUTF8WithFallback(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestToUTF8DropErrors(t *testing.T) {
|
||||
resetDefaultCharsetsOrder()
|
||||
// "ABC"
|
||||
res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43}, ConvertOpts{})
|
||||
res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43})
|
||||
assert.Equal(t, []byte{0x41, 0x42, 0x43}, res)
|
||||
|
||||
// "áéíóú"
|
||||
res = ToUTF8DropErrors([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
|
||||
res = ToUTF8DropErrors([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
|
||||
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
|
||||
|
||||
// UTF8 BOM + "áéíóú"
|
||||
res = ToUTF8DropErrors([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{})
|
||||
res = ToUTF8DropErrors([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba})
|
||||
assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res)
|
||||
|
||||
// "Hola, así cómo ños"
|
||||
res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73}, ConvertOpts{})
|
||||
res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73})
|
||||
assert.Equal(t, []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73}, res[:8])
|
||||
assert.Equal(t, []byte{0x73}, res[len(res)-1:])
|
||||
|
||||
// "Hola, así cómo "
|
||||
minmatch := []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xC3, 0xAD, 0x20, 0x63, 0xC3, 0xB3, 0x6D, 0x6F, 0x20}
|
||||
|
||||
res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73}, ConvertOpts{})
|
||||
res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73})
|
||||
// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
|
||||
assert.Equal(t, minmatch, res[0:len(minmatch)])
|
||||
|
||||
res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x81, 0xA4, 0x6F, 0x73}, ConvertOpts{})
|
||||
res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x81, 0xA4, 0x6F, 0x73})
|
||||
// Do not fail for differences in invalid cases, as the library might change the conversion criteria for those
|
||||
assert.Equal(t, minmatch, res[0:len(minmatch)])
|
||||
|
||||
// Japanese (Shift-JIS)
|
||||
// "日属秘ぞしちゅ。"
|
||||
res = ToUTF8DropErrors([]byte{0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82, 0xBF, 0x82, 0xE3, 0x81, 0x42}, ConvertOpts{})
|
||||
res = ToUTF8DropErrors([]byte{0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82, 0xBF, 0x82, 0xE3, 0x81, 0x42})
|
||||
assert.Equal(t, []byte{
|
||||
0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3,
|
||||
0x81, 0x9E, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0xA1, 0xE3, 0x82, 0x85, 0xE3, 0x80, 0x82,
|
||||
}, res)
|
||||
|
||||
res = ToUTF8DropErrors([]byte{0x00, 0x00, 0x00, 0x00}, ConvertOpts{})
|
||||
res = ToUTF8DropErrors([]byte{0x00, 0x00, 0x00, 0x00})
|
||||
assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, res)
|
||||
}
|
||||
|
||||
func TestDetectEncoding(t *testing.T) {
|
||||
resetDefaultCharsetsOrder()
|
||||
testSuccess := func(b []byte, expected string) {
|
||||
encoding, err := DetectEncoding(b)
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, expected, encoding)
|
||||
}
|
||||
|
||||
// invalid bytes
|
||||
encoding, err := DetectEncoding([]byte{0xfa})
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, "UTF-8", encoding)
|
||||
|
||||
// utf-8
|
||||
b := []byte("just some ascii")
|
||||
testSuccess(b, "UTF-8")
|
||||
@ -214,169 +199,49 @@ func TestDetectEncoding(t *testing.T) {
|
||||
|
||||
// iso-8859-1: d<accented e>cor<newline>
|
||||
b = []byte{0x44, 0xe9, 0x63, 0x6f, 0x72, 0x0a}
|
||||
encoding, err := DetectEncoding(b)
|
||||
encoding, err = DetectEncoding(b)
|
||||
assert.NoError(t, err)
|
||||
assert.Contains(t, encoding, "ISO-8859-1")
|
||||
|
||||
old := setting.Repository.AnsiCharset
|
||||
setting.Repository.AnsiCharset = "placeholder"
|
||||
defer func() {
|
||||
setting.Repository.AnsiCharset = old
|
||||
}()
|
||||
testSuccess(b, "placeholder")
|
||||
|
||||
// invalid bytes
|
||||
b = []byte{0xfa}
|
||||
_, err = DetectEncoding(b)
|
||||
assert.Error(t, err)
|
||||
defer test.MockVariableValue(&setting.Repository.AnsiCharset, "MyEncoding")()
|
||||
testSuccess(b, "MyEncoding")
|
||||
}
|
||||
|
||||
func stringMustStartWith(t *testing.T, expected, value string) {
|
||||
assert.Equal(t, expected, value[:len(expected)])
|
||||
func stringMustStartWith(t *testing.T, expected string, value []byte) {
|
||||
assert.Equal(t, expected, string(value[:len(expected)]))
|
||||
}
|
||||
|
||||
func stringMustEndWith(t *testing.T, expected, value string) {
|
||||
assert.Equal(t, expected, value[len(value)-len(expected):])
|
||||
func stringMustEndWith(t *testing.T, expected string, value []byte) {
|
||||
assert.Equal(t, expected, string(value[len(value)-len(expected):]))
|
||||
}
|
||||
|
||||
func TestToUTF8WithFallbackReader(t *testing.T) {
|
||||
resetDefaultCharsetsOrder()
|
||||
test.MockVariableValue(&ToUTF8WithFallbackReaderPrefetchSize)
|
||||
|
||||
for testLen := range 2048 {
|
||||
pattern := " test { () }\n"
|
||||
input := ""
|
||||
for len(input) < testLen {
|
||||
input += pattern
|
||||
}
|
||||
input = input[:testLen]
|
||||
input += "// Выключаем"
|
||||
rd := ToUTF8WithFallbackReader(bytes.NewReader([]byte(input)), ConvertOpts{})
|
||||
block := "aá啊🤔"
|
||||
runes := []rune(block)
|
||||
assert.Len(t, string(runes[0]), 1)
|
||||
assert.Len(t, string(runes[1]), 2)
|
||||
assert.Len(t, string(runes[2]), 3)
|
||||
assert.Len(t, string(runes[3]), 4)
|
||||
|
||||
content := strings.Repeat(block, 2)
|
||||
for i := 1; i < len(content); i++ {
|
||||
encoding, err := DetectEncoding([]byte(content[:i]))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "UTF-8", encoding)
|
||||
|
||||
ToUTF8WithFallbackReaderPrefetchSize = i
|
||||
rd := ToUTF8WithFallbackReader(strings.NewReader(content), ConvertOpts{})
|
||||
r, _ := io.ReadAll(rd)
|
||||
assert.Equalf(t, input, string(r), "testing string len=%d", testLen)
|
||||
assert.Equal(t, content, string(r))
|
||||
}
|
||||
for _, r := range runes {
|
||||
content = "abc abc " + string(r) + string(r) + string(r)
|
||||
for i := 0; i < len(content); i++ {
|
||||
encoding, err := DetectEncoding([]byte(content[:i]))
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, "UTF-8", encoding)
|
||||
}
|
||||
}
|
||||
|
||||
truncatedOneByteExtension := failFastBytes
|
||||
encoding, _ := DetectEncoding(truncatedOneByteExtension)
|
||||
assert.Equal(t, "UTF-8", encoding)
|
||||
|
||||
truncatedTwoByteExtension := failFastBytes
|
||||
truncatedTwoByteExtension[len(failFastBytes)-1] = 0x9b
|
||||
truncatedTwoByteExtension[len(failFastBytes)-2] = 0xe2
|
||||
|
||||
encoding, _ = DetectEncoding(truncatedTwoByteExtension)
|
||||
assert.Equal(t, "UTF-8", encoding)
|
||||
|
||||
truncatedThreeByteExtension := failFastBytes
|
||||
truncatedThreeByteExtension[len(failFastBytes)-1] = 0x92
|
||||
truncatedThreeByteExtension[len(failFastBytes)-2] = 0x9f
|
||||
truncatedThreeByteExtension[len(failFastBytes)-3] = 0xf0
|
||||
|
||||
encoding, _ = DetectEncoding(truncatedThreeByteExtension)
|
||||
assert.Equal(t, "UTF-8", encoding)
|
||||
}
|
||||
|
||||
var failFastBytes = []byte{
|
||||
0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x74, 0x6f,
|
||||
0x6f, 0x6c, 0x73, 0x2e, 0x61, 0x6e, 0x74, 0x2e, 0x74, 0x61, 0x73, 0x6b, 0x64, 0x65, 0x66, 0x73, 0x2e, 0x63, 0x6f, 0x6e,
|
||||
0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x4f, 0x73, 0x0a, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x6f, 0x72, 0x67,
|
||||
0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x62, 0x6f, 0x6f,
|
||||
0x74, 0x2e, 0x67, 0x72, 0x61, 0x64, 0x6c, 0x65, 0x2e, 0x74, 0x61, 0x73, 0x6b, 0x73, 0x2e, 0x72, 0x75, 0x6e, 0x2e, 0x42,
|
||||
0x6f, 0x6f, 0x74, 0x52, 0x75, 0x6e, 0x0a, 0x0a, 0x70, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x73, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x69, 0x64, 0x28, 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d,
|
||||
0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x62, 0x6f, 0x6f, 0x74, 0x22, 0x29, 0x0a, 0x7d, 0x0a, 0x0a, 0x64, 0x65, 0x70, 0x65,
|
||||
0x6e, 0x64, 0x65, 0x6e, 0x63, 0x69, 0x65, 0x73, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65,
|
||||
0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a,
|
||||
0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, 0x61, 0x70, 0x69, 0x22, 0x29, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d,
|
||||
0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74,
|
||||
0x28, 0x22, 0x3a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, 0x61, 0x70, 0x69, 0x2d, 0x64, 0x6f, 0x63, 0x73, 0x22, 0x29,
|
||||
0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e,
|
||||
0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, 0x64, 0x62,
|
||||
0x22, 0x29, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69,
|
||||
0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a,
|
||||
0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x22, 0x29, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65,
|
||||
0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a,
|
||||
0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x66,
|
||||
0x73, 0x22, 0x29, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74,
|
||||
0x69, 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72,
|
||||
0x3a, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x6d, 0x71, 0x22, 0x29, 0x29, 0x0a, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22,
|
||||
0x6a, 0x66, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x65, 0x3a, 0x70, 0x65, 0x2d, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e,
|
||||
0x2d, 0x61, 0x75, 0x74, 0x68, 0x2d, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x2d, 0x73, 0x74, 0x61, 0x72, 0x74,
|
||||
0x65, 0x72, 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74,
|
||||
0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6a, 0x66, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x65, 0x3a, 0x70, 0x65, 0x2d, 0x63,
|
||||
0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2d, 0x68, 0x61, 0x6c, 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c,
|
||||
0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6a, 0x66, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x2e,
|
||||
0x70, 0x65, 0x3a, 0x70, 0x65, 0x2d, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2d, 0x63, 0x6f, 0x72, 0x65, 0x22, 0x29, 0x0a,
|
||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28,
|
||||
0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b,
|
||||
0x2e, 0x62, 0x6f, 0x6f, 0x74, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x62, 0x6f, 0x6f, 0x74, 0x2d, 0x73, 0x74,
|
||||
0x61, 0x72, 0x74, 0x65, 0x72, 0x2d, 0x77, 0x65, 0x62, 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c,
|
||||
0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69,
|
||||
0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x62, 0x6f, 0x6f, 0x74, 0x3a, 0x73, 0x70, 0x72,
|
||||
0x69, 0x6e, 0x67, 0x2d, 0x62, 0x6f, 0x6f, 0x74, 0x2d, 0x73, 0x74, 0x61, 0x72, 0x74, 0x65, 0x72, 0x2d, 0x61, 0x6f, 0x70,
|
||||
0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f,
|
||||
0x6e, 0x28, 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f,
|
||||
0x72, 0x6b, 0x2e, 0x62, 0x6f, 0x6f, 0x74, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x62, 0x6f, 0x6f, 0x74, 0x2d,
|
||||
0x73, 0x74, 0x61, 0x72, 0x74, 0x65, 0x72, 0x2d, 0x61, 0x63, 0x74, 0x75, 0x61, 0x74, 0x6f, 0x72, 0x22, 0x29, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f,
|
||||
0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x63,
|
||||
0x6c, 0x6f, 0x75, 0x64, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x2d, 0x73, 0x74,
|
||||
0x61, 0x72, 0x74, 0x65, 0x72, 0x2d, 0x62, 0x6f, 0x6f, 0x74, 0x73, 0x74, 0x72, 0x61, 0x70, 0x22, 0x29, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, 0x72,
|
||||
0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x63, 0x6c,
|
||||
0x6f, 0x75, 0x64, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x2d, 0x73, 0x74, 0x61,
|
||||
0x72, 0x74, 0x65, 0x72, 0x2d, 0x63, 0x6f, 0x6e, 0x73, 0x75, 0x6c, 0x2d, 0x61, 0x6c, 0x6c, 0x22, 0x29, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, 0x72,
|
||||
0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x63, 0x6c,
|
||||
0x6f, 0x75, 0x64, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x2d, 0x73, 0x74, 0x61,
|
||||
0x72, 0x74, 0x65, 0x72, 0x2d, 0x73, 0x6c, 0x65, 0x75, 0x74, 0x68, 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d,
|
||||
0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70,
|
||||
0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x72, 0x65, 0x74, 0x72, 0x79, 0x3a,
|
||||
0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x72, 0x65, 0x74, 0x72, 0x79, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x63, 0x68, 0x2e, 0x71,
|
||||
0x6f, 0x73, 0x2e, 0x6c, 0x6f, 0x67, 0x62, 0x61, 0x63, 0x6b, 0x3a, 0x6c, 0x6f, 0x67, 0x62, 0x61, 0x63, 0x6b, 0x2d, 0x63,
|
||||
0x6c, 0x61, 0x73, 0x73, 0x69, 0x63, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d,
|
||||
0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x69, 0x6f, 0x2e, 0x6d, 0x69, 0x63, 0x72, 0x6f, 0x6d, 0x65,
|
||||
0x74, 0x65, 0x72, 0x3a, 0x6d, 0x69, 0x63, 0x72, 0x6f, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x2d, 0x72, 0x65, 0x67, 0x69, 0x73,
|
||||
0x74, 0x72, 0x79, 0x2d, 0x70, 0x72, 0x6f, 0x6d, 0x65, 0x74, 0x68, 0x65, 0x75, 0x73, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x6b, 0x6f, 0x74,
|
||||
0x6c, 0x69, 0x6e, 0x28, 0x22, 0x73, 0x74, 0x64, 0x6c, 0x69, 0x62, 0x22, 0x29, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f,
|
||||
0x2f, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x54, 0x65, 0x73, 0x74, 0x20, 0x64, 0x65, 0x70, 0x65, 0x6e, 0x64,
|
||||
0x65, 0x6e, 0x63, 0x69, 0x65, 0x73, 0x2e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f,
|
||||
0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x74,
|
||||
0x65, 0x73, 0x74, 0x49, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6a,
|
||||
0x66, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x65, 0x3a, 0x70, 0x65, 0x2d, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2d,
|
||||
0x74, 0x65, 0x73, 0x74, 0x22, 0x29, 0x0a, 0x7d, 0x0a, 0x0a, 0x76, 0x61, 0x6c, 0x20, 0x70, 0x61, 0x74, 0x63, 0x68, 0x4a,
|
||||
0x61, 0x72, 0x20, 0x62, 0x79, 0x20, 0x74, 0x61, 0x73, 0x6b, 0x73, 0x2e, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72,
|
||||
0x69, 0x6e, 0x67, 0x28, 0x4a, 0x61, 0x72, 0x3a, 0x3a, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x69, 0x66, 0x69, 0x65, 0x72, 0x2e,
|
||||
0x73, 0x65, 0x74, 0x28, 0x22, 0x70, 0x61, 0x74, 0x63, 0x68, 0x65, 0x64, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x76, 0x61, 0x6c, 0x20, 0x72, 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x70, 0x61, 0x74, 0x68,
|
||||
0x20, 0x62, 0x79, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x67,
|
||||
0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6d, 0x61, 0x6e, 0x69, 0x66, 0x65, 0x73, 0x74,
|
||||
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65,
|
||||
0x73, 0x28, 0x22, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x2d, 0x50, 0x61, 0x74, 0x68, 0x22, 0x20, 0x74, 0x6f, 0x20, 0x6f, 0x62,
|
||||
0x6a, 0x65, 0x63, 0x74, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70,
|
||||
0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x20, 0x70, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x20, 0x3d,
|
||||
0x20, 0x22, 0x66, 0x69, 0x6c, 0x65, 0x3a, 0x2f, 0x2b, 0x22, 0x2e, 0x74, 0x6f, 0x52, 0x65, 0x67, 0x65, 0x78, 0x28, 0x29,
|
||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x72, 0x69, 0x64,
|
||||
0x65, 0x20, 0x66, 0x75, 0x6e, 0x20, 0x74, 0x6f, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x28, 0x29, 0x3a, 0x20, 0x53, 0x74,
|
||||
0x72, 0x69, 0x6e, 0x67, 0x20, 0x3d, 0x20, 0x72, 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x70,
|
||||
0x61, 0x74, 0x68, 0x2e, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x2e, 0x6a, 0x6f, 0x69, 0x6e, 0x54, 0x6f, 0x53, 0x74, 0x72, 0x69,
|
||||
0x6e, 0x67, 0x28, 0x22, 0x20, 0x22, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x74, 0x2e, 0x74, 0x6f, 0x55, 0x52, 0x49, 0x28, 0x29, 0x2e, 0x74, 0x6f, 0x55,
|
||||
0x52, 0x4c, 0x28, 0x29, 0x2e, 0x74, 0x6f, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x28, 0x29, 0x2e, 0x72, 0x65, 0x70, 0x6c,
|
||||
0x61, 0x63, 0x65, 0x46, 0x69, 0x72, 0x73, 0x74, 0x28, 0x70, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x2c, 0x20, 0x22, 0x2f,
|
||||
0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x74, 0x61, 0x73,
|
||||
0x6b, 0x73, 0x2e, 0x6e, 0x61, 0x6d, 0x65, 0x64, 0x3c, 0x42, 0x6f, 0x6f, 0x74, 0x52, 0x75, 0x6e, 0x3e, 0x28, 0x22, 0x62,
|
||||
0x6f, 0x6f, 0x74, 0x52, 0x75, 0x6e, 0x22, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x4f,
|
||||
0x73, 0x2e, 0x69, 0x73, 0x46, 0x61, 0x6d, 0x69, 0x6c, 0x79, 0x28, 0x4f, 0x73, 0x2e, 0x46, 0x41, 0x4d, 0x49, 0x4c, 0x59,
|
||||
0x5f, 0x57, 0x49, 0x4e, 0x44, 0x4f, 0x57, 0x53, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x70, 0x61, 0x74, 0x68, 0x20, 0x3d, 0x20, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x28, 0x73,
|
||||
0x6f, 0x75, 0x72, 0x63, 0x65, 0x53, 0x65, 0x74, 0x73, 0x2e, 0x6e, 0x61, 0x6d, 0x65, 0x64, 0x28, 0x22, 0x6d, 0x61, 0x69,
|
||||
0x6e, 0x22, 0x29, 0x2e, 0x6d, 0x61, 0x70, 0x20, 0x7b, 0x20, 0x69, 0x74, 0x2e, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x20,
|
||||
0x7d, 0x2c, 0x20, 0x70, 0x61, 0x74, 0x63, 0x68, 0x4a, 0x61, 0x72, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0xd0,
|
||||
}
|
||||
|
||||
@ -19,7 +19,6 @@ import (
|
||||
charsetModule "code.gitea.io/gitea/modules/charset"
|
||||
"code.gitea.io/gitea/modules/container"
|
||||
"code.gitea.io/gitea/modules/httpcache"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/modules/typesniffer"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
@ -109,11 +108,7 @@ func setServeHeadersByFile(r *http.Request, w http.ResponseWriter, mineBuf []byt
|
||||
}
|
||||
|
||||
if isPlain {
|
||||
charset, err := charsetModule.DetectEncoding(mineBuf)
|
||||
if err != nil {
|
||||
log.Error("Detect raw file %s charset failed: %v, using by default utf-8", opts.Filename, err)
|
||||
charset = "utf-8"
|
||||
}
|
||||
charset, _ := charsetModule.DetectEncoding(mineBuf)
|
||||
opts.ContentTypeCharset = strings.ToLower(charset)
|
||||
}
|
||||
|
||||
|
||||
@ -203,7 +203,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
|
||||
RepoID: repo.ID,
|
||||
CommitID: commitSha,
|
||||
Filename: update.Filename,
|
||||
Content: string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})),
|
||||
Content: string(charset.ToUTF8DropErrors(fileContents)),
|
||||
Language: analyze.GetCodeLanguage(update.Filename, fileContents),
|
||||
UpdatedAt: time.Now().UTC(),
|
||||
})
|
||||
|
||||
@ -191,7 +191,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
|
||||
Doc(map[string]any{
|
||||
"repo_id": repo.ID,
|
||||
"filename": update.Filename,
|
||||
"content": string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})),
|
||||
"content": string(charset.ToUTF8DropErrors(fileContents)),
|
||||
"commit_id": sha,
|
||||
"language": analyze.GetCodeLanguage(update.Filename, fileContents),
|
||||
"updated_at": timeutil.TimeStampNow(),
|
||||
|
||||
@ -62,7 +62,28 @@ type PackageMetadata struct {
|
||||
Author User `json:"author"`
|
||||
ReadmeFilename string `json:"readmeFilename,omitempty"`
|
||||
Users map[string]bool `json:"users,omitempty"`
|
||||
License string `json:"license,omitempty"`
|
||||
License License `json:"license,omitempty"`
|
||||
}
|
||||
|
||||
type License string
|
||||
|
||||
func (l *License) UnmarshalJSON(data []byte) error {
|
||||
switch data[0] {
|
||||
case '"':
|
||||
var value string
|
||||
if err := json.Unmarshal(data, &value); err != nil {
|
||||
return err
|
||||
}
|
||||
*l = License(value)
|
||||
case '{':
|
||||
var values map[string]any
|
||||
if err := json.Unmarshal(data, &values); err != nil {
|
||||
return err
|
||||
}
|
||||
value, _ := values["type"].(string)
|
||||
*l = License(value)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// PackageMetadataVersion documentation: https://github.com/npm/registry/blob/master/docs/REGISTRY-API.md#version
|
||||
@ -74,7 +95,7 @@ type PackageMetadataVersion struct {
|
||||
Description string `json:"description"`
|
||||
Author User `json:"author"`
|
||||
Homepage string `json:"homepage,omitempty"`
|
||||
License string `json:"license,omitempty"`
|
||||
License License `json:"license,omitempty"`
|
||||
Repository Repository `json:"repository"`
|
||||
Keywords []string `json:"keywords,omitempty"`
|
||||
Dependencies map[string]string `json:"dependencies,omitempty"`
|
||||
|
||||
@ -13,6 +13,7 @@ import (
|
||||
"code.gitea.io/gitea/modules/json"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParsePackage(t *testing.T) {
|
||||
@ -291,11 +292,36 @@ func TestParsePackage(t *testing.T) {
|
||||
assert.Equal(t, packageDescription, p.Metadata.Readme)
|
||||
assert.Equal(t, packageAuthor, p.Metadata.Author)
|
||||
assert.Equal(t, packageBin, p.Metadata.Bin["bin"])
|
||||
assert.Equal(t, "MIT", p.Metadata.License)
|
||||
assert.Equal(t, "MIT", string(p.Metadata.License))
|
||||
assert.Equal(t, "https://gitea.io/", p.Metadata.ProjectURL)
|
||||
assert.Contains(t, p.Metadata.Dependencies, "package")
|
||||
assert.Equal(t, "1.2.0", p.Metadata.Dependencies["package"])
|
||||
assert.Equal(t, repository.Type, p.Metadata.Repository.Type)
|
||||
assert.Equal(t, repository.URL, p.Metadata.Repository.URL)
|
||||
})
|
||||
|
||||
t.Run("ValidLicenseMap", func(t *testing.T) {
|
||||
packageJSON := `{
|
||||
"versions": {
|
||||
"0.1.1": {
|
||||
"name": "dev-null",
|
||||
"version": "0.1.1",
|
||||
"license": {
|
||||
"type": "MIT"
|
||||
},
|
||||
"dist": {
|
||||
"integrity": "sha256-"
|
||||
}
|
||||
}
|
||||
},
|
||||
"_attachments": {
|
||||
"foo": {
|
||||
"data": "AAAA"
|
||||
}
|
||||
}
|
||||
}`
|
||||
p, err := ParsePackage(strings.NewReader(packageJSON))
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "MIT", string(p.Metadata.License))
|
||||
})
|
||||
}
|
||||
|
||||
@ -12,7 +12,7 @@ type Metadata struct {
|
||||
Name string `json:"name,omitempty"`
|
||||
Description string `json:"description,omitempty"`
|
||||
Author string `json:"author,omitempty"`
|
||||
License string `json:"license,omitempty"`
|
||||
License License `json:"license,omitempty"`
|
||||
ProjectURL string `json:"project_url,omitempty"`
|
||||
Keywords []string `json:"keywords,omitempty"`
|
||||
Dependencies map[string]string `json:"dependencies,omitempty"`
|
||||
|
||||
@ -240,4 +240,5 @@ func PanicInDevOrTesting(msg string, a ...any) {
|
||||
if !IsProd || IsInTesting {
|
||||
panic(fmt.Sprintf(msg, a...))
|
||||
}
|
||||
log.Error(msg, a...)
|
||||
}
|
||||
|
||||
@ -317,11 +317,7 @@ func EditFile(ctx *context.Context) {
|
||||
ctx.ServerError("ReadAll", err)
|
||||
return
|
||||
}
|
||||
if content, err := charset.ToUTF8(buf, charset.ConvertOpts{KeepBOM: true}); err != nil {
|
||||
ctx.Data["FileContent"] = string(buf)
|
||||
} else {
|
||||
ctx.Data["FileContent"] = content
|
||||
}
|
||||
ctx.Data["FileContent"] = string(charset.ToUTF8(buf, charset.ConvertOpts{KeepBOM: true, ErrorReturnOrigin: true}))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -36,9 +36,7 @@ func CherryPick(ctx *context.Context) {
|
||||
ctx.Data["commit_message"] = "revert " + cherryPickCommit.Message()
|
||||
} else {
|
||||
ctx.Data["CherryPickType"] = "cherry-pick"
|
||||
splits := strings.SplitN(cherryPickCommit.Message(), "\n", 2)
|
||||
ctx.Data["commit_summary"] = splits[0]
|
||||
ctx.Data["commit_message"] = splits[1]
|
||||
ctx.Data["commit_summary"], ctx.Data["commit_message"], _ = strings.Cut(cherryPickCommit.Message(), "\n")
|
||||
}
|
||||
|
||||
ctx.HTML(http.StatusOK, tplCherryPick)
|
||||
|
||||
@ -835,11 +835,11 @@ parsingLoop:
|
||||
if buffer.Len() == 0 {
|
||||
continue
|
||||
}
|
||||
charsetLabel, err := charset.DetectEncoding(buffer.Bytes())
|
||||
if charsetLabel != "UTF-8" && err == nil {
|
||||
encoding, _ := stdcharset.Lookup(charsetLabel)
|
||||
if encoding != nil {
|
||||
diffLineTypeDecoders[lineType] = encoding.NewDecoder()
|
||||
charsetLabel, _ := charset.DetectEncoding(buffer.Bytes())
|
||||
if charsetLabel != "UTF-8" {
|
||||
charsetEncoding, _ := stdcharset.Lookup(charsetLabel)
|
||||
if charsetEncoding != nil {
|
||||
diffLineTypeDecoders[lineType] = charsetEncoding.NewDecoder()
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1325,10 +1325,10 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
|
||||
shouldFullFileHighlight := !setting.Git.DisableDiffHighlight && attrDiff.Value() == ""
|
||||
if shouldFullFileHighlight {
|
||||
if limitedContent.LeftContent != nil && limitedContent.LeftContent.buf.Len() < MaxDiffHighlightEntireFileSize {
|
||||
diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.String())
|
||||
diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.Bytes())
|
||||
}
|
||||
if limitedContent.RightContent != nil && limitedContent.RightContent.buf.Len() < MaxDiffHighlightEntireFileSize {
|
||||
diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.String())
|
||||
diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.Bytes())
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1336,9 +1336,34 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit
|
||||
return diff, nil
|
||||
}
|
||||
|
||||
func highlightCodeLines(diffFile *DiffFile, isLeft bool, content string) map[int]template.HTML {
|
||||
func splitHighlightLines(buf []byte) (ret [][]byte) {
|
||||
lineCount := bytes.Count(buf, []byte("\n")) + 1
|
||||
ret = make([][]byte, 0, lineCount)
|
||||
nlTagClose := []byte("\n</")
|
||||
for {
|
||||
pos := bytes.IndexByte(buf, '\n')
|
||||
if pos == -1 {
|
||||
ret = append(ret, buf)
|
||||
return ret
|
||||
}
|
||||
// Chroma highlighting output sometimes have "</span>" right after \n, sometimes before.
|
||||
// * "<span>text\n</span>"
|
||||
// * "<span>text</span>\n"
|
||||
if bytes.HasPrefix(buf[pos:], nlTagClose) {
|
||||
pos1 := bytes.IndexByte(buf[pos:], '>')
|
||||
if pos1 != -1 {
|
||||
pos += pos1
|
||||
}
|
||||
}
|
||||
ret = append(ret, buf[:pos+1])
|
||||
buf = buf[pos+1:]
|
||||
}
|
||||
}
|
||||
|
||||
func highlightCodeLines(diffFile *DiffFile, isLeft bool, rawContent []byte) map[int]template.HTML {
|
||||
content := util.UnsafeBytesToString(charset.ToUTF8(rawContent, charset.ConvertOpts{}))
|
||||
highlightedNewContent, _ := highlight.Code(diffFile.Name, diffFile.Language, content)
|
||||
splitLines := strings.Split(string(highlightedNewContent), "\n")
|
||||
splitLines := splitHighlightLines([]byte(highlightedNewContent))
|
||||
lines := make(map[int]template.HTML, len(splitLines))
|
||||
// only save the highlighted lines we need, but not the whole file, to save memory
|
||||
for _, sec := range diffFile.Sections {
|
||||
|
||||
@ -5,6 +5,7 @@
|
||||
package gitdiff
|
||||
|
||||
import (
|
||||
"html/template"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
@ -1106,3 +1107,41 @@ func TestDiffLine_GetExpandDirection(t *testing.T) {
|
||||
assert.Equal(t, c.direction, c.diffLine.GetExpandDirection(), "case %s expected direction: %s", c.name, c.direction)
|
||||
}
|
||||
}
|
||||
|
||||
func TestHighlightCodeLines(t *testing.T) {
|
||||
t.Run("CharsetDetecting", func(t *testing.T) {
|
||||
diffFile := &DiffFile{
|
||||
Name: "a.c",
|
||||
Language: "c",
|
||||
Sections: []*DiffSection{
|
||||
{
|
||||
Lines: []*DiffLine{{LeftIdx: 1}},
|
||||
},
|
||||
},
|
||||
}
|
||||
ret := highlightCodeLines(diffFile, true, []byte("// abc\xcc def\xcd")) // ISO-8859-1 bytes
|
||||
assert.Equal(t, "<span class=\"c1\">// abcÌ defÍ\n</span>", string(ret[0]))
|
||||
})
|
||||
|
||||
t.Run("LeftLines", func(t *testing.T) {
|
||||
diffFile := &DiffFile{
|
||||
Name: "a.c",
|
||||
Language: "c",
|
||||
Sections: []*DiffSection{
|
||||
{
|
||||
Lines: []*DiffLine{
|
||||
{LeftIdx: 1},
|
||||
{LeftIdx: 2},
|
||||
{LeftIdx: 3},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
const nl = "\n"
|
||||
ret := highlightCodeLines(diffFile, true, []byte("a\nb\n"))
|
||||
assert.Equal(t, map[int]template.HTML{
|
||||
0: `<span class="n">a</span>` + nl,
|
||||
1: `<span class="n">b</span>`,
|
||||
}, ret)
|
||||
})
|
||||
}
|
||||
|
||||
@ -25,12 +25,12 @@ func TestDiffWithHighlight(t *testing.T) {
|
||||
|
||||
t.Run("CleanUp", func(t *testing.T) {
|
||||
hcd := newHighlightCodeDiff()
|
||||
codeA := template.HTML(`<span class="cm>this is a comment</span>`)
|
||||
codeB := template.HTML(`<span class="cm>this is updated comment</span>`)
|
||||
codeA := template.HTML(`<span class="cm">this is a comment</span>`)
|
||||
codeB := template.HTML(`<span class="cm">this is updated comment</span>`)
|
||||
outDel := hcd.diffLineWithHighlight(DiffLineDel, codeA, codeB)
|
||||
assert.Equal(t, `<span class="cm>this is <span class="removed-code">a</span> comment</span>`, string(outDel))
|
||||
assert.Equal(t, `<span class="cm">this is <span class="removed-code">a</span> comment</span>`, string(outDel))
|
||||
outAdd := hcd.diffLineWithHighlight(DiffLineAdd, codeA, codeB)
|
||||
assert.Equal(t, `<span class="cm>this is <span class="added-code">updated</span> comment</span>`, string(outAdd))
|
||||
assert.Equal(t, `<span class="cm">this is <span class="added-code">updated</span> comment</span>`, string(outAdd))
|
||||
})
|
||||
|
||||
t.Run("OpenCloseTags", func(t *testing.T) {
|
||||
|
||||
@ -4,10 +4,8 @@
|
||||
package sender
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
)
|
||||
|
||||
type Sender interface {
|
||||
@ -16,23 +14,18 @@ type Sender interface {
|
||||
|
||||
var Send = send
|
||||
|
||||
func send(sender Sender, msgs ...*Message) error {
|
||||
if setting.MailService == nil {
|
||||
log.Error("Mailer: Send is being invoked but mail service hasn't been initialized")
|
||||
return nil
|
||||
func send(sender Sender, msg *Message) error {
|
||||
m := msg.ToMessage()
|
||||
froms := m.GetFrom()
|
||||
to, err := m.GetRecipients()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, msg := range msgs {
|
||||
m := msg.ToMessage()
|
||||
froms := m.GetFrom()
|
||||
to, err := m.GetRecipients()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: implement sending from multiple addresses
|
||||
if err := sender.Send(froms[0].Address, to, m); err != nil {
|
||||
return err
|
||||
}
|
||||
// TODO: implement sending from multiple addresses
|
||||
if len(froms) == 0 {
|
||||
// FIXME: no idea why sometimes the "froms" can be empty, need to figure out the root problem
|
||||
return errors.New("no FROM specified")
|
||||
}
|
||||
return nil
|
||||
return sender.Send(froms[0].Address, to, m)
|
||||
}
|
||||
|
||||
@ -177,7 +177,7 @@ func substGiteaTemplateFile(ctx context.Context, tmpDir, tmpDirSubPath string, t
|
||||
}
|
||||
|
||||
generatedContent := generateExpansion(ctx, string(content), templateRepo, generateRepo)
|
||||
substSubPath := filepath.Clean(filePathSanitize(generateExpansion(ctx, tmpDirSubPath, templateRepo, generateRepo)))
|
||||
substSubPath := filePathSanitize(generateExpansion(ctx, tmpDirSubPath, templateRepo, generateRepo))
|
||||
newLocalPath := filepath.Join(tmpDir, substSubPath)
|
||||
regular, err := util.IsRegularFile(newLocalPath)
|
||||
if canWrite := regular || errors.Is(err, fs.ErrNotExist); !canWrite {
|
||||
@ -358,5 +358,5 @@ func filePathSanitize(s string) string {
|
||||
}
|
||||
fields[i] = field
|
||||
}
|
||||
return filepath.FromSlash(strings.Join(fields, "/"))
|
||||
return filepath.Clean(filepath.FromSlash(strings.Trim(strings.Join(fields, "/"), "/")))
|
||||
}
|
||||
|
||||
@ -54,19 +54,24 @@ text/*.txt
|
||||
}
|
||||
|
||||
func TestFilePathSanitize(t *testing.T) {
|
||||
assert.Equal(t, "test_CON", filePathSanitize("test_CON"))
|
||||
assert.Equal(t, "test CON", filePathSanitize("test CON "))
|
||||
assert.Equal(t, "__/traverse/__", filePathSanitize(".. /traverse/ .."))
|
||||
assert.Equal(t, "./__/a/_git/b_", filePathSanitize("./../a/.git/ b: "))
|
||||
// path clean
|
||||
assert.Equal(t, "a", filePathSanitize("//a/"))
|
||||
assert.Equal(t, "_a", filePathSanitize(`\a`))
|
||||
assert.Equal(t, "__/a/__", filePathSanitize(".. /a/ .."))
|
||||
assert.Equal(t, "__/a/_git/b_", filePathSanitize("./../a/.git/ b: "))
|
||||
|
||||
// Windows reserved names
|
||||
assert.Equal(t, "_", filePathSanitize("CoN"))
|
||||
assert.Equal(t, "_", filePathSanitize("LpT1"))
|
||||
assert.Equal(t, "_", filePathSanitize("CoM1"))
|
||||
assert.Equal(t, "test_CON", filePathSanitize("test_CON"))
|
||||
assert.Equal(t, "test CON", filePathSanitize("test CON "))
|
||||
|
||||
// special chars
|
||||
assert.Equal(t, "_", filePathSanitize("\u0000"))
|
||||
assert.Equal(t, "目标", filePathSanitize("目标"))
|
||||
// unlike filepath.Clean, it only sanitizes, doesn't change the separator layout
|
||||
assert.Equal(t, "", filePathSanitize("")) //nolint:testifylint // for easy reading
|
||||
assert.Equal(t, ".", filePathSanitize(""))
|
||||
assert.Equal(t, ".", filePathSanitize("."))
|
||||
assert.Equal(t, "/", filePathSanitize("/"))
|
||||
assert.Equal(t, ".", filePathSanitize("/"))
|
||||
}
|
||||
|
||||
func TestProcessGiteaTemplateFile(t *testing.T) {
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
<div class="ui form">
|
||||
<div class="field">
|
||||
<label>{{svg "octicon-terminal"}} {{ctx.Locale.Tr "packages.pypi.install"}}</label>
|
||||
<div class="markup"><pre class="code-block"><code>pip install --index-url <origin-url data-url="{{AppSubUrl}}/api/packages/{{.PackageDescriptor.Owner.Name}}/pypi/simple/"></origin-url> --extra-index-url https://pypi.org/ {{.PackageDescriptor.Package.Name}}</code></pre></div>
|
||||
<div class="markup"><pre class="code-block"><code>pip install --index-url <origin-url data-url="{{AppSubUrl}}/api/packages/{{.PackageDescriptor.Owner.Name}}/pypi/simple/"></origin-url> --extra-index-url https://pypi.org/simple {{.PackageDescriptor.Package.Name}}</code></pre></div>
|
||||
</div>
|
||||
<div class="field">
|
||||
<label>{{ctx.Locale.Tr "packages.registry.documentation" "PyPI" "https://docs.gitea.com/usage/packages/pypi/"}}</label>
|
||||
|
||||
@ -96,7 +96,7 @@
|
||||
</div>
|
||||
{{else if eq .Type 2}}
|
||||
<div class="timeline-item event" id="{{.HashTag}}">
|
||||
<span class="badge tw-bg-red tw-text-white">{{svg "octicon-circle-slash"}}</span>
|
||||
<span class="badge tw-bg-red tw-text-white">{{svg "octicon-issue-closed"}}</span>
|
||||
{{if not .OriginalAuthor}}
|
||||
{{template "shared/user/avatarlink" dict "user" .Poster}}
|
||||
{{end}}
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
package migrations
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"database/sql"
|
||||
@ -21,7 +22,6 @@ import (
|
||||
"code.gitea.io/gitea/models/migrations"
|
||||
migrate_base "code.gitea.io/gitea/models/migrations/base"
|
||||
"code.gitea.io/gitea/models/unittest"
|
||||
"code.gitea.io/gitea/modules/charset"
|
||||
"code.gitea.io/gitea/modules/git"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
@ -108,11 +108,11 @@ func readSQLFromFile(version string) (string, error) {
|
||||
}
|
||||
defer gr.Close()
|
||||
|
||||
bytes, err := io.ReadAll(gr)
|
||||
buf, err := io.ReadAll(gr)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(charset.MaybeRemoveBOM(bytes, charset.ConvertOpts{})), nil
|
||||
return string(bytes.TrimPrefix(buf, []byte{'\xef', '\xbb', '\xbf'})), nil
|
||||
}
|
||||
|
||||
func restoreOldDB(t *testing.T, version string) {
|
||||
|
||||
@ -2,20 +2,10 @@
|
||||
import {SvgIcon} from '../svg.ts';
|
||||
import {isPlainClick} from '../utils/dom.ts';
|
||||
import {shallowRef} from 'vue';
|
||||
import {type createViewFileTreeStore} from './ViewFileTreeStore.ts';
|
||||
|
||||
export type Item = {
|
||||
entryName: string;
|
||||
entryMode: 'blob' | 'exec' | 'tree' | 'commit' | 'symlink' | 'unknown';
|
||||
entryIcon: string;
|
||||
entryIconOpen: string;
|
||||
fullPath: string;
|
||||
submoduleUrl?: string;
|
||||
children?: Item[];
|
||||
};
|
||||
import type {createViewFileTreeStore, FileTreeItem} from './ViewFileTreeStore.ts';
|
||||
|
||||
const props = defineProps<{
|
||||
item: Item,
|
||||
item: FileTreeItem,
|
||||
store: ReturnType<typeof createViewFileTreeStore>
|
||||
}>();
|
||||
|
||||
|
||||
@ -3,11 +3,20 @@ import {GET} from '../modules/fetch.ts';
|
||||
import {pathEscapeSegments} from '../utils/url.ts';
|
||||
import {createElementFromHTML} from '../utils/dom.ts';
|
||||
import {html} from '../utils/html.ts';
|
||||
import type {Item} from './ViewFileTreeItem.vue';
|
||||
|
||||
export type FileTreeItem = {
|
||||
entryName: string;
|
||||
entryMode: 'blob' | 'exec' | 'tree' | 'commit' | 'symlink' | 'unknown';
|
||||
entryIcon: string;
|
||||
entryIconOpen: string;
|
||||
fullPath: string;
|
||||
submoduleUrl?: string;
|
||||
children?: Array<FileTreeItem>;
|
||||
};
|
||||
|
||||
export function createViewFileTreeStore(props: {repoLink: string, treePath: string, currentRefNameSubURL: string}) {
|
||||
const store = reactive({
|
||||
rootFiles: [] as Array<Item>,
|
||||
rootFiles: [] as Array<FileTreeItem>,
|
||||
selectedItem: props.treePath,
|
||||
|
||||
async loadChildren(treePath: string, subPath: string = '') {
|
||||
|
||||
4
web_src/js/globals.d.ts
vendored
4
web_src/js/globals.d.ts
vendored
@ -12,8 +12,8 @@ declare module '*.vue' {
|
||||
import type {DefineComponent} from 'vue';
|
||||
const component: DefineComponent<unknown, unknown, any>;
|
||||
export default component;
|
||||
// List of named exports from vue components, used to make `tsc` output clean.
|
||||
// To actually lint .vue files, `vue-tsc` is used because `tsc` can not parse them.
|
||||
// Here we declare all exports from vue files so `tsc` or `tsgo` can work for
|
||||
// non-vue files. To lint .vue files, `vue-tsc` must be used.
|
||||
export function initDashboardRepoList(): void;
|
||||
export function initRepositoryActionView(): void;
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user