From 3e57ba5b36a110065804a3f70f63b10587b17ea3 Mon Sep 17 00:00:00 2001 From: silverwind Date: Fri, 12 Dec 2025 18:38:59 +0100 Subject: [PATCH 1/6] Add permissions to`files-changed` jobs (#36142) Followup to https://github.com/go-gitea/gitea/pull/36140. `files-changed` is a job that imports another workflow via `uses` statement but CodeQL still complains about lack of permissions on these jobs, so add it. This will fix the remaining [3 CodeQL issues](https://github.com/go-gitea/gitea/security/code-scanning?query=is%3Aopen+branch%3Amain+permissions). --- .github/workflows/pull-compliance.yml | 2 ++ .github/workflows/pull-db-tests.yml | 2 ++ .github/workflows/pull-docker-dryrun.yml | 2 ++ 3 files changed, 6 insertions(+) diff --git a/.github/workflows/pull-compliance.yml b/.github/workflows/pull-compliance.yml index 9e1963d48a..c146b439e0 100644 --- a/.github/workflows/pull-compliance.yml +++ b/.github/workflows/pull-compliance.yml @@ -10,6 +10,8 @@ concurrency: jobs: files-changed: uses: ./.github/workflows/files-changed.yml + permissions: + contents: read lint-backend: if: needs.files-changed.outputs.backend == 'true' || needs.files-changed.outputs.actions == 'true' diff --git a/.github/workflows/pull-db-tests.yml b/.github/workflows/pull-db-tests.yml index 16c9e004a5..66f48d5af8 100644 --- a/.github/workflows/pull-db-tests.yml +++ b/.github/workflows/pull-db-tests.yml @@ -10,6 +10,8 @@ concurrency: jobs: files-changed: uses: ./.github/workflows/files-changed.yml + permissions: + contents: read test-pgsql: if: needs.files-changed.outputs.backend == 'true' || needs.files-changed.outputs.actions == 'true' diff --git a/.github/workflows/pull-docker-dryrun.yml b/.github/workflows/pull-docker-dryrun.yml index e1b86e5e38..1cd1ba31dd 100644 --- a/.github/workflows/pull-docker-dryrun.yml +++ b/.github/workflows/pull-docker-dryrun.yml @@ -10,6 +10,8 @@ concurrency: jobs: files-changed: uses: ./.github/workflows/files-changed.yml + permissions: + contents: read container: if: 
needs.files-changed.outputs.docker == 'true' || needs.files-changed.outputs.actions == 'true' From 3102c04c1eb9251d933797465e4187d60b17e8a0 Mon Sep 17 00:00:00 2001 From: silverwind Date: Fri, 12 Dec 2025 19:12:35 +0100 Subject: [PATCH 2/6] Fix issue close timeline icon (#36138) Previously there was a icon mismatch between a issue's label and the timeline close event icon --- templates/repo/issue/view_content/comments.tmpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/templates/repo/issue/view_content/comments.tmpl b/templates/repo/issue/view_content/comments.tmpl index 089cdf2ccd..6d23186d08 100644 --- a/templates/repo/issue/view_content/comments.tmpl +++ b/templates/repo/issue/view_content/comments.tmpl @@ -96,7 +96,7 @@ {{else if eq .Type 2}}
- {{svg "octicon-circle-slash"}} + {{svg "octicon-issue-closed"}} {{if not .OriginalAuthor}} {{template "shared/user/avatarlink" dict "user" .Poster}} {{end}} From 1e72b1563906ef5625f7f0dcb67ed4bad5e2429c Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Sat, 13 Dec 2025 02:56:05 +0800 Subject: [PATCH 3/6] Fix various bugs (#36139) * Fix #35768 * Fix #36064 * Fix #36051 * Fix cherry-pick panic --- modules/packages/npm/creator.go | 25 +++++++++++++++++++-- modules/packages/npm/creator_test.go | 28 ++++++++++++++++++++++- modules/packages/npm/metadata.go | 2 +- routers/web/repo/editor_cherry_pick.go | 4 +--- services/mailer/sender/sender.go | 31 ++++++++++---------------- services/repository/generate.go | 4 ++-- services/repository/generate_test.go | 21 ++++++++++------- templates/package/content/pypi.tmpl | 2 +- 8 files changed, 80 insertions(+), 37 deletions(-) diff --git a/modules/packages/npm/creator.go b/modules/packages/npm/creator.go index 11b5123c27..cc7695726b 100644 --- a/modules/packages/npm/creator.go +++ b/modules/packages/npm/creator.go @@ -62,7 +62,28 @@ type PackageMetadata struct { Author User `json:"author"` ReadmeFilename string `json:"readmeFilename,omitempty"` Users map[string]bool `json:"users,omitempty"` - License string `json:"license,omitempty"` + License License `json:"license,omitempty"` +} + +type License string + +func (l *License) UnmarshalJSON(data []byte) error { + switch data[0] { + case '"': + var value string + if err := json.Unmarshal(data, &value); err != nil { + return err + } + *l = License(value) + case '{': + var values map[string]any + if err := json.Unmarshal(data, &values); err != nil { + return err + } + value, _ := values["type"].(string) + *l = License(value) + } + return nil } // PackageMetadataVersion documentation: https://github.com/npm/registry/blob/master/docs/REGISTRY-API.md#version @@ -74,7 +95,7 @@ type PackageMetadataVersion struct { Description string `json:"description"` Author User `json:"author"` Homepage 
string `json:"homepage,omitempty"` - License string `json:"license,omitempty"` + License License `json:"license,omitempty"` Repository Repository `json:"repository"` Keywords []string `json:"keywords,omitempty"` Dependencies map[string]string `json:"dependencies,omitempty"` diff --git a/modules/packages/npm/creator_test.go b/modules/packages/npm/creator_test.go index 806377a52b..40c50de91f 100644 --- a/modules/packages/npm/creator_test.go +++ b/modules/packages/npm/creator_test.go @@ -13,6 +13,7 @@ import ( "code.gitea.io/gitea/modules/json" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestParsePackage(t *testing.T) { @@ -291,11 +292,36 @@ func TestParsePackage(t *testing.T) { assert.Equal(t, packageDescription, p.Metadata.Readme) assert.Equal(t, packageAuthor, p.Metadata.Author) assert.Equal(t, packageBin, p.Metadata.Bin["bin"]) - assert.Equal(t, "MIT", p.Metadata.License) + assert.Equal(t, "MIT", string(p.Metadata.License)) assert.Equal(t, "https://gitea.io/", p.Metadata.ProjectURL) assert.Contains(t, p.Metadata.Dependencies, "package") assert.Equal(t, "1.2.0", p.Metadata.Dependencies["package"]) assert.Equal(t, repository.Type, p.Metadata.Repository.Type) assert.Equal(t, repository.URL, p.Metadata.Repository.URL) }) + + t.Run("ValidLicenseMap", func(t *testing.T) { + packageJSON := `{ + "versions": { + "0.1.1": { + "name": "dev-null", + "version": "0.1.1", + "license": { + "type": "MIT" + }, + "dist": { + "integrity": "sha256-" + } + } + }, + "_attachments": { + "foo": { + "data": "AAAA" + } + } +}` + p, err := ParsePackage(strings.NewReader(packageJSON)) + require.NoError(t, err) + require.Equal(t, "MIT", string(p.Metadata.License)) + }) } diff --git a/modules/packages/npm/metadata.go b/modules/packages/npm/metadata.go index 362d0470d5..e6bbcb1177 100644 --- a/modules/packages/npm/metadata.go +++ b/modules/packages/npm/metadata.go @@ -12,7 +12,7 @@ type Metadata struct { Name string `json:"name,omitempty"` Description 
string `json:"description,omitempty"` Author string `json:"author,omitempty"` - License string `json:"license,omitempty"` + License License `json:"license,omitempty"` ProjectURL string `json:"project_url,omitempty"` Keywords []string `json:"keywords,omitempty"` Dependencies map[string]string `json:"dependencies,omitempty"` diff --git a/routers/web/repo/editor_cherry_pick.go b/routers/web/repo/editor_cherry_pick.go index c1f3ae861b..ca0e19517a 100644 --- a/routers/web/repo/editor_cherry_pick.go +++ b/routers/web/repo/editor_cherry_pick.go @@ -36,9 +36,7 @@ func CherryPick(ctx *context.Context) { ctx.Data["commit_message"] = "revert " + cherryPickCommit.Message() } else { ctx.Data["CherryPickType"] = "cherry-pick" - splits := strings.SplitN(cherryPickCommit.Message(), "\n", 2) - ctx.Data["commit_summary"] = splits[0] - ctx.Data["commit_message"] = splits[1] + ctx.Data["commit_summary"], ctx.Data["commit_message"], _ = strings.Cut(cherryPickCommit.Message(), "\n") } ctx.HTML(http.StatusOK, tplCherryPick) diff --git a/services/mailer/sender/sender.go b/services/mailer/sender/sender.go index e470c2f2b3..30c6feaf7a 100644 --- a/services/mailer/sender/sender.go +++ b/services/mailer/sender/sender.go @@ -4,10 +4,8 @@ package sender import ( + "errors" "io" - - "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/setting" ) type Sender interface { @@ -16,23 +14,18 @@ type Sender interface { var Send = send -func send(sender Sender, msgs ...*Message) error { - if setting.MailService == nil { - log.Error("Mailer: Send is being invoked but mail service hasn't been initialized") - return nil +func send(sender Sender, msg *Message) error { + m := msg.ToMessage() + froms := m.GetFrom() + to, err := m.GetRecipients() + if err != nil { + return err } - for _, msg := range msgs { - m := msg.ToMessage() - froms := m.GetFrom() - to, err := m.GetRecipients() - if err != nil { - return err - } - // TODO: implement sending from multiple addresses - if err := 
sender.Send(froms[0].Address, to, m); err != nil { - return err - } + // TODO: implement sending from multiple addresses + if len(froms) == 0 { + // FIXME: no idea why sometimes the "froms" can be empty, need to figure out the root problem + return errors.New("no FROM specified") } - return nil + return sender.Send(froms[0].Address, to, m) } diff --git a/services/repository/generate.go b/services/repository/generate.go index 3ec31dac22..b2913cd110 100644 --- a/services/repository/generate.go +++ b/services/repository/generate.go @@ -177,7 +177,7 @@ func substGiteaTemplateFile(ctx context.Context, tmpDir, tmpDirSubPath string, t } generatedContent := generateExpansion(ctx, string(content), templateRepo, generateRepo) - substSubPath := filepath.Clean(filePathSanitize(generateExpansion(ctx, tmpDirSubPath, templateRepo, generateRepo))) + substSubPath := filePathSanitize(generateExpansion(ctx, tmpDirSubPath, templateRepo, generateRepo)) newLocalPath := filepath.Join(tmpDir, substSubPath) regular, err := util.IsRegularFile(newLocalPath) if canWrite := regular || errors.Is(err, fs.ErrNotExist); !canWrite { @@ -358,5 +358,5 @@ func filePathSanitize(s string) string { } fields[i] = field } - return filepath.FromSlash(strings.Join(fields, "/")) + return filepath.Clean(filepath.FromSlash(strings.Trim(strings.Join(fields, "/"), "/"))) } diff --git a/services/repository/generate_test.go b/services/repository/generate_test.go index 9c01911ded..432de4dc59 100644 --- a/services/repository/generate_test.go +++ b/services/repository/generate_test.go @@ -54,19 +54,24 @@ text/*.txt } func TestFilePathSanitize(t *testing.T) { - assert.Equal(t, "test_CON", filePathSanitize("test_CON")) - assert.Equal(t, "test CON", filePathSanitize("test CON ")) - assert.Equal(t, "__/traverse/__", filePathSanitize(".. 
/traverse/ ..")) - assert.Equal(t, "./__/a/_git/b_", filePathSanitize("./../a/.git/ b: ")) + // path clean + assert.Equal(t, "a", filePathSanitize("//a/")) + assert.Equal(t, "_a", filePathSanitize(`\a`)) + assert.Equal(t, "__/a/__", filePathSanitize(".. /a/ ..")) + assert.Equal(t, "__/a/_git/b_", filePathSanitize("./../a/.git/ b: ")) + + // Windows reserved names assert.Equal(t, "_", filePathSanitize("CoN")) assert.Equal(t, "_", filePathSanitize("LpT1")) assert.Equal(t, "_", filePathSanitize("CoM1")) + assert.Equal(t, "test_CON", filePathSanitize("test_CON")) + assert.Equal(t, "test CON", filePathSanitize("test CON ")) + + // special chars assert.Equal(t, "_", filePathSanitize("\u0000")) - assert.Equal(t, "目标", filePathSanitize("目标")) - // unlike filepath.Clean, it only sanitizes, doesn't change the separator layout - assert.Equal(t, "", filePathSanitize("")) //nolint:testifylint // for easy reading + assert.Equal(t, ".", filePathSanitize("")) assert.Equal(t, ".", filePathSanitize(".")) - assert.Equal(t, "/", filePathSanitize("/")) + assert.Equal(t, ".", filePathSanitize("/")) } func TestProcessGiteaTemplateFile(t *testing.T) { diff --git a/templates/package/content/pypi.tmpl b/templates/package/content/pypi.tmpl index 2625c160fe..15d8971eaa 100644 --- a/templates/package/content/pypi.tmpl +++ b/templates/package/content/pypi.tmpl @@ -4,7 +4,7 @@
-
pip install --index-url  --extra-index-url https://pypi.org/ {{.PackageDescriptor.Package.Name}}
+
pip install --index-url  --extra-index-url https://pypi.org/simple {{.PackageDescriptor.Package.Name}}
From ac8308b5cbb10c46086862494e87f1b4fb79d211 Mon Sep 17 00:00:00 2001 From: silverwind Date: Sat, 13 Dec 2025 14:03:51 +0100 Subject: [PATCH 4/6] Refactor `FileTreeItem` type (#36137) --- web_src/js/components/ViewFileTreeItem.vue | 14 ++------------ web_src/js/components/ViewFileTreeStore.ts | 13 +++++++++++-- web_src/js/globals.d.ts | 4 ++-- 3 files changed, 15 insertions(+), 16 deletions(-) diff --git a/web_src/js/components/ViewFileTreeItem.vue b/web_src/js/components/ViewFileTreeItem.vue index 9a50adedaa..ce019e655f 100644 --- a/web_src/js/components/ViewFileTreeItem.vue +++ b/web_src/js/components/ViewFileTreeItem.vue @@ -2,20 +2,10 @@ import {SvgIcon} from '../svg.ts'; import {isPlainClick} from '../utils/dom.ts'; import {shallowRef} from 'vue'; -import {type createViewFileTreeStore} from './ViewFileTreeStore.ts'; - -export type Item = { - entryName: string; - entryMode: 'blob' | 'exec' | 'tree' | 'commit' | 'symlink' | 'unknown'; - entryIcon: string; - entryIconOpen: string; - fullPath: string; - submoduleUrl?: string; - children?: Item[]; -}; +import type {createViewFileTreeStore, FileTreeItem} from './ViewFileTreeStore.ts'; const props = defineProps<{ - item: Item, + item: FileTreeItem, store: ReturnType }>(); diff --git a/web_src/js/components/ViewFileTreeStore.ts b/web_src/js/components/ViewFileTreeStore.ts index 2dc8093878..936db07776 100644 --- a/web_src/js/components/ViewFileTreeStore.ts +++ b/web_src/js/components/ViewFileTreeStore.ts @@ -3,11 +3,20 @@ import {GET} from '../modules/fetch.ts'; import {pathEscapeSegments} from '../utils/url.ts'; import {createElementFromHTML} from '../utils/dom.ts'; import {html} from '../utils/html.ts'; -import type {Item} from './ViewFileTreeItem.vue'; + +export type FileTreeItem = { + entryName: string; + entryMode: 'blob' | 'exec' | 'tree' | 'commit' | 'symlink' | 'unknown'; + entryIcon: string; + entryIconOpen: string; + fullPath: string; + submoduleUrl?: string; + children?: Array; +}; export function 
createViewFileTreeStore(props: {repoLink: string, treePath: string, currentRefNameSubURL: string}) { const store = reactive({ - rootFiles: [] as Array, + rootFiles: [] as Array, selectedItem: props.treePath, async loadChildren(treePath: string, subPath: string = '') { diff --git a/web_src/js/globals.d.ts b/web_src/js/globals.d.ts index 00f1744a95..49ce63d688 100644 --- a/web_src/js/globals.d.ts +++ b/web_src/js/globals.d.ts @@ -12,8 +12,8 @@ declare module '*.vue' { import type {DefineComponent} from 'vue'; const component: DefineComponent; export default component; - // List of named exports from vue components, used to make `tsc` output clean. - // To actually lint .vue files, `vue-tsc` is used because `tsc` can not parse them. + // Here we declare all exports from vue files so `tsc` or `tsgo` can work for + // non-vue files. To lint .vue files, `vue-tsc` must be used. export function initDashboardRepoList(): void; export function initRepositoryActionView(): void; } From 29057ea55f3e21f56b54621581bb9b5b0956aba8 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sat, 13 Dec 2025 05:54:03 -0800 Subject: [PATCH 5/6] Fix bug when viewing the commit diff page with non-ANSI files (#36149) Fix #35504 --------- Co-authored-by: wxiaoguang --- modules/charset/charset.go | 154 +++++----- modules/charset/charset_test.go | 269 +++++------------- modules/httplib/serve.go | 7 +- modules/indexer/code/bleve/bleve.go | 2 +- .../code/elasticsearch/elasticsearch.go | 2 +- modules/setting/setting.go | 1 + routers/web/repo/editor.go | 6 +- services/gitdiff/gitdiff.go | 43 ++- services/gitdiff/gitdiff_test.go | 39 +++ services/gitdiff/highlightdiff_test.go | 8 +- .../migration-test/migration_test.go | 6 +- 11 files changed, 220 insertions(+), 317 deletions(-) diff --git a/modules/charset/charset.go b/modules/charset/charset.go index 597ce5120c..b156654973 100644 --- a/modules/charset/charset.go +++ b/modules/charset/charset.go @@ -5,12 +5,10 @@ package charset import ( "bytes" - "fmt" 
"io" "strings" "unicode/utf8" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" @@ -23,60 +21,39 @@ import ( var UTF8BOM = []byte{'\xef', '\xbb', '\xbf'} type ConvertOpts struct { - KeepBOM bool + KeepBOM bool + ErrorReplacement []byte + ErrorReturnOrigin bool } +var ToUTF8WithFallbackReaderPrefetchSize = 16 * 1024 + // ToUTF8WithFallbackReader detects the encoding of content and converts to UTF-8 reader if possible func ToUTF8WithFallbackReader(rd io.Reader, opts ConvertOpts) io.Reader { - buf := make([]byte, 2048) + buf := make([]byte, ToUTF8WithFallbackReaderPrefetchSize) n, err := util.ReadAtMost(rd, buf) if err != nil { - return io.MultiReader(bytes.NewReader(MaybeRemoveBOM(buf[:n], opts)), rd) - } - - charsetLabel, err := DetectEncoding(buf[:n]) - if err != nil || charsetLabel == "UTF-8" { - return io.MultiReader(bytes.NewReader(MaybeRemoveBOM(buf[:n], opts)), rd) - } - - encoding, _ := charset.Lookup(charsetLabel) - if encoding == nil { + // read error occurs, don't do any processing return io.MultiReader(bytes.NewReader(buf[:n]), rd) } - return transform.NewReader( - io.MultiReader( - bytes.NewReader(MaybeRemoveBOM(buf[:n], opts)), - rd, - ), - encoding.NewDecoder(), - ) -} - -// ToUTF8 converts content to UTF8 encoding -func ToUTF8(content []byte, opts ConvertOpts) (string, error) { - charsetLabel, err := DetectEncoding(content) - if err != nil { - return "", err - } else if charsetLabel == "UTF-8" { - return string(MaybeRemoveBOM(content, opts)), nil + charsetLabel, _ := DetectEncoding(buf[:n]) + if charsetLabel == "UTF-8" { + // is utf-8, try to remove BOM and read it as-is + return io.MultiReader(bytes.NewReader(maybeRemoveBOM(buf[:n], opts)), rd) } encoding, _ := charset.Lookup(charsetLabel) if encoding == nil { - return string(content), fmt.Errorf("Unknown encoding: %s", charsetLabel) + // unknown charset, don't do any processing + return io.MultiReader(bytes.NewReader(buf[:n]), rd) } - // If 
there is an error, we concatenate the nicely decoded part and the - // original left over. This way we won't lose much data. - result, n, err := transform.Bytes(encoding.NewDecoder(), content) - if err != nil { - result = append(result, content[n:]...) - } - - result = MaybeRemoveBOM(result, opts) - - return string(result), err + // convert from charset to utf-8 + return transform.NewReader( + io.MultiReader(bytes.NewReader(buf[:n]), rd), + encoding.NewDecoder(), + ) } // ToUTF8WithFallback detects the encoding of content and converts to UTF-8 if possible @@ -85,73 +62,84 @@ func ToUTF8WithFallback(content []byte, opts ConvertOpts) []byte { return bs } -// ToUTF8DropErrors makes sure the return string is valid utf-8; attempts conversion if possible -func ToUTF8DropErrors(content []byte, opts ConvertOpts) []byte { - charsetLabel, err := DetectEncoding(content) - if err != nil || charsetLabel == "UTF-8" { - return MaybeRemoveBOM(content, opts) +func ToUTF8DropErrors(content []byte) []byte { + return ToUTF8(content, ConvertOpts{ErrorReplacement: []byte{' '}}) +} + +func ToUTF8(content []byte, opts ConvertOpts) []byte { + charsetLabel, _ := DetectEncoding(content) + if charsetLabel == "UTF-8" { + return maybeRemoveBOM(content, opts) } encoding, _ := charset.Lookup(charsetLabel) if encoding == nil { + setting.PanicInDevOrTesting("unsupported detected charset %q, it shouldn't happen", charsetLabel) return content } - // We ignore any non-decodable parts from the file. - // Some parts might be lost var decoded []byte decoder := encoding.NewDecoder() idx := 0 - for { + for idx < len(content) { result, n, err := transform.Bytes(decoder, content[idx:]) decoded = append(decoded, result...) 
if err == nil { break } - decoded = append(decoded, ' ') - idx = idx + n + 1 - if idx >= len(content) { - break + if opts.ErrorReturnOrigin { + return content } + if opts.ErrorReplacement == nil { + decoded = append(decoded, content[idx+n]) + } else { + decoded = append(decoded, opts.ErrorReplacement...) + } + idx += n + 1 } - - return MaybeRemoveBOM(decoded, opts) + return maybeRemoveBOM(decoded, opts) } -// MaybeRemoveBOM removes a UTF-8 BOM from a []byte when opts.KeepBOM is false -func MaybeRemoveBOM(content []byte, opts ConvertOpts) []byte { +// maybeRemoveBOM removes a UTF-8 BOM from a []byte when opts.KeepBOM is false +func maybeRemoveBOM(content []byte, opts ConvertOpts) []byte { if opts.KeepBOM { return content } - if len(content) > 2 && bytes.Equal(content[0:3], UTF8BOM) { - return content[3:] - } - return content + return bytes.TrimPrefix(content, UTF8BOM) } // DetectEncoding detect the encoding of content -func DetectEncoding(content []byte) (string, error) { +// it always returns a detected or guessed "encoding" string, no matter error happens or not +func DetectEncoding(content []byte) (encoding string, _ error) { // First we check if the content represents valid utf8 content excepting a truncated character at the end. // Now we could decode all the runes in turn but this is not necessarily the cheapest thing to do - // instead we walk backwards from the end to trim off a the incomplete character + // instead we walk backwards from the end to trim off the incomplete character toValidate := content end := len(toValidate) - 1 - if end < 0 { - // no-op - } else if toValidate[end]>>5 == 0b110 { - // Incomplete 1 byte extension e.g. © which has been truncated to - toValidate = toValidate[:end] - } else if end > 0 && toValidate[end]>>6 == 0b10 && toValidate[end-1]>>4 == 0b1110 { - // Incomplete 2 byte extension e.g. 
⛔ <9b><94> which has been truncated to <9b> - toValidate = toValidate[:end-1] - } else if end > 1 && toValidate[end]>>6 == 0b10 && toValidate[end-1]>>6 == 0b10 && toValidate[end-2]>>3 == 0b11110 { - // Incomplete 3 byte extension e.g. 💩 <9f><92> which has been truncated to <9f><92> - toValidate = toValidate[:end-2] + // U+0000 U+007F 0yyyzzzz + // U+0080 U+07FF 110xxxyy 10yyzzzz + // U+0800 U+FFFF 1110wwww 10xxxxyy 10yyzzzz + // U+010000 U+10FFFF 11110uvv 10vvwwww 10xxxxyy 10yyzzzz + cnt := 0 + for end >= 0 && cnt < 4 { + c := toValidate[end] + if c>>5 == 0b110 || c>>4 == 0b1110 || c>>3 == 0b11110 { + // a leading byte + toValidate = toValidate[:end] + break + } else if c>>6 == 0b10 { + // a continuation byte + end-- + } else { + // not an utf-8 byte + break + } + cnt++ } + if utf8.Valid(toValidate) { - log.Debug("Detected encoding: utf-8 (fast)") return "UTF-8", nil } @@ -160,7 +148,7 @@ func DetectEncoding(content []byte) (string, error) { if len(content) < 1024 { // Check if original content is valid if _, err := textDetector.DetectBest(content); err != nil { - return "", err + return util.IfZero(setting.Repository.AnsiCharset, "UTF-8"), err } times := 1024 / len(content) detectContent = make([]byte, 0, times*len(content)) @@ -171,14 +159,10 @@ func DetectEncoding(content []byte) (string, error) { detectContent = content } - // Now we can't use DetectBest or just results[0] because the result isn't stable - so we need a tie break + // Now we can't use DetectBest or just results[0] because the result isn't stable - so we need a tie-break results, err := textDetector.DetectAll(detectContent) if err != nil { - if err == chardet.NotDetectedError && len(setting.Repository.AnsiCharset) > 0 { - log.Debug("Using default AnsiCharset: %s", setting.Repository.AnsiCharset) - return setting.Repository.AnsiCharset, nil - } - return "", err + return util.IfZero(setting.Repository.AnsiCharset, "UTF-8"), err } topConfidence := results[0].Confidence @@ -201,11 +185,9 @@ func 
DetectEncoding(content []byte) (string, error) { } // FIXME: to properly decouple this function the fallback ANSI charset should be passed as an argument - if topResult.Charset != "UTF-8" && len(setting.Repository.AnsiCharset) > 0 { - log.Debug("Using default AnsiCharset: %s", setting.Repository.AnsiCharset) + if topResult.Charset != "UTF-8" && setting.Repository.AnsiCharset != "" { return setting.Repository.AnsiCharset, err } - log.Debug("Detected encoding: %s", topResult.Charset) - return topResult.Charset, err + return topResult.Charset, nil } diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go index cd2e3b9aaa..0314abc347 100644 --- a/modules/charset/charset_test.go +++ b/modules/charset/charset_test.go @@ -4,108 +4,89 @@ package charset import ( - "bytes" "io" + "os" "strings" "testing" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/test" "github.com/stretchr/testify/assert" ) -func resetDefaultCharsetsOrder() { - defaultDetectedCharsetsOrder := make([]string, 0, len(setting.Repository.DetectedCharsetsOrder)) - for _, charset := range setting.Repository.DetectedCharsetsOrder { - defaultDetectedCharsetsOrder = append(defaultDetectedCharsetsOrder, strings.ToLower(strings.TrimSpace(charset))) - } +func TestMain(m *testing.M) { setting.Repository.DetectedCharsetScore = map[string]int{} - i := 0 - for _, charset := range defaultDetectedCharsetsOrder { - canonicalCharset := strings.ToLower(strings.TrimSpace(charset)) - if _, has := setting.Repository.DetectedCharsetScore[canonicalCharset]; !has { - setting.Repository.DetectedCharsetScore[canonicalCharset] = i - i++ - } + for i, charset := range setting.Repository.DetectedCharsetsOrder { + setting.Repository.DetectedCharsetScore[strings.ToLower(charset)] = i } + os.Exit(m.Run()) } func TestMaybeRemoveBOM(t *testing.T) { - res := MaybeRemoveBOM([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{}) + res := maybeRemoveBOM([]byte{0xc3, 0xa1, 
0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{}) assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res) - res = MaybeRemoveBOM([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{}) + res = maybeRemoveBOM([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{}) assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res) } func TestToUTF8(t *testing.T) { - resetDefaultCharsetsOrder() - // Note: golang compiler seems so behave differently depending on the current // locale, so some conversions might behave differently. For that reason, we don't // depend on particular conversions but in expected behaviors. - res, err := ToUTF8([]byte{0x41, 0x42, 0x43}, ConvertOpts{}) - assert.NoError(t, err) - assert.Equal(t, "ABC", res) + res := ToUTF8([]byte{0x41, 0x42, 0x43}, ConvertOpts{}) + assert.Equal(t, "ABC", string(res)) // "áéíóú" - res, err = ToUTF8([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{}) - assert.NoError(t, err) - assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, []byte(res)) + res = ToUTF8([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{}) + assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res) // "áéíóú" - res, err = ToUTF8([]byte{ + res = ToUTF8([]byte{ 0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba, }, ConvertOpts{}) - assert.NoError(t, err) - assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, []byte(res)) + assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res) - res, err = ToUTF8([]byte{ + res = ToUTF8([]byte{ 0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73, 0x41, 0x41, 0x41, 0x2e, }, ConvertOpts{}) - 
assert.NoError(t, err) stringMustStartWith(t, "Hola,", res) stringMustEndWith(t, "AAA.", res) - res, err = ToUTF8([]byte{ + res = ToUTF8([]byte{ 0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73, 0x41, 0x41, 0x41, 0x2e, }, ConvertOpts{}) - assert.NoError(t, err) stringMustStartWith(t, "Hola,", res) stringMustEndWith(t, "AAA.", res) - res, err = ToUTF8([]byte{ + res = ToUTF8([]byte{ 0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x81, 0xA4, 0x6F, 0x73, 0x41, 0x41, 0x41, 0x2e, }, ConvertOpts{}) - assert.NoError(t, err) stringMustStartWith(t, "Hola,", res) stringMustEndWith(t, "AAA.", res) // Japanese (Shift-JIS) // 日属秘ぞしちゅ。 - res, err = ToUTF8([]byte{ + res = ToUTF8([]byte{ 0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82, 0xBF, 0x82, 0xE3, 0x81, 0x42, }, ConvertOpts{}) - assert.NoError(t, err) assert.Equal(t, []byte{ 0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3, 0x81, 0x9E, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0xA1, 0xE3, 0x82, 0x85, 0xE3, 0x80, 0x82, - }, - []byte(res)) + }, res) - res, err = ToUTF8([]byte{0x00, 0x00, 0x00, 0x00}, ConvertOpts{}) - assert.NoError(t, err) - assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, []byte(res)) + res = ToUTF8([]byte{0x00, 0x00, 0x00, 0x00}, ConvertOpts{}) + assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, res) } func TestToUTF8WithFallback(t *testing.T) { - resetDefaultCharsetsOrder() // "ABC" res := ToUTF8WithFallback([]byte{0x41, 0x42, 0x43}, ConvertOpts{}) assert.Equal(t, []byte{0x41, 0x42, 0x43}, res) @@ -152,54 +133,58 @@ func TestToUTF8WithFallback(t *testing.T) { } func TestToUTF8DropErrors(t *testing.T) { - resetDefaultCharsetsOrder() // "ABC" - res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43}, ConvertOpts{}) + res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43}) assert.Equal(t, []byte{0x41, 0x42, 0x43}, res) // "áéíóú" - res = ToUTF8DropErrors([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 
0xb3, 0xc3, 0xba}, ConvertOpts{}) + res = ToUTF8DropErrors([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}) assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res) // UTF8 BOM + "áéíóú" - res = ToUTF8DropErrors([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, ConvertOpts{}) + res = ToUTF8DropErrors([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}) assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res) // "Hola, así cómo ños" - res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73}, ConvertOpts{}) + res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73}) assert.Equal(t, []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73}, res[:8]) assert.Equal(t, []byte{0x73}, res[len(res)-1:]) // "Hola, así cómo " minmatch := []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xC3, 0xAD, 0x20, 0x63, 0xC3, 0xB3, 0x6D, 0x6F, 0x20} - res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73}, ConvertOpts{}) + res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73}) // Do not fail for differences in invalid cases, as the library might change the conversion criteria for those assert.Equal(t, minmatch, res[0:len(minmatch)]) - res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x81, 0xA4, 0x6F, 0x73}, ConvertOpts{}) + res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x81, 0xA4, 0x6F, 0x73}) // Do not fail for differences in invalid cases, as the library 
might change the conversion criteria for those assert.Equal(t, minmatch, res[0:len(minmatch)]) // Japanese (Shift-JIS) // "日属秘ぞしちゅ。" - res = ToUTF8DropErrors([]byte{0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82, 0xBF, 0x82, 0xE3, 0x81, 0x42}, ConvertOpts{}) + res = ToUTF8DropErrors([]byte{0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82, 0xBF, 0x82, 0xE3, 0x81, 0x42}) assert.Equal(t, []byte{ 0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3, 0x81, 0x9E, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0xA1, 0xE3, 0x82, 0x85, 0xE3, 0x80, 0x82, }, res) - res = ToUTF8DropErrors([]byte{0x00, 0x00, 0x00, 0x00}, ConvertOpts{}) + res = ToUTF8DropErrors([]byte{0x00, 0x00, 0x00, 0x00}) assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, res) } func TestDetectEncoding(t *testing.T) { - resetDefaultCharsetsOrder() testSuccess := func(b []byte, expected string) { encoding, err := DetectEncoding(b) assert.NoError(t, err) assert.Equal(t, expected, encoding) } + + // invalid bytes + encoding, err := DetectEncoding([]byte{0xfa}) + assert.Error(t, err) + assert.Equal(t, "UTF-8", encoding) + // utf-8 b := []byte("just some ascii") testSuccess(b, "UTF-8") @@ -214,169 +199,49 @@ func TestDetectEncoding(t *testing.T) { // iso-8859-1: dcor b = []byte{0x44, 0xe9, 0x63, 0x6f, 0x72, 0x0a} - encoding, err := DetectEncoding(b) + encoding, err = DetectEncoding(b) assert.NoError(t, err) assert.Contains(t, encoding, "ISO-8859-1") - old := setting.Repository.AnsiCharset - setting.Repository.AnsiCharset = "placeholder" - defer func() { - setting.Repository.AnsiCharset = old - }() - testSuccess(b, "placeholder") - - // invalid bytes - b = []byte{0xfa} - _, err = DetectEncoding(b) - assert.Error(t, err) + defer test.MockVariableValue(&setting.Repository.AnsiCharset, "MyEncoding")() + testSuccess(b, "MyEncoding") } -func stringMustStartWith(t *testing.T, expected, value string) { - assert.Equal(t, expected, value[:len(expected)]) +func stringMustStartWith(t *testing.T, expected 
string, value []byte) { + assert.Equal(t, expected, string(value[:len(expected)])) } -func stringMustEndWith(t *testing.T, expected, value string) { - assert.Equal(t, expected, value[len(value)-len(expected):]) +func stringMustEndWith(t *testing.T, expected string, value []byte) { + assert.Equal(t, expected, string(value[len(value)-len(expected):])) } func TestToUTF8WithFallbackReader(t *testing.T) { - resetDefaultCharsetsOrder() + test.MockVariableValue(&ToUTF8WithFallbackReaderPrefetchSize) - for testLen := range 2048 { - pattern := " test { () }\n" - input := "" - for len(input) < testLen { - input += pattern - } - input = input[:testLen] - input += "// Выключаем" - rd := ToUTF8WithFallbackReader(bytes.NewReader([]byte(input)), ConvertOpts{}) + block := "aá啊🤔" + runes := []rune(block) + assert.Len(t, string(runes[0]), 1) + assert.Len(t, string(runes[1]), 2) + assert.Len(t, string(runes[2]), 3) + assert.Len(t, string(runes[3]), 4) + + content := strings.Repeat(block, 2) + for i := 1; i < len(content); i++ { + encoding, err := DetectEncoding([]byte(content[:i])) + assert.NoError(t, err) + assert.Equal(t, "UTF-8", encoding) + + ToUTF8WithFallbackReaderPrefetchSize = i + rd := ToUTF8WithFallbackReader(strings.NewReader(content), ConvertOpts{}) r, _ := io.ReadAll(rd) - assert.Equalf(t, input, string(r), "testing string len=%d", testLen) + assert.Equal(t, content, string(r)) + } + for _, r := range runes { + content = "abc abc " + string(r) + string(r) + string(r) + for i := 0; i < len(content); i++ { + encoding, err := DetectEncoding([]byte(content[:i])) + assert.NoError(t, err) + assert.Equal(t, "UTF-8", encoding) + } } - - truncatedOneByteExtension := failFastBytes - encoding, _ := DetectEncoding(truncatedOneByteExtension) - assert.Equal(t, "UTF-8", encoding) - - truncatedTwoByteExtension := failFastBytes - truncatedTwoByteExtension[len(failFastBytes)-1] = 0x9b - truncatedTwoByteExtension[len(failFastBytes)-2] = 0xe2 - - encoding, _ = 
DetectEncoding(truncatedTwoByteExtension) - assert.Equal(t, "UTF-8", encoding) - - truncatedThreeByteExtension := failFastBytes - truncatedThreeByteExtension[len(failFastBytes)-1] = 0x92 - truncatedThreeByteExtension[len(failFastBytes)-2] = 0x9f - truncatedThreeByteExtension[len(failFastBytes)-3] = 0xf0 - - encoding, _ = DetectEncoding(truncatedThreeByteExtension) - assert.Equal(t, "UTF-8", encoding) -} - -var failFastBytes = []byte{ - 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x6f, 0x72, 0x67, 0x2e, 0x61, 0x70, 0x61, 0x63, 0x68, 0x65, 0x2e, 0x74, 0x6f, - 0x6f, 0x6c, 0x73, 0x2e, 0x61, 0x6e, 0x74, 0x2e, 0x74, 0x61, 0x73, 0x6b, 0x64, 0x65, 0x66, 0x73, 0x2e, 0x63, 0x6f, 0x6e, - 0x64, 0x69, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x4f, 0x73, 0x0a, 0x69, 0x6d, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x6f, 0x72, 0x67, - 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x62, 0x6f, 0x6f, - 0x74, 0x2e, 0x67, 0x72, 0x61, 0x64, 0x6c, 0x65, 0x2e, 0x74, 0x61, 0x73, 0x6b, 0x73, 0x2e, 0x72, 0x75, 0x6e, 0x2e, 0x42, - 0x6f, 0x6f, 0x74, 0x52, 0x75, 0x6e, 0x0a, 0x0a, 0x70, 0x6c, 0x75, 0x67, 0x69, 0x6e, 0x73, 0x20, 0x7b, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x69, 0x64, 0x28, 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, - 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x62, 0x6f, 0x6f, 0x74, 0x22, 0x29, 0x0a, 0x7d, 0x0a, 0x0a, 0x64, 0x65, 0x70, 0x65, - 0x6e, 0x64, 0x65, 0x6e, 0x63, 0x69, 0x65, 0x73, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, - 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a, - 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, 0x61, 0x70, 0x69, 0x22, 0x29, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, - 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, - 0x28, 0x22, 0x3a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, 0x61, 0x70, 0x69, 0x2d, 0x64, 0x6f, 
0x63, 0x73, 0x22, 0x29, - 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, 0x64, 0x62, - 0x22, 0x29, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, - 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, - 0x73, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x22, 0x29, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, - 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a, - 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x3a, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x66, - 0x73, 0x22, 0x29, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x28, 0x70, 0x72, 0x6f, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x22, 0x3a, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, - 0x3a, 0x69, 0x6e, 0x74, 0x65, 0x67, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x2d, 0x6d, 0x71, 0x22, 0x29, 0x29, 0x0a, 0x0a, - 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, - 0x6a, 0x66, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x65, 0x3a, 0x70, 0x65, 0x2d, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, - 0x2d, 0x61, 0x75, 0x74, 0x68, 0x2d, 0x72, 0x65, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x2d, 0x73, 0x74, 0x61, 0x72, 0x74, - 0x65, 0x72, 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6a, 0x66, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x65, 0x3a, 0x70, 0x65, 0x2d, 0x63, - 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2d, 0x68, 0x61, 0x6c, 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, - 0x65, 0x6d, 0x65, 
0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6a, 0x66, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x2e, - 0x70, 0x65, 0x3a, 0x70, 0x65, 0x2d, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2d, 0x63, 0x6f, 0x72, 0x65, 0x22, 0x29, 0x0a, - 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, - 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, - 0x2e, 0x62, 0x6f, 0x6f, 0x74, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x62, 0x6f, 0x6f, 0x74, 0x2d, 0x73, 0x74, - 0x61, 0x72, 0x74, 0x65, 0x72, 0x2d, 0x77, 0x65, 0x62, 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, - 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, - 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x62, 0x6f, 0x6f, 0x74, 0x3a, 0x73, 0x70, 0x72, - 0x69, 0x6e, 0x67, 0x2d, 0x62, 0x6f, 0x6f, 0x74, 0x2d, 0x73, 0x74, 0x61, 0x72, 0x74, 0x65, 0x72, 0x2d, 0x61, 0x6f, 0x70, - 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, - 0x6e, 0x28, 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, - 0x72, 0x6b, 0x2e, 0x62, 0x6f, 0x6f, 0x74, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x62, 0x6f, 0x6f, 0x74, 0x2d, - 0x73, 0x74, 0x61, 0x72, 0x74, 0x65, 0x72, 0x2d, 0x61, 0x63, 0x74, 0x75, 0x61, 0x74, 0x6f, 0x72, 0x22, 0x29, 0x0a, 0x20, - 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, - 0x72, 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x63, - 0x6c, 0x6f, 0x75, 0x64, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x2d, 0x73, 0x74, - 0x61, 0x72, 0x74, 0x65, 0x72, 0x2d, 0x62, 0x6f, 0x6f, 0x74, 0x73, 
0x74, 0x72, 0x61, 0x70, 0x22, 0x29, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, 0x72, - 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x63, 0x6c, - 0x6f, 0x75, 0x64, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x2d, 0x73, 0x74, 0x61, - 0x72, 0x74, 0x65, 0x72, 0x2d, 0x63, 0x6f, 0x6e, 0x73, 0x75, 0x6c, 0x2d, 0x61, 0x6c, 0x6c, 0x22, 0x29, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, 0x72, - 0x67, 0x2e, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x63, 0x6c, - 0x6f, 0x75, 0x64, 0x3a, 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x63, 0x6c, 0x6f, 0x75, 0x64, 0x2d, 0x73, 0x74, 0x61, - 0x72, 0x74, 0x65, 0x72, 0x2d, 0x73, 0x6c, 0x65, 0x75, 0x74, 0x68, 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, - 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6f, 0x72, 0x67, 0x2e, 0x73, 0x70, - 0x72, 0x69, 0x6e, 0x67, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x77, 0x6f, 0x72, 0x6b, 0x2e, 0x72, 0x65, 0x74, 0x72, 0x79, 0x3a, - 0x73, 0x70, 0x72, 0x69, 0x6e, 0x67, 0x2d, 0x72, 0x65, 0x74, 0x72, 0x79, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x63, 0x68, 0x2e, 0x71, - 0x6f, 0x73, 0x2e, 0x6c, 0x6f, 0x67, 0x62, 0x61, 0x63, 0x6b, 0x3a, 0x6c, 0x6f, 0x67, 0x62, 0x61, 0x63, 0x6b, 0x2d, 0x63, - 0x6c, 0x61, 0x73, 0x73, 0x69, 0x63, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, - 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x69, 0x6f, 0x2e, 0x6d, 0x69, 0x63, 0x72, 0x6f, 0x6d, 0x65, - 0x74, 0x65, 0x72, 0x3a, 0x6d, 0x69, 0x63, 0x72, 0x6f, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x2d, 0x72, 0x65, 0x67, 0x69, 
0x73, - 0x74, 0x72, 0x79, 0x2d, 0x70, 0x72, 0x6f, 0x6d, 0x65, 0x74, 0x68, 0x65, 0x75, 0x73, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x6b, 0x6f, 0x74, - 0x6c, 0x69, 0x6e, 0x28, 0x22, 0x73, 0x74, 0x64, 0x6c, 0x69, 0x62, 0x22, 0x29, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, - 0x2f, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x54, 0x65, 0x73, 0x74, 0x20, 0x64, 0x65, 0x70, 0x65, 0x6e, 0x64, - 0x65, 0x6e, 0x63, 0x69, 0x65, 0x73, 0x2e, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, - 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x2f, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x74, - 0x65, 0x73, 0x74, 0x49, 0x6d, 0x70, 0x6c, 0x65, 0x6d, 0x65, 0x6e, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x28, 0x22, 0x6a, - 0x66, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x70, 0x65, 0x3a, 0x70, 0x65, 0x2d, 0x63, 0x6f, 0x6d, 0x6d, 0x6f, 0x6e, 0x2d, - 0x74, 0x65, 0x73, 0x74, 0x22, 0x29, 0x0a, 0x7d, 0x0a, 0x0a, 0x76, 0x61, 0x6c, 0x20, 0x70, 0x61, 0x74, 0x63, 0x68, 0x4a, - 0x61, 0x72, 0x20, 0x62, 0x79, 0x20, 0x74, 0x61, 0x73, 0x6b, 0x73, 0x2e, 0x72, 0x65, 0x67, 0x69, 0x73, 0x74, 0x65, 0x72, - 0x69, 0x6e, 0x67, 0x28, 0x4a, 0x61, 0x72, 0x3a, 0x3a, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, - 0x20, 0x20, 0x61, 0x72, 0x63, 0x68, 0x69, 0x76, 0x65, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x69, 0x66, 0x69, 0x65, 0x72, 0x2e, - 0x73, 0x65, 0x74, 0x28, 0x22, 0x70, 0x61, 0x74, 0x63, 0x68, 0x65, 0x64, 0x22, 0x29, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, - 0x76, 0x61, 0x6c, 0x20, 0x72, 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x70, 0x61, 0x74, 0x68, - 0x20, 0x62, 0x79, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x67, - 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 
0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6d, 0x61, 0x6e, 0x69, 0x66, 0x65, 0x73, 0x74, - 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, - 0x73, 0x28, 0x22, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x2d, 0x50, 0x61, 0x74, 0x68, 0x22, 0x20, 0x74, 0x6f, 0x20, 0x6f, 0x62, - 0x6a, 0x65, 0x63, 0x74, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x70, - 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x20, 0x76, 0x61, 0x6c, 0x20, 0x70, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x20, 0x3d, - 0x20, 0x22, 0x66, 0x69, 0x6c, 0x65, 0x3a, 0x2f, 0x2b, 0x22, 0x2e, 0x74, 0x6f, 0x52, 0x65, 0x67, 0x65, 0x78, 0x28, 0x29, - 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x76, 0x65, 0x72, 0x72, 0x69, 0x64, - 0x65, 0x20, 0x66, 0x75, 0x6e, 0x20, 0x74, 0x6f, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x28, 0x29, 0x3a, 0x20, 0x53, 0x74, - 0x72, 0x69, 0x6e, 0x67, 0x20, 0x3d, 0x20, 0x72, 0x75, 0x6e, 0x74, 0x69, 0x6d, 0x65, 0x43, 0x6c, 0x61, 0x73, 0x73, 0x70, - 0x61, 0x74, 0x68, 0x2e, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x2e, 0x6a, 0x6f, 0x69, 0x6e, 0x54, 0x6f, 0x53, 0x74, 0x72, 0x69, - 0x6e, 0x67, 0x28, 0x22, 0x20, 0x22, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x74, 0x2e, 0x74, 0x6f, 0x55, 0x52, 0x49, 0x28, 0x29, 0x2e, 0x74, 0x6f, 0x55, - 0x52, 0x4c, 0x28, 0x29, 0x2e, 0x74, 0x6f, 0x53, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x28, 0x29, 0x2e, 0x72, 0x65, 0x70, 0x6c, - 0x61, 0x63, 0x65, 0x46, 0x69, 0x72, 0x73, 0x74, 0x28, 0x70, 0x61, 0x74, 0x74, 0x65, 0x72, 0x6e, 0x2c, 0x20, 0x22, 0x2f, - 0x22, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, - 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x74, 0x61, 0x73, - 0x6b, 0x73, 0x2e, 0x6e, 0x61, 0x6d, 0x65, 0x64, 0x3c, 0x42, 0x6f, 0x6f, 0x74, 0x52, 
0x75, 0x6e, 0x3e, 0x28, 0x22, 0x62, - 0x6f, 0x6f, 0x74, 0x52, 0x75, 0x6e, 0x22, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x4f, - 0x73, 0x2e, 0x69, 0x73, 0x46, 0x61, 0x6d, 0x69, 0x6c, 0x79, 0x28, 0x4f, 0x73, 0x2e, 0x46, 0x41, 0x4d, 0x49, 0x4c, 0x59, - 0x5f, 0x57, 0x49, 0x4e, 0x44, 0x4f, 0x57, 0x53, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, - 0x20, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x70, 0x61, 0x74, 0x68, 0x20, 0x3d, 0x20, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x28, 0x73, - 0x6f, 0x75, 0x72, 0x63, 0x65, 0x53, 0x65, 0x74, 0x73, 0x2e, 0x6e, 0x61, 0x6d, 0x65, 0x64, 0x28, 0x22, 0x6d, 0x61, 0x69, - 0x6e, 0x22, 0x29, 0x2e, 0x6d, 0x61, 0x70, 0x20, 0x7b, 0x20, 0x69, 0x74, 0x2e, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x20, - 0x7d, 0x2c, 0x20, 0x70, 0x61, 0x74, 0x63, 0x68, 0x4a, 0x61, 0x72, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, - 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0xd0, } diff --git a/modules/httplib/serve.go b/modules/httplib/serve.go index b4c5e7fe1e..2d66a86a8b 100644 --- a/modules/httplib/serve.go +++ b/modules/httplib/serve.go @@ -19,7 +19,6 @@ import ( charsetModule "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/httpcache" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/typesniffer" "code.gitea.io/gitea/modules/util" @@ -109,11 +108,7 @@ func setServeHeadersByFile(r *http.Request, w http.ResponseWriter, mineBuf []byt } if isPlain { - charset, err := charsetModule.DetectEncoding(mineBuf) - if err != nil { - log.Error("Detect raw file %s charset failed: %v, using by default utf-8", opts.Filename, err) - charset = "utf-8" - } + charset, _ := charsetModule.DetectEncoding(mineBuf) opts.ContentTypeCharset = strings.ToLower(charset) } diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index bdb477ce6e..5f6a7f6082 100644 --- a/modules/indexer/code/bleve/bleve.go 
+++ b/modules/indexer/code/bleve/bleve.go @@ -203,7 +203,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro RepoID: repo.ID, CommitID: commitSha, Filename: update.Filename, - Content: string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})), + Content: string(charset.ToUTF8DropErrors(fileContents)), Language: analyze.GetCodeLanguage(update.Filename, fileContents), UpdatedAt: time.Now().UTC(), }) diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index b2eb301a5d..a7027051d2 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -191,7 +191,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro Doc(map[string]any{ "repo_id": repo.ID, "filename": update.Filename, - "content": string(charset.ToUTF8DropErrors(fileContents, charset.ConvertOpts{})), + "content": string(charset.ToUTF8DropErrors(fileContents)), "commit_id": sha, "language": analyze.GetCodeLanguage(update.Filename, fileContents), "updated_at": timeutil.TimeStampNow(), diff --git a/modules/setting/setting.go b/modules/setting/setting.go index e14997801f..dc60d99bd6 100644 --- a/modules/setting/setting.go +++ b/modules/setting/setting.go @@ -240,4 +240,5 @@ func PanicInDevOrTesting(msg string, a ...any) { if !IsProd || IsInTesting { panic(fmt.Sprintf(msg, a...)) } + log.Error(msg, a...) 
} diff --git a/routers/web/repo/editor.go b/routers/web/repo/editor.go index 983249a6d2..048c9f3d4a 100644 --- a/routers/web/repo/editor.go +++ b/routers/web/repo/editor.go @@ -317,11 +317,7 @@ func EditFile(ctx *context.Context) { ctx.ServerError("ReadAll", err) return } - if content, err := charset.ToUTF8(buf, charset.ConvertOpts{KeepBOM: true}); err != nil { - ctx.Data["FileContent"] = string(buf) - } else { - ctx.Data["FileContent"] = content - } + ctx.Data["FileContent"] = string(charset.ToUTF8(buf, charset.ConvertOpts{KeepBOM: true, ErrorReturnOrigin: true})) } } diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 6e15f71609..f8fde6ab29 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -835,11 +835,11 @@ parsingLoop: if buffer.Len() == 0 { continue } - charsetLabel, err := charset.DetectEncoding(buffer.Bytes()) - if charsetLabel != "UTF-8" && err == nil { - encoding, _ := stdcharset.Lookup(charsetLabel) - if encoding != nil { - diffLineTypeDecoders[lineType] = encoding.NewDecoder() + charsetLabel, _ := charset.DetectEncoding(buffer.Bytes()) + if charsetLabel != "UTF-8" { + charsetEncoding, _ := stdcharset.Lookup(charsetLabel) + if charsetEncoding != nil { + diffLineTypeDecoders[lineType] = charsetEncoding.NewDecoder() } } } @@ -1325,10 +1325,10 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit shouldFullFileHighlight := !setting.Git.DisableDiffHighlight && attrDiff.Value() == "" if shouldFullFileHighlight { if limitedContent.LeftContent != nil && limitedContent.LeftContent.buf.Len() < MaxDiffHighlightEntireFileSize { - diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.String()) + diffFile.highlightedLeftLines = highlightCodeLines(diffFile, true /* left */, limitedContent.LeftContent.buf.Bytes()) } if limitedContent.RightContent != nil && limitedContent.RightContent.buf.Len() < MaxDiffHighlightEntireFileSize { - 
diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.String()) + diffFile.highlightedRightLines = highlightCodeLines(diffFile, false /* right */, limitedContent.RightContent.buf.Bytes()) } } } @@ -1336,9 +1336,34 @@ func GetDiffForRender(ctx context.Context, repoLink string, gitRepo *git.Reposit return diff, nil } -func highlightCodeLines(diffFile *DiffFile, isLeft bool, content string) map[int]template.HTML { +func splitHighlightLines(buf []byte) (ret [][]byte) { + lineCount := bytes.Count(buf, []byte("\n")) + 1 + ret = make([][]byte, 0, lineCount) + nlTagClose := []byte("\n" right after \n, sometimes before. + // * "text\n" + // * "text\n" + if bytes.HasPrefix(buf[pos:], nlTagClose) { + pos1 := bytes.IndexByte(buf[pos:], '>') + if pos1 != -1 { + pos += pos1 + } + } + ret = append(ret, buf[:pos+1]) + buf = buf[pos+1:] + } +} + +func highlightCodeLines(diffFile *DiffFile, isLeft bool, rawContent []byte) map[int]template.HTML { + content := util.UnsafeBytesToString(charset.ToUTF8(rawContent, charset.ConvertOpts{})) highlightedNewContent, _ := highlight.Code(diffFile.Name, diffFile.Language, content) - splitLines := strings.Split(string(highlightedNewContent), "\n") + splitLines := splitHighlightLines([]byte(highlightedNewContent)) lines := make(map[int]template.HTML, len(splitLines)) // only save the highlighted lines we need, but not the whole file, to save memory for _, sec := range diffFile.Sections { diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index 721ae0dfc7..a94dad8b63 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -5,6 +5,7 @@ package gitdiff import ( + "html/template" "strconv" "strings" "testing" @@ -1106,3 +1107,41 @@ func TestDiffLine_GetExpandDirection(t *testing.T) { assert.Equal(t, c.direction, c.diffLine.GetExpandDirection(), "case %s expected direction: %s", c.name, c.direction) } } + +func 
TestHighlightCodeLines(t *testing.T) { + t.Run("CharsetDetecting", func(t *testing.T) { + diffFile := &DiffFile{ + Name: "a.c", + Language: "c", + Sections: []*DiffSection{ + { + Lines: []*DiffLine{{LeftIdx: 1}}, + }, + }, + } + ret := highlightCodeLines(diffFile, true, []byte("// abc\xcc def\xcd")) // ISO-8859-1 bytes + assert.Equal(t, "// abcÌ defÍ\n", string(ret[0])) + }) + + t.Run("LeftLines", func(t *testing.T) { + diffFile := &DiffFile{ + Name: "a.c", + Language: "c", + Sections: []*DiffSection{ + { + Lines: []*DiffLine{ + {LeftIdx: 1}, + {LeftIdx: 2}, + {LeftIdx: 3}, + }, + }, + }, + } + const nl = "\n" + ret := highlightCodeLines(diffFile, true, []byte("a\nb\n")) + assert.Equal(t, map[int]template.HTML{ + 0: `a` + nl, + 1: `b`, + }, ret) + }) +} diff --git a/services/gitdiff/highlightdiff_test.go b/services/gitdiff/highlightdiff_test.go index aebe38ae7c..0df2e29d13 100644 --- a/services/gitdiff/highlightdiff_test.go +++ b/services/gitdiff/highlightdiff_test.go @@ -25,12 +25,12 @@ func TestDiffWithHighlight(t *testing.T) { t.Run("CleanUp", func(t *testing.T) { hcd := newHighlightCodeDiff() - codeA := template.HTML(`this is updated comment`) + codeA := template.HTML(`this is a comment`) + codeB := template.HTML(`this is updated comment`) outDel := hcd.diffLineWithHighlight(DiffLineDel, codeA, codeB) - assert.Equal(t, `a comment`, string(outDel)) + assert.Equal(t, `this is a comment`, string(outDel)) outAdd := hcd.diffLineWithHighlight(DiffLineAdd, codeA, codeB) - assert.Equal(t, `updated comment`, string(outAdd)) + assert.Equal(t, `this is updated comment`, string(outAdd)) }) t.Run("OpenCloseTags", func(t *testing.T) { diff --git a/tests/integration/migration-test/migration_test.go b/tests/integration/migration-test/migration_test.go index 5fa7cbbfb7..2659c5c53d 100644 --- a/tests/integration/migration-test/migration_test.go +++ b/tests/integration/migration-test/migration_test.go @@ -4,6 +4,7 @@ package migrations import ( + "bytes" "compress/gzip" "context" 
"database/sql" @@ -21,7 +22,6 @@ import ( "code.gitea.io/gitea/models/migrations" migrate_base "code.gitea.io/gitea/models/migrations/base" "code.gitea.io/gitea/models/unittest" - "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" @@ -108,11 +108,11 @@ func readSQLFromFile(version string) (string, error) { } defer gr.Close() - bytes, err := io.ReadAll(gr) + buf, err := io.ReadAll(gr) if err != nil { return "", err } - return string(charset.MaybeRemoveBOM(bytes, charset.ConvertOpts{})), nil + return string(bytes.TrimPrefix(buf, []byte{'\xef', '\xbb', '\xbf'})), nil } func restoreOldDB(t *testing.T, version string) { From 1f5237e0d7214294aceaa4487a98c88d183a243c Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Sat, 13 Dec 2025 18:14:18 -0800 Subject: [PATCH 6/6] Check user visibility when redirecting to a renamed user (#36148) Fix #34169 --- routers/api/v1/api.go | 4 +- routers/api/v1/user/helper.go | 2 +- services/context/context_response.go | 16 ++++++- services/context/org.go | 2 +- services/context/repo.go | 2 +- services/context/user.go | 2 +- tests/integration/user_test.go | 72 ++++++++++++++++++++++++++++ 7 files changed, 92 insertions(+), 8 deletions(-) diff --git a/routers/api/v1/api.go b/routers/api/v1/api.go index 9bce98ac02..fcf9e73057 100644 --- a/routers/api/v1/api.go +++ b/routers/api/v1/api.go @@ -152,7 +152,7 @@ func repoAssignment() func(ctx *context.APIContext) { if err != nil { if user_model.IsErrUserNotExist(err) { if redirectUserID, err := user_model.LookupUserRedirect(ctx, userName); err == nil { - context.RedirectToUser(ctx.Base, userName, redirectUserID) + context.RedirectToUser(ctx.Base, ctx.Doer, userName, redirectUserID) } else if user_model.IsErrUserRedirectNotExist(err) { ctx.APIErrorNotFound("GetUserByName", err) } else { @@ -612,7 +612,7 @@ func orgAssignment(args ...bool) func(ctx *context.APIContext) { if organization.IsErrOrgNotExist(err) { 
redirectUserID, err := user_model.LookupUserRedirect(ctx, ctx.PathParam("org")) if err == nil { - context.RedirectToUser(ctx.Base, ctx.PathParam("org"), redirectUserID) + context.RedirectToUser(ctx.Base, ctx.Doer, ctx.PathParam("org"), redirectUserID) } else if user_model.IsErrUserRedirectNotExist(err) { ctx.APIErrorNotFound("GetOrgByName", err) } else { diff --git a/routers/api/v1/user/helper.go b/routers/api/v1/user/helper.go index f49bbbd6db..de3ec089df 100644 --- a/routers/api/v1/user/helper.go +++ b/routers/api/v1/user/helper.go @@ -16,7 +16,7 @@ func GetUserByPathParam(ctx *context.APIContext, name string) *user_model.User { if err != nil { if user_model.IsErrUserNotExist(err) { if redirectUserID, err2 := user_model.LookupUserRedirect(ctx, username); err2 == nil { - context.RedirectToUser(ctx.Base, username, redirectUserID) + context.RedirectToUser(ctx.Base, ctx.Doer, username, redirectUserID) } else { ctx.APIErrorNotFound("GetUserByName", err) } diff --git a/services/context/context_response.go b/services/context/context_response.go index 3f64fc7352..bb896024b1 100644 --- a/services/context/context_response.go +++ b/services/context/context_response.go @@ -20,15 +20,27 @@ import ( "code.gitea.io/gitea/modules/httplib" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/templates" "code.gitea.io/gitea/modules/web/middleware" ) // RedirectToUser redirect to a differently-named user -func RedirectToUser(ctx *Base, userName string, redirectUserID int64) { +func RedirectToUser(ctx *Base, doer *user_model.User, userName string, redirectUserID int64) { user, err := user_model.GetUserByID(ctx, redirectUserID) if err != nil { - ctx.HTTPError(http.StatusInternalServerError, "unable to get user") + if user_model.IsErrUserNotExist(err) { + ctx.HTTPError(http.StatusNotFound, "user does not exist") + } else { + ctx.HTTPError(http.StatusInternalServerError, "unable to get user") + } + 
return + } + + // Handle Visibility + if user.Visibility != structs.VisibleTypePublic && doer == nil { + // We must be signed in to see limited or private organizations + ctx.HTTPError(http.StatusNotFound, "user does not exist") return } diff --git a/services/context/org.go b/services/context/org.go index 1cd8923178..d41bd5ea79 100644 --- a/services/context/org.go +++ b/services/context/org.go @@ -49,7 +49,7 @@ func GetOrganizationByParams(ctx *Context) { if organization.IsErrOrgNotExist(err) { redirectUserID, err := user_model.LookupUserRedirect(ctx, orgName) if err == nil { - RedirectToUser(ctx.Base, orgName, redirectUserID) + RedirectToUser(ctx.Base, ctx.Doer, orgName, redirectUserID) } else if user_model.IsErrUserRedirectNotExist(err) { ctx.NotFound(err) } else { diff --git a/services/context/repo.go b/services/context/repo.go index 64b8695236..5a313e6f15 100644 --- a/services/context/repo.go +++ b/services/context/repo.go @@ -443,7 +443,7 @@ func RepoAssignment(ctx *Context) { } if redirectUserID, err := user_model.LookupUserRedirect(ctx, userName); err == nil { - RedirectToUser(ctx.Base, userName, redirectUserID) + RedirectToUser(ctx.Base, ctx.Doer, userName, redirectUserID) } else if user_model.IsErrUserRedirectNotExist(err) { ctx.NotFound(nil) } else { diff --git a/services/context/user.go b/services/context/user.go index f1a3035ee9..19c055e2a3 100644 --- a/services/context/user.go +++ b/services/context/user.go @@ -69,7 +69,7 @@ func userAssignment(ctx *Base, doer *user_model.User, errCb func(int, any)) (con if err != nil { if user_model.IsErrUserNotExist(err) { if redirectUserID, err := user_model.LookupUserRedirect(ctx, username); err == nil { - RedirectToUser(ctx, username, redirectUserID) + RedirectToUser(ctx, doer, username, redirectUserID) } else if user_model.IsErrUserRedirectNotExist(err) { errCb(http.StatusNotFound, err) } else { diff --git a/tests/integration/user_test.go b/tests/integration/user_test.go index 34692d9cab..54b372dd16 100644 --- 
a/tests/integration/user_test.go +++ b/tests/integration/user_test.go @@ -45,6 +45,78 @@ func TestRenameUsername(t *testing.T) { unittest.AssertNotExistsBean(t, &user_model.User{Name: "user2"}) } +func TestViewLimitedAndPrivateUserAndRename(t *testing.T) { + defer tests.PrepareTestEnv(t)() + + // user 22 is a limited visibility org + org22 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 22}) + req := NewRequest(t, "GET", "/"+org22.Name) + MakeRequest(t, req, http.StatusNotFound) + + session := loginUser(t, "user1") + oldName := org22.Name + newName := "org22_renamed" + req = NewRequestWithValues(t, "POST", "/org/"+oldName+"/settings/rename", map[string]string{ + "_csrf": GetUserCSRFToken(t, session), + "org_name": oldName, + "new_org_name": newName, + }) + session.MakeRequest(t, req, http.StatusOK) + + unittest.AssertExistsAndLoadBean(t, &user_model.User{Name: newName}) + unittest.AssertNotExistsBean(t, &user_model.User{Name: oldName}) + + req = NewRequest(t, "GET", "/"+oldName) + MakeRequest(t, req, http.StatusNotFound) // anonymous user cannot visit limited visibility org via old name + req = NewRequest(t, "GET", "/"+oldName) + session.MakeRequest(t, req, http.StatusTemporaryRedirect) // login user can visit limited visibility org via old name + + // org 23 is a private visibility org + org23 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 23}) + req = NewRequest(t, "GET", "/"+org23.Name) + MakeRequest(t, req, http.StatusNotFound) + + oldName = org23.Name + newName = "org23_renamed" + req = NewRequestWithValues(t, "POST", "/org/"+oldName+"/settings/rename", map[string]string{ + "_csrf": GetUserCSRFToken(t, session), + "org_name": oldName, + "new_org_name": newName, + }) + session.MakeRequest(t, req, http.StatusOK) + + unittest.AssertExistsAndLoadBean(t, &user_model.User{Name: newName}) + unittest.AssertNotExistsBean(t, &user_model.User{Name: oldName}) + + req = NewRequest(t, "GET", "/"+oldName) + MakeRequest(t, req, http.StatusNotFound) // 
anonymous user cannot visit private visibility org via old name + req = NewRequest(t, "GET", "/"+oldName) + session.MakeRequest(t, req, http.StatusTemporaryRedirect) // login user can visit private visibility org via old name + + // user 31 is a private visibility user + user31 := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 31}) + req = NewRequest(t, "GET", "/"+user31.Name) + MakeRequest(t, req, http.StatusNotFound) + + oldName = user31.Name + newName = "user31_renamed" + session2 := loginUser(t, oldName) + req = NewRequestWithValues(t, "POST", "/user/settings", map[string]string{ + "_csrf": GetUserCSRFToken(t, session2), + "name": newName, + "visibility": "2", // private + }) + session2.MakeRequest(t, req, http.StatusSeeOther) + + unittest.AssertExistsAndLoadBean(t, &user_model.User{Name: newName}) + unittest.AssertNotExistsBean(t, &user_model.User{Name: oldName}) + + req = NewRequest(t, "GET", "/"+oldName) + MakeRequest(t, req, http.StatusNotFound) // anonymous user cannot visit private visibility user via old name + req = NewRequest(t, "GET", "/"+oldName) + session.MakeRequest(t, req, http.StatusTemporaryRedirect) // login user1 can visit private visibility user via old name +} + func TestRenameInvalidUsername(t *testing.T) { defer tests.PrepareTestEnv(t)()