improve test

This commit is contained in:
wxiaoguang 2025-12-13 20:57:06 +08:00
parent 87d1fe5345
commit eb2b6fc16c
2 changed files with 18 additions and 6 deletions

View File

@ -125,12 +125,16 @@ func DetectEncoding(content []byte) (encoding string, _ error) {
cnt := 0
for end >= 0 && cnt < 4 {
c := toValidate[end]
if c>>6 == 0b10 {
end--
}
if c>>5 == 0b110 || c>>4 == 0b1110 || c>>3 == 0b11110 {
// a leading byte
toValidate = toValidate[:end]
break
} else if c>>6 == 0b10 {
// a continuation byte
end--
} else {
// not a UTF-8 byte
break
}
cnt++
}

View File

@ -4,7 +4,6 @@
package charset
import (
"bytes"
"io"
"os"
"strings"
@ -228,12 +227,21 @@ func TestToUTF8WithFallbackReader(t *testing.T) {
content := strings.Repeat(block, 2)
for i := 1; i < len(content); i++ {
encoding, _ := DetectEncoding([]byte(content[:i]))
encoding, err := DetectEncoding([]byte(content[:i]))
assert.NoError(t, err)
assert.Equal(t, "UTF-8", encoding)
ToUTF8WithFallbackReaderPrefetchSize = i
rd := ToUTF8WithFallbackReader(bytes.NewReader([]byte(content)), ConvertOpts{})
rd := ToUTF8WithFallbackReader(strings.NewReader(content), ConvertOpts{})
r, _ := io.ReadAll(rd)
assert.Equal(t, content, string(r))
}
for _, r := range runes {
content = "abc abc " + string(r) + string(r) + string(r)
for i := 0; i < len(content); i++ {
encoding, err := DetectEncoding([]byte(content[:i]))
assert.NoError(t, err)
assert.Equal(t, "UTF-8", encoding)
}
}
}