fix: 修复标题长度计算不准确的问题 (#410)

使用基于 UTF-16 编码的加权算法替换 go-runewidth,与小红书实际计算规则一致:
每个非 ASCII 的 UTF-16 编码单元算 2 字节,ASCII 字符算 1 字节,总字节数除以 2 后向上取整。

Closes #401

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
zy
2026-02-10 23:04:12 +08:00
committed by GitHub
parent db3dd37cb8
commit a790a97c93
5 changed files with 58 additions and 13 deletions

17
pkg/xhsutil/title.go Normal file
View File

@@ -0,0 +1,17 @@
package xhsutil
import "unicode/utf16"
// CalcTitleLength returns the weighted length of a Xiaohongshu title.
//
// The string is converted to UTF-16 code units. Each code unit above 127
// (Chinese characters, full-width symbols, and so on) weighs 2 and every
// other code unit weighs 1; the total weight is then divided by 2, rounding
// up. A character encoded as a surrogate pair is two code units, so it
// contributes a weight of 4, i.e. a length of 2.
//
// This is intended to mirror how Xiaohongshu itself counts title length.
func CalcTitleLength(s string) int {
	units := utf16.Encode([]rune(s))

	// Every code unit weighs at least 1; non-ASCII units weigh one more.
	weight := len(units)
	for _, u := range units {
		if u > 127 {
			weight++
		}
	}

	// Adding 1 before the integer division rounds odd weights up.
	return (weight + 1) / 2
}

36
pkg/xhsutil/title_test.go Normal file
View File

@@ -0,0 +1,36 @@
package xhsutil
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestCalcTitleLength is a table-driven test for CalcTitleLength. Each case
// runs as its own subtest, named after the case, and compares the computed
// length against the expected value.
func TestCalcTitleLength(t *testing.T) {
	tests := []struct {
		name  string // subtest name
		input string // title passed to CalcTitleLength
		want  int    // expected weighted length
	}{
		{name: "空字符串", input: "", want: 0},
		{name: "纯中文", input: "你好世界", want: 4},
		{name: "纯英文", input: "hello", want: 3},
		{name: "纯数字", input: "12345", want: 3},
		{name: "中英混合-OOTD穿搭分享", input: "OOTD穿搭分享", want: 6},
		{name: "20个中文字刚好上限", input: "一二三四五六七八九十一二三四五六七八九十", want: 20},
		{name: "40个英文字母等于20", input: "abcdefghijklmnopqrstuvwxyzabcdefghijklmn", want: 20},
		{name: "单个emoji", input: "😀", want: 2},
		{name: "中文加emoji", input: "今天好开心😀", want: 7},
		{name: "奇数个英文字母向上取整", input: "a", want: 1},
		{name: "两个英文字母", input: "ab", want: 1},
		{name: "三个英文字母", input: "abc", want: 2},
		// Full-width "!?" (U+FF01, U+FF1F), written as escapes so the
		// characters cannot be lost or normalised to their ASCII forms.
		// An empty input here would yield 0, not the expected 2.
		{name: "全角符号", input: "\uff01\uff1f", want: 2},
		{name: "半角符号", input: "!?", want: 1},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.want, CalcTitleLength(tt.input))
		})
	}
}