* fix: 修复滑动绑定事件评论 * fix: fix * fix: fix * fix: 修复没有评论的场景 * fix * fix: fix --------- Co-authored-by: chekayo <9827969+chekayo@user.noreply.gitee.com>
868 lines
22 KiB
Go
868 lines
22 KiB
Go
package xiaohongshu
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"fmt"
|
||
"math/rand"
|
||
"regexp"
|
||
"strconv"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/avast/retry-go/v4"
|
||
"github.com/go-rod/rod"
|
||
"github.com/go-rod/rod/lib/proto"
|
||
"github.com/sirupsen/logrus"
|
||
"github.com/xpzouying/xiaohongshu-mcp/errors"
|
||
)
|
||
|
||
// ========== Configuration constants ==========
|
||
// Tuning knobs for the comment-loading loop.
const (
	// defaultMaxAttempts is the loop budget used when no MaxCommentItems
	// limit is configured.
	defaultMaxAttempts = 500
	// stagnantLimit is the stagnation count at which handleStagnation
	// performs a large burst of scrolling and resets the counter.
	stagnantLimit = 20
	// minScrollDelta is the smallest scroll distance (in pixels) that
	// performScroll still counts as progress.
	minScrollDelta = 10
	// maxClickPerRound is the base number of "show more" buttons clicked per
	// round; a random extra in [0, maxClickPerRound) is added on top.
	maxClickPerRound = 3
	// stagnantCheckThreshold is the number of stagnant checks required to
	// confirm completion after the target is reached.
	// NOTE(review): not referenced by the code visible in this file.
	stagnantCheckThreshold = 2
	// largeScrollTrigger is the stagnation count from which performScroll
	// switches to large scrolling.
	largeScrollTrigger = 5
	// buttonClickInterval makes the loop try the "show more" buttons only on
	// every N-th attempt.
	buttonClickInterval = 3
	// finalSprintPushCount is the number of scroll pushes issued by
	// performFinalSprint once the attempt budget is exhausted.
	finalSprintPushCount = 15
)
|
||
|
||
// delayConfig is a delay range expressed in milliseconds. The two bounds are
// handed to sleepRandom as (min, max).
type delayConfig struct {
	min int
	max int
}
|
||
|
||
var (
|
||
humanDelayRange = delayConfig{300, 700}
|
||
reactionTimeRange = delayConfig{300, 800}
|
||
hoverTimeRange = delayConfig{100, 300}
|
||
readTimeRange = delayConfig{500, 1200}
|
||
shortReadRange = delayConfig{600, 1200}
|
||
scrollWaitRange = delayConfig{100, 200}
|
||
postScrollRange = delayConfig{300, 500}
|
||
)
|
||
|
||
// ========== Data structures ==========
|
||
|
||
// CommentLoadConfig controls how the comments of a feed are loaded.
type CommentLoadConfig struct {
	// ClickMoreReplies enables clicking the ".show-more" buttons while
	// loading.
	ClickMoreReplies bool
	// MaxRepliesThreshold skips "show more" buttons whose reply count is
	// greater than this value; values <= 0 disable skipping.
	MaxRepliesThreshold int
	// MaxCommentItems stops loading once this many top-level comments are
	// present; values <= 0 mean no limit.
	MaxCommentItems int
	// ScrollSpeed is "slow" or "fast"; any other value is treated as normal.
	ScrollSpeed string
}
|
||
|
||
func DefaultCommentLoadConfig() CommentLoadConfig {
|
||
return CommentLoadConfig{
|
||
ClickMoreReplies: false,
|
||
MaxRepliesThreshold: 10,
|
||
MaxCommentItems: 0,
|
||
ScrollSpeed: "normal",
|
||
}
|
||
}
|
||
|
||
type FeedDetailAction struct {
|
||
page *rod.Page
|
||
}
|
||
|
||
func NewFeedDetailAction(page *rod.Page) *FeedDetailAction {
|
||
return &FeedDetailAction{page: page}
|
||
}
|
||
|
||
// ========== Main business logic ==========
|
||
|
||
func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken string, loadAllComments bool, config CommentLoadConfig) (*FeedDetailResponse, error) {
|
||
return f.GetFeedDetailWithConfig(ctx, feedID, xsecToken, loadAllComments, config)
|
||
}
|
||
|
||
func (f *FeedDetailAction) GetFeedDetailWithConfig(ctx context.Context, feedID, xsecToken string, loadAllComments bool, config CommentLoadConfig) (*FeedDetailResponse, error) {
|
||
page := f.page.Context(ctx).Timeout(10 * time.Minute)
|
||
url := makeFeedDetailURL(feedID, xsecToken)
|
||
|
||
logrus.Infof("打开 feed 详情页: %s", url)
|
||
logrus.Infof("配置: 点击更多=%v, 回复阈值=%d, 最大评论数=%d, 滚动速度=%s",
|
||
config.ClickMoreReplies, config.MaxRepliesThreshold, config.MaxCommentItems, config.ScrollSpeed)
|
||
|
||
// 使用retry-go处理页面导航和DOM稳定等待
|
||
err := retry.Do(
|
||
func() error {
|
||
page.MustNavigate(url)
|
||
page.MustWaitDOMStable()
|
||
return nil
|
||
},
|
||
retry.Attempts(3),
|
||
retry.Delay(500*time.Millisecond),
|
||
retry.MaxJitter(1000*time.Millisecond),
|
||
retry.OnRetry(func(n uint, err error) {
|
||
logrus.Debugf("页面导航重试 #%d: %v", n, err)
|
||
}),
|
||
)
|
||
if err != nil {
|
||
logrus.Errorf("页面导航失败: %v", err)
|
||
return nil, err
|
||
}
|
||
sleepRandom(1000, 1000)
|
||
|
||
if err := checkPageAccessible(page); err != nil {
|
||
return nil, err
|
||
}
|
||
|
||
if loadAllComments {
|
||
if err := f.loadAllCommentsWithConfig(page, config); err != nil {
|
||
logrus.Warnf("加载全部评论失败: %v", err)
|
||
}
|
||
}
|
||
|
||
return f.extractFeedDetail(page, feedID)
|
||
}
|
||
|
||
// ========== Comment loader ==========
|
||
|
||
type commentLoader struct {
|
||
page *rod.Page
|
||
config CommentLoadConfig
|
||
stats *loadStats
|
||
state *loadState
|
||
}
|
||
|
||
// loadStats holds the counters reported in the loader's log messages.
type loadStats struct {
	// totalClicked is the number of "show more" buttons clicked so far.
	totalClicked int
	// totalSkipped is the number of "show more" buttons skipped so far.
	totalSkipped int
	// attempts is the zero-based index of the current loop iteration.
	attempts int
}
|
||
|
||
// loadState is the progress remembered between loop iterations.
type loadState struct {
	// lastCount is the comment count seen by the previous updateState call.
	lastCount int
	// lastScrollTop is the scroll offset recorded after the last scroll that
	// made progress.
	lastScrollTop int
	// stagnantChecks counts consecutive checks without progress; it is
	// incremented by both updateState and performScroll.
	stagnantChecks int
}
|
||
|
||
func (f *FeedDetailAction) loadAllCommentsWithConfig(page *rod.Page, config CommentLoadConfig) error {
|
||
loader := &commentLoader{
|
||
page: page,
|
||
config: config,
|
||
stats: &loadStats{},
|
||
state: &loadState{},
|
||
}
|
||
|
||
return loader.load()
|
||
}
|
||
|
||
func (cl *commentLoader) load() error {
|
||
maxAttempts := cl.calculateMaxAttempts()
|
||
scrollInterval := getScrollInterval(cl.config.ScrollSpeed)
|
||
|
||
logrus.Info("开始加载评论...")
|
||
scrollToCommentsArea(cl.page)
|
||
sleepRandom(humanDelayRange.min, humanDelayRange.max)
|
||
|
||
// 检查是否没有评论
|
||
if cl.checkNoComments() {
|
||
return nil
|
||
}
|
||
|
||
for cl.stats.attempts = 0; cl.stats.attempts < maxAttempts; cl.stats.attempts++ {
|
||
logrus.Debugf("=== 尝试 %d/%d ===", cl.stats.attempts+1, maxAttempts)
|
||
|
||
if cl.checkComplete() {
|
||
return nil
|
||
}
|
||
|
||
if cl.shouldClickButtons() {
|
||
cl.clickButtonsWithRetry()
|
||
}
|
||
|
||
currentCount := getCommentCount(cl.page)
|
||
cl.updateState(currentCount)
|
||
|
||
if cl.shouldStopAtTarget(currentCount) {
|
||
return nil
|
||
}
|
||
|
||
cl.performScroll()
|
||
cl.handleStagnation()
|
||
|
||
time.Sleep(scrollInterval)
|
||
}
|
||
|
||
cl.performFinalSprint()
|
||
return nil
|
||
}
|
||
|
||
func (cl *commentLoader) calculateMaxAttempts() int {
|
||
if cl.config.MaxCommentItems > 0 {
|
||
return cl.config.MaxCommentItems * 3
|
||
}
|
||
return defaultMaxAttempts
|
||
}
|
||
|
||
func (cl *commentLoader) checkNoComments() bool {
|
||
if checkNoCommentsArea(cl.page) {
|
||
logrus.Infof("✓ 检测到无评论区域(这是一片荒地),跳过加载")
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
|
||
func (cl *commentLoader) checkComplete() bool {
|
||
if checkEndContainer(cl.page) {
|
||
currentCount := getCommentCount(cl.page)
|
||
logrus.Infof("✓ 检测到 'THE END' 元素,已滑动到底部")
|
||
sleepRandom(humanDelayRange.min, humanDelayRange.max)
|
||
logrus.Infof("✓ 加载完成: %d 条评论, 尝试次数: %d, 点击: %d, 跳过: %d",
|
||
currentCount, cl.stats.attempts+1, cl.stats.totalClicked, cl.stats.totalSkipped)
|
||
return true
|
||
}
|
||
return false
|
||
}
|
||
|
||
func (cl *commentLoader) shouldClickButtons() bool {
|
||
return cl.config.ClickMoreReplies && cl.stats.attempts%buttonClickInterval == 0
|
||
}
|
||
|
||
func (cl *commentLoader) clickButtonsWithRetry() {
|
||
clicked, skipped := clickShowMoreButtonsSmart(cl.page, cl.config.MaxRepliesThreshold)
|
||
if clicked > 0 || skipped > 0 {
|
||
cl.stats.totalClicked += clicked
|
||
cl.stats.totalSkipped += skipped
|
||
logrus.Infof("点击'更多': %d 个, 跳过: %d 个, 累计点击: %d, 累计跳过: %d",
|
||
clicked, skipped, cl.stats.totalClicked, cl.stats.totalSkipped)
|
||
|
||
sleepRandom(readTimeRange.min, readTimeRange.max)
|
||
|
||
// 重试一轮
|
||
clicked2, skipped2 := clickShowMoreButtonsSmart(cl.page, cl.config.MaxRepliesThreshold)
|
||
if clicked2 > 0 || skipped2 > 0 {
|
||
cl.stats.totalClicked += clicked2
|
||
cl.stats.totalSkipped += skipped2
|
||
logrus.Infof("第 2 轮: 点击 %d, 跳过 %d", clicked2, skipped2)
|
||
sleepRandom(shortReadRange.min, shortReadRange.max)
|
||
}
|
||
}
|
||
}
|
||
|
||
func (cl *commentLoader) updateState(currentCount int) {
|
||
totalCount := getTotalCommentCount(cl.page)
|
||
logrus.Debugf("当前评论: %d, 目标: %d", currentCount, totalCount)
|
||
|
||
if currentCount != cl.state.lastCount {
|
||
logrus.Infof("✓ 评论增加: %d -> %d (+%d)",
|
||
cl.state.lastCount, currentCount, currentCount-cl.state.lastCount)
|
||
cl.state.lastCount = currentCount
|
||
cl.state.stagnantChecks = 0
|
||
} else {
|
||
cl.state.stagnantChecks++
|
||
if cl.state.stagnantChecks%5 == 0 {
|
||
logrus.Debugf("评论停滞 %d 次", cl.state.stagnantChecks)
|
||
}
|
||
}
|
||
}
|
||
|
||
func (cl *commentLoader) shouldStopAtTarget(currentCount int) bool {
|
||
// 如果未设置最大评论数,或者还未达到目标,继续加载
|
||
if cl.config.MaxCommentItems <= 0 {
|
||
return false
|
||
}
|
||
|
||
// 如果已达到或超过目标评论数,立即停止
|
||
if currentCount >= cl.config.MaxCommentItems {
|
||
logrus.Infof("✓ 已达到目标评论数: %d/%d, 停止加载",
|
||
currentCount, cl.config.MaxCommentItems)
|
||
return true
|
||
}
|
||
|
||
return false
|
||
}
|
||
|
||
func (cl *commentLoader) performScroll() {
|
||
currentCount := getCommentCount(cl.page)
|
||
if currentCount > 0 {
|
||
scrollToLastComment(cl.page)
|
||
sleepRandom(postScrollRange.min, postScrollRange.max)
|
||
}
|
||
|
||
largeMode := cl.state.stagnantChecks >= largeScrollTrigger
|
||
pushCount := 1
|
||
if largeMode {
|
||
pushCount = 3 + rand.Intn(3)
|
||
}
|
||
|
||
_, scrollDelta, currentScrollTop := humanScroll(cl.page, cl.config.ScrollSpeed, largeMode, pushCount)
|
||
|
||
if scrollDelta < minScrollDelta || currentScrollTop == cl.state.lastScrollTop {
|
||
cl.state.stagnantChecks++
|
||
if cl.state.stagnantChecks%5 == 0 {
|
||
logrus.Debugf("滚动停滞 %d 次", cl.state.stagnantChecks)
|
||
}
|
||
} else {
|
||
cl.state.stagnantChecks = 0
|
||
cl.state.lastScrollTop = currentScrollTop
|
||
}
|
||
}
|
||
|
||
func (cl *commentLoader) handleStagnation() {
|
||
if cl.state.stagnantChecks >= stagnantLimit {
|
||
logrus.Infof("停滞过多,尝试大冲刺...")
|
||
humanScroll(cl.page, cl.config.ScrollSpeed, true, 10)
|
||
cl.state.stagnantChecks = 0
|
||
|
||
if checkEndContainer(cl.page) {
|
||
currentCount := getCommentCount(cl.page)
|
||
logrus.Infof("✓ 到达底部,评论数: %d", currentCount)
|
||
}
|
||
}
|
||
}
|
||
|
||
func (cl *commentLoader) performFinalSprint() {
|
||
logrus.Infof("达到最大尝试次数,最后冲刺...")
|
||
humanScroll(cl.page, cl.config.ScrollSpeed, true, finalSprintPushCount)
|
||
|
||
currentCount := getCommentCount(cl.page)
|
||
hasEnd := checkEndContainer(cl.page)
|
||
logrus.Infof("✓ 加载结束: %d 条评论, 点击: %d, 跳过: %d, 到达底部: %v",
|
||
currentCount, cl.stats.totalClicked, cl.stats.totalSkipped, hasEnd)
|
||
}
|
||
|
||
// ========== Utility functions ==========
|
||
|
||
// sleepRandom blocks for a random duration in [minMs, maxMs) milliseconds.
// When maxMs is not greater than minMs it sleeps for exactly minMs.
func sleepRandom(minMs, maxMs int) {
	waitMs := minMs
	if span := maxMs - minMs; span > 0 {
		waitMs += rand.Intn(span)
	}
	time.Sleep(time.Duration(waitMs) * time.Millisecond)
}
|
||
|
||
// getScrollInterval returns a randomised pause between loop iterations:
// 1200-1499ms for "slow", 300-399ms for "fast" and 600-799ms for any other
// speed (normal).
func getScrollInterval(speed string) time.Duration {
	// Defaults correspond to the normal speed.
	baseMs, jitterMs := 600, 200
	switch speed {
	case "slow":
		baseMs, jitterMs = 1200, 300
	case "fast":
		baseMs, jitterMs = 300, 100
	}
	return time.Duration(baseMs+rand.Intn(jitterMs)) * time.Millisecond
}
|
||
|
||
// ========== Button clicking ==========
|
||
|
||
func clickShowMoreButtonsSmart(page *rod.Page, maxRepliesThreshold int) (clicked, skipped int) {
|
||
elements, err := page.Elements(".show-more")
|
||
if err != nil {
|
||
return 0, 0
|
||
}
|
||
|
||
replyCountRegex := regexp.MustCompile(`展开\s*(\d+)\s*条回复`)
|
||
maxClick := maxClickPerRound + rand.Intn(maxClickPerRound)
|
||
clickedInRound := 0
|
||
|
||
for _, el := range elements {
|
||
if clickedInRound >= maxClick {
|
||
break
|
||
}
|
||
|
||
if !isElementClickable(el) {
|
||
continue
|
||
}
|
||
|
||
text, err := el.Text()
|
||
if err != nil {
|
||
continue
|
||
}
|
||
|
||
if shouldSkipButton(text, maxRepliesThreshold, replyCountRegex) {
|
||
skipped++
|
||
continue
|
||
}
|
||
|
||
if clickElementWithHumanBehavior(page, el, text) {
|
||
clicked++
|
||
clickedInRound++
|
||
}
|
||
}
|
||
|
||
return clicked, skipped
|
||
}
|
||
|
||
func isElementClickable(el *rod.Element) bool {
|
||
visible, err := el.Visible()
|
||
if err != nil || !visible {
|
||
return false
|
||
}
|
||
|
||
box, err := el.Shape()
|
||
return err == nil && len(box.Quads) > 0
|
||
}
|
||
|
||
func shouldSkipButton(text string, threshold int, regex *regexp.Regexp) bool {
|
||
if threshold <= 0 {
|
||
return false
|
||
}
|
||
|
||
matches := regex.FindStringSubmatch(text)
|
||
if len(matches) > 1 {
|
||
if replyCount, err := strconv.Atoi(matches[1]); err == nil && replyCount > threshold {
|
||
logrus.Debugf("跳过'%s'(回复数 %d > 阈值 %d)", text, replyCount, threshold)
|
||
return true
|
||
}
|
||
}
|
||
return false
|
||
}
|
||
|
||
func clickElementWithHumanBehavior(page *rod.Page, el *rod.Element, text string) bool {
|
||
var clickSuccess bool
|
||
|
||
// 使用retry-go进行点击操作重试
|
||
err := retry.Do(
|
||
func() error {
|
||
// 滚动到元素
|
||
el.MustEval(`() => {
|
||
try {
|
||
this.scrollIntoView({behavior: 'smooth', block: 'center'});
|
||
} catch (e) {}
|
||
}`)
|
||
|
||
sleepRandom(reactionTimeRange.min, reactionTimeRange.max)
|
||
|
||
// 鼠标悬停
|
||
if box, err := el.Shape(); err == nil && len(box.Quads) > 0 {
|
||
x := float64(box.Quads[0][0]+box.Quads[0][4]) / 2
|
||
y := float64(box.Quads[0][1]+box.Quads[0][5]) / 2
|
||
page.Mouse.MustMoveTo(x, y)
|
||
sleepRandom(hoverTimeRange.min, hoverTimeRange.max)
|
||
}
|
||
|
||
// 点击
|
||
if err := el.Click(proto.InputMouseButtonLeft, 1); err != nil {
|
||
return err // 返回错误以触发重试
|
||
}
|
||
|
||
// 模拟人类阅读时间
|
||
sleepRandom(readTimeRange.min, readTimeRange.max)
|
||
clickSuccess = true
|
||
return nil
|
||
},
|
||
retry.Attempts(3),
|
||
retry.Delay(100*time.Millisecond),
|
||
retry.MaxJitter(200*time.Millisecond),
|
||
retry.OnRetry(func(n uint, err error) {
|
||
logrus.Debugf("点击重试 #%d: %s, 错误: %v", n, text, err)
|
||
}),
|
||
)
|
||
|
||
if err != nil {
|
||
logrus.Debugf("点击失败 '%s': %v", text, err)
|
||
return false
|
||
}
|
||
|
||
if clickSuccess {
|
||
logrus.Debugf("点击了'%s'", text)
|
||
}
|
||
|
||
return clickSuccess
|
||
}
|
||
|
||
// ========== Scrolling ==========
|
||
|
||
func humanScroll(page *rod.Page, speed string, largeMode bool, pushCount int) (bool, int, int) {
|
||
beforeTop := getScrollTop(page)
|
||
viewportHeight := page.MustEval(`() => window.innerHeight`).Int()
|
||
|
||
baseRatio := getScrollRatio(speed)
|
||
if largeMode {
|
||
baseRatio *= 2.0
|
||
}
|
||
|
||
scrolled := false
|
||
actualDelta := 0
|
||
currentScrollTop := beforeTop
|
||
|
||
for i := 0; i < max(1, pushCount); i++ {
|
||
scrollDelta := calculateScrollDelta(viewportHeight, baseRatio)
|
||
page.MustEval(`(delta) => { window.scrollBy(0, delta); }`, scrollDelta)
|
||
|
||
sleepRandom(scrollWaitRange.min, scrollWaitRange.max)
|
||
|
||
currentScrollTop = getScrollTop(page)
|
||
deltaThisTime := currentScrollTop - beforeTop
|
||
actualDelta += deltaThisTime
|
||
|
||
if deltaThisTime > 5 {
|
||
scrolled = true
|
||
}
|
||
|
||
beforeTop = currentScrollTop
|
||
|
||
if i < pushCount-1 {
|
||
sleepRandom(humanDelayRange.min, humanDelayRange.max)
|
||
}
|
||
}
|
||
|
||
if !scrolled && pushCount > 0 {
|
||
page.MustEval(`() => window.scrollTo(0, document.body.scrollHeight)`)
|
||
sleepRandom(postScrollRange.min, postScrollRange.max)
|
||
currentScrollTop = getScrollTop(page)
|
||
actualDelta = currentScrollTop - beforeTop + actualDelta
|
||
scrolled = actualDelta > 5
|
||
}
|
||
|
||
if scrolled {
|
||
logrus.Debugf("滚动: %d -> %d (Δ%d, large=%v, push=%d)",
|
||
beforeTop-actualDelta, currentScrollTop, actualDelta, largeMode, pushCount)
|
||
}
|
||
|
||
return scrolled, actualDelta, currentScrollTop
|
||
}
|
||
|
||
// getScrollRatio returns the fraction of the viewport height used as the base
// scroll step: 0.5 for "slow", 0.9 for "fast" and 0.7 for anything else.
func getScrollRatio(speed string) float64 {
	if speed == "slow" {
		return 0.5
	}
	if speed == "fast" {
		return 0.9
	}
	// Every other value is treated as normal speed.
	return 0.7
}
|
||
|
||
// calculateScrollDelta returns a randomised scroll distance in pixels. The
// step is viewportHeight times (baseRatio plus up to 0.2 of random extra),
// raised to at least 400, and then shifted by a jitter in [-50, 49].
func calculateScrollDelta(viewportHeight int, baseRatio float64) float64 {
	const minStep = 400

	ratio := baseRatio + rand.Float64()*0.2
	step := float64(viewportHeight) * ratio
	if step < minStep {
		step = minStep
	}

	jitter := float64(rand.Intn(100) - 50)
	return step + jitter
}
|
||
|
||
func scrollToCommentsArea(page *rod.Page) {
|
||
logrus.Info("滚动到评论区...")
|
||
|
||
// 先定位到评论区
|
||
if el, err := page.Timeout(2 * time.Second).Element(".comments-container"); err == nil {
|
||
el.MustScrollIntoView()
|
||
}
|
||
// 等待滚动完成
|
||
time.Sleep(500 * time.Millisecond)
|
||
|
||
// 触发一次小滚动,激活懒加载机制
|
||
smartScroll(page, 100)
|
||
}
|
||
|
||
// smartScroll 智能滚动:触发滚轮事件以正确触发懒加载
|
||
func smartScroll(page *rod.Page, delta float64) {
|
||
page.MustEval(`(delta) => {
|
||
// 查找滚动目标元素
|
||
let targetElement = document.querySelector('.note-scroller')
|
||
|| document.querySelector('.interaction-container')
|
||
|| document.documentElement;
|
||
|
||
// 触发滚轮事件(关键!这样才能触发懒加载)
|
||
const wheelEvent = new WheelEvent('wheel', {
|
||
deltaY: delta,
|
||
deltaMode: 0, // 像素模式
|
||
bubbles: true,
|
||
cancelable: true,
|
||
view: window
|
||
});
|
||
targetElement.dispatchEvent(wheelEvent);
|
||
}`, delta)
|
||
}
|
||
|
||
func scrollToLastComment(page *rod.Page) {
|
||
// 获取所有主评论元素
|
||
elements, err := page.Timeout(2 * time.Second).Elements(".parent-comment")
|
||
if err != nil || len(elements) == 0 {
|
||
return
|
||
}
|
||
// 滚动到最后一个评论
|
||
lastComment := elements[len(elements)-1]
|
||
lastComment.MustScrollIntoView()
|
||
}
|
||
|
||
// ========== DOM queries ==========
|
||
|
||
func getScrollTop(page *rod.Page) int {
|
||
var result int
|
||
|
||
// 使用retry-go来处理可能的DOM查询失败
|
||
err := retry.Do(
|
||
func() error {
|
||
evalResult := page.MustEval(`() => {
|
||
return window.pageYOffset || document.documentElement.scrollTop || document.body.scrollTop || 0;
|
||
}`)
|
||
|
||
result = evalResult.Int()
|
||
return nil
|
||
},
|
||
retry.Attempts(3),
|
||
retry.Delay(100*time.Millisecond),
|
||
retry.MaxJitter(200*time.Millisecond),
|
||
retry.OnRetry(func(n uint, err error) {
|
||
logrus.Debugf("获取滚动位置重试 #%d: %v", n, err)
|
||
}),
|
||
)
|
||
|
||
if err != nil {
|
||
logrus.Warnf("获取滚动位置失败: %v", err)
|
||
return 0 // 失败时返回0
|
||
}
|
||
|
||
return result
|
||
}
|
||
|
||
func getCommentCount(page *rod.Page) int {
|
||
var result int
|
||
|
||
// 使用retry-go来处理可能的DOM查询失败
|
||
err := retry.Do(
|
||
func() error {
|
||
// 使用 Go 获取评论元素
|
||
elements, err := page.Timeout(2 * time.Second).Elements(".parent-comment")
|
||
if err != nil {
|
||
return err
|
||
}
|
||
result = len(elements)
|
||
return nil
|
||
},
|
||
retry.Attempts(3),
|
||
retry.Delay(100*time.Millisecond),
|
||
retry.MaxJitter(200*time.Millisecond),
|
||
retry.OnRetry(func(n uint, err error) {
|
||
logrus.Debugf("获取评论计数重试 #%d: %v", n, err)
|
||
}),
|
||
)
|
||
|
||
if err != nil {
|
||
logrus.Warnf("获取评论计数失败: %v", err)
|
||
return 0 // 失败时返回0
|
||
}
|
||
|
||
return result
|
||
}
|
||
|
||
func getTotalCommentCount(page *rod.Page) int {
|
||
var result int
|
||
|
||
// 使用retry-go来处理可能的DOM查询失败
|
||
err := retry.Do(
|
||
func() error {
|
||
// 使用 Go 获取总评论数元素
|
||
totalEl, err := page.Timeout(2 * time.Second).Element(".comments-container .total")
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
// 获取文本内容
|
||
text, err := totalEl.Text()
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
// 使用正则提取数字
|
||
re := regexp.MustCompile(`共(\d+)条评论`)
|
||
matches := re.FindStringSubmatch(text)
|
||
if len(matches) > 1 {
|
||
count, err := strconv.Atoi(matches[1])
|
||
if err != nil {
|
||
return err
|
||
}
|
||
result = count
|
||
} else {
|
||
result = 0
|
||
}
|
||
|
||
return nil
|
||
},
|
||
retry.Attempts(3),
|
||
retry.Delay(100*time.Millisecond),
|
||
retry.MaxJitter(200*time.Millisecond),
|
||
retry.OnRetry(func(n uint, err error) {
|
||
logrus.Debugf("获取总评论计数重试 #%d: %v", n, err)
|
||
}),
|
||
)
|
||
|
||
if err != nil {
|
||
logrus.Warnf("获取总评论计数失败: %v", err)
|
||
return 0 // 失败时返回0
|
||
}
|
||
|
||
return result
|
||
}
|
||
|
||
func checkNoCommentsArea(page *rod.Page) bool {
|
||
// 查找无评论区域
|
||
noCommentsEl, err := page.Timeout(2 * time.Second).Element(".no-comments-text")
|
||
if err != nil {
|
||
// 未找到无评论元素,说明有评论或评论区正常
|
||
return false
|
||
}
|
||
|
||
// 获取文本内容
|
||
text, err := noCommentsEl.Text()
|
||
if err != nil {
|
||
return false
|
||
}
|
||
|
||
// 检查是否包含"这是一片荒地"等关键词
|
||
text = strings.TrimSpace(text)
|
||
return strings.Contains(text, "这是一片荒地")
|
||
}
|
||
|
||
func checkEndContainer(page *rod.Page) bool {
|
||
var result bool
|
||
|
||
// 使用retry-go来处理可能的DOM查询失败
|
||
err := retry.Do(
|
||
func() error {
|
||
// 使用 Go 查找结束容器
|
||
endEl, err := page.Timeout(2 * time.Second).Element(".end-container")
|
||
if err != nil {
|
||
// 未找到元素,说明未到底部
|
||
result = false
|
||
return nil
|
||
}
|
||
|
||
// 获取文本内容
|
||
text, err := endEl.Text()
|
||
if err != nil {
|
||
result = false
|
||
return nil
|
||
}
|
||
|
||
// 转换为大写并检查
|
||
textUpper := strings.ToUpper(strings.TrimSpace(text))
|
||
result = strings.Contains(textUpper, "THE END") || strings.Contains(textUpper, "THEEND")
|
||
return nil
|
||
},
|
||
retry.Attempts(3),
|
||
retry.Delay(100*time.Millisecond),
|
||
retry.MaxJitter(200*time.Millisecond),
|
||
retry.OnRetry(func(n uint, err error) {
|
||
logrus.Debugf("检查结束容器重试 #%d: %v", n, err)
|
||
}),
|
||
)
|
||
|
||
if err != nil {
|
||
logrus.Warnf("检查结束容器失败: %v", err)
|
||
return false // 失败时返回false
|
||
}
|
||
|
||
return result
|
||
}
|
||
|
||
// ========== Page checks ==========
|
||
|
||
func checkPageAccessible(page *rod.Page) error {
|
||
time.Sleep(500 * time.Millisecond)
|
||
|
||
// 查找错误提示容器
|
||
wrapperEl, err := page.Timeout(2 * time.Second).Element(".access-wrapper, .error-wrapper, .not-found-wrapper, .blocked-wrapper")
|
||
if err != nil {
|
||
// 未找到错误容器,说明页面可访问
|
||
return nil
|
||
}
|
||
|
||
// 获取文本内容
|
||
text, err := wrapperEl.Text()
|
||
if err != nil {
|
||
// 无法获取文本,假设页面可访问
|
||
return nil
|
||
}
|
||
|
||
// 检查关键词
|
||
keywords := []string{
|
||
"当前笔记暂时无法浏览",
|
||
"该内容因违规已被删除",
|
||
"该笔记已被删除",
|
||
"内容不存在",
|
||
"笔记不存在",
|
||
"已失效",
|
||
"私密笔记",
|
||
"仅作者可见",
|
||
"因用户设置,你无法查看",
|
||
"因违规无法查看",
|
||
}
|
||
|
||
for _, kw := range keywords {
|
||
if strings.Contains(text, kw) {
|
||
logrus.Warnf("笔记不可访问: %s", kw)
|
||
return fmt.Errorf("笔记不可访问: %s", kw)
|
||
}
|
||
}
|
||
|
||
// 如果有文本但不匹配关键词,返回未知错误
|
||
trimmedText := strings.TrimSpace(text)
|
||
if trimmedText != "" {
|
||
logrus.Warnf("笔记不可访问(未知原因): %s", trimmedText)
|
||
return fmt.Errorf("笔记不可访问: %s", trimmedText)
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
// ========== Data extraction ==========
|
||
|
||
func (f *FeedDetailAction) extractFeedDetail(page *rod.Page, feedID string) (*FeedDetailResponse, error) {
|
||
var result string
|
||
|
||
// 使用retry-go来处理可能的DOM查询失败
|
||
err := retry.Do(
|
||
func() error {
|
||
evalResult := page.MustEval(`() => {
|
||
if (window.__INITIAL_STATE__ &&
|
||
window.__INITIAL_STATE__.note &&
|
||
window.__INITIAL_STATE__.note.noteDetailMap) {
|
||
const noteDetailMap = window.__INITIAL_STATE__.note.noteDetailMap;
|
||
return JSON.stringify(noteDetailMap);
|
||
}
|
||
return "";
|
||
}`).String()
|
||
|
||
if evalResult != "" {
|
||
result = evalResult
|
||
return nil
|
||
}
|
||
return fmt.Errorf("无法获取初始状态数据")
|
||
},
|
||
retry.Attempts(3),
|
||
retry.Delay(200*time.Millisecond),
|
||
retry.MaxJitter(300*time.Millisecond),
|
||
retry.OnRetry(func(n uint, err error) {
|
||
logrus.Debugf("提取Feed详情重试 #%d: %v", n, err)
|
||
}),
|
||
)
|
||
|
||
if err != nil {
|
||
logrus.Errorf("提取Feed详情失败: %v", err)
|
||
return nil, fmt.Errorf("提取Feed详情失败: %w", err)
|
||
}
|
||
|
||
if result == "" {
|
||
return nil, errors.ErrNoFeedDetail
|
||
}
|
||
|
||
var noteDetailMap map[string]struct {
|
||
Note FeedDetail `json:"note"`
|
||
Comments CommentList `json:"comments"`
|
||
}
|
||
|
||
if err := json.Unmarshal([]byte(result), ¬eDetailMap); err != nil {
|
||
return nil, fmt.Errorf("failed to unmarshal noteDetailMap: %w", err)
|
||
}
|
||
|
||
noteDetail, exists := noteDetailMap[feedID]
|
||
if !exists {
|
||
return nil, fmt.Errorf("feed %s not found in noteDetailMap", feedID)
|
||
}
|
||
|
||
return &FeedDetailResponse{
|
||
Note: noteDetail.Note,
|
||
Comments: noteDetail.Comments,
|
||
}, nil
|
||
}
|
||
|
||
// makeFeedDetailURL builds the explore-page URL for feedID, passing xsecToken
// as the xsec_token query parameter. Both values are inserted verbatim, with
// no URL escaping.
func makeFeedDetailURL(feedID, xsecToken string) string {
	const feedDetailURLFormat = "https://www.xiaohongshu.com/explore/%s?xsec_token=%s&xsec_source=pc_feed"
	return fmt.Sprintf(feedDetailURLFormat, feedID, xsecToken)
}
|