fix: 详情增加自定义加载数量

This commit is contained in:
chekayo
2025-12-03 02:42:50 +08:00
parent c5fa30bd3e
commit 9b15339ef0
6 changed files with 417 additions and 186 deletions

View File

@@ -164,8 +164,23 @@ func (s *AppServer) getFeedDetailHandler(c *gin.Context) {
return return
} }
// 获取 Feed 详情 var result *FeedDetailResponse
result, err := s.xiaohongshuService.GetFeedDetail(c.Request.Context(), req.FeedID, req.XsecToken, req.LoadAllComments) var err error
if req.CommentConfig != nil {
// 使用配置参数
config := xiaohongshu.CommentLoadConfig{
ClickMoreReplies: req.CommentConfig.ClickMoreReplies,
MaxRepliesThreshold: req.CommentConfig.MaxRepliesThreshold,
MaxCommentItems: req.CommentConfig.MaxCommentItems,
ScrollSpeed: req.CommentConfig.ScrollSpeed,
}
result, err = s.xiaohongshuService.GetFeedDetailWithConfig(c.Request.Context(), req.FeedID, req.XsecToken, req.LoadAllComments, config)
} else {
// 使用默认配置
result, err = s.xiaohongshuService.GetFeedDetail(c.Request.Context(), req.FeedID, req.XsecToken, req.LoadAllComments)
}
if err != nil { if err != nil {
respondError(c, http.StatusInternalServerError, "GET_FEED_DETAIL_FAILED", respondError(c, http.StatusInternalServerError, "GET_FEED_DETAIL_FAILED",
"获取Feed详情失败", err.Error()) "获取Feed详情失败", err.Error())

View File

@@ -322,9 +322,53 @@ func (s *AppServer) handleGetFeedDetail(ctx context.Context, args map[string]any
} }
} }
logrus.Infof("MCP: 获取Feed详情 - Feed ID: %s, loadAllComments=%v", feedID, loadAll) // 解析评论配置参数,如果未提供则使用默认值
config := xiaohongshu.DefaultCommentLoadConfig()
result, err := s.xiaohongshuService.GetFeedDetail(ctx, feedID, xsecToken, loadAll) if raw, ok := args["click_more_replies"]; ok {
switch v := raw.(type) {
case bool:
config.ClickMoreReplies = v
case string:
if parsed, err := strconv.ParseBool(v); err == nil {
config.ClickMoreReplies = parsed
}
}
}
if raw, ok := args["max_replies_threshold"]; ok {
switch v := raw.(type) {
case float64:
config.MaxRepliesThreshold = int(v)
case string:
if parsed, err := strconv.Atoi(v); err == nil {
config.MaxRepliesThreshold = parsed
}
case int:
config.MaxRepliesThreshold = v
}
}
if raw, ok := args["max_comment_items"]; ok {
switch v := raw.(type) {
case float64:
config.MaxCommentItems = int(v)
case string:
if parsed, err := strconv.Atoi(v); err == nil {
config.MaxCommentItems = parsed
}
case int:
config.MaxCommentItems = v
}
}
if raw, ok := args["scroll_speed"].(string); ok && raw != "" {
config.ScrollSpeed = raw
}
logrus.Infof("MCP: 获取Feed详情 - Feed ID: %s, loadAllComments=%v, config=%+v", feedID, loadAll, config)
result, err := s.xiaohongshuService.GetFeedDetailWithConfig(ctx, feedID, xsecToken, loadAll, config)
if err != nil { if err != nil {
return &MCPToolResult{ return &MCPToolResult{
Content: []MCPContent{{ Content: []MCPContent{{

View File

@@ -45,9 +45,13 @@ type FilterOption struct {
// FeedDetailArgs 获取Feed详情的参数 // FeedDetailArgs 获取Feed详情的参数
type FeedDetailArgs struct { type FeedDetailArgs struct {
FeedID string `json:"feed_id" jsonschema:"小红书笔记ID从Feed列表获取"` FeedID string `json:"feed_id" jsonschema:"小红书笔记ID从Feed列表获取"`
XsecToken string `json:"xsec_token" jsonschema:"访问令牌从Feed列表的xsecToken字段获取"` XsecToken string `json:"xsec_token" jsonschema:"访问令牌从Feed列表的xsecToken字段获取"`
LoadAllComments bool `json:"load_all_comments,omitempty" jsonschema:"是否加载全部评论默认false仅返回首批评论"` LoadAllComments bool `json:"load_all_comments,omitempty" jsonschema:"是否加载全部评论默认false仅返回首批评论"`
ClickMoreReplies bool `json:"click_more_replies,omitempty" jsonschema:"是否点击'更多回复'按钮 (默认: false)"`
MaxRepliesThreshold int `json:"max_replies_threshold,omitempty" jsonschema:"回复数量阈值,超过此数量的'更多'按钮将被跳过 (0表示不跳过任何, 默认: 10)"`
MaxCommentItems int `json:"max_comment_items,omitempty" jsonschema:"最大加载评论数0表示加载所有, 默认: 0"`
ScrollSpeed string `json:"scroll_speed,omitempty" jsonschema:"滚动速度: 'slow'|'normal'|'fast' (默认: 'normal')"`
} }
// UserProfileArgs 获取用户主页的参数 // UserProfileArgs 获取用户主页的参数
@@ -214,9 +218,13 @@ func registerTools(server *mcp.Server, appServer *AppServer) {
}, },
withPanicRecovery("get_feed_detail", func(ctx context.Context, req *mcp.CallToolRequest, args FeedDetailArgs) (*mcp.CallToolResult, any, error) { withPanicRecovery("get_feed_detail", func(ctx context.Context, req *mcp.CallToolRequest, args FeedDetailArgs) (*mcp.CallToolResult, any, error) {
argsMap := map[string]interface{}{ argsMap := map[string]interface{}{
"feed_id": args.FeedID, "feed_id": args.FeedID,
"xsec_token": args.XsecToken, "xsec_token": args.XsecToken,
"load_all_comments": args.LoadAllComments, "load_all_comments": args.LoadAllComments,
"click_more_replies": args.ClickMoreReplies,
"max_replies_threshold": args.MaxRepliesThreshold,
"max_comment_items": args.MaxCommentItems,
"scroll_speed": args.ScrollSpeed,
} }
result := appServer.handleGetFeedDetail(ctx, argsMap) result := appServer.handleGetFeedDetail(ctx, argsMap)
return convertToMCPResult(result), nil, nil return convertToMCPResult(result), nil, nil

View File

@@ -322,6 +322,11 @@ func (s *XiaohongshuService) SearchFeeds(ctx context.Context, keyword string, fi
// GetFeedDetail 获取Feed详情 // GetFeedDetail 获取Feed详情
func (s *XiaohongshuService) GetFeedDetail(ctx context.Context, feedID, xsecToken string, loadAllComments bool) (*FeedDetailResponse, error) { func (s *XiaohongshuService) GetFeedDetail(ctx context.Context, feedID, xsecToken string, loadAllComments bool) (*FeedDetailResponse, error) {
return s.GetFeedDetailWithConfig(ctx, feedID, xsecToken, loadAllComments, xiaohongshu.DefaultCommentLoadConfig())
}
// GetFeedDetailWithConfig 使用配置获取Feed详情
func (s *XiaohongshuService) GetFeedDetailWithConfig(ctx context.Context, feedID, xsecToken string, loadAllComments bool, config xiaohongshu.CommentLoadConfig) (*FeedDetailResponse, error) {
b := newBrowser() b := newBrowser()
defer b.Close() defer b.Close()
@@ -332,7 +337,7 @@ func (s *XiaohongshuService) GetFeedDetail(ctx context.Context, feedID, xsecToke
action := xiaohongshu.NewFeedDetailAction(page) action := xiaohongshu.NewFeedDetailAction(page)
// 获取 Feed 详情 // 获取 Feed 详情
result, err := action.GetFeedDetail(ctx, feedID, xsecToken, loadAllComments) result, err := action.GetFeedDetailWithConfig(ctx, feedID, xsecToken, loadAllComments, config)
if err != nil { if err != nil {
return nil, err return nil, err
} }

View File

@@ -34,11 +34,24 @@ type MCPContent struct {
Data string `json:"data"` Data string `json:"data"`
} }
// CommentLoadConfig is the request-level (JSON) form of the comment loading
// options. Every field is optional on the wire (omitempty).
type CommentLoadConfig struct {
// ClickMoreReplies controls whether the "more replies" buttons are clicked.
ClickMoreReplies bool `json:"click_more_replies,omitempty"`
// MaxRepliesThreshold is the reply-count threshold: a "more" button whose
// reply count exceeds it is skipped. 0 means no button is skipped.
MaxRepliesThreshold int `json:"max_replies_threshold,omitempty"`
// MaxCommentItems is the maximum number of comments (comment-item elements)
// to load. 0 means load all of them.
MaxCommentItems int `json:"max_comment_items,omitempty"`
// ScrollSpeed is the scroll speed level: "slow", "normal" or "fast".
ScrollSpeed string `json:"scroll_speed,omitempty"`
}
// FeedDetailRequest Feed详情请求 // FeedDetailRequest Feed详情请求
type FeedDetailRequest struct { type FeedDetailRequest struct {
FeedID string `json:"feed_id" binding:"required"` FeedID string `json:"feed_id" binding:"required"`
XsecToken string `json:"xsec_token" binding:"required"` XsecToken string `json:"xsec_token" binding:"required"`
LoadAllComments bool `json:"load_all_comments,omitempty"` LoadAllComments bool `json:"load_all_comments,omitempty"`
CommentConfig *CommentLoadConfig `json:"comment_config,omitempty"`
} }
type SearchFeedsRequest struct { type SearchFeedsRequest struct {

View File

@@ -4,6 +4,9 @@ import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"math/rand"
"regexp"
"strconv"
"time" "time"
"github.com/go-rod/rod" "github.com/go-rod/rod"
@@ -12,6 +15,28 @@ import (
"github.com/xpzouying/xiaohongshu-mcp/errors" "github.com/xpzouying/xiaohongshu-mcp/errors"
) )
// CommentLoadConfig holds the options that steer how comments are loaded on
// a feed detail page.
type CommentLoadConfig struct {
// ClickMoreReplies controls whether the "more replies" buttons are clicked.
ClickMoreReplies bool
// MaxRepliesThreshold is the reply-count threshold: a "more" button whose
// reply count exceeds it is skipped. 0 means no button is skipped.
MaxRepliesThreshold int
// MaxCommentItems is the maximum number of comments (comment-item elements)
// to load. 0 means load all of them.
MaxCommentItems int
// ScrollSpeed is the scroll speed level: "slow", "normal" or "fast".
ScrollSpeed string
}
// DefaultCommentLoadConfig returns the configuration used when the caller
// supplies no overrides.
func DefaultCommentLoadConfig() CommentLoadConfig {
	var cfg CommentLoadConfig
	cfg.ClickMoreReplies = false // "more replies" buttons are left alone by default
	cfg.MaxRepliesThreshold = 10 // threads with more than 10 replies are skipped by default
	cfg.MaxCommentItems = 0      // 0 = load every comment
	cfg.ScrollSpeed = "normal"
	return cfg
}
// FeedDetailAction 表示 Feed 详情页动作 // FeedDetailAction 表示 Feed 详情页动作
type FeedDetailAction struct { type FeedDetailAction struct {
page *rod.Page page *rod.Page
@@ -23,12 +48,19 @@ func NewFeedDetailAction(page *rod.Page) *FeedDetailAction {
} }
// GetFeedDetail 获取 Feed 详情页数据 // GetFeedDetail 获取 Feed 详情页数据
func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken string, loadAllComments bool) (*FeedDetailResponse, error) { func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken string, loadAllComments bool, config CommentLoadConfig) (*FeedDetailResponse, error) {
page := f.page.Context(ctx).Timeout(5 * time.Minute) return f.GetFeedDetailWithConfig(ctx, feedID, xsecToken, loadAllComments, config)
}
// GetFeedDetailWithConfig 获取 Feed 详情页数据(带配置)
func (f *FeedDetailAction) GetFeedDetailWithConfig(ctx context.Context, feedID, xsecToken string, loadAllComments bool, config CommentLoadConfig) (*FeedDetailResponse, error) {
page := f.page.Context(ctx).Timeout(10 * time.Minute)
// 构建详情页 URL // 构建详情页 URL
url := makeFeedDetailURL(feedID, xsecToken) url := makeFeedDetailURL(feedID, xsecToken)
logrus.Infof("打开 feed 详情页: %s", url) logrus.Infof("打开 feed 详情页: %s", url)
logrus.Infof("配置: 点击更多=%v, 回复阈值=%d, 最大评论数=%d, 滚动速度=%s",
config.ClickMoreReplies, config.MaxRepliesThreshold, config.MaxCommentItems, config.ScrollSpeed)
// 导航到详情页 // 导航到详情页
page.MustNavigate(url) page.MustNavigate(url)
@@ -42,7 +74,7 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken
// 加载全部评论 // 加载全部评论
if loadAllComments { if loadAllComments {
if err := f.loadAllComments(page); err != nil { if err := f.loadAllCommentsWithConfig(page, config); err != nil {
logrus.Warnf("加载全部评论失败: %v", err) logrus.Warnf("加载全部评论失败: %v", err)
} }
} }
@@ -99,23 +131,28 @@ func checkPageAccessible(page *rod.Page) error {
return nil return nil
} }
// loadAllComments 加载所有评论 // loadAllCommentsWithConfig 加载所有评论(带配置)
func (f *FeedDetailAction) loadAllComments(page *rod.Page) error { func (f *FeedDetailAction) loadAllCommentsWithConfig(page *rod.Page, config CommentLoadConfig) error {
maxAttempts := 500
if config.MaxCommentItems > 0 {
// 如果设置了最大评论数,减少尝试次数
maxAttempts = config.MaxCommentItems * 3
}
const ( const (
maxAttempts = 500 stagnantLimit = 20
scrollInterval = 600 * time.Millisecond noScrollChangeLimit = 15
clickMoreInterval = 1 // 每次滚动都检查"更多"按钮 minScrollDelta = 10
stagnantLimit = 20 // 增加停滞容忍度
noScrollChangeLimit = 15 // 增加滚动停滞容忍度
minScrollDelta = 10 // 最小有效滚动距离
aggressiveClickEvery = 5 // 每5次尝试进行一次激进点击
) )
logrus.Info("开始加载所有评论...") // 获取滚动间隔(根据速度)
scrollInterval := getScrollInterval(config.ScrollSpeed)
logrus.Info("开始加载评论...")
// 先滚动到评论区 // 先滚动到评论区
scrollToCommentsArea(page) scrollToCommentsArea(page)
time.Sleep(1 * time.Second) humanDelay()
var ( var (
lastCount = 0 lastCount = 0
@@ -123,6 +160,7 @@ func (f *FeedDetailAction) loadAllComments(page *rod.Page) error {
stagnantChecks = 0 stagnantChecks = 0
noScrollChangeCount = 0 noScrollChangeCount = 0
totalClickedButtons = 0 totalClickedButtons = 0
skippedButtons = 0
attempt = 0 attempt = 0
) )
@@ -132,36 +170,42 @@ func (f *FeedDetailAction) loadAllComments(page *rod.Page) error {
// === 1. 检查是否到达底部 === // === 1. 检查是否到达底部 ===
if checkEndContainer(page) { if checkEndContainer(page) {
logrus.Infof("✓ 检测到 'THE END' 元素,已滑动到底部") logrus.Infof("✓ 检测到 'THE END' 元素,已滑动到底部")
// 到底部后再做最后一轮点击 humanDelay()
finalClicked := clickShowMoreButtons(page)
totalClickedButtons += finalClicked
if finalClicked > 0 {
logrus.Infof("底部最后点击了 %d 个按钮", finalClicked)
time.Sleep(1 * time.Second)
}
currentCount := getCommentCount(page) currentCount := getCommentCount(page)
logrus.Infof("✓ 加载完成: %d 条评论, 尝试次数: %d, 点击按钮: %d", logrus.Infof("✓ 加载完成: %d 条评论, 尝试次数: %d, 点击: %d, 跳过: %d",
currentCount, attempt+1, totalClickedButtons) currentCount, attempt+1, totalClickedButtons, skippedButtons)
return nil return nil
} }
// === 2. 每次都点击"更多"按钮 === // === 2. 获取当前评论数 ===
if attempt%clickMoreInterval == 0 { currentCount := getCommentCount(page)
clicked := clickShowMoreButtons(page)
if clicked > 0 {
totalClickedButtons += clicked
logrus.Infof("点击了 %d 个'更多'按钮,累计: %d", clicked, totalClickedButtons)
time.Sleep(500 * time.Millisecond)
// 多轮检查 // === 3. 点击"更多"按钮(人性化:每隔几次尝试才点击一次) ===
for round := 0; round < 2; round++ { if config.ClickMoreReplies && attempt%3 == 0 {
time.Sleep(300 * time.Millisecond) clicked, skipped := clickShowMoreButtonsSmart(page, config.MaxRepliesThreshold)
clicked2 := clickShowMoreButtons(page) if clicked > 0 || skipped > 0 {
if clicked2 > 0 { totalClickedButtons += clicked
skippedButtons += skipped
logrus.Infof("点击'更多': %d 个, 跳过: %d 个, 累计点击: %d, 累计跳过: %d",
clicked, skipped, totalClickedButtons, skippedButtons)
// 点击后等待更长时间模拟人阅读新内容800-1500ms
readTime := time.Duration(800+rand.Intn(700)) * time.Millisecond
time.Sleep(readTime)
// 多轮检查(但减少轮数,避免太频繁)
for round := 0; round < 1; round++ {
// 等待一段时间再检查(模拟人继续浏览)
time.Sleep(time.Duration(500+rand.Intn(500)) * time.Millisecond)
clicked2, skipped2 := clickShowMoreButtonsSmart(page, config.MaxRepliesThreshold)
if clicked2 > 0 || skipped2 > 0 {
totalClickedButtons += clicked2 totalClickedButtons += clicked2
logrus.Infof("第 %d 轮再次点击了 %d 个按钮", round+2, clicked2) skippedButtons += skipped2
time.Sleep(500 * time.Millisecond) logrus.Infof("第 %d 轮: 点击 %d, 跳过 %d", round+2, clicked2, skipped2)
// 再次等待阅读时间
readTime2 := time.Duration(600+rand.Intn(600)) * time.Millisecond
time.Sleep(readTime2)
} else { } else {
break break
} }
@@ -169,103 +213,258 @@ func (f *FeedDetailAction) loadAllComments(page *rod.Page) error {
} }
} }
// === 4. 获取当前评论数量 === // === 4. 获取评论数量 ===
currentCount := getCommentCount(page)
totalCount := getTotalCommentCount(page) totalCount := getTotalCommentCount(page)
logrus.Debugf("当前评论: %d, 目标: %d", currentCount, totalCount) logrus.Debugf("当前评论: %d, 目标: %d", currentCount, totalCount)
// 检查是否已加载所有评论(但继续滚动到底部确认)
if totalCount > 0 && currentCount >= totalCount {
logrus.Infof("评论数量已达标: %d/%d继续滚动到底部确认...", currentCount, totalCount)
// 不要立即返回,继续滚动到底部
}
// === 5. 检查评论数量变化 === // === 5. 检查评论数量变化 ===
if currentCount != lastCount { if currentCount != lastCount {
logrus.Infof("✓ 评论数量增加: %d -> %d (+%d)", lastCount, currentCount, currentCount-lastCount) logrus.Infof("✓ 评论增加: %d -> %d (+%d)", lastCount, currentCount, currentCount-lastCount)
lastCount = currentCount lastCount = currentCount
stagnantChecks = 0 // 重置停滞计数 stagnantChecks = 0
} else { } else {
stagnantChecks++ stagnantChecks++
if stagnantChecks%5 == 0 { if stagnantChecks%5 == 0 {
logrus.Debugf("评论数量停滞 %d 次", stagnantChecks) logrus.Debugf("评论停滞 %d 次", stagnantChecks)
} }
} }
// 只有在严重停滞时才考虑退出 // === 5.1 检查是否已达到目标评论数(在评论数停滞时)===
if config.MaxCommentItems > 0 && currentCount >= config.MaxCommentItems {
// 达到目标且停滞2次确认加载完成
if stagnantChecks >= 2 {
logrus.Infof("✓ 已达到目标评论数: %d/%d (停滞%d次), 停止加载",
currentCount, config.MaxCommentItems, stagnantChecks)
return nil
}
// 刚达到目标,继续滚动确认
if stagnantChecks > 0 {
logrus.Debugf("已达目标数 %d/%d再确认 %d 次...",
currentCount, config.MaxCommentItems, 2-stagnantChecks)
}
}
// === 6. 停滞处理 ===
if stagnantChecks >= stagnantLimit { if stagnantChecks >= stagnantLimit {
logrus.Infof("评论数量长期停滞,尝试最后冲刺...") logrus.Infof("评论停滞,尝试最后冲刺...")
// 最后冲刺:大幅滚动 + 点击 finalPush(page, config.ScrollSpeed)
finalPush(page)
finalClicked := clickShowMoreButtons(page)
totalClickedButtons += finalClicked
if checkEndContainer(page) { if checkEndContainer(page) {
logrus.Infof("✓ 最终到达底部,评论数: %d, 点击按钮: %d", logrus.Infof("✓ 到达底部,评论数: %d", currentCount)
currentCount, totalClickedButtons)
return nil return nil
} }
// 还没到底部,继续 logrus.Infof("未到底部,重置停滞计数")
logrus.Infof("未到底部,重置停滞计数,继续加载...")
stagnantChecks = 0 stagnantChecks = 0
} }
// === 6. 执行滚动 === // === 7. 执行人性化滚动 ===
_, scrollDelta, currentScrollTop := scrollWithMouse(page) // 先滚动到最后一个评论(触发懒加载的关键!)
if currentCount > 0 {
scrollToLastComment(page)
time.Sleep(time.Duration(300+rand.Intn(200)) * time.Millisecond)
}
_, scrollDelta, currentScrollTop := humanScroll(page, config.ScrollSpeed)
// === 7. 检查滚动变化 === // === 8. 检查滚动变化 ===
if scrollDelta < minScrollDelta || currentScrollTop == lastScrollTop { if scrollDelta < minScrollDelta || currentScrollTop == lastScrollTop {
noScrollChangeCount++ noScrollChangeCount++
if noScrollChangeCount%5 == 0 { if noScrollChangeCount%5 == 0 {
logrus.Debugf("滚动停滞 %d 次,尝试大幅滚动", noScrollChangeCount) logrus.Debugf("滚动停滞 %d 次", noScrollChangeCount)
// 尝试更大幅度滚动 largeScroll(page, config.ScrollSpeed)
largeScroll(page) humanDelay()
time.Sleep(300 * time.Millisecond)
} }
} else { } else {
noScrollChangeCount = 0 noScrollChangeCount = 0
lastScrollTop = currentScrollTop lastScrollTop = currentScrollTop
} }
// 只有严重滚动停滞时才考虑结束 // === 9. 滚动停滞处理 ===
if noScrollChangeCount >= noScrollChangeLimit { if noScrollChangeCount >= noScrollChangeLimit {
logrus.Infof("滚动严重停滞,尝试最后冲刺...") logrus.Infof("滚动停滞,最后冲刺...")
finalPush(page) finalPush(page, config.ScrollSpeed)
if checkEndContainer(page) { if checkEndContainer(page) {
currentCount := getCommentCount(page) logrus.Infof("✓ 到达底部,评论数: %d", currentCount)
logrus.Infof("✓ 最终到达底部,评论数: %d, 点击按钮: %d",
currentCount, totalClickedButtons)
return nil return nil
} }
// 重置计数继续 logrus.Infof("重置滚动计数")
logrus.Infof("未到底部,重置滚动计数,继续加载...")
noScrollChangeCount = 0 noScrollChangeCount = 0
lastScrollTop = 0 lastScrollTop = 0
} }
// === 8. 等待内容加载 === // === 10. 等待内容加载 ===
time.Sleep(scrollInterval) time.Sleep(scrollInterval)
} }
// === 9. 达到最大尝试次数,做最后冲刺 === // === 11. 最后冲刺 ===
logrus.Infof("达到最大尝试次数 %d执行最后冲刺...", maxAttempts) logrus.Infof("达到最大尝试次数最后冲刺...")
finalPush(page) finalPush(page, config.ScrollSpeed)
finalClicked := clickShowMoreButtons(page)
totalClickedButtons += finalClicked
currentCount := getCommentCount(page) currentCount := getCommentCount(page)
hasEnd := checkEndContainer(page) hasEnd := checkEndContainer(page)
logrus.Infof("✓ 加载结束: %d 条评论, 点击按钮: %d, 到达底部: %v", logrus.Infof("✓ 加载结束: %d 条评论, 点击: %d, 跳过: %d, 到达底部: %v",
currentCount, totalClickedButtons, hasEnd) currentCount, totalClickedButtons, skippedButtons, hasEnd)
return nil return nil
} }
// getScrollInterval picks a randomized pause for the given speed level:
// "slow" yields 1200-1499 ms, "fast" yields 300-399 ms, and anything else
// (including "normal") yields 600-799 ms.
func getScrollInterval(speed string) time.Duration {
	// Defaults cover "normal" and any unrecognized value.
	baseMs, jitterMs := 600, 200
	if speed == "slow" {
		baseMs, jitterMs = 1200, 300
	} else if speed == "fast" {
		baseMs, jitterMs = 300, 100
	}
	return time.Duration(baseMs+rand.Intn(jitterMs)) * time.Millisecond
}
// humanDelay blocks the caller for a random 300-699 ms to imitate the pause
// of a human user.
func humanDelay() {
	jitterMs := rand.Intn(400)
	time.Sleep(time.Duration(300+jitterMs) * time.Millisecond)
}
// showMoreReplyCountRe extracts the reply count from a "展开 N 条回复" button
// label. It is compiled once at package scope rather than on every call.
var showMoreReplyCountRe = regexp.MustCompile(`展开\s*(\d+)\s*条回复`)

// clickShowMoreButtonsSmart clicks the visible ".show-more" buttons on the
// page with human-like pacing.
//
// A button whose label advertises more than maxRepliesThreshold replies is
// skipped (a threshold of 0 or less disables skipping). At most 3-5 buttons
// (chosen randomly) are clicked per call. The function is best-effort: any
// element that cannot be inspected, scrolled to or clicked is passed over
// instead of aborting the whole round.
//
// It returns the number of buttons clicked and the number skipped because of
// the threshold.
func clickShowMoreButtonsSmart(page *rod.Page, maxRepliesThreshold int) (clicked, skipped int) {
	elements, err := page.Elements(".show-more")
	if err != nil {
		return 0, 0
	}

	// Cap the clicks per call at 3-5 so one round never expands everything at once.
	maxClickPerRound := 3 + rand.Intn(3)

	for _, el := range elements {
		if clicked >= maxClickPerRound {
			break
		}

		// Ignore hidden elements and elements whose visibility cannot be read.
		visible, err := el.Visible()
		if err != nil || !visible {
			continue
		}

		// Ignore elements that currently have no layout box.
		box, err := el.Shape()
		if err != nil || len(box.Quads) == 0 {
			continue
		}

		text, err := el.Text()
		if err != nil {
			continue
		}

		// Skip buttons that would expand more replies than the threshold allows.
		if maxRepliesThreshold > 0 {
			if m := showMoreReplyCountRe.FindStringSubmatch(text); len(m) > 1 {
				if replyCount, err := strconv.Atoi(m[1]); err == nil && replyCount > maxRepliesThreshold {
					logrus.Debugf("跳过'%s'(回复数 %d > 阈值 %d", text, replyCount, maxRepliesThreshold)
					skipped++
					continue
				}
			}
		}

		// 1. Bring the button into view. Eval (not MustEval) is used so that an
		// element detached by a re-render is skipped rather than causing a panic.
		if _, err := el.Eval(`() => {
try {
this.scrollIntoView({behavior: 'smooth', block: 'center'});
} catch (e) {}
}`); err != nil {
			logrus.Debugf("滚动到'%s'失败: %v", text, err)
			continue
		}

		// 2. Let the smooth scroll settle and imitate a reaction time (300-799 ms).
		time.Sleep(time.Duration(300+rand.Intn(500)) * time.Millisecond)

		// 3. Hover over the button centre. The shape is re-read because the
		// element moved; a failed read must not be dereferenced (Shape returns
		// a nil result together with an error).
		if box, err := el.Shape(); err == nil && len(box.Quads) > 0 && len(box.Quads[0]) >= 6 {
			quad := box.Quads[0]
			// Quad layout is x1,y1,x2,y2,x3,y3,x4,y4; the centre is the midpoint
			// of the diagonal (x1,y1)-(x3,y3).
			x := float64(quad[0]+quad[4]) / 2
			y := float64(quad[1]+quad[5]) / 2
			if err := rod.Try(func() { page.Mouse.MustMoveTo(x, y) }); err != nil {
				logrus.Debugf("鼠标移动到'%s'失败: %v", text, err)
			} else {
				// Brief hover before clicking (100-299 ms).
				time.Sleep(time.Duration(100+rand.Intn(200)) * time.Millisecond)
			}
		}

		// 4. Click; a failed click is not counted.
		if err := el.Click(proto.InputMouseButtonLeft, 1); err != nil {
			logrus.Debugf("点击'%s'失败: %v", text, err)
			continue
		}
		clicked++
		logrus.Debugf("点击了'%s'", text)

		// 5. Pause as if reading the newly revealed replies (500-1199 ms).
		time.Sleep(time.Duration(500+rand.Intn(700)) * time.Millisecond)
	}

	return clicked, skipped
}
// humanScroll scrolls the window down by a randomized, speed-dependent
// distance using window.scrollBy.
//
// The distance is a fraction of the viewport height (slow: 50-70%, fast:
// 90-110%, otherwise 70-90%), raised to at least 400 px and then shifted by a
// random offset in [-50, 49] px.
//
// It returns whether the page moved by more than 5 px, the measured change in
// scroll position, and the scroll position after the scroll.
func humanScroll(page *rod.Page, speed string) (bool, int, int) {
	startTop := getScrollTop(page)
	viewport := page.MustEval(`() => window.innerHeight`).Int()

	// Fraction of the viewport to travel, depending on the speed level.
	var ratio float64
	if speed == "slow" {
		ratio = 0.5 + rand.Float64()*0.2
	} else if speed == "fast" {
		ratio = 0.9 + rand.Float64()*0.2
	} else {
		ratio = 0.7 + rand.Float64()*0.2
	}

	distance := float64(viewport) * ratio
	if distance < 400 {
		distance = 400
	}
	// Random wobble so consecutive scrolls are never identical.
	distance += float64(rand.Intn(100) - 50)

	page.MustEval(`(delta) => { window.scrollBy(0, delta); }`, distance)

	// Give the scroll a moment to take effect before measuring.
	time.Sleep(time.Duration(100+rand.Intn(100)) * time.Millisecond)

	endTop := getScrollTop(page)
	moved := endTop - startTop
	if moved > 5 {
		logrus.Debugf("滚动: %d -> %d (Δ%d)", startTop, endTop, moved)
		return true, moved, endTop
	}
	return false, moved, endTop
}
// scrollToCommentsArea 滚动到评论区 // scrollToCommentsArea 滚动到评论区
func scrollToCommentsArea(page *rod.Page) { func scrollToCommentsArea(page *rod.Page) {
logrus.Info("滚动到评论区...") logrus.Info("滚动到评论区...")
@@ -277,80 +476,59 @@ func scrollToCommentsArea(page *rod.Page) {
}`) }`)
} }
// finalPush 最后冲刺:大幅滚动到底部 // scrollToLastComment 滚动到最后一个评论(触发懒加载的关键)
func finalPush(page *rod.Page) { func scrollToLastComment(page *rod.Page) {
logrus.Info("执行最后冲刺滚动...") page.MustEval(`() => {
const container = document.querySelector('.comments-container');
if (!container) return;
// 查找最后一个主评论
const comments = container.querySelectorAll('.parent-comment');
if (comments.length > 0) {
const lastComment = comments[comments.length - 1];
// 滚动到最后一个评论,让它出现在视口中间偏下位置
lastComment.scrollIntoView({behavior: 'smooth', block: 'center'});
}
}`)
}
for i := 0; i < 20; i++ { // finalPush 最后冲刺:大幅滚动到底部
// 检查是否已经到底部 func finalPush(page *rod.Page, speed string) {
logrus.Info("执行最后冲刺...")
for i := 0; i < 15; i++ {
if checkEndContainer(page) { if checkEndContainer(page) {
logrus.Debug("已到底部,停止冲刺")
return return
} }
beforeTop := getScrollTop(page) beforeTop := getScrollTop(page)
largeScroll(page, speed)
// 大幅滚动 // 人性化延迟
largeScroll(page) time.Sleep(time.Duration(200+rand.Intn(200)) * time.Millisecond)
time.Sleep(200 * time.Millisecond)
// 点击出现的按钮
clicked := clickShowMoreButtons(page)
if clicked > 0 {
time.Sleep(500 * time.Millisecond)
}
afterTop := getScrollTop(page) afterTop := getScrollTop(page)
// 如果滚动没变化尝试JS滚动
if afterTop == beforeTop { if afterTop == beforeTop {
page.MustEval(`() => { page.MustEval(`() => window.scrollTo(0, document.body.scrollHeight)`)
window.scrollTo(0, document.body.scrollHeight); time.Sleep(time.Duration(300+rand.Intn(200)) * time.Millisecond)
}`)
time.Sleep(300 * time.Millisecond)
} }
} }
} }
// largeScroll 大幅度滚动 // largeScroll 大幅度滚动
func largeScroll(page *rod.Page) { func largeScroll(page *rod.Page, speed string) {
// 方法1: Mouse.Scroll 大幅度滚动 var scrollDelta float64
page.Mouse.Scroll(0, 2000, 5) switch speed {
time.Sleep(100 * time.Millisecond) case "slow":
} scrollDelta = 1000 + float64(rand.Intn(500))
case "fast":
// scrollWithMouse 使用 Mouse 模拟滚轮滚动 scrollDelta = 3000 + float64(rand.Intn(1000))
func scrollWithMouse(page *rod.Page) (bool, int, int) { default: // normal
beforeTop := getScrollTop(page) scrollDelta = 2000 + float64(rand.Intn(500))
// 获取视口高度
viewportHeight := page.MustEval(`() => window.innerHeight`).Int()
// 计算滚动距离(每次滚动视口高度的 80%
scrollDelta := float64(viewportHeight) * 0.8
if scrollDelta < 500 {
scrollDelta = 500
} }
// 使用 Mouse.Scroll 模拟滚轮滚动 page.MustEval(`(delta) => { window.scrollBy(0, delta); }`, scrollDelta)
err := page.Mouse.Scroll(0, scrollDelta, 5) time.Sleep(time.Duration(100+rand.Intn(50)) * time.Millisecond)
if err != nil {
logrus.Warnf("鼠标滚动失败: %v", err)
return false, 0, beforeTop
}
// 等待滚动完成
time.Sleep(150 * time.Millisecond)
afterTop := getScrollTop(page)
actualDelta := afterTop - beforeTop
scrolled := actualDelta > 5
if scrolled {
logrus.Debugf("滚动: %d -> %d (Δ%d)", beforeTop, afterTop, actualDelta)
}
return scrolled, actualDelta, afterTop
} }
// getScrollTop 获取当前滚动位置 // getScrollTop 获取当前滚动位置
@@ -361,44 +539,12 @@ func getScrollTop(page *rod.Page) int {
return result.Int() return result.Int()
} }
// clickShowMoreButtons clicks every visible ".show-more" element on the page
// and returns the number of clicks that succeeded. If the elements cannot be
// queried it returns 0.
func clickShowMoreButtons(page *rod.Page) int {
elements, err := page.Elements(".show-more")
if err != nil {
return 0
}
clickedCount := 0
for _, el := range elements {
// Skip elements that are hidden or whose visibility cannot be determined.
visible, err := el.Visible()
if err != nil || !visible {
continue
}
// Skip elements that report no layout quads (original note: check that
// the element is still in the DOM).
box, err := el.Shape()
if err != nil || len(box.Quads) == 0 {
continue
}
// Click the element; only successful clicks are counted, each followed
// by a 150 ms pause.
if err := el.Click(proto.InputMouseButtonLeft, 1); err == nil {
clickedCount++
time.Sleep(150 * time.Millisecond)
}
}
return clickedCount
}
// getCommentCount 获取当前评论数量 // getCommentCount 获取当前评论数量
func getCommentCount(page *rod.Page) int { func getCommentCount(page *rod.Page) int {
result := page.MustEval(`() => { result := page.MustEval(`() => {
const container = document.querySelector('.comments-container'); const container = document.querySelector('.comments-container');
if (!container) return 0; if (!container) return 0;
return container.querySelectorAll('.comment-item, .comment-item-sub, .comment').length; return container.querySelectorAll('.parent-comment').length;
}`) }`)
return result.Int() return result.Int()
} }