From c6390bf0148ec28457f740ef484c27f61626f062 Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Mon, 6 Oct 2025 03:26:52 +0800 Subject: [PATCH 01/19] feat: add like and favorite functionality for feeds - Implemented handleLikeFeed and handleFavoriteFeed methods in mcp_handlers.go to manage liking and favoriting feeds. - Added LikeFavoriteArgs struct in mcp_server.go for handling parameters. - Registered new MCP tools for liking and favoriting feeds in registerTools function. - Introduced LikeFeed and FavoriteFeed methods in XiaohongshuService to interact with the respective actions. - Created LikeFavoriteAction in a new file to encapsulate the logic for liking and favoriting feeds on the Xiaohongshu platform. --- mcp_handlers.go | 34 ++++ mcp_server.go | 40 ++++- service.go | 49 ++++-- xiaohongshu/like_favorite.go | 332 +++++++++++++++++++++++++++++++++++ 4 files changed, 443 insertions(+), 12 deletions(-) create mode 100644 xiaohongshu/like_favorite.go diff --git a/mcp_handlers.go b/mcp_handlers.go index 6c7fd1e..aebbe25 100644 --- a/mcp_handlers.go +++ b/mcp_handlers.go @@ -391,6 +391,40 @@ func (s *AppServer) handleUserProfile(ctx context.Context, args map[string]any) } } +// handleLikeFeed 处理点赞 +func (s *AppServer) handleLikeFeed(ctx context.Context, args map[string]interface{}) *MCPToolResult { + feedID, ok := args["feed_id"].(string) + if !ok || feedID == "" { + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: "点赞失败: 缺少feed_id参数"}}, IsError: true} + } + xsecToken, ok := args["xsec_token"].(string) + if !ok || xsecToken == "" { + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: "点赞失败: 缺少xsec_token参数"}}, IsError: true} + } + res, err := s.xiaohongshuService.LikeFeed(ctx, feedID, xsecToken) + if err != nil { + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: "点赞失败: " + err.Error()}}, IsError: true} + } + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: fmt.Sprintf("点赞成功 - Feed ID: %s", res.FeedID)}}} +} + +// handleFavoriteFeed 处理收藏 +func (s *AppServer) handleFavoriteFeed(ctx context.Context, args map[string]interface{}) *MCPToolResult { + feedID, ok := args["feed_id"].(string) + if !ok || feedID == "" { + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: "收藏失败: 缺少feed_id参数"}}, IsError: true} + } + xsecToken, ok := args["xsec_token"].(string) + if !ok || xsecToken == "" { + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: "收藏失败: 缺少xsec_token参数"}}, IsError: true} + } + res, err := s.xiaohongshuService.FavoriteFeed(ctx, feedID, xsecToken) + if err != nil { + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: "收藏失败: " + err.Error()}}, IsError: true} + } + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: fmt.Sprintf("收藏成功 - Feed ID: %s", res.FeedID)}}} +} + // handlePostComment 处理发表评论到Feed func (s *AppServer) handlePostComment(ctx context.Context, args map[string]interface{}) *MCPToolResult { logrus.Info("MCP: 发表评论到Feed") diff --git a/mcp_server.go b/mcp_server.go index 9d6895b..2c7e4b8 100644 --- a/mcp_server.go +++ b/mcp_server.go @@ -50,6 +50,12 @@ type PostCommentArgs struct { Content string `json:"content" jsonschema:"评论内容"` } +// LikeFavoriteArgs 点赞/收藏参数 +type LikeFavoriteArgs struct { + FeedID string `json:"feed_id" jsonschema:"小红书笔记ID,从Feed列表获取"` + XsecToken string `json:"xsec_token" jsonschema:"访问令牌,从Feed列表的xsecToken字段获取"` +} + // InitMCPServer 初始化 MCP Server func InitMCPServer(appServer *AppServer) *mcp.Server { // 创建 MCP Server @@ -208,7 +214,39 @@ func registerTools(server *mcp.Server, appServer *AppServer) { }, ) - logrus.Infof("Registered %d MCP tools", 9) + // 工具 10: 点赞笔记 + mcp.AddTool(server, + &mcp.Tool{ + Name: "like_feed", + Description: "为指定笔记点赞(如已点赞将跳过)", + }, + func(ctx context.Context, req *mcp.CallToolRequest, args LikeFavoriteArgs) (*mcp.CallToolResult, any, error) { + argsMap := map[string]interface{}{ + "feed_id": args.FeedID, + "xsec_token": args.XsecToken, + } + result := appServer.handleLikeFeed(ctx, argsMap) + return convertToMCPResult(result), nil, nil + }, + ) + + // 工具 11: 收藏笔记 + mcp.AddTool(server, + &mcp.Tool{ + Name: "favorite_feed", + Description: "收藏指定笔记(如已收藏将跳过)", + }, + func(ctx context.Context, req *mcp.CallToolRequest, args LikeFavoriteArgs) (*mcp.CallToolResult, any, error) { + argsMap := map[string]interface{}{ + "feed_id": args.FeedID, + "xsec_token": args.XsecToken, + } + result := appServer.handleFavoriteFeed(ctx, argsMap) + return convertToMCPResult(result), nil, nil + }, + ) + + logrus.Infof("Registered %d MCP tools", 11) } // convertToMCPResult 将自定义的 MCPToolResult 转换为官方 SDK 的格式 diff --git a/service.go b/service.go index ebc1534..ab909d2 100644 --- a/service.go +++ b/service.go @@ -20,6 +20,13 @@ import ( // XiaohongshuService 小红书业务服务 type XiaohongshuService struct{} +// ActionResult 通用动作响应(点赞/收藏等) +type ActionResult struct { + FeedID string `json:"feed_id"` + Success bool `json:"success"` + Message string `json:"message"` +} + // NewXiaohongshuService 创建小红书服务实例 func NewXiaohongshuService() *XiaohongshuService { return &XiaohongshuService{} @@ -368,29 +375,49 @@ func (s *XiaohongshuService) UserProfile(ctx context.Context, userID, xsecToken // PostCommentToFeed 发表评论到Feed func (s *XiaohongshuService) PostCommentToFeed(ctx context.Context, feedID, xsecToken, content string) (*PostCommentResponse, error) { - // 使用非无头模式以便查看操作过程 b := newBrowser() defer b.Close() page := b.NewPage() defer page.Close() - // 创建 Feed 评论 action action := xiaohongshu.NewCommentFeedAction(page) - // 发表评论 - err := action.PostComment(ctx, feedID, xsecToken, content) - if err != nil { + if err := action.PostComment(ctx, feedID, xsecToken, content); err != nil { return nil, err } - response := &PostCommentResponse{ - FeedID: feedID, - Success: true, - Message: "评论发表成功", - } + return &PostCommentResponse{FeedID: feedID, Success: true, Message: "评论发表成功"}, nil +} - return response, nil +// LikeFeed 点赞笔记 +func (s *XiaohongshuService) LikeFeed(ctx context.Context, feedID, xsecToken string) (*ActionResult, error) { + b := newBrowser() + defer b.Close() + + page := b.NewPage() + defer page.Close() + + action := xiaohongshu.NewLikeFavoriteAction(page) + if err := action.Like(ctx, feedID, xsecToken); err != nil { + return nil, err + } + return &ActionResult{FeedID: feedID, Success: true, Message: "点赞成功或已点赞"}, nil +} + +// FavoriteFeed 收藏笔记 +func (s *XiaohongshuService) FavoriteFeed(ctx context.Context, feedID, xsecToken string) (*ActionResult, error) { + b := newBrowser() + defer b.Close() + + page := b.NewPage() + defer page.Close() + + action := xiaohongshu.NewLikeFavoriteAction(page) + if err := action.Favorite(ctx, feedID, xsecToken); err != nil { + return nil, err + } + return &ActionResult{FeedID: feedID, Success: true, Message: "收藏成功或已收藏"}, nil } func newBrowser() *headless_browser.Browser { diff --git a/xiaohongshu/like_favorite.go b/xiaohongshu/like_favorite.go new file mode 100644 index 0000000..19b85ee --- /dev/null +++ b/xiaohongshu/like_favorite.go @@ -0,0 +1,332 @@ +package xiaohongshu + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + "github.com/go-rod/rod" + "github.com/go-rod/rod/lib/proto" + "github.com/pkg/errors" + "github.com/sirupsen/logrus" +) + +// LikeFavoriteAction 点赞/收藏 动作 +// 提供在笔记详情页执行点赞和收藏的能力,并在可能的情况下避免重复点击 +// 通过读取 window.__INITIAL_STATE__ 判断当前状态 +// 并尽量采用多种选择器/文案做回退,避免因页面样式变更导致失败 +// 注意:该实现依赖页面 DOM,可能随页面升级而变化 + +type LikeFavoriteAction struct { + page *rod.Page +} + +func NewLikeFavoriteAction(page *rod.Page) *LikeFavoriteAction { + return &LikeFavoriteAction{page: page} +} + +// Like 点赞指定笔记,如果已点赞则直接返回 +func (a *LikeFavoriteAction) Like(ctx context.Context, feedID, xsecToken string) error { + page := a.page.Context(ctx).Timeout(60 * time.Second) + url := makeFeedDetailURL(feedID, xsecToken) + logrus.Infof("Opening feed detail page for like: %s", url) + + page.MustNavigate(url) + page.MustWaitDOMStable() + time.Sleep(1 * time.Second) + + liked, _, err := a.getInteractState(page, feedID) + if err != nil { + logrus.Warnf("failed to read interact state: %v (continue to try clicking)", err) + } else if liked { + logrus.Infof("feed %s already liked, skip clicking", feedID) + return nil + } + + // 依次尝试多种选择器或按文案匹配 + selectors := []string{ + "span.like-lottie", // 页面提供的喜欢图标容器 (根据您提供的HTML) + ".like-lottie", // 页面提供的喜欢图标容器 + "button.like", // 常见按钮类名 + "div.interaction-bar .like", // 交互区域 like + "div.footer .like", // 底部工具栏 + ".side-action .like", // 侧边操作栏 + ".like-wrapper", // 包裹元素 + ".interactions .like", // 通用交互区 + } + // 同时尝试 SVG use 的 like 图标 + selectors = append(selectors, + "svg.like-icon", "use[href='#like']", "use[xlink\\:href='#like']", + ) + textCandidates := []string{"点赞", "赞", "喜欢"} + if err := clickFirstMatch(page, selectors, textCandidates); err != nil { + return errors.Wrap(err, "点击点赞按钮失败") + } + + time.Sleep(3 * time.Second) // 增加等待时间,确保状态更新 + + // 验证点赞是否成功 + newLiked, _, err := a.getInteractState(page, feedID) + if err == nil && newLiked { + logrus.Infof("feed %s 点赞成功", feedID) + return nil + } + + if err != nil { + logrus.Warnf("验证点赞状态失败: %v", err) + } else { + logrus.Warnf("feed %s 点赞可能未成功,状态未变化,尝试再次点击", feedID) + // 如果第一次点击失败,尝试再次点击 + if err := clickFirstMatch(page, selectors, textCandidates); err != nil { + logrus.Warnf("第二次点击点赞按钮也失败: %v", err) + } else { + time.Sleep(2 * time.Second) + newLiked2, _, err2 := a.getInteractState(page, feedID) + if err2 == nil && newLiked2 { + logrus.Infof("feed %s 第二次点击点赞成功", feedID) + return nil + } else if err2 == nil && !newLiked2 { + logrus.Warnf("feed %s 第二次点击后取消了点赞,这是正常行为", feedID) + return nil + } + } + } + + return nil +} + +// Favorite 收藏指定笔记,如果已收藏则直接返回 +func (a *LikeFavoriteAction) Favorite(ctx context.Context, feedID, xsecToken string) error { + page := a.page.Context(ctx).Timeout(60 * time.Second) + url := makeFeedDetailURL(feedID, xsecToken) + logrus.Infof("Opening feed detail page for favorite: %s", url) + + page.MustNavigate(url) + page.MustWaitDOMStable() + time.Sleep(1 * time.Second) + + _, collected, err := a.getInteractState(page, feedID) + if err != nil { + logrus.Warnf("failed to read interact state: %v (continue to try clicking)", err) + } else if collected { + logrus.Infof("feed %s already favorited, skip clicking", feedID) + return nil + } + + selectors := []string{ + "#note-page-collect-board-guide", // 直接通过ID点击收藏按钮容器 + ".collect-wrapper", // 收藏按钮的包裹容器 + ".collect-wrapper svg", // 容器内的SVG + ".collect-wrapper .reds-icon.collect-icon", // 容器内的收藏图标 + ".collect-wrapper use", // 容器内的use元素 + "use[xlink:href='#collect']", // 直接点击SVG内部的use元素 + "use[href='#collect']", // 备用use选择器 + "svg.reds-icon.collect-icon use", // SVG内部的use元素 + "svg.reds-icon.collect-icon", // SVG容器(可能需要点击父容器) + ".reds-icon.collect-icon use", // 类组合的use元素 + ".reds-icon.collect-icon", // 类组合的容器 + "svg.collect-icon use", // 通用SVG收藏图标内部的use + "svg.collect-icon", // 通用SVG收藏图标 + ".collect-icon", // 通用收藏图标类 + "button.collect", // 常见按钮类名(收藏/收藏夹) + "button.favorite", + "div.interaction-bar .collect", + "div.footer .collect", + ".side-action .collect", + ".interactions .collect", + } + textCandidates := []string{"收藏", "收藏夹", "喜欢"} + if err := clickFirstMatch(page, selectors, textCandidates); err != nil { + return errors.Wrap(err, "点击收藏按钮失败") + } + + time.Sleep(3 * time.Second) // 增加等待时间,确保状态更新 + + // 验证收藏是否成功 + _, newCollected, err := a.getInteractState(page, feedID) + if err == nil && newCollected { + logrus.Infof("feed %s 收藏成功", feedID) + return nil + } + + if err != nil { + logrus.Warnf("验证收藏状态失败: %v", err) + } else { + logrus.Warnf("feed %s 收藏可能未成功,状态未变化,尝试再次点击", feedID) + // 如果第一次点击失败,尝试再次点击 + if err := clickFirstMatch(page, selectors, textCandidates); err != nil { + logrus.Warnf("第二次点击收藏按钮也失败: %v", err) + } else { + time.Sleep(2 * time.Second) + _, newCollected2, err2 := a.getInteractState(page, feedID) + if err2 == nil && newCollected2 { + logrus.Infof("feed %s 第二次点击收藏成功", feedID) + return nil + } + } + } + + return nil +} + +// getInteractState 从 __INITIAL_STATE__ 读取笔记的点赞/收藏状态 +func (a *LikeFavoriteAction) getInteractState(page *rod.Page, feedID string) (liked bool, collected bool, err error) { + result := page.MustEval(`() => { + if (window.__INITIAL_STATE__) { + return JSON.stringify(window.__INITIAL_STATE__); + } + return ""; + }`).String() + if result == "" { + return false, false, fmt.Errorf("__INITIAL_STATE__ not found") + } + + var state struct { + Note struct { + NoteDetailMap map[string]struct { + Note struct { + InteractInfo struct { + Liked bool `json:"liked"` + Collected bool `json:"collected"` + } `json:"interactInfo"` + } `json:"note"` + } `json:"noteDetailMap"` + } `json:"note"` + } + if err := json.Unmarshal([]byte(result), &state); err != nil { + return false, false, errors.Wrap(err, "unmarshal __INITIAL_STATE__ failed") + } + + detail, ok := state.Note.NoteDetailMap[feedID] + if !ok { + return false, false, fmt.Errorf("feed %s not in noteDetailMap", feedID) + } + return detail.Note.InteractInfo.Liked, detail.Note.InteractInfo.Collected, nil +} + +// clickFirstMatch 依次尝试选择器点击;若失败,尝试按按钮/链接文本模糊匹配 +func clickFirstMatch(page *rod.Page, selectors []string, textCandidates []string) error { + // 1) 尝试按选择器查找多个元素并点击(优先点击最后一个,即笔记的点赞按钮) + for _, sel := range selectors { + if els, err := page.Elements(sel); err == nil && len(els) > 0 { + // 从最后一个元素开始尝试(笔记的点赞按钮通常在评论区之前) + for i := len(els) - 1; i >= 0; i-- { + if tryClickChain(els[i]) { + return nil + } + } + } + // 单个元素回退 + if el, err := page.Element(sel); err == nil && el != nil { + if tryClickChain(el) { + return nil + } + } + } + // 2) 文案匹配:在按钮/链接/容器中查找包含文案的元素 + for _, txt := range textCandidates { + if els, err := page.Elements("button, a, div, span, svg, use"); err == nil && len(els) > 0 { + // 从最后一个元素开始尝试匹配文本 + for i := len(els) - 1; i >= 0; i-- { + text, _ := els[i].Text() + if strings.Contains(strings.ToLower(text), strings.ToLower(txt)) { + if tryClickChain(els[i]) { + return nil + } + } + } + } + // 单个元素回退 + if el, err := page.ElementR("button, a, div, span, svg, use", fmt.Sprintf("(?i)%s", regexpEscape(txt))); err == nil && el != nil { + if tryClickChain(el) { + return nil + } + } + } + return errors.New("no clickable element matched for selectors/text") +} + +// tryClickChain 对元素自身及其若干父级尝试点击(scrollIntoView + js click + rod click) +func tryClickChain(el *rod.Element) bool { + current := el + for i := 0; i < 6 && current != nil; i++ { + if clickElement(current) { + return true + } + parent, _ := current.Parent() + current = parent + } + return false +} + +func clickElement(el *rod.Element) bool { + defer func() { _ = recover() }() + // 滚动到可见区域 + _, _ = el.Eval(`() => { try { this.scrollIntoView({block: "center", inline: "center", behavior: "instant"}); } catch (e) {} return true }`) + + // 检查元素类型,对SVG元素使用特殊处理 - 简化处理,直接尝试所有方法 + // 不检查元素类型,直接尝试多种点击方式 + + // 1. 尝试触发MouseEvent(对SVG元素特别有效) + _, jsErr := el.Eval(`() => { + try { + const event = new MouseEvent('click', { + view: window, + bubbles: true, + cancelable: true + }); + this.dispatchEvent(event); + return true; + } catch (e) { + console.error('MouseEvent click error:', e); + return false; + } + }`) + if jsErr == nil { + return true + } + + // 优先尝试标准 JS click + _, jsErr2 := el.Eval(`() => { + try { + this.click(); + return true; + } catch (e) { + console.error('JS click error:', e); + return false; + } + }`) + if jsErr2 == nil { + return true + } + + // 再尝试 rod 的 Click + if err := el.Click(proto.InputMouseButtonLeft, 1); err != nil { + return false + } + + return true +} + +// regexpEscape 对用户文案做正则转义,避免特殊字符 +func regexpEscape(s string) string { + replacer := strings.NewReplacer( + "\\", "\\\\", + ".", "\\.", + "+", "\\+", + "*", "\\*", + "?", "\\?", + "(", "\\(", + ")", "\\)", + "[", "\\[", + "]", "\\]", + "{", "\\{", + "}", "\\}", + "^", "\\^", + "$", "\\$", + "|", "\\|", + ) + return replacer.Replace(s) +} From 7c2658dae5d9e20f56e42557d563ba67dd11b4d5 Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Tue, 7 Oct 2025 14:00:16 +0800 Subject: [PATCH 02/19] =?UTF-8?q?"=E4=BC=98=E5=8C=96=E8=AF=84=E8=AE=BA?= =?UTF-8?q?=E5=8F=8D=E9=A6=88=E9=80=BB=E8=BE=91=EF=BC=9A=E7=AE=80=E5=8C=96?= =?UTF-8?q?=E5=9B=9E=E5=A4=8D=E6=8C=89=E9=92=AE=E6=9F=A5=E6=89=BE=E5=92=8C?= =?UTF-8?q?=E7=82=B9=E5=87=BB=E6=B5=81=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- handlers_api.go | 20 ++ mcp_handlers.go | 36 ++ mcp_server.go | 43 ++- routes.go | 1 + service.go | 29 +- types.go | 18 + xiaohongshu/comment_feed.go | 675 +++++++++++++++++++++++++++++++++++- 7 files changed, 802 insertions(+), 20 deletions(-) diff --git a/handlers_api.go b/handlers_api.go index a7dcd04..1a6d11c 100644 --- a/handlers_api.go +++ b/handlers_api.go @@ -199,6 +199,26 @@ func (s *AppServer) postCommentHandler(c *gin.Context) { respondSuccess(c, result, result.Message) } +// replyCommentHandler 回复指定评论 +func (s *AppServer) replyCommentHandler(c *gin.Context) { + var req ReplyCommentRequest + if err := c.ShouldBindJSON(&req); err != nil { + respondError(c, http.StatusBadRequest, "INVALID_REQUEST", + "请求参数错误", err.Error()) + return + } + + result, err := s.xiaohongshuService.ReplyCommentToFeed(c.Request.Context(), req.FeedID, req.XsecToken, req.CommentID, req.UserID, req.Content) + if err != nil { + respondError(c, http.StatusInternalServerError, "REPLY_COMMENT_FAILED", + "回复评论失败", err.Error()) + return + } + + c.Set("account", "ai-report") + respondSuccess(c, result, result.Message) +} + // healthHandler 健康检查 func healthHandler(c *gin.Context) { respondSuccess(c, map[string]any{ diff --git a/mcp_handlers.go b/mcp_handlers.go index aebbe25..f6cc1e1 100644 --- a/mcp_handlers.go +++ b/mcp_handlers.go @@ -486,3 +486,39 @@ func (s *AppServer) handlePostComment(ctx context.Context, args map[string]inter }}, } } + +// handleReplyComment 处理回复评论 +func (s *AppServer) handleReplyComment(ctx context.Context, args map[string]interface{}) *MCPToolResult { + logrus.Info("MCP: 回复评论") + + feedID, ok := args["feed_id"].(string) + if !ok || feedID == "" { + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: "回复评论失败: 缺少feed_id参数"}}, IsError: true} + } + + xsecToken, ok := args["xsec_token"].(string) + if !ok || xsecToken == "" { + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: "回复评论失败: 缺少xsec_token参数"}}, IsError: true} + } + + commentID, _ := args["comment_id"].(string) + userID, _ := args["user_id"].(string) + if commentID == "" && userID == "" { + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: "回复评论失败: 缺少comment_id或user_id参数"}}, IsError: true} + } + + content, ok := args["content"].(string) + if !ok || content == "" { + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: "回复评论失败: 缺少content参数"}}, IsError: true} + } + + logrus.Infof("MCP: 回复评论 - Feed ID: %s, Comment ID: %s, User ID: %s, 内容长度: %d", feedID, commentID, userID, len(content)) + + result, err := s.xiaohongshuService.ReplyCommentToFeed(ctx, feedID, xsecToken, commentID, userID, content) + if err != nil { + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: "回复评论失败: " + err.Error()}}, IsError: true} + } + + responseText := fmt.Sprintf("评论回复成功 - Feed ID: %s, Comment ID: %s, User ID: %s", result.FeedID, result.TargetCommentID, result.TargetUserID) + return &MCPToolResult{Content: []MCPContent{{Type: "text", Text: responseText}}} +} diff --git a/mcp_server.go b/mcp_server.go index 2c7e4b8..e6d7166 100644 --- a/mcp_server.go +++ b/mcp_server.go @@ -50,6 +50,15 @@ type PostCommentArgs struct { Content string `json:"content" jsonschema:"评论内容"` } +// ReplyCommentArgs 回复评论的参数 +type ReplyCommentArgs struct { + FeedID string `json:"feed_id" jsonschema:"小红书笔记ID,从Feed列表获取"` + XsecToken string `json:"xsec_token" jsonschema:"访问令牌,从Feed列表的xsecToken字段获取"` + CommentID string `json:"comment_id,omitempty" jsonschema:"目标评论ID,从评论列表获取"` + UserID string `json:"user_id,omitempty" jsonschema:"目标评论作者ID,从评论列表获取"` + Content string `json:"content" jsonschema:"回复内容"` +} + // LikeFavoriteArgs 点赞/收藏参数 type LikeFavoriteArgs struct { FeedID string `json:"feed_id" jsonschema:"小红书笔记ID,从Feed列表获取"` @@ -196,7 +205,33 @@ func registerTools(server *mcp.Server, appServer *AppServer) { }, ) - // 工具 9: 发布视频(仅本地文件) + // 工具 9: 回复评论 + mcp.AddTool(server, + &mcp.Tool{ + Name: "reply_comment_in_feed", + Description: "回复小红书笔记下的指定评论", + }, + func(ctx context.Context, req *mcp.CallToolRequest, args ReplyCommentArgs) (*mcp.CallToolResult, any, error) { + if args.CommentID == "" && args.UserID == "" { + return &mcp.CallToolResult{ + IsError: true, + Content: []mcp.Content{&mcp.TextContent{Text: "缺少 comment_id 或 user_id"}}, + }, nil, nil + } + + argsMap := map[string]interface{}{ + "feed_id": args.FeedID, + "xsec_token": args.XsecToken, + "comment_id": args.CommentID, + "user_id": args.UserID, + "content": args.Content, + } + result := appServer.handleReplyComment(ctx, argsMap) + return convertToMCPResult(result), nil, nil + }, + ) + + // 工具 10: 发布视频(仅本地文件) mcp.AddTool(server, &mcp.Tool{ Name: "publish_with_video", @@ -214,7 +249,7 @@ func registerTools(server *mcp.Server, appServer *AppServer) { }, ) - // 工具 10: 点赞笔记 + // 工具 11: 点赞笔记 mcp.AddTool(server, &mcp.Tool{ Name: "like_feed", @@ -230,7 +265,7 @@ func registerTools(server *mcp.Server, appServer *AppServer) { }, ) - // 工具 11: 收藏笔记 + // 工具 12: 收藏笔记 mcp.AddTool(server, &mcp.Tool{ Name: "favorite_feed", @@ -246,7 +281,7 @@ func registerTools(server *mcp.Server, appServer *AppServer) { }, ) - logrus.Infof("Registered %d MCP tools", 11) + logrus.Infof("Registered %d MCP tools", 12) } // convertToMCPResult 将自定义的 MCPToolResult 转换为官方 SDK 的格式 diff --git a/routes.go b/routes.go index c709943..aa15106 100644 --- a/routes.go +++ b/routes.go @@ -47,6 +47,7 @@ func setupRoutes(appServer *AppServer) *gin.Engine { api.POST("/feeds/detail", appServer.getFeedDetailHandler) api.POST("/user/profile", appServer.userProfileHandler) api.POST("/feeds/comment", appServer.postCommentHandler) + api.POST("/feeds/comment/reply", appServer.replyCommentHandler) } return router diff --git a/service.go b/service.go index ab909d2..b3cfd72 100644 --- a/service.go +++ b/service.go @@ -22,9 +22,9 @@ type XiaohongshuService struct{} // ActionResult 通用动作响应(点赞/收藏等) type ActionResult struct { - FeedID string `json:"feed_id"` - Success bool `json:"success"` - Message string `json:"message"` + FeedID string `json:"feed_id"` + Success bool `json:"success"` + Message string `json:"message"` } // NewXiaohongshuService 创建小红书服务实例 @@ -390,6 +390,29 @@ func (s *XiaohongshuService) PostCommentToFeed(ctx context.Context, feedID, xsec return &PostCommentResponse{FeedID: feedID, Success: true, Message: "评论发表成功"}, nil } +// ReplyCommentToFeed 回复指定评论 +func (s *XiaohongshuService) ReplyCommentToFeed(ctx context.Context, feedID, xsecToken, commentID, userID, content string) (*ReplyCommentResponse, error) { + b := newBrowser() + defer b.Close() + + page := b.NewPage() + defer page.Close() + + action := xiaohongshu.NewCommentFeedAction(page) + + if err := action.ReplyToComment(ctx, feedID, xsecToken, commentID, userID, content); err != nil { + return nil, err + } + + return &ReplyCommentResponse{ + FeedID: feedID, + TargetCommentID: commentID, + TargetUserID: userID, + Success: true, + Message: "评论回复成功", + }, nil +} + // LikeFeed 点赞笔记 func (s *XiaohongshuService) LikeFeed(ctx context.Context, feedID, xsecToken string) (*ActionResult, error) { b := newBrowser() diff --git a/types.go b/types.go index afd5f07..96a2738 100644 --- a/types.go +++ b/types.go @@ -58,6 +58,24 @@ type PostCommentResponse struct { Message string `json:"message"` } +// ReplyCommentRequest 回复评论请求 +type ReplyCommentRequest struct { + FeedID string `json:"feed_id" binding:"required"` + XsecToken string `json:"xsec_token" binding:"required"` + CommentID string `json:"comment_id" binding:"required_without=UserID"` + UserID string `json:"user_id" binding:"required_without=CommentID"` + Content string `json:"content" binding:"required"` +} + +// ReplyCommentResponse 回复评论响应 +type ReplyCommentResponse struct { + FeedID string `json:"feed_id"` + TargetCommentID string `json:"target_comment_id,omitempty"` + TargetUserID string `json:"target_user_id,omitempty"` + Success bool `json:"success"` + Message string `json:"message"` +} + // UserProfileRequest 用户主页请求 type UserProfileRequest struct { UserID string `json:"user_id" binding:"required"` diff --git a/xiaohongshu/comment_feed.go b/xiaohongshu/comment_feed.go index eb953e2..5aee894 100644 --- a/xiaohongshu/comment_feed.go +++ b/xiaohongshu/comment_feed.go @@ -2,9 +2,11 @@ package xiaohongshu import ( "context" + "fmt" "time" "github.com/go-rod/rod" + "github.com/go-rod/rod/lib/proto" "github.com/sirupsen/logrus" ) @@ -21,30 +23,677 @@ func NewCommentFeedAction(page *rod.Page) *CommentFeedAction { // PostComment 发表评论到 Feed func (f *CommentFeedAction) PostComment(ctx context.Context, feedID, xsecToken, content string) error { page := f.page.Context(ctx).Timeout(60 * time.Second) - // 构建详情页 URL url := makeFeedDetailURL(feedID, xsecToken) - logrus.Infof("Opening feed detail page: %s", url) - // 导航到详情页 page.MustNavigate(url) page.MustWaitDOMStable() - - time.Sleep(1 * time.Second) - + time.Sleep(3 * time.Second) // 增加等待时间确保页面完全加载 + + // 等待评论容器加载 + waitForCommentsContainer(page) + elem := page.MustElement("div.input-box div.content-edit span") elem.MustClick() - elem2 := page.MustElement("div.input-box div.content-edit p.content-input") elem2.MustInput(content) - - time.Sleep(1 * time.Second) - + time.Sleep(2 * time.Second) // 增加等待时间 submitButton := page.MustElement("div.bottom button.submit") submitButton.MustClick() - - time.Sleep(1 * time.Second) - + time.Sleep(2 * time.Second) // 增加等待时间确保提交完成 return nil } + +// ReplyToComment 回复指定评论 +func (f *CommentFeedAction) ReplyToComment(ctx context.Context, feedID, xsecToken, commentID, userID, content string) error { + page := f.page.Context(ctx).Timeout(60 * time.Second) + url := makeFeedDetailURL(feedID, xsecToken) + logrus.Infof("Opening feed detail page for reply: %s", url) + page.MustNavigate(url) + page.MustWaitDOMStable() + time.Sleep(3 * time.Second) // 增加等待时间确保页面完全加载 + + // 等待评论容器加载 + waitForCommentsContainer(page) + + // 确保评论区域可见 + ensureCommentsVisible(page) + + // 额外等待确保评论内容加载完成 + time.Sleep(2 * time.Second) + + // 尝试多次查找评论元素 + var commentEl *rod.Element + var err error + for attempt := 0; attempt < 5; attempt++ { // 增加尝试次数 + commentEl, err = findCommentElement(page, commentID, userID) + if err == nil { + break + } + logrus.Warnf("Attempt %d: Failed to find comment: %v", attempt+1, err) + time.Sleep(2 * time.Second) // 增加等待时间 + ensureCommentsVisible(page) + scrollComments(page) // 每次尝试后滚动 + } + + if err != nil { + return fmt.Errorf("无法找到评论: %w", err) + } + + // 滚动到评论位置 + _, _ = commentEl.Eval(`() => { try { this.scrollIntoView({behavior: "instant", block: "center"}); } catch (e) {} return true }`) + time.Sleep(1 * time.Second) // 增加等待时间 + + // 尝试多次点击回复按钮 + var replyBtn *rod.Element + for attempt := 0; attempt < 5; attempt++ { // 增加尝试次数 + replyBtn, err = findReplyButton(commentEl) + if err == nil { + if tryClickChainForComment(replyBtn) { + break + } + } + logrus.Warnf("Attempt %d: Failed to click reply button: %v", attempt+1, err) + time.Sleep(1 * time.Second) // 增加等待时间 + } + + if err != nil || replyBtn == nil { + return fmt.Errorf("无法点击回复按钮") + } + + time.Sleep(2 * time.Second) // 增加等待时间确保回复输入框出现 + + // 查找回复输入框 + inputEl, err := findReplyInput(page, commentEl) + if err != nil { + return fmt.Errorf("无法找到回复输入框: %w", err) + } + + // 聚焦并输入内容 + if _, evalErr := inputEl.Eval(`() => { try { this.focus(); } catch (e) {} return true }`); evalErr != nil { + logrus.Warnf("focus reply input failed: %v", evalErr) + } + + inputEl.MustInput(content) + time.Sleep(500 * time.Millisecond) // 增加等待时间 + + // 查找并点击提交按钮 + submitBtn, err := findSubmitButton(page) + if err != nil { + return fmt.Errorf("无法找到提交按钮: %w", err) + } + + if !tryClickChainForComment(submitBtn) { + return fmt.Errorf("点击回复提交按钮失败") + } + + time.Sleep(3 * time.Second) // 增加等待时间确保回复提交完成 + return nil +} + +func findCommentElement(page *rod.Page, commentID, userID string) (*rod.Element, error) { + var lastErr error + + // 首先尝试确保评论区域可见 + ensureCommentsVisible(page) + + for attempt := 0; attempt < 20; attempt++ { // 增加尝试次数 + logrus.Infof("查找评论,尝试次数: %d", attempt+1) + el, err := locateCommentElement(page, commentID, userID) + if err == nil && el != nil { + logrus.Infof("成功找到评论") + return el, nil + } + if err != nil { + lastErr = err + } + + // 每3次尝试后进行一次更彻底的滚动 + if attempt%3 == 0 { + // 更彻底的滚动策略 + performFullScroll(page) + } else { + // 常规滚动 + if !scrollComments(page) { + logrus.Infof("滚动到底部,无法继续滚动") + break + } + } + time.Sleep(800 * time.Millisecond) // 增加等待时间 + } + + if lastErr != nil { + return nil, lastErr + } + return nil, fmt.Errorf("未找到评论: %s", buildIdentifier(commentID, userID)) +} + +func locateCommentElement(page *rod.Page, commentID, userID string) (*rod.Element, error) { + // 首先在comments-container内查找 + if commentsContainer, err := page.Element(".comments-container"); err == nil && commentsContainer != nil { + if commentID != "" { + if el, err := locateCommentElementByCommentIDInContainer(commentsContainer, commentID); err == nil && el != nil { + return el, nil + } + } + if userID != "" { + if el, err := locateCommentElementByUserIDInContainer(commentsContainer, userID); err == nil && el != nil { + return el, nil + } + } + } + + // 如果在comments-container内没有找到,尝试在整个页面查找 + if commentID != "" { + if el, err := locateCommentElementByCommentID(page, commentID); err == nil && el != nil { + return el, nil + } + } + if userID != "" { + if el, err := locateCommentElementByUserID(page, userID); err == nil && el != nil { + return el, nil + } + } + + identifier := buildIdentifier(commentID, userID) + if identifier == "" { + return nil, fmt.Errorf("未提供评论标识") + } + return nil, fmt.Errorf("未找到评论: %s", identifier) +} + +func locateCommentElementByCommentID(page *rod.Page, commentID string) (*rod.Element, error) { + if commentID == "" { + return nil, fmt.Errorf("评论ID为空") + } + + // 首先尝试直接通过ID查找(根据HTML结构中的id="comment-68d9df3e0000000002015818") + idSelector := fmt.Sprintf("#comment-%s", commentID) + if el, err := page.Element(idSelector); err == nil && el != nil { + return el, nil + } + + // 尝试其他data属性 + selectors := []string{ + fmt.Sprintf(`[data-comment-id="%s"]`, commentID), + fmt.Sprintf(`[data-comment_id="%s"]`, commentID), + fmt.Sprintf(`[data-commentid="%s"]`, commentID), + fmt.Sprintf(`[data-id="%s"]`, commentID), + fmt.Sprintf(`[comment-id="%s"]`, commentID), + } + for _, selector := range selectors { + if el, err := page.Element(selector); err == nil && el != nil { + return el, nil + } + } + + return nil, fmt.Errorf("未找到评论ID: %s", commentID) +} + +func locateCommentElementByUserID(page *rod.Page, userID string) (*rod.Element, error) { + if userID == "" { + return nil, fmt.Errorf("用户ID为空") + } + + selectors := []string{ + fmt.Sprintf(`[data-user-id="%s"]`, userID), + fmt.Sprintf(`[data-user_id="%s"]`, userID), + fmt.Sprintf(`[data-userid="%s"]`, userID), + fmt.Sprintf(`[data-uid="%s"]`, userID), + fmt.Sprintf(`a[data-user-id="%s"]`, userID), + fmt.Sprintf(`a[href*="%s"]`, userID), + } + + for _, selector := range selectors { + if el, err := page.Element(selector); err == nil && el != nil { + // 使用JavaScript查找父级评论元素 + jsCode := `() => { + let current = this; + while (current) { + if (current.classList && (current.classList.contains('comment-item') || current.classList.contains('comment'))) { + return current; + } + current = current.parentElement; + } + return this; + }` + if _, err := el.Eval(jsCode); err == nil { + return el, nil + } + return el, nil + } + } + + return nil, fmt.Errorf("未找到用户ID: %s", userID) +} + +// 在指定容器内查找评论元素 +func locateCommentElementByCommentIDInContainer(container *rod.Element, commentID string) (*rod.Element, error) { + if commentID == "" { + return nil, fmt.Errorf("评论ID为空") + } + + // 首先尝试直接通过ID查找 + idSelector := fmt.Sprintf("#comment-%s", commentID) + if el, err := container.Element(idSelector); err == nil && el != nil { + return el, nil + } + + // 尝试其他data属性 + selectors := []string{ + fmt.Sprintf(`[data-comment-id="%s"]`, commentID), + fmt.Sprintf(`[data-comment_id="%s"]`, commentID), + fmt.Sprintf(`[data-commentid="%s"]`, commentID), + fmt.Sprintf(`[data-id="%s"]`, commentID), + fmt.Sprintf(`[comment-id="%s"]`, commentID), + } + for _, selector := range selectors { + if el, err := container.Element(selector); err == nil && el != nil { + return el, nil + } + } + + return nil, fmt.Errorf("在容器内未找到评论ID: %s", commentID) +} + +// 在指定容器内通过用户ID查找评论元素 +func locateCommentElementByUserIDInContainer(container *rod.Element, userID string) (*rod.Element, error) { + if userID == "" { + return nil, fmt.Errorf("用户ID为空") + } + + selectors := []string{ + fmt.Sprintf(`[data-user-id="%s"]`, userID), + fmt.Sprintf(`[data-user_id="%s"]`, userID), + fmt.Sprintf(`[data-userid="%s"]`, userID), + fmt.Sprintf(`[data-uid="%s"]`, userID), + fmt.Sprintf(`a[data-user-id="%s"]`, userID), + fmt.Sprintf(`a[href*="%s"]`, userID), + } + + for _, selector := range selectors { + if el, err := container.Element(selector); err == nil && el != nil { + // 找到用户链接,返回其父级评论元素 + if parent, err := el.Element(".comment-item"); err == nil && parent != nil { + return parent, nil + } + if parent, err := el.Element(".comment"); err == nil && parent != nil { + return parent, nil + } + return el, nil + } + } + + return nil, fmt.Errorf("在容器内未找到用户ID: %s", userID) +} + +// 等待评论容器加载完成 +func waitForCommentsContainer(page *rod.Page) { + jsCode := `() => { + // 等待comments-container元素出现 + let attempts = 0; + const maxAttempts = 10; + + const checkContainer = () => { + const container = document.querySelector('.comments-container'); + if (container) { + // 检查容器内是否有评论内容 + const comments = container.querySelectorAll('.comment-item, .comment'); + return comments.length > 0; + } + return false; + }; + + // 定期检查评论容器是否加载完成 + const interval = setInterval(() => { + attempts++; + if (checkContainer() || attempts >= maxAttempts) { + clearInterval(interval); + } + }, 500); + + return checkContainer(); + }` + + page.Eval(jsCode) + time.Sleep(2 * time.Second) // 等待检查完成 +} + +func ensureCommentsVisible(page *rod.Page) { + // 专门针对comments-container元素的JavaScript代码 + jsCode := `() => { + // 查找comments-container元素 + const commentsContainer = document.querySelector('.comments-container'); + + // 如果找到comments-container,尝试滚动到视图中并在其内部滚动 + if (commentsContainer) { + // 先滚动到视图中 + commentsContainer.scrollIntoView({behavior: 'instant', block: 'start'}); + + // 等待一下再在容器内部滚动 + setTimeout(() => { + // 在comments-container内部滚动以显示评论 + if (commentsContainer.scrollHeight > commentsContainer.clientHeight) { + const maxScroll = commentsContainer.scrollHeight - commentsContainer.clientHeight; + if (maxScroll > 0) { + // 滚动到一半位置 + commentsContainer.scrollTop = Math.min(maxScroll, commentsContainer.clientHeight * 0.5); + } + } + }, 200); + + return true; + } + + return false; + }` + + page.Eval(jsCode) + time.Sleep(1 * time.Second) +} + +func scrollComments(page *rod.Page) bool { + scrollJS := `() => { + let scrolled = false; + + // 专门查找comments-container元素 + const commentsContainer = document.querySelector('.comments-container'); + + if (commentsContainer) { + const maxScroll = commentsContainer.scrollHeight - commentsContainer.clientHeight; + if (maxScroll > 0 && commentsContainer.scrollTop < maxScroll) { + // 滚动更多内容 + const delta = Math.max(commentsContainer.clientHeight * 0.8, 400); + commentsContainer.scrollTop = Math.min(maxScroll, commentsContainer.scrollTop + delta); + scrolled = true; + } + } + + return scrolled; + }` + res, err := page.Eval(scrollJS) + if err != nil { + logrus.Warnf("scroll comments failed: %v", err) + return false + } + if res == nil { + return false + } + return res.Value.Bool() +} + +// performFullScroll 执行更彻底的滚动策略 +func performFullScroll(page *rod.Page) { + logrus.Infof("执行彻底滚动策略") + + // 策略1: 滚动到评论容器的不同位置 + scrollPositionsJS := `() => { + const commentsContainer = document.querySelector('.comments-container'); + if (!commentsContainer) return false; + + const maxScroll = commentsContainer.scrollHeight - commentsContainer.clientHeight; + if (maxScroll <= 0) return false; + + // 根据当前滚动位置决定下一步滚动 + const currentScroll = commentsContainer.scrollTop; + const scrollRatio = currentScroll / maxScroll; + + if (scrollRatio < 0.3) { + // 滚动到30%位置 + commentsContainer.scrollTop = maxScroll * 0.3; + } else if (scrollRatio < 0.6) { + // 滚动到60%位置 + commentsContainer.scrollTop = maxScroll * 0.6; + } else if (scrollRatio < 0.9) { + // 滚动到90%位置 + commentsContainer.scrollTop = maxScroll * 0.9; + } else { + // 滚动到底部 + commentsContainer.scrollTop = maxScroll; + } + + return true; + }` + + if _, err := page.Eval(scrollPositionsJS); err != nil { + logrus.Warnf("彻底滚动失败: %v", err) + } + +} + +func buildIdentifier(commentID, userID string) string { + if commentID != "" && userID != "" { + return fmt.Sprintf("comment_id=%s / user_id=%s", commentID, userID) + } + if commentID != "" { + return commentID + } + return userID +} + +func findReplyButton(commentEl *rod.Element) (*rod.Element, error) { + logrus.Infof("开始查找回复按钮...") + + // 在right区域内查找interactions + right, err := commentEl.Element(".right") + if err != nil { + logrus.Errorf("未找到.right区域") + return nil, fmt.Errorf("未找到.right区域") + } + + interactions, err := right.Element(".interactions") + if err != nil { + logrus.Errorf("未找到.interactions区域") + return nil, fmt.Errorf("未找到.interactions区域") + } + + // 选择器列表 + selectors := []string{ + ".reply", // 回复容器(最通用) + ":nth-child(2)", // 第二个子元素(单评论) + ".reply-icon", // 回复图标 + ".reds-icon.reply-icon", // 带类的回复图标 + ".reply.icon-container", // 回复图标容器 + } + + // 在interactions区域内查找 + for _, selector := range selectors { + if el, err := interactions.Element(selector); err == nil && el != nil { + logrus.Infof("通过选择器 %s 找到回复按钮", selector) + return el, nil + } + } + + logrus.Errorf("未找到回复按钮") + return nil, fmt.Errorf("未找到回复按钮") +} + +// verifyClickSuccess 验证点击是否真的成功(检查是否出现了回复输入框) +func verifyClickSuccess(clickedEl *rod.Element) bool { + // 获取页面实例 + page := clickedEl.Page() + + // 检查是否出现了回复输入框 + selectors := []string{ + "div.input-box div.content-edit p.content-input", + "div.input-box [contenteditable='true']", + "[contenteditable='true']", + "textarea", + "input[type='text']", + } + + for _, selector := range selectors { + if el, err := page.Element(selector); err == nil && el != nil { + // 检查元素是否可见 + if visible, _ := el.Visible(); visible { + logrus.Infof("验证成功:找到可见的回复输入框 (%s)", selector) + return true + } + } + } + + // 使用JavaScript检查是否有新的输入框出现 + jsCode := `() => { + // 查找所有可编辑元素 + const editables = document.querySelectorAll('[contenteditable="true"], textarea, input[type="text"]'); + for (const el of editables) { + // 检查元素是否可见 + const rect = el.getBoundingClientRect(); + if (rect.width > 0 && rect.height > 0) { + // 检查元素是否在视口中 + const inViewport = rect.top >= 0 && rect.left >= 0 && + rect.bottom <= window.innerHeight && + rect.right <= window.innerWidth; + if (inViewport) { + console.log('找到可见的输入元素:', el); + return true; + } + } + } + return false; + }` + + if result, err := page.Eval(jsCode); err == nil && result != nil { + if result.Value.Bool() { + logrus.Infof("JavaScript验证成功:找到可见的输入元素") + return true + } + } + + logrus.Infof("验证失败:没有找到回复输入框") + return false +} + +func findReplyInput(page *rod.Page, commentEl *rod.Element) (*rod.Element, error) { + activeEditableJS := `() => { + const active = document.activeElement; + if (active && active.getAttribute && active.getAttribute('contenteditable') === 'true') { + return active; + } + return null; + }` + if el, err := page.ElementByJS(rod.Eval(activeEditableJS)); err == nil && el != nil { + return el, nil + } + selectors := []string{ + "div.input-box div.content-edit p.content-input", // 原有选择器 + "div.input-box [contenteditable='true']", // 通用输入框 + "[contenteditable='true']", // 任何可编辑元素 + "textarea", // 备用textarea + "input[type='text']", // 备用text输入框 + "[data-role='reply-input'] [contenteditable='true']", + } + for _, selector := range selectors { + if el, err := page.Element(selector); err == nil && el != nil { + return el, nil + } + } + // 尝试在评论内部寻找可编辑区域 + if el, err := commentEl.Element("[contenteditable='true']"); err == nil && el != nil { + return el, nil + } + // 最后尝试:等待一下再查找,可能是动态加载的 + time.Sleep(1 * time.Second) + for _, selector := range selectors { + if el, err := page.Element(selector); err == nil && el != nil { + return el, nil + } + } + return nil, fmt.Errorf("未找到回复输入框") +} + +func tryClickChainForComment(el *rod.Element) bool { + if el == nil { + logrus.Errorf("要点击的元素为空") + return false + } + + // 获取元素信息用于调试 + text, _ := el.Text() + class, _ := el.Attribute("class") + tag, _ := el.Describe(0, false) + logrus.Infof("准备点击元素 - 文本: '%s', 类: '%s', 标签: %s", text, class, tag) + + // 检查元素是否可见和可点击 + visible, _ := el.Visible() + logrus.Infof("元素可见性: %v", visible) + + // 滚动到元素位置 + _, _ = el.Eval(`() => { try { this.scrollIntoView({behavior: "instant", block: "center"}); } catch (e) {} return true }`) + time.Sleep(500 * time.Millisecond) + + // 只使用直接点击方式 + clickMethods := []struct { + name string + fn func(*rod.Element) bool + }{ + {"直接点击", func(e *rod.Element) bool { + if err := e.Click(proto.InputMouseButtonLeft, 1); err != nil { + logrus.Warnf("直接点击失败: %v", err) + return false + } + logrus.Infof("直接点击成功") + return true + }}, + } + + for i, method := range clickMethods { + logrus.Infof("尝试点击方法 %d: %s", i+1, method.name) + if method.fn(el) { + // 点击后等待一下,检查是否有反应 + time.Sleep(1 * time.Second) + + // 验证点击是否真的成功(检查是否出现了回复输入框) + success := verifyClickSuccess(el) + if success { + logrus.Infof("点击方法 %s 执行成功且有效", method.name) + return true + } else { + logrus.Warnf("点击方法 %s 执行成功但无效(没有出现回复输入框)", method.name) + // 继续尝试下一种方法 + } + } + } + + logrus.Errorf("所有点击方法都失败") + return false +} + +func findSubmitButton(page *rod.Page) (*rod.Element, error) { + selectors := []string{ + "div.bottom button.submit", + "button.submit", + "button.reds-button", + "button[type='submit']", + "button:contains('回复')", + "button:contains('发布')", + "button:contains('发送')", + } + for _, selector := range selectors { + if el, err := page.Element(selector); err == nil && el != nil { + disabled, _ := el.Attribute("disabled") + if disabled == nil { + return el, nil + } + } + } + // 使用JS查找包含特定文本的按钮 + jsCode := `() => { + const buttons = document.querySelectorAll('button'); + for (const btn of buttons) { + const text = btn.textContent || btn.innerText || ''; + if (text.includes('回复') || text.includes('发布') || text.includes('发送')) { + const disabled = btn.getAttribute('disabled'); + if (!disabled) { + return btn; + } + } + } + return null; + }` + if el, err := page.ElementByJS(rod.Eval(jsCode)); err == nil && el != nil { + return el, nil + } + return nil, fmt.Errorf("未找到回复发布按钮") +} \ No newline at end of file From e6bf9921ea08b37dc05800fa4a03fbe6c8557a37 Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Thu, 9 Oct 2025 21:38:52 +0800 Subject: [PATCH 03/19] refactor: improve comment posting logic with enhanced error handling and stability checks - Updated the PostComment method to include error handling for navigation and element interactions. - Replaced sleep calls with more reliable wait mechanisms to ensure page stability. - Added checks for the presence of input elements and improved logging for better debugging. --- xiaohongshu/comment_feed.go | 70 ++++++++++++++++++++++++++++--------- xiaohongshu/publish.go | 7 ++++ 2 files changed, 61 insertions(+), 16 deletions(-) diff --git a/xiaohongshu/comment_feed.go b/xiaohongshu/comment_feed.go index 5aee894..d2ad332 100644 --- a/xiaohongshu/comment_feed.go +++ b/xiaohongshu/comment_feed.go @@ -23,28 +23,66 @@ func NewCommentFeedAction(page *rod.Page) *CommentFeedAction { // PostComment 发表评论到 Feed func (f *CommentFeedAction) PostComment(ctx context.Context, feedID, xsecToken, content string) error { page := f.page.Context(ctx).Timeout(60 * time.Second) + // 构建详情页 URL url := makeFeedDetailURL(feedID, xsecToken) + logrus.Infof("Opening feed detail page: %s", url) + // 导航到详情页 - page.MustNavigate(url) - page.MustWaitDOMStable() - time.Sleep(3 * time.Second) // 增加等待时间确保页面完全加载 - - // 等待评论容器加载 - waitForCommentsContainer(page) - - elem := page.MustElement("div.input-box div.content-edit span") - elem.MustClick() - elem2 := page.MustElement("div.input-box div.content-edit p.content-input") - elem2.MustInput(content) - time.Sleep(2 * time.Second) // 增加等待时间 - submitButton := page.MustElement("div.bottom button.submit") - submitButton.MustClick() - time.Sleep(2 * time.Second) // 增加等待时间确保提交完成 + if err := page.Navigate(url); err != nil { + logrus.Warnf("Failed to navigate to feed detail page: %v", err) + return fmt.Errorf("无法打开帖子详情页,该帖子可能在网页端不可访问: %w", err) + } + + if err := page.WaitStable(2 * time.Second); err != nil { + logrus.Warnf("Failed to wait for page stable: %v", err) + return fmt.Errorf("页面加载超时,该帖子可能在网页端不可访问: %w", err) + } + + time.Sleep(1 * time.Second) + + // 查找评论输入框 + elem, err := page.Element("div.input-box div.content-edit span") + if err != nil { + logrus.Warnf("Failed to find comment input box: %v", err) + return fmt.Errorf("未找到评论输入框,该帖子可能不支持评论或网页端不可访问: %w", err) + } + + if err := elem.Click(proto.InputMouseButtonLeft, 1); err != nil { + logrus.Warnf("Failed to click comment input box: %v", err) + return fmt.Errorf("无法点击评论输入框: %w", err) + } + + elem2, err := page.Element("div.input-box div.content-edit p.content-input") + if err != nil { + logrus.Warnf("Failed to find comment input field: %v", err) + return fmt.Errorf("未找到评论输入区域: %w", err) + } + + if err := elem2.Input(content); err != nil { + logrus.Warnf("Failed to input comment content: %v", err) + return fmt.Errorf("无法输入评论内容: %w", err) + } + + time.Sleep(1 * time.Second) + + submitButton, err := page.Element("div.bottom button.submit") + if err != nil { + logrus.Warnf("Failed to find submit button: %v", err) + return fmt.Errorf("未找到提交按钮: %w", err) + } + + if err := submitButton.Click(proto.InputMouseButtonLeft, 1); err != nil { + logrus.Warnf("Failed to click submit button: %v", err) + return fmt.Errorf("无法点击提交按钮: %w", err) + } + + time.Sleep(1 * time.Second) + + logrus.Infof("Comment posted successfully to feed: %s", feedID) return nil } - // ReplyToComment 回复指定评论 func (f *CommentFeedAction) ReplyToComment(ctx context.Context, feedID, xsecToken, commentID, userID, content string) error { page := f.page.Context(ctx).Timeout(60 * time.Second) diff --git a/xiaohongshu/publish.go b/xiaohongshu/publish.go index a8d1439..d9844e8 100644 --- a/xiaohongshu/publish.go +++ b/xiaohongshu/publish.go @@ -7,6 +7,8 @@ import ( "os" "strings" "time" + "unicode" + "unicode/utf8" "github.com/go-rod/rod" "github.com/go-rod/rod/lib/input" @@ -55,6 +57,11 @@ func (p *PublishAction) Publish(ctx context.Context, content PublishImageContent return errors.New("图片不能为空") } + trimmedContent := strings.TrimRightFunc(content.Content, unicode.IsSpace) + if utf8.RuneCountInString(trimmedContent) > 1000 { + return errors.New("正文内容不能超过1000个字符") + } + page := p.page.Context(ctx) if err := uploadImages(page, content.ImagePaths); err != nil { From cff1705c5b21c4172c9b9f896b6c61198fac36af Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Thu, 9 Oct 2025 23:49:40 +0800 Subject: [PATCH 04/19] feat: add reply comment functionality for Xiaohongshu feeds - Implemented handleReplyComment method in mcp_handlers.go to manage replying to comments on feeds. - Introduced ReplyCommentArgs struct in mcp_server.go for handling parameters related to comment replies. - Registered a new MCP tool for replying to comments in the registerTools function. - Added ReplyCommentToFeed method in service.go to interact with the Xiaohongshu platform for comment replies. - Enhanced error handling for missing parameters in the reply process. --- mcp_handlers.go | 84 +++++++++++++++++++++++++++++++++++++++++++++++++ mcp_server.go | 34 ++++++++++++++++++-- service.go | 15 +++++++++ 3 files changed, 130 insertions(+), 3 deletions(-) diff --git a/mcp_handlers.go b/mcp_handlers.go index b2577d7..5bcd1fb 100644 --- a/mcp_handlers.go +++ b/mcp_handlers.go @@ -524,3 +524,87 @@ func (s *AppServer) handlePostComment(ctx context.Context, args map[string]inter }}, } } + +// handleReplyComment 处理回复评论 +func (s *AppServer) handleReplyComment(ctx context.Context, args map[string]interface{}) *MCPToolResult { + logrus.Info("MCP: 回复评论") + + // 解析参数 + feedID, ok := args["feed_id"].(string) + if !ok || feedID == "" { + return &MCPToolResult{ + Content: []MCPContent{{ + Type: "text", + Text: "回复评论失败: 缺少feed_id参数", + }}, + IsError: true, + } + } + + xsecToken, ok := args["xsec_token"].(string) + if !ok || xsecToken == "" { + return &MCPToolResult{ + Content: []MCPContent{{ + Type: "text", + Text: "回复评论失败: 缺少xsec_token参数", + }}, + IsError: true, + } + } + + commentID, ok := args["comment_id"].(string) + if !ok || commentID == "" { + return &MCPToolResult{ + Content: []MCPContent{{ + Type: "text", + Text: "回复评论失败: 缺少comment_id参数", + }}, + IsError: true, + } + } + + userID, ok := args["user_id"].(string) + if !ok || userID == "" { + return &MCPToolResult{ + Content: []MCPContent{{ + Type: "text", + Text: "回复评论失败: 缺少user_id参数", + }}, + IsError: true, + } + } + + content, ok := args["content"].(string) + if !ok || content == "" { + return &MCPToolResult{ + Content: []MCPContent{{ + Type: "text", + Text: "回复评论失败: 缺少content参数", + }}, + IsError: true, + } + } + + logrus.Infof("MCP: 回复评论 - Feed ID: %s, Comment ID: %s, User ID: %s, 内容长度: %d", feedID, commentID, userID, len(content)) + + // 回复评论 + result, err := s.xiaohongshuService.ReplyCommentToFeed(ctx, feedID, xsecToken, commentID, userID, content) + if err != nil { + return &MCPToolResult{ + Content: []MCPContent{{ + Type: "text", + Text: "回复评论失败: " + err.Error(), + }}, + IsError: true, + } + } + + // 返回成功结果 + resultText := fmt.Sprintf("回复评论成功 - Feed ID: %s", result.FeedID) + return &MCPToolResult{ + Content: []MCPContent{{ + Type: "text", + Text: resultText, + }}, + } +} diff --git a/mcp_server.go b/mcp_server.go index 3e1ce9e..81c9a2b 100644 --- a/mcp_server.go +++ b/mcp_server.go @@ -50,6 +50,15 @@ type PostCommentArgs struct { Content string `json:"content" jsonschema:"评论内容"` } +// ReplyCommentArgs 回复评论的参数 +type ReplyCommentArgs struct { + FeedID string `json:"feed_id" jsonschema:"小红书笔记ID,从Feed列表获取"` + XsecToken string `json:"xsec_token" jsonschema:"访问令牌,从Feed列表的xsecToken字段获取"` + CommentID string `json:"comment_id" jsonschema:"评论ID"` + UserID string `json:"user_id" jsonschema:"用户ID"` + Content string `json:"content" jsonschema:"回复内容"` +} + // LikeFeedArgs 点赞参数 type LikeFeedArgs struct { FeedID string `json:"feed_id" jsonschema:"小红书笔记ID,从Feed列表获取"` @@ -204,7 +213,26 @@ func registerTools(server *mcp.Server, appServer *AppServer) { }, ) - // 工具 9: 发布视频(仅本地文件) + // 工具 9: 回复评论 + mcp.AddTool(server, + &mcp.Tool{ + Name: "reply_to_comment", + Description: "回复小红书笔记的评论", + }, + func(ctx context.Context, req *mcp.CallToolRequest, args ReplyCommentArgs) (*mcp.CallToolResult, any, error) { + argsMap := map[string]interface{}{ + "feed_id": args.FeedID, + "xsec_token": args.XsecToken, + "comment_id": args.CommentID, + "user_id": args.UserID, + "content": args.Content, + } + result := appServer.handleReplyComment(ctx, argsMap) + return convertToMCPResult(result), nil, nil + }, + ) + + // 工具 10: 发布视频(仅本地文件) mcp.AddTool(server, &mcp.Tool{ Name: "publish_with_video", @@ -222,7 +250,7 @@ func registerTools(server *mcp.Server, appServer *AppServer) { }, ) - // 工具 10: 点赞笔记 + // 工具 11: 点赞笔记 mcp.AddTool(server, &mcp.Tool{ Name: "like_feed", @@ -239,7 +267,7 @@ func registerTools(server *mcp.Server, appServer *AppServer) { }, ) - // 工具 11: 收藏笔记 + // 工具 12: 收藏笔记 mcp.AddTool(server, &mcp.Tool{ Name: "favorite_feed", diff --git a/service.go b/service.go index 9cee00b..152adad 100644 --- a/service.go +++ b/service.go @@ -443,6 +443,21 @@ func (s *XiaohongshuService) UnfavoriteFeed(ctx context.Context, feedID, xsecTok return &ActionResult{FeedID: feedID, Success: true, Message: "取消收藏成功或未收藏"}, nil } +// ReplyCommentToFeed 回复笔记评论 +func (s *XiaohongshuService) ReplyCommentToFeed(ctx context.Context, feedID, xsecToken, commentID, userID, content string) (*ActionResult, error) { + b := newBrowser() + defer b.Close() + + page := b.NewPage() + defer page.Close() + + action := xiaohongshu.NewCommentFeedAction(page) + if err := action.ReplyToComment(ctx, feedID, xsecToken, commentID, userID, content); err != nil { + return nil, err + } + return &ActionResult{FeedID: feedID, Success: true, Message: "回复评论成功"}, nil +} + func newBrowser() *headless_browser.Browser { return browser.NewBrowser(configs.IsHeadless(), browser.WithBinPath(configs.GetBinPath())) } From 8d089f59f84950147b081b9dbd5dc45a55fda24b Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Fri, 10 Oct 2025 00:23:51 +0800 Subject: [PATCH 05/19] refactor: enhance reply comment functionality with improved error handling and response structure - Simplified error handling in handleReplyComment to check for both comment_id and user_id simultaneously. - Updated response message to include both Comment ID and User ID upon successful reply. - Modified ReplyCommentArgs struct to make comment_id and user_id optional. - Renamed MCP tool for replying to comments for clarity. --- mcp_handlers.go | 22 ++++++---------------- mcp_server.go | 15 +++++++++++---- service.go | 14 +++++++++++--- 3 files changed, 28 insertions(+), 23 deletions(-) diff --git a/mcp_handlers.go b/mcp_handlers.go index 5bcd1fb..9e89a3e 100644 --- a/mcp_handlers.go +++ b/mcp_handlers.go @@ -552,23 +552,13 @@ func (s *AppServer) handleReplyComment(ctx context.Context, args map[string]inte } } - commentID, ok := args["comment_id"].(string) - if !ok || commentID == "" { + commentID, _ := args["comment_id"].(string) + userID, _ := args["user_id"].(string) + if commentID == "" && userID == "" { return &MCPToolResult{ Content: []MCPContent{{ Type: "text", - Text: "回复评论失败: 缺少comment_id参数", - }}, - IsError: true, - } - } - - userID, ok := args["user_id"].(string) - if !ok || userID == "" { - return &MCPToolResult{ - Content: []MCPContent{{ - Type: "text", - Text: "回复评论失败: 缺少user_id参数", + Text: "回复评论失败: 缺少comment_id或user_id参数", }}, IsError: true, } @@ -600,11 +590,11 @@ func (s *AppServer) handleReplyComment(ctx context.Context, args map[string]inte } // 返回成功结果 - resultText := fmt.Sprintf("回复评论成功 - Feed ID: %s", result.FeedID) + responseText := fmt.Sprintf("评论回复成功 - Feed ID: %s, Comment ID: %s, User ID: %s", result.FeedID, result.TargetCommentID, result.TargetUserID) return &MCPToolResult{ Content: []MCPContent{{ Type: "text", - Text: resultText, + Text: responseText, }}, } } diff --git a/mcp_server.go b/mcp_server.go index 81c9a2b..c190c65 100644 --- a/mcp_server.go +++ b/mcp_server.go @@ -54,8 +54,8 @@ type PostCommentArgs struct { type ReplyCommentArgs struct { FeedID string `json:"feed_id" jsonschema:"小红书笔记ID,从Feed列表获取"` XsecToken string `json:"xsec_token" jsonschema:"访问令牌,从Feed列表的xsecToken字段获取"` - CommentID string `json:"comment_id" jsonschema:"评论ID"` - UserID string `json:"user_id" jsonschema:"用户ID"` + CommentID string `json:"comment_id,omitempty" jsonschema:"目标评论ID,从评论列表获取"` + UserID string `json:"user_id,omitempty" jsonschema:"目标评论用户ID,从评论列表获取"` Content string `json:"content" jsonschema:"回复内容"` } @@ -216,10 +216,17 @@ func registerTools(server *mcp.Server, appServer *AppServer) { // 工具 9: 回复评论 mcp.AddTool(server, &mcp.Tool{ - Name: "reply_to_comment", - Description: "回复小红书笔记的评论", + Name: "reply_comment_in_feed", + Description: "回复小红书笔记下的指定评论", }, func(ctx context.Context, req *mcp.CallToolRequest, args ReplyCommentArgs) (*mcp.CallToolResult, any, error) { + if args.CommentID == "" && args.UserID == "" { + return &mcp.CallToolResult{ + IsError: true, + Content: []mcp.Content{&mcp.TextContent{Text: "缺少 comment_id 或 user_id"}}, + }, nil, nil + } + argsMap := map[string]interface{}{ "feed_id": args.FeedID, "xsec_token": args.XsecToken, diff --git a/service.go b/service.go index 152adad..23bf100 100644 --- a/service.go +++ b/service.go @@ -443,8 +443,8 @@ func (s *XiaohongshuService) UnfavoriteFeed(ctx context.Context, feedID, xsecTok return &ActionResult{FeedID: feedID, Success: true, Message: "取消收藏成功或未收藏"}, nil } -// ReplyCommentToFeed 回复笔记评论 -func (s *XiaohongshuService) ReplyCommentToFeed(ctx context.Context, feedID, xsecToken, commentID, userID, content string) (*ActionResult, error) { +// ReplyCommentToFeed 回复指定评论 +func (s *XiaohongshuService) ReplyCommentToFeed(ctx context.Context, feedID, xsecToken, commentID, userID, content string) (*ReplyCommentResponse, error) { b := newBrowser() defer b.Close() @@ -452,10 +452,18 @@ func (s *XiaohongshuService) ReplyCommentToFeed(ctx context.Context, feedID, xse defer page.Close() action := xiaohongshu.NewCommentFeedAction(page) + if err := action.ReplyToComment(ctx, feedID, xsecToken, commentID, userID, content); err != nil { return nil, err } - return &ActionResult{FeedID: feedID, Success: true, Message: "回复评论成功"}, nil + + return &ReplyCommentResponse{ + FeedID: feedID, + TargetCommentID: commentID, + TargetUserID: userID, + Success: true, + Message: "评论回复成功", + }, nil } func newBrowser() *headless_browser.Browser { From a169db603b4c96978ab6e2e3b0f0fe9be40baf36 Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Tue, 28 Oct 2025 02:02:08 +0800 Subject: [PATCH 06/19] refactor(comment_feed): streamline comment reply process and enhance error handling - Removed redundant waiting times and improved the logic for finding comment elements and reply buttons. - Simplified the reply button search by consolidating selectors and enhancing error messages. - Improved the overall readability of the code by removing unnecessary comments and whitespace. - Ensured better handling of potential errors during the comment reply process. --- xiaohongshu/comment_feed.go | 305 ++++++++---------------------------- 1 file changed, 64 insertions(+), 241 deletions(-) diff --git a/xiaohongshu/comment_feed.go b/xiaohongshu/comment_feed.go index d2ad332..4fab693 100644 --- a/xiaohongshu/comment_feed.go +++ b/xiaohongshu/comment_feed.go @@ -83,6 +83,7 @@ func (f *CommentFeedAction) PostComment(ctx context.Context, feedID, xsecToken, logrus.Infof("Comment posted successfully to feed: %s", feedID) return nil } + // ReplyToComment 回复指定评论 func (f *CommentFeedAction) ReplyToComment(ctx context.Context, feedID, xsecToken, commentID, userID, content string) error { page := f.page.Context(ctx).Timeout(60 * time.Second) @@ -91,16 +92,16 @@ func (f *CommentFeedAction) ReplyToComment(ctx context.Context, feedID, xsecToke page.MustNavigate(url) page.MustWaitDOMStable() time.Sleep(3 * time.Second) // 增加等待时间确保页面完全加载 - + // 等待评论容器加载 waitForCommentsContainer(page) - + // 确保评论区域可见 ensureCommentsVisible(page) - + // 额外等待确保评论内容加载完成 time.Sleep(2 * time.Second) - + // 尝试多次查找评论元素 var commentEl *rod.Element var err error @@ -114,15 +115,15 @@ func (f *CommentFeedAction) ReplyToComment(ctx context.Context, feedID, xsecToke ensureCommentsVisible(page) scrollComments(page) // 每次尝试后滚动 } - + if err != nil { return fmt.Errorf("无法找到评论: %w", err) } - + // 滚动到评论位置 _, _ = commentEl.Eval(`() => { try { this.scrollIntoView({behavior: "instant", block: "center"}); } catch (e) {} return true }`) time.Sleep(1 * time.Second) // 增加等待时间 - + // 尝试多次点击回复按钮 var replyBtn *rod.Element for attempt := 0; attempt < 5; attempt++ { // 增加尝试次数 @@ -135,47 +136,47 @@ func (f *CommentFeedAction) ReplyToComment(ctx context.Context, feedID, xsecToke logrus.Warnf("Attempt %d: Failed to click reply button: %v", attempt+1, err) time.Sleep(1 * time.Second) // 增加等待时间 } - + if err != nil || replyBtn == nil { return fmt.Errorf("无法点击回复按钮") } - + time.Sleep(2 * time.Second) // 增加等待时间确保回复输入框出现 - + // 查找回复输入框 inputEl, err := findReplyInput(page, commentEl) if err != nil { return fmt.Errorf("无法找到回复输入框: %w", err) } - + // 聚焦并输入内容 if _, evalErr := inputEl.Eval(`() => { try { this.focus(); } catch (e) {} return true }`); evalErr != nil { logrus.Warnf("focus reply input failed: %v", evalErr) } - + inputEl.MustInput(content) time.Sleep(500 * time.Millisecond) // 增加等待时间 - + // 查找并点击提交按钮 submitBtn, err := findSubmitButton(page) if err != nil { return fmt.Errorf("无法找到提交按钮: %w", err) } - + if !tryClickChainForComment(submitBtn) { return fmt.Errorf("点击回复提交按钮失败") } - + time.Sleep(3 * time.Second) // 增加等待时间确保回复提交完成 return nil } func findCommentElement(page *rod.Page, commentID, userID string) (*rod.Element, error) { var lastErr error - + // 首先尝试确保评论区域可见 ensureCommentsVisible(page) - + for attempt := 0; attempt < 20; attempt++ { // 增加尝试次数 logrus.Infof("查找评论,尝试次数: %d", attempt+1) el, err := locateCommentElement(page, commentID, userID) @@ -186,7 +187,7 @@ func findCommentElement(page *rod.Page, commentID, userID string) (*rod.Element, if err != nil { lastErr = err } - + // 每3次尝试后进行一次更彻底的滚动 if attempt%3 == 0 { // 更彻底的滚动策略 @@ -200,7 +201,7 @@ func findCommentElement(page *rod.Page, commentID, userID string) (*rod.Element, } time.Sleep(800 * time.Millisecond) // 增加等待时间 } - + if lastErr != nil { return nil, lastErr } @@ -208,20 +209,6 @@ func findCommentElement(page *rod.Page, commentID, userID string) (*rod.Element, } func locateCommentElement(page *rod.Page, commentID, userID string) (*rod.Element, error) { - // 首先在comments-container内查找 - if commentsContainer, err := page.Element(".comments-container"); err == nil && commentsContainer != nil { - if commentID != "" { - if el, err := locateCommentElementByCommentIDInContainer(commentsContainer, commentID); err == nil && el != nil { - return el, nil - } - } - if userID != "" { - if el, err := locateCommentElementByUserIDInContainer(commentsContainer, userID); err == nil && el != nil { - return el, nil - } - } - } - // 如果在comments-container内没有找到,尝试在整个页面查找 if commentID != "" { if el, err := locateCommentElementByCommentID(page, commentID); err == nil && el != nil { @@ -233,7 +220,7 @@ func locateCommentElement(page *rod.Page, commentID, userID string) (*rod.Elemen return el, nil } } - + identifier := buildIdentifier(commentID, userID) if identifier == "" { return nil, fmt.Errorf("未提供评论标识") @@ -245,27 +232,13 @@ func locateCommentElementByCommentID(page *rod.Page, commentID string) (*rod.Ele if commentID == "" { return nil, fmt.Errorf("评论ID为空") } - + // 首先尝试直接通过ID查找(根据HTML结构中的id="comment-68d9df3e0000000002015818") idSelector := fmt.Sprintf("#comment-%s", commentID) if el, err := page.Element(idSelector); err == nil && el != nil { return el, nil } - - // 尝试其他data属性 - selectors := []string{ - fmt.Sprintf(`[data-comment-id="%s"]`, commentID), - fmt.Sprintf(`[data-comment_id="%s"]`, commentID), - fmt.Sprintf(`[data-commentid="%s"]`, commentID), - fmt.Sprintf(`[data-id="%s"]`, commentID), - fmt.Sprintf(`[comment-id="%s"]`, commentID), - } - for _, selector := range selectors { - if el, err := page.Element(selector); err == nil && el != nil { - return el, nil - } - } - + return nil, fmt.Errorf("未找到评论ID: %s", commentID) } @@ -273,16 +246,11 @@ func locateCommentElementByUserID(page *rod.Page, userID string) (*rod.Element, if userID == "" { return nil, fmt.Errorf("用户ID为空") } - + selectors := []string{ fmt.Sprintf(`[data-user-id="%s"]`, userID), - fmt.Sprintf(`[data-user_id="%s"]`, userID), - fmt.Sprintf(`[data-userid="%s"]`, userID), - fmt.Sprintf(`[data-uid="%s"]`, userID), - fmt.Sprintf(`a[data-user-id="%s"]`, userID), - fmt.Sprintf(`a[href*="%s"]`, userID), } - + for _, selector := range selectors { if el, err := page.Element(selector); err == nil && el != nil { // 使用JavaScript查找父级评论元素 @@ -302,70 +270,10 @@ func locateCommentElementByUserID(page *rod.Page, userID string) (*rod.Element, return el, nil } } - + return nil, fmt.Errorf("未找到用户ID: %s", userID) } -// 在指定容器内查找评论元素 -func locateCommentElementByCommentIDInContainer(container *rod.Element, commentID string) (*rod.Element, error) { - if commentID == "" { - return nil, fmt.Errorf("评论ID为空") - } - - // 首先尝试直接通过ID查找 - idSelector := fmt.Sprintf("#comment-%s", commentID) - if el, err := container.Element(idSelector); err == nil && el != nil { - return el, nil - } - - // 尝试其他data属性 - selectors := []string{ - fmt.Sprintf(`[data-comment-id="%s"]`, commentID), - fmt.Sprintf(`[data-comment_id="%s"]`, commentID), - fmt.Sprintf(`[data-commentid="%s"]`, commentID), - fmt.Sprintf(`[data-id="%s"]`, commentID), - fmt.Sprintf(`[comment-id="%s"]`, commentID), - } - for _, selector := range selectors { - if el, err := container.Element(selector); err == nil && el != nil { - return el, nil - } - } - - return nil, fmt.Errorf("在容器内未找到评论ID: %s", commentID) -} - -// 在指定容器内通过用户ID查找评论元素 -func locateCommentElementByUserIDInContainer(container *rod.Element, userID string) (*rod.Element, error) { - if userID == "" { - return nil, fmt.Errorf("用户ID为空") - } - - selectors := []string{ - fmt.Sprintf(`[data-user-id="%s"]`, userID), - fmt.Sprintf(`[data-user_id="%s"]`, userID), - fmt.Sprintf(`[data-userid="%s"]`, userID), - fmt.Sprintf(`[data-uid="%s"]`, userID), - fmt.Sprintf(`a[data-user-id="%s"]`, userID), - fmt.Sprintf(`a[href*="%s"]`, userID), - } - - for _, selector := range selectors { - if el, err := container.Element(selector); err == nil && el != nil { - // 找到用户链接,返回其父级评论元素 - if parent, err := el.Element(".comment-item"); err == nil && parent != nil { - return parent, nil - } - if parent, err := el.Element(".comment"); err == nil && parent != nil { - return parent, nil - } - return el, nil - } - } - - return nil, fmt.Errorf("在容器内未找到用户ID: %s", userID) -} - // 等待评论容器加载完成 func waitForCommentsContainer(page *rod.Page) { jsCode := `() => { @@ -393,7 +301,7 @@ func waitForCommentsContainer(page *rod.Page) { return checkContainer(); }` - + page.Eval(jsCode) time.Sleep(2 * time.Second) // 等待检查完成 } @@ -426,7 +334,7 @@ func ensureCommentsVisible(page *rod.Page) { return false; }` - + page.Eval(jsCode) time.Sleep(1 * time.Second) } @@ -464,7 +372,7 @@ func scrollComments(page *rod.Page) bool { // performFullScroll 执行更彻底的滚动策略 func performFullScroll(page *rod.Page) { logrus.Infof("执行彻底滚动策略") - + // 策略1: 滚动到评论容器的不同位置 scrollPositionsJS := `() => { const commentsContainer = document.querySelector('.comments-container'); @@ -493,11 +401,11 @@ func performFullScroll(page *rod.Page) { return true; }` - + if _, err := page.Eval(scrollPositionsJS); err != nil { logrus.Warnf("彻底滚动失败: %v", err) } - + } func buildIdentifier(commentID, userID string) string { @@ -510,57 +418,33 @@ func buildIdentifier(commentID, userID string) string { return userID } +// 选取当前层主的回复按钮 func findReplyButton(commentEl *rod.Element) (*rod.Element, error) { - logrus.Infof("开始查找回复按钮...") - - // 在right区域内查找interactions - right, err := commentEl.Element(".right") - if err != nil { - logrus.Errorf("未找到.right区域") - return nil, fmt.Errorf("未找到.right区域") + if commentEl == nil { + return nil, fmt.Errorf("评论元素为空") } - - interactions, err := right.Element(".interactions") - if err != nil { - logrus.Errorf("未找到.interactions区域") - return nil, fmt.Errorf("未找到.interactions区域") + + selector := ".right .interactions .reply" + btn, err := commentEl.Element(selector) + if err != nil || btn == nil { + logrus.Warnf("未找到回复按钮,选择器: %s, err: %v", selector, err) + return nil, fmt.Errorf("未找到回复按钮") } - - // 选择器列表 - selectors := []string{ - ".reply", // 回复容器(最通用) - ":nth-child(2)", // 第二个子元素(单评论) - ".reply-icon", // 回复图标 - ".reds-icon.reply-icon", // 带类的回复图标 - ".reply.icon-container", // 回复图标容器 - } - - // 在interactions区域内查找 - for _, selector := range selectors { - if el, err := interactions.Element(selector); err == nil && el != nil { - logrus.Infof("通过选择器 %s 找到回复按钮", selector) - return el, nil - } - } - - logrus.Errorf("未找到回复按钮") - return nil, fmt.Errorf("未找到回复按钮") + + logrus.Infof("通过选择器 %s 找到回复按钮", selector) + return btn, nil } // verifyClickSuccess 验证点击是否真的成功(检查是否出现了回复输入框) func verifyClickSuccess(clickedEl *rod.Element) bool { // 获取页面实例 page := clickedEl.Page() - + // 检查是否出现了回复输入框 selectors := []string{ "div.input-box div.content-edit p.content-input", - "div.input-box [contenteditable='true']", - "[contenteditable='true']", - "textarea", - "input[type='text']", } - + for _, selector := range selectors { if el, err := page.Element(selector); err == nil && el != nil { // 检查元素是否可见 @@ -570,35 +454,6 @@ func verifyClickSuccess(clickedEl *rod.Element) bool { } } } - - // 使用JavaScript检查是否有新的输入框出现 - jsCode := `() => { - // 查找所有可编辑元素 - const editables = document.querySelectorAll('[contenteditable="true"], textarea, input[type="text"]'); - for (const el of editables) { - // 检查元素是否可见 - const rect = el.getBoundingClientRect(); - if (rect.width > 0 && rect.height > 0) { - // 检查元素是否在视口中 - const inViewport = rect.top >= 0 && rect.left >= 0 && - rect.bottom <= window.innerHeight && - rect.right <= window.innerWidth; - if (inViewport) { - console.log('找到可见的输入元素:', el); - return true; - } - } - } - return false; - }` - - if result, err := page.Eval(jsCode); err == nil && result != nil { - if result.Value.Bool() { - logrus.Infof("JavaScript验证成功:找到可见的输入元素") - return true - } - } - logrus.Infof("验证失败:没有找到回复输入框") return false } @@ -615,29 +470,13 @@ func findReplyInput(page *rod.Page, commentEl *rod.Element) (*rod.Element, error return el, nil } selectors := []string{ - "div.input-box div.content-edit p.content-input", // 原有选择器 - "div.input-box [contenteditable='true']", // 通用输入框 - "[contenteditable='true']", // 任何可编辑元素 - "textarea", // 备用textarea - "input[type='text']", // 备用text输入框 - "[data-role='reply-input'] [contenteditable='true']", + "div.input-box div.content-edit p.content-input", // 原有选择器 } for _, selector := range selectors { if el, err := page.Element(selector); err == nil && el != nil { return el, nil } } - // 尝试在评论内部寻找可编辑区域 - if el, err := commentEl.Element("[contenteditable='true']"); err == nil && el != nil { - return el, nil - } - // 最后尝试:等待一下再查找,可能是动态加载的 - time.Sleep(1 * time.Second) - for _, selector := range selectors { - if el, err := page.Element(selector); err == nil && el != nil { - return el, nil - } - } return nil, fmt.Errorf("未找到回复输入框") } @@ -646,21 +485,28 @@ func tryClickChainForComment(el *rod.Element) bool { logrus.Errorf("要点击的元素为空") return false } - + // 获取元素信息用于调试 text, _ := el.Text() - class, _ := el.Attribute("class") - tag, _ := el.Describe(0, false) - logrus.Infof("准备点击元素 - 文本: '%s', 类: '%s', 标签: %s", text, class, tag) - + classAttr, _ := el.Attribute("class") + class := "" + if classAttr != nil { + class = *classAttr + } + tagName := "" + if desc, err := el.Describe(0, false); err == nil && desc != nil { + tagName = desc.NodeName + } + logrus.Infof("准备点击元素 - 文本: '%s', 类: '%s', 标签: %s", text, class, tagName) + // 检查元素是否可见和可点击 visible, _ := el.Visible() logrus.Infof("元素可见性: %v", visible) - + // 滚动到元素位置 _, _ = el.Eval(`() => { try { this.scrollIntoView({behavior: "instant", block: "center"}); } catch (e) {} return true }`) time.Sleep(500 * time.Millisecond) - + // 只使用直接点击方式 clickMethods := []struct { name string @@ -675,13 +521,13 @@ func tryClickChainForComment(el *rod.Element) bool { return true }}, } - + for i, method := range clickMethods { logrus.Infof("尝试点击方法 %d: %s", i+1, method.name) if method.fn(el) { // 点击后等待一下,检查是否有反应 time.Sleep(1 * time.Second) - + // 验证点击是否真的成功(检查是否出现了回复输入框) success := verifyClickSuccess(el) if success { @@ -693,7 +539,7 @@ func tryClickChainForComment(el *rod.Element) bool { } } } - + logrus.Errorf("所有点击方法都失败") return false } @@ -701,12 +547,6 @@ func tryClickChainForComment(el *rod.Element) bool { func findSubmitButton(page *rod.Page) (*rod.Element, error) { selectors := []string{ "div.bottom button.submit", - "button.submit", - "button.reds-button", - "button[type='submit']", - "button:contains('回复')", - "button:contains('发布')", - "button:contains('发送')", } for _, selector := range selectors { if el, err := page.Element(selector); err == nil && el != nil { @@ -716,22 +556,5 @@ func findSubmitButton(page *rod.Page) (*rod.Element, error) { } } } - // 使用JS查找包含特定文本的按钮 - jsCode := `() => { - const buttons = document.querySelectorAll('button'); - for (const btn of buttons) { - const text = btn.textContent || btn.innerText || ''; - if (text.includes('回复') || text.includes('发布') || text.includes('发送')) { - const disabled = btn.getAttribute('disabled'); - if (!disabled) { - return btn; - } - } - } - return null; - }` - if el, err := page.ElementByJS(rod.Eval(jsCode)); err == nil && el != nil { - return el, nil - } return nil, fmt.Errorf("未找到回复发布按钮") -} \ No newline at end of file +} From 6484e58aded3c523c13959a4f65ba1c9a7277afb Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Sat, 1 Nov 2025 20:55:51 +0800 Subject: [PATCH 07/19] feat(feed_detail): add loadAllComments parameter to GetFeedDetail functionality - Enhanced GetFeedDetail method to support loading all comments based on the new loadAllComments parameter. - Updated related handlers and request structures to accommodate the new parameter. - Improved logging to reflect the loading of all comments during feed detail retrieval. - Implemented JavaScript logic to scroll and collect comments when loadAllComments is true. --- handlers_api.go | 2 +- mcp_handlers.go | 24 +++- mcp_server.go | 10 +- service.go | 4 +- types.go | 5 +- xiaohongshu/feed_detail.go | 256 ++++++++++++++++++++++++++++++++++++- 6 files changed, 286 insertions(+), 15 deletions(-) diff --git a/handlers_api.go b/handlers_api.go index cbd0ebd..646ccaa 100644 --- a/handlers_api.go +++ b/handlers_api.go @@ -165,7 +165,7 @@ func (s *AppServer) getFeedDetailHandler(c *gin.Context) { } // 获取 Feed 详情 - result, err := s.xiaohongshuService.GetFeedDetail(c.Request.Context(), req.FeedID, req.XsecToken) + result, err := s.xiaohongshuService.GetFeedDetail(c.Request.Context(), req.FeedID, req.XsecToken, req.LoadAllComments) if err != nil { respondError(c, http.StatusInternalServerError, "GET_FEED_DETAIL_FAILED", "获取Feed详情失败", err.Error()) diff --git a/mcp_handlers.go b/mcp_handlers.go index ff4ff58..be1e03f 100644 --- a/mcp_handlers.go +++ b/mcp_handlers.go @@ -4,10 +4,12 @@ import ( "context" "encoding/json" "fmt" - "github.com/sirupsen/logrus" - "github.com/xpzouying/xiaohongshu-mcp/xiaohongshu" + "strconv" "strings" "time" + + "github.com/sirupsen/logrus" + "github.com/xpzouying/xiaohongshu-mcp/xiaohongshu" ) // MCP 工具处理函数 @@ -306,9 +308,23 @@ func (s *AppServer) handleGetFeedDetail(ctx context.Context, args map[string]any } } - logrus.Infof("MCP: 获取Feed详情 - Feed ID: %s", feedID) + loadAll := false + if raw, ok := args["load_all_comments"]; ok { + switch v := raw.(type) { + case bool: + loadAll = v + case string: + if parsed, err := strconv.ParseBool(v); err == nil { + loadAll = parsed + } + case float64: + loadAll = v != 0 + } + } - result, err := s.xiaohongshuService.GetFeedDetail(ctx, feedID, xsecToken) + logrus.Infof("MCP: 获取Feed详情 - Feed ID: %s, loadAllComments=%v", feedID, loadAll) + + result, err := s.xiaohongshuService.GetFeedDetail(ctx, feedID, xsecToken, loadAll) if err != nil { return &MCPToolResult{ Content: []MCPContent{{ diff --git a/mcp_server.go b/mcp_server.go index 71a39bd..3c127e8 100644 --- a/mcp_server.go +++ b/mcp_server.go @@ -45,8 +45,9 @@ type FilterOption struct { // FeedDetailArgs 获取Feed详情的参数 type FeedDetailArgs struct { - FeedID string `json:"feed_id" jsonschema:"小红书笔记ID,从Feed列表获取"` - XsecToken string `json:"xsec_token" jsonschema:"访问令牌,从Feed列表的xsecToken字段获取"` + FeedID string `json:"feed_id" jsonschema:"小红书笔记ID,从Feed列表获取"` + XsecToken string `json:"xsec_token" jsonschema:"访问令牌,从Feed列表的xsecToken字段获取"` + LoadAllComments bool `json:"load_all_comments,omitempty" jsonschema:"是否加载全部评论(默认false,仅返回首批评论)"` } // UserProfileArgs 获取用户主页的参数 @@ -213,8 +214,9 @@ func registerTools(server *mcp.Server, appServer *AppServer) { }, withPanicRecovery("get_feed_detail", func(ctx context.Context, req *mcp.CallToolRequest, args FeedDetailArgs) (*mcp.CallToolResult, any, error) { argsMap := map[string]interface{}{ - "feed_id": args.FeedID, - "xsec_token": args.XsecToken, + "feed_id": args.FeedID, + "xsec_token": args.XsecToken, + "load_all_comments": args.LoadAllComments, } result := appServer.handleGetFeedDetail(ctx, argsMap) return convertToMCPResult(result), nil, nil diff --git a/service.go b/service.go index a3fa71e..2c7c1a3 100644 --- a/service.go +++ b/service.go @@ -321,7 +321,7 @@ func (s *XiaohongshuService) SearchFeeds(ctx context.Context, keyword string, fi } // GetFeedDetail 获取Feed详情 -func (s *XiaohongshuService) GetFeedDetail(ctx context.Context, feedID, xsecToken string) (*FeedDetailResponse, error) { +func (s *XiaohongshuService) GetFeedDetail(ctx context.Context, feedID, xsecToken string, loadAllComments bool) (*FeedDetailResponse, error) { b := newBrowser() defer b.Close() @@ -332,7 +332,7 @@ func (s *XiaohongshuService) GetFeedDetail(ctx context.Context, feedID, xsecToke action := xiaohongshu.NewFeedDetailAction(page) // 获取 Feed 详情 - result, err := action.GetFeedDetail(ctx, feedID, xsecToken) + result, err := action.GetFeedDetail(ctx, feedID, xsecToken, loadAllComments) if err != nil { return nil, err } diff --git a/types.go b/types.go index f622cfb..cfd3a8c 100644 --- a/types.go +++ b/types.go @@ -36,8 +36,9 @@ type MCPContent struct { // FeedDetailRequest Feed详情请求 type FeedDetailRequest struct { - FeedID string `json:"feed_id" binding:"required"` - XsecToken string `json:"xsec_token" binding:"required"` + FeedID string `json:"feed_id" binding:"required"` + XsecToken string `json:"xsec_token" binding:"required"` + LoadAllComments bool `json:"load_all_comments,omitempty"` } type SearchFeedsRequest struct { diff --git a/xiaohongshu/feed_detail.go b/xiaohongshu/feed_detail.go index 8921498..6adcfb2 100644 --- a/xiaohongshu/feed_detail.go +++ b/xiaohongshu/feed_detail.go @@ -22,8 +22,8 @@ func NewFeedDetailAction(page *rod.Page) *FeedDetailAction { } // GetFeedDetail 获取 Feed 详情页数据 -func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken string) (*FeedDetailResponse, error) { - page := f.page.Context(ctx).Timeout(60 * time.Second) +func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken string, loadAllComments bool) (*FeedDetailResponse, error) { + page := f.page.Context(ctx).Timeout(5 * time.Minute) // 构建详情页 URL url := makeFeedDetailURL(feedID, xsecToken) @@ -35,6 +35,217 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken page.MustWaitDOMStable() time.Sleep(1 * time.Second) + var domCommentsPayload string + if loadAllComments { + scrollToEndJS := `() => { + const END_SELECTOR = '.end-container'; + const DELTA_MIN = 520; + const MAX_ATTEMPTS = 60; + const WAIT_AFTER_SCROLL = 420; + + const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + const scrollRoot = document.scrollingElement || document.documentElement || document.body; + + const reachedEnd = () => { + const endEl = document.querySelector(END_SELECTOR); + if (!endEl) return false; + const text = (endEl.textContent || '').toUpperCase(); + if (text.includes('THE END')) return true; + const rect = endEl.getBoundingClientRect(); + return rect.top >= 0 && rect.top <= (window.innerHeight || document.documentElement.clientHeight || 0); + }; + + const collectCandidates = () => { + const container = document.querySelector('.comments-container'); + const set = new Set(); + + const push = (node) => { + if (node && node instanceof HTMLElement) { + set.add(node); + } + }; + + push(document.body); + push(document.documentElement); + push(scrollRoot); + + if (container) { + let current = container; + while (current) { + push(current); + if (current === document.body || current === document.documentElement) { + break; + } + current = current.parentElement; + } + container.querySelectorAll('.comments-el, .list-container, [data-v-4a19279a][name="list"]').forEach(push); + } + + const ranked = Array.from(set).map((node) => { + const style = window.getComputedStyle(node); + const scrollable = node.scrollHeight - node.clientHeight > 40; + const hasScroll = /auto|scroll|overlay/i.test(style.overflowY || ''); + const weight = + (node === scrollRoot ? 800 : 0) + + (container && node === container ? 1200 : 0) + + (container && node.contains && node.contains(container) ? 600 : 0) + + (hasScroll ? 300 : 0) + + (scrollable ? 300 : 0) - + (node === document.body || node === document.documentElement ? 80 : 0); + return { node, weight }; + }).sort((a, b) => b.weight - a.weight); + + return ranked.slice(0, 8).map((item) => item.node); + }; + + const metrics = (el) => { + if (!el || el === document || el === window) { + const root = scrollRoot; + return { + top: root.scrollTop, + max: Math.max(root.scrollHeight - root.clientHeight, 0), + client: root.clientHeight || window.innerHeight + }; + } + return { + top: el.scrollTop, + max: Math.max(el.scrollHeight - el.clientHeight, 0), + client: el.clientHeight + }; + }; + + const setScrollTop = (el, value) => { + if (!el) return; + if (el === document.body || el === document.documentElement || el === scrollRoot || el === document || el === window) { + scrollRoot.scrollTop = value; + } else { + el.scrollTop = value; + } + }; + + const dispatchWheel = (el, delta) => { + if (!el) return; + try { + el.dispatchEvent(new Event('scroll', { bubbles: true })); + if (typeof WheelEvent === 'function' && delta !== 0) { + const wheel = new WheelEvent('wheel', { deltaY: delta, bubbles: true, cancelable: true }); + el.dispatchEvent(wheel); + } + } catch (err) { + console.debug('dispatchWheel error', err); + } + }; + + const waitForMove = (el, beforeTop) => { + let tries = 0; + return new Promise((resolve) => { + const tick = () => { + tries++; + const now = metrics(el).top; + if (Math.abs(now - beforeTop) >= 6 || tries >= 6) { + resolve(Math.abs(now - beforeTop) >= 6); + return; + } + setTimeout(tick, 60); + }; + setTimeout(tick, 60); + }); + }; + + const scrollOnce = async (node) => { + const before = metrics(node); + const delta = Math.max(before.client * 0.85, DELTA_MIN); + const desired = before.max > 0 ? Math.min(before.top + delta, before.max) : before.top + delta; + const applied = Math.max(0, desired - before.top); + setScrollTop(node, desired); + dispatchWheel(node, applied); + const moved = await waitForMove(node, before.top); + if (!moved && node !== scrollRoot) { + const rootBefore = metrics(scrollRoot).top; + setScrollTop(scrollRoot, rootBefore + applied); + dispatchWheel(scrollRoot, applied); + return waitForMove(scrollRoot, rootBefore); + } + return moved; + }; + + return (async () => { + for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { + const candidates = collectCandidates(); + for (const node of candidates) { + const moved = await scrollOnce(node); + if (moved) { + await sleep(WAIT_AFTER_SCROLL); + break; + } + } + if (reachedEnd()) { + return JSON.stringify({ status: 'end', attempts: attempt + 1 }); + } + } + return JSON.stringify({ status: 'timeout' }); + })().catch((err) => JSON.stringify({ status: 'error', message: err && err.message ? err.message : String(err) })); + }` + + if res, err := page.Eval(scrollToEndJS); err != nil { + logrus.Warnf("加载全部评论失败: %v", err) + } else if res != nil { + logrus.Infof("评论滚动结果: %v", res.Value) + } + + collectCommentsJS := `() => { + try { + const container = document.querySelector('.comments-container'); + if (!container) { + return JSON.stringify({ list: [], reachedEnd: false, error: 'comments container not found' }); + } + + const items = Array.from(container.querySelectorAll('.comment-item')); + const seen = new Set(); + const list = []; + + const textContent = (node) => (node && node.textContent ? node.textContent.trim() : ''); + + for (const item of items) { + let rawId = item.getAttribute('id') || ''; + if (!rawId && item.dataset) { + rawId = item.dataset.commentId || item.dataset.id || ''; + } + const commentId = rawId.replace(/^comment-/, '') || rawId; + if (!commentId || seen.has(commentId)) { + continue; + } + seen.add(commentId); + + const contentEl = item.querySelector('.comment-content, .content, .content-text, .text, .word'); + const nicknameEl = item.querySelector('.user-name, .nickname, .name, .author-name, .title'); + const userNode = item.querySelector('[data-user-id]'); + const likeEl = item.querySelector('.like .count, .interaction .like span, .interaction-bar .like span, [class*="like"] span'); + + list.push({ + id: commentId, + content: textContent(contentEl), + nickname: textContent(nicknameEl), + userId: userNode ? (userNode.getAttribute('data-user-id') || '') : '', + likeCount: textContent(likeEl), + }); + } + + const endEl = document.querySelector('.end-container'); + const reachedEnd = !!(endEl && (endEl.textContent || '').toUpperCase().includes('THE END')); + return JSON.stringify({ list, reachedEnd }); + } catch (err) { + return JSON.stringify({ list: [], reachedEnd: false, error: err && err.message ? err.message : String(err) }); + } + }` + + if res, err := page.Eval(collectCommentsJS); err != nil { + logrus.Warnf("收集评论失败: %v", err) + } else if res != nil { + domCommentsPayload = res.Value.Str() + } + } + result := page.MustEval(`() => { if (window.__INITIAL_STATE__ && window.__INITIAL_STATE__.note && @@ -63,6 +274,47 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken return nil, fmt.Errorf("feed %s not found in noteDetailMap", feedID) } + if loadAllComments && domCommentsPayload != "" { + var payload struct { + List []struct { + ID string `json:"id"` + Content string `json:"content"` + Nickname string `json:"nickname"` + UserID string `json:"userId"` + LikeCount string `json:"likeCount"` + } + ReachedEnd bool `json:"reachedEnd"` + Error string `json:"error"` + } + + if err := json.Unmarshal([]byte(domCommentsPayload), &payload); err != nil { + logrus.Warnf("解析 DOM 评论数据失败: %v", err) + } else if payload.Error != "" { + logrus.Warnf("DOM 评论数据返回错误: %s", payload.Error) + } else if len(payload.List) > 0 { + comments := make([]Comment, 0, len(payload.List)) + for _, item := range payload.List { + comments = append(comments, Comment{ + ID: item.ID, + NoteID: feedID, + Content: item.Content, + LikeCount: item.LikeCount, + UserInfo: User{ + UserID: item.UserID, + Nickname: item.Nickname, + NickName: item.Nickname, + }, + SubComments: nil, + SubCommentCount: "0", + }) + } + + noteDetail.Comments.List = comments + noteDetail.Comments.Cursor = "" + noteDetail.Comments.HasMore = !payload.ReachedEnd + } + } + return &FeedDetailResponse{ Note: noteDetail.Note, Comments: noteDetail.Comments, From 19b0f8545d4de10c18572efd18b72a66ca4e405f Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Sat, 1 Nov 2025 21:21:47 +0800 Subject: [PATCH 08/19] refactor(feed_detail): optimize comment loading logic in GetFeedDetail - Replaced the previous comment loading JavaScript with a more efficient scrolling and collection mechanism. - Improved the logic for determining scroll targets and handling comment counts. - Enhanced error handling and logging for comment loading failures. - Removed deprecated code related to the old comment loading approach. --- xiaohongshu/feed_detail.go | 414 ++++++++++++++++--------------------- 1 file changed, 177 insertions(+), 237 deletions(-) diff --git a/xiaohongshu/feed_detail.go b/xiaohongshu/feed_detail.go index 6adcfb2..64d1a2d 100644 --- a/xiaohongshu/feed_detail.go +++ b/xiaohongshu/feed_detail.go @@ -35,214 +35,195 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken page.MustWaitDOMStable() time.Sleep(1 * time.Second) - var domCommentsPayload string if loadAllComments { - scrollToEndJS := `() => { - const END_SELECTOR = '.end-container'; - const DELTA_MIN = 520; - const MAX_ATTEMPTS = 60; - const WAIT_AFTER_SCROLL = 420; + scrollAllCommentsJS := `() => { + const INTERVAL_MS = 900; + const STAGNANT_LIMIT = 8; + const NO_CHANGE_SCROLL_LIMIT = 3; + const DELTA_MIN = 480; + const SCROLL_TIMEOUT = 900; + const MAX_ATTEMPTS = 200; - const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); - const scrollRoot = document.scrollingElement || document.documentElement || document.body; - - const reachedEnd = () => { - const endEl = document.querySelector(END_SELECTOR); - if (!endEl) return false; - const text = (endEl.textContent || '').toUpperCase(); - if (text.includes('THE END')) return true; - const rect = endEl.getBoundingClientRect(); - return rect.top >= 0 && rect.top <= (window.innerHeight || document.documentElement.clientHeight || 0); - }; - - const collectCandidates = () => { - const container = document.querySelector('.comments-container'); - const set = new Set(); - - const push = (node) => { - if (node && node instanceof HTMLElement) { - set.add(node); - } - }; - - push(document.body); - push(document.documentElement); - push(scrollRoot); - - if (container) { - let current = container; - while (current) { - push(current); - if (current === document.body || current === document.documentElement) { - break; - } - current = current.parentElement; - } - container.querySelectorAll('.comments-el, .list-container, [data-v-4a19279a][name="list"]').forEach(push); - } - - const ranked = Array.from(set).map((node) => { - const style = window.getComputedStyle(node); - const scrollable = node.scrollHeight - node.clientHeight > 40; - const hasScroll = /auto|scroll|overlay/i.test(style.overflowY || ''); - const weight = - (node === scrollRoot ? 800 : 0) + - (container && node === container ? 1200 : 0) + - (container && node.contains && node.contains(container) ? 600 : 0) + - (hasScroll ? 300 : 0) + - (scrollable ? 300 : 0) - - (node === document.body || node === document.documentElement ? 80 : 0); - return { node, weight }; - }).sort((a, b) => b.weight - a.weight); - - return ranked.slice(0, 8).map((item) => item.node); - }; - - const metrics = (el) => { - if (!el || el === document || el === window) { - const root = scrollRoot; - return { - top: root.scrollTop, - max: Math.max(root.scrollHeight - root.clientHeight, 0), - client: root.clientHeight || window.innerHeight - }; - } + const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + const scrollRoot = () => document.scrollingElement || document.documentElement || document.body; + const getContainer = () => document.querySelector('.comments-container'); + const getCommentCount = (container) => + container ? container.querySelectorAll('.comment-item, .comment-item-sub, .comment').length : 0; + const getTotalCount = (container) => { + if (!container) return null; + const text = (container.querySelector('.total')?.textContent || '').replace(/\s+/g, ''); + const match = text.match(/共(\d+)条评论/); + return match ? parseInt(match[1], 10) : null; + }; + const getScrollMetrics = (el) => { + if (!el) { + return { top: 0, max: 0, client: window.innerHeight }; + } + if (el === window || el === document || el === document.body || el === document.documentElement) { + const root = scrollRoot(); return { - top: el.scrollTop, - max: Math.max(el.scrollHeight - el.clientHeight, 0), - client: el.clientHeight + top: root.scrollTop, + max: Math.max(root.scrollHeight - root.clientHeight, 0), + client: root.clientHeight || window.innerHeight }; + } + return { + top: el.scrollTop, + max: Math.max(el.scrollHeight - el.clientHeight, 0), + client: el.clientHeight }; - - const setScrollTop = (el, value) => { - if (!el) return; - if (el === document.body || el === document.documentElement || el === scrollRoot || el === document || el === window) { - scrollRoot.scrollTop = value; - } else { - el.scrollTop = value; - } - }; - - const dispatchWheel = (el, delta) => { - if (!el) return; - try { - el.dispatchEvent(new Event('scroll', { bubbles: true })); - if (typeof WheelEvent === 'function' && delta !== 0) { - const wheel = new WheelEvent('wheel', { deltaY: delta, bubbles: true, cancelable: true }); - el.dispatchEvent(wheel); - } - } catch (err) { - console.debug('dispatchWheel error', err); - } - }; - - const waitForMove = (el, beforeTop) => { - let tries = 0; - return new Promise((resolve) => { - const tick = () => { - tries++; - const now = metrics(el).top; - if (Math.abs(now - beforeTop) >= 6 || tries >= 6) { - resolve(Math.abs(now - beforeTop) >= 6); - return; - } - setTimeout(tick, 60); - }; - setTimeout(tick, 60); + }; + const setScrollTop = (el, value) => { + if (!el) return; + if (el === window || el === document || el === document.body || el === document.documentElement) { + const root = scrollRoot(); + root.scrollTop = value; + window.scrollTo(0, value); + return; + } + el.scrollTop = value; + }; + const dispatchWheel = (el, delta) => { + if (!el) return; + try { + const wheel = new WheelEvent('wheel', { + deltaY: delta, + bubbles: true, + cancelable: true }); - }; - - const scrollOnce = async (node) => { - const before = metrics(node); - const delta = Math.max(before.client * 0.85, DELTA_MIN); - const desired = before.max > 0 ? Math.min(before.top + delta, before.max) : before.top + delta; - const applied = Math.max(0, desired - before.top); - setScrollTop(node, desired); - dispatchWheel(node, applied); - const moved = await waitForMove(node, before.top); - if (!moved && node !== scrollRoot) { - const rootBefore = metrics(scrollRoot).top; - setScrollTop(scrollRoot, rootBefore + applied); - dispatchWheel(scrollRoot, applied); - return waitForMove(scrollRoot, rootBefore); - } - return moved; - }; - - return (async () => { - for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { - const candidates = collectCandidates(); - for (const node of candidates) { - const moved = await scrollOnce(node); - if (moved) { - await sleep(WAIT_AFTER_SCROLL); - break; - } - } - if (reachedEnd()) { - return JSON.stringify({ status: 'end', attempts: attempt + 1 }); + el.dispatchEvent(wheel); + el.dispatchEvent(new Event('scroll', { bubbles: true })); + } catch (err) { + console.debug('dispatchWheel error', err); + } + }; + let cachedTarget = null; + const collectCandidates = () => { + const container = getContainer(); + const candidatesSet = new Set(); + if (container) { + let current = container; + while (current) { + if (current instanceof HTMLElement) { + candidatesSet.add(current); } + current = current.parentElement; } - return JSON.stringify({ status: 'timeout' }); - })().catch((err) => JSON.stringify({ status: 'error', message: err && err.message ? err.message : String(err) })); - }` + container.querySelectorAll('*').forEach((node) => { + if (node instanceof HTMLElement) { + candidatesSet.add(node); + } + }); + } + [document.body, document.documentElement].forEach((node) => { + if (node instanceof HTMLElement) { + candidatesSet.add(node); + } + }); + const candidates = []; + candidatesSet.forEach((node) => { + const style = window.getComputedStyle(node); + const overflowY = style.overflowY; + const scrollable = node.scrollHeight - node.clientHeight > 40; + const hasScrollStyle = /auto|scroll|overlay/i.test(overflowY); + const weight = + (node.contains(container) ? 1000 : 0) + + (node === container ? 800 : 0) + + (hasScrollStyle ? 400 : 0) + + (scrollable ? 300 : 0) - + (node === document.body || node === document.documentElement ? 50 : 0); + if (scrollable || hasScrollStyle || node === document.body || node === document.documentElement) { + candidates.push({ node, weight }); + } + }); + candidates.sort((a, b) => b.weight - a.weight); + return candidates.map((candidate) => candidate.node); + }; + const findScrollTarget = () => { + if (cachedTarget && cachedTarget.isConnected) { + return cachedTarget; + } + const candidates = collectCandidates(); + cachedTarget = candidates.find((node) => { + const metrics = getScrollMetrics(node); + return metrics.max > 30 || metrics.client > 0; + }) || scrollRoot(); + return cachedTarget; + }; + const performScroll = (target) => { + const scrollTarget = target || findScrollTarget(); + if (!scrollTarget) { + window.scrollBy(0, window.innerHeight * 0.8); + return; + } + const metrics = getScrollMetrics(scrollTarget); + const beforeTop = metrics.top; + const desired = metrics.max > 0 ? Math.min(metrics.top + Math.max(metrics.client * 0.85, DELTA_MIN), metrics.max) : metrics.top + Math.max(metrics.client * 0.85, DELTA_MIN); + const applied = Math.max(0, desired - metrics.top); + setScrollTop(scrollTarget, desired); + dispatchWheel(scrollTarget, applied); + const afterTop = getScrollMetrics(scrollTarget).top; + if (Math.abs(afterTop - beforeTop) < 5 && scrollTarget !== scrollRoot()) { + const root = scrollRoot(); + const rootBefore = root.scrollTop; + root.scrollTop = rootBefore + applied; + window.scrollBy(0, applied); + dispatchWheel(root, applied); + } + }; + return (async () => { + let lastCount = 0; + let stagnantChecks = 0; + let noScrollChangeCount = 0; + for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { + const container = getContainer(); + if (!container) { + await sleep(300); + continue; + } + const total = getTotalCount(container); + const count = getCommentCount(container); + if (total && count >= total) { + return { status: 'complete', reason: 'total', attempts: attempt + 1, count, total }; + } + if (count === lastCount) { + stagnantChecks += 1; + } else { + lastCount = count; + stagnantChecks = 0; + } + if (stagnantChecks >= STAGNANT_LIMIT) { + return { status: 'complete', reason: 'stagnant', attempts: attempt + 1, count, total }; + } + const target = findScrollTarget(); + const beforeTop = getScrollMetrics(target).top; + performScroll(target); + await sleep(SCROLL_TIMEOUT); + const afterTop = getScrollMetrics(target).top; + if (Math.abs(afterTop - beforeTop) < 5) { + noScrollChangeCount += 1; + } else { + noScrollChangeCount = 0; + } + if (noScrollChangeCount >= NO_CHANGE_SCROLL_LIMIT) { + return { status: 'complete', reason: 'no-scroll-change', attempts: attempt + 1, count, total }; + } + if (INTERVAL_MS > SCROLL_TIMEOUT) { + await sleep(INTERVAL_MS - SCROLL_TIMEOUT); + } + } + return { status: 'timeout' }; + })() + .then((res) => JSON.stringify(res)) + .catch((err) => JSON.stringify({ status: 'error', message: err && err.message ? err.message : String(err) })); + }` - if res, err := page.Eval(scrollToEndJS); err != nil { + if res, err := page.Eval(scrollAllCommentsJS); err != nil { logrus.Warnf("加载全部评论失败: %v", err) } else if res != nil { - logrus.Infof("评论滚动结果: %v", res.Value) - } - - collectCommentsJS := `() => { - try { - const container = document.querySelector('.comments-container'); - if (!container) { - return JSON.stringify({ list: [], reachedEnd: false, error: 'comments container not found' }); - } - - const items = Array.from(container.querySelectorAll('.comment-item')); - const seen = new Set(); - const list = []; - - const textContent = (node) => (node && node.textContent ? node.textContent.trim() : ''); - - for (const item of items) { - let rawId = item.getAttribute('id') || ''; - if (!rawId && item.dataset) { - rawId = item.dataset.commentId || item.dataset.id || ''; - } - const commentId = rawId.replace(/^comment-/, '') || rawId; - if (!commentId || seen.has(commentId)) { - continue; - } - seen.add(commentId); - - const contentEl = item.querySelector('.comment-content, .content, .content-text, .text, .word'); - const nicknameEl = item.querySelector('.user-name, .nickname, .name, .author-name, .title'); - const userNode = item.querySelector('[data-user-id]'); - const likeEl = item.querySelector('.like .count, .interaction .like span, .interaction-bar .like span, [class*="like"] span'); - - list.push({ - id: commentId, - content: textContent(contentEl), - nickname: textContent(nicknameEl), - userId: userNode ? (userNode.getAttribute('data-user-id') || '') : '', - likeCount: textContent(likeEl), - }); - } - - const endEl = document.querySelector('.end-container'); - const reachedEnd = !!(endEl && (endEl.textContent || '').toUpperCase().includes('THE END')); - return JSON.stringify({ list, reachedEnd }); - } catch (err) { - return JSON.stringify({ list: [], reachedEnd: false, error: err && err.message ? err.message : String(err) }); + if str := res.Value.Str(); str != "" { + logrus.Infof("评论滚动结果: %s", str) } - }` - - if res, err := page.Eval(collectCommentsJS); err != nil { - logrus.Warnf("收集评论失败: %v", err) - } else if res != nil { - domCommentsPayload = res.Value.Str() } } @@ -274,47 +255,6 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken return nil, fmt.Errorf("feed %s not found in noteDetailMap", feedID) } - if loadAllComments && domCommentsPayload != "" { - var payload struct { - List []struct { - ID string `json:"id"` - Content string `json:"content"` - Nickname string `json:"nickname"` - UserID string `json:"userId"` - LikeCount string `json:"likeCount"` - } - ReachedEnd bool `json:"reachedEnd"` - Error string `json:"error"` - } - - if err := json.Unmarshal([]byte(domCommentsPayload), &payload); err != nil { - logrus.Warnf("解析 DOM 评论数据失败: %v", err) - } else if payload.Error != "" { - logrus.Warnf("DOM 评论数据返回错误: %s", payload.Error) - } else if len(payload.List) > 0 { - comments := make([]Comment, 0, len(payload.List)) - for _, item := range payload.List { - comments = append(comments, Comment{ - ID: item.ID, - NoteID: feedID, - Content: item.Content, - LikeCount: item.LikeCount, - UserInfo: User{ - UserID: item.UserID, - Nickname: item.Nickname, - NickName: item.Nickname, - }, - SubComments: nil, - SubCommentCount: "0", - }) - } - - noteDetail.Comments.List = comments - noteDetail.Comments.Cursor = "" - noteDetail.Comments.HasMore = !payload.ReachedEnd - } - } - return &FeedDetailResponse{ Note: noteDetail.Note, Comments: noteDetail.Comments, From c47ef233aa04aee74593e8243924cc0300d397ef Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Sat, 15 Nov 2025 14:58:54 +0800 Subject: [PATCH 09/19] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=9B=B4=E5=A4=9A?= =?UTF-8?q?=E8=AF=84=E8=AE=BA=E8=AF=A6=E6=83=85=E5=86=85=E5=AE=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- xiaohongshu/feed_detail.go | 127 +++++++++++++++++++++++++++++++++++-- 1 file changed, 123 insertions(+), 4 deletions(-) diff --git a/xiaohongshu/feed_detail.go b/xiaohongshu/feed_detail.go index 64d1a2d..7eecf6b 100644 --- a/xiaohongshu/feed_detail.go +++ b/xiaohongshu/feed_detail.go @@ -43,6 +43,8 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken const DELTA_MIN = 480; const SCROLL_TIMEOUT = 900; const MAX_ATTEMPTS = 200; + const CLICK_MORE_INTERVAL = 2; // 每滚动2次检查一次"更多"按钮 + const CLICK_WAIT_TIME = 300; // 点击后等待时间 const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); const scrollRoot = () => document.scrollingElement || document.documentElement || document.body; @@ -97,6 +99,77 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken console.debug('dispatchWheel error', err); } }; + + // 点击所有"更多"按钮 - 使用多种策略确保不遗漏 + const clickShowMoreButtons = () => { + // 尝试多个可能的选择器 + const selectors = [ + '.show-more', + '.show-more-btn', + '[class*="show-more"]', + '[class*="showMore"]', + 'button:has-text("更多")', + 'span:has-text("更多")', + 'div:has-text("更多")' + ]; + + const clickedElements = new Set(); + let clickedCount = 0; + + selectors.forEach((selector) => { + try { + const elements = document.querySelectorAll(selector); + elements.forEach((el) => { + // 避免重复点击同一个元素 + if (clickedElements.has(el)) return; + + // 检查元素文本是否包含"更多"或者是否有相关class + const text = el.textContent || ''; + const className = el.className || ''; + const shouldClick = text.includes('更多') || + className.includes('show-more') || + className.includes('showMore'); + + if (!shouldClick) return; + + // 检查元素是否可见(放宽条件,不要求完全在视口内) + const rect = el.getBoundingClientRect(); + const style = window.getComputedStyle(el); + const isVisible = ( + rect.height > 0 && + rect.width > 0 && + style.display !== 'none' && + style.visibility !== 'hidden' && + style.opacity !== '0' && + rect.top < window.innerHeight + 500 && // 允许元素在视口下方500px内 + rect.bottom > -500 // 允许元素在视口上方500px内 + ); + + if (isVisible) { + try { + // 尝试多种点击方式 + el.click(); + + // 如果是嵌套元素,也尝试点击父元素 + if (el.parentElement && el.parentElement.classList.contains('show-more')) { + el.parentElement.click(); + } + + clickedElements.add(el); + clickedCount++; + } catch (err) { + console.debug('点击失败', err); + } + } + }); + } catch (err) { + console.debug('选择器错误: ' + selector, err); + } + }); + + return clickedCount; + }; + let cachedTarget = null; const collectCandidates = () => { const container = getContainer(); @@ -175,16 +248,45 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken let lastCount = 0; let stagnantChecks = 0; let noScrollChangeCount = 0; + let totalClickedButtons = 0; + for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { const container = getContainer(); if (!container) { await sleep(300); continue; } + + // 每隔一定次数检查并点击"更多"按钮 + if (attempt % CLICK_MORE_INTERVAL === 0) { + const clicked = clickShowMoreButtons(); + if (clicked > 0) { + totalClickedButtons += clicked; + console.log('点击了 ' + clicked + ' 个"更多"按钮,累计: ' + totalClickedButtons); + await sleep(CLICK_WAIT_TIME); // 等待内容展开 + + // 点击后再次检查是否有新的"更多"按钮出现 + await sleep(200); + const clicked2 = clickShowMoreButtons(); + if (clicked2 > 0) { + totalClickedButtons += clicked2; + console.log('二次检查点击了 ' + clicked2 + ' 个"更多"按钮'); + await sleep(CLICK_WAIT_TIME); + } + } + } + const total = getTotalCount(container); const count = getCommentCount(container); if (total && count >= total) { - return { status: 'complete', reason: 'total', attempts: attempt + 1, count, total }; + return { + status: 'complete', + reason: 'total', + attempts: attempt + 1, + count, + total, + clickedButtons: totalClickedButtons + }; } if (count === lastCount) { stagnantChecks += 1; @@ -193,7 +295,14 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken stagnantChecks = 0; } if (stagnantChecks >= STAGNANT_LIMIT) { - return { status: 'complete', reason: 'stagnant', attempts: attempt + 1, count, total }; + return { + status: 'complete', + reason: 'stagnant', + attempts: attempt + 1, + count, + total, + clickedButtons: totalClickedButtons + }; } const target = findScrollTarget(); const beforeTop = getScrollMetrics(target).top; @@ -206,13 +315,23 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken noScrollChangeCount = 0; } if (noScrollChangeCount >= NO_CHANGE_SCROLL_LIMIT) { - return { status: 'complete', reason: 'no-scroll-change', attempts: attempt + 1, count, total }; + return { + status: 'complete', + reason: 'no-scroll-change', + attempts: attempt + 1, + count, + total, + clickedButtons: totalClickedButtons + }; } if (INTERVAL_MS > SCROLL_TIMEOUT) { await sleep(INTERVAL_MS - SCROLL_TIMEOUT); } } - return { status: 'timeout' }; + return { + status: 'timeout', + clickedButtons: totalClickedButtons + }; })() .then((res) => JSON.stringify(res)) .catch((err) => JSON.stringify({ status: 'error', message: err && err.message ? err.message : String(err) })); From 3947509479248f06c29f2b14295584efa9b56c0e Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Mon, 24 Nov 2025 02:05:30 +0800 Subject: [PATCH 10/19] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=BB=9A?= =?UTF-8?q?=E5=8A=A8=E6=BB=91=E5=8A=A8=E5=9B=9E=E5=A4=8D=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- xiaohongshu/comment_feed.go | 686 ++++++++++++++++++++---------------- xiaohongshu/feed_detail.go | 133 +++---- 2 files changed, 464 insertions(+), 355 deletions(-) diff --git a/xiaohongshu/comment_feed.go b/xiaohongshu/comment_feed.go index 4fab693..c3f48bf 100644 --- a/xiaohongshu/comment_feed.go +++ b/xiaohongshu/comment_feed.go @@ -24,12 +24,9 @@ func NewCommentFeedAction(page *rod.Page) *CommentFeedAction { func (f *CommentFeedAction) PostComment(ctx context.Context, feedID, xsecToken, content string) error { page := f.page.Context(ctx).Timeout(60 * time.Second) - // 构建详情页 URL url := makeFeedDetailURL(feedID, xsecToken) + logrus.Infof("打开 feed 详情页: %s", url) - logrus.Infof("Opening feed detail page: %s", url) - - // 导航到详情页 if err := page.Navigate(url); err != nil { logrus.Warnf("Failed to navigate to feed detail page: %v", err) return fmt.Errorf("无法打开帖子详情页,该帖子可能在网页端不可访问: %w", err) @@ -42,7 +39,6 @@ func (f *CommentFeedAction) PostComment(ctx context.Context, feedID, xsecToken, time.Sleep(1 * time.Second) - // 查找评论输入框 elem, err := page.Element("div.input-box div.content-edit span") if err != nil { logrus.Warnf("Failed to find comment input box: %v", err) @@ -86,62 +82,52 @@ func (f *CommentFeedAction) PostComment(ctx context.Context, feedID, xsecToken, // ReplyToComment 回复指定评论 func (f *CommentFeedAction) ReplyToComment(ctx context.Context, feedID, xsecToken, commentID, userID, content string) error { - page := f.page.Context(ctx).Timeout(60 * time.Second) + // 增加超时时间,因为需要滚动查找评论 + page := f.page.Context(ctx).Timeout(5 * time.Minute) url := makeFeedDetailURL(feedID, xsecToken) - logrus.Infof("Opening feed detail page for reply: %s", url) + logrus.Infof("打开 feed 详情页进行回复: %s", url) + page.MustNavigate(url) page.MustWaitDOMStable() - time.Sleep(3 * time.Second) // 增加等待时间确保页面完全加载 + time.Sleep(3 * time.Second) // 等待评论容器加载 waitForCommentsContainer(page) - - // 确保评论区域可见 - ensureCommentsVisible(page) - - // 额外等待确保评论内容加载完成 time.Sleep(2 * time.Second) - // 尝试多次查找评论元素 - var commentEl *rod.Element - var err error - for attempt := 0; attempt < 5; attempt++ { // 增加尝试次数 - commentEl, err = findCommentElement(page, commentID, userID) - if err == nil { - break - } - logrus.Warnf("Attempt %d: Failed to find comment: %v", attempt+1, err) - time.Sleep(2 * time.Second) // 增加等待时间 - ensureCommentsVisible(page) - scrollComments(page) // 每次尝试后滚动 - } - + // 使用新的查找逻辑(完全在 JS 中执行) + commentEl, err := findCommentElementNew(page, commentID, userID) if err != nil { return fmt.Errorf("无法找到评论: %w", err) } - // 滚动到评论位置 - _, _ = commentEl.Eval(`() => { try { this.scrollIntoView({behavior: "instant", block: "center"}); } catch (e) {} return true }`) - time.Sleep(1 * time.Second) // 增加等待时间 + // 多次滚动确保可见 + for i := 0; i < 3; i++ { + logrus.Infof("第 %d 次滚动到评论位置...", i+1) + _, _ = commentEl.Eval(`() => { + this.scrollIntoView({behavior: "instant", block: "center"}); + return true + }`) + time.Sleep(1500 * time.Millisecond) - // 尝试多次点击回复按钮 - var replyBtn *rod.Element - for attempt := 0; attempt < 5; attempt++ { // 增加尝试次数 - replyBtn, err = findReplyButton(commentEl) - if err == nil { - if tryClickChainForComment(replyBtn) { - break - } - } - logrus.Warnf("Attempt %d: Failed to click reply button: %v", attempt+1, err) - time.Sleep(1 * time.Second) // 增加等待时间 + // 往下多滚动一点 + page.MustEval(`() => window.scrollBy(0, 150)`) + time.Sleep(500 * time.Millisecond) } - if err != nil || replyBtn == nil { - return fmt.Errorf("无法点击回复按钮") + logrus.Info("滚动完成,准备点击回复按钮") + + // 查找并点击回复按钮 + replyBtn, err := findReplyButton(commentEl) + if err != nil { + return fmt.Errorf("无法找到回复按钮: %w", err) } - time.Sleep(2 * time.Second) // 增加等待时间确保回复输入框出现 + if !tryClickChainForComment(replyBtn) { + return fmt.Errorf("点击回复按钮失败") + } + + time.Sleep(2 * time.Second) // 查找回复输入框 inputEl, err := findReplyInput(page, commentEl) @@ -150,12 +136,17 @@ func (f *CommentFeedAction) ReplyToComment(ctx context.Context, feedID, xsecToke } // 聚焦并输入内容 - if _, evalErr := inputEl.Eval(`() => { try { this.focus(); } catch (e) {} return true }`); evalErr != nil { + if _, evalErr := inputEl.Eval(`() => { + try { + this.focus(); + } catch (e) {} + return true + }`); evalErr != nil { logrus.Warnf("focus reply input failed: %v", evalErr) } inputEl.MustInput(content) - time.Sleep(500 * time.Millisecond) // 增加等待时间 + time.Sleep(500 * time.Millisecond) // 查找并点击提交按钮 submitBtn, err := findSubmitButton(page) @@ -167,131 +158,368 @@ func (f *CommentFeedAction) ReplyToComment(ctx context.Context, feedID, xsecToke return fmt.Errorf("点击回复提交按钮失败") } - time.Sleep(3 * time.Second) // 增加等待时间确保回复提交完成 + time.Sleep(3 * time.Second) return nil } -func findCommentElement(page *rod.Page, commentID, userID string) (*rod.Element, error) { - var lastErr error +func findCommentElementNew(page *rod.Page, commentID, userID string) (*rod.Element, error) { + logrus.Infof("🔍 开始查找评论(新方法)- commentID: %s, userID: %s", commentID, userID) - // 首先尝试确保评论区域可见 - ensureCommentsVisible(page) + // 修改 JS:找到后记录元素的 ID + findCommentJS := fmt.Sprintf(`async () => { + const INTERVAL_MS = 900; + const STAGNANT_LIMIT = 8; + const NO_CHANGE_SCROLL_LIMIT = 3; + const DELTA_MIN = 480; + const SCROLL_TIMEOUT = 900; + const MAX_ATTEMPTS = 100; + const CLICK_MORE_INTERVAL = 2; + const CLICK_WAIT_TIME = 300; - for attempt := 0; attempt < 20; attempt++ { // 增加尝试次数 - logrus.Infof("查找评论,尝试次数: %d", attempt+1) - el, err := locateCommentElement(page, commentID, userID) - if err == nil && el != nil { - logrus.Infof("成功找到评论") - return el, nil - } - if err != nil { - lastErr = err - } + const TARGET_COMMENT_ID = %q; + const TARGET_USER_ID = %q; - // 每3次尝试后进行一次更彻底的滚动 - if attempt%3 == 0 { - // 更彻底的滚动策略 - performFullScroll(page) - } else { - // 常规滚动 - if !scrollComments(page) { - logrus.Infof("滚动到底部,无法继续滚动") - break + console.log('开始查找评论 - TARGET_COMMENT_ID:', TARGET_COMMENT_ID, 'TARGET_USER_ID:', TARGET_USER_ID); + + const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); + const scrollRoot = () => document.scrollingElement || document.documentElement || document.body; + const getContainer = () => document.querySelector('.comments-container'); + + const clickShowMoreButtons = () => { + let clickedCount = 0; + const elements = document.querySelectorAll('.show-more'); + + elements.forEach((el) => { + try { + const rect = el.getBoundingClientRect(); + const style = window.getComputedStyle(el); + const isVisible = ( + rect.height > 0 && + rect.width > 0 && + style.display !== 'none' && + style.visibility !== 'hidden' && + style.opacity !== '0' && + rect.top < window.innerHeight + 500 && + rect.bottom > -500 + ); + + if (isVisible) { + el.click(); + clickedCount++; + } + } catch (err) { + console.debug('点击失败', err); + } + }); + + return clickedCount; + }; + + // === 修改:返回元素的稳定标识符 === + const findTargetComment = () => { + // 优先通过 commentID 查找 + if (TARGET_COMMENT_ID) { + const byId = document.querySelector('#comment-' + TARGET_COMMENT_ID); + if (byId) { + console.log('通过 commentID 找到评论:', TARGET_COMMENT_ID); + // 返回包含完整信息的对象 + return { + element: byId, + selector: '#comment-' + TARGET_COMMENT_ID, + commentId: TARGET_COMMENT_ID + }; + } } - } - time.Sleep(800 * time.Millisecond) // 增加等待时间 - } + + // 通过 userID 查找 + if (TARGET_USER_ID) { + const allComments = document.querySelectorAll('.comment-item, .comment'); + for (const comment of allComments) { + const userIdEl = comment.querySelector('[data-user-id="' + TARGET_USER_ID + '"]'); + if (userIdEl) { + console.log('通过 userID 找到评论:', TARGET_USER_ID); + + // 尝试获取评论的 ID + const commentId = comment.id; + if (commentId) { + return { + element: comment, + selector: '#' + commentId, + commentId: commentId.replace('comment-', '') + }; + } else { + // 如果没有 ID,给它添加一个唯一标识 + const uniqueId = 'xhs-found-' + Date.now() + '-' + Math.random().toString(36).substr(2, 9); + comment.id = uniqueId; + return { + element: comment, + selector: '#' + uniqueId, + commentId: null + }; + } + } + } + } + + return null; + }; - if lastErr != nil { - return nil, lastErr - } - return nil, fmt.Errorf("未找到评论: %s", buildIdentifier(commentID, userID)) -} + // ... (保留原有的滚动逻辑) ... + const getScrollMetrics = (el) => { + if (!el) { + return { top: 0, max: 0, client: window.innerHeight }; + } + if (el === window || el === document || el === document.body || el === document.documentElement) { + const root = scrollRoot(); + return { + top: root.scrollTop, + max: Math.max(root.scrollHeight - root.clientHeight, 0), + client: root.clientHeight || window.innerHeight + }; + } + return { + top: el.scrollTop, + max: Math.max(el.scrollHeight - el.clientHeight, 0), + client: el.clientHeight + }; + }; -func locateCommentElement(page *rod.Page, commentID, userID string) (*rod.Element, error) { - // 如果在comments-container内没有找到,尝试在整个页面查找 - if commentID != "" { - if el, err := locateCommentElementByCommentID(page, commentID); err == nil && el != nil { - return el, nil - } - } - if userID != "" { - if el, err := locateCommentElementByUserID(page, userID); err == nil && el != nil { - return el, nil - } - } + const setScrollTop = (el, value) => { + if (!el) return; + if (el === window || el === document || el === document.body || el === document.documentElement) { + const root = scrollRoot(); + root.scrollTop = value; + window.scrollTo(0, value); + return; + } + el.scrollTop = value; + }; - identifier := buildIdentifier(commentID, userID) - if identifier == "" { - return nil, fmt.Errorf("未提供评论标识") - } - return nil, fmt.Errorf("未找到评论: %s", identifier) -} + const dispatchWheel = (el, delta) => { + if (!el) return; + try { + const wheel = new WheelEvent('wheel', { + deltaY: delta, + bubbles: true, + cancelable: true + }); + el.dispatchEvent(wheel); + el.dispatchEvent(new Event('scroll', { bubbles: true })); + } catch (err) { + console.debug('dispatchWheel error', err); + } + }; -func locateCommentElementByCommentID(page *rod.Page, commentID string) (*rod.Element, error) { - if commentID == "" { - return nil, fmt.Errorf("评论ID为空") - } - - // 首先尝试直接通过ID查找(根据HTML结构中的id="comment-68d9df3e0000000002015818") - idSelector := fmt.Sprintf("#comment-%s", commentID) - if el, err := page.Element(idSelector); err == nil && el != nil { - return el, nil - } - - return nil, fmt.Errorf("未找到评论ID: %s", commentID) -} - -func locateCommentElementByUserID(page *rod.Page, userID string) (*rod.Element, error) { - if userID == "" { - return nil, fmt.Errorf("用户ID为空") - } - - selectors := []string{ - fmt.Sprintf(`[data-user-id="%s"]`, userID), - } - - for _, selector := range selectors { - if el, err := page.Element(selector); err == nil && el != nil { - // 使用JavaScript查找父级评论元素 - jsCode := `() => { - let current = this; + const findScrollTarget = () => { + const container = getContainer(); + const candidates = new Set(); + + if (container) { + let current = container; while (current) { - if (current.classList && (current.classList.contains('comment-item') || current.classList.contains('comment'))) { - return current; + if (current instanceof HTMLElement) { + candidates.add(current); } current = current.parentElement; } - return this; - }` - if _, err := el.Eval(jsCode); err == nil { - return el, nil } - return el, nil + + candidates.add(document.body); + candidates.add(document.documentElement); + + const weighted = Array.from(candidates).map((node) => { + const style = window.getComputedStyle(node); + const overflowY = style.overflowY; + const scrollable = node.scrollHeight - node.clientHeight > 40; + const hasScrollStyle = /auto|scroll|overlay/i.test(overflowY); + const weight = + (node.contains(container) ? 1000 : 0) + + (node === container ? 800 : 0) + + (hasScrollStyle ? 400 : 0) + + (scrollable ? 300 : 0) - + (node === document.body || node === document.documentElement ? 50 : 0); + + if (scrollable || hasScrollStyle || node === document.body || node === document.documentElement) { + return { node, weight }; + } + return null; + }).filter(Boolean); + + weighted.sort((a, b) => b.weight - a.weight); + + return weighted.length > 0 ? weighted[0].node : scrollRoot(); + }; + + const performScroll = (target) => { + const scrollTarget = target || findScrollTarget(); + if (!scrollTarget) { + window.scrollBy(0, window.innerHeight * 0.8); + return; + } + + const metrics = getScrollMetrics(scrollTarget); + const beforeTop = metrics.top; + const desired = metrics.max > 0 + ? Math.min(metrics.top + Math.max(metrics.client * 0.85, DELTA_MIN), metrics.max) + : metrics.top + Math.max(metrics.client * 0.85, DELTA_MIN); + const applied = Math.max(0, desired - metrics.top); + + setScrollTop(scrollTarget, desired); + dispatchWheel(scrollTarget, applied); + + const afterTop = getScrollMetrics(scrollTarget).top; + if (Math.abs(afterTop - beforeTop) < 5 && scrollTarget !== scrollRoot()) { + const root = scrollRoot(); + const rootBefore = root.scrollTop; + root.scrollTop = rootBefore + applied; + window.scrollBy(0, applied); + dispatchWheel(root, applied); + } + }; + + // 主查找逻辑 + let lastScrollTop = 0; + let stagnantChecks = 0; + let noScrollChangeCount = 0; + let totalClickedButtons = 0; + + for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { + const container = getContainer(); + if (!container) { + await sleep(300); + continue; + } + + if (attempt %% CLICK_MORE_INTERVAL === 0) { + const clicked = clickShowMoreButtons(); + if (clicked > 0) { + totalClickedButtons += clicked; + console.log('点击了 ' + clicked + ' 个"更多"按钮,累计: ' + totalClickedButtons); + await sleep(CLICK_WAIT_TIME); + + await sleep(200); + const clicked2 = clickShowMoreButtons(); + if (clicked2 > 0) { + totalClickedButtons += clicked2; + console.log('二次检查点击了 ' + clicked2 + ' 个"更多"按钮'); + await sleep(CLICK_WAIT_TIME); + } + + const foundInfo = findTargetComment(); + if (foundInfo) { + console.log('点击"更多"后找到评论,总共点击了 ' + totalClickedButtons + ' 个按钮'); + return { + status: 'found', + attempts: attempt + 1, + clickedButtons: totalClickedButtons, + selector: foundInfo.selector, + commentId: foundInfo.commentId + }; + } + } + } + + const foundInfo = findTargetComment(); + if (foundInfo) { + console.log('找到评论,尝试次数: ' + (attempt + 1) + ',总共点击了 ' + totalClickedButtons + ' 个按钮'); + return { + status: 'found', + attempts: attempt + 1, + clickedButtons: totalClickedButtons, + selector: foundInfo.selector, + commentId: foundInfo.commentId + }; + } + + const target = findScrollTarget(); + const beforeTop = getScrollMetrics(target).top; + performScroll(target); + await sleep(SCROLL_TIMEOUT); + const afterTop = getScrollMetrics(target).top; + + if (Math.abs(afterTop - beforeTop) < 5) { + noScrollChangeCount += 1; + } else { + noScrollChangeCount = 0; + lastScrollTop = afterTop; + } + + if (noScrollChangeCount >= NO_CHANGE_SCROLL_LIMIT) { + return { status: 'not_found', reason: 'no-scroll-change', attempts: attempt + 1, clickedButtons: totalClickedButtons }; + } + + if (INTERVAL_MS > SCROLL_TIMEOUT) { + await sleep(INTERVAL_MS - SCROLL_TIMEOUT); + } } + + return { status: 'not_found', reason: 'timeout', attempts: MAX_ATTEMPTS, clickedButtons: totalClickedButtons }; + }`, commentID, userID) + + // 执行 JS + result, err := page.Eval(findCommentJS) + if err != nil { + logrus.Errorf("执行查找评论 JS 失败: %v", err) + return nil, fmt.Errorf("执行查找评论 JS 失败: %w", err) } - return nil, fmt.Errorf("未找到用户ID: %s", userID) -} + // 解析结果 + resultJSON, err := page.ObjectToJSON(result) + if err != nil { + logrus.Errorf("无法将结果转换为 JSON: %v", err) + return nil, fmt.Errorf("无法将结果转换为 JSON: %w", err) + } -// 等待评论容器加载完成 + status := resultJSON.Get("status").Str() + reason := resultJSON.Get("reason").Str() + attempts := resultJSON.Get("attempts").Int() + clickedButtons := resultJSON.Get("clickedButtons").Int() + selector := resultJSON.Get("selector").Str() + + logrus.Infof("查找结果: status=%s, reason=%s, attempts=%d, clickedButtons=%d, selector=%s", + status, reason, attempts, clickedButtons, selector) + + if status != "found" { + return nil, fmt.Errorf("未找到评论 (commentID: %s, userID: %s), 原因: %s, 尝试次数: %d, 点击按钮: %d", + commentID, userID, reason, attempts, clickedButtons) + } + + // === 关键修改:使用返回的稳定选择器而不是临时标记 === + el, err := page.Element(selector) + if err != nil { + logrus.Errorf("找到评论但无法获取元素,选择器: %s, 错误: %v", selector, err) + + // 如果稳定选择器失败,尝试重新查找 + logrus.Info("尝试通过 commentID 重新查找...") + if commentID != "" { + fallbackSelector := fmt.Sprintf("#comment-%s", commentID) + el, err = page.Element(fallbackSelector) + if err == nil { + logrus.Infof("通过备用选择器 %s 成功找到元素", fallbackSelector) + return el, nil + } + } + + return nil, fmt.Errorf("找到评论但无法获取元素: %w", err) + } + + logrus.Infof("✓ 成功获取评论元素,选择器: %s", selector) + return el, nil +} func waitForCommentsContainer(page *rod.Page) { jsCode := `() => { - // 等待comments-container元素出现 let attempts = 0; const maxAttempts = 10; const checkContainer = () => { const container = document.querySelector('.comments-container'); if (container) { - // 检查容器内是否有评论内容 const comments = container.querySelectorAll('.comment-item, .comment'); return comments.length > 0; } return false; }; - // 定期检查评论容器是否加载完成 const interval = setInterval(() => { attempts++; if (checkContainer() || attempts >= maxAttempts) { @@ -303,122 +531,9 @@ func waitForCommentsContainer(page *rod.Page) { }` page.Eval(jsCode) - time.Sleep(2 * time.Second) // 等待检查完成 + time.Sleep(2 * time.Second) } -func ensureCommentsVisible(page *rod.Page) { - // 专门针对comments-container元素的JavaScript代码 - jsCode := `() => { - // 查找comments-container元素 - const commentsContainer = document.querySelector('.comments-container'); - - // 如果找到comments-container,尝试滚动到视图中并在其内部滚动 - if (commentsContainer) { - // 先滚动到视图中 - commentsContainer.scrollIntoView({behavior: 'instant', block: 'start'}); - - // 等待一下再在容器内部滚动 - setTimeout(() => { - // 在comments-container内部滚动以显示评论 - if (commentsContainer.scrollHeight > commentsContainer.clientHeight) { - const maxScroll = commentsContainer.scrollHeight - commentsContainer.clientHeight; - if (maxScroll > 0) { - // 滚动到一半位置 - commentsContainer.scrollTop = Math.min(maxScroll, commentsContainer.clientHeight * 0.5); - } - } - }, 200); - - return true; - } - - return false; - }` - - page.Eval(jsCode) - time.Sleep(1 * time.Second) -} - -func scrollComments(page *rod.Page) bool { - scrollJS := `() => { - let scrolled = false; - - // 专门查找comments-container元素 - const commentsContainer = document.querySelector('.comments-container'); - - if (commentsContainer) { - const maxScroll = commentsContainer.scrollHeight - commentsContainer.clientHeight; - if (maxScroll > 0 && commentsContainer.scrollTop < maxScroll) { - // 滚动更多内容 - const delta = Math.max(commentsContainer.clientHeight * 0.8, 400); - commentsContainer.scrollTop = Math.min(maxScroll, commentsContainer.scrollTop + delta); - scrolled = true; - } - } - - return scrolled; - }` - res, err := page.Eval(scrollJS) - if err != nil { - logrus.Warnf("scroll comments failed: %v", err) - return false - } - if res == nil { - return false - } - return res.Value.Bool() -} - -// performFullScroll 执行更彻底的滚动策略 -func performFullScroll(page *rod.Page) { - logrus.Infof("执行彻底滚动策略") - - // 策略1: 滚动到评论容器的不同位置 - scrollPositionsJS := `() => { - const commentsContainer = document.querySelector('.comments-container'); - if (!commentsContainer) return false; - - const maxScroll = commentsContainer.scrollHeight - commentsContainer.clientHeight; - if (maxScroll <= 0) return false; - - // 根据当前滚动位置决定下一步滚动 - const currentScroll = commentsContainer.scrollTop; - const scrollRatio = currentScroll / maxScroll; - - if (scrollRatio < 0.3) { - // 滚动到30%位置 - commentsContainer.scrollTop = maxScroll * 0.3; - } else if (scrollRatio < 0.6) { - // 滚动到60%位置 - commentsContainer.scrollTop = maxScroll * 0.6; - } else if (scrollRatio < 0.9) { - // 滚动到90%位置 - commentsContainer.scrollTop = maxScroll * 0.9; - } else { - // 滚动到底部 - commentsContainer.scrollTop = maxScroll; - } - - return true; - }` - - if _, err := page.Eval(scrollPositionsJS); err != nil { - logrus.Warnf("彻底滚动失败: %v", err) - } - -} - -func buildIdentifier(commentID, userID string) string { - if commentID != "" && userID != "" { - return fmt.Sprintf("comment_id=%s / user_id=%s", commentID, userID) - } - if commentID != "" { - return commentID - } - return userID -} - -// 选取当前层主的回复按钮 func findReplyButton(commentEl *rod.Element) (*rod.Element, error) { if commentEl == nil { return nil, fmt.Errorf("评论元素为空") @@ -435,19 +550,14 @@ func findReplyButton(commentEl *rod.Element) (*rod.Element, error) { return btn, nil } -// verifyClickSuccess 验证点击是否真的成功(检查是否出现了回复输入框) func verifyClickSuccess(clickedEl *rod.Element) bool { - // 获取页面实例 page := clickedEl.Page() - - // 检查是否出现了回复输入框 selectors := []string{ "div.input-box div.content-edit p.content-input", } for _, selector := range selectors { if el, err := page.Element(selector); err == nil && el != nil { - // 检查元素是否可见 if visible, _ := el.Visible(); visible { logrus.Infof("验证成功:找到可见的回复输入框 (%s)", selector) return true @@ -460,17 +570,18 @@ func verifyClickSuccess(clickedEl *rod.Element) bool { func findReplyInput(page *rod.Page, commentEl *rod.Element) (*rod.Element, error) { activeEditableJS := `() => { - const active = document.activeElement; - if (active && active.getAttribute && active.getAttribute('contenteditable') === 'true') { - return active; - } - return null; - }` + const active = document.activeElement; + if (active && active.getAttribute && active.getAttribute('contenteditable') === 'true') { + return active; + } + return null; + }` if el, err := page.ElementByJS(rod.Eval(activeEditableJS)); err == nil && el != nil { return el, nil } + selectors := []string{ - "div.input-box div.content-edit p.content-input", // 原有选择器 + "div.input-box div.content-edit p.content-input", } for _, selector := range selectors { if el, err := page.Element(selector); err == nil && el != nil { @@ -486,7 +597,6 @@ func tryClickChainForComment(el *rod.Element) bool { return false } - // 获取元素信息用于调试 text, _ := el.Text() classAttr, _ := el.Attribute("class") class := "" @@ -499,48 +609,32 @@ func tryClickChainForComment(el *rod.Element) bool { } logrus.Infof("准备点击元素 - 文本: '%s', 类: '%s', 标签: %s", text, class, tagName) - // 检查元素是否可见和可点击 visible, _ := el.Visible() logrus.Infof("元素可见性: %v", visible) - // 滚动到元素位置 - _, _ = el.Eval(`() => { try { this.scrollIntoView({behavior: "instant", block: "center"}); } catch (e) {} return true }`) + _, _ = el.Eval(`() => { + try { + this.scrollIntoView({behavior: "instant", block: "center"}); + } catch (e) {} + return true + }`) time.Sleep(500 * time.Millisecond) - // 只使用直接点击方式 - clickMethods := []struct { - name string - fn func(*rod.Element) bool - }{ - {"直接点击", func(e *rod.Element) bool { - if err := e.Click(proto.InputMouseButtonLeft, 1); err != nil { - logrus.Warnf("直接点击失败: %v", err) - return false - } - logrus.Infof("直接点击成功") - return true - }}, + if err := el.Click(proto.InputMouseButtonLeft, 1); err != nil { + logrus.Warnf("点击失败: %v", err) + return false } - for i, method := range clickMethods { - logrus.Infof("尝试点击方法 %d: %s", i+1, method.name) - if method.fn(el) { - // 点击后等待一下,检查是否有反应 - time.Sleep(1 * time.Second) + logrus.Infof("点击成功") + time.Sleep(1 * time.Second) - // 验证点击是否真的成功(检查是否出现了回复输入框) - success := verifyClickSuccess(el) - if success { - logrus.Infof("点击方法 %s 执行成功且有效", method.name) - return true - } else { - logrus.Warnf("点击方法 %s 执行成功但无效(没有出现回复输入框)", method.name) - // 继续尝试下一种方法 - } - } + success := verifyClickSuccess(el) + if success { + logrus.Infof("点击执行成功且有效") + return true } - logrus.Errorf("所有点击方法都失败") + logrus.Warnf("点击执行成功但无效(没有出现回复输入框)") return false } diff --git a/xiaohongshu/feed_detail.go b/xiaohongshu/feed_detail.go index 7eecf6b..b92d18f 100644 --- a/xiaohongshu/feed_detail.go +++ b/xiaohongshu/feed_detail.go @@ -35,6 +35,56 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken page.MustWaitDOMStable() time.Sleep(1 * time.Second) + // === 检测「笔记暂时无法浏览」或类似不可访问页面 === + unavailableResult := page.MustEval(`() => { + const wrapper = document.querySelector('.access-wrapper, .error-wrapper, .not-found-wrapper, .blocked-wrapper'); + if (!wrapper) return null; + + const text = wrapper.textContent || ''; + const keywords = [ + '当前笔记暂时无法浏览', + '该内容因违规已被删除', + '该笔记已被删除', + '内容不存在', + '笔记不存在', + '已失效', + '私密笔记', + '仅作者可见', + '因用户设置,你无法查看', + '因违规无法查看', + '这是一片荒地点击评论' + ]; + + for (const kw of keywords) { + if (text.includes(kw)) { + return kw.trim(); + } + } + return null; + }`) + + // The result is a gson.JSON object. We need to get its raw JSON representation to check for "null". + rawJSON, err := unavailableResult.MarshalJSON() + if err != nil { + logrus.Errorf("无法解析页面状态检查的结果: %v", err) + return nil, fmt.Errorf("无法解析页面状态检查的结果: %w", err) + } + + if string(rawJSON) != "null" { + var reason string + // JS 返回的字符串会被 JSON 编码,所以需要 Unmarshal + if err := json.Unmarshal(rawJSON, &reason); err == nil { + logrus.Warnf("笔记不可访问: %s", reason) + return nil, fmt.Errorf("笔记不可访问: %s", reason) + } else { + // 如果解析失败,直接使用原始值 + rawReason := string(rawJSON) + logrus.Warnf("笔记不可访问,且无法解析原因: %s", rawReason) + return nil, fmt.Errorf("笔记不可访问,无法解析原因: %s", rawReason) + } + } + + // === 加载全部评论(简化版本)=== if loadAllComments { scrollAllCommentsJS := `() => { const INTERVAL_MS = 900; @@ -100,70 +150,33 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken } }; - // 点击所有"更多"按钮 - 使用多种策略确保不遗漏 + // 简化的点击"更多"按钮函数 - 只使用 .show-more 选择器 const clickShowMoreButtons = () => { - // 尝试多个可能的选择器 - const selectors = [ - '.show-more', - '.show-more-btn', - '[class*="show-more"]', - '[class*="showMore"]', - 'button:has-text("更多")', - 'span:has-text("更多")', - 'div:has-text("更多")' - ]; - - const clickedElements = new Set(); let clickedCount = 0; - selectors.forEach((selector) => { + const elements = document.querySelectorAll('.show-more'); + + elements.forEach((el) => { try { - const elements = document.querySelectorAll(selector); - elements.forEach((el) => { - // 避免重复点击同一个元素 - if (clickedElements.has(el)) return; - - // 检查元素文本是否包含"更多"或者是否有相关class - const text = el.textContent || ''; - const className = el.className || ''; - const shouldClick = text.includes('更多') || - className.includes('show-more') || - className.includes('showMore'); - - if (!shouldClick) return; - - // 检查元素是否可见(放宽条件,不要求完全在视口内) - const rect = el.getBoundingClientRect(); - const style = window.getComputedStyle(el); - const isVisible = ( - rect.height > 0 && - rect.width > 0 && - style.display !== 'none' && - style.visibility !== 'hidden' && - style.opacity !== '0' && - rect.top < window.innerHeight + 500 && // 允许元素在视口下方500px内 - rect.bottom > -500 // 允许元素在视口上方500px内 - ); - - if (isVisible) { - try { - // 尝试多种点击方式 - el.click(); - - // 如果是嵌套元素,也尝试点击父元素 - if (el.parentElement && el.parentElement.classList.contains('show-more')) { - el.parentElement.click(); - } - - clickedElements.add(el); - clickedCount++; - } catch (err) { - console.debug('点击失败', err); - } - } - }); + // 检查元素是否可见 + const rect = el.getBoundingClientRect(); + const style = window.getComputedStyle(el); + const isVisible = ( + rect.height > 0 && + rect.width > 0 && + style.display !== 'none' && + style.visibility !== 'hidden' && + style.opacity !== '0' && + rect.top < window.innerHeight + 500 && // 允许元素在视口下方500px内 + rect.bottom > -500 // 允许元素在视口上方500px内 + ); + + if (isVisible) { + el.click(); + clickedCount++; + } } catch (err) { - console.debug('选择器错误: ' + selector, err); + console.debug('点击失败', err); } }); @@ -244,6 +257,7 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken dispatchWheel(root, applied); } }; + return (async () => { let lastCount = 0; let stagnantChecks = 0; @@ -346,6 +360,7 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken } } + // === 提取笔记详情数据 === result := page.MustEval(`() => { if (window.__INITIAL_STATE__ && window.__INITIAL_STATE__.note && From c5fa30bd3e9b0be1a8c611c6bbd973053ad2d7db Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Thu, 27 Nov 2025 02:25:59 +0800 Subject: [PATCH 11/19] fix --- xiaohongshu/feed_detail.go | 640 +++++++++++++++++++++---------------- 1 file changed, 356 insertions(+), 284 deletions(-) diff --git a/xiaohongshu/feed_detail.go b/xiaohongshu/feed_detail.go index b92d18f..50d813b 100644 --- a/xiaohongshu/feed_detail.go +++ b/xiaohongshu/feed_detail.go @@ -7,6 +7,7 @@ import ( "time" "github.com/go-rod/rod" + "github.com/go-rod/rod/lib/proto" "github.com/sirupsen/logrus" "github.com/xpzouying/xiaohongshu-mcp/errors" ) @@ -27,7 +28,6 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken // 构建详情页 URL url := makeFeedDetailURL(feedID, xsecToken) - logrus.Infof("打开 feed 详情页: %s", url) // 导航到详情页 @@ -35,7 +35,24 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken page.MustWaitDOMStable() time.Sleep(1 * time.Second) - // === 检测「笔记暂时无法浏览」或类似不可访问页面 === + // 检测页面是否不可访问 + if err := checkPageAccessible(page); err != nil { + return nil, err + } + + // 加载全部评论 + if loadAllComments { + if err := f.loadAllComments(page); err != nil { + logrus.Warnf("加载全部评论失败: %v", err) + } + } + + // 提取笔记详情数据 + return f.extractFeedDetail(page, feedID) +} + +// checkPageAccessible 检查页面是否可访问 +func checkPageAccessible(page *rod.Page) error { unavailableResult := page.MustEval(`() => { const wrapper = document.querySelector('.access-wrapper, .error-wrapper, .not-found-wrapper, .blocked-wrapper'); if (!wrapper) return null; @@ -51,8 +68,7 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken '私密笔记', '仅作者可见', '因用户设置,你无法查看', - '因违规无法查看', - '这是一片荒地点击评论' + '因违规无法查看' ]; for (const kw of keywords) { @@ -63,304 +79,360 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken return null; }`) - // The result is a gson.JSON object. We need to get its raw JSON representation to check for "null". rawJSON, err := unavailableResult.MarshalJSON() if err != nil { logrus.Errorf("无法解析页面状态检查的结果: %v", err) - return nil, fmt.Errorf("无法解析页面状态检查的结果: %w", err) + return fmt.Errorf("无法解析页面状态检查的结果: %w", err) } if string(rawJSON) != "null" { var reason string - // JS 返回的字符串会被 JSON 编码,所以需要 Unmarshal if err := json.Unmarshal(rawJSON, &reason); err == nil { logrus.Warnf("笔记不可访问: %s", reason) - return nil, fmt.Errorf("笔记不可访问: %s", reason) + return fmt.Errorf("笔记不可访问: %s", reason) + } + rawReason := string(rawJSON) + logrus.Warnf("笔记不可访问,且无法解析原因: %s", rawReason) + return fmt.Errorf("笔记不可访问,无法解析原因: %s", rawReason) + } + + return nil +} + +// loadAllComments 加载所有评论 +func (f *FeedDetailAction) loadAllComments(page *rod.Page) error { + const ( + maxAttempts = 500 + scrollInterval = 600 * time.Millisecond + clickMoreInterval = 1 // 每次滚动都检查"更多"按钮 + stagnantLimit = 20 // 增加停滞容忍度 + noScrollChangeLimit = 15 // 增加滚动停滞容忍度 + minScrollDelta = 10 // 最小有效滚动距离 + aggressiveClickEvery = 5 // 每5次尝试进行一次激进点击 + ) + + logrus.Info("开始加载所有评论...") + + // 先滚动到评论区 + scrollToCommentsArea(page) + time.Sleep(1 * time.Second) + + var ( + lastCount = 0 + lastScrollTop = 0 + stagnantChecks = 0 + noScrollChangeCount = 0 + totalClickedButtons = 0 + attempt = 0 + ) + + for attempt = 0; attempt < maxAttempts; attempt++ { + logrus.Debugf("=== 尝试 %d/%d ===", attempt+1, maxAttempts) + + // === 1. 检查是否到达底部 === + if checkEndContainer(page) { + logrus.Infof("✓ 检测到 'THE END' 元素,已滑动到底部") + // 到底部后再做最后一轮点击 + finalClicked := clickShowMoreButtons(page) + totalClickedButtons += finalClicked + if finalClicked > 0 { + logrus.Infof("底部最后点击了 %d 个按钮", finalClicked) + time.Sleep(1 * time.Second) + } + + currentCount := getCommentCount(page) + logrus.Infof("✓ 加载完成: %d 条评论, 尝试次数: %d, 点击按钮: %d", + currentCount, attempt+1, totalClickedButtons) + return nil + } + + // === 2. 每次都点击"更多"按钮 === + if attempt%clickMoreInterval == 0 { + clicked := clickShowMoreButtons(page) + if clicked > 0 { + totalClickedButtons += clicked + logrus.Infof("点击了 %d 个'更多'按钮,累计: %d", clicked, totalClickedButtons) + time.Sleep(500 * time.Millisecond) + + // 多轮检查 + for round := 0; round < 2; round++ { + time.Sleep(300 * time.Millisecond) + clicked2 := clickShowMoreButtons(page) + if clicked2 > 0 { + totalClickedButtons += clicked2 + logrus.Infof("第 %d 轮再次点击了 %d 个按钮", round+2, clicked2) + time.Sleep(500 * time.Millisecond) + } else { + break + } + } + } + } + + // === 4. 获取当前评论数量 === + currentCount := getCommentCount(page) + totalCount := getTotalCommentCount(page) + + logrus.Debugf("当前评论: %d, 目标: %d", currentCount, totalCount) + + // 检查是否已加载所有评论(但继续滚动到底部确认) + if totalCount > 0 && currentCount >= totalCount { + logrus.Infof("评论数量已达标: %d/%d,继续滚动到底部确认...", currentCount, totalCount) + // 不要立即返回,继续滚动到底部 + } + + // === 5. 检查评论数量变化 === + if currentCount != lastCount { + logrus.Infof("✓ 评论数量增加: %d -> %d (+%d)", lastCount, currentCount, currentCount-lastCount) + lastCount = currentCount + stagnantChecks = 0 // 重置停滞计数 } else { - // 如果解析失败,直接使用原始值 - rawReason := string(rawJSON) - logrus.Warnf("笔记不可访问,且无法解析原因: %s", rawReason) - return nil, fmt.Errorf("笔记不可访问,无法解析原因: %s", rawReason) + stagnantChecks++ + if stagnantChecks%5 == 0 { + logrus.Debugf("评论数量停滞 %d 次", stagnantChecks) + } + } + + // 只有在严重停滞时才考虑退出 + if stagnantChecks >= stagnantLimit { + logrus.Infof("评论数量长期停滞,尝试最后冲刺...") + // 最后冲刺:大幅滚动 + 点击 + finalPush(page) + finalClicked := clickShowMoreButtons(page) + totalClickedButtons += finalClicked + + if checkEndContainer(page) { + logrus.Infof("✓ 最终到达底部,评论数: %d, 点击按钮: %d", + currentCount, totalClickedButtons) + return nil + } + + // 还没到底部,继续 + logrus.Infof("未到底部,重置停滞计数,继续加载...") + stagnantChecks = 0 + } + + // === 6. 执行滚动 === + _, scrollDelta, currentScrollTop := scrollWithMouse(page) + + // === 7. 检查滚动变化 === + if scrollDelta < minScrollDelta || currentScrollTop == lastScrollTop { + noScrollChangeCount++ + if noScrollChangeCount%5 == 0 { + logrus.Debugf("滚动停滞 %d 次,尝试大幅滚动", noScrollChangeCount) + // 尝试更大幅度滚动 + largeScroll(page) + time.Sleep(300 * time.Millisecond) + } + } else { + noScrollChangeCount = 0 + lastScrollTop = currentScrollTop + } + + // 只有严重滚动停滞时才考虑结束 + if noScrollChangeCount >= noScrollChangeLimit { + logrus.Infof("滚动严重停滞,尝试最后冲刺...") + finalPush(page) + + if checkEndContainer(page) { + currentCount := getCommentCount(page) + logrus.Infof("✓ 最终到达底部,评论数: %d, 点击按钮: %d", + currentCount, totalClickedButtons) + return nil + } + + // 重置计数继续 + logrus.Infof("未到底部,重置滚动计数,继续加载...") + noScrollChangeCount = 0 + lastScrollTop = 0 + } + + // === 8. 等待内容加载 === + time.Sleep(scrollInterval) + } + + // === 9. 达到最大尝试次数,做最后的冲刺 === + logrus.Infof("达到最大尝试次数 %d,执行最后冲刺...", maxAttempts) + finalPush(page) + finalClicked := clickShowMoreButtons(page) + totalClickedButtons += finalClicked + + currentCount := getCommentCount(page) + hasEnd := checkEndContainer(page) + + logrus.Infof("✓ 加载结束: %d 条评论, 总点击按钮: %d, 到达底部: %v", + currentCount, totalClickedButtons, hasEnd) + + return nil +} + +// scrollToCommentsArea 滚动到评论区 +func scrollToCommentsArea(page *rod.Page) { + logrus.Info("滚动到评论区...") + page.MustEval(`() => { + const container = document.querySelector('.comments-container'); + if (container) { + container.scrollIntoView({behavior: 'smooth', block: 'start'}); + } + }`) +} + +// finalPush 最后冲刺:大幅滚动到底部 +func finalPush(page *rod.Page) { + logrus.Info("执行最后冲刺滚动...") + + for i := 0; i < 20; i++ { + // 检查是否已经到底部 + if checkEndContainer(page) { + logrus.Debug("已到底部,停止冲刺") + return + } + + beforeTop := getScrollTop(page) + + // 大幅滚动 + largeScroll(page) + time.Sleep(200 * time.Millisecond) + + // 点击出现的按钮 + clicked := clickShowMoreButtons(page) + if clicked > 0 { + time.Sleep(500 * time.Millisecond) + } + + afterTop := getScrollTop(page) + + // 如果滚动没变化,尝试JS滚动 + if afterTop == beforeTop { + page.MustEval(`() => { + window.scrollTo(0, document.body.scrollHeight); + }`) + time.Sleep(300 * time.Millisecond) + } + } +} + +// largeScroll 大幅度滚动 +func largeScroll(page *rod.Page) { + // 方法1: Mouse.Scroll 大幅度滚动 + page.Mouse.Scroll(0, 2000, 5) + time.Sleep(100 * time.Millisecond) +} + +// scrollWithMouse 使用 Mouse 模拟滚轮滚动 +func scrollWithMouse(page *rod.Page) (bool, int, int) { + beforeTop := getScrollTop(page) + + // 获取视口高度 + viewportHeight := page.MustEval(`() => window.innerHeight`).Int() + + // 计算滚动距离(每次滚动视口高度的 80%) + scrollDelta := float64(viewportHeight) * 0.8 + if scrollDelta < 500 { + scrollDelta = 500 + } + + // 使用 Mouse.Scroll 模拟滚轮滚动 + err := page.Mouse.Scroll(0, scrollDelta, 5) + if err != nil { + logrus.Warnf("鼠标滚动失败: %v", err) + return false, 0, beforeTop + } + + // 等待滚动完成 + time.Sleep(150 * time.Millisecond) + + afterTop := getScrollTop(page) + actualDelta := afterTop - beforeTop + scrolled := actualDelta > 5 + + if scrolled { + logrus.Debugf("滚动: %d -> %d (Δ%d)", beforeTop, afterTop, actualDelta) + } + + return scrolled, actualDelta, afterTop +} + +// getScrollTop 获取当前滚动位置 +func getScrollTop(page *rod.Page) int { + result := page.MustEval(`() => { + return window.pageYOffset || document.documentElement.scrollTop || document.body.scrollTop || 0; + }`) + return result.Int() +} + +// clickShowMoreButtons 点击所有可见的"更多"按钮 +func clickShowMoreButtons(page *rod.Page) int { + elements, err := page.Elements(".show-more") + if err != nil { + return 0 + } + + clickedCount := 0 + + for _, el := range elements { + // 检查元素是否可见 + visible, err := el.Visible() + if err != nil || !visible { + continue + } + + // 检查是否在 DOM 中 + box, err := el.Shape() + if err != nil || len(box.Quads) == 0 { + continue + } + + // 点击元素 + if err := el.Click(proto.InputMouseButtonLeft, 1); err == nil { + clickedCount++ + time.Sleep(150 * time.Millisecond) } } - // === 加载全部评论(简化版本)=== - if loadAllComments { - scrollAllCommentsJS := `() => { - const INTERVAL_MS = 900; - const STAGNANT_LIMIT = 8; - const NO_CHANGE_SCROLL_LIMIT = 3; - const DELTA_MIN = 480; - const SCROLL_TIMEOUT = 900; - const MAX_ATTEMPTS = 200; - const CLICK_MORE_INTERVAL = 2; // 每滚动2次检查一次"更多"按钮 - const CLICK_WAIT_TIME = 300; // 点击后等待时间 + return clickedCount +} - const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); - const scrollRoot = () => document.scrollingElement || document.documentElement || document.body; - const getContainer = () => document.querySelector('.comments-container'); - const getCommentCount = (container) => - container ? container.querySelectorAll('.comment-item, .comment-item-sub, .comment').length : 0; - const getTotalCount = (container) => { - if (!container) return null; - const text = (container.querySelector('.total')?.textContent || '').replace(/\s+/g, ''); - const match = text.match(/共(\d+)条评论/); - return match ? parseInt(match[1], 10) : null; - }; - const getScrollMetrics = (el) => { - if (!el) { - return { top: 0, max: 0, client: window.innerHeight }; - } - if (el === window || el === document || el === document.body || el === document.documentElement) { - const root = scrollRoot(); - return { - top: root.scrollTop, - max: Math.max(root.scrollHeight - root.clientHeight, 0), - client: root.clientHeight || window.innerHeight - }; - } - return { - top: el.scrollTop, - max: Math.max(el.scrollHeight - el.clientHeight, 0), - client: el.clientHeight - }; - }; - const setScrollTop = (el, value) => { - if (!el) return; - if (el === window || el === document || el === document.body || el === document.documentElement) { - const root = scrollRoot(); - root.scrollTop = value; - window.scrollTo(0, value); - return; - } - el.scrollTop = value; - }; - const dispatchWheel = (el, delta) => { - if (!el) return; - try { - const wheel = new WheelEvent('wheel', { - deltaY: delta, - bubbles: true, - cancelable: true - }); - el.dispatchEvent(wheel); - el.dispatchEvent(new Event('scroll', { bubbles: true })); - } catch (err) { - console.debug('dispatchWheel error', err); - } - }; +// getCommentCount 获取当前评论数量 +func getCommentCount(page *rod.Page) int { + result := page.MustEval(`() => { + const container = document.querySelector('.comments-container'); + if (!container) return 0; + return container.querySelectorAll('.comment-item, .comment-item-sub, .comment').length; + }`) + return result.Int() +} + +// getTotalCommentCount 获取总评论数 +func getTotalCommentCount(page *rod.Page) int { + result := page.MustEval(`() => { + const container = document.querySelector('.comments-container'); + if (!container) return 0; - // 简化的点击"更多"按钮函数 - 只使用 .show-more 选择器 - const clickShowMoreButtons = () => { - let clickedCount = 0; - - const elements = document.querySelectorAll('.show-more'); - - elements.forEach((el) => { - try { - // 检查元素是否可见 - const rect = el.getBoundingClientRect(); - const style = window.getComputedStyle(el); - const isVisible = ( - rect.height > 0 && - rect.width > 0 && - style.display !== 'none' && - style.visibility !== 'hidden' && - style.opacity !== '0' && - rect.top < window.innerHeight + 500 && // 允许元素在视口下方500px内 - rect.bottom > -500 // 允许元素在视口上方500px内 - ); - - if (isVisible) { - el.click(); - clickedCount++; - } - } catch (err) { - console.debug('点击失败', err); - } - }); - - return clickedCount; - }; - - let cachedTarget = null; - const collectCandidates = () => { - const container = getContainer(); - const candidatesSet = new Set(); - if (container) { - let current = container; - while (current) { - if (current instanceof HTMLElement) { - candidatesSet.add(current); - } - current = current.parentElement; - } - container.querySelectorAll('*').forEach((node) => { - if (node instanceof HTMLElement) { - candidatesSet.add(node); - } - }); - } - [document.body, document.documentElement].forEach((node) => { - if (node instanceof HTMLElement) { - candidatesSet.add(node); - } - }); - const candidates = []; - candidatesSet.forEach((node) => { - const style = window.getComputedStyle(node); - const overflowY = style.overflowY; - const scrollable = node.scrollHeight - node.clientHeight > 40; - const hasScrollStyle = /auto|scroll|overlay/i.test(overflowY); - const weight = - (node.contains(container) ? 1000 : 0) + - (node === container ? 800 : 0) + - (hasScrollStyle ? 400 : 0) + - (scrollable ? 300 : 0) - - (node === document.body || node === document.documentElement ? 50 : 0); - if (scrollable || hasScrollStyle || node === document.body || node === document.documentElement) { - candidates.push({ node, weight }); - } - }); - candidates.sort((a, b) => b.weight - a.weight); - return candidates.map((candidate) => candidate.node); - }; - const findScrollTarget = () => { - if (cachedTarget && cachedTarget.isConnected) { - return cachedTarget; - } - const candidates = collectCandidates(); - cachedTarget = candidates.find((node) => { - const metrics = getScrollMetrics(node); - return metrics.max > 30 || metrics.client > 0; - }) || scrollRoot(); - return cachedTarget; - }; - const performScroll = (target) => { - const scrollTarget = target || findScrollTarget(); - if (!scrollTarget) { - window.scrollBy(0, window.innerHeight * 0.8); - return; - } - const metrics = getScrollMetrics(scrollTarget); - const beforeTop = metrics.top; - const desired = metrics.max > 0 ? Math.min(metrics.top + Math.max(metrics.client * 0.85, DELTA_MIN), metrics.max) : metrics.top + Math.max(metrics.client * 0.85, DELTA_MIN); - const applied = Math.max(0, desired - metrics.top); - setScrollTop(scrollTarget, desired); - dispatchWheel(scrollTarget, applied); - const afterTop = getScrollMetrics(scrollTarget).top; - if (Math.abs(afterTop - beforeTop) < 5 && scrollTarget !== scrollRoot()) { - const root = scrollRoot(); - const rootBefore = root.scrollTop; - root.scrollTop = rootBefore + applied; - window.scrollBy(0, applied); - dispatchWheel(root, applied); - } - }; + const totalEl = container.querySelector('.total'); + if (!totalEl) return 0; - return (async () => { - let lastCount = 0; - let stagnantChecks = 0; - let noScrollChangeCount = 0; - let totalClickedButtons = 0; - - for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { - const container = getContainer(); - if (!container) { - await sleep(300); - continue; - } - - // 每隔一定次数检查并点击"更多"按钮 - if (attempt % CLICK_MORE_INTERVAL === 0) { - const clicked = clickShowMoreButtons(); - if (clicked > 0) { - totalClickedButtons += clicked; - console.log('点击了 ' + clicked + ' 个"更多"按钮,累计: ' + totalClickedButtons); - await sleep(CLICK_WAIT_TIME); // 等待内容展开 - - // 点击后再次检查是否有新的"更多"按钮出现 - await sleep(200); - const clicked2 = clickShowMoreButtons(); - if (clicked2 > 0) { - totalClickedButtons += clicked2; - console.log('二次检查点击了 ' + clicked2 + ' 个"更多"按钮'); - await sleep(CLICK_WAIT_TIME); - } - } - } - - const total = getTotalCount(container); - const count = getCommentCount(container); - if (total && count >= total) { - return { - status: 'complete', - reason: 'total', - attempts: attempt + 1, - count, - total, - clickedButtons: totalClickedButtons - }; - } - if (count === lastCount) { - stagnantChecks += 1; - } else { - lastCount = count; - stagnantChecks = 0; - } - if (stagnantChecks >= STAGNANT_LIMIT) { - return { - status: 'complete', - reason: 'stagnant', - attempts: attempt + 1, - count, - total, - clickedButtons: totalClickedButtons - }; - } - const target = findScrollTarget(); - const beforeTop = getScrollMetrics(target).top; - performScroll(target); - await sleep(SCROLL_TIMEOUT); - const afterTop = getScrollMetrics(target).top; - if (Math.abs(afterTop - beforeTop) < 5) { - noScrollChangeCount += 1; - } else { - noScrollChangeCount = 0; - } - if (noScrollChangeCount >= NO_CHANGE_SCROLL_LIMIT) { - return { - status: 'complete', - reason: 'no-scroll-change', - attempts: attempt + 1, - count, - total, - clickedButtons: totalClickedButtons - }; - } - if (INTERVAL_MS > SCROLL_TIMEOUT) { - await sleep(INTERVAL_MS - SCROLL_TIMEOUT); - } - } - return { - status: 'timeout', - clickedButtons: totalClickedButtons - }; - })() - .then((res) => JSON.stringify(res)) - .catch((err) => JSON.stringify({ status: 'error', message: err && err.message ? err.message : String(err) })); - }` + const text = (totalEl.textContent || '').replace(/\s+/g, ''); + const match = text.match(/共(\d+)条评论/); + return match ? parseInt(match[1], 10) : 0; + }`) + return result.Int() +} - if res, err := page.Eval(scrollAllCommentsJS); err != nil { - logrus.Warnf("加载全部评论失败: %v", err) - } else if res != nil { - if str := res.Value.Str(); str != "" { - logrus.Infof("评论滚动结果: %s", str) - } - } - } +// checkEndContainer 检查是否出现 "THE END" 元素 +func checkEndContainer(page *rod.Page) bool { + result := page.MustEval(`() => { + const endContainer = document.querySelector('.end-container'); + if (!endContainer) return false; + + const text = (endContainer.textContent || '').trim().toUpperCase(); + return text.includes('THE END') || text.includes('THEEND'); + }`) + return result.Bool() +} - // === 提取笔记详情数据 === +// extractFeedDetail 提取 Feed 详情数据 +func (f *FeedDetailAction) extractFeedDetail(page *rod.Page, feedID string) (*FeedDetailResponse, error) { result := page.MustEval(`() => { if (window.__INITIAL_STATE__ && window.__INITIAL_STATE__.note && From 9b15339ef0edf410b9a0eab9a76d5b16b57e62ae Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Wed, 3 Dec 2025 02:42:50 +0800 Subject: [PATCH 12/19] =?UTF-8?q?fix:=20=E8=AF=A6=E6=83=85=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0=E8=87=AA=E5=AE=9A=E4=B9=89=E5=8A=A0=E8=BD=BD=E6=95=B0?= =?UTF-8?q?=E9=87=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- handlers_api.go | 19 +- mcp_handlers.go | 48 +++- mcp_server.go | 20 +- service.go | 7 +- types.go | 19 +- xiaohongshu/feed_detail.go | 490 ++++++++++++++++++++++++------------- 6 files changed, 417 insertions(+), 186 deletions(-) diff --git a/handlers_api.go b/handlers_api.go index 646ccaa..9335511 100644 --- a/handlers_api.go +++ b/handlers_api.go @@ -164,8 +164,23 @@ func (s *AppServer) getFeedDetailHandler(c *gin.Context) { return } - // 获取 Feed 详情 - result, err := s.xiaohongshuService.GetFeedDetail(c.Request.Context(), req.FeedID, req.XsecToken, req.LoadAllComments) + var result *FeedDetailResponse + var err error + + if req.CommentConfig != nil { + // 使用配置参数 + config := xiaohongshu.CommentLoadConfig{ + ClickMoreReplies: req.CommentConfig.ClickMoreReplies, + MaxRepliesThreshold: req.CommentConfig.MaxRepliesThreshold, + MaxCommentItems: req.CommentConfig.MaxCommentItems, + ScrollSpeed: req.CommentConfig.ScrollSpeed, + } + result, err = s.xiaohongshuService.GetFeedDetailWithConfig(c.Request.Context(), req.FeedID, req.XsecToken, req.LoadAllComments, config) + } else { + // 使用默认配置 + result, err = s.xiaohongshuService.GetFeedDetail(c.Request.Context(), req.FeedID, req.XsecToken, req.LoadAllComments) + } + if err != nil { respondError(c, http.StatusInternalServerError, "GET_FEED_DETAIL_FAILED", "获取Feed详情失败", err.Error()) diff --git a/mcp_handlers.go b/mcp_handlers.go index be1e03f..c0f1e9a 100644 --- a/mcp_handlers.go +++ b/mcp_handlers.go @@ -322,9 +322,53 @@ func (s *AppServer) handleGetFeedDetail(ctx context.Context, args map[string]any } } - logrus.Infof("MCP: 获取Feed详情 - Feed ID: %s, loadAllComments=%v", feedID, loadAll) + // 解析评论配置参数,如果未提供则使用默认值 + config := xiaohongshu.DefaultCommentLoadConfig() - result, err := s.xiaohongshuService.GetFeedDetail(ctx, feedID, xsecToken, loadAll) + if raw, ok := args["click_more_replies"]; ok { + switch v := raw.(type) { + case bool: + config.ClickMoreReplies = v + case string: + if parsed, err := strconv.ParseBool(v); err == nil { + config.ClickMoreReplies = parsed + } + } + } + + if raw, ok := args["max_replies_threshold"]; ok { + switch v := raw.(type) { + case float64: + config.MaxRepliesThreshold = int(v) + case string: + if parsed, err := strconv.Atoi(v); err == nil { + config.MaxRepliesThreshold = parsed + } + case int: + config.MaxRepliesThreshold = v + } + } + + if raw, ok := args["max_comment_items"]; ok { + switch v := raw.(type) { + case float64: + config.MaxCommentItems = int(v) + case string: + if parsed, err := strconv.Atoi(v); err == nil { + config.MaxCommentItems = parsed + } + case int: + config.MaxCommentItems = v + } + } + + if raw, ok := args["scroll_speed"].(string); ok && raw != "" { + config.ScrollSpeed = raw + } + + logrus.Infof("MCP: 获取Feed详情 - Feed ID: %s, loadAllComments=%v, config=%+v", feedID, loadAll, config) + + result, err := s.xiaohongshuService.GetFeedDetailWithConfig(ctx, feedID, xsecToken, loadAll, config) if err != nil { return &MCPToolResult{ Content: []MCPContent{{ diff --git a/mcp_server.go b/mcp_server.go index 3c127e8..8c35e7a 100644 --- a/mcp_server.go +++ b/mcp_server.go @@ -45,9 +45,13 @@ type FilterOption struct { // FeedDetailArgs 获取Feed详情的参数 type FeedDetailArgs struct { - FeedID string `json:"feed_id" jsonschema:"小红书笔记ID,从Feed列表获取"` - XsecToken string `json:"xsec_token" jsonschema:"访问令牌,从Feed列表的xsecToken字段获取"` - LoadAllComments bool `json:"load_all_comments,omitempty" jsonschema:"是否加载全部评论(默认false,仅返回首批评论)"` + FeedID string `json:"feed_id" jsonschema:"小红书笔记ID,从Feed列表获取"` + XsecToken string `json:"xsec_token" jsonschema:"访问令牌,从Feed列表的xsecToken字段获取"` + LoadAllComments bool `json:"load_all_comments,omitempty" jsonschema:"是否加载全部评论(默认false,仅返回首批评论)"` + ClickMoreReplies bool `json:"click_more_replies,omitempty" jsonschema:"是否点击'更多回复'按钮 (默认: false)"` + MaxRepliesThreshold int `json:"max_replies_threshold,omitempty" jsonschema:"回复数量阈值,超过此数量的'更多'按钮将被跳过 (0表示不跳过任何, 默认: 10)"` + MaxCommentItems int `json:"max_comment_items,omitempty" jsonschema:"最大加载评论数(0表示加载所有, 默认: 0)"` + ScrollSpeed string `json:"scroll_speed,omitempty" jsonschema:"滚动速度: 'slow'|'normal'|'fast' (默认: 'normal')"` } // UserProfileArgs 获取用户主页的参数 @@ -214,9 +218,13 @@ func registerTools(server *mcp.Server, appServer *AppServer) { }, withPanicRecovery("get_feed_detail", func(ctx context.Context, req *mcp.CallToolRequest, args FeedDetailArgs) (*mcp.CallToolResult, any, error) { argsMap := map[string]interface{}{ - "feed_id": args.FeedID, - "xsec_token": args.XsecToken, - "load_all_comments": args.LoadAllComments, + "feed_id": args.FeedID, + "xsec_token": args.XsecToken, + "load_all_comments": args.LoadAllComments, + "click_more_replies": args.ClickMoreReplies, + "max_replies_threshold": args.MaxRepliesThreshold, + "max_comment_items": args.MaxCommentItems, + "scroll_speed": args.ScrollSpeed, } result := appServer.handleGetFeedDetail(ctx, argsMap) return convertToMCPResult(result), nil, nil diff --git a/service.go b/service.go index 2c7c1a3..b1c6dc1 100644 --- a/service.go +++ b/service.go @@ -322,6 +322,11 @@ func (s *XiaohongshuService) SearchFeeds(ctx context.Context, keyword string, fi // GetFeedDetail 获取Feed详情 func (s *XiaohongshuService) GetFeedDetail(ctx context.Context, feedID, xsecToken string, loadAllComments bool) (*FeedDetailResponse, error) { + return s.GetFeedDetailWithConfig(ctx, feedID, xsecToken, loadAllComments, xiaohongshu.DefaultCommentLoadConfig()) +} + +// GetFeedDetailWithConfig 使用配置获取Feed详情 +func (s *XiaohongshuService) GetFeedDetailWithConfig(ctx context.Context, feedID, xsecToken string, loadAllComments bool, config xiaohongshu.CommentLoadConfig) (*FeedDetailResponse, error) { b := newBrowser() defer b.Close() @@ -332,7 +337,7 @@ func (s *XiaohongshuService) GetFeedDetail(ctx context.Context, feedID, xsecToke action := xiaohongshu.NewFeedDetailAction(page) // 获取 Feed 详情 - result, err := action.GetFeedDetail(ctx, feedID, xsecToken, loadAllComments) + result, err := action.GetFeedDetailWithConfig(ctx, feedID, xsecToken, loadAllComments, config) if err != nil { return nil, err } diff --git a/types.go b/types.go index cfd3a8c..c607ada 100644 --- a/types.go +++ b/types.go @@ -34,11 +34,24 @@ type MCPContent struct { Data string `json:"data"` } +// CommentLoadConfig 评论加载配置 +type CommentLoadConfig struct { + // 是否点击"更多回复"按钮 + ClickMoreReplies bool `json:"click_more_replies,omitempty"` + // 回复数量阈值,超过这个数量的"更多"按钮将被跳过(0表示不跳过任何) + MaxRepliesThreshold int `json:"max_replies_threshold,omitempty"` + // 最大加载评论数(comment-item数量),0表示加载所有 + MaxCommentItems int `json:"max_comment_items,omitempty"` + // 滚动速度等级: slow(慢速), normal(正常), fast(快速) + ScrollSpeed string `json:"scroll_speed,omitempty"` +} + // FeedDetailRequest Feed详情请求 type FeedDetailRequest struct { - FeedID string `json:"feed_id" binding:"required"` - XsecToken string `json:"xsec_token" binding:"required"` - LoadAllComments bool `json:"load_all_comments,omitempty"` + FeedID string `json:"feed_id" binding:"required"` + XsecToken string `json:"xsec_token" binding:"required"` + LoadAllComments bool `json:"load_all_comments,omitempty"` + CommentConfig *CommentLoadConfig `json:"comment_config,omitempty"` } type SearchFeedsRequest struct { diff --git a/xiaohongshu/feed_detail.go b/xiaohongshu/feed_detail.go index 50d813b..85d560a 100644 --- a/xiaohongshu/feed_detail.go +++ b/xiaohongshu/feed_detail.go @@ -4,6 +4,9 @@ import ( "context" "encoding/json" "fmt" + "math/rand" + "regexp" + "strconv" "time" "github.com/go-rod/rod" @@ -12,6 +15,28 @@ import ( "github.com/xpzouying/xiaohongshu-mcp/errors" ) +// CommentLoadConfig 评论加载配置 +type CommentLoadConfig struct { + // 是否点击"更多回复"按钮 + ClickMoreReplies bool + // 回复数量阈值,超过这个数量的"更多"按钮将被跳过(0表示不跳过任何) + MaxRepliesThreshold int + // 最大加载评论数(comment-item数量),0表示加载所有 + MaxCommentItems int + // 滚动速度等级: slow(慢速), normal(正常), fast(快速) + ScrollSpeed string +} + +// DefaultCommentLoadConfig 默认配置 +func DefaultCommentLoadConfig() CommentLoadConfig { + return CommentLoadConfig{ + ClickMoreReplies: false, // 默认不点击"更多回复" + MaxRepliesThreshold: 10, // 默认超过10条回复就跳过 + MaxCommentItems: 0, // 默认加载所有评论 + ScrollSpeed: "normal", + } +} + // FeedDetailAction 表示 Feed 详情页动作 type FeedDetailAction struct { page *rod.Page @@ -23,12 +48,19 @@ func NewFeedDetailAction(page *rod.Page) *FeedDetailAction { } // GetFeedDetail 获取 Feed 详情页数据 -func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken string, loadAllComments bool) (*FeedDetailResponse, error) { - page := f.page.Context(ctx).Timeout(5 * time.Minute) +func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken string, loadAllComments bool, config CommentLoadConfig) (*FeedDetailResponse, error) { + return f.GetFeedDetailWithConfig(ctx, feedID, xsecToken, loadAllComments, config) +} + +// GetFeedDetailWithConfig 获取 Feed 详情页数据(带配置) +func (f *FeedDetailAction) GetFeedDetailWithConfig(ctx context.Context, feedID, xsecToken string, loadAllComments bool, config CommentLoadConfig) (*FeedDetailResponse, error) { + page := f.page.Context(ctx).Timeout(10 * time.Minute) // 构建详情页 URL url := makeFeedDetailURL(feedID, xsecToken) logrus.Infof("打开 feed 详情页: %s", url) + logrus.Infof("配置: 点击更多=%v, 回复阈值=%d, 最大评论数=%d, 滚动速度=%s", + config.ClickMoreReplies, config.MaxRepliesThreshold, config.MaxCommentItems, config.ScrollSpeed) // 导航到详情页 page.MustNavigate(url) @@ -42,7 +74,7 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken // 加载全部评论 if loadAllComments { - if err := f.loadAllComments(page); err != nil { + if err := f.loadAllCommentsWithConfig(page, config); err != nil { logrus.Warnf("加载全部评论失败: %v", err) } } @@ -99,23 +131,28 @@ func checkPageAccessible(page *rod.Page) error { return nil } -// loadAllComments 加载所有评论 -func (f *FeedDetailAction) loadAllComments(page *rod.Page) error { +// loadAllCommentsWithConfig 加载所有评论(带配置) +func (f *FeedDetailAction) loadAllCommentsWithConfig(page *rod.Page, config CommentLoadConfig) error { + maxAttempts := 500 + if config.MaxCommentItems > 0 { + // 如果设置了最大评论数,减少尝试次数 + maxAttempts = config.MaxCommentItems * 3 + } + const ( - maxAttempts = 500 - scrollInterval = 600 * time.Millisecond - clickMoreInterval = 1 // 每次滚动都检查"更多"按钮 - stagnantLimit = 20 // 增加停滞容忍度 - noScrollChangeLimit = 15 // 增加滚动停滞容忍度 - minScrollDelta = 10 // 最小有效滚动距离 - aggressiveClickEvery = 5 // 每5次尝试进行一次激进点击 + stagnantLimit = 20 + noScrollChangeLimit = 15 + minScrollDelta = 10 ) - logrus.Info("开始加载所有评论...") + // 获取滚动间隔(根据速度) + scrollInterval := getScrollInterval(config.ScrollSpeed) + + logrus.Info("开始加载评论...") // 先滚动到评论区 scrollToCommentsArea(page) - time.Sleep(1 * time.Second) + humanDelay() var ( lastCount = 0 @@ -123,6 +160,7 @@ func (f *FeedDetailAction) loadAllComments(page *rod.Page) error { stagnantChecks = 0 noScrollChangeCount = 0 totalClickedButtons = 0 + skippedButtons = 0 attempt = 0 ) @@ -132,36 +170,42 @@ func (f *FeedDetailAction) loadAllComments(page *rod.Page) error { // === 1. 检查是否到达底部 === if checkEndContainer(page) { logrus.Infof("✓ 检测到 'THE END' 元素,已滑动到底部") - // 到底部后再做最后一轮点击 - finalClicked := clickShowMoreButtons(page) - totalClickedButtons += finalClicked - if finalClicked > 0 { - logrus.Infof("底部最后点击了 %d 个按钮", finalClicked) - time.Sleep(1 * time.Second) - } + humanDelay() currentCount := getCommentCount(page) - logrus.Infof("✓ 加载完成: %d 条评论, 尝试次数: %d, 点击按钮: %d", - currentCount, attempt+1, totalClickedButtons) + logrus.Infof("✓ 加载完成: %d 条评论, 尝试次数: %d, 点击: %d, 跳过: %d", + currentCount, attempt+1, totalClickedButtons, skippedButtons) return nil } - // === 2. 每次都点击"更多"按钮 === - if attempt%clickMoreInterval == 0 { - clicked := clickShowMoreButtons(page) - if clicked > 0 { - totalClickedButtons += clicked - logrus.Infof("点击了 %d 个'更多'按钮,累计: %d", clicked, totalClickedButtons) - time.Sleep(500 * time.Millisecond) + // === 2. 获取当前评论数 === + currentCount := getCommentCount(page) - // 多轮检查 - for round := 0; round < 2; round++ { - time.Sleep(300 * time.Millisecond) - clicked2 := clickShowMoreButtons(page) - if clicked2 > 0 { + // === 3. 点击"更多"按钮(人性化:每隔几次尝试才点击一次) === + if config.ClickMoreReplies && attempt%3 == 0 { + clicked, skipped := clickShowMoreButtonsSmart(page, config.MaxRepliesThreshold) + if clicked > 0 || skipped > 0 { + totalClickedButtons += clicked + skippedButtons += skipped + logrus.Infof("点击'更多': %d 个, 跳过: %d 个, 累计点击: %d, 累计跳过: %d", + clicked, skipped, totalClickedButtons, skippedButtons) + + // 点击后等待更长时间,模拟人阅读新内容(800-1500ms) + readTime := time.Duration(800+rand.Intn(700)) * time.Millisecond + time.Sleep(readTime) + + // 多轮检查(但减少轮数,避免太频繁) + for round := 0; round < 1; round++ { + // 等待一段时间再检查(模拟人继续浏览) + time.Sleep(time.Duration(500+rand.Intn(500)) * time.Millisecond) + clicked2, skipped2 := clickShowMoreButtonsSmart(page, config.MaxRepliesThreshold) + if clicked2 > 0 || skipped2 > 0 { totalClickedButtons += clicked2 - logrus.Infof("第 %d 轮再次点击了 %d 个按钮", round+2, clicked2) - time.Sleep(500 * time.Millisecond) + skippedButtons += skipped2 + logrus.Infof("第 %d 轮: 点击 %d, 跳过 %d", round+2, clicked2, skipped2) + // 再次等待阅读时间 + readTime2 := time.Duration(600+rand.Intn(600)) * time.Millisecond + time.Sleep(readTime2) } else { break } @@ -169,103 +213,258 @@ func (f *FeedDetailAction) loadAllComments(page *rod.Page) error { } } - // === 4. 获取当前评论数量 === - currentCount := getCommentCount(page) + // === 4. 获取评论数量 === totalCount := getTotalCommentCount(page) - logrus.Debugf("当前评论: %d, 目标: %d", currentCount, totalCount) - // 检查是否已加载所有评论(但继续滚动到底部确认) - if totalCount > 0 && currentCount >= totalCount { - logrus.Infof("评论数量已达标: %d/%d,继续滚动到底部确认...", currentCount, totalCount) - // 不要立即返回,继续滚动到底部 - } - // === 5. 检查评论数量变化 === if currentCount != lastCount { - logrus.Infof("✓ 评论数量增加: %d -> %d (+%d)", lastCount, currentCount, currentCount-lastCount) + logrus.Infof("✓ 评论增加: %d -> %d (+%d)", lastCount, currentCount, currentCount-lastCount) lastCount = currentCount - stagnantChecks = 0 // 重置停滞计数 + stagnantChecks = 0 } else { stagnantChecks++ if stagnantChecks%5 == 0 { - logrus.Debugf("评论数量停滞 %d 次", stagnantChecks) + logrus.Debugf("评论停滞 %d 次", stagnantChecks) } } - // 只有在严重停滞时才考虑退出 + // === 5.1 检查是否已达到目标评论数(在评论数停滞时)=== + if config.MaxCommentItems > 0 && currentCount >= config.MaxCommentItems { + // 达到目标且停滞2次,确认加载完成 + if stagnantChecks >= 2 { + logrus.Infof("✓ 已达到目标评论数: %d/%d (停滞%d次), 停止加载", + currentCount, config.MaxCommentItems, stagnantChecks) + return nil + } + // 刚达到目标,继续滚动确认 + if stagnantChecks > 0 { + logrus.Debugf("已达目标数 %d/%d,再确认 %d 次...", + currentCount, config.MaxCommentItems, 2-stagnantChecks) + } + } + + // === 6. 停滞处理 === if stagnantChecks >= stagnantLimit { - logrus.Infof("评论数量长期停滞,尝试最后冲刺...") - // 最后冲刺:大幅滚动 + 点击 - finalPush(page) - finalClicked := clickShowMoreButtons(page) - totalClickedButtons += finalClicked + logrus.Infof("评论停滞,尝试最后冲刺...") + finalPush(page, config.ScrollSpeed) if checkEndContainer(page) { - logrus.Infof("✓ 最终到达底部,评论数: %d, 点击按钮: %d", - currentCount, totalClickedButtons) + logrus.Infof("✓ 到达底部,评论数: %d", currentCount) return nil } - // 还没到底部,继续 - logrus.Infof("未到底部,重置停滞计数,继续加载...") + logrus.Infof("未到底部,重置停滞计数") stagnantChecks = 0 } - // === 6. 执行滚动 === - _, scrollDelta, currentScrollTop := scrollWithMouse(page) + // === 7. 执行人性化滚动 === + // 先滚动到最后一个评论(触发懒加载的关键!) + if currentCount > 0 { + scrollToLastComment(page) + time.Sleep(time.Duration(300+rand.Intn(200)) * time.Millisecond) + } + + _, scrollDelta, currentScrollTop := humanScroll(page, config.ScrollSpeed) - // === 7. 检查滚动变化 === + // === 8. 检查滚动变化 === if scrollDelta < minScrollDelta || currentScrollTop == lastScrollTop { noScrollChangeCount++ if noScrollChangeCount%5 == 0 { - logrus.Debugf("滚动停滞 %d 次,尝试大幅滚动", noScrollChangeCount) - // 尝试更大幅度滚动 - largeScroll(page) - time.Sleep(300 * time.Millisecond) + logrus.Debugf("滚动停滞 %d 次", noScrollChangeCount) + largeScroll(page, config.ScrollSpeed) + humanDelay() } } else { noScrollChangeCount = 0 lastScrollTop = currentScrollTop } - // 只有严重滚动停滞时才考虑结束 + // === 9. 滚动停滞处理 === if noScrollChangeCount >= noScrollChangeLimit { - logrus.Infof("滚动严重停滞,尝试最后冲刺...") - finalPush(page) + logrus.Infof("滚动停滞,最后冲刺...") + finalPush(page, config.ScrollSpeed) if checkEndContainer(page) { - currentCount := getCommentCount(page) - logrus.Infof("✓ 最终到达底部,评论数: %d, 点击按钮: %d", - currentCount, totalClickedButtons) + logrus.Infof("✓ 到达底部,评论数: %d", currentCount) return nil } - // 重置计数继续 - logrus.Infof("未到底部,重置滚动计数,继续加载...") + logrus.Infof("重置滚动计数") noScrollChangeCount = 0 lastScrollTop = 0 } - // === 8. 等待内容加载 === + // === 10. 等待内容加载 === time.Sleep(scrollInterval) } - // === 9. 达到最大尝试次数,做最后的冲刺 === - logrus.Infof("达到最大尝试次数 %d,执行最后冲刺...", maxAttempts) - finalPush(page) - finalClicked := clickShowMoreButtons(page) - totalClickedButtons += finalClicked + // === 11. 最后冲刺 === + logrus.Infof("达到最大尝试次数,最后冲刺...") + finalPush(page, config.ScrollSpeed) currentCount := getCommentCount(page) hasEnd := checkEndContainer(page) - logrus.Infof("✓ 加载结束: %d 条评论, 总点击按钮: %d, 到达底部: %v", - currentCount, totalClickedButtons, hasEnd) + logrus.Infof("✓ 加载结束: %d 条评论, 点击: %d, 跳过: %d, 到达底部: %v", + currentCount, totalClickedButtons, skippedButtons, hasEnd) return nil } +// getScrollInterval 根据速度获取滚动间隔 +func getScrollInterval(speed string) time.Duration { + switch speed { + case "slow": + return time.Duration(1200+rand.Intn(300)) * time.Millisecond + case "fast": + return time.Duration(300+rand.Intn(100)) * time.Millisecond + default: // normal + return time.Duration(600+rand.Intn(200)) * time.Millisecond + } +} + +// humanDelay 人性化延迟 +func humanDelay() { + delay := time.Duration(300+rand.Intn(400)) * time.Millisecond + time.Sleep(delay) +} + +// clickShowMoreButtonsSmart 智能点击"更多"按钮(根据回复数量判断,人性化操作) +func clickShowMoreButtonsSmart(page *rod.Page, maxRepliesThreshold int) (clicked, skipped int) { + elements, err := page.Elements(".show-more") + if err != nil { + return 0, 0 + } + + // 正则表达式:匹配"展开 X 条回复" + replyCountRegex := regexp.MustCompile(`展开\s*(\d+)\s*条回复`) + + // 限制每次最多点击的按钮数量(模拟人不会一次性点击太多) + maxClickPerRound := 3 + rand.Intn(3) // 每次3-5个 + clickedInRound := 0 + + for _, el := range elements { + // 限制单次点击数量 + if clickedInRound >= maxClickPerRound { + break + } + + // 检查元素是否可见 + visible, err := el.Visible() + if err != nil || !visible { + continue + } + + // 检查是否在 DOM 中 + box, err := el.Shape() + if err != nil || len(box.Quads) == 0 { + continue + } + + // 获取按钮文本 + text, err := el.Text() + if err != nil { + continue + } + + // 判断是否需要跳过 + shouldSkip := false + if maxRepliesThreshold > 0 { + matches := replyCountRegex.FindStringSubmatch(text) + if len(matches) > 1 { + replyCount, err := strconv.Atoi(matches[1]) + if err == nil && replyCount > maxRepliesThreshold { + shouldSkip = true + logrus.Debugf("跳过'%s'(回复数 %d > 阈值 %d)", text, replyCount, maxRepliesThreshold) + } + } + } + + if shouldSkip { + skipped++ + continue + } + + // === 人性化点击流程 === + // 1. 先滚动到元素附近(模拟人看到按钮) + el.MustEval(`() => { + try { + this.scrollIntoView({behavior: 'smooth', block: 'center'}); + } catch (e) {} + }`) + + // 2. 等待滚动完成 + 模拟人看到按钮后的反应时间(300-800ms) + reactionTime := time.Duration(300+rand.Intn(500)) * time.Millisecond + time.Sleep(reactionTime) + + // 3. 模拟鼠标移动到按钮上(悬停效果) + box, _ = el.Shape() + if len(box.Quads) > 0 { + // 计算按钮中心点 + x := float64(box.Quads[0][0]+box.Quads[0][4]) / 2 + y := float64(box.Quads[0][1]+box.Quads[0][5]) / 2 + page.Mouse.MustMoveTo(x, y) + // 悬停时间(模拟人确认要点击) + time.Sleep(time.Duration(100+rand.Intn(200)) * time.Millisecond) + } + + // 4. 点击元素 + if err := el.Click(proto.InputMouseButtonLeft, 1); err == nil { + clicked++ + clickedInRound++ + logrus.Debugf("点击了'%s'", text) + + // 5. 点击后的延迟(模拟人阅读新内容的时间,500-1200ms) + readTime := time.Duration(500+rand.Intn(700)) * time.Millisecond + time.Sleep(readTime) + } + } + + return clicked, skipped +} + +// humanScroll 人性化滚动 +func humanScroll(page *rod.Page, speed string) (bool, int, int) { + beforeTop := getScrollTop(page) + viewportHeight := page.MustEval(`() => window.innerHeight`).Int() + + // 根据速度调整滚动距离 + var scrollRatio float64 + switch speed { + case "slow": + scrollRatio = 0.5 + rand.Float64()*0.2 // 50%-70% + case "fast": + scrollRatio = 0.9 + rand.Float64()*0.2 // 90%-110% + default: // normal + scrollRatio = 0.7 + rand.Float64()*0.2 // 70%-90% + } + + scrollDelta := float64(viewportHeight) * scrollRatio + if scrollDelta < 400 { + scrollDelta = 400 + } + + // 添加随机波动 + scrollDelta += float64(rand.Intn(100) - 50) + + // 使用JS的 scrollBy 方法进行滚动 + page.MustEval(`(delta) => { window.scrollBy(0, delta); }`, scrollDelta) + + // 等待滚动完成 + time.Sleep(time.Duration(100+rand.Intn(100)) * time.Millisecond) + + afterTop := getScrollTop(page) + actualDelta := afterTop - beforeTop + scrolled := actualDelta > 5 + + if scrolled { + logrus.Debugf("滚动: %d -> %d (Δ%d)", beforeTop, afterTop, actualDelta) + } + + return scrolled, actualDelta, afterTop +} + // scrollToCommentsArea 滚动到评论区 func scrollToCommentsArea(page *rod.Page) { logrus.Info("滚动到评论区...") @@ -277,80 +476,59 @@ func scrollToCommentsArea(page *rod.Page) { }`) } -// finalPush 最后冲刺:大幅滚动到底部 -func finalPush(page *rod.Page) { - logrus.Info("执行最后冲刺滚动...") +// scrollToLastComment 滚动到最后一个评论(触发懒加载的关键) +func scrollToLastComment(page *rod.Page) { + page.MustEval(`() => { + const container = document.querySelector('.comments-container'); + if (!container) return; + + // 查找最后一个主评论 + const comments = container.querySelectorAll('.parent-comment'); + if (comments.length > 0) { + const lastComment = comments[comments.length - 1]; + // 滚动到最后一个评论,让它出现在视口中间偏下位置 + lastComment.scrollIntoView({behavior: 'smooth', block: 'center'}); + } + }`) +} - for i := 0; i < 20; i++ { - // 检查是否已经到底部 +// finalPush 最后冲刺:大幅滚动到底部 +func finalPush(page *rod.Page, speed string) { + logrus.Info("执行最后冲刺...") + + for i := 0; i < 15; i++ { if checkEndContainer(page) { - logrus.Debug("已到底部,停止冲刺") return } beforeTop := getScrollTop(page) + largeScroll(page, speed) - // 大幅滚动 - largeScroll(page) - time.Sleep(200 * time.Millisecond) - - // 点击出现的按钮 - clicked := clickShowMoreButtons(page) - if clicked > 0 { - time.Sleep(500 * time.Millisecond) - } + // 人性化延迟 + time.Sleep(time.Duration(200+rand.Intn(200)) * time.Millisecond) afterTop := getScrollTop(page) - - // 如果滚动没变化,尝试JS滚动 if afterTop == beforeTop { - page.MustEval(`() => { - window.scrollTo(0, document.body.scrollHeight); - }`) - time.Sleep(300 * time.Millisecond) + page.MustEval(`() => window.scrollTo(0, document.body.scrollHeight)`) + time.Sleep(time.Duration(300+rand.Intn(200)) * time.Millisecond) } } } // largeScroll 大幅度滚动 -func largeScroll(page *rod.Page) { - // 方法1: Mouse.Scroll 大幅度滚动 - page.Mouse.Scroll(0, 2000, 5) - time.Sleep(100 * time.Millisecond) -} - -// scrollWithMouse 使用 Mouse 模拟滚轮滚动 -func scrollWithMouse(page *rod.Page) (bool, int, int) { - beforeTop := getScrollTop(page) - - // 获取视口高度 - viewportHeight := page.MustEval(`() => window.innerHeight`).Int() - - // 计算滚动距离(每次滚动视口高度的 80%) - scrollDelta := float64(viewportHeight) * 0.8 - if scrollDelta < 500 { - scrollDelta = 500 +func largeScroll(page *rod.Page, speed string) { + var scrollDelta float64 + switch speed { + case "slow": + scrollDelta = 1000 + float64(rand.Intn(500)) + case "fast": + scrollDelta = 3000 + float64(rand.Intn(1000)) + default: // normal + scrollDelta = 2000 + float64(rand.Intn(500)) } - // 使用 Mouse.Scroll 模拟滚轮滚动 - err := page.Mouse.Scroll(0, scrollDelta, 5) - if err != nil { - logrus.Warnf("鼠标滚动失败: %v", err) - return false, 0, beforeTop - } - - // 等待滚动完成 - time.Sleep(150 * time.Millisecond) - - afterTop := getScrollTop(page) - actualDelta := afterTop - beforeTop - scrolled := actualDelta > 5 - - if scrolled { - logrus.Debugf("滚动: %d -> %d (Δ%d)", beforeTop, afterTop, actualDelta) - } - - return scrolled, actualDelta, afterTop + page.MustEval(`(delta) => { window.scrollBy(0, delta); }`, scrollDelta) + time.Sleep(time.Duration(100+rand.Intn(50)) * time.Millisecond) } // getScrollTop 获取当前滚动位置 @@ -361,44 +539,12 @@ func getScrollTop(page *rod.Page) int { return result.Int() } -// clickShowMoreButtons 点击所有可见的"更多"按钮 -func clickShowMoreButtons(page *rod.Page) int { - elements, err := page.Elements(".show-more") - if err != nil { - return 0 - } - - clickedCount := 0 - - for _, el := range elements { - // 检查元素是否可见 - visible, err := el.Visible() - if err != nil || !visible { - continue - } - - // 检查是否在 DOM 中 - box, err := el.Shape() - if err != nil || len(box.Quads) == 0 { - continue - } - - // 点击元素 - if err := el.Click(proto.InputMouseButtonLeft, 1); err == nil { - clickedCount++ - time.Sleep(150 * time.Millisecond) - } - } - - return clickedCount -} - // getCommentCount 获取当前评论数量 func getCommentCount(page *rod.Page) int { result := page.MustEval(`() => { const container = document.querySelector('.comments-container'); if (!container) return 0; - return container.querySelectorAll('.comment-item, .comment-item-sub, .comment').length; + return container.querySelectorAll('.parent-comment').length; }`) return result.Int() } From cbbec86000830439e01aeae242dcd8b4441e6435 Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Thu, 4 Dec 2025 01:10:40 +0800 Subject: [PATCH 13/19] =?UTF-8?q?fix:=20=E8=87=AA=E5=AE=9A=E4=B9=89?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=EF=BC=8C=E4=BC=98=E5=8C=96=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- xiaohongshu/comment_feed.go | 596 +++++------------------------------- xiaohongshu/feed_detail.go | 14 +- 2 files changed, 84 insertions(+), 526 deletions(-) diff --git a/xiaohongshu/comment_feed.go b/xiaohongshu/comment_feed.go index c3f48bf..f0e9e96 100644 --- a/xiaohongshu/comment_feed.go +++ b/xiaohongshu/comment_feed.go @@ -27,18 +27,16 @@ func (f *CommentFeedAction) PostComment(ctx context.Context, feedID, xsecToken, url := makeFeedDetailURL(feedID, xsecToken) logrus.Infof("打开 feed 详情页: %s", url) - if err := page.Navigate(url); err != nil { - logrus.Warnf("Failed to navigate to feed detail page: %v", err) - return fmt.Errorf("无法打开帖子详情页,该帖子可能在网页端不可访问: %w", err) - } - - if err := page.WaitStable(2 * time.Second); err != nil { - logrus.Warnf("Failed to wait for page stable: %v", err) - return fmt.Errorf("页面加载超时,该帖子可能在网页端不可访问: %w", err) - } - + // 导航到详情页 + page.MustNavigate(url) + page.MustWaitDOMStable() time.Sleep(1 * time.Second) + // 检测页面是否可访问 + if err := checkPageAccessible(page); err != nil { + return err + } + elem, err := page.Element("div.input-box div.content-edit span") if err != nil { logrus.Warnf("Failed to find comment input box: %v", err) @@ -87,568 +85,118 @@ func (f *CommentFeedAction) ReplyToComment(ctx context.Context, feedID, xsecToke url := makeFeedDetailURL(feedID, xsecToken) logrus.Infof("打开 feed 详情页进行回复: %s", url) + // 导航到详情页 page.MustNavigate(url) page.MustWaitDOMStable() - time.Sleep(3 * time.Second) + time.Sleep(1 * time.Second) + + // 检测页面是否可访问 + if err := checkPageAccessible(page); err != nil { + return err + } // 等待评论容器加载 - waitForCommentsContainer(page) time.Sleep(2 * time.Second) - // 使用新的查找逻辑(完全在 JS 中执行) - commentEl, err := findCommentElementNew(page, commentID, userID) + // 使用 Go 实现的查找逻辑 + commentEl, err := findCommentElement(page, commentID, userID) if err != nil { return fmt.Errorf("无法找到评论: %w", err) } - // 多次滚动确保可见 - for i := 0; i < 3; i++ { - logrus.Infof("第 %d 次滚动到评论位置...", i+1) - _, _ = commentEl.Eval(`() => { - this.scrollIntoView({behavior: "instant", block: "center"}); - return true - }`) - time.Sleep(1500 * time.Millisecond) + // 滚动到评论位置 + logrus.Info("滚动到评论位置...") + commentEl.MustScrollIntoView() + time.Sleep(1 * time.Second) - // 往下多滚动一点 - page.MustEval(`() => window.scrollBy(0, 150)`) - time.Sleep(500 * time.Millisecond) - } - - logrus.Info("滚动完成,准备点击回复按钮") + logrus.Info("准备点击回复按钮") // 查找并点击回复按钮 - replyBtn, err := findReplyButton(commentEl) + replyBtn, err := commentEl.Element(".right .interactions .reply") if err != nil { return fmt.Errorf("无法找到回复按钮: %w", err) } - if !tryClickChainForComment(replyBtn) { - return fmt.Errorf("点击回复按钮失败") + if err := replyBtn.Click(proto.InputMouseButtonLeft, 1); err != nil { + return fmt.Errorf("点击回复按钮失败: %w", err) } - time.Sleep(2 * time.Second) + time.Sleep(1 * time.Second) // 查找回复输入框 - inputEl, err := findReplyInput(page, commentEl) + inputEl, err := page.Element("div.input-box div.content-edit p.content-input") if err != nil { return fmt.Errorf("无法找到回复输入框: %w", err) } - // 聚焦并输入内容 - if _, evalErr := inputEl.Eval(`() => { - try { - this.focus(); - } catch (e) {} - return true - }`); evalErr != nil { - logrus.Warnf("focus reply input failed: %v", evalErr) + // 输入内容 + if err := inputEl.Input(content); err != nil { + return fmt.Errorf("输入回复内容失败: %w", err) } - inputEl.MustInput(content) time.Sleep(500 * time.Millisecond) // 查找并点击提交按钮 - submitBtn, err := findSubmitButton(page) + submitBtn, err := page.Element("div.bottom button.submit") if err != nil { return fmt.Errorf("无法找到提交按钮: %w", err) } - if !tryClickChainForComment(submitBtn) { - return fmt.Errorf("点击回复提交按钮失败") + if err := submitBtn.Click(proto.InputMouseButtonLeft, 1); err != nil { + return fmt.Errorf("点击提交按钮失败: %w", err) } - time.Sleep(3 * time.Second) + time.Sleep(2 * time.Second) + logrus.Infof("回复评论成功") return nil } -func findCommentElementNew(page *rod.Page, commentID, userID string) (*rod.Element, error) { - logrus.Infof("🔍 开始查找评论(新方法)- commentID: %s, userID: %s", commentID, userID) +// findCommentElement 查找指定评论元素(Go 实现,减少 JS 代码) +func findCommentElement(page *rod.Page, commentID, userID string) (*rod.Element, error) { + logrus.Infof("开始查找评论 - commentID: %s, userID: %s", commentID, userID) - // 修改 JS:找到后记录元素的 ID - findCommentJS := fmt.Sprintf(`async () => { - const INTERVAL_MS = 900; - const STAGNANT_LIMIT = 8; - const NO_CHANGE_SCROLL_LIMIT = 3; - const DELTA_MIN = 480; - const SCROLL_TIMEOUT = 900; - const MAX_ATTEMPTS = 100; - const CLICK_MORE_INTERVAL = 2; - const CLICK_WAIT_TIME = 300; + const maxAttempts = 50 + const scrollInterval = 800 * time.Millisecond - const TARGET_COMMENT_ID = %q; - const TARGET_USER_ID = %q; - - console.log('开始查找评论 - TARGET_COMMENT_ID:', TARGET_COMMENT_ID, 'TARGET_USER_ID:', TARGET_USER_ID); - - const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms)); - const scrollRoot = () => document.scrollingElement || document.documentElement || document.body; - const getContainer = () => document.querySelector('.comments-container'); - - const clickShowMoreButtons = () => { - let clickedCount = 0; - const elements = document.querySelectorAll('.show-more'); - - elements.forEach((el) => { - try { - const rect = el.getBoundingClientRect(); - const style = window.getComputedStyle(el); - const isVisible = ( - rect.height > 0 && - rect.width > 0 && - style.display !== 'none' && - style.visibility !== 'hidden' && - style.opacity !== '0' && - rect.top < window.innerHeight + 500 && - rect.bottom > -500 - ); - - if (isVisible) { - el.click(); - clickedCount++; - } - } catch (err) { - console.debug('点击失败', err); - } - }); - - return clickedCount; - }; - - // === 修改:返回元素的稳定标识符 === - const findTargetComment = () => { - // 优先通过 commentID 查找 - if (TARGET_COMMENT_ID) { - const byId = document.querySelector('#comment-' + TARGET_COMMENT_ID); - if (byId) { - console.log('通过 commentID 找到评论:', TARGET_COMMENT_ID); - // 返回包含完整信息的对象 - return { - element: byId, - selector: '#comment-' + TARGET_COMMENT_ID, - commentId: TARGET_COMMENT_ID - }; - } - } - - // 通过 userID 查找 - if (TARGET_USER_ID) { - const allComments = document.querySelectorAll('.comment-item, .comment'); - for (const comment of allComments) { - const userIdEl = comment.querySelector('[data-user-id="' + TARGET_USER_ID + '"]'); - if (userIdEl) { - console.log('通过 userID 找到评论:', TARGET_USER_ID); - - // 尝试获取评论的 ID - const commentId = comment.id; - if (commentId) { - return { - element: comment, - selector: '#' + commentId, - commentId: commentId.replace('comment-', '') - }; - } else { - // 如果没有 ID,给它添加一个唯一标识 - const uniqueId = 'xhs-found-' + Date.now() + '-' + Math.random().toString(36).substr(2, 9); - comment.id = uniqueId; - return { - element: comment, - selector: '#' + uniqueId, - commentId: null - }; - } - } - } - } - - return null; - }; - - // ... (保留原有的滚动逻辑) ... - const getScrollMetrics = (el) => { - if (!el) { - return { top: 0, max: 0, client: window.innerHeight }; - } - if (el === window || el === document || el === document.body || el === document.documentElement) { - const root = scrollRoot(); - return { - top: root.scrollTop, - max: Math.max(root.scrollHeight - root.clientHeight, 0), - client: root.clientHeight || window.innerHeight - }; - } - return { - top: el.scrollTop, - max: Math.max(el.scrollHeight - el.clientHeight, 0), - client: el.clientHeight - }; - }; - - const setScrollTop = (el, value) => { - if (!el) return; - if (el === window || el === document || el === document.body || el === document.documentElement) { - const root = scrollRoot(); - root.scrollTop = value; - window.scrollTo(0, value); - return; - } - el.scrollTop = value; - }; - - const dispatchWheel = (el, delta) => { - if (!el) return; - try { - const wheel = new WheelEvent('wheel', { - deltaY: delta, - bubbles: true, - cancelable: true - }); - el.dispatchEvent(wheel); - el.dispatchEvent(new Event('scroll', { bubbles: true })); - } catch (err) { - console.debug('dispatchWheel error', err); - } - }; - - const findScrollTarget = () => { - const container = getContainer(); - const candidates = new Set(); - - if (container) { - let current = container; - while (current) { - if (current instanceof HTMLElement) { - candidates.add(current); - } - current = current.parentElement; - } - } - - candidates.add(document.body); - candidates.add(document.documentElement); - - const weighted = Array.from(candidates).map((node) => { - const style = window.getComputedStyle(node); - const overflowY = style.overflowY; - const scrollable = node.scrollHeight - node.clientHeight > 40; - const hasScrollStyle = /auto|scroll|overlay/i.test(overflowY); - const weight = - (node.contains(container) ? 1000 : 0) + - (node === container ? 800 : 0) + - (hasScrollStyle ? 400 : 0) + - (scrollable ? 300 : 0) - - (node === document.body || node === document.documentElement ? 50 : 0); - - if (scrollable || hasScrollStyle || node === document.body || node === document.documentElement) { - return { node, weight }; - } - return null; - }).filter(Boolean); - - weighted.sort((a, b) => b.weight - a.weight); - - return weighted.length > 0 ? weighted[0].node : scrollRoot(); - }; - - const performScroll = (target) => { - const scrollTarget = target || findScrollTarget(); - if (!scrollTarget) { - window.scrollBy(0, window.innerHeight * 0.8); - return; - } - - const metrics = getScrollMetrics(scrollTarget); - const beforeTop = metrics.top; - const desired = metrics.max > 0 - ? Math.min(metrics.top + Math.max(metrics.client * 0.85, DELTA_MIN), metrics.max) - : metrics.top + Math.max(metrics.client * 0.85, DELTA_MIN); - const applied = Math.max(0, desired - metrics.top); - - setScrollTop(scrollTarget, desired); - dispatchWheel(scrollTarget, applied); - - const afterTop = getScrollMetrics(scrollTarget).top; - if (Math.abs(afterTop - beforeTop) < 5 && scrollTarget !== scrollRoot()) { - const root = scrollRoot(); - const rootBefore = root.scrollTop; - root.scrollTop = rootBefore + applied; - window.scrollBy(0, applied); - dispatchWheel(root, applied); - } - }; - - // 主查找逻辑 - let lastScrollTop = 0; - let stagnantChecks = 0; - let noScrollChangeCount = 0; - let totalClickedButtons = 0; - - for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) { - const container = getContainer(); - if (!container) { - await sleep(300); - continue; - } - - if (attempt %% CLICK_MORE_INTERVAL === 0) { - const clicked = clickShowMoreButtons(); - if (clicked > 0) { - totalClickedButtons += clicked; - console.log('点击了 ' + clicked + ' 个"更多"按钮,累计: ' + totalClickedButtons); - await sleep(CLICK_WAIT_TIME); - - await sleep(200); - const clicked2 = clickShowMoreButtons(); - if (clicked2 > 0) { - totalClickedButtons += clicked2; - console.log('二次检查点击了 ' + clicked2 + ' 个"更多"按钮'); - await sleep(CLICK_WAIT_TIME); - } - - const foundInfo = findTargetComment(); - if (foundInfo) { - console.log('点击"更多"后找到评论,总共点击了 ' + totalClickedButtons + ' 个按钮'); - return { - status: 'found', - attempts: attempt + 1, - clickedButtons: totalClickedButtons, - selector: foundInfo.selector, - commentId: foundInfo.commentId - }; - } - } - } - - const foundInfo = findTargetComment(); - if (foundInfo) { - console.log('找到评论,尝试次数: ' + (attempt + 1) + ',总共点击了 ' + totalClickedButtons + ' 个按钮'); - return { - status: 'found', - attempts: attempt + 1, - clickedButtons: totalClickedButtons, - selector: foundInfo.selector, - commentId: foundInfo.commentId - }; - } - - const target = findScrollTarget(); - const beforeTop = getScrollMetrics(target).top; - performScroll(target); - await sleep(SCROLL_TIMEOUT); - const afterTop = getScrollMetrics(target).top; - - if (Math.abs(afterTop - beforeTop) < 5) { - noScrollChangeCount += 1; - } else { - noScrollChangeCount = 0; - lastScrollTop = afterTop; - } - - if (noScrollChangeCount >= NO_CHANGE_SCROLL_LIMIT) { - return { status: 'not_found', reason: 'no-scroll-change', attempts: attempt + 1, clickedButtons: totalClickedButtons }; - } - - if (INTERVAL_MS > SCROLL_TIMEOUT) { - await sleep(INTERVAL_MS - SCROLL_TIMEOUT); - } + // 先滚动到评论区 + page.MustEval(`() => { + const container = document.querySelector('.comments-container'); + if (container) { + container.scrollIntoView({behavior: 'smooth', block: 'start'}); } - - return { status: 'not_found', reason: 'timeout', attempts: MAX_ATTEMPTS, clickedButtons: totalClickedButtons }; - }`, commentID, userID) - - // 执行 JS - result, err := page.Eval(findCommentJS) - if err != nil { - logrus.Errorf("执行查找评论 JS 失败: %v", err) - return nil, fmt.Errorf("执行查找评论 JS 失败: %w", err) - } - - // 解析结果 - resultJSON, err := page.ObjectToJSON(result) - if err != nil { - logrus.Errorf("无法将结果转换为 JSON: %v", err) - return nil, fmt.Errorf("无法将结果转换为 JSON: %w", err) - } - - status := resultJSON.Get("status").Str() - reason := resultJSON.Get("reason").Str() - attempts := resultJSON.Get("attempts").Int() - clickedButtons := resultJSON.Get("clickedButtons").Int() - selector := resultJSON.Get("selector").Str() - - logrus.Infof("查找结果: status=%s, reason=%s, attempts=%d, clickedButtons=%d, selector=%s", - status, reason, attempts, clickedButtons, selector) - - if status != "found" { - return nil, fmt.Errorf("未找到评论 (commentID: %s, userID: %s), 原因: %s, 尝试次数: %d, 点击按钮: %d", - commentID, userID, reason, attempts, clickedButtons) - } - - // === 关键修改:使用返回的稳定选择器而不是临时标记 === - el, err := page.Element(selector) - if err != nil { - logrus.Errorf("找到评论但无法获取元素,选择器: %s, 错误: %v", selector, err) - - // 如果稳定选择器失败,尝试重新查找 - logrus.Info("尝试通过 commentID 重新查找...") - if commentID != "" { - fallbackSelector := fmt.Sprintf("#comment-%s", commentID) - el, err = page.Element(fallbackSelector) - if err == nil { - logrus.Infof("通过备用选择器 %s 成功找到元素", fallbackSelector) - return el, nil - } - } - - return nil, fmt.Errorf("找到评论但无法获取元素: %w", err) - } - - logrus.Infof("✓ 成功获取评论元素,选择器: %s", selector) - return el, nil -} -func waitForCommentsContainer(page *rod.Page) { - jsCode := `() => { - let attempts = 0; - const maxAttempts = 10; - - const checkContainer = () => { - const container = document.querySelector('.comments-container'); - if (container) { - const comments = container.querySelectorAll('.comment-item, .comment'); - return comments.length > 0; - } - return false; - }; - - const interval = setInterval(() => { - attempts++; - if (checkContainer() || attempts >= maxAttempts) { - clearInterval(interval); - } - }, 500); - - return checkContainer(); - }` - - page.Eval(jsCode) - time.Sleep(2 * time.Second) -} - -func findReplyButton(commentEl *rod.Element) (*rod.Element, error) { - if commentEl == nil { - return nil, fmt.Errorf("评论元素为空") - } - - selector := ".right .interactions .reply" - btn, err := commentEl.Element(selector) - if err != nil || btn == nil { - logrus.Warnf("未找到回复按钮,选择器: %s, err: %v", selector, err) - return nil, fmt.Errorf("未找到回复按钮") - } - - logrus.Infof("通过选择器 %s 找到回复按钮", selector) - return btn, nil -} - -func verifyClickSuccess(clickedEl *rod.Element) bool { - page := clickedEl.Page() - selectors := []string{ - "div.input-box div.content-edit p.content-input", - } - - for _, selector := range selectors { - if el, err := page.Element(selector); err == nil && el != nil { - if visible, _ := el.Visible(); visible { - logrus.Infof("验证成功:找到可见的回复输入框 (%s)", selector) - return true - } - } - } - logrus.Infof("验证失败:没有找到回复输入框") - return false -} - -func findReplyInput(page *rod.Page, commentEl *rod.Element) (*rod.Element, error) { - activeEditableJS := `() => { - const active = document.activeElement; - if (active && active.getAttribute && active.getAttribute('contenteditable') === 'true') { - return active; - } - return null; - }` - if el, err := page.ElementByJS(rod.Eval(activeEditableJS)); err == nil && el != nil { - return el, nil - } - - selectors := []string{ - "div.input-box div.content-edit p.content-input", - } - for _, selector := range selectors { - if el, err := page.Element(selector); err == nil && el != nil { - return el, nil - } - } - return nil, fmt.Errorf("未找到回复输入框") -} - -func tryClickChainForComment(el *rod.Element) bool { - if el == nil { - logrus.Errorf("要点击的元素为空") - return false - } - - text, _ := el.Text() - classAttr, _ := el.Attribute("class") - class := "" - if classAttr != nil { - class = *classAttr - } - tagName := "" - if desc, err := el.Describe(0, false); err == nil && desc != nil { - tagName = desc.NodeName - } - logrus.Infof("准备点击元素 - 文本: '%s', 类: '%s', 标签: %s", text, class, tagName) - - visible, _ := el.Visible() - logrus.Infof("元素可见性: %v", visible) - - _, _ = el.Eval(`() => { - try { - this.scrollIntoView({behavior: "instant", block: "center"}); - } catch (e) {} - return true }`) - time.Sleep(500 * time.Millisecond) - - if err := el.Click(proto.InputMouseButtonLeft, 1); err != nil { - logrus.Warnf("点击失败: %v", err) - return false - } - - logrus.Infof("点击成功") time.Sleep(1 * time.Second) - success := verifyClickSuccess(el) - if success { - logrus.Infof("点击执行成功且有效") - return true - } + for attempt := 0; attempt < maxAttempts; attempt++ { + logrus.Debugf("查找尝试 %d/%d", attempt+1, maxAttempts) - logrus.Warnf("点击执行成功但无效(没有出现回复输入框)") - return false -} - -func findSubmitButton(page *rod.Page) (*rod.Element, error) { - selectors := []string{ - "div.bottom button.submit", - } - for _, selector := range selectors { - if el, err := page.Element(selector); err == nil && el != nil { - disabled, _ := el.Attribute("disabled") - if disabled == nil { + // 优先通过 commentID 查找 + if commentID != "" { + selector := fmt.Sprintf("#comment-%s", commentID) + if el, err := page.Element(selector); err == nil { + logrus.Infof("✓ 通过 commentID 找到评论: %s", commentID) return el, nil } } + + // 通过 userID 查找 + if userID != "" { + elements, err := page.Elements(".comment-item, .comment") + if err == nil { + for _, el := range elements { + userEl, err := el.Element(fmt.Sprintf(`[data-user-id="%s"]`, userID)) + if err == nil && userEl != nil { + logrus.Infof("✓ 通过 userID 找到评论: %s", userID) + return el, nil + } + } + } + } + + // 滚动页面 + page.MustEval(`() => window.scrollBy(0, window.innerHeight * 0.8)`) + time.Sleep(scrollInterval) } - return nil, fmt.Errorf("未找到回复发布按钮") + + return nil, fmt.Errorf("未找到评论 (commentID: %s, userID: %s), 尝试次数: %d", commentID, userID, maxAttempts) } diff --git a/xiaohongshu/feed_detail.go b/xiaohongshu/feed_detail.go index 85d560a..ce9efaa 100644 --- a/xiaohongshu/feed_detail.go +++ b/xiaohongshu/feed_detail.go @@ -85,11 +85,15 @@ func (f *FeedDetailAction) GetFeedDetailWithConfig(ctx context.Context, feedID, // checkPageAccessible 检查页面是否可访问 func checkPageAccessible(page *rod.Page) error { + // 等待页面稳定,确保错误提示已加载 + time.Sleep(500 * time.Millisecond) + unavailableResult := page.MustEval(`() => { const wrapper = document.querySelector('.access-wrapper, .error-wrapper, .not-found-wrapper, .blocked-wrapper'); if (!wrapper) return null; - const text = wrapper.textContent || ''; + // 获取所有文本内容(包括子元素) + const text = wrapper.textContent || wrapper.innerText || ''; const keywords = [ '当前笔记暂时无法浏览', '该内容因违规已被删除', @@ -105,9 +109,15 @@ func checkPageAccessible(page *rod.Page) error { for (const kw of keywords) { if (text.includes(kw)) { - return kw.trim(); + return kw; } } + + // 如果找到了 wrapper 但没有匹配关键词,返回完整文本用于调试 + if (text.trim()) { + return '未知错误: ' + text.trim(); + } + return null; }`) From 3b6d211d093c3ddf37d7cbeac409c595b9faf41a Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Sun, 7 Dec 2025 14:50:32 +0800 Subject: [PATCH 14/19] =?UTF-8?q?fix:=20=E8=A1=A5=E5=85=85=E8=AF=B4?= =?UTF-8?q?=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mcp_server.go | 4 ++-- types.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mcp_server.go b/mcp_server.go index 8c35e7a..9353bcf 100644 --- a/mcp_server.go +++ b/mcp_server.go @@ -47,10 +47,10 @@ type FilterOption struct { type FeedDetailArgs struct { FeedID string `json:"feed_id" jsonschema:"小红书笔记ID,从Feed列表获取"` XsecToken string `json:"xsec_token" jsonschema:"访问令牌,从Feed列表的xsecToken字段获取"` - LoadAllComments bool `json:"load_all_comments,omitempty" jsonschema:"是否加载全部评论(默认false,仅返回首批评论)"` + LoadAllComments bool `json:"load_all_comments,omitempty" jsonschema:"是否加载全部评论(默认false,仅返回首批一级评论)"` ClickMoreReplies bool `json:"click_more_replies,omitempty" jsonschema:"是否点击'更多回复'按钮 (默认: false)"` MaxRepliesThreshold int `json:"max_replies_threshold,omitempty" jsonschema:"回复数量阈值,超过此数量的'更多'按钮将被跳过 (0表示不跳过任何, 默认: 10)"` - MaxCommentItems int `json:"max_comment_items,omitempty" jsonschema:"最大加载评论数(0表示加载所有, 默认: 0)"` + MaxCommentItems int `json:"max_comment_items,omitempty" jsonschema:"最大加载一级评论数(0表示加载所有一级评论, 默认: 0)"` ScrollSpeed string `json:"scroll_speed,omitempty" jsonschema:"滚动速度: 'slow'|'normal'|'fast' (默认: 'normal')"` } diff --git a/types.go b/types.go index c607ada..f729bf7 100644 --- a/types.go +++ b/types.go @@ -40,7 +40,7 @@ type CommentLoadConfig struct { ClickMoreReplies bool `json:"click_more_replies,omitempty"` // 回复数量阈值,超过这个数量的"更多"按钮将被跳过(0表示不跳过任何) MaxRepliesThreshold int `json:"max_replies_threshold,omitempty"` - // 最大加载评论数(comment-item数量),0表示加载所有 + // 最大加载评论数(.parent-comment数量),0表示加载所有 MaxCommentItems int `json:"max_comment_items,omitempty"` // 滚动速度等级: slow(慢速), normal(正常), fast(快速) ScrollSpeed string `json:"scroll_speed,omitempty"` From 9f4418059f5c7403f8e079486ceb2a735bd90c94 Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Sun, 7 Dec 2025 15:00:40 +0800 Subject: [PATCH 15/19] =?UTF-8?q?fix:=20=E8=A1=A5=E5=85=85=E8=AF=B4?= =?UTF-8?q?=E6=98=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mcp_server.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mcp_server.go b/mcp_server.go index 9353bcf..5e2eed2 100644 --- a/mcp_server.go +++ b/mcp_server.go @@ -47,7 +47,7 @@ type FilterOption struct { type FeedDetailArgs struct { FeedID string `json:"feed_id" jsonschema:"小红书笔记ID,从Feed列表获取"` XsecToken string `json:"xsec_token" jsonschema:"访问令牌,从Feed列表的xsecToken字段获取"` - LoadAllComments bool `json:"load_all_comments,omitempty" jsonschema:"是否加载全部评论(默认false,仅返回首批一级评论)"` + LoadAllComments bool `json:"load_all_comments,omitempty" jsonschema:"是否加载全部评论(默认false,仅返回首批前十条一级评论)"` ClickMoreReplies bool `json:"click_more_replies,omitempty" jsonschema:"是否点击'更多回复'按钮 (默认: false)"` MaxRepliesThreshold int `json:"max_replies_threshold,omitempty" jsonschema:"回复数量阈值,超过此数量的'更多'按钮将被跳过 (0表示不跳过任何, 默认: 10)"` MaxCommentItems int `json:"max_comment_items,omitempty" jsonschema:"最大加载一级评论数(0表示加载所有一级评论, 默认: 0)"` From 8f01632f59f8611767ecbe8bbde291c45cf50284 Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Sun, 7 Dec 2025 15:35:30 +0800 Subject: [PATCH 16/19] fix: fix --- go.mod | 1 + go.sum | 2 + xiaohongshu/feed_detail.go | 1081 +++++++++++++++++++++--------------- 3 files changed, 637 insertions(+), 447 deletions(-) diff --git a/go.mod b/go.mod index d1bc3ba..74180a3 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/xpzouying/xiaohongshu-mcp go 1.24.0 require ( + github.com/avast/retry-go/v4 v4.6.0 github.com/gin-gonic/gin v1.10.1 github.com/go-rod/rod v0.116.2 github.com/h2non/filetype v1.1.3 diff --git a/go.sum b/go.sum index 25fc399..92aa39f 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/avast/retry-go/v4 v4.6.0 h1:K9xNA+KeB8HHc2aWFuLb25Offp+0iVRXEvFx8IinRJA= +github.com/avast/retry-go/v4 v4.6.0/go.mod h1:gvWlPhBVsvBbLkVGDg/KwvBv0bEkCOLRRSHKIr2PyOE= github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0= github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4= github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM= diff --git a/xiaohongshu/feed_detail.go b/xiaohongshu/feed_detail.go index ce9efaa..5fbdd34 100644 --- a/xiaohongshu/feed_detail.go +++ b/xiaohongshu/feed_detail.go @@ -9,320 +9,321 @@ import ( "strconv" "time" + "github.com/avast/retry-go/v4" "github.com/go-rod/rod" "github.com/go-rod/rod/lib/proto" "github.com/sirupsen/logrus" "github.com/xpzouying/xiaohongshu-mcp/errors" ) -// CommentLoadConfig 评论加载配置 -type CommentLoadConfig struct { - // 是否点击"更多回复"按钮 - ClickMoreReplies bool - // 回复数量阈值,超过这个数量的"更多"按钮将被跳过(0表示不跳过任何) - MaxRepliesThreshold int - // 最大加载评论数(comment-item数量),0表示加载所有 - MaxCommentItems int - // 滚动速度等级: slow(慢速), normal(正常), fast(快速) - ScrollSpeed string +// ========== 配置常量 ========== +const ( + defaultMaxAttempts = 500 + stagnantLimit = 20 + minScrollDelta = 10 + maxClickPerRound = 3 + stagnantCheckThreshold = 2 // 达到目标后需要停滞几次才确认 + largeScrollTrigger = 5 // 停滞多少次后触发大滚动 + buttonClickInterval = 3 // 每隔多少次尝试点击一次按钮 + finalSprintPushCount = 15 +) + +// 延迟时间配置(毫秒) +type delayConfig struct { + min, max int +} + +var ( + humanDelayRange = delayConfig{300, 700} + reactionTimeRange = delayConfig{300, 800} + hoverTimeRange = delayConfig{100, 300} + readTimeRange = delayConfig{500, 1200} + shortReadRange = delayConfig{600, 1200} + scrollWaitRange = delayConfig{100, 200} + postScrollRange = delayConfig{300, 500} +) + +// ========== 数据结构 ========== + +type CommentLoadConfig struct { + ClickMoreReplies bool + MaxRepliesThreshold int + MaxCommentItems int + ScrollSpeed string } -// DefaultCommentLoadConfig 默认配置 func DefaultCommentLoadConfig() CommentLoadConfig { return CommentLoadConfig{ - ClickMoreReplies: false, // 默认不点击"更多回复" - MaxRepliesThreshold: 10, // 默认超过10条回复就跳过 - MaxCommentItems: 0, // 默认加载所有评论 + ClickMoreReplies: false, + MaxRepliesThreshold: 10, + MaxCommentItems: 0, ScrollSpeed: "normal", } } -// FeedDetailAction 表示 Feed 详情页动作 type FeedDetailAction struct { page *rod.Page } -// NewFeedDetailAction 创建 Feed 详情页动作 func NewFeedDetailAction(page *rod.Page) *FeedDetailAction { return &FeedDetailAction{page: page} } -// GetFeedDetail 获取 Feed 详情页数据 +// ========== 主要业务逻辑 ========== + func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken string, loadAllComments bool, config CommentLoadConfig) (*FeedDetailResponse, error) { return f.GetFeedDetailWithConfig(ctx, feedID, xsecToken, loadAllComments, config) } -// GetFeedDetailWithConfig 获取 Feed 详情页数据(带配置) func (f *FeedDetailAction) GetFeedDetailWithConfig(ctx context.Context, feedID, xsecToken string, loadAllComments bool, config CommentLoadConfig) (*FeedDetailResponse, error) { page := f.page.Context(ctx).Timeout(10 * time.Minute) - - // 构建详情页 URL url := makeFeedDetailURL(feedID, xsecToken) + logrus.Infof("打开 feed 详情页: %s", url) logrus.Infof("配置: 点击更多=%v, 回复阈值=%d, 最大评论数=%d, 滚动速度=%s", config.ClickMoreReplies, config.MaxRepliesThreshold, config.MaxCommentItems, config.ScrollSpeed) - - // 导航到详情页 - page.MustNavigate(url) - page.MustWaitDOMStable() - time.Sleep(1 * time.Second) - - // 检测页面是否不可访问 + + // 使用retry-go处理页面导航和DOM稳定等待 + err := retry.Do( + func() error { + page.MustNavigate(url) + page.MustWaitDOMStable() + return nil + }, + retry.Attempts(3), + retry.Delay(500*time.Millisecond), + retry.MaxJitter(1000*time.Millisecond), + retry.OnRetry(func(n uint, err error) { + logrus.Debugf("页面导航重试 #%d: %v", n, err) + }), + ) + if err != nil { + logrus.Errorf("页面导航失败: %v", err) + return nil, err + } + sleepRandom(1000, 1000) + if err := checkPageAccessible(page); err != nil { return nil, err } - - // 加载全部评论 + if loadAllComments { if err := f.loadAllCommentsWithConfig(page, config); err != nil { logrus.Warnf("加载全部评论失败: %v", err) } } - - // 提取笔记详情数据 + return f.extractFeedDetail(page, feedID) } -// checkPageAccessible 检查页面是否可访问 -func checkPageAccessible(page *rod.Page) error { - // 等待页面稳定,确保错误提示已加载 - time.Sleep(500 * time.Millisecond) - - unavailableResult := page.MustEval(`() => { - const wrapper = document.querySelector('.access-wrapper, .error-wrapper, .not-found-wrapper, .blocked-wrapper'); - if (!wrapper) return null; +// ========== 评论加载器 ========== - // 获取所有文本内容(包括子元素) - const text = wrapper.textContent || wrapper.innerText || ''; - const keywords = [ - '当前笔记暂时无法浏览', - '该内容因违规已被删除', - '该笔记已被删除', - '内容不存在', - '笔记不存在', - '已失效', - '私密笔记', - '仅作者可见', - '因用户设置,你无法查看', - '因违规无法查看' - ]; - - for (const kw of keywords) { - if (text.includes(kw)) { - return kw; - } - } - - // 如果找到了 wrapper 但没有匹配关键词,返回完整文本用于调试 - if (text.trim()) { - return '未知错误: ' + text.trim(); - } - - return null; - }`) - - rawJSON, err := unavailableResult.MarshalJSON() - if err != nil { - logrus.Errorf("无法解析页面状态检查的结果: %v", err) - return fmt.Errorf("无法解析页面状态检查的结果: %w", err) - } - - if string(rawJSON) != "null" { - var reason string - if err := json.Unmarshal(rawJSON, &reason); err == nil { - logrus.Warnf("笔记不可访问: %s", reason) - return fmt.Errorf("笔记不可访问: %s", reason) - } - rawReason := string(rawJSON) - logrus.Warnf("笔记不可访问,且无法解析原因: %s", rawReason) - return fmt.Errorf("笔记不可访问,无法解析原因: %s", rawReason) - } - - return nil +type commentLoader struct { + page *rod.Page + config CommentLoadConfig + stats *loadStats + state *loadState +} + +type loadStats struct { + totalClicked int + totalSkipped int + attempts int +} + +type loadState struct { + lastCount int + lastScrollTop int + stagnantChecks int } -// loadAllCommentsWithConfig 加载所有评论(带配置) func (f *FeedDetailAction) loadAllCommentsWithConfig(page *rod.Page, config CommentLoadConfig) error { - maxAttempts := 500 - if config.MaxCommentItems > 0 { - // 如果设置了最大评论数,减少尝试次数 - maxAttempts = config.MaxCommentItems * 3 + loader := &commentLoader{ + page: page, + config: config, + stats: &loadStats{}, + state: &loadState{}, } + + return loader.load() +} - const ( - stagnantLimit = 20 - noScrollChangeLimit = 15 - minScrollDelta = 10 - ) - - // 获取滚动间隔(根据速度) - scrollInterval := getScrollInterval(config.ScrollSpeed) - +func (cl *commentLoader) load() error { + maxAttempts := cl.calculateMaxAttempts() + scrollInterval := getScrollInterval(cl.config.ScrollSpeed) + logrus.Info("开始加载评论...") - - // 先滚动到评论区 - scrollToCommentsArea(page) - humanDelay() - - var ( - lastCount = 0 - lastScrollTop = 0 - stagnantChecks = 0 - noScrollChangeCount = 0 - totalClickedButtons = 0 - skippedButtons = 0 - attempt = 0 - ) - - for attempt = 0; attempt < maxAttempts; attempt++ { - logrus.Debugf("=== 尝试 %d/%d ===", attempt+1, maxAttempts) - - // === 1. 检查是否到达底部 === - if checkEndContainer(page) { - logrus.Infof("✓ 检测到 'THE END' 元素,已滑动到底部") - humanDelay() - - currentCount := getCommentCount(page) - logrus.Infof("✓ 加载完成: %d 条评论, 尝试次数: %d, 点击: %d, 跳过: %d", - currentCount, attempt+1, totalClickedButtons, skippedButtons) + scrollToCommentsArea(cl.page) + sleepRandom(humanDelayRange.min, humanDelayRange.max) + + for cl.stats.attempts = 0; cl.stats.attempts < maxAttempts; cl.stats.attempts++ { + logrus.Debugf("=== 尝试 %d/%d ===", cl.stats.attempts+1, maxAttempts) + + if cl.checkComplete() { return nil } - - // === 2. 获取当前评论数 === - currentCount := getCommentCount(page) - - // === 3. 点击"更多"按钮(人性化:每隔几次尝试才点击一次) === - if config.ClickMoreReplies && attempt%3 == 0 { - clicked, skipped := clickShowMoreButtonsSmart(page, config.MaxRepliesThreshold) - if clicked > 0 || skipped > 0 { - totalClickedButtons += clicked - skippedButtons += skipped - logrus.Infof("点击'更多': %d 个, 跳过: %d 个, 累计点击: %d, 累计跳过: %d", - clicked, skipped, totalClickedButtons, skippedButtons) - - // 点击后等待更长时间,模拟人阅读新内容(800-1500ms) - readTime := time.Duration(800+rand.Intn(700)) * time.Millisecond - time.Sleep(readTime) - - // 多轮检查(但减少轮数,避免太频繁) - for round := 0; round < 1; round++ { - // 等待一段时间再检查(模拟人继续浏览) - time.Sleep(time.Duration(500+rand.Intn(500)) * time.Millisecond) - clicked2, skipped2 := clickShowMoreButtonsSmart(page, config.MaxRepliesThreshold) - if clicked2 > 0 || skipped2 > 0 { - totalClickedButtons += clicked2 - skippedButtons += skipped2 - logrus.Infof("第 %d 轮: 点击 %d, 跳过 %d", round+2, clicked2, skipped2) - // 再次等待阅读时间 - readTime2 := time.Duration(600+rand.Intn(600)) * time.Millisecond - time.Sleep(readTime2) - } else { - break - } - } - } - } - - // === 4. 获取评论数量 === - totalCount := getTotalCommentCount(page) - logrus.Debugf("当前评论: %d, 目标: %d", currentCount, totalCount) - - // === 5. 检查评论数量变化 === - if currentCount != lastCount { - logrus.Infof("✓ 评论增加: %d -> %d (+%d)", lastCount, currentCount, currentCount-lastCount) - lastCount = currentCount - stagnantChecks = 0 - } else { - stagnantChecks++ - if stagnantChecks%5 == 0 { - logrus.Debugf("评论停滞 %d 次", stagnantChecks) - } - } - - // === 5.1 检查是否已达到目标评论数(在评论数停滞时)=== - if config.MaxCommentItems > 0 && currentCount >= config.MaxCommentItems { - // 达到目标且停滞2次,确认加载完成 - if stagnantChecks >= 2 { - logrus.Infof("✓ 已达到目标评论数: %d/%d (停滞%d次), 停止加载", - currentCount, config.MaxCommentItems, stagnantChecks) - return nil - } - // 刚达到目标,继续滚动确认 - if stagnantChecks > 0 { - logrus.Debugf("已达目标数 %d/%d,再确认 %d 次...", - currentCount, config.MaxCommentItems, 2-stagnantChecks) - } - } - - // === 6. 停滞处理 === - if stagnantChecks >= stagnantLimit { - logrus.Infof("评论停滞,尝试最后冲刺...") - finalPush(page, config.ScrollSpeed) - - if checkEndContainer(page) { - logrus.Infof("✓ 到达底部,评论数: %d", currentCount) - return nil - } - - logrus.Infof("未到底部,重置停滞计数") - stagnantChecks = 0 - } - - // === 7. 执行人性化滚动 === - // 先滚动到最后一个评论(触发懒加载的关键!) - if currentCount > 0 { - scrollToLastComment(page) - time.Sleep(time.Duration(300+rand.Intn(200)) * time.Millisecond) + + if cl.shouldClickButtons() { + cl.clickButtonsWithRetry() } - _, scrollDelta, currentScrollTop := humanScroll(page, config.ScrollSpeed) - - // === 8. 检查滚动变化 === - if scrollDelta < minScrollDelta || currentScrollTop == lastScrollTop { - noScrollChangeCount++ - if noScrollChangeCount%5 == 0 { - logrus.Debugf("滚动停滞 %d 次", noScrollChangeCount) - largeScroll(page, config.ScrollSpeed) - humanDelay() - } - } else { - noScrollChangeCount = 0 - lastScrollTop = currentScrollTop + currentCount := getCommentCount(cl.page) + cl.updateState(currentCount) + + if cl.shouldStopAtTarget(currentCount) { + return nil } - - // === 9. 滚动停滞处理 === - if noScrollChangeCount >= noScrollChangeLimit { - logrus.Infof("滚动停滞,最后冲刺...") - finalPush(page, config.ScrollSpeed) - - if checkEndContainer(page) { - logrus.Infof("✓ 到达底部,评论数: %d", currentCount) - return nil - } - - logrus.Infof("重置滚动计数") - noScrollChangeCount = 0 - lastScrollTop = 0 - } - - // === 10. 等待内容加载 === + + cl.performScroll() + cl.handleStagnation() + time.Sleep(scrollInterval) } - - // === 11. 最后冲刺 === - logrus.Infof("达到最大尝试次数,最后冲刺...") - finalPush(page, config.ScrollSpeed) - - currentCount := getCommentCount(page) - hasEnd := checkEndContainer(page) - - logrus.Infof("✓ 加载结束: %d 条评论, 点击: %d, 跳过: %d, 到达底部: %v", - currentCount, totalClickedButtons, skippedButtons, hasEnd) - + + cl.performFinalSprint() return nil } -// getScrollInterval 根据速度获取滚动间隔 +func (cl *commentLoader) calculateMaxAttempts() int { + if cl.config.MaxCommentItems > 0 { + return cl.config.MaxCommentItems * 3 + } + return defaultMaxAttempts +} + +func (cl *commentLoader) checkComplete() bool { + if checkEndContainer(cl.page) { + currentCount := getCommentCount(cl.page) + logrus.Infof("✓ 检测到 'THE END' 元素,已滑动到底部") + sleepRandom(humanDelayRange.min, humanDelayRange.max) + logrus.Infof("✓ 加载完成: %d 条评论, 尝试次数: %d, 点击: %d, 跳过: %d", + currentCount, cl.stats.attempts+1, cl.stats.totalClicked, cl.stats.totalSkipped) + return true + } + return false +} + +func (cl *commentLoader) shouldClickButtons() bool { + return cl.config.ClickMoreReplies && cl.stats.attempts%buttonClickInterval == 0 +} + +func (cl *commentLoader) clickButtonsWithRetry() { + clicked, skipped := clickShowMoreButtonsSmart(cl.page, cl.config.MaxRepliesThreshold) + if clicked > 0 || skipped > 0 { + cl.stats.totalClicked += clicked + cl.stats.totalSkipped += skipped + logrus.Infof("点击'更多': %d 个, 跳过: %d 个, 累计点击: %d, 累计跳过: %d", + clicked, skipped, cl.stats.totalClicked, cl.stats.totalSkipped) + + sleepRandom(readTimeRange.min, readTimeRange.max) + + // 重试一轮 + clicked2, skipped2 := clickShowMoreButtonsSmart(cl.page, cl.config.MaxRepliesThreshold) + if clicked2 > 0 || skipped2 > 0 { + cl.stats.totalClicked += clicked2 + cl.stats.totalSkipped += skipped2 + logrus.Infof("第 2 轮: 点击 %d, 跳过 %d", clicked2, skipped2) + sleepRandom(shortReadRange.min, shortReadRange.max) + } + } +} + +func (cl *commentLoader) updateState(currentCount int) { + totalCount := getTotalCommentCount(cl.page) + logrus.Debugf("当前评论: %d, 目标: %d", currentCount, totalCount) + + if currentCount != cl.state.lastCount { + logrus.Infof("✓ 评论增加: %d -> %d (+%d)", + cl.state.lastCount, currentCount, currentCount-cl.state.lastCount) + cl.state.lastCount = currentCount + cl.state.stagnantChecks = 0 + } else { + cl.state.stagnantChecks++ + if cl.state.stagnantChecks%5 == 0 { + logrus.Debugf("评论停滞 %d 次", cl.state.stagnantChecks) + } + } +} + +func (cl *commentLoader) shouldStopAtTarget(currentCount int) bool { + if cl.config.MaxCommentItems <= 0 || currentCount < cl.config.MaxCommentItems { + return false + } + + if cl.state.stagnantChecks >= stagnantCheckThreshold { + logrus.Infof("✓ 已达到目标评论数: %d/%d (停滞%d次), 停止加载", + currentCount, cl.config.MaxCommentItems, cl.state.stagnantChecks) + return true + } + + if cl.state.stagnantChecks > 0 { + logrus.Debugf("已达目标数 %d/%d,再确认 %d 次...", + currentCount, cl.config.MaxCommentItems, stagnantCheckThreshold-cl.state.stagnantChecks) + } + + return false +} + +func (cl *commentLoader) performScroll() { + currentCount := getCommentCount(cl.page) + if currentCount > 0 { + scrollToLastComment(cl.page) + sleepRandom(postScrollRange.min, postScrollRange.max) + } + + largeMode := cl.state.stagnantChecks >= largeScrollTrigger + pushCount := 1 + if largeMode { + pushCount = 3 + rand.Intn(3) + } + + _, scrollDelta, currentScrollTop := humanScroll(cl.page, cl.config.ScrollSpeed, largeMode, pushCount) + + if scrollDelta < minScrollDelta || currentScrollTop == cl.state.lastScrollTop { + cl.state.stagnantChecks++ + if cl.state.stagnantChecks%5 == 0 { + logrus.Debugf("滚动停滞 %d 次", cl.state.stagnantChecks) + } + } else { + cl.state.stagnantChecks = 0 + cl.state.lastScrollTop = currentScrollTop + } +} + +func (cl *commentLoader) handleStagnation() { + if cl.state.stagnantChecks >= stagnantLimit { + logrus.Infof("停滞过多,尝试大冲刺...") + humanScroll(cl.page, cl.config.ScrollSpeed, true, 10) + cl.state.stagnantChecks = 0 + + if checkEndContainer(cl.page) { + currentCount := getCommentCount(cl.page) + logrus.Infof("✓ 到达底部,评论数: %d", currentCount) + } + } +} + +func (cl *commentLoader) performFinalSprint() { + logrus.Infof("达到最大尝试次数,最后冲刺...") + humanScroll(cl.page, cl.config.ScrollSpeed, true, finalSprintPushCount) + + currentCount := getCommentCount(cl.page) + hasEnd := checkEndContainer(cl.page) + logrus.Infof("✓ 加载结束: %d 条评论, 点击: %d, 跳过: %d, 到达底部: %v", + currentCount, cl.stats.totalClicked, cl.stats.totalSkipped, hasEnd) +} + +// ========== 工具函数 ========== + +func sleepRandom(minMs, maxMs int) { + if maxMs <= minMs { + time.Sleep(time.Duration(minMs) * time.Millisecond) + return + } + delay := time.Duration(minMs+rand.Intn(maxMs-minMs)) * time.Millisecond + time.Sleep(delay) +} + func getScrollInterval(speed string) time.Duration { switch speed { case "slow": @@ -334,148 +335,195 @@ func getScrollInterval(speed string) time.Duration { } } -// humanDelay 人性化延迟 -func humanDelay() { - delay := time.Duration(300+rand.Intn(400)) * time.Millisecond - time.Sleep(delay) -} +// ========== 按钮点击 ========== -// clickShowMoreButtonsSmart 智能点击"更多"按钮(根据回复数量判断,人性化操作) func clickShowMoreButtonsSmart(page *rod.Page, maxRepliesThreshold int) (clicked, skipped int) { elements, err := page.Elements(".show-more") if err != nil { return 0, 0 } - - // 正则表达式:匹配"展开 X 条回复" + replyCountRegex := regexp.MustCompile(`展开\s*(\d+)\s*条回复`) - - // 限制每次最多点击的按钮数量(模拟人不会一次性点击太多) - maxClickPerRound := 3 + rand.Intn(3) // 每次3-5个 + maxClick := maxClickPerRound + rand.Intn(maxClickPerRound) clickedInRound := 0 - + for _, el := range elements { - // 限制单次点击数量 - if clickedInRound >= maxClickPerRound { + if clickedInRound >= maxClick { break } - - // 检查元素是否可见 - visible, err := el.Visible() - if err != nil || !visible { + + if !isElementClickable(el) { continue } - - // 检查是否在 DOM 中 - box, err := el.Shape() - if err != nil || len(box.Quads) == 0 { - continue - } - - // 获取按钮文本 + text, err := el.Text() if err != nil { continue } - - // 判断是否需要跳过 - shouldSkip := false - if maxRepliesThreshold > 0 { - matches := replyCountRegex.FindStringSubmatch(text) - if len(matches) > 1 { - replyCount, err := strconv.Atoi(matches[1]) - if err == nil && replyCount > maxRepliesThreshold { - shouldSkip = true - logrus.Debugf("跳过'%s'(回复数 %d > 阈值 %d)", text, replyCount, maxRepliesThreshold) - } - } - } - - if shouldSkip { + + if shouldSkipButton(text, maxRepliesThreshold, replyCountRegex) { skipped++ continue } - - // === 人性化点击流程 === - // 1. 先滚动到元素附近(模拟人看到按钮) - el.MustEval(`() => { - try { - this.scrollIntoView({behavior: 'smooth', block: 'center'}); - } catch (e) {} - }`) - - // 2. 等待滚动完成 + 模拟人看到按钮后的反应时间(300-800ms) - reactionTime := time.Duration(300+rand.Intn(500)) * time.Millisecond - time.Sleep(reactionTime) - - // 3. 模拟鼠标移动到按钮上(悬停效果) - box, _ = el.Shape() - if len(box.Quads) > 0 { - // 计算按钮中心点 - x := float64(box.Quads[0][0]+box.Quads[0][4]) / 2 - y := float64(box.Quads[0][1]+box.Quads[0][5]) / 2 - page.Mouse.MustMoveTo(x, y) - // 悬停时间(模拟人确认要点击) - time.Sleep(time.Duration(100+rand.Intn(200)) * time.Millisecond) - } - - // 4. 点击元素 - if err := el.Click(proto.InputMouseButtonLeft, 1); err == nil { + + if clickElementWithHumanBehavior(page, el, text) { clicked++ clickedInRound++ - logrus.Debugf("点击了'%s'", text) - - // 5. 点击后的延迟(模拟人阅读新内容的时间,500-1200ms) - readTime := time.Duration(500+rand.Intn(700)) * time.Millisecond - time.Sleep(readTime) } } - + return clicked, skipped } -// humanScroll 人性化滚动 -func humanScroll(page *rod.Page, speed string) (bool, int, int) { +func isElementClickable(el *rod.Element) bool { + visible, err := el.Visible() + if err != nil || !visible { + return false + } + + box, err := el.Shape() + return err == nil && len(box.Quads) > 0 +} + +func shouldSkipButton(text string, threshold int, regex *regexp.Regexp) bool { + if threshold <= 0 { + return false + } + + matches := regex.FindStringSubmatch(text) + if len(matches) > 1 { + if replyCount, err := strconv.Atoi(matches[1]); err == nil && replyCount > threshold { + logrus.Debugf("跳过'%s'(回复数 %d > 阈值 %d)", text, replyCount, threshold) + return true + } + } + return false +} + +func clickElementWithHumanBehavior(page *rod.Page, el *rod.Element, text string) bool { + var clickSuccess bool + + // 使用retry-go进行点击操作重试 + err := retry.Do( + func() error { + // 滚动到元素 + el.MustEval(`() => { + try { + this.scrollIntoView({behavior: 'smooth', block: 'center'}); + } catch (e) {} + }`) + + sleepRandom(reactionTimeRange.min, reactionTimeRange.max) + + // 鼠标悬停 + if box, err := el.Shape(); err == nil && len(box.Quads) > 0 { + x := float64(box.Quads[0][0]+box.Quads[0][4]) / 2 + y := float64(box.Quads[0][1]+box.Quads[0][5]) / 2 + page.Mouse.MustMoveTo(x, y) + sleepRandom(hoverTimeRange.min, hoverTimeRange.max) + } + + // 点击 + if err := el.Click(proto.InputMouseButtonLeft, 1); err != nil { + return err // 返回错误以触发重试 + } + + // 模拟人类阅读时间 + sleepRandom(readTimeRange.min, readTimeRange.max) + clickSuccess = true + return nil + }, + retry.Attempts(3), + retry.Delay(100*time.Millisecond), + retry.MaxJitter(200*time.Millisecond), + retry.OnRetry(func(n uint, err error) { + logrus.Debugf("点击重试 #%d: %s, 错误: %v", n, text, err) + }), + ) + + if err != nil { + logrus.Debugf("点击失败 '%s': %v", text, err) + return false + } + + if clickSuccess { + logrus.Debugf("点击了'%s'", text) + } + + return clickSuccess +} + +// ========== 滚动相关 ========== + +func humanScroll(page *rod.Page, speed string, largeMode bool, pushCount int) (bool, int, int) { beforeTop := getScrollTop(page) viewportHeight := page.MustEval(`() => window.innerHeight`).Int() + + baseRatio := getScrollRatio(speed) + if largeMode { + baseRatio *= 2.0 + } + + scrolled := false + actualDelta := 0 + currentScrollTop := beforeTop + + for i := 0; i < max(1, pushCount); i++ { + scrollDelta := calculateScrollDelta(viewportHeight, baseRatio) + page.MustEval(`(delta) => { window.scrollBy(0, delta); }`, scrollDelta) + + sleepRandom(scrollWaitRange.min, scrollWaitRange.max) + + currentScrollTop = getScrollTop(page) + deltaThisTime := currentScrollTop - beforeTop + actualDelta += deltaThisTime + + if deltaThisTime > 5 { + scrolled = true + } + + beforeTop = currentScrollTop + + if i < pushCount-1 { + sleepRandom(humanDelayRange.min, humanDelayRange.max) + } + } + + if !scrolled && pushCount > 0 { + page.MustEval(`() => window.scrollTo(0, document.body.scrollHeight)`) + sleepRandom(postScrollRange.min, postScrollRange.max) + currentScrollTop = getScrollTop(page) + actualDelta = currentScrollTop - beforeTop + actualDelta + scrolled = actualDelta > 5 + } + + if scrolled { + logrus.Debugf("滚动: %d -> %d (Δ%d, large=%v, push=%d)", + beforeTop-actualDelta, currentScrollTop, actualDelta, largeMode, pushCount) + } + + return scrolled, actualDelta, currentScrollTop +} - // 根据速度调整滚动距离 - var scrollRatio float64 +func getScrollRatio(speed string) float64 { switch speed { case "slow": - scrollRatio = 0.5 + rand.Float64()*0.2 // 50%-70% + return 0.5 case "fast": - scrollRatio = 0.9 + rand.Float64()*0.2 // 90%-110% + return 0.9 default: // normal - scrollRatio = 0.7 + rand.Float64()*0.2 // 70%-90% + return 0.7 } +} - scrollDelta := float64(viewportHeight) * scrollRatio +func calculateScrollDelta(viewportHeight int, baseRatio float64) float64 { + scrollDelta := float64(viewportHeight) * (baseRatio + rand.Float64()*0.2) if scrollDelta < 400 { scrollDelta = 400 } - - // 添加随机波动 - scrollDelta += float64(rand.Intn(100) - 50) - - // 使用JS的 scrollBy 方法进行滚动 - page.MustEval(`(delta) => { window.scrollBy(0, delta); }`, scrollDelta) - - // 等待滚动完成 - time.Sleep(time.Duration(100+rand.Intn(100)) * time.Millisecond) - - afterTop := getScrollTop(page) - actualDelta := afterTop - beforeTop - scrolled := actualDelta > 5 - - if scrolled { - logrus.Debugf("滚动: %d -> %d (Δ%d)", beforeTop, afterTop, actualDelta) - } - - return scrolled, actualDelta, afterTop + return scrollDelta + float64(rand.Intn(100)-50) } -// scrollToCommentsArea 滚动到评论区 func scrollToCommentsArea(page *rod.Page) { logrus.Info("滚动到评论区...") page.MustEval(`() => { @@ -486,137 +534,276 @@ func scrollToCommentsArea(page *rod.Page) { }`) } -// scrollToLastComment 滚动到最后一个评论(触发懒加载的关键) func scrollToLastComment(page *rod.Page) { page.MustEval(`() => { const container = document.querySelector('.comments-container'); if (!container) return; - - // 查找最后一个主评论 const comments = container.querySelectorAll('.parent-comment'); if (comments.length > 0) { const lastComment = comments[comments.length - 1]; - // 滚动到最后一个评论,让它出现在视口中间偏下位置 lastComment.scrollIntoView({behavior: 'smooth', block: 'center'}); } }`) } -// finalPush 最后冲刺:大幅滚动到底部 -func finalPush(page *rod.Page, speed string) { - logrus.Info("执行最后冲刺...") +// ========== DOM 查询 ========== - for i := 0; i < 15; i++ { - if checkEndContainer(page) { - return - } - - beforeTop := getScrollTop(page) - largeScroll(page, speed) - - // 人性化延迟 - time.Sleep(time.Duration(200+rand.Intn(200)) * time.Millisecond) - - afterTop := getScrollTop(page) - if afterTop == beforeTop { - page.MustEval(`() => window.scrollTo(0, document.body.scrollHeight)`) - time.Sleep(time.Duration(300+rand.Intn(200)) * time.Millisecond) - } - } -} - -// largeScroll 大幅度滚动 -func largeScroll(page *rod.Page, speed string) { - var scrollDelta float64 - switch speed { - case "slow": - scrollDelta = 1000 + float64(rand.Intn(500)) - case "fast": - scrollDelta = 3000 + float64(rand.Intn(1000)) - default: // normal - scrollDelta = 2000 + float64(rand.Intn(500)) - } - - page.MustEval(`(delta) => { window.scrollBy(0, delta); }`, scrollDelta) - time.Sleep(time.Duration(100+rand.Intn(50)) * time.Millisecond) -} - -// getScrollTop 获取当前滚动位置 func getScrollTop(page *rod.Page) int { - result := page.MustEval(`() => { - return window.pageYOffset || document.documentElement.scrollTop || document.body.scrollTop || 0; - }`) - return result.Int() + var result int + + // 使用retry-go来处理可能的DOM查询失败 + err := retry.Do( + func() error { + evalResult := page.MustEval(`() => { + return window.pageYOffset || document.documentElement.scrollTop || document.body.scrollTop || 0; + }`) + + result = evalResult.Int() + return nil + }, + retry.Attempts(3), + retry.Delay(100*time.Millisecond), + retry.MaxJitter(200*time.Millisecond), + retry.OnRetry(func(n uint, err error) { + logrus.Debugf("获取滚动位置重试 #%d: %v", n, err) + }), + ) + + if err != nil { + logrus.Warnf("获取滚动位置失败: %v", err) + return 0 // 失败时返回0 + } + + return result } -// getCommentCount 获取当前评论数量 func getCommentCount(page *rod.Page) int { - result := page.MustEval(`() => { - const container = document.querySelector('.comments-container'); - if (!container) return 0; - return container.querySelectorAll('.parent-comment').length; - }`) - return result.Int() + var result int + + // 使用retry-go来处理可能的DOM查询失败 + err := retry.Do( + func() error { + evalResult := page.MustEval(`() => { + const container = document.querySelector('.comments-container'); + if (!container) return 0; + return container.querySelectorAll('.parent-comment').length; + }`) + + result = evalResult.Int() + return nil + }, + retry.Attempts(3), + retry.Delay(100*time.Millisecond), + retry.MaxJitter(200*time.Millisecond), + retry.OnRetry(func(n uint, err error) { + logrus.Debugf("获取评论计数重试 #%d: %v", n, err) + }), + ) + + if err != nil { + logrus.Warnf("获取评论计数失败: %v", err) + return 0 // 失败时返回0 + } + + return result } -// getTotalCommentCount 获取总评论数 func getTotalCommentCount(page *rod.Page) int { - result := page.MustEval(`() => { - const container = document.querySelector('.comments-container'); - if (!container) return 0; - - const totalEl = container.querySelector('.total'); - if (!totalEl) return 0; - - const text = (totalEl.textContent || '').replace(/\s+/g, ''); - const match = text.match(/共(\d+)条评论/); - return match ? parseInt(match[1], 10) : 0; - }`) - return result.Int() + var result int + + // 使用retry-go来处理可能的DOM查询失败 + err := retry.Do( + func() error { + evalResult := page.MustEval(`() => { + const container = document.querySelector('.comments-container'); + if (!container) return 0; + const totalEl = container.querySelector('.total'); + if (!totalEl) return 0; + const text = (totalEl.textContent || '').replace(/\s+/g, ''); + const match = text.match(/共(\d+)条评论/); + return match ? parseInt(match[1], 10) : 0; + }`) + + result = evalResult.Int() + return nil + }, + retry.Attempts(3), + retry.Delay(100*time.Millisecond), + retry.MaxJitter(200*time.Millisecond), + retry.OnRetry(func(n uint, err error) { + logrus.Debugf("获取总评论计数重试 #%d: %v", n, err) + }), + ) + + if err != nil { + logrus.Warnf("获取总评论计数失败: %v", err) + return 0 // 失败时返回0 + } + + return result } -// checkEndContainer 检查是否出现 "THE END" 元素 func checkEndContainer(page *rod.Page) bool { - result := page.MustEval(`() => { - const endContainer = document.querySelector('.end-container'); - if (!endContainer) return false; - - const text = (endContainer.textContent || '').trim().toUpperCase(); - return text.includes('THE END') || text.includes('THEEND'); - }`) - return result.Bool() + var result bool + + // 使用retry-go来处理可能的DOM查询失败 + err := retry.Do( + func() error { + evalResult := page.MustEval(`() => { + const endContainer = document.querySelector('.end-container'); + if (!endContainer) return false; + const text = (endContainer.textContent || '').trim().toUpperCase(); + return text.includes('THE END') || text.includes('THEEND'); + }`) + + result = evalResult.Bool() + return nil + }, + retry.Attempts(3), + retry.Delay(100*time.Millisecond), + retry.MaxJitter(200*time.Millisecond), + retry.OnRetry(func(n uint, err error) { + logrus.Debugf("检查结束容器重试 #%d: %v", n, err) + }), + ) + + if err != nil { + logrus.Warnf("检查结束容器失败: %v", err) + return false // 失败时返回false + } + + return result } -// extractFeedDetail 提取 Feed 详情数据 -func (f *FeedDetailAction) extractFeedDetail(page *rod.Page, feedID string) (*FeedDetailResponse, error) { - result := page.MustEval(`() => { - if (window.__INITIAL_STATE__ && - window.__INITIAL_STATE__.note && - window.__INITIAL_STATE__.note.noteDetailMap) { - const noteDetailMap = window.__INITIAL_STATE__.note.noteDetailMap; - return JSON.stringify(noteDetailMap); - } - return ""; - }`).String() +// ========== 页面检查 ========== +func checkPageAccessible(page *rod.Page) error { + time.Sleep(500 * time.Millisecond) + + // 使用retry-go来处理可能的DOM查询失败 + err := retry.Do( + func() error { + result := page.MustEval(`() => { + const wrapper = document.querySelector('.access-wrapper, .error-wrapper, .not-found-wrapper, .blocked-wrapper'); + if (!wrapper) return null; + + const text = wrapper.textContent || wrapper.innerText || ''; + const keywords = [ + '当前笔记暂时无法浏览', + '该内容因违规已被删除', + '该笔记已被删除', + '内容不存在', + '笔记不存在', + '已失效', + '私密笔记', + '仅作者可见', + '因用户设置,你无法查看', + '因违规无法查看' + ]; + + for (const kw of keywords) { + if (text.includes(kw)) { + return kw; + } + } + + if (text.trim()) { + return '未知错误: ' + text.trim(); + } + return null; + }`) + + rawJSON, marshalErr := result.MarshalJSON() + if marshalErr != nil { + return fmt.Errorf("无法序列化页面状态检查结果: %w", marshalErr) + } + + if string(rawJSON) != "null" { + var reason string + if unmarshalErr := json.Unmarshal(rawJSON, &reason); unmarshalErr == nil { + logrus.Warnf("笔记不可访问: %s", reason) + return fmt.Errorf("笔记不可访问: %s", reason) + } + + rawReason := string(rawJSON) + logrus.Warnf("笔记不可访问,且无法解析原因: %s", rawReason) + return fmt.Errorf("笔记不可访问,无法解析原因: %s", rawReason) + } + + return nil + }, + retry.Attempts(3), + retry.Delay(200*time.Millisecond), + retry.MaxJitter(300*time.Millisecond), + retry.OnRetry(func(n uint, err error) { + logrus.Debugf("页面可访问性检查重试 #%d: %v", n, err) + }), + ) + + // If the error is nil, it means no access issue was found + if err == nil { + return nil // Page is accessible + } + + // Return the original error from the retry operation + return err +} + +// ========== 数据提取 ========== + +func (f *FeedDetailAction) extractFeedDetail(page *rod.Page, feedID string) (*FeedDetailResponse, error) { + var result string + + // 使用retry-go来处理可能的DOM查询失败 + err := retry.Do( + func() error { + evalResult := page.MustEval(`() => { + if (window.__INITIAL_STATE__ && + window.__INITIAL_STATE__.note && + window.__INITIAL_STATE__.note.noteDetailMap) { + const noteDetailMap = window.__INITIAL_STATE__.note.noteDetailMap; + return JSON.stringify(noteDetailMap); + } + return ""; + }`).String() + + if evalResult != "" { + result = evalResult + return nil + } + return fmt.Errorf("无法获取初始状态数据") + }, + retry.Attempts(3), + retry.Delay(200*time.Millisecond), + retry.MaxJitter(300*time.Millisecond), + retry.OnRetry(func(n uint, err error) { + logrus.Debugf("提取Feed详情重试 #%d: %v", n, err) + }), + ) + + if err != nil { + logrus.Errorf("提取Feed详情失败: %v", err) + return nil, fmt.Errorf("提取Feed详情失败: %w", err) + } + if result == "" { return nil, errors.ErrNoFeedDetail } - + var noteDetailMap map[string]struct { Note FeedDetail `json:"note"` Comments CommentList `json:"comments"` } - + if err := json.Unmarshal([]byte(result), ¬eDetailMap); err != nil { return nil, fmt.Errorf("failed to unmarshal noteDetailMap: %w", err) } - + noteDetail, exists := noteDetailMap[feedID] if !exists { return nil, fmt.Errorf("feed %s not found in noteDetailMap", feedID) } - + return &FeedDetailResponse{ Note: noteDetail.Note, Comments: noteDetail.Comments, @@ -625,4 +812,4 @@ func (f *FeedDetailAction) extractFeedDetail(page *rod.Page, feedID string) (*Fe func makeFeedDetailURL(feedID, xsecToken string) string { return fmt.Sprintf("https://www.xiaohongshu.com/explore/%s?xsec_token=%s&xsec_source=pc_feed", feedID, xsecToken) -} +} \ No newline at end of file From aa8a64dbaff59fc522a52e537650fb3d37b138c1 Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Sun, 7 Dec 2025 17:06:41 +0800 Subject: [PATCH 17/19] fix:fix --- go.mod | 3 +- go.sum | 2 - mcp_handlers.go | 64 ------------------- mcp_server.go | 7 --- types.go | 14 ----- xiaohongshu/comment_feed.go | 118 +++++++++++++++++++++++++++++------- 6 files changed, 97 insertions(+), 111 deletions(-) diff --git a/go.mod b/go.mod index be1375c..f47d5b2 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/xpzouying/xiaohongshu-mcp go 1.24.0 require ( - github.com/avast/retry-go/v4 v4.6.0 + github.com/avast/retry-go/v4 v4.7.0 github.com/gin-gonic/gin v1.10.1 github.com/go-rod/rod v0.116.2 github.com/h2non/filetype v1.1.3 @@ -16,7 +16,6 @@ require ( ) require ( - github.com/avast/retry-go/v4 v4.7.0 // indirect github.com/bytedance/sonic v1.11.6 // indirect github.com/bytedance/sonic/loader v0.1.1 // indirect github.com/cloudwego/base64x v0.1.4 // indirect diff --git a/go.sum b/go.sum index a6c928f..e0d41e3 100644 --- a/go.sum +++ b/go.sum @@ -79,8 +79,6 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= -github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= -github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= diff --git a/mcp_handlers.go b/mcp_handlers.go index 9dc3cef..12ee3da 100644 --- a/mcp_handlers.go +++ b/mcp_handlers.go @@ -8,10 +8,6 @@ import ( "strings" "time" - "strconv" - "strings" - "time" - "github.com/sirupsen/logrus" "github.com/xpzouying/xiaohongshu-mcp/cookies" "github.com/xpzouying/xiaohongshu-mcp/xiaohongshu" @@ -401,67 +397,7 @@ func (s *AppServer) handleGetFeedDetail(ctx context.Context, args map[string]any } logrus.Infof("MCP: 获取Feed详情 - Feed ID: %s, loadAllComments=%v, config=%+v", feedID, loadAll, config) - loadAll := false - if raw, ok := args["load_all_comments"]; ok { - switch v := raw.(type) { - case bool: - loadAll = v - case string: - if parsed, err := strconv.ParseBool(v); err == nil { - loadAll = parsed - } - case float64: - loadAll = v != 0 - } - } - // 解析评论配置参数,如果未提供则使用默认值 - config := xiaohongshu.DefaultCommentLoadConfig() - - if raw, ok := args["click_more_replies"]; ok { - switch v := raw.(type) { - case bool: - config.ClickMoreReplies = v - case string: - if parsed, err := strconv.ParseBool(v); err == nil { - config.ClickMoreReplies = parsed - } - } - } - - if raw, ok := args["max_replies_threshold"]; ok { - switch v := raw.(type) { - case float64: - config.MaxRepliesThreshold = int(v) - case string: - if parsed, err := strconv.Atoi(v); err == nil { - config.MaxRepliesThreshold = parsed - } - case int: - config.MaxRepliesThreshold = v - } - } - - if raw, ok := args["max_comment_items"]; ok { - switch v := raw.(type) { - case float64: - config.MaxCommentItems = int(v) - case string: - if parsed, err := strconv.Atoi(v); err == nil { - config.MaxCommentItems = parsed - } - case int: - config.MaxCommentItems = v - } - } - - if raw, ok := args["scroll_speed"].(string); ok && raw != "" { - config.ScrollSpeed = raw - } - - logrus.Infof("MCP: 获取Feed详情 - Feed ID: %s, loadAllComments=%v, config=%+v", feedID, loadAll, config) - - result, err := s.xiaohongshuService.GetFeedDetailWithConfig(ctx, feedID, xsecToken, loadAll, config) result, err := s.xiaohongshuService.GetFeedDetailWithConfig(ctx, feedID, xsecToken, loadAll, config) if err != nil { return &MCPToolResult{ diff --git a/mcp_server.go b/mcp_server.go index 577750e..6d920f0 100644 --- a/mcp_server.go +++ b/mcp_server.go @@ -237,13 +237,6 @@ func registerTools(server *mcp.Server, appServer *AppServer) { "max_replies_threshold": args.MaxRepliesThreshold, "max_comment_items": args.MaxCommentItems, "scroll_speed": args.ScrollSpeed, - "feed_id": args.FeedID, - "xsec_token": args.XsecToken, - "load_all_comments": args.LoadAllComments, - "click_more_replies": args.ClickMoreReplies, - "max_replies_threshold": args.MaxRepliesThreshold, - "max_comment_items": args.MaxCommentItems, - "scroll_speed": args.ScrollSpeed, } result := appServer.handleGetFeedDetail(ctx, argsMap) return convertToMCPResult(result), nil, nil diff --git a/types.go b/types.go index 2147208..f729bf7 100644 --- a/types.go +++ b/types.go @@ -46,26 +46,12 @@ type CommentLoadConfig struct { ScrollSpeed string `json:"scroll_speed,omitempty"` } -// CommentLoadConfig 评论加载配置 -type CommentLoadConfig struct { - // 是否点击"更多回复"按钮 - ClickMoreReplies bool `json:"click_more_replies,omitempty"` - // 回复数量阈值,超过这个数量的"更多"按钮将被跳过(0表示不跳过任何) - MaxRepliesThreshold int `json:"max_replies_threshold,omitempty"` - // 最大加载评论数(comment-item数量),0表示加载所有 - MaxCommentItems int `json:"max_comment_items,omitempty"` - // 滚动速度等级: slow(慢速), normal(正常), fast(快速) - ScrollSpeed string `json:"scroll_speed,omitempty"` -} - // FeedDetailRequest Feed详情请求 type FeedDetailRequest struct { FeedID string `json:"feed_id" binding:"required"` XsecToken string `json:"xsec_token" binding:"required"` LoadAllComments bool `json:"load_all_comments,omitempty"` CommentConfig *CommentLoadConfig `json:"comment_config,omitempty"` - LoadAllComments bool `json:"load_all_comments,omitempty"` - CommentConfig *CommentLoadConfig `json:"comment_config,omitempty"` } type SearchFeedsRequest struct { diff --git a/xiaohongshu/comment_feed.go b/xiaohongshu/comment_feed.go index f0e9e96..a2f0b4e 100644 --- a/xiaohongshu/comment_feed.go +++ b/xiaohongshu/comment_feed.go @@ -22,7 +22,8 @@ func NewCommentFeedAction(page *rod.Page) *CommentFeedAction { // PostComment 发表评论到 Feed func (f *CommentFeedAction) PostComment(ctx context.Context, feedID, xsecToken, content string) error { - page := f.page.Context(ctx).Timeout(60 * time.Second) + // 不使用 Context(ctx),避免继承外部 context 的超时 + page := f.page.Timeout(60 * time.Second) url := makeFeedDetailURL(feedID, xsecToken) logrus.Infof("打开 feed 详情页: %s", url) @@ -81,7 +82,8 @@ func (f *CommentFeedAction) PostComment(ctx context.Context, feedID, xsecToken, // ReplyToComment 回复指定评论 func (f *CommentFeedAction) ReplyToComment(ctx context.Context, feedID, xsecToken, commentID, userID, content string) error { // 增加超时时间,因为需要滚动查找评论 - page := f.page.Context(ctx).Timeout(5 * time.Minute) + // 注意:不使用 Context(ctx),避免继承外部 context 的超时 + page := f.page.Timeout(5 * time.Minute) url := makeFeedDetailURL(feedID, xsecToken) logrus.Infof("打开 feed 详情页进行回复: %s", url) @@ -151,50 +153,122 @@ func (f *CommentFeedAction) ReplyToComment(ctx context.Context, feedID, xsecToke return nil } -// findCommentElement 查找指定评论元素(Go 实现,减少 JS 代码) +// findCommentElement 查找指定评论元素(参考 feed_detail.go 的滚动逻辑) func findCommentElement(page *rod.Page, commentID, userID string) (*rod.Element, error) { logrus.Infof("开始查找评论 - commentID: %s, userID: %s", commentID, userID) - const maxAttempts = 50 + const maxAttempts = 100 const scrollInterval = 800 * time.Millisecond // 先滚动到评论区 - page.MustEval(`() => { - const container = document.querySelector('.comments-container'); - if (container) { - container.scrollIntoView({behavior: 'smooth', block: 'start'}); - } - }`) + scrollToCommentsArea(page) time.Sleep(1 * time.Second) - for attempt := 0; attempt < maxAttempts; attempt++ { - logrus.Debugf("查找尝试 %d/%d", attempt+1, maxAttempts) + var lastCommentCount = 0 + stagnantChecks := 0 - // 优先通过 commentID 查找 + logrus.Infof("开始循环查找,最大尝试次数: %d", maxAttempts) + + for attempt := 0; attempt < maxAttempts; attempt++ { + logrus.Infof("=== 查找尝试 %d/%d ===", attempt+1, maxAttempts) + + // === 1. 检查是否到达底部 === + if checkEndContainer(page) { + logrus.Info("已到达评论底部,未找到目标评论") + break + } + + // === 2. 获取当前评论数量 === + currentCount := getCommentCount(page) + logrus.Infof("当前评论数: %d", currentCount) + + if currentCount != lastCommentCount { + logrus.Infof("✓ 评论数增加: %d -> %d", lastCommentCount, currentCount) + lastCommentCount = currentCount + stagnantChecks = 0 + } else { + stagnantChecks++ + if stagnantChecks%5 == 0 { + logrus.Infof("评论数停滞 %d 次", stagnantChecks) + } + } + + // === 3. 停滞检测 === + if stagnantChecks >= 10 { + logrus.Info("评论数量停滞超过10次,可能已加载完所有评论") + break + } + + // === 4. 先滚动到最后一个评论(触发懒加载)=== + if currentCount > 0 { + logrus.Infof("滚动到最后一个评论(共 %d 条)", currentCount) + _, err := page.Eval(`() => { + const container = document.querySelector('.comments-container'); + if (!container) return false; + + // 查找最后一个评论 + const comments = container.querySelectorAll('.parent-comment, .comment-item, .comment'); + if (comments.length > 0) { + const lastComment = comments[comments.length - 1]; + lastComment.scrollIntoView({behavior: 'smooth', block: 'center'}); + return true; + } + return false; + }`) + if err != nil { + logrus.Warnf("滚动到最后一个评论失败: %v", err) + } + time.Sleep(300 * time.Millisecond) + } + + // === 5. 继续向下滚动 === + logrus.Infof("继续向下滚动...") + _, err := page.Eval(`() => { window.scrollBy(0, window.innerHeight * 0.8); return true; }`) + if err != nil { + logrus.Warnf("滚动失败: %v", err) + } + time.Sleep(500 * time.Millisecond) + + // === 6. 滚动后立即查找(边滚动边查找)=== + // 优先通过 commentID 查找(使用 Timeout 避免长时间等待) if commentID != "" { selector := fmt.Sprintf("#comment-%s", commentID) - if el, err := page.Element(selector); err == nil { - logrus.Infof("✓ 通过 commentID 找到评论: %s", commentID) + logrus.Infof("尝试通过 commentID 查找: %s", selector) + + // 使用 Timeout 避免长时间等待 + el, err := page.Timeout(2 * time.Second).Element(selector) + if err == nil && el != nil { + logrus.Infof("✓ 通过 commentID 找到评论: %s (尝试 %d 次)", commentID, attempt+1) return el, nil } + logrus.Infof("未找到 commentID (2秒超时)") } // 通过 userID 查找 if userID != "" { - elements, err := page.Elements(".comment-item, .comment") - if err == nil { - for _, el := range elements { - userEl, err := el.Element(fmt.Sprintf(`[data-user-id="%s"]`, userID)) + logrus.Infof("尝试通过 userID 查找: %s", userID) + + // 使用 Timeout 避免长时间等待 + elements, err := page.Timeout(2 * time.Second).Elements(".comment-item, .comment, .parent-comment") + if err == nil && len(elements) > 0 { + logrus.Infof("找到 %d 个评论元素", len(elements)) + for i, el := range elements { + // 快速检查,不等待 + userEl, err := el.Timeout(500 * time.Millisecond).Element(fmt.Sprintf(`[data-user-id="%s"]`, userID)) if err == nil && userEl != nil { - logrus.Infof("✓ 通过 userID 找到评论: %s", userID) + logrus.Infof("✓ 通过 userID 在第 %d 个元素中找到评论: %s (尝试 %d 次)", i+1, userID, attempt+1) return el, nil } } + logrus.Infof("在 %d 个元素中未找到匹配的 userID", len(elements)) + } else { + logrus.Infof("获取评论元素失败或超时: %v", err) } } + + logrus.Infof("本次尝试未找到目标评论,继续下一轮...") - // 滚动页面 - page.MustEval(`() => window.scrollBy(0, window.innerHeight * 0.8)`) + // === 7. 等待内容加载 === time.Sleep(scrollInterval) } From 047fa7cb6f216d790f55f59088e2e897a2be3737 Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Sun, 7 Dec 2025 17:11:11 +0800 Subject: [PATCH 18/19] fix: fix --- xiaohongshu/publish.go | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/xiaohongshu/publish.go b/xiaohongshu/publish.go index c7635c4..fd7a29c 100644 --- a/xiaohongshu/publish.go +++ b/xiaohongshu/publish.go @@ -7,8 +7,6 @@ import ( "os" "strings" "time" - "unicode" - "unicode/utf8" "github.com/go-rod/rod" "github.com/go-rod/rod/lib/input" @@ -57,11 +55,6 @@ func (p *PublishAction) Publish(ctx context.Context, content PublishImageContent return errors.New("图片不能为空") } - trimmedContent := strings.TrimRightFunc(content.Content, unicode.IsSpace) - if utf8.RuneCountInString(trimmedContent) > 1000 { - return errors.New("正文内容不能超过1000个字符") - } - page := p.page.Context(ctx) if err := uploadImages(page, content.ImagePaths); err != nil { @@ -440,4 +433,4 @@ func isElementVisible(elem *rod.Element) bool { } return visible -} +} \ No newline at end of file From 484ab31d8ea920c6be17acbf175005cac000665a Mon Sep 17 00:00:00 2001 From: chekayo <9827969+chekayo@user.noreply.gitee.com> Date: Sun, 7 Dec 2025 17:42:28 +0800 Subject: [PATCH 19/19] fix: fix --- xiaohongshu/comment_feed.go | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/xiaohongshu/comment_feed.go b/xiaohongshu/comment_feed.go index a2f0b4e..198b069 100644 --- a/xiaohongshu/comment_feed.go +++ b/xiaohongshu/comment_feed.go @@ -202,21 +202,18 @@ func findCommentElement(page *rod.Page, commentID, userID string) (*rod.Element, // === 4. 先滚动到最后一个评论(触发懒加载)=== if currentCount > 0 { logrus.Infof("滚动到最后一个评论(共 %d 条)", currentCount) - _, err := page.Eval(`() => { - const container = document.querySelector('.comments-container'); - if (!container) return false; - - // 查找最后一个评论 - const comments = container.querySelectorAll('.parent-comment, .comment-item, .comment'); - if (comments.length > 0) { - const lastComment = comments[comments.length - 1]; - lastComment.scrollIntoView({behavior: 'smooth', block: 'center'}); - return true; + + // 使用 Go 获取所有评论元素 + elements, err := page.Timeout(2 * time.Second).Elements(".parent-comment, .comment-item, .comment") + if err == nil && len(elements) > 0 { + // 滚动到最后一个评论 + lastComment := elements[len(elements)-1] + err := lastComment.ScrollIntoView() + if err != nil { + logrus.Warnf("滚动到最后一个评论失败: %v", err) } - return false; - }`) - if err != nil { - logrus.Warnf("滚动到最后一个评论失败: %v", err) + } else { + logrus.Warnf("未找到评论元素: %v", err) } time.Sleep(300 * time.Millisecond) }