fix get data panic (#244)

* fix: 修复 data 获取时的循环引用错误
This commit is contained in:
zy
2025-10-16 23:00:57 +08:00
committed by GitHub
parent 844ff8c102
commit df623caf18
8 changed files with 129 additions and 85 deletions

View File

@@ -7,6 +7,8 @@ import (
"time"
"github.com/go-rod/rod"
"github.com/sirupsen/logrus"
"github.com/xpzouying/xiaohongshu-mcp/errors"
)
// FeedDetailAction 表示 Feed 详情页动作
@@ -26,39 +28,37 @@ func (f *FeedDetailAction) GetFeedDetail(ctx context.Context, feedID, xsecToken
// 构建详情页 URL
url := makeFeedDetailURL(feedID, xsecToken)
logrus.Infof("打开 feed 详情页: %s", url)
// 导航到详情页
page.MustNavigate(url)
page.MustWaitDOMStable()
time.Sleep(1 * time.Second)
// 获取 window.__INITIAL_STATE__ 并转换为 JSON 字符串
result := page.MustEval(`() => {
if (window.__INITIAL_STATE__) {
return JSON.stringify(window.__INITIAL_STATE__);
if (window.__INITIAL_STATE__ &&
window.__INITIAL_STATE__.note &&
window.__INITIAL_STATE__.note.noteDetailMap) {
const noteDetailMap = window.__INITIAL_STATE__.note.noteDetailMap;
return JSON.stringify(noteDetailMap);
}
return "";
}`).String()
if result == "" {
return nil, fmt.Errorf("__INITIAL_STATE__ not found")
return nil, errors.ErrNoFeedDetail
}
// 定义响应结构并直接反序列化
var initialState struct {
Note struct {
NoteDetailMap map[string]struct {
Note FeedDetail `json:"note"`
Comments CommentList `json:"comments"`
} `json:"noteDetailMap"`
} `json:"note"`
var noteDetailMap map[string]struct {
Note FeedDetail `json:"note"`
Comments CommentList `json:"comments"`
}
if err := json.Unmarshal([]byte(result), &initialState); err != nil {
return nil, fmt.Errorf("failed to unmarshal __INITIAL_STATE__: %w", err)
if err := json.Unmarshal([]byte(result), &noteDetailMap); err != nil {
return nil, fmt.Errorf("failed to unmarshal noteDetailMap: %w", err)
}
// 从 noteDetailMap 中获取对应 feedID 的数据
noteDetail, exists := initialState.Note.NoteDetailMap[feedID]
noteDetail, exists := noteDetailMap[feedID]
if !exists {
return nil, fmt.Errorf("feed %s not found in noteDetailMap", feedID)
}

View File

@@ -7,17 +7,13 @@ import (
"time"
"github.com/go-rod/rod"
"github.com/xpzouying/xiaohongshu-mcp/errors"
)
// FeedsListAction is the action type whose GetFeedsList method returns
// ([]Feed, error). It carries a single rod page handle.
type FeedsListAction struct {
// page is the rod page handle stored on the action.
page *rod.Page
}
// FeedsResult defines the structure of the page's initial state.
type FeedsResult struct {
// Feed is decoded from the "feed" key of the JSON document.
Feed FeedData `json:"feed"`
}
func NewFeedsListAction(page *rod.Page) *FeedsListAction {
pp := page.Timeout(60 * time.Second)
@@ -33,24 +29,27 @@ func (f *FeedsListAction) GetFeedsList(ctx context.Context) ([]Feed, error) {
time.Sleep(1 * time.Second)
// 获取 window.__INITIAL_STATE__ 并转换为 JSON 字符串
result := page.MustEval(`() => {
if (window.__INITIAL_STATE__) {
return JSON.stringify(window.__INITIAL_STATE__);
if (window.__INITIAL_STATE__ &&
window.__INITIAL_STATE__.feed &&
window.__INITIAL_STATE__.feed.feeds) {
const feeds = window.__INITIAL_STATE__.feed.feeds;
const feedsData = feeds.value !== undefined ? feeds.value : feeds._value;
if (feedsData) {
return JSON.stringify(feedsData);
}
}
return "";
}`).String()
if result == "" {
return nil, fmt.Errorf("__INITIAL_STATE__ not found")
return nil, errors.ErrNoFeeds
}
// 解析完整的 InitialState
var state FeedsResult
if err := json.Unmarshal([]byte(result), &state); err != nil {
return nil, fmt.Errorf("failed to unmarshal __INITIAL_STATE__: %w", err)
var feeds []Feed
if err := json.Unmarshal([]byte(result), &feeds); err != nil {
return nil, fmt.Errorf("failed to unmarshal feeds: %w", err)
}
// 返回 feed.feeds._value
return state.Feed.Feeds.Value, nil
return feeds, nil
}

View File

@@ -9,6 +9,7 @@ import (
"github.com/go-rod/rod"
"github.com/pkg/errors"
"github.com/sirupsen/logrus"
myerrors "github.com/xpzouying/xiaohongshu-mcp/errors"
)
// ActionResult 通用动作响应(点赞/收藏等)
@@ -213,33 +214,33 @@ func (a *FavoriteAction) toggleFavorite(page *rod.Page, feedID string, targetCol
// getInteractState 从 __INITIAL_STATE__ 读取笔记的点赞/收藏状态
func (a *interactAction) getInteractState(page *rod.Page, feedID string) (liked bool, collected bool, err error) {
result := page.MustEval(`() => {
if (window.__INITIAL_STATE__) {
return JSON.stringify(window.__INITIAL_STATE__);
if (window.__INITIAL_STATE__ &&
window.__INITIAL_STATE__.note &&
window.__INITIAL_STATE__.note.noteDetailMap) {
return JSON.stringify(window.__INITIAL_STATE__.note.noteDetailMap);
}
return "";
}`).String()
if result == "" {
return false, false, fmt.Errorf("__INITIAL_STATE__ not found")
return false, false, myerrors.ErrNoFeedDetail
}
var state struct {
// 直接解析为 noteDetailMap
var noteDetailMap map[string]struct {
Note struct {
NoteDetailMap map[string]struct {
Note struct {
InteractInfo struct {
Liked bool `json:"liked"`
Collected bool `json:"collected"`
} `json:"interactInfo"`
} `json:"note"`
} `json:"noteDetailMap"`
InteractInfo struct {
Liked bool `json:"liked"`
Collected bool `json:"collected"`
} `json:"interactInfo"`
} `json:"note"`
}
if err := json.Unmarshal([]byte(result), &state); err != nil {
return false, false, errors.Wrap(err, "unmarshal __INITIAL_STATE__ failed")
if err := json.Unmarshal([]byte(result), &noteDetailMap); err != nil {
return false, false, errors.Wrap(err, "unmarshal noteDetailMap failed")
}
detail, ok := state.Note.NoteDetailMap[feedID]
detail, ok := noteDetailMap[feedID]
if !ok {
return false, false, fmt.Errorf("feed %s not in noteDetailMap", feedID)
}

View File

@@ -8,6 +8,7 @@ import (
"time"
"github.com/go-rod/rod"
"github.com/xpzouying/xiaohongshu-mcp/errors"
)
type SearchResult struct {
@@ -190,24 +191,29 @@ func (s *SearchAction) Search(ctx context.Context, keyword string, filters ...Fi
page.MustWait(`() => window.__INITIAL_STATE__ !== undefined`)
}
// 获取 window.__INITIAL_STATE__ 并转换为 JSON 字符串
result := page.MustEval(`() => {
if (window.__INITIAL_STATE__) {
return JSON.stringify(window.__INITIAL_STATE__);
if (window.__INITIAL_STATE__ &&
window.__INITIAL_STATE__.search &&
window.__INITIAL_STATE__.search.feeds) {
const feeds = window.__INITIAL_STATE__.search.feeds;
const feedsData = feeds.value !== undefined ? feeds.value : feeds._value;
if (feedsData) {
return JSON.stringify(feedsData);
}
return "";
}`).String()
}
return "";
}`).String()
if result == "" {
return nil, fmt.Errorf("__INITIAL_STATE__ not found")
return nil, errors.ErrNoFeeds
}
var searchResult SearchResult
if err := json.Unmarshal([]byte(result), &searchResult); err != nil {
return nil, fmt.Errorf("failed to unmarshal __INITIAL_STATE__: %w", err)
var feeds []Feed
if err := json.Unmarshal([]byte(result), &feeds); err != nil {
return nil, fmt.Errorf("failed to unmarshal feeds: %w", err)
}
return searchResult.Search.Feeds.Value, nil
return feeds, nil
}
func makeSearchURL(keyword string) string {

View File

@@ -33,42 +33,71 @@ func (u *UserProfileAction) UserProfile(ctx context.Context, userID, xsecToken s
// extractUserProfileData waits until window.__INITIAL_STATE__ is defined on
// the page, evaluates JavaScript to serialize parts of that state to JSON,
// decodes the JSON, and assembles a *UserProfileResponse from the basic info,
// the interactions, and the flattened notes.
//
// It returns an error when an evaluated result is the empty string or when
// json.Unmarshal fails.
//
// NOTE(review): this span is a diff rendering. Pre-change lines (which
// serialize the whole __INITIAL_STATE__ into `result`) and post-change lines
// (which serialize only user.userPageData and user.notes) appear interleaved
// without +/- markers, so the text below is not compilable as written.
// NOTE(review): the rod Must* calls presumably panic on failure rather than
// returning an error — confirm against the rod documentation.
func (u *UserProfileAction) extractUserProfileData(page *rod.Page) (*UserProfileResponse, error) {
page.MustWait(`() => window.__INITIAL_STATE__ !== undefined`)
// Read window.__INITIAL_STATE__ and convert it to a JSON string
result := page.MustEval(`() => {
if (window.__INITIAL_STATE__) {
return JSON.stringify(window.__INITIAL_STATE__);
userDataResult := page.MustEval(`() => {
if (window.__INITIAL_STATE__ &&
window.__INITIAL_STATE__.user &&
window.__INITIAL_STATE__.user.userPageData) {
const userPageData = window.__INITIAL_STATE__.user.userPageData;
const data = userPageData.value !== undefined ? userPageData.value : userPageData._value;
if (data) {
return JSON.stringify(data);
}
return "";
}`).String()
}
return "";
}`).String()
if result == "" {
return nil, fmt.Errorf("__INITIAL_STATE__ not found")
if userDataResult == "" {
return nil, fmt.Errorf("user.userPageData.value not found in __INITIAL_STATE__")
}
// Define the response structure and unmarshal into it directly
var initialState = struct {
User struct {
UserPageData UserPageData `json:"userPageData"`
Notes struct {
Feeds [][]Feed `json:"_rawValue"` // notes are a doubly nested array
} `json:"notes"`
} `json:"user"`
}{}
if err := json.Unmarshal([]byte(result), &initialState); err != nil {
return nil, fmt.Errorf("failed to unmarshal __INITIAL_STATE__: %w", err)
// 2. Fetch the user's notes: window.__INITIAL_STATE__.user.notes.value
notesResult := page.MustEval(`() => {
if (window.__INITIAL_STATE__ &&
window.__INITIAL_STATE__.user &&
window.__INITIAL_STATE__.user.notes) {
const notes = window.__INITIAL_STATE__.user.notes;
// 优先使用 valuegetter如果不存在则使用 _value内部字段
const data = notes.value !== undefined ? notes.value : notes._value;
if (data) {
return JSON.stringify(data);
}
}
return "";
}`).String()
if notesResult == "" {
return nil, fmt.Errorf("user.notes.value not found in __INITIAL_STATE__")
}
// Parse the user information
var userPageData struct {
Interactions []UserInteractions `json:"interactions"`
BasicInfo UserBasicInfo `json:"basicInfo"`
}
if err := json.Unmarshal([]byte(userDataResult), &userPageData); err != nil {
return nil, fmt.Errorf("failed to unmarshal userPageData: %w", err)
}
// Parse the notes data (notes are a doubly nested array)
var notesFeeds [][]Feed
if err := json.Unmarshal([]byte(notesResult), &notesFeeds); err != nil {
return nil, fmt.Errorf("failed to unmarshal notes: %w", err)
}
// Assemble the response
response := &UserProfileResponse{
UserBasicInfo: initialState.User.UserPageData.RawValue.BasicInfo,
Interactions: initialState.User.UserPageData.RawValue.Interactions,
UserBasicInfo: userPageData.BasicInfo,
Interactions: userPageData.Interactions,
}
// Append the user's notes
for _, feeds := range initialState.User.Notes.Feeds {
// Append the user's notes (flattening the doubly nested array)
for _, feeds := range notesFeeds {
if len(feeds) != 0 {
response.Feeds = append(response.Feeds, feeds...)
}
}
return response, nil
}
func makeUserProfileURL(userID, xsecToken string) string {