diff --git a/xiaohongshu/feeds.go b/xiaohongshu/feeds.go
new file mode 100644
index 0000000..d3bf0ab
--- /dev/null
+++ b/xiaohongshu/feeds.go
@@ -0,0 +1,67 @@
+package xiaohongshu
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"time"
+
+	"github.com/go-rod/rod"
+)
+
+// FeedsListAction reads the feed list from a Xiaohongshu page.
+type FeedsListAction struct {
+	page *rod.Page
+}
+
+// InitialState is the subset of window.__INITIAL_STATE__ that is decoded.
+type InitialState struct {
+	Feed FeedData `json:"feed"`
+}
+
+// NewFeedsListAction navigates page to the Xiaohongshu home page, waits for
+// window.__INITIAL_STATE__ to be defined, and returns an action bound to it.
+//
+// It uses rod's Must* helpers and therefore panics if a step fails.
+func NewFeedsListAction(page *rod.Page) *FeedsListAction {
+	pp := page.Timeout(60 * time.Second)
+
+	pp.MustNavigate("https://www.xiaohongshu.com")
+	pp.MustWaitLoad()
+	pp.MustWait(`() => window.__INITIAL_STATE__ !== undefined`)
+
+	return &FeedsListAction{page: pp}
+}
+
+// GetFeedsList returns the feeds found under feed.feeds._value of the page's
+// window.__INITIAL_STATE__.
+//
+// Script evaluation failures are returned as errors rather than panicking.
+func (f *FeedsListAction) GetFeedsList(ctx context.Context) ([]Feed, error) {
+	page := f.page.Context(ctx)
+
+	// Serialize window.__INITIAL_STATE__ in the browser; an empty string
+	// signals that the state object is missing.
+	res, err := page.Eval(`() => {
+		if (window.__INITIAL_STATE__) {
+			return JSON.stringify(window.__INITIAL_STATE__);
+		}
+		return "";
+	}`)
+	if err != nil {
+		return nil, fmt.Errorf("failed to evaluate __INITIAL_STATE__: %w", err)
+	}
+
+	result := res.Value.String()
+	if result == "" {
+		return nil, fmt.Errorf("__INITIAL_STATE__ not found")
+	}
+
+	// Decode only the fields declared on InitialState.
+	var state InitialState
+	if err := json.Unmarshal([]byte(result), &state); err != nil {
+		return nil, fmt.Errorf("failed to unmarshal __INITIAL_STATE__: %w", err)
+	}
+
+	return state.Feed.Feeds.Value, nil
+}
diff --git a/xiaohongshu/feeds_test.go b/xiaohongshu/feeds_test.go
new file mode 100644
index 0000000..bf7fac9
--- /dev/null
+++ b/xiaohongshu/feeds_test.go
@@ -0,0 +1,87 @@
+package xiaohongshu
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+	"github.com/xpzouying/xiaohongshu-mcp/browser"
+)
+
+// TestGetFeedsList exercises GetFeedsList end to end through the browser
+// package. It is currently skipped unconditionally.
+func TestGetFeedsList(t *testing.T) {
+	t.Skip("SKIP: 测试发布")
+
+	_ = browser.Init(false)
+	defer browser.Close()
+
+	page := browser.NewPage()
+	defer page.Close()
+
+	// NewFeedsListAction already performs the navigation.
+	action := NewFeedsListAction(page)
+
+	feeds, err := action.GetFeedsList(context.Background())
+	require.NoError(t, err)
+	require.NotEmpty(t, feeds, "feeds should not be empty")
+
+	fmt.Printf("成功获取到 %d 个 Feed\n", len(feeds))
+
+	// Verify the decoded structure of every feed.
+	for i, feed := range feeds {
+		// Required fields.
+		require.NotEmpty(t, feed.ID, "Feed ID should not be empty")
+		require.NotEmpty(t, feed.ModelType, "ModelType should not be empty")
+		require.NotEmpty(t, feed.XsecToken, "XsecToken should not be empty")
+		require.NotEmpty(t, feed.TrackID, "TrackID should not be empty")
+		require.NotEmpty(t, feed.NoteCard.Type, "NoteCard Type should not be empty")
+		require.NotEmpty(t, feed.NoteCard.DisplayTitle, "DisplayTitle should not be empty")
+		require.NotEmpty(t, feed.NoteCard.User.UserID, "User ID should not be empty")
+		require.NotEmpty(t, feed.NoteCard.User.Nickname, "User nickname should not be empty")
+
+		// For video notes, check the video information.
+		if feed.NoteCard.Type == "video" {
+			require.NotNil(t, feed.NoteCard.Video, "Video info should not be nil for video type")
+			if feed.NoteCard.Video != nil {
+				require.True(t, feed.NoteCard.Video.Capa.Duration > 0, "Video duration should be greater than 0")
+			}
+		}
+
+		// Only the first feed gets the full JSON round-trip check.
+		if i == 0 {
+			// Serialize to JSON.
+			jsonData, err := json.MarshalIndent(feed, "", " ")
+			require.NoError(t, err, "Failed to marshal feed")
+
+			fmt.Printf("\n第一个 Feed 的完整 JSON 结构:\n%s\n", string(jsonData))
+
+			// Deserialize again.
+			var checkFeed Feed
+			err = json.Unmarshal(jsonData, &checkFeed)
+			require.NoError(t, err, "Failed to unmarshal feed")
+
+			// Compare fields before and after the round trip.
+			require.Equal(t, feed.ID, checkFeed.ID)
+			require.Equal(t, feed.ModelType, checkFeed.ModelType)
+			require.Equal(t, feed.NoteCard.Type, checkFeed.NoteCard.Type)
+		}
+
+		// Print a summary of the first three feeds.
+		if i < 3 {
+			fmt.Printf("\nFeed %d 基本信息:\n", i+1)
+			fmt.Printf(" ID: %s\n", feed.ID)
+			fmt.Printf(" ModelType: %s\n", feed.ModelType)
+			fmt.Printf(" 标题: %s\n", feed.NoteCard.DisplayTitle)
+			fmt.Printf(" 类型: %s\n", feed.NoteCard.Type)
+			fmt.Printf(" 作者: %s (@%s)\n", feed.NoteCard.User.Nickname, feed.NoteCard.User.UserID)
+			fmt.Printf(" 点赞数: %s\n", feed.NoteCard.InteractInfo.LikedCount)
+			fmt.Printf(" 封面尺寸: %dx%d\n", feed.NoteCard.Cover.Width, feed.NoteCard.Cover.Height)
+			if feed.NoteCard.Type == "video" && feed.NoteCard.Video != nil {
+				fmt.Printf(" 视频时长: %d秒\n", feed.NoteCard.Video.Capa.Duration)
+			}
+		}
+	}
+}
diff --git a/xiaohongshu/types.go b/xiaohongshu/types.go
new file mode 100644
index 0000000..9390cd5
--- /dev/null
+++ b/xiaohongshu/types.go
@@ -0,0 +1,83 @@
+package xiaohongshu
+
+// Data structures for the Xiaohongshu feed payload.
+
+// FeedResponse is the complete feed response read from __INITIAL_STATE__.
+type FeedResponse struct {
+	Feed FeedData `json:"feed"`
+}
+
+// FeedData is the value of the top-level "feed" key.
+type FeedData struct {
+	Feeds FeedsValue `json:"feeds"`
+}
+
+// FeedsValue wraps the feed slice stored under the "_value" key.
+type FeedsValue struct {
+	Value []Feed `json:"_value"`
+}
+
+// Feed is a single item in the feed list.
+type Feed struct {
+	XsecToken   string   `json:"xsecToken"`
+	ID          string   `json:"id"`
+	ModelType   string   `json:"modelType"`
+	NoteCard    NoteCard `json:"noteCard"`
+	TrackID     string   `json:"trackId"`
+	Ignore      bool     `json:"ignore"`
+	Index       int      `json:"index"`
+	Exposed     bool     `json:"exposed"`
+	SSRRendered bool     `json:"ssrRendered"`
+}
+
+// NoteCard holds the note card information of a feed.
+type NoteCard struct {
+	Type         string       `json:"type"`
+	DisplayTitle string       `json:"displayTitle"`
+	User         User         `json:"user"`
+	InteractInfo InteractInfo `json:"interactInfo"`
+	Cover        Cover        `json:"cover"`
+	Video        *Video       `json:"video,omitempty"` // video content; may be nil
+}
+
+// User holds user information.
+type User struct {
+	UserID    string `json:"userId"`
+	Nickname  string `json:"nickname"`
+	NickName  string `json:"nickName"`
+	Avatar    string `json:"avatar"`
+	XsecToken string `json:"xsecToken"`
+}
+
+// InteractInfo holds interaction information.
+type InteractInfo struct {
+	Liked      bool   `json:"liked"`
+	LikedCount string `json:"likedCount"`
+}
+
+// Cover holds cover image information.
+type Cover struct {
+	Width      int         `json:"width"`
+	Height     int         `json:"height"`
+	URL        string      `json:"url"`
+	FileID     string      `json:"fileId"`
+	URLPre     string      `json:"urlPre"`
+	URLDefault string      `json:"urlDefault"`
+	InfoList   []ImageInfo `json:"infoList"`
+}
+
+// ImageInfo holds image information.
+type ImageInfo struct {
+	ImageScene string `json:"imageScene"`
+	URL        string `json:"url"`
+}
+
+// Video holds video information.
+type Video struct {
+	Capa VideoCapability `json:"capa"`
+}
+
+// VideoCapability holds video capability information.
+type VideoCapability struct {
+	Duration int `json:"duration"` // video duration in seconds
+}