feat: 实现获取小红书首页 Feed 列表功能

- 添加 FeedsListAction 用于获取页面 window.__INITIAL_STATE__ 数据
- 定义完整的 Feed 数据结构,包含笔记、视频、用户信息等
- 实现 GetFeedsList 方法解析并返回 Feed 列表
- 添加单元测试验证数据获取和 JSON 序列化

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
zy
2025-08-12 01:11:27 +08:00
parent e156592d34
commit 108da414fd
3 changed files with 224 additions and 0 deletions

55
xiaohongshu/feeds.go Normal file
View File

@@ -0,0 +1,55 @@
package xiaohongshu
import (
"context"
"encoding/json"
"fmt"
"time"
"github.com/go-rod/rod"
)
// FeedsListAction reads the feed list out of a page's window.__INITIAL_STATE__.
// Instances are created by NewFeedsListAction, which navigates the page first.
type FeedsListAction struct {
// page is the rod page that GetFeedsList evaluates its script on.
page *rod.Page
}
// InitialState defines the structure of the page's initial state
// (window.__INITIAL_STATE__) as decoded by GetFeedsList. Only the "feed" key
// is mapped; it has the same shape as FeedResponse.
type InitialState struct {
// Feed is decoded from the top-level "feed" key.
Feed FeedData `json:"feed"`
}
// NewFeedsListAction opens the Xiaohongshu home page on the given page and
// returns an action bound to it.
//
// Navigation, the load wait and the wait for window.__INITIAL_STATE__ to be
// defined all run on a copy of page that carries a 60-second timeout; that
// timeout-bound copy is what the returned action keeps. Every step uses rod's
// Must* helpers, so a failure in any of them panics instead of returning an
// error.
func NewFeedsListAction(page *rod.Page) *FeedsListAction {
	const (
		homeURL     = "https://www.xiaohongshu.com"
		loadTimeout = 60 * time.Second
		stateReady  = `() => window.__INITIAL_STATE__ !== undefined`
	)

	timed := page.Timeout(loadTimeout)
	timed.MustNavigate(homeURL).MustWaitLoad().MustWait(stateReady)

	return &FeedsListAction{page: timed}
}
// GetFeedsList reads window.__INITIAL_STATE__ from the action's page and
// returns the feed items stored under feed.feeds._value.
//
// The script is evaluated with ctx attached to the page. An error is returned
// when the evaluation itself fails (for example because ctx is done or the
// script throws), when __INITIAL_STATE__ is missing or falsy, or when its JSON
// form cannot be decoded into InitialState.
func (f *FeedsListAction) GetFeedsList(ctx context.Context) ([]Feed, error) {
	page := f.page.Context(ctx)

	// Serialize the state inside the browser and hand it back as one JSON string.
	// Eval is used instead of MustEval so that evaluation failures are reported
	// through the error return rather than as a panic.
	obj, err := page.Eval(`() => {
if (window.__INITIAL_STATE__) {
return JSON.stringify(window.__INITIAL_STATE__);
}
return "";
}`)
	if err != nil {
		return nil, fmt.Errorf("failed to evaluate __INITIAL_STATE__: %w", err)
	}

	// The script returns "" when the state object is absent.
	result := obj.Value.String()
	if result == "" {
		return nil, fmt.Errorf("__INITIAL_STATE__ not found")
	}

	// Decode the whole InitialState document.
	var state InitialState
	if err := json.Unmarshal([]byte(result), &state); err != nil {
		return nil, fmt.Errorf("failed to unmarshal __INITIAL_STATE__: %w", err)
	}

	// The feed list lives at feed.feeds._value.
	return state.Feed.Feeds.Value, nil
}

86
xiaohongshu/feeds_test.go Normal file
View File

@@ -0,0 +1,86 @@
package xiaohongshu
import (
"context"
"encoding/json"
"fmt"
"testing"
"github.com/stretchr/testify/require"
"github.com/xpzouying/xiaohongshu-mcp/browser"
)
// TestGetFeedsList drives a real browser page through NewFeedsListAction and
// GetFeedsList, checks that every returned feed has its required fields set,
// round-trips the first feed through JSON, and prints a summary of the first
// three feeds. The test is skipped unconditionally by its first statement.
func TestGetFeedsList(t *testing.T) {
	t.Skip("SKIP: 测试发布")

	_ = browser.Init(false)
	defer browser.Close()

	page := browser.NewPage()
	defer page.Close()

	// The constructor already takes care of navigation.
	feeds, err := NewFeedsListAction(page).GetFeedsList(context.Background())
	require.NoError(t, err)
	require.NotEmpty(t, feeds, "feeds should not be empty")

	fmt.Printf("成功获取到 %d 个 Feed\n", len(feeds))

	// Validate the structure of every feed.
	for idx := range feeds {
		item := feeds[idx]
		card := item.NoteCard
		author := card.User
		isVideo := card.Type == "video"

		// Required fields.
		require.NotEmpty(t, item.ID, "Feed ID should not be empty")
		require.NotEmpty(t, item.ModelType, "ModelType should not be empty")
		require.NotEmpty(t, item.XsecToken, "XsecToken should not be empty")
		require.NotEmpty(t, item.TrackID, "TrackID should not be empty")
		require.NotEmpty(t, card.Type, "NoteCard Type should not be empty")
		require.NotEmpty(t, card.DisplayTitle, "DisplayTitle should not be empty")
		require.NotEmpty(t, author.UserID, "User ID should not be empty")
		require.NotEmpty(t, author.Nickname, "User nickname should not be empty")

		// Video feeds must carry video information.
		if isVideo {
			require.NotNil(t, card.Video, "Video info should not be nil for video type")
			if clip := card.Video; clip != nil {
				require.True(t, clip.Capa.Duration > 0, "Video duration should be greater than 0")
			}
		}

		// Only the first feed gets the full JSON round-trip check.
		if idx == 0 {
			encoded, marshalErr := json.MarshalIndent(item, "", " ")
			require.NoError(t, marshalErr, "Failed to marshal feed")

			fmt.Printf("\n第一个 Feed 的完整 JSON 结构:\n%s\n", string(encoded))

			var decoded Feed
			unmarshalErr := json.Unmarshal(encoded, &decoded)
			require.NoError(t, unmarshalErr, "Failed to unmarshal feed")

			// Key fields must survive the round trip unchanged.
			require.Equal(t, item.ID, decoded.ID)
			require.Equal(t, item.ModelType, decoded.ModelType)
			require.Equal(t, card.Type, decoded.NoteCard.Type)
		}

		// Print a summary for the first three feeds only.
		if idx >= 3 {
			continue
		}

		fmt.Printf("\nFeed %d 基本信息:\n", idx+1)
		fmt.Printf(" ID: %s\n", item.ID)
		fmt.Printf(" ModelType: %s\n", item.ModelType)
		fmt.Printf(" 标题: %s\n", card.DisplayTitle)
		fmt.Printf(" 类型: %s\n", card.Type)
		fmt.Printf(" 作者: %s (@%s)\n", author.Nickname, author.UserID)
		fmt.Printf(" 点赞数: %s\n", card.InteractInfo.LikedCount)
		fmt.Printf(" 封面尺寸: %dx%d\n", card.Cover.Width, card.Cover.Height)

		if isVideo && card.Video != nil {
			fmt.Printf(" 视频时长: %d秒\n", card.Video.Capa.Duration)
		}
	}
}

83
xiaohongshu/types.go Normal file
View File

@@ -0,0 +1,83 @@
package xiaohongshu
// 小红书 Feed 相关的数据结构定义
// FeedResponse represents the complete feed response obtained from
// __INITIAL_STATE__. Only the top-level "feed" key is mapped.
type FeedResponse struct {
// Feed is decoded from the "feed" key.
Feed FeedData `json:"feed"`
}
// FeedData represents the feed data structure, i.e. the object stored under
// the "feed" key.
type FeedData struct {
// Feeds is decoded from the "feeds" key and wraps the list of feed items.
Feeds FeedsValue `json:"feeds"`
}
// FeedsValue represents the value structure of feeds: a wrapper object whose
// "_value" key holds the feed items.
type FeedsValue struct {
// Value is the list of feed items decoded from the "_value" key.
Value []Feed `json:"_value"`
}
// Feed represents a single feed item. Each field maps to the camelCase JSON
// key named in its tag.
type Feed struct {
XsecToken string `json:"xsecToken"`
ID string `json:"id"`
ModelType string `json:"modelType"`
// NoteCard carries the note card information (title, author, cover, ...).
NoteCard NoteCard `json:"noteCard"`
TrackID string `json:"trackId"`
Ignore bool `json:"ignore"`
Index int `json:"index"`
Exposed bool `json:"exposed"`
SSRRendered bool `json:"ssrRendered"`
}
// NoteCard represents the note card information of a feed item.
type NoteCard struct {
// Type is the card type; the package's test treats the value "video" as a
// video card.
Type string `json:"type"`
DisplayTitle string `json:"displayTitle"`
User User `json:"user"`
InteractInfo InteractInfo `json:"interactInfo"`
Cover Cover `json:"cover"`
// Video is a pointer so that an absent "video" key decodes to nil, and a nil
// value is omitted when encoding.
Video *Video `json:"video,omitempty"` // video content; may be empty
}
// User represents user information.
type User struct {
UserID string `json:"userId"`
// Nickname and NickName map the two spellings of the nickname key
// ("nickname" and "nickName").
// NOTE(review): which spelling the page actually sends is not visible here — confirm.
Nickname string `json:"nickname"`
NickName string `json:"nickName"`
Avatar string `json:"avatar"`
XsecToken string `json:"xsecToken"`
}
// InteractInfo represents interaction information.
type InteractInfo struct {
Liked bool `json:"liked"`
// LikedCount is decoded as a string, not as a number.
LikedCount string `json:"likedCount"`
}
// Cover represents cover image information.
type Cover struct {
Width int `json:"width"`
Height int `json:"height"`
URL string `json:"url"`
FileID string `json:"fileId"`
URLPre string `json:"urlPre"`
URLDefault string `json:"urlDefault"`
// InfoList is decoded from the "infoList" key; each entry pairs an image
// scene with a URL.
InfoList []ImageInfo `json:"infoList"`
}
// ImageInfo represents image information: an image scene name and its URL.
type ImageInfo struct {
ImageScene string `json:"imageScene"`
URL string `json:"url"`
}
// Video represents video information.
type Video struct {
// Capa is decoded from the "capa" key.
Capa VideoCapability `json:"capa"`
}
// VideoCapability represents video capability information.
type VideoCapability struct {
Duration int `json:"duration"` // video duration, in seconds
}