feat: 实现获取小红书首页 Feed 列表功能
- 添加 FeedsListAction 用于获取页面 window.__INITIAL_STATE__ 数据
- 定义完整的 Feed 数据结构,包含笔记、视频、用户信息等
- 实现 GetFeedsList 方法解析并返回 Feed 列表
- 添加单元测试验证数据获取和 JSON 序列化

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
55
xiaohongshu/feeds.go
Normal file
55
xiaohongshu/feeds.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package xiaohongshu
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/go-rod/rod"
|
||||
)
|
||||
|
||||
type FeedsListAction struct {
|
||||
page *rod.Page
|
||||
}
|
||||
|
||||
// InitialState 定义页面初始状态结构
|
||||
type InitialState struct {
|
||||
Feed FeedData `json:"feed"`
|
||||
}
|
||||
|
||||
func NewFeedsListAction(page *rod.Page) *FeedsListAction {
|
||||
pp := page.Timeout(60 * time.Second)
|
||||
|
||||
pp.MustNavigate("https://www.xiaohongshu.com")
|
||||
pp.MustWaitLoad()
|
||||
pp.MustWait(`() => window.__INITIAL_STATE__ !== undefined`)
|
||||
|
||||
return &FeedsListAction{page: pp}
|
||||
}
|
||||
|
||||
// GetFeedsList 获取页面的 Feed 列表数据
|
||||
func (f *FeedsListAction) GetFeedsList(ctx context.Context) ([]Feed, error) {
|
||||
page := f.page.Context(ctx)
|
||||
|
||||
// 获取 window.__INITIAL_STATE__ 并转换为 JSON 字符串
|
||||
result := page.MustEval(`() => {
|
||||
if (window.__INITIAL_STATE__) {
|
||||
return JSON.stringify(window.__INITIAL_STATE__);
|
||||
}
|
||||
return "";
|
||||
}`).String()
|
||||
|
||||
if result == "" {
|
||||
return nil, fmt.Errorf("__INITIAL_STATE__ not found")
|
||||
}
|
||||
|
||||
// 解析完整的 InitialState
|
||||
var state InitialState
|
||||
if err := json.Unmarshal([]byte(result), &state); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal __INITIAL_STATE__: %w", err)
|
||||
}
|
||||
|
||||
// 返回 feed.feeds._value
|
||||
return state.Feed.Feeds.Value, nil
|
||||
}
|
||||
86
xiaohongshu/feeds_test.go
Normal file
86
xiaohongshu/feeds_test.go
Normal file
@@ -0,0 +1,86 @@
|
||||
package xiaohongshu
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/xpzouying/xiaohongshu-mcp/browser"
|
||||
)
|
||||
|
||||
func TestGetFeedsList(t *testing.T) {
|
||||
|
||||
t.Skip("SKIP: 测试发布")
|
||||
|
||||
_ = browser.Init(false)
|
||||
defer browser.Close()
|
||||
|
||||
page := browser.NewPage()
|
||||
defer page.Close()
|
||||
|
||||
// NewFeedsListAction 内部已经处理导航
|
||||
action := NewFeedsListAction(page)
|
||||
|
||||
feeds, err := action.GetFeedsList(context.Background())
|
||||
require.NoError(t, err)
|
||||
require.NotEmpty(t, feeds, "feeds should not be empty")
|
||||
|
||||
fmt.Printf("成功获取到 %d 个 Feed\n", len(feeds))
|
||||
|
||||
// 验证 JSON 结构完整性
|
||||
for i, feed := range feeds {
|
||||
// 验证必填字段
|
||||
require.NotEmpty(t, feed.ID, "Feed ID should not be empty")
|
||||
require.NotEmpty(t, feed.ModelType, "ModelType should not be empty")
|
||||
require.NotEmpty(t, feed.XsecToken, "XsecToken should not be empty")
|
||||
require.NotEmpty(t, feed.TrackID, "TrackID should not be empty")
|
||||
require.NotEmpty(t, feed.NoteCard.Type, "NoteCard Type should not be empty")
|
||||
require.NotEmpty(t, feed.NoteCard.DisplayTitle, "DisplayTitle should not be empty")
|
||||
require.NotEmpty(t, feed.NoteCard.User.UserID, "User ID should not be empty")
|
||||
require.NotEmpty(t, feed.NoteCard.User.Nickname, "User nickname should not be empty")
|
||||
|
||||
// 如果是视频类型,检查视频信息
|
||||
if feed.NoteCard.Type == "video" {
|
||||
require.NotNil(t, feed.NoteCard.Video, "Video info should not be nil for video type")
|
||||
if feed.NoteCard.Video != nil {
|
||||
require.True(t, feed.NoteCard.Video.Capa.Duration > 0, "Video duration should be greater than 0")
|
||||
}
|
||||
}
|
||||
|
||||
// 只对第一个 Feed 进行完整 JSON 序列化检查
|
||||
if i == 0 {
|
||||
// 序列化为 JSON
|
||||
jsonData, err := json.MarshalIndent(feed, "", " ")
|
||||
require.NoError(t, err, "Failed to marshal feed")
|
||||
|
||||
fmt.Printf("\n第一个 Feed 的完整 JSON 结构:\n%s\n", string(jsonData))
|
||||
|
||||
// 反序列化检查
|
||||
var checkFeed Feed
|
||||
err = json.Unmarshal(jsonData, &checkFeed)
|
||||
require.NoError(t, err, "Failed to unmarshal feed")
|
||||
|
||||
// 比较序列化前后是否一致
|
||||
require.Equal(t, feed.ID, checkFeed.ID)
|
||||
require.Equal(t, feed.ModelType, checkFeed.ModelType)
|
||||
require.Equal(t, feed.NoteCard.Type, checkFeed.NoteCard.Type)
|
||||
}
|
||||
|
||||
// 打印前3个 Feed 的信息
|
||||
if i < 3 {
|
||||
fmt.Printf("\nFeed %d 基本信息:\n", i+1)
|
||||
fmt.Printf(" ID: %s\n", feed.ID)
|
||||
fmt.Printf(" ModelType: %s\n", feed.ModelType)
|
||||
fmt.Printf(" 标题: %s\n", feed.NoteCard.DisplayTitle)
|
||||
fmt.Printf(" 类型: %s\n", feed.NoteCard.Type)
|
||||
fmt.Printf(" 作者: %s (@%s)\n", feed.NoteCard.User.Nickname, feed.NoteCard.User.UserID)
|
||||
fmt.Printf(" 点赞数: %s\n", feed.NoteCard.InteractInfo.LikedCount)
|
||||
fmt.Printf(" 封面尺寸: %dx%d\n", feed.NoteCard.Cover.Width, feed.NoteCard.Cover.Height)
|
||||
if feed.NoteCard.Type == "video" && feed.NoteCard.Video != nil {
|
||||
fmt.Printf(" 视频时长: %d秒\n", feed.NoteCard.Video.Capa.Duration)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
83
xiaohongshu/types.go
Normal file
83
xiaohongshu/types.go
Normal file
@@ -0,0 +1,83 @@
|
||||
package xiaohongshu
|
||||
|
||||
// 小红书 Feed 相关的数据结构定义
|
||||
|
||||
// FeedResponse 表示从 __INITIAL_STATE__ 中获取的完整 Feed 响应
|
||||
type FeedResponse struct {
|
||||
Feed FeedData `json:"feed"`
|
||||
}
|
||||
|
||||
// FeedData 表示 feed 数据结构
|
||||
type FeedData struct {
|
||||
Feeds FeedsValue `json:"feeds"`
|
||||
}
|
||||
|
||||
// FeedsValue 表示 feeds 的值结构
|
||||
type FeedsValue struct {
|
||||
Value []Feed `json:"_value"`
|
||||
}
|
||||
|
||||
// Feed 表示单个 Feed 项目
|
||||
type Feed struct {
|
||||
XsecToken string `json:"xsecToken"`
|
||||
ID string `json:"id"`
|
||||
ModelType string `json:"modelType"`
|
||||
NoteCard NoteCard `json:"noteCard"`
|
||||
TrackID string `json:"trackId"`
|
||||
Ignore bool `json:"ignore"`
|
||||
Index int `json:"index"`
|
||||
Exposed bool `json:"exposed"`
|
||||
SSRRendered bool `json:"ssrRendered"`
|
||||
}
|
||||
|
||||
// NoteCard 表示笔记卡片信息
|
||||
type NoteCard struct {
|
||||
Type string `json:"type"`
|
||||
DisplayTitle string `json:"displayTitle"`
|
||||
User User `json:"user"`
|
||||
InteractInfo InteractInfo `json:"interactInfo"`
|
||||
Cover Cover `json:"cover"`
|
||||
Video *Video `json:"video,omitempty"` // 视频内容,可能为空
|
||||
}
|
||||
|
||||
// User describes a user attached to a note card.
//
// Nickname and NickName are decoded from two distinct keys, "nickname" and
// "nickName". encoding/json prefers an exact key match, so each key fills
// only its own field.
type User struct {
	UserID    string `json:"userId"`
	Nickname  string `json:"nickname"`
	NickName  string `json:"nickName"`
	Avatar    string `json:"avatar"`
	XsecToken string `json:"xsecToken"`
}
|
||||
|
||||
// InteractInfo holds the like state of a note card. LikedCount is decoded as
// a string, not a number.
type InteractInfo struct {
	Liked      bool   `json:"liked"`
	LikedCount string `json:"likedCount"`
}
|
||||
|
||||
// Cover 表示封面信息
|
||||
type Cover struct {
|
||||
Width int `json:"width"`
|
||||
Height int `json:"height"`
|
||||
URL string `json:"url"`
|
||||
FileID string `json:"fileId"`
|
||||
URLPre string `json:"urlPre"`
|
||||
URLDefault string `json:"urlDefault"`
|
||||
InfoList []ImageInfo `json:"infoList"`
|
||||
}
|
||||
|
||||
// ImageInfo is one entry of Cover.InfoList: an image URL together with the
// scene label it is associated with.
type ImageInfo struct {
	ImageScene string `json:"imageScene"`
	URL        string `json:"url"`
}
|
||||
|
||||
// Video 表示视频信息
|
||||
type Video struct {
|
||||
Capa VideoCapability `json:"capa"`
|
||||
}
|
||||
|
||||
// VideoCapability holds the capability data decoded from a video's "capa"
// object.
type VideoCapability struct {
	Duration int `json:"duration"` // video length, in seconds
}
|
||||
Reference in New Issue
Block a user