feat: 实现获取小红书首页 Feed 列表功能
- 添加 FeedsListAction 用于获取页面 window.__INITIAL_STATE__ 数据
- 定义完整的 Feed 数据结构，包含笔记、视频、用户信息等
- 实现 GetFeedsList 方法解析并返回 Feed 列表
- 添加单元测试验证数据获取和 JSON 序列化

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
55
xiaohongshu/feeds.go
Normal file
55
xiaohongshu/feeds.go
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
package xiaohongshu
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/go-rod/rod"
|
||||||
|
)
|
||||||
|
|
||||||
|
type FeedsListAction struct {
|
||||||
|
page *rod.Page
|
||||||
|
}
|
||||||
|
|
||||||
|
// InitialState 定义页面初始状态结构
|
||||||
|
type InitialState struct {
|
||||||
|
Feed FeedData `json:"feed"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewFeedsListAction(page *rod.Page) *FeedsListAction {
|
||||||
|
pp := page.Timeout(60 * time.Second)
|
||||||
|
|
||||||
|
pp.MustNavigate("https://www.xiaohongshu.com")
|
||||||
|
pp.MustWaitLoad()
|
||||||
|
pp.MustWait(`() => window.__INITIAL_STATE__ !== undefined`)
|
||||||
|
|
||||||
|
return &FeedsListAction{page: pp}
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetFeedsList 获取页面的 Feed 列表数据
|
||||||
|
func (f *FeedsListAction) GetFeedsList(ctx context.Context) ([]Feed, error) {
|
||||||
|
page := f.page.Context(ctx)
|
||||||
|
|
||||||
|
// 获取 window.__INITIAL_STATE__ 并转换为 JSON 字符串
|
||||||
|
result := page.MustEval(`() => {
|
||||||
|
if (window.__INITIAL_STATE__) {
|
||||||
|
return JSON.stringify(window.__INITIAL_STATE__);
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}`).String()
|
||||||
|
|
||||||
|
if result == "" {
|
||||||
|
return nil, fmt.Errorf("__INITIAL_STATE__ not found")
|
||||||
|
}
|
||||||
|
|
||||||
|
// 解析完整的 InitialState
|
||||||
|
var state InitialState
|
||||||
|
if err := json.Unmarshal([]byte(result), &state); err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to unmarshal __INITIAL_STATE__: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 返回 feed.feeds._value
|
||||||
|
return state.Feed.Feeds.Value, nil
|
||||||
|
}
|
||||||
86
xiaohongshu/feeds_test.go
Normal file
86
xiaohongshu/feeds_test.go
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
package xiaohongshu
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
"github.com/xpzouying/xiaohongshu-mcp/browser"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestGetFeedsList(t *testing.T) {
|
||||||
|
|
||||||
|
t.Skip("SKIP: 测试发布")
|
||||||
|
|
||||||
|
_ = browser.Init(false)
|
||||||
|
defer browser.Close()
|
||||||
|
|
||||||
|
page := browser.NewPage()
|
||||||
|
defer page.Close()
|
||||||
|
|
||||||
|
// NewFeedsListAction 内部已经处理导航
|
||||||
|
action := NewFeedsListAction(page)
|
||||||
|
|
||||||
|
feeds, err := action.GetFeedsList(context.Background())
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.NotEmpty(t, feeds, "feeds should not be empty")
|
||||||
|
|
||||||
|
fmt.Printf("成功获取到 %d 个 Feed\n", len(feeds))
|
||||||
|
|
||||||
|
// 验证 JSON 结构完整性
|
||||||
|
for i, feed := range feeds {
|
||||||
|
// 验证必填字段
|
||||||
|
require.NotEmpty(t, feed.ID, "Feed ID should not be empty")
|
||||||
|
require.NotEmpty(t, feed.ModelType, "ModelType should not be empty")
|
||||||
|
require.NotEmpty(t, feed.XsecToken, "XsecToken should not be empty")
|
||||||
|
require.NotEmpty(t, feed.TrackID, "TrackID should not be empty")
|
||||||
|
require.NotEmpty(t, feed.NoteCard.Type, "NoteCard Type should not be empty")
|
||||||
|
require.NotEmpty(t, feed.NoteCard.DisplayTitle, "DisplayTitle should not be empty")
|
||||||
|
require.NotEmpty(t, feed.NoteCard.User.UserID, "User ID should not be empty")
|
||||||
|
require.NotEmpty(t, feed.NoteCard.User.Nickname, "User nickname should not be empty")
|
||||||
|
|
||||||
|
// 如果是视频类型,检查视频信息
|
||||||
|
if feed.NoteCard.Type == "video" {
|
||||||
|
require.NotNil(t, feed.NoteCard.Video, "Video info should not be nil for video type")
|
||||||
|
if feed.NoteCard.Video != nil {
|
||||||
|
require.True(t, feed.NoteCard.Video.Capa.Duration > 0, "Video duration should be greater than 0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 只对第一个 Feed 进行完整 JSON 序列化检查
|
||||||
|
if i == 0 {
|
||||||
|
// 序列化为 JSON
|
||||||
|
jsonData, err := json.MarshalIndent(feed, "", " ")
|
||||||
|
require.NoError(t, err, "Failed to marshal feed")
|
||||||
|
|
||||||
|
fmt.Printf("\n第一个 Feed 的完整 JSON 结构:\n%s\n", string(jsonData))
|
||||||
|
|
||||||
|
// 反序列化检查
|
||||||
|
var checkFeed Feed
|
||||||
|
err = json.Unmarshal(jsonData, &checkFeed)
|
||||||
|
require.NoError(t, err, "Failed to unmarshal feed")
|
||||||
|
|
||||||
|
// 比较序列化前后是否一致
|
||||||
|
require.Equal(t, feed.ID, checkFeed.ID)
|
||||||
|
require.Equal(t, feed.ModelType, checkFeed.ModelType)
|
||||||
|
require.Equal(t, feed.NoteCard.Type, checkFeed.NoteCard.Type)
|
||||||
|
}
|
||||||
|
|
||||||
|
// 打印前3个 Feed 的信息
|
||||||
|
if i < 3 {
|
||||||
|
fmt.Printf("\nFeed %d 基本信息:\n", i+1)
|
||||||
|
fmt.Printf(" ID: %s\n", feed.ID)
|
||||||
|
fmt.Printf(" ModelType: %s\n", feed.ModelType)
|
||||||
|
fmt.Printf(" 标题: %s\n", feed.NoteCard.DisplayTitle)
|
||||||
|
fmt.Printf(" 类型: %s\n", feed.NoteCard.Type)
|
||||||
|
fmt.Printf(" 作者: %s (@%s)\n", feed.NoteCard.User.Nickname, feed.NoteCard.User.UserID)
|
||||||
|
fmt.Printf(" 点赞数: %s\n", feed.NoteCard.InteractInfo.LikedCount)
|
||||||
|
fmt.Printf(" 封面尺寸: %dx%d\n", feed.NoteCard.Cover.Width, feed.NoteCard.Cover.Height)
|
||||||
|
if feed.NoteCard.Type == "video" && feed.NoteCard.Video != nil {
|
||||||
|
fmt.Printf(" 视频时长: %d秒\n", feed.NoteCard.Video.Capa.Duration)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
83
xiaohongshu/types.go
Normal file
83
xiaohongshu/types.go
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
package xiaohongshu
|
||||||
|
|
||||||
|
// 小红书 Feed 相关的数据结构定义
|
||||||
|
|
||||||
|
// FeedResponse 表示从 __INITIAL_STATE__ 中获取的完整 Feed 响应
|
||||||
|
type FeedResponse struct {
|
||||||
|
Feed FeedData `json:"feed"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// FeedData 表示 feed 数据结构
|
||||||
|
type FeedData struct {
|
||||||
|
Feeds FeedsValue `json:"feeds"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// FeedsValue 表示 feeds 的值结构
|
||||||
|
type FeedsValue struct {
|
||||||
|
Value []Feed `json:"_value"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Feed 表示单个 Feed 项目
|
||||||
|
type Feed struct {
|
||||||
|
XsecToken string `json:"xsecToken"`
|
||||||
|
ID string `json:"id"`
|
||||||
|
ModelType string `json:"modelType"`
|
||||||
|
NoteCard NoteCard `json:"noteCard"`
|
||||||
|
TrackID string `json:"trackId"`
|
||||||
|
Ignore bool `json:"ignore"`
|
||||||
|
Index int `json:"index"`
|
||||||
|
Exposed bool `json:"exposed"`
|
||||||
|
SSRRendered bool `json:"ssrRendered"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// NoteCard 表示笔记卡片信息
|
||||||
|
type NoteCard struct {
|
||||||
|
Type string `json:"type"`
|
||||||
|
DisplayTitle string `json:"displayTitle"`
|
||||||
|
User User `json:"user"`
|
||||||
|
InteractInfo InteractInfo `json:"interactInfo"`
|
||||||
|
Cover Cover `json:"cover"`
|
||||||
|
Video *Video `json:"video,omitempty"` // 视频内容,可能为空
|
||||||
|
}
|
||||||
|
|
||||||
|
// User represents user information.
//
// Nickname and NickName are separate fields decoded from the distinct JSON
// keys "nickname" and "nickName".
type User struct {
	UserID    string `json:"userId"`
	Nickname  string `json:"nickname"`
	NickName  string `json:"nickName"`
	Avatar    string `json:"avatar"`
	XsecToken string `json:"xsecToken"`
}
|
||||||
|
|
||||||
|
// InteractInfo represents interaction information for a note.
type InteractInfo struct {
	Liked      bool   `json:"liked"`
	LikedCount string `json:"likedCount"` // carried as a string, not a number
}
|
||||||
|
|
||||||
|
// Cover 表示封面信息
|
||||||
|
type Cover struct {
|
||||||
|
Width int `json:"width"`
|
||||||
|
Height int `json:"height"`
|
||||||
|
URL string `json:"url"`
|
||||||
|
FileID string `json:"fileId"`
|
||||||
|
URLPre string `json:"urlPre"`
|
||||||
|
URLDefault string `json:"urlDefault"`
|
||||||
|
InfoList []ImageInfo `json:"infoList"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// ImageInfo represents image information: a scene label and the image URL.
type ImageInfo struct {
	ImageScene string `json:"imageScene"`
	URL        string `json:"url"`
}
|
||||||
|
|
||||||
|
// Video 表示视频信息
|
||||||
|
type Video struct {
|
||||||
|
Capa VideoCapability `json:"capa"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// VideoCapability represents video capability information.
type VideoCapability struct {
	Duration int `json:"duration"` // video duration, in seconds
}
|
||||||
Reference in New Issue
Block a user