feat: 实现获取小红书首页 Feed 列表功能
- 添加 FeedsListAction 用于获取页面 window.__INITIAL_STATE__ 数据
- 定义完整的 Feed 数据结构，包含笔记、视频、用户信息等
- 实现 GetFeedsList 方法解析并返回 Feed 列表
- 添加单元测试验证数据获取和 JSON 序列化

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
55
xiaohongshu/feeds.go
Normal file
55
xiaohongshu/feeds.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package xiaohongshu
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/go-rod/rod"
|
||||
)
|
||||
|
||||
type FeedsListAction struct {
|
||||
page *rod.Page
|
||||
}
|
||||
|
||||
// InitialState 定义页面初始状态结构
|
||||
type InitialState struct {
|
||||
Feed FeedData `json:"feed"`
|
||||
}
|
||||
|
||||
func NewFeedsListAction(page *rod.Page) *FeedsListAction {
|
||||
pp := page.Timeout(60 * time.Second)
|
||||
|
||||
pp.MustNavigate("https://www.xiaohongshu.com")
|
||||
pp.MustWaitLoad()
|
||||
pp.MustWait(`() => window.__INITIAL_STATE__ !== undefined`)
|
||||
|
||||
return &FeedsListAction{page: pp}
|
||||
}
|
||||
|
||||
// GetFeedsList 获取页面的 Feed 列表数据
|
||||
func (f *FeedsListAction) GetFeedsList(ctx context.Context) ([]Feed, error) {
|
||||
page := f.page.Context(ctx)
|
||||
|
||||
// 获取 window.__INITIAL_STATE__ 并转换为 JSON 字符串
|
||||
result := page.MustEval(`() => {
|
||||
if (window.__INITIAL_STATE__) {
|
||||
return JSON.stringify(window.__INITIAL_STATE__);
|
||||
}
|
||||
return "";
|
||||
}`).String()
|
||||
|
||||
if result == "" {
|
||||
return nil, fmt.Errorf("__INITIAL_STATE__ not found")
|
||||
}
|
||||
|
||||
// 解析完整的 InitialState
|
||||
var state InitialState
|
||||
if err := json.Unmarshal([]byte(result), &state); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal __INITIAL_STATE__: %w", err)
|
||||
}
|
||||
|
||||
// 返回 feed.feeds._value
|
||||
return state.Feed.Feeds.Value, nil
|
||||
}
|
||||
Reference in New Issue
Block a user