- 添加 FeedsListAction 用于获取页面 window.__INITIAL_STATE__ 数据
- 定义完整的 Feed 数据结构,包含笔记、视频、用户信息等
- 实现 GetFeedsList 方法解析并返回 Feed 列表
- 添加单元测试验证数据获取和 JSON 序列化

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
56 lines
1.2 KiB
Go
56 lines
1.2 KiB
Go
package xiaohongshu
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"time"
|
|
|
|
"github.com/go-rod/rod"
|
|
)
|
|
|
|
type FeedsListAction struct {
|
|
page *rod.Page
|
|
}
|
|
|
|
// InitialState 定义页面初始状态结构
|
|
type InitialState struct {
|
|
Feed FeedData `json:"feed"`
|
|
}
|
|
|
|
func NewFeedsListAction(page *rod.Page) *FeedsListAction {
|
|
pp := page.Timeout(60 * time.Second)
|
|
|
|
pp.MustNavigate("https://www.xiaohongshu.com")
|
|
pp.MustWaitLoad()
|
|
pp.MustWait(`() => window.__INITIAL_STATE__ !== undefined`)
|
|
|
|
return &FeedsListAction{page: pp}
|
|
}
|
|
|
|
// GetFeedsList 获取页面的 Feed 列表数据
|
|
func (f *FeedsListAction) GetFeedsList(ctx context.Context) ([]Feed, error) {
|
|
page := f.page.Context(ctx)
|
|
|
|
// 获取 window.__INITIAL_STATE__ 并转换为 JSON 字符串
|
|
result := page.MustEval(`() => {
|
|
if (window.__INITIAL_STATE__) {
|
|
return JSON.stringify(window.__INITIAL_STATE__);
|
|
}
|
|
return "";
|
|
}`).String()
|
|
|
|
if result == "" {
|
|
return nil, fmt.Errorf("__INITIAL_STATE__ not found")
|
|
}
|
|
|
|
// 解析完整的 InitialState
|
|
var state InitialState
|
|
if err := json.Unmarshal([]byte(result), &state); err != nil {
|
|
return nil, fmt.Errorf("failed to unmarshal __INITIAL_STATE__: %w", err)
|
|
}
|
|
|
|
// 返回 feed.feeds._value
|
|
return state.Feed.Feeds.Value, nil
|
|
}
|