Files
xiaohongshu-mcp/pkg/downloader/images.go
coldmountain 52d0205aff fix: 添加请求头解决图片下载 403 错误 (#412)
- 添加 User-Agent 请求头,模拟浏览器请求
- 添加 Referer 请求头,使用图片 URL 的域名
- 改进错误信息,包含 URL 和状态码便于排查问题

Co-authored-by: user <user@example.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-14 00:53:07 +08:00

164 lines
4.2 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package downloader
import (
"crypto/sha256"
"fmt"
"io"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"time"
"github.com/h2non/filetype"
"github.com/pkg/errors"
)
// ImageDownloader 图片下载器
type ImageDownloader struct {
savePath string
httpClient *http.Client
}
// NewImageDownloader 创建图片下载器
func NewImageDownloader(savePath string) *ImageDownloader {
// 确保保存目录存在
if err := os.MkdirAll(savePath, 0755); err != nil {
panic(fmt.Sprintf("failed to create save path: %v", err))
}
return &ImageDownloader{
savePath: savePath,
httpClient: &http.Client{
Timeout: 30 * time.Second,
},
}
}
// DownloadImage 下载图片
// 返回本地文件路径
func (d *ImageDownloader) DownloadImage(imageURL string) (string, error) {
// 验证URL格式
if !d.isValidImageURL(imageURL) {
return "", errors.New("invalid image URL format")
}
// 创建请求并设置请求头
req, err := http.NewRequest("GET", imageURL, nil)
if err != nil {
return "", errors.Wrap(err, "failed to create request")
}
// 设置 User-Agent模拟浏览器请求
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
// 设置 Referer使用图片 URL 的域名
parsedURL, _ := url.Parse(imageURL)
if parsedURL != nil {
req.Header.Set("Referer", fmt.Sprintf("%s://%s/", parsedURL.Scheme, parsedURL.Host))
}
// 下载图片数据
resp, err := d.httpClient.Do(req)
if err != nil {
return "", errors.Wrapf(err, "failed to download image from %s", imageURL)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("download failed with status %d for URL: %s", resp.StatusCode, imageURL)
}
// 读取图片数据
imageData, err := io.ReadAll(resp.Body)
if err != nil {
return "", errors.Wrap(err, "failed to read image data")
}
// 检测图片格式
kind, err := filetype.Match(imageData)
if err != nil {
return "", errors.Wrap(err, "failed to detect file type")
}
if !filetype.IsImage(imageData) {
return "", errors.New("downloaded file is not a valid image")
}
// 生成唯一文件名
fileName := d.generateFileName(imageURL, kind.Extension)
filePath := filepath.Join(d.savePath, fileName)
// 如果文件已存在,直接返回路径
if _, err := os.Stat(filePath); err == nil {
return filePath, nil
}
// 保存到文件
if err := os.WriteFile(filePath, imageData, 0644); err != nil {
return "", errors.Wrap(err, "failed to save image")
}
return filePath, nil
}
// DownloadImages 批量下载图片
func (d *ImageDownloader) DownloadImages(imageURLs []string) ([]string, error) {
var localPaths []string
var errs []error
for _, imageURL := range imageURLs {
localPath, err := d.DownloadImage(imageURL)
if err != nil {
errs = append(errs, fmt.Errorf("failed to download %s: %w", imageURL, err))
continue
}
localPaths = append(localPaths, localPath)
}
if len(errs) > 0 {
return localPaths, fmt.Errorf("download errors occurred: %v", errs)
}
return localPaths, nil
}
// isValidImageURL 检查是否为有效的图片URL
func (d *ImageDownloader) isValidImageURL(rawURL string) bool {
// 检查是否以http/https开头
if !strings.HasPrefix(strings.ToLower(rawURL), "http://") &&
!strings.HasPrefix(strings.ToLower(rawURL), "https://") {
return false
}
// 检查URL格式
parsedURL, err := url.Parse(rawURL)
if err != nil {
return false
}
return parsedURL.Scheme != "" && parsedURL.Host != ""
}
// generateFileName 生成唯一的文件名
func (d *ImageDownloader) generateFileName(imageURL, extension string) string {
// 使用URL的SHA256哈希作为文件名确保唯一性
hash := sha256.Sum256([]byte(imageURL))
hashStr := fmt.Sprintf("%x", hash)
// 取前16位哈希值作为文件名
shortHash := hashStr[:16]
// 添加时间戳确保更好的唯一性
timestamp := time.Now().Unix()
return fmt.Sprintf("img_%s_%d.%s", shortHash, timestamp, extension)
}
// IsImageURL 判断字符串是否为图片URL
func IsImageURL(path string) bool {
return strings.HasPrefix(strings.ToLower(path), "http://") ||
strings.HasPrefix(strings.ToLower(path), "https://")
}