- 添加 User-Agent 请求头,模拟浏览器请求 - 添加 Referer 请求头,使用图片 URL 的域名 - 改进错误信息,包含 URL 和状态码便于排查问题 Co-authored-by: user <user@example.com> Co-authored-by: Cursor <cursoragent@cursor.com>
164 lines
4.2 KiB
Go
164 lines
4.2 KiB
Go
package downloader
|
||
|
||
import (
|
||
"crypto/sha256"
|
||
"fmt"
|
||
"io"
|
||
"net/http"
|
||
"net/url"
|
||
"os"
|
||
"path/filepath"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/h2non/filetype"
|
||
"github.com/pkg/errors"
|
||
)
|
||
|
||
// ImageDownloader 图片下载器
|
||
type ImageDownloader struct {
|
||
savePath string
|
||
httpClient *http.Client
|
||
}
|
||
|
||
// NewImageDownloader 创建图片下载器
|
||
func NewImageDownloader(savePath string) *ImageDownloader {
|
||
// 确保保存目录存在
|
||
if err := os.MkdirAll(savePath, 0755); err != nil {
|
||
panic(fmt.Sprintf("failed to create save path: %v", err))
|
||
}
|
||
|
||
return &ImageDownloader{
|
||
savePath: savePath,
|
||
httpClient: &http.Client{
|
||
Timeout: 30 * time.Second,
|
||
},
|
||
}
|
||
}
|
||
|
||
// DownloadImage 下载图片
|
||
// 返回本地文件路径
|
||
func (d *ImageDownloader) DownloadImage(imageURL string) (string, error) {
|
||
// 验证URL格式
|
||
if !d.isValidImageURL(imageURL) {
|
||
return "", errors.New("invalid image URL format")
|
||
}
|
||
|
||
// 创建请求并设置请求头
|
||
req, err := http.NewRequest("GET", imageURL, nil)
|
||
if err != nil {
|
||
return "", errors.Wrap(err, "failed to create request")
|
||
}
|
||
|
||
// 设置 User-Agent,模拟浏览器请求
|
||
req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
|
||
|
||
// 设置 Referer,使用图片 URL 的域名
|
||
parsedURL, _ := url.Parse(imageURL)
|
||
if parsedURL != nil {
|
||
req.Header.Set("Referer", fmt.Sprintf("%s://%s/", parsedURL.Scheme, parsedURL.Host))
|
||
}
|
||
|
||
// 下载图片数据
|
||
resp, err := d.httpClient.Do(req)
|
||
if err != nil {
|
||
return "", errors.Wrapf(err, "failed to download image from %s", imageURL)
|
||
}
|
||
defer resp.Body.Close()
|
||
|
||
if resp.StatusCode != http.StatusOK {
|
||
return "", fmt.Errorf("download failed with status %d for URL: %s", resp.StatusCode, imageURL)
|
||
}
|
||
|
||
// 读取图片数据
|
||
imageData, err := io.ReadAll(resp.Body)
|
||
if err != nil {
|
||
return "", errors.Wrap(err, "failed to read image data")
|
||
}
|
||
|
||
// 检测图片格式
|
||
kind, err := filetype.Match(imageData)
|
||
if err != nil {
|
||
return "", errors.Wrap(err, "failed to detect file type")
|
||
}
|
||
|
||
if !filetype.IsImage(imageData) {
|
||
return "", errors.New("downloaded file is not a valid image")
|
||
}
|
||
|
||
// 生成唯一文件名
|
||
fileName := d.generateFileName(imageURL, kind.Extension)
|
||
filePath := filepath.Join(d.savePath, fileName)
|
||
|
||
// 如果文件已存在,直接返回路径
|
||
if _, err := os.Stat(filePath); err == nil {
|
||
return filePath, nil
|
||
}
|
||
|
||
// 保存到文件
|
||
if err := os.WriteFile(filePath, imageData, 0644); err != nil {
|
||
return "", errors.Wrap(err, "failed to save image")
|
||
}
|
||
|
||
return filePath, nil
|
||
}
|
||
|
||
// DownloadImages 批量下载图片
|
||
func (d *ImageDownloader) DownloadImages(imageURLs []string) ([]string, error) {
|
||
var localPaths []string
|
||
var errs []error
|
||
|
||
for _, imageURL := range imageURLs {
|
||
localPath, err := d.DownloadImage(imageURL)
|
||
if err != nil {
|
||
errs = append(errs, fmt.Errorf("failed to download %s: %w", imageURL, err))
|
||
continue
|
||
}
|
||
localPaths = append(localPaths, localPath)
|
||
}
|
||
|
||
if len(errs) > 0 {
|
||
return localPaths, fmt.Errorf("download errors occurred: %v", errs)
|
||
}
|
||
|
||
return localPaths, nil
|
||
}
|
||
|
||
// isValidImageURL 检查是否为有效的图片URL
|
||
func (d *ImageDownloader) isValidImageURL(rawURL string) bool {
|
||
// 检查是否以http/https开头
|
||
if !strings.HasPrefix(strings.ToLower(rawURL), "http://") &&
|
||
!strings.HasPrefix(strings.ToLower(rawURL), "https://") {
|
||
return false
|
||
}
|
||
|
||
// 检查URL格式
|
||
parsedURL, err := url.Parse(rawURL)
|
||
if err != nil {
|
||
return false
|
||
}
|
||
|
||
return parsedURL.Scheme != "" && parsedURL.Host != ""
|
||
}
|
||
|
||
// generateFileName 生成唯一的文件名
|
||
func (d *ImageDownloader) generateFileName(imageURL, extension string) string {
|
||
// 使用URL的SHA256哈希作为文件名,确保唯一性
|
||
hash := sha256.Sum256([]byte(imageURL))
|
||
hashStr := fmt.Sprintf("%x", hash)
|
||
|
||
// 取前16位哈希值作为文件名
|
||
shortHash := hashStr[:16]
|
||
|
||
// 添加时间戳确保更好的唯一性
|
||
timestamp := time.Now().Unix()
|
||
|
||
return fmt.Sprintf("img_%s_%d.%s", shortHash, timestamp, extension)
|
||
}
|
||
|
||
// IsImageURL 判断字符串是否为图片URL
|
||
func IsImageURL(path string) bool {
|
||
return strings.HasPrefix(strings.ToLower(path), "http://") ||
|
||
strings.HasPrefix(strings.ToLower(path), "https://")
|
||
}
|