UNPKG

article-writer-cn

Version:

AI 驱动的智能写作系统 - 专注公众号/自媒体文章创作

327 lines 10.7 kB
/**
 * Image download utilities.
 *
 * Features:
 * - Download a single image or a batch of images
 * - Validate image data by file signature
 * - Concurrency limiting and progress display
 * - Retry on failure
 */
import axios from 'axios';
import fs from 'fs-extra';
import path from 'path';
import ora from 'ora';
import pLimit from 'p-limit';

/**
 * Map a Content-Type header value to one of the short format names used by
 * this module.
 *
 * @param {string} contentType - Raw Content-Type value (matched case-insensitively).
 * @returns {string|null} 'png', 'jpg', 'webp', 'gif', 'svg', or null when nothing matches.
 */
function formatFromContentType(contentType) {
    const normalized = contentType.toLowerCase();
    if (normalized.includes('png')) {
        return 'png';
    }
    if (normalized.includes('jpeg') || normalized.includes('jpg')) {
        return 'jpg';
    }
    if (normalized.includes('webp')) {
        return 'webp';
    }
    if (normalized.includes('gif')) {
        return 'gif';
    }
    if (normalized.includes('svg')) {
        return 'svg';
    }
    return null;
}

/**
 * Tell whether an axios-style error carries an HTTP 401 or 403 status.
 *
 * @param {*} error - Error object (may be undefined).
 * @returns {boolean} True for 401/403 responses.
 */
function isAccessDenied(error) {
    const status = error?.response?.status;
    return status === 403 || status === 401;
}

/**
 * Check whether a buffer holds a supported image by inspecting its leading
 * bytes (magic numbers), with a text-based check for SVG.
 *
 * @param {Buffer} buffer - File contents.
 * @returns {{valid: boolean, format?: string}} `valid: true` plus the detected
 *   format ('png' | 'jpg' | 'webp' | 'gif' | 'svg'), or `valid: false`.
 */
export function validateImage(buffer) {
    if (buffer.length < 8) {
        return { valid: false };
    }
    // PNG: 89 50 4E 47 0D 0A 1A 0A (only the first four bytes are checked)
    if (buffer[0] === 0x89 && buffer[1] === 0x50 && buffer[2] === 0x4E && buffer[3] === 0x47) {
        return { valid: true, format: 'png' };
    }
    // JPG/JPEG: FF D8 FF
    if (buffer[0] === 0xFF && buffer[1] === 0xD8 && buffer[2] === 0xFF) {
        return { valid: true, format: 'jpg' };
    }
    // WebP: 52 49 46 46 (RIFF) ... 57 45 42 50 (WEBP at offset 8).
    // Buffers shorter than 12 bytes read `undefined` there and simply do not match.
    if (buffer[0] === 0x52 && buffer[1] === 0x49 && buffer[2] === 0x46 && buffer[3] === 0x46 &&
        buffer[8] === 0x57 && buffer[9] === 0x45 && buffer[10] === 0x42 && buffer[11] === 0x50) {
        return { valid: true, format: 'webp' };
    }
    // GIF: 47 49 46 38 (GIF8)
    if (buffer[0] === 0x47 && buffer[1] === 0x49 && buffer[2] === 0x46 && buffer[3] === 0x38) {
        return { valid: true, format: 'gif' };
    }
    // SVG has no binary signature, so look at the text near the start of the file.
    const head = buffer.toString('utf8', 0, Math.min(buffer.length, 1000));
    if (head.includes('<svg')) {
        return { valid: true, format: 'svg' };
    }
    // An XML prolog alone is not enough: XML error pages and feeds also start
    // with `<?xml`. Require an actual <svg tag somewhere in the document.
    if (head.includes('<?xml') && buffer.includes('<svg')) {
        return { valid: true, format: 'svg' };
    }
    return { valid: false };
}

/**
 * Fetch metadata about a remote image with a HEAD request (the body is not
 * downloaded).
 *
 * @param {string} url - Image URL.
 * @returns {Promise<{format: string, size: number, contentType: string}|null>}
 *   Format inferred from Content-Type ('unknown' if unrecognized), size from
 *   Content-Length (0 when missing or unparsable), and the raw Content-Type.
 *   Resolves to null when the request fails or the response is not `image/*`.
 */
export async function getRemoteImageInfo(url) {
    try {
        const response = await axios.head(url, {
            timeout: 10000,
            headers: {
                'User-Agent': 'Mozilla/5.0 (compatible; ArticleWriter/1.0)'
            }
        });
        const contentType = response.headers['content-type'] || '';
        const parsedLength = Number.parseInt(response.headers['content-length'] || '0', 10);
        const contentLength = Number.isNaN(parsedLength) ? 0 : parsedLength;
        // Only image responses are of interest.
        if (!contentType.toLowerCase().startsWith('image/')) {
            return null;
        }
        const format = formatFromContentType(contentType) ?? 'unknown';
        return {
            format,
            size: contentLength,
            contentType
        };
    }
    catch (error) {
        // Best-effort probe: any failure is reported as "no info available".
        return null;
    }
}

/**
 * Download a single image.
 *
 * Strategy:
 * 1. Unless `usePlaywright` is set, download directly with axios, retrying on
 *    failure.
 * 2. On HTTP 401/403, or when `usePlaywright` is set, return a failure result
 *    flagged with `needsPlaywright: true`. The browser-based download itself
 *    is left to the caller.
 * 3. Otherwise return a plain failure result.
 *
 * @param {object} options
 * @param {string} options.url - Image URL.
 * @param {string} options.savePath - Destination file path; parent directories are created.
 * @param {number} [options.maxRetries=3] - Maximum number of axios attempts.
 * @param {number} [options.timeout=30000] - Per-request timeout in milliseconds.
 * @param {string} [options.userAgent] - User-Agent header value.
 * @param {boolean} [usePlaywright=false] - Skip the axios download entirely.
 * @returns {Promise<object>} `{ url, savePath, success, size?, format?, error?, needsPlaywright? }`.
 */
export async function downloadImage(options, usePlaywright = false) {
    const {
        url,
        savePath,
        maxRetries = 3,
        timeout = 30000,
        userAgent = 'Mozilla/5.0 (compatible; ArticleWriter/1.0)'
    } = options;
    let lastError;
    // Strategy 1: direct download with axios (preferred).
    if (!usePlaywright) {
        for (let attempt = 1; attempt <= maxRetries; attempt++) {
            try {
                const response = await axios.get(url, {
                    responseType: 'arraybuffer',
                    timeout,
                    headers: {
                        'User-Agent': userAgent
                    },
                    maxRedirects: 5
                });
                const buffer = Buffer.from(response.data);
                // A response that is not an image is a definitive failure: no retry.
                const validation = validateImage(buffer);
                if (!validation.valid) {
                    return {
                        url,
                        savePath,
                        success: false,
                        error: 'Downloaded file is not a valid image'
                    };
                }
                await fs.ensureDir(path.dirname(savePath));
                await fs.writeFile(savePath, buffer);
                return {
                    url,
                    savePath,
                    success: true,
                    size: buffer.length,
                    format: validation.format
                };
            }
            catch (error) {
                lastError = error;
                // Authorization failures will not improve on retry; hand over to Playwright.
                if (isAccessDenied(error)) {
                    break;
                }
                if (attempt === maxRetries) {
                    break;
                }
                // Linear backoff: wait 1s, 2s, 3s, ... before the next attempt.
                await new Promise(resolve => setTimeout(resolve, 1000 * attempt));
            }
        }
    }
    const failure = {
        url,
        savePath,
        success: false,
        error: lastError?.message || 'Download failed after retries'
    };
    // Strategy 2: signal that a browser-based download is required.
    if (isAccessDenied(lastError) || usePlaywright) {
        return { ...failure, needsPlaywright: true };
    }
    // All attempts failed.
    return failure;
}

/**
 * Download a batch of images with limited concurrency.
 *
 * @param {Array<object>} tasks - `downloadImage` option objects.
 * @param {number} [concurrency=5] - Maximum number of simultaneous downloads.
 * @param {boolean} [showProgress=true] - Show an ora spinner while downloading.
 * @returns {Promise<Array<object>>} One result per task, in the same order as `tasks`.
 */
export async function downloadImages(tasks, concurrency = 5, showProgress = true) {
    const limit = pLimit(concurrency);
    let spinner;
    if (showProgress) {
        spinner = ora('准备下载图片...').start();
    }
    const promises = tasks.map((task, index) => limit(async () => {
        if (spinner) {
            spinner.text = `下载图片 ${index + 1}/${tasks.length}: ${path.basename(task.savePath)}`;
        }
        const result = await downloadImage(task);
        if (spinner) {
            const status = result.success ? '✓' : '✗';
            const size = result.size ? `(${(result.size / 1024).toFixed(1)}KB)` : '';
            spinner.text = `[${status}] ${path.basename(task.savePath)} ${size}`;
        }
        return result;
    }));
    let results;
    try {
        // Promise.all keeps input order, so results[i] always belongs to tasks[i]
        // regardless of which download finishes first.
        results = await Promise.all(promises);
    }
    catch (error) {
        // Do not leave the spinner running if a task rejects unexpectedly.
        spinner?.stop();
        throw error;
    }
    if (spinner) {
        const successCount = results.filter(r => r.success).length;
        const totalCount = results.length;
        if (successCount === totalCount) {
            spinner.succeed(`图片下载完成: ${successCount}/${totalCount} 成功`);
        }
        else {
            spinner.warn(`图片下载完成: ${successCount}/${totalCount} 成功, ${totalCount - successCount} 失败`);
        }
    }
    return results;
}

/**
 * Infer a file extension for an image.
 *
 * The URL path is consulted first, then the Content-Type; '.png' is the
 * default when neither yields a known image extension.
 *
 * @param {string} url - Image URL (a relative or malformed URL is treated as a plain path).
 * @param {string} [contentType] - Content-Type header value, if known.
 * @returns {string} Extension including the leading dot, e.g. '.jpg'.
 */
export function inferFileExtension(url, contentType) {
    let urlExt;
    try {
        urlExt = path.extname(new URL(url).pathname).toLowerCase();
    }
    catch {
        // Not an absolute URL: strip any query/fragment and treat it as a path
        // instead of throwing, so the fallbacks below still apply.
        urlExt = path.extname(String(url).split(/[?#]/, 1)[0]).toLowerCase();
    }
    if (urlExt && ['.png', '.jpg', '.jpeg', '.webp', '.gif', '.svg'].includes(urlExt)) {
        return urlExt;
    }
    if (contentType) {
        const format = formatFromContentType(contentType);
        if (format) {
            return `.${format}`;
        }
    }
    return '.png';
}

/**
 * Write a JSON summary of download results.
 *
 * @param {Array<object>} results - Results as returned by `downloadImage` / `downloadImages`.
 * @param {string} logPath - Destination file; parent directories are created if missing.
 * @returns {Promise<void>}
 */
export async function saveDownloadLog(results, logPath) {
    const successCount = results.filter(r => r.success).length;
    const log = {
        timestamp: new Date().toISOString(),
        total: results.length,
        success: successCount,
        failed: results.length - successCount,
        results: results.map(r => ({
            url: r.url,
            fileName: path.basename(r.savePath),
            success: r.success,
            size: r.size,
            format: r.format,
            error: r.error
        }))
    };
    // writeJson does not create missing directories on its own.
    await fs.ensureDir(path.dirname(logPath));
    await fs.writeJson(logPath, log, { spaces: 2 });
}

/**
 * Convert a local image file to a Base64 data URI.
 *
 * @param {string} imagePath - Path of the image file.
 * @returns {Promise<string>} Data URI such as `data:image/png;base64,...`.
 * @throws {Error} If the file does not exist, cannot be read, or is not a
 *   recognized image.
 *
 * @example
 * const dataUri = await imageToBase64('/path/to/image.png');
 */
export async function imageToBase64(imagePath) {
    if (!await fs.pathExists(imagePath)) {
        throw new Error(`图片文件不存在: ${imagePath}`);
    }
    const imageBuffer = await fs.readFile(imagePath);
    const validation = validateImage(imageBuffer);
    if (!validation.valid) {
        throw new Error(`无效的图片文件: ${imagePath}`);
    }
    const base64 = imageBuffer.toString('base64');
    // Translate the short format name into a registered MIME subtype:
    // JPEG is `image/jpeg` and SVG is `image/svg+xml`.
    const ext = validation.format || path.extname(imagePath).slice(1);
    let mimeSubtype = ext;
    if (ext === 'jpg') {
        mimeSubtype = 'jpeg';
    }
    else if (ext === 'svg') {
        mimeSubtype = 'svg+xml';
    }
    return `data:image/${mimeSubtype};base64,${base64}`;
}

/**
 * Tell whether a string is a local file path, i.e. not an http(s) URL.
 * The scheme is matched case-insensitively.
 *
 * @param {string} urlOrPath - URL or file path.
 * @returns {boolean} True when the value does not start with `http://` or `https://`.
 */
export function isLocalPath(urlOrPath) {
    return !/^https?:\/\//i.test(urlOrPath);
}

/**
 * Read the size (in bytes) and format of a local image file.
 *
 * @param {string} imagePath - Path of the image file.
 * @returns {Promise<{format: string, size: number}|null>} Null when the file
 *   is missing, unreadable, or not a recognized image.
 */
export async function getImageInfo(imagePath) {
    try {
        if (!await fs.pathExists(imagePath)) {
            return null;
        }
        const stats = await fs.stat(imagePath);
        const buffer = await fs.readFile(imagePath);
        const validation = validateImage(buffer);
        if (!validation.valid) {
            return null;
        }
        return {
            format: validation.format || 'unknown',
            size: stats.size,
        };
    }
    catch (error) {
        // Best-effort lookup: any I/O failure is reported as "no info available".
        return null;
    }
}
//# sourceMappingURL=image-downloader.js.map