UNPKG

@alauda-fe/i18n-tools

Version:

基于 Azure OpenAI 的 JSON i18n 文件翻译和英文语法检查工具集

468 lines (398 loc) 15.2 kB
import fs from "node:fs/promises";
import path from "node:path";
import { AzureOpenAI } from "openai";
import { setTimeout } from "node:timers/promises";
import { Logger, Validator, ErrorHandler, ProgressTracker, CONFIG } from "./utils.js";
import {
  buildGrammarCheckSystemPrompt,
  buildGrammarCheckUserPrompt,
  buildCustomGrammarCheckSystemPrompt
} from "./prompts.js";

// ====================== Tool initialization ======================
const logger = new Logger('GrammarCheck');
const errorHandler = new ErrorHandler(logger, CONFIG.LOGS.GRAMMAR_CHECK);

// Change-log files written by checkFileGrammar (`<file>.changes.<timestamp>.json`).
// They live next to the checked file and end in ".json", so the batch scanner
// must skip them explicitly.
const CHANGE_LOG_FILE_PATTERN = /\.changes\.\d+\.json$/;

// ====================== Escape / unescape dots inside JSON keys ======================

/**
 * Replace every "." in a single key segment with CONFIG.DOT_ESC so the
 * segment can be embedded in a dot-separated path.
 *
 * split/join is used instead of String.prototype.replace so that DOT_ESC is
 * always inserted literally (a replacement *string* would interpret "$&",
 * "$1", "$$", ...).
 *
 * @param {string} seg - One raw object key.
 * @returns {string} The key with dots replaced by CONFIG.DOT_ESC.
 */
function escapeKey(seg) {
  return seg.split(".").join(CONFIG.DOT_ESC);
}

/**
 * Inverse of escapeKey: turn every CONFIG.DOT_ESC occurrence back into ".".
 *
 * DOT_ESC is matched literally; building a RegExp from it would break (or
 * match the wrong text) as soon as it contained a regex metacharacter.
 *
 * @param {string} seg - One escaped path segment.
 * @returns {string} The original object key.
 */
function unescapeKey(seg) {
  return seg.split(CONFIG.DOT_ESC).join(".");
}

// ====================== Utility functions ======================

/**
 * Recursively collect the dot-separated paths of all leaf values.
 * Any non-null value whose typeof is "object" (arrays included) is descended
 * into; every key is passed through escapeKey first.
 *
 * @param {object} obj - Object to walk.
 * @param {string} [prefix=""] - Path accumulated so far.
 * @returns {string[]} Escaped leaf paths.
 */
function collectKeys(obj, prefix = "") {
  return Object.entries(obj).flatMap(([key, val]) => {
    const esc = escapeKey(key);
    const p = prefix ? `${prefix}.${esc}` : esc;
    if (val !== null && typeof val === "object") {
      return collectKeys(val, p);
    }
    return [p];
  });
}

// Nested object access: the path is split on "." and each segment is unescaped.
const nestedUtils = {
  /**
   * Read the value at `pathStr`; yields undefined when an intermediate
   * value is missing/falsy.
   */
  get(obj, pathStr) {
    return pathStr
      .split(".")
      .map(unescapeKey)
      .reduce((o, k) => (o || {})[k], obj);
  },

  /**
   * Write `value` at `pathStr`, creating (or overwriting non-object)
   * intermediate containers with plain objects as needed.
   */
  set(obj, pathStr, value) {
    const segs = pathStr.split(".").map(unescapeKey);
    const lastKey = segs[segs.length - 1];
    let cur = obj;
    for (const k of segs.slice(0, -1)) {
      if (!cur[k] || typeof cur[k] !== "object") cur[k] = {};
      cur = cur[k];
    }
    cur[lastKey] = value;
  },
};

/**
 * Split an array into consecutive chunks of at most `size` elements.
 *
 * @param {Array} arr - Source array (not modified).
 * @param {number} size - Chunk size; must be greater than zero.
 * @returns {Array[]} The chunks, in order.
 * @throws {RangeError} When `size` is not a positive number (a size of 0
 *   would otherwise loop forever).
 */
function chunkArray(arr, size) {
  if (!(size > 0)) {
    throw new RangeError(`chunkArray: size must be a positive number, got ${size}`);
  }
  const out = [];
  for (let i = 0; i < arr.length; i += size) {
    out.push(arr.slice(i, i + size));
  }
  return out;
}

// ====================== Placeholder handling ======================

/**
 * Replace every template variable ({{var}} or ${var}) with an opaque token
 * `__PH_<n>__` so the model cannot rewrite it.
 *
 * @param {string} text - Source text.
 * @returns {{text: string, map: Object<string, string>}} The tokenised text
 *   and a token -> original-variable map.
 */
function placeholderize(text) {
  const map = {};
  let index = 0;
  // Single pass with a callback: each match is replaced exactly where it was found.
  const replaced = text.replace(/(\{\{[\s\S]+?\}\}|\$\{[\s\S]+?\})/g, (match) => {
    const key = `__PH_${index}__`;
    index += 1;
    map[key] = match;
    return key;
  });
  return { text: replaced, map };
}

/**
 * Put the original template variables back in place of their tokens.
 *
 * A callback replacer is used so the original variable text is inserted
 * verbatim; with a replacement string, sequences such as "$$" or "$&"
 * inside a variable would be interpreted and corrupt the output.
 *
 * @param {string} text - Text containing `__PH_<n>__` tokens.
 * @param {Object<string, string>} map - Token -> original text, from placeholderize.
 * @returns {string} Text with known tokens restored; unknown tokens are left as-is.
 */
function restorePlaceholders(text, map) {
  return text.replace(/__PH_\d+__/g, (token) =>
    Object.prototype.hasOwnProperty.call(map, token) ? map[token] : token
  );
}

/**
 * Tokenise the template variables of every non-blank string in a batch.
 *
 * @param {Object<string, *>} batch - path -> value.
 * @returns {{newBatch: Object, maps: Object}} The encoded batch and, per
 *   key, the placeholder map needed to decode it (empty for untouched values).
 */
function encodeBatch(batch) {
  const maps = {};
  const newBatch = {};
  for (const [k, v] of Object.entries(batch)) {
    if (typeof v === "string" && v.trim().length > 0) {
      const { text, map } = placeholderize(v);
      newBatch[k] = text;
      maps[k] = map;
    } else {
      newBatch[k] = v;
      maps[k] = {};
    }
  }
  return { newBatch, maps };
}

/**
 * Restore placeholders in every string value of a model result.
 *
 * @param {Object<string, *>} result - Parsed model response.
 * @param {Object<string, Object>} maps - Per-key placeholder maps from encodeBatch.
 * @returns {Object<string, *>} Result with string values decoded; other values untouched.
 */
function decodeBatch(result, maps) {
  const out = {};
  for (const [k, v] of Object.entries(result)) {
    out[k] = typeof v === "string" ? restorePlaceholders(v, maps[k] || {}) : v;
  }
  return out;
}

/**
 * Canonical signature of the template variables in a text: all {{...}} and
 * ${...} occurrences, sorted and comma-joined. Two texts carry the same set
 * of variables exactly when their signatures are equal.
 *
 * @param {string} text
 * @returns {string}
 */
function templateVarSignature(text) {
  return (text.match(/(\{\{.*?\}\}|\$\{.*?\})/g) || []).sort().join();
}

/**
 * Turn a raw model response into the final path -> value map for a batch.
 *
 * For every entry of the original batch the corrected text is accepted only
 * when it is a string and carries exactly the same template variables as the
 * original; otherwise the original value is kept. Keeping the original for
 * missing / non-string corrections matters: returning `undefined` would be
 * recorded as a change and the key would vanish when the file is serialised.
 *
 * @param {Object<string, *>} batch - Original (un-encoded) batch.
 * @param {string} content - Raw JSON text returned by the model.
 * @param {Object<string, Object>} maps - Placeholder maps from encodeBatch.
 * @returns {Object<string, *>} Same keys as `batch`.
 * @throws {SyntaxError|Error} When `content` is not JSON or not a JSON object.
 */
function applyCorrections(batch, content, maps) {
  const data = JSON.parse(content);
  if (data === null || typeof data !== "object") {
    throw new Error("Grammar check response is not a JSON object");
  }
  const decoded = decodeBatch(data, maps);

  return Object.fromEntries(
    Object.entries(batch).map(([k, v]) => {
      if (typeof v !== "string") return [k, v];
      const corrected = decoded[k];
      if (typeof corrected !== "string") return [k, v];
      // Keep the original when the template variables no longer match.
      return [k, templateVarSignature(v) === templateVarSignature(corrected) ? corrected : v];
    })
  );
}

// ====================== Grammar checker factory ======================

/**
 * Build a function that grammar-checks one batch (path -> text) through the
 * chat completions API.
 *
 * The returned function never rejects because of an API/parse failure: on
 * error it asks `errorHandler.handleApiError` whether to retry, retries at
 * most once after the suggested wait (60 s when none is given), and finally
 * falls back to returning the batch unchanged.
 *
 * @param {AzureOpenAI} openai - Initialised client.
 * @param {string} [customPromptPath] - Forwarded to buildCustomGrammarCheckSystemPrompt.
 * @param {*} [extraRules] - Forwarded to buildCustomGrammarCheckSystemPrompt.
 * @returns {(batch: Object<string, *>) => Promise<Object<string, *>>}
 */
function createGrammarChecker(openai, customPromptPath, extraRules) {
  // Send one (already encoded) batch and return the raw message content.
  async function doRequest(batch) {
    // Throttle: wait the configured interval before every request.
    await setTimeout(CONFIG.REQUEST_INTERVAL);

    // Build the (possibly customised) system prompt.
    const systemPrompt = await buildCustomGrammarCheckSystemPrompt(customPromptPath, extraRules);

    const payload = {
      model: "gpt-4o-mini",
      messages: [
        { role: "system", content: systemPrompt },
        { role: "user", content: buildGrammarCheckUserPrompt() },
        { role: "user", content: JSON.stringify(batch) },
      ],
      temperature: 0.1, // low randomness for consistent corrections
      response_format: { type: "json_object" },
    };

    logger.apiRequest(batch, '语法检查');
    const resp = await openai.chat.completions.create(payload);
    const content = resp.choices[0].message.content;
    logger.apiResponse(content);
    return content;
  }

  return async (batch) => {
    const { newBatch, maps } = encodeBatch(batch);
    const batchKeys = Object.keys(batch);

    try {
      return applyCorrections(batch, await doRequest(newBatch), maps);
    } catch (err) {
      const errorResult = errorHandler.handleApiError(err, () => doRequest(newBatch));

      if (errorResult.shouldRetry && errorResult.retryCallback) {
        await setTimeout(errorResult.waitMs || 60000);
        try {
          return applyCorrections(batch, await errorResult.retryCallback(), maps);
        } catch (retryErr) {
          // The retry is best-effort, but its failure is still recorded
          // instead of being silently discarded.
          await errorHandler.logError(retryErr, { batch: batchKeys, operation: 'retry' });
        }
      }

      await errorHandler.logError(err, { batch: batchKeys });
      // Fall back to the original values.
      return { ...batch };
    }
  };
}

/**
 * Validate the optional custom prompt file and log which optional inputs
 * (custom prompt, extra rules) are in effect.
 *
 * @param {string} [customPromptPath]
 * @param {*} [extraRules]
 * @throws {Error} When `customPromptPath` is given but fails Validator.validateFile;
 *   the underlying error is attached as `cause`.
 */
async function validatePromptOptions(customPromptPath, extraRules) {
  if (customPromptPath) {
    try {
      await Validator.validateFile(customPromptPath);
      logger.info(`使用自定义提示词: ${path.basename(customPromptPath)}`);
    } catch (err) {
      throw new Error(`自定义提示词文件无效: ${err.message}`, { cause: err });
    }
  }

  if (extraRules) {
    logger.info('应用额外检查规则');
  }
}

// ====================== Main grammar check function ======================

/**
 * Grammar-check every non-blank string leaf of one JSON file.
 *
 * Unless `dryRun` is set and at least one correction was found, the original
 * file is copied to `<file>.backup.<ts>`, overwritten with the corrected
 * content, and the list of changes is written to `<file>.changes.<ts>.json`
 * (both use the same timestamp).
 *
 * @param {string} filePath - JSON file to check.
 * @param {string} token - API key.
 * @param {boolean} [dryRun=false] - Report changes without touching the file.
 * @param {string} [customPromptPath] - Optional custom prompt file.
 * @param {*} [extraRules] - Optional extra rules forwarded to the prompt builder.
 * @returns {Promise<{changed: boolean, changes: Array, dryRun?: boolean,
 *   backupPath?: string, changeLogPath?: string}>}
 */
async function checkFileGrammar(filePath, token, dryRun = false, customPromptPath, extraRules) {
  // Argument validation
  const validatedFilePath = await Validator.validateJsonFile(filePath);
  const validatedToken = Validator.validateApiKey(token);
  await validatePromptOptions(customPromptPath, extraRules);

  // Initialise the OpenAI client
  const openai = new AzureOpenAI({
    endpoint: CONFIG.ENDPOINT,
    apiKey: validatedToken,
    apiVersion: CONFIG.API_VERSION,
  });

  const grammarCheck = createGrammarChecker(openai, customPromptPath, extraRules);

  logger.file('read', validatedFilePath);
  const originalObj = JSON.parse(await fs.readFile(validatedFilePath, "utf8"));
  // Deep copy; the data came from JSON.parse so a JSON round-trip is lossless.
  const workingObj = JSON.parse(JSON.stringify(originalObj));

  // Collect every string entry that needs checking
  const stringKeys = collectKeys(originalObj).filter((k) => {
    const value = nestedUtils.get(originalObj, k);
    return typeof value === "string" && value.trim().length > 0;
  });

  if (!stringKeys.length) {
    logger.skip('文件中没有需要检查的字符串内容');
    return { changed: false, changes: [] };
  }

  logger.start(`开始检查 ${stringKeys.length} 条文本内容`);
  if (dryRun) {
    logger.info('运行在预览模式,不会修改文件');
  }

  let changedCount = 0;
  const changes = [];
  const chunks = chunkArray(stringKeys, CONFIG.GRAMMAR_BATCH_SIZE);
  const progress = new ProgressTracker(logger, chunks.length);

  // Batches are processed sequentially on purpose: each request is throttled.
  for (let i = 0; i < chunks.length; i++) {
    const keys = chunks[i];
    logger.progress(`处理批次 ${i + 1}/${chunks.length} (${keys.length} 条目)`);

    const batch = {};
    for (const k of keys) {
      batch[k] = nestedUtils.get(originalObj, k);
    }

    const result = await grammarCheck(batch);

    // Detect and record changes
    for (const [k, correctedValue] of Object.entries(result)) {
      const originalValue = nestedUtils.get(originalObj, k);
      if (originalValue === correctedValue) continue;

      changedCount++;
      changes.push({
        path: k,
        original: originalValue,
        corrected: correctedValue,
      });

      if (!dryRun) {
        nestedUtils.set(workingObj, k, correctedValue);
      }

      logger.result(`发现修改:`);
      logger.info(` 路径: ${k}`);
      logger.info(` 原文: ${originalValue}`);
      logger.info(` 修正: ${correctedValue}`);
    }

    progress.update(true);
  }

  progress.finish();

  logger.stats('检查完成统计', {
    '总条目数': stringKeys.length,
    '修改条目': changedCount,
    '修改比例': `${((changedCount / stringKeys.length) * 100).toFixed(1)}%`
  });

  if (changedCount === 0) {
    logger.success('未发现需要修正的语法或拼写错误');
    return { changed: false, changes: [] };
  }

  if (dryRun) {
    logger.info('这是预览模式,实际文件未被修改');
    logger.info('如需应用修改,请移除 --dry-run 参数重新运行');
    return { changed: true, changes, dryRun: true };
  }

  // One timestamp for both artefacts so the backup and its change log pair up.
  const stamp = Date.now();

  // Back up the original file
  const backupPath = `${validatedFilePath}.backup.${stamp}`;
  await fs.copyFile(validatedFilePath, backupPath);
  logger.file('backup', backupPath);

  // Write the corrected content
  await fs.writeFile(validatedFilePath, `${JSON.stringify(workingObj, null, 2)}\n`);
  logger.file('write', validatedFilePath);

  // Save the change log
  const changeLogPath = `${validatedFilePath}.changes.${stamp}.json`;
  await fs.writeFile(changeLogPath, JSON.stringify(changes, null, 2));
  logger.info(`变更记录已保存: ${path.basename(changeLogPath)}`);

  return { changed: true, changes, backupPath, changeLogPath, dryRun: false };
}

// ====================== Batch grammar check function ======================

/**
 * Grammar-check every JSON file directly inside a directory.
 *
 * `_config.json` and change logs produced by earlier runs
 * (`*.changes.<ts>.json`) are skipped. Files are processed in groups of
 * `parallel`; a failure in one file is logged and reported in the result
 * without aborting the others.
 *
 * @param {string} dirPath - Directory to scan (not recursive).
 * @param {string} token - API key.
 * @param {boolean} [dryRun=false] - Report changes without touching files.
 * @param {number} [parallel=1] - Number of files processed concurrently.
 * @param {string} [customPromptPath] - Optional custom prompt file.
 * @param {*} [extraRules] - Optional extra rules forwarded to the prompt builder.
 * @returns {Promise<{processedFiles: number, totalChanges: number, results: Array,
 *   successCount?: number, failCount?: number}>}
 */
export async function batchCheckGrammar(dirPath, token, dryRun = false, parallel = 1, customPromptPath, extraRules) {
  // Argument validation
  const validatedDirPath = await Validator.validateDirectory(dirPath);
  const validatedToken = Validator.validateApiKey(token);
  const validatedParallel = Validator.validateParallel(parallel);
  await validatePromptOptions(customPromptPath, extraRules);

  // Collect the JSON files to check
  const files = (await fs.readdir(validatedDirPath))
    .filter((f) => f.endsWith(".json") && f !== "_config.json" && !CHANGE_LOG_FILE_PATTERN.test(f))
    .map((f) => path.join(validatedDirPath, f));

  if (files.length === 0) {
    logger.skip('目录中没有找到可检查的 JSON 文件');
    return { processedFiles: 0, totalChanges: 0, results: [] };
  }

  logger.start(`开始批量语法检查: ${files.length} 个文件`);
  logger.info(`并行处理数量: ${validatedParallel}`);
  if (dryRun) {
    logger.info('运行在预览模式,不会修改文件');
  }

  const results = [];
  let totalChanges = 0;

  // Check one file; never rejects so Promise.all below cannot fail fast.
  const processFile = async (filePath) => {
    try {
      logger.file('check', filePath);
      const result = await checkFileGrammar(filePath, validatedToken, dryRun, customPromptPath, extraRules);
      totalChanges += result.changes.length;
      return { file: filePath, success: true, ...result };
    } catch (err) {
      logger.error(`处理文件失败 ${path.basename(filePath)}: ${err.message}`);
      await errorHandler.logError(err, { file: filePath, operation: 'batch-check' });
      return { file: filePath, success: false, error: err.message };
    }
  };

  // Process the files group by group
  const progress = new ProgressTracker(logger, files.length);

  for (let i = 0; i < files.length; i += validatedParallel) {
    const group = files.slice(i, i + validatedParallel);
    const groupResults = await Promise.all(group.map(processFile));
    results.push(...groupResults);

    // Update progress
    for (const result of groupResults) {
      progress.update(result.success);
    }
  }

  progress.finish();

  // Summary
  const failed = results.filter((r) => !r.success);
  const failCount = failed.length;
  const successCount = results.length - failCount;

  logger.stats('批量处理完成统计', {
    '处理文件数': files.length,
    '成功处理': successCount,
    '处理失败': failCount,
    '总修改数': totalChanges,
    '并行数量': validatedParallel
  });

  if (failCount > 0) {
    logger.warn('以下文件处理失败:');
    for (const result of failed) {
      logger.error(` - ${path.basename(result.file)}: ${result.error}`);
    }
  }

  return {
    processedFiles: files.length,
    successCount,
    failCount,
    totalChanges,
    results
  };
}

// ====================== Exported command functions ======================

/**
 * Resolve the API token via TokenResolver (loaded on demand from ./utils.js)
 * and log where it came from.
 *
 * @param {string} [token] - Token as given on the command line.
 * @returns {Promise<string>} The resolved token.
 */
async function resolveCommandToken(token) {
  const { TokenResolver } = await import("./utils.js");
  const resolvedToken = TokenResolver.resolveToken(token);
  const tokenSource = TokenResolver.getTokenSource(token);
  logger.info(`Token 来源: ${tokenSource}`);
  return resolvedToken;
}

/**
 * CLI entry point: grammar-check a single file.
 * On any failure the error is logged and the process exits with code 1.
 *
 * @param {{file: string, token?: string, dryRun?: boolean,
 *   customPrompt?: string, extraRules?: *}} options
 */
export async function grammarCheckCommand(options) {
  const { file, token, dryRun, customPrompt, extraRules } = options;

  try {
    logger.start('开始语法检查任务');

    // Resolve the token
    const resolvedToken = await resolveCommandToken(token);

    await checkFileGrammar(file, resolvedToken, dryRun, customPrompt, extraRules);
    logger.finish('语法检查完成!');
  } catch (err) {
    logger.error(`语法检查失败: ${err.message}`);
    await errorHandler.logError(err, { command: 'grammar-check', options });
    process.exit(1);
  }
}

/**
 * CLI entry point: grammar-check every JSON file in a directory.
 * On any failure the error is logged and the process exits with code 1.
 *
 * @param {{dir: string, token?: string, dryRun?: boolean, parallel?: string|number,
 *   customPrompt?: string, extraRules?: *}} options
 */
export async function batchGrammarCheckCommand(options) {
  const { dir, token, dryRun, parallel, customPrompt, extraRules } = options;

  try {
    logger.start('开始批量语法检查任务');

    // Resolve the token
    const resolvedToken = await resolveCommandToken(token);

    // Explicit radix so inputs like "0x10" or "010" are read as decimal;
    // anything unparsable (or 0) falls back to 1.
    const parallelNum = Number.parseInt(parallel, 10) || 1;
    await batchCheckGrammar(dir, resolvedToken, dryRun, parallelNum, customPrompt, extraRules);
    logger.finish('批量语法检查完成!');
  } catch (err) {
    logger.error(`批量语法检查失败: ${err.message}`);
    await errorHandler.logError(err, { command: 'batch-grammar-check', options });
    process.exit(1);
  }
}