@alauda-fe/i18n-tools
Version:
基于 Azure OpenAI 的 JSON i18n 文件翻译和英文语法检查工具集
468 lines (398 loc) • 15.2 kB
JavaScript
import fs from "node:fs/promises";
import path from "node:path";
import { AzureOpenAI } from "openai";
import { setTimeout } from "node:timers/promises";
import { Logger, Validator, ErrorHandler, ProgressTracker, CONFIG } from "./utils.js";
import {
buildGrammarCheckSystemPrompt,
buildGrammarCheckUserPrompt,
buildCustomGrammarCheckSystemPrompt
} from "./prompts.js";
// ====================== Tool initialization ======================
// Module-wide logger; 'GrammarCheck' is the label handed to the Logger constructor.
const logger = new Logger('GrammarCheck');
// Shared error handler, built from the logger and the CONFIG.LOGS.GRAMMAR_CHECK setting
// (presumably the destination of the grammar-check error log — confirm in utils.js).
const errorHandler = new ErrorHandler(logger, CONFIG.LOGS.GRAMMAR_CHECK);
// ====================== Escape / unescape dots inside JSON keys ======================
/**
 * Replaces every literal "." in one key segment with CONFIG.DOT_ESC, so the
 * segment can be placed in a dot-separated path without being split apart.
 *
 * A function replacer is used on purpose: when the replacement is given as a
 * plain string, String.prototype.replace interprets "$" sequences in it
 * ("$$", "$&", "$'" ...), which would mangle a DOT_ESC marker containing "$".
 *
 * @param {string} seg - One raw object key.
 * @returns {string} The key with each "." swapped for CONFIG.DOT_ESC.
 */
function escapeKey(seg) {
  return seg.replace(/\./g, () => CONFIG.DOT_ESC);
}
/**
 * Reverses the dot-escaping of a key segment: every occurrence of
 * CONFIG.DOT_ESC in one path segment is turned back into a literal ".".
 *
 * The marker is matched as plain text (split/join) rather than compiled into a
 * RegExp, so a marker that contains regex metacharacters such as "[", "$" or
 * "\" is still matched character for character.
 *
 * @param {string} seg - One segment of a dot-separated path.
 * @returns {string} The segment with each CONFIG.DOT_ESC replaced by ".".
 */
function unescapeKey(seg) {
  return seg.split(CONFIG.DOT_ESC).join(".");
}
// ====================== Utility functions ======================
/**
 * Walks `obj` depth-first and returns the dot-separated path of every leaf.
 *
 * Each key is passed through escapeKey before being appended, so a "." inside
 * a key is not confused with the path separator. Any non-null value whose
 * typeof is "object" (arrays included) is descended into; everything else,
 * including null, counts as a leaf. An empty object contributes no paths.
 *
 * @param {object} obj - Object (or array) to walk.
 * @param {string} [prefix=""] - Path accumulated so far; used by the recursion.
 * @returns {string[]} Leaf paths in traversal order.
 */
function collectKeys(obj, prefix = "") {
  const paths = [];
  for (const [rawKey, child] of Object.entries(obj)) {
    const segment = escapeKey(rawKey);
    const fullPath = prefix ? `${prefix}.${segment}` : segment;
    const isContainer = child !== null && typeof child === "object";
    if (!isContainer) {
      paths.push(fullPath);
      continue;
    }
    for (const nestedPath of collectKeys(child, fullPath)) {
      paths.push(nestedPath);
    }
  }
  return paths;
}
// Nested object read/write helpers. Paths are dot-separated; every segment is
// run through unescapeKey after splitting, so escaped dots inside a key are
// turned back into real dots before the property lookup.
const nestedUtils = {
  /**
   * Reads the value stored under `pathStr`.
   * A falsy intermediate value is treated as an empty object, so a path that
   * does not exist yields undefined instead of throwing.
   */
  get(obj, pathStr) {
    let node = obj;
    for (const rawSegment of pathStr.split(".")) {
      const container = node || {};
      node = container[unescapeKey(rawSegment)];
    }
    return node;
  },
  /**
   * Writes `value` under `pathStr`, mutating `obj` in place.
   * Any intermediate entry that is missing, falsy, or not an object is
   * replaced by a fresh empty object on the way down.
   */
  set(obj, pathStr, value) {
    const parents = pathStr.split(".").map(unescapeKey);
    // The last segment names the property to assign; the rest are containers.
    const leafKey = parents.pop();
    let cursor = obj;
    for (const key of parents) {
      const existing = cursor[key];
      if (!existing || typeof existing !== "object") {
        cursor[key] = {};
      }
      cursor = cursor[key];
    }
    cursor[leafKey] = value;
  },
};
/**
 * Splits an array into consecutive chunks of at most `size` elements.
 *
 * @param {Array} arr - Source array; it is not modified.
 * @param {number} size - Chunk length; must be a positive integer.
 * @returns {Array<Array>} The chunks in order; an empty input yields [].
 * @throws {RangeError} When `size` is not a positive integer. Without this
 *   check a size of 0 (or a negative one) would never advance the loop index
 *   and loop forever, and an undefined/NaN size would quietly produce a single
 *   empty chunk.
 */
function chunkArray(arr, size) {
  if (!Number.isInteger(size) || size <= 0) {
    throw new RangeError(`chunkArray: size must be a positive integer, got ${size}`);
  }
  const out = [];
  for (let i = 0; i < arr.length; i += size) {
    out.push(arr.slice(i, i + size));
  }
  return out;
}
// ====================== Placeholder handling ======================
/**
 * Masks template variables in `text` with opaque tokens.
 *
 * Both {{var}} and ${var} forms are recognised; the variable body may span
 * several lines. Each match, in order of appearance, is replaced by
 * `__PH_<index>__` and remembered in the returned map.
 *
 * @param {string} text - Source text.
 * @returns {{text: string, map: Object<string, string>}} The masked text and a
 *   token -> original-placeholder lookup.
 */
function placeholderize(text) {
  const map = {};
  let nextIndex = 0;
  const masked = text.replace(/(\{\{[\s\S]+?\}\}|\$\{[\s\S]+?\})/g, (placeholder) => {
    const token = `__PH_${nextIndex}__`;
    nextIndex += 1;
    map[token] = placeholder;
    return token;
  });
  return { text: masked, map };
}
/**
 * Puts the original template variables back into a masked string.
 *
 * For every token -> original pair in `map`, all occurrences of the token in
 * `text` are replaced by the original placeholder.
 *
 * The substitution is done with split/join so the original placeholder is
 * inserted verbatim. Passing it to String.prototype.replace as a replacement
 * string would interpret "$" sequences inside it ("$$", "$&", "$'" ...), which
 * corrupts placeholders such as {{ '$' + price }}.
 *
 * @param {string} text - Text containing placeholder tokens.
 * @param {Object<string, string>} map - Token -> original placeholder lookup.
 * @returns {string} The text with every known token restored.
 */
function restorePlaceholders(text, map) {
  let restored = text;
  for (const [key, orig] of Object.entries(map)) {
    restored = restored.split(key).join(orig);
  }
  return restored;
}
/**
 * Masks the template variables of every entry in a batch.
 *
 * Non-empty string values are passed through placeholderize; any other value
 * (non-string, empty, or whitespace-only) is copied unchanged and gets an
 * empty placeholder map.
 *
 * @param {Object<string, *>} batch - Key -> value entries to encode.
 * @returns {{newBatch: Object<string, *>, maps: Object<string, Object>}} The
 *   encoded batch plus, per key, the placeholder map needed to decode it.
 */
function encodeBatch(batch) {
  const newBatch = {};
  const maps = {};
  Object.keys(batch).forEach((field) => {
    const raw = batch[field];
    const hasContent = typeof raw === "string" && raw.trim().length > 0;
    if (!hasContent) {
      newBatch[field] = raw;
      maps[field] = {};
      return;
    }
    const encoded = placeholderize(raw);
    newBatch[field] = encoded.text;
    maps[field] = encoded.map;
  });
  return { newBatch, maps };
}
/**
 * Restores template variables in every string entry of a result object.
 *
 * String values are passed to restorePlaceholders with the map stored under
 * the same key (or an empty map when none exists); other values are copied
 * unchanged.
 *
 * @param {Object<string, *>} result - Key -> value entries to decode.
 * @param {Object<string, Object>} maps - Per-key placeholder maps.
 * @returns {Object<string, *>} A new object holding the decoded entries.
 */
function decodeBatch(result, maps) {
  const restored = {};
  Object.keys(result).forEach((field) => {
    const value = result[field];
    if (typeof value !== "string") {
      restored[field] = value;
      return;
    }
    const lookup = maps[field] || {};
    restored[field] = restorePlaceholders(value, lookup);
  });
  return restored;
}
// ====================== Grammar checker factory ======================
/**
 * Builds an async function that grammar-checks one batch of strings through
 * the chat-completions API of the given client.
 *
 * The returned function takes a `{ key: text }` batch and resolves to an
 * object with the same keys. A corrected value is accepted only when it is a
 * string carrying exactly the same set of template variables ({{var}} /
 * ${var}) as the original; in every other case the original value is kept.
 * If the request fails (also after one optional retry) the batch is returned
 * unchanged and the error is logged through errorHandler.
 *
 * @param {object} openai - Client exposing `chat.completions.create(payload)`.
 * @param {string} [customPromptPath] - Forwarded to buildCustomGrammarCheckSystemPrompt.
 * @param {*} [extraRules] - Forwarded to buildCustomGrammarCheckSystemPrompt.
 * @returns {function(Object<string, *>): Promise<Object<string, *>>} The batch checker.
 */
function createGrammarChecker(openai, customPromptPath, extraRules) {
  // Matches {{var}} and ${var} template variables (single-line bodies).
  const templateVarPattern = /(\{\{.*?\}\}|\$\{.*?\})/g;

  // Order-independent fingerprint of the template variables found in `text`.
  function templateSignature(text) {
    return (text.match(templateVarPattern) || []).sort().join();
  }

  // Sends one already-encoded batch to the model and returns the raw reply text.
  async function doRequest(batch) {
    // Pause before each call; the delay comes from CONFIG.REQUEST_INTERVAL.
    await setTimeout(CONFIG.REQUEST_INTERVAL);
    // Build the (optionally customised) system prompt
    const systemPrompt = await buildCustomGrammarCheckSystemPrompt(customPromptPath, extraRules);
    const payload = {
      model: "gpt-4o-mini",
      messages: [
        { role: "system", content: systemPrompt },
        { role: "user", content: buildGrammarCheckUserPrompt() },
        { role: "user", content: JSON.stringify(batch) },
      ],
      temperature: 0.1, // low randomness keeps the output consistent
      response_format: { type: "json_object" },
    };
    logger.apiRequest(batch, '语法检查');
    const resp = await openai.chat.completions.create(payload);
    const content = resp.choices[0].message.content;
    logger.apiResponse(content);
    return content;
  }

  // Parses a raw reply, restores placeholders and merges it with the original
  // batch, keeping the original value wherever the correction is unusable.
  function mergeCorrections(content, batch, maps) {
    const decoded = decodeBatch(JSON.parse(content), maps);
    return Object.fromEntries(
      Object.entries(batch).map(([k, v]) => {
        if (typeof v !== "string") return [k, v];
        const corrected = decoded[k];
        // A key the model omitted, or answered with a non-string, must not
        // replace the original: an undefined value would later be recorded as
        // a change and dropped from the JSON output, and calling .match on a
        // non-string would throw and discard the whole batch.
        if (typeof corrected !== "string") return [k, v];
        // Template variables must survive unchanged; otherwise keep the original.
        return [k, templateSignature(v) === templateSignature(corrected) ? corrected : v];
      })
    );
  }

  return async (batch) => {
    const { newBatch, maps } = encodeBatch(batch);
    try {
      return mergeCorrections(await doRequest(newBatch), batch, maps);
    } catch (err) {
      const errorResult = errorHandler.handleApiError(err, () => doRequest(newBatch));
      if (errorResult.shouldRetry && errorResult.retryCallback) {
        await setTimeout(errorResult.waitMs || 60000);
        try {
          return mergeCorrections(await errorResult.retryCallback(), batch, maps);
        } catch (retryErr) {
          // The retry is best-effort, but its failure is recorded instead of
          // being silently discarded.
          await errorHandler.logError(retryErr, { batch: Object.keys(batch), retry: true });
        }
      }
      await errorHandler.logError(err, { batch: Object.keys(batch) });
      // Fall back to the original values
      return Object.fromEntries(Object.entries(batch));
    }
  };
}
// ====================== Main grammar-check routine ======================
/**
 * Grammar-checks every non-empty string value of one JSON file.
 *
 * The file is read and its leaf strings are sent to the model in batches of
 * CONFIG.GRAMMAR_BATCH_SIZE. Every accepted correction is recorded. Unless
 * `dryRun` is set and at least one change was found, the original file is
 * copied to `<file>.backup.<timestamp>`, the corrected JSON is written back,
 * and the change list is saved to `<file>.changes.<timestamp>.json`.
 *
 * @param {string} filePath - JSON file to check.
 * @param {string} token - API key for the Azure OpenAI client.
 * @param {boolean} [dryRun=false] - When true, report changes without writing.
 * @param {string} [customPromptPath] - Optional custom prompt file.
 * @param {*} [extraRules] - Forwarded unchanged to createGrammarChecker.
 * @returns {Promise<{changed: boolean, changes: Array<{path: string, original: string, corrected: string}>,
 *   dryRun?: boolean, backupPath?: string, changeLogPath?: string}>}
 * @throws {Error} When the custom prompt file fails validation; errors from the
 *   validators, file I/O and JSON.parse propagate to the caller.
 */
async function checkFileGrammar(filePath, token, dryRun = false, customPromptPath, extraRules) {
  // Validate arguments
  const validatedFilePath = await Validator.validateJsonFile(filePath);
  const validatedToken = Validator.validateApiKey(token);

  // Validate the custom prompt file, when one is supplied
  if (customPromptPath) {
    try {
      await Validator.validateFile(customPromptPath);
      logger.info(`使用自定义提示词: ${path.basename(customPromptPath)}`);
    } catch (err) {
      throw new Error(`自定义提示词文件无效: ${err.message}`);
    }
  }
  if (extraRules) {
    logger.info('应用额外检查规则');
  }

  // Create the OpenAI client
  const openai = new AzureOpenAI({
    endpoint: CONFIG.ENDPOINT,
    apiKey: validatedToken,
    apiVersion: CONFIG.API_VERSION,
  });
  const grammarCheck = createGrammarChecker(openai, customPromptPath, extraRules);

  logger.file('read', validatedFilePath);
  const originalObj = JSON.parse(await fs.readFile(validatedFilePath, "utf8"));
  // Deep copy: corrections are applied to the copy, comparisons use the original.
  const workingObj = JSON.parse(JSON.stringify(originalObj));

  // Collect every non-empty string entry
  const allKeys = collectKeys(originalObj);
  const stringKeys = allKeys.filter((k) => {
    const value = nestedUtils.get(originalObj, k);
    return typeof value === "string" && value.trim().length > 0;
  });
  if (!stringKeys.length) {
    logger.skip('文件中没有需要检查的字符串内容');
    return { changed: false, changes: [] };
  }

  logger.start(`开始检查 ${stringKeys.length} 条文本内容`);
  if (dryRun) {
    logger.info('运行在预览模式,不会修改文件');
  }

  let changedCount = 0;
  const changes = [];
  const chunks = chunkArray(stringKeys, CONFIG.GRAMMAR_BATCH_SIZE);
  const progress = new ProgressTracker(logger, chunks.length);

  for (let i = 0; i < chunks.length; i++) {
    const keys = chunks[i];
    logger.progress(`处理批次 ${i + 1}/${chunks.length} (${keys.length} 条目)`);
    const batch = {};
    keys.forEach((k) => {
      batch[k] = nestedUtils.get(originalObj, k);
    });
    const result = await grammarCheck(batch);

    // Detect and record changes
    for (const [k, correctedValue] of Object.entries(result)) {
      // Every checked entry is a string, so only a string may replace it.
      // Anything else (e.g. undefined for a key missing from the reply) would
      // be counted as a change, and JSON.stringify drops undefined values, so
      // writing it back would delete the entry from the file.
      if (typeof correctedValue !== "string") continue;
      const originalValue = nestedUtils.get(originalObj, k);
      if (originalValue === correctedValue) continue;

      changedCount++;
      changes.push({
        path: k,
        original: originalValue,
        corrected: correctedValue,
      });
      if (!dryRun) {
        nestedUtils.set(workingObj, k, correctedValue);
      }
      logger.result(`发现修改:`);
      logger.info(` 路径: ${k}`);
      logger.info(` 原文: ${originalValue}`);
      logger.info(` 修正: ${correctedValue}`);
    }
    progress.update(true);
  }
  progress.finish();

  logger.stats('检查完成统计', {
    '总条目数': stringKeys.length,
    '修改条目': changedCount,
    '修改比例': `${((changedCount / stringKeys.length) * 100).toFixed(1)}%`
  });

  if (changedCount === 0) {
    logger.success('未发现需要修正的语法或拼写错误');
    return { changed: false, changes: [] };
  }

  if (dryRun) {
    logger.info('这是预览模式,实际文件未被修改');
    logger.info('如需应用修改,请移除 --dry-run 参数重新运行');
    return { changed: true, changes, dryRun: true };
  } else {
    // Back up the original file
    const backupPath = validatedFilePath + '.backup.' + Date.now();
    await fs.copyFile(validatedFilePath, backupPath);
    logger.file('backup', backupPath);
    // Write the corrected content
    await fs.writeFile(validatedFilePath, JSON.stringify(workingObj, null, 2) + '\n');
    logger.file('write', validatedFilePath);
    // Save the change record
    const changeLogPath = validatedFilePath + '.changes.' + Date.now() + '.json';
    await fs.writeFile(changeLogPath, JSON.stringify(changes, null, 2));
    logger.info(`变更记录已保存: ${path.basename(changeLogPath)}`);
    return {
      changed: true,
      changes,
      backupPath,
      changeLogPath,
      dryRun: false
    };
  }
}
// ====================== Batch grammar-check routine ======================
/**
 * Runs checkFileGrammar over the JSON files found directly inside a directory.
 *
 * Files are processed in groups of `parallel`; each group runs concurrently
 * and the next group starts once the whole group has settled. A failure in one
 * file is logged and reported in the result list without stopping the others.
 *
 * Skipped files: `_config.json`, and the `<file>.changes.<timestamp>.json`
 * change logs that checkFileGrammar writes next to a corrected file. Those
 * logs also end in ".json" and would otherwise be grammar-checked as if they
 * were translation files on the next run.
 *
 * @param {string} dirPath - Directory to scan (not recursive).
 * @param {string} token - API key passed on to checkFileGrammar.
 * @param {boolean} [dryRun=false] - When true, no file is modified.
 * @param {number} [parallel=1] - Number of files processed concurrently.
 * @param {string} [customPromptPath] - Optional custom prompt file.
 * @param {*} [extraRules] - Forwarded unchanged to checkFileGrammar.
 * @returns {Promise<{processedFiles: number, totalChanges: number, results: Array,
 *   successCount?: number, failCount?: number}>} Summary of the run.
 * @throws {Error} When the custom prompt file fails validation; validator and
 *   readdir errors propagate to the caller.
 */
export async function batchCheckGrammar(dirPath, token, dryRun = false, parallel = 1, customPromptPath, extraRules) {
  // Validate arguments
  const validatedDirPath = await Validator.validateDirectory(dirPath);
  const validatedToken = Validator.validateApiKey(token);
  const validatedParallel = Validator.validateParallel(parallel);

  // Validate the custom prompt file, when one is supplied
  if (customPromptPath) {
    try {
      await Validator.validateFile(customPromptPath);
      logger.info(`使用自定义提示词: ${path.basename(customPromptPath)}`);
    } catch (err) {
      throw new Error(`自定义提示词文件无效: ${err.message}`);
    }
  }
  if (extraRules) {
    logger.info('应用额外检查规则');
  }

  // Collect the JSON files to check, leaving out generated change logs
  const changeLogPattern = /\.changes\.\d+\.json$/;
  const files = (await fs.readdir(validatedDirPath))
    .filter((f) => f.endsWith(".json") && f !== "_config.json" && !changeLogPattern.test(f))
    .map((f) => path.join(validatedDirPath, f));
  if (files.length === 0) {
    logger.skip('目录中没有找到可检查的 JSON 文件');
    return { processedFiles: 0, totalChanges: 0, results: [] };
  }

  logger.start(`开始批量语法检查: ${files.length} 个文件`);
  logger.info(`并行处理数量: ${validatedParallel}`);
  if (dryRun) {
    logger.info('运行在预览模式,不会修改文件');
  }

  const results = [];

  // Checks one file and converts any failure into a result record
  const processFile = async (filePath) => {
    try {
      logger.file('check', filePath);
      const result = await checkFileGrammar(filePath, validatedToken, dryRun, customPromptPath, extraRules);
      return {
        file: filePath,
        success: true,
        ...result
      };
    } catch (err) {
      logger.error(`处理文件失败 ${path.basename(filePath)}: ${err.message}`);
      await errorHandler.logError(err, { file: filePath, operation: 'batch-check' });
      return {
        file: filePath,
        success: false,
        error: err.message
      };
    }
  };

  // Process the files group by group
  const progress = new ProgressTracker(logger, files.length);
  for (let i = 0; i < files.length; i += validatedParallel) {
    const batch = files.slice(i, i + validatedParallel);
    const batchResults = await Promise.all(batch.map(processFile));
    results.push(...batchResults);
    // Update progress
    batchResults.forEach(result => progress.update(result.success));
  }
  progress.finish();

  // Summary, derived from the collected results rather than from a counter
  // mutated inside the concurrently running workers.
  const successCount = results.filter(r => r.success).length;
  const failCount = results.filter(r => !r.success).length;
  const totalChanges = results.reduce(
    (sum, r) => sum + (r.success ? r.changes.length : 0),
    0
  );
  logger.stats('批量处理完成统计', {
    '处理文件数': files.length,
    '成功处理': successCount,
    '处理失败': failCount,
    '总修改数': totalChanges,
    '并行数量': validatedParallel
  });
  if (failCount > 0) {
    logger.warn('以下文件处理失败:');
    results.filter(r => !r.success).forEach(result => {
      logger.error(` - ${path.basename(result.file)}: ${result.error}`);
    });
  }
  return {
    processedFiles: files.length,
    successCount,
    failCount,
    totalChanges,
    results
  };
}
// ====================== Exported command functions ======================
/**
 * CLI entry point for checking a single file.
 *
 * Resolves the API token through TokenResolver, then runs checkFileGrammar.
 * On any failure the error is logged and the process exits with code 1.
 *
 * @param {{file: string, token?: string, dryRun?: boolean, customPrompt?: string, extraRules?: *}} options
 *   Parsed command-line options.
 * @returns {Promise<void>}
 */
export async function grammarCheckCommand(options) {
  const { file, token, dryRun, customPrompt, extraRules } = options;
  try {
    logger.start('开始语法检查任务');
    // Resolve the token (environment variables are supported)
    const { TokenResolver } = await import("./utils.js");
    const resolvedToken = TokenResolver.resolveToken(token);
    const tokenSource = TokenResolver.getTokenSource(token);
    logger.info(`Token 来源: ${tokenSource}`);
    await checkFileGrammar(file, resolvedToken, dryRun, customPrompt, extraRules);
    logger.finish('语法检查完成!');
  } catch (err) {
    logger.error(`语法检查失败: ${err.message}`);
    // The options are handed to the error handler as context and may end up in
    // a log, so the credential is masked before it leaves this function.
    const safeOptions = token ? { ...options, token: '[REDACTED]' } : options;
    await errorHandler.logError(err, { command: 'grammar-check', options: safeOptions });
    process.exit(1);
  }
}
/**
 * CLI entry point for checking every JSON file in a directory.
 *
 * Resolves the API token through TokenResolver, parses the `parallel` option
 * as a base-10 integer (falling back to 1 when it is missing, zero or not a
 * number), then runs batchCheckGrammar. On any failure the error is logged and
 * the process exits with code 1.
 *
 * @param {{dir: string, token?: string, dryRun?: boolean, parallel?: (string|number),
 *   customPrompt?: string, extraRules?: *}} options - Parsed command-line options.
 * @returns {Promise<void>}
 */
export async function batchGrammarCheckCommand(options) {
  const { dir, token, dryRun, parallel, customPrompt, extraRules } = options;
  try {
    logger.start('开始批量语法检查任务');
    // Resolve the token (environment variables are supported)
    const { TokenResolver } = await import("./utils.js");
    const resolvedToken = TokenResolver.resolveToken(token);
    const tokenSource = TokenResolver.getTokenSource(token);
    logger.info(`Token 来源: ${tokenSource}`);
    // Explicit radix: without it a value such as "0x10" is read as hexadecimal.
    const parallelNum = Number.parseInt(parallel, 10) || 1;
    await batchCheckGrammar(dir, resolvedToken, dryRun, parallelNum, customPrompt, extraRules);
    logger.finish('批量语法检查完成!');
  } catch (err) {
    logger.error(`批量语法检查失败: ${err.message}`);
    // The options are handed to the error handler as context and may end up in
    // a log, so the credential is masked before it leaves this function.
    const safeOptions = token ? { ...options, token: '[REDACTED]' } : options;
    await errorHandler.logError(err, { command: 'batch-grammar-check', options: safeOptions });
    process.exit(1);
  }
}