i18n-ai-translate
Version:
AI-powered localization CLI, Node library, and GitHub Action. Translate i18next JSON, Gettext PO, Java .properties, and iOS .strings with ChatGPT, Claude, Gemini, or local Ollama models.
357 lines • 14.7 kB
JavaScript
;
// Default-import interop helper. Reuses a helper already present on `this`
// when there is one; otherwise values flagged `__esModule` pass through
// untouched and everything else is wrapped so it is reachable via `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.translate = translate;
exports.translateDiff = translateDiff;
const constants_1 = require("./constants");
const fastest_levenshtein_1 = require("fastest-levenshtein");
const flat_1 = require("flat");
const cache_1 = require("./cache");
const utils_1 = require("./utils");
const chat_pool_1 = __importDefault(require("./chat_pool"));
const generate_1 = __importDefault(require("./generate_json/generate"));
const prompt_mode_1 = __importDefault(require("./enums/prompt_mode"));
const rate_limiter_1 = __importDefault(require("./rate_limiter"));
const generate_2 = __importDefault(require("./generate_csv/generate"));
/**
 * Resolve the chat pool for a translation run.
 *
 * @param options - Translation options. `options.pool` short-circuits
 *   everything else; otherwise `options.rateLimiter` (or a freshly built
 *   limiter) and the connection settings are forwarded to the pool factory.
 * @returns The caller-supplied pool, or a newly created one.
 */
function getPool(options) {
    // A pool handed in by the caller (typically cli_translate.ts in
    // language-concurrent mode) must be reused as-is: that is what keeps
    // the TPM budget shared across parallel languages. Building a fresh
    // pool here would give every language its own limiter and defeat
    // the cap.
    const { pool: sharedPool } = options;
    if (sharedPool) {
        return sharedPool;
    }
    const limiter = options.rateLimiter ??
        new rate_limiter_1.default(options.rateLimitMs, options.verbose, options.tokensPerMinute);
    // A missing concurrency counts as 1, and anything below 1 is raised to 1.
    const workerCount = Math.max(1, options.concurrency ?? 1);
    return chat_pool_1.default.create({
        apiKey: options.apiKey,
        chatParams: options.chatParams,
        concurrency: workerCount,
        engine: options.engine,
        host: options.host,
        model: options.model,
        rateLimiter: limiter,
    });
}
/**
 * Replace every "\n" in the string values of `flatInput` with the token
 * `<prefix>NEWLINE<suffix>`, mutating `flatInput` in place.
 *
 * Non-string values (numbers, booleans, null, ...) are left untouched
 * instead of raising a TypeError on the missing `replaceAll` method.
 *
 * @param templatedStringPrefix - Text placed before `NEWLINE` in the token
 * @param templatedStringSuffix - Text placed after `NEWLINE` in the token
 * @param flatInput - Flat key/value map to rewrite in place
 */
function replaceNewlinesWithPlaceholder(templatedStringPrefix, templatedStringSuffix, flatInput) {
    const placeholder = `${templatedStringPrefix}NEWLINE${templatedStringSuffix}`;
    // A replacer function inserts the token verbatim. A plain replacement
    // string would have `$$`, `$&`, etc. interpreted as substitution
    // patterns, producing a token that can no longer be mapped back.
    const insertPlaceholder = () => placeholder;
    for (const [key, value] of Object.entries(flatInput)) {
        if (typeof value === "string") {
            flatInput[key] = value.replaceAll("\n", insertPlaceholder);
        }
    }
}
/**
 * Replace every `<prefix>NEWLINE<suffix>` token in the string values of
 * `sortedOutput` with a real "\n", mutating `sortedOutput` in place.
 *
 * Non-string values (for example an `undefined` entry for a key that has
 * no translation) are left untouched instead of raising a TypeError.
 *
 * @param templatedStringPrefix - Text that precedes `NEWLINE` in the token
 * @param templatedStringSuffix - Text that follows `NEWLINE` in the token
 * @param sortedOutput - Flat key/value map to rewrite in place
 */
function replacePlaceholderWithNewLines(templatedStringPrefix, templatedStringSuffix, sortedOutput) {
    const placeholder = `${templatedStringPrefix}NEWLINE${templatedStringSuffix}`;
    for (const [key, value] of Object.entries(sortedOutput)) {
        if (typeof value === "string") {
            sortedOutput[key] = value.replaceAll(placeholder, "\n");
        }
    }
}
/**
 * Cluster the entries of `flatInput` by how similar their values are.
 *
 * An entry joins the first existing cluster that holds at least one value
 * whose edit distance to it, divided by the longer of the two lengths, is
 * below 0.3; otherwise it starts a new cluster. The clusters are then
 * shuffled with `Math.random`, so their order differs between calls.
 *
 * @param flatInput - Flat key/value map; the caller's object is not modified
 * @returns `groups` (array of key/value maps, one per cluster) and a new
 *   `flatInput` holding the same entries in cluster order
 */
function groupSimilarValues(flatInput) {
    const SIMILARITY_CUTOFF = 0.3;
    // Builds a predicate telling whether `member` is close enough to `candidate`.
    const closeTo = (candidate) => (member) => {
        const ratio = (0, fastest_levenshtein_1.distance)(candidate, member) /
            Math.max(candidate.length, member.length);
        return ratio < SIMILARITY_CUTOFF;
    };
    const clusters = [];
    for (const [key, text] of Object.entries(flatInput)) {
        const isNearText = closeTo(text);
        const home = clusters.find((cluster) => Object.values(cluster).some((member) => isNearText(member)));
        if (home === undefined) {
            clusters.push({ [key]: text });
        }
        else {
            home[key] = text;
        }
    }
    // Fisher-Yates shuffle, walking from the last slot down to index 1.
    for (let last = clusters.length - 1; last >= 1; last -= 1) {
        const pick = Math.floor(Math.random() * (last + 1));
        const held = clusters[last];
        clusters[last] = clusters[pick];
        clusters[pick] = held;
    }
    // Rebuild the flat map so its key order follows the shuffled clusters.
    const reordered = Object.assign({}, ...clusters);
    return { flatInput: reordered, groups: clusters };
}
/**
 * Create a fresh statistics record with every counter set to zero.
 *
 * @returns A new object with the fields `batchStartTime`, `enqueuedItems`,
 *   `processedItems`, `processedTokens`, `totalItems` and `totalTokens`
 */
function startTranslationStatsItem() {
    const counterNames = [
        "batchStartTime",
        "enqueuedItems",
        "processedItems",
        "processedTokens",
        "totalItems",
        "totalTokens",
    ];
    return Object.fromEntries(counterNames.map((name) => [name, 0]));
}
/**
 * Create the statistics container for one run: one independent zeroed
 * record under `translate` and another under `verify`.
 *
 * @returns `{ translate, verify }`, each built by `startTranslationStatsItem`
 */
function startTranslationStats() {
    const translatePhase = startTranslationStatsItem();
    const verifyPhase = startTranslationStatsItem();
    return { translate: translatePhase, verify: verifyPhase };
}
/**
 * Dispatch a translation request to the generator matching
 * `ctx.options.promptMode`.
 *
 * @param ctx - Translation context; `ctx.options` selects the mode and the
 *   whole context is handed to the chosen generator
 * @returns Whatever the selected generator returns
 * @throws {Error} "Prompt mode is not set" when the mode is neither JSON nor CSV
 */
async function getTranslation(ctx) {
    const { options } = ctx;
    if (options.verbose) {
        (0, utils_1.printInfo)(`Translation prompting mode: ${options.promptMode}\n`);
    }
    const mode = options.promptMode;
    if (mode === prompt_mode_1.default.JSON) {
        const jsonGenerator = new generate_1.default(options);
        return jsonGenerator.translateJSON(ctx);
    }
    if (mode === prompt_mode_1.default.CSV) {
        return (0, generate_2.default)(ctx);
    }
    throw new Error("Prompt mode is not set");
}
/**
 * Fill in missing translation options, mutating `options` in place.
 *
 * Every setting except `continueOnError` is replaced whenever its current
 * value is falsy (so `0`, `""` and `null` also pick up the default).
 * `continueOnError` is only defaulted when it is strictly `undefined`.
 *
 * @param options - The options object to complete
 */
function setDefaults(options) {
    // Templating and batching fall back to the shared constants.
    options.templatedStringPrefix ||= constants_1.DEFAULT_TEMPLATED_STRING_PREFIX;
    options.templatedStringSuffix ||= constants_1.DEFAULT_TEMPLATED_STRING_SUFFIX;
    options.batchMaxTokens ||= constants_1.DEFAULT_REQUEST_TOKENS;
    options.batchSize ||= constants_1.DEFAULT_BATCH_SIZE;
    // Boolean switches: any falsy value is normalised to `false`.
    options.verbose ||= false;
    options.ensureChangedTranslation ||= false;
    options.skipTranslationVerification ||= false;
    options.skipStylingVerification ||= false;
    // Strict check: an explicit `false` or `null` from the caller is kept.
    if (options.continueOnError === undefined) {
        options.continueOnError = true;
    }
}
/**
 * Translate the input JSON to the given language.
 *
 * Steps, in order: apply option defaults, resolve and validate both
 * language codes, flatten the input, protect newlines with a placeholder,
 * collapse keys that share the same source text, serve cache hits, send the
 * remaining entries to the model, record new translations in the cache,
 * copy each canonical translation to its duplicates, sort the keys, restore
 * newlines and unflatten.
 *
 * Note that `options` is mutated: defaults are filled in and language
 * names are replaced by their resolved codes.
 *
 * @param options - The options for the translation
 * @returns The translated JSON, unflattened with the same delimiter used to
 *   flatten the input
 * @throws {Error} When the input or output language code is invalid
 */
async function translate(options) {
    setDefaults(options);
    // Accept both codes and English language names. If a user passed
    // "English" we normalise to "en" and note the substitution so they
    // know it happened.
    const resolvedInput = (0, utils_1.resolveLanguageCode)(options.inputLanguageCode);
    if (resolvedInput !== options.inputLanguageCode) {
        if (options.verbose) {
            (0, utils_1.printInfo)(`Interpreted '${options.inputLanguageCode}' as '${resolvedInput}'`);
        }
        options.inputLanguageCode = resolvedInput;
    }
    const resolvedOutput = (0, utils_1.resolveLanguageCode)(options.outputLanguageCode);
    if (resolvedOutput !== options.outputLanguageCode) {
        if (options.verbose) {
            (0, utils_1.printInfo)(`Interpreted '${options.outputLanguageCode}' as '${resolvedOutput}'`);
        }
        options.outputLanguageCode = resolvedOutput;
    }
    // Validate the input and output languages are valid
    if (!(0, utils_1.isValidLanguageCode)(options.inputLanguageCode)) {
        throw new Error(`Invalid input language code: ${options.inputLanguageCode}`);
    }
    if (!(0, utils_1.isValidLanguageCode)(options.outputLanguageCode)) {
        throw new Error(`Invalid output language code: ${options.outputLanguageCode}`);
    }
    if (options.verbose) {
        (0, utils_1.printInfo)(`Translating from ${options.inputLanguageCode} to ${options.outputLanguageCode}...`);
    }
    const pool = getPool(options);
    let flatInput = (0, flat_1.flatten)(options.inputJSON, {
        delimiter: constants_1.FLATTEN_DELIMITER,
    });
    replaceNewlinesWithPlaceholder(options.templatedStringPrefix, options.templatedStringSuffix, flatInput);
    // In-file de-duplication: keys sharing the same source text are
    // translated once through the first ("canonical") key. The buckets are
    // keyed by arbitrary source text, so a Map is used: with a plain object
    // a source string such as "constructor" or "toString" would resolve to
    // an inherited member and break the `.push`.
    const valueBuckets = new Map();
    for (const [k, v] of Object.entries(flatInput)) {
        const bucket = valueBuckets.get(v);
        if (bucket) {
            bucket.push(k);
        }
        else {
            valueBuckets.set(v, [k]);
        }
    }
    const canonicalToDupes = new Map();
    for (const keys of valueBuckets.values()) {
        if (keys.length > 1) {
            const [canonical, ...dupes] = keys;
            canonicalToDupes.set(canonical, dupes);
            for (const k of dupes) {
                delete flatInput[k];
            }
        }
    }
    if (options.verbose) {
        for (const [canonical, dupes] of canonicalToDupes) {
            (0, utils_1.printInfo)(`De-duplicating ${canonical}\n=>\n${dupes.join("\n")}\n\n`);
        }
    }
    // Translation memory: pull any source string already in the cache
    // out of the work set so only misses reach the model. This extends
    // the in-file de-duplication above across runs and files. Hits are
    // merged back into the output below; misses are recorded after.
    const { cache } = options;
    const cachedOutput = {};
    const missSourceByKey = {};
    if (cache) {
        for (const [key, source] of Object.entries(flatInput)) {
            const hit = (0, cache_1.getCachedTranslation)(cache, options.inputLanguageCode, options.outputLanguageCode, options.context ?? "", source);
            if (hit !== undefined) {
                cachedOutput[key] = hit;
                delete flatInput[key];
            }
            else {
                missSourceByKey[key] = source;
            }
        }
        if (options.verbose) {
            (0, utils_1.printInfo)(`Cache: ${Object.keys(cachedOutput).length} hit(s), ${Object.keys(missSourceByKey).length} miss(es)`);
        }
    }
    const grouped = groupSimilarValues(flatInput);
    flatInput = grouped.flatInput;
    const translationStats = startTranslationStats();
    const translated = await getTranslation({
        flatInput,
        groups: grouped.groups,
        options,
        pool,
        stats: translationStats,
    });
    // Record freshly translated strings so the next run can reuse them.
    // A key may be absent from `translated`, hence the undefined guard.
    if (cache) {
        for (const [key, source] of Object.entries(missSourceByKey)) {
            const value = translated[key];
            if (value !== undefined) {
                (0, cache_1.setCachedTranslation)(cache, options.inputLanguageCode, options.outputLanguageCode, options.context ?? "", source, value);
            }
        }
    }
    const output = { ...cachedOutput, ...translated };
    // Fan each canonical translation out to its duplicates. When the
    // canonical key has no translation, its duplicates are left out as
    // well; assigning `undefined` would only plant non-string values in
    // the output for the newline restoration below to trip over.
    for (const [canonical, dupes] of canonicalToDupes) {
        if (!Object.prototype.hasOwnProperty.call(output, canonical)) {
            continue;
        }
        const canonicalTranslation = output[canonical];
        if (canonicalTranslation === undefined) {
            continue;
        }
        for (const k of dupes) {
            output[k] = canonicalTranslation;
        }
    }
    const sortedOutput = {};
    for (const key of Object.keys(output).sort()) {
        sortedOutput[key] = output[key];
    }
    replacePlaceholderWithNewLines(options.templatedStringPrefix, options.templatedStringSuffix, sortedOutput);
    const unflattenedOutput = (0, flat_1.unflatten)(sortedOutput, {
        delimiter: constants_1.FLATTEN_DELIMITER,
    });
    if (options.verbose) {
        (0, utils_1.printExecutionTime)(translationStats.translate.batchStartTime, "Total execution time: ");
    }
    return unflattenedOutput;
}
/**
 * Translate the difference of an input JSON to the given languages.
 *
 * The before/after source JSONs are flattened and compared to find added,
 * modified and deleted keys. For every language in `options.toUpdateJSONs`
 * the deleted keys are dropped, the added and modified keys are translated
 * through `translate`, and the result is merged over the existing
 * translations with keys sorted. Languages are processed one after another.
 *
 * @param options - The options for the translation. Reads `inputJSONBefore`,
 *   `inputJSONAfter`, `toUpdateJSONs`, `verbose` and the optional
 *   `onLanguageComplete(languageCode, unflattened, flat)` callback; the
 *   remaining fields are forwarded to `translate`.
 * @returns A map from language code to the updated, unflattened JSON
 */
async function translateDiff(options) {
    const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    // Own-property lookup. A plain `obj[key]` would also see inherited
    // members, so a key named "toString" or "constructor" would never look
    // absent and would be misclassified below.
    const ownValue = (obj, key) => (hasOwn(obj, key) ? obj[key] : undefined);
    const flattenJSON = (json) => (0, flat_1.flatten)(json, {
        delimiter: constants_1.FLATTEN_DELIMITER,
    });
    const unflattenJSON = (flat) => (0, flat_1.unflatten)(flat, {
        delimiter: constants_1.FLATTEN_DELIMITER,
    });
    const flatInputBefore = flattenJSON(options.inputJSONBefore);
    const flatInputAfter = flattenJSON(options.inputJSONAfter);
    const flatToUpdateJSONs = {};
    for (const lang in options.toUpdateJSONs) {
        if (hasOwn(options.toUpdateJSONs, lang)) {
            flatToUpdateJSONs[lang] = flattenJSON(options.toUpdateJSONs[lang]);
        }
    }
    const addedKeys = [];
    const modifiedKeys = [];
    const deletedKeys = [];
    for (const key of Object.keys(flatInputBefore)) {
        const afterValue = ownValue(flatInputAfter, key);
        if (flatInputBefore[key] !== afterValue) {
            if (afterValue === undefined) {
                deletedKeys.push(key);
            }
            else {
                modifiedKeys.push(key);
            }
        }
    }
    for (const key of Object.keys(flatInputAfter)) {
        if (ownValue(flatInputBefore, key) === undefined) {
            addedKeys.push(key);
        }
    }
    if (options.verbose) {
        (0, utils_1.printInfo)(`Added keys: ${addedKeys.join("\n")}\n`);
        (0, utils_1.printInfo)(`Modified keys: ${modifiedKeys.join("\n")}\n`);
        (0, utils_1.printInfo)(`Deleted keys: ${deletedKeys.join("\n")}\n`);
    }
    for (const key of deletedKeys) {
        for (const lang of Object.keys(flatToUpdateJSONs)) {
            delete flatToUpdateJSONs[lang][key];
        }
    }
    // The set of strings to translate is the same for every language, so
    // it is built once rather than once per language.
    const addedAndModifiedTranslations = {};
    for (const key of [...addedKeys, ...modifiedKeys]) {
        addedAndModifiedTranslations[key] = flatInputAfter[key];
    }
    const translatedJSONs = {};
    for (const languageCode of Object.keys(flatToUpdateJSONs)) {
        // Each call gets its own shallow copy of the delta.
        // eslint-disable-next-line no-await-in-loop
        const translated = await translate({
            ...options,
            inputJSON: { ...addedAndModifiedTranslations },
            outputLanguageCode: languageCode,
        });
        const flatTranslated = flattenJSON(translated);
        // Start from the existing per-language map (minus the keys
        // deleted upstream) so unchanged translations are preserved, then
        // lay the fresh translations on top. Starting from the delta alone
        // would wipe every pre-existing key when written to disk.
        const merged = { ...flatToUpdateJSONs[languageCode], ...flatTranslated };
        // Sort the keys
        const sorted = {};
        for (const key of Object.keys(merged).sort()) {
            sorted[key] = merged[key];
        }
        translatedJSONs[languageCode] = sorted;
        if (options.onLanguageComplete) {
            options.onLanguageComplete(languageCode, unflattenJSON(sorted), sorted);
        }
    }
    // Unflatten again for the return value, so the objects handed to the
    // callback above are not the ones returned to the caller.
    const unflatToUpdateJSONs = {};
    for (const lang of Object.keys(translatedJSONs)) {
        unflatToUpdateJSONs[lang] = unflattenJSON(translatedJSONs[lang]);
    }
    return unflatToUpdateJSONs;
}
//# sourceMappingURL=translate.js.map