UNPKG

i18n-ai-translate

Version:

AI-powered localization CLI, Node library, and GitHub Action. Translate i18next JSON, Gettext PO, Java .properties, and iOS .strings with ChatGPT, Claude, Gemini, or local Ollama models.

357 lines 14.7 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.translate = translate;
exports.translateDiff = translateDiff;
const constants_1 = require("./constants");
const fastest_levenshtein_1 = require("fastest-levenshtein");
const flat_1 = require("flat");
const cache_1 = require("./cache");
const utils_1 = require("./utils");
const chat_pool_1 = __importDefault(require("./chat_pool"));
const generate_1 = __importDefault(require("./generate_json/generate"));
const prompt_mode_1 = __importDefault(require("./enums/prompt_mode"));
const rate_limiter_1 = __importDefault(require("./rate_limiter"));
const generate_2 = __importDefault(require("./generate_csv/generate"));
/**
 * Return the chat pool to use for this translation run.
 * @param options - The translation options; reads `pool`, `rateLimiter`,
 * `rateLimitMs`, `verbose`, `tokensPerMinute`, `apiKey`, `chatParams`,
 * `concurrency`, `engine`, `host` and `model`
 * @returns `options.pool` when supplied, otherwise a newly created pool
 */
function getPool(options) {
    // When the caller (typically cli_translate.ts in language-concurrent
    // mode) supplies its own pool, reuse it. This is what makes the
    // shared TPM budget actually shared across parallel languages — a
    // fresh pool here would give each language its own limiter and
    // defeat the cap.
    if (options.pool)
        return options.pool;
    const rateLimiter = options.rateLimiter ??
        new rate_limiter_1.default(options.rateLimitMs, options.verbose, options.tokensPerMinute);
    return chat_pool_1.default.create({
        apiKey: options.apiKey,
        chatParams: options.chatParams,
        // Never ask for fewer than one concurrent chat.
        concurrency: Math.max(1, options.concurrency ?? 1),
        engine: options.engine,
        host: options.host,
        model: options.model,
        rateLimiter,
    });
}
/**
 * Replace every "\n" in each own value of `flatInput`, in place, with the
 * token `${templatedStringPrefix}NEWLINE${templatedStringSuffix}`.
 * @param templatedStringPrefix - Text placed before `NEWLINE`
 * @param templatedStringSuffix - Text placed after `NEWLINE`
 * @param flatInput - Flat key/value map; values must be strings
 */
function replaceNewlinesWithPlaceholder(templatedStringPrefix, templatedStringSuffix, flatInput) {
    for (const key in flatInput) {
        if (Object.prototype.hasOwnProperty.call(flatInput, key)) {
            flatInput[key] = flatInput[key].replaceAll("\n", `${templatedStringPrefix}NEWLINE${templatedStringSuffix}`);
        }
    }
}
/**
 * Inverse of `replaceNewlinesWithPlaceholder`: turn every
 * `${templatedStringPrefix}NEWLINE${templatedStringSuffix}` token in each
 * own value of `sortedOutput` back into "\n", in place.
 * @param templatedStringPrefix - Text placed before `NEWLINE`
 * @param templatedStringSuffix - Text placed after `NEWLINE`
 * @param sortedOutput - Flat key/value map; values must be strings
 */
function replacePlaceholderWithNewLines(templatedStringPrefix, templatedStringSuffix, sortedOutput) {
    for (const key in sortedOutput) {
        if (Object.prototype.hasOwnProperty.call(sortedOutput, key)) {
            sortedOutput[key] = sortedOutput[key].replaceAll(`${templatedStringPrefix}NEWLINE${templatedStringSuffix}`, "\n");
        }
    }
}
/**
 * Cluster entries whose values are textually similar, shuffle the
 * clusters, and rebuild the flat map in cluster order.
 *
 * An entry joins the first existing group containing a value whose
 * Levenshtein distance to it, divided by the longer of the two lengths,
 * is below 0.3; otherwise it starts a new group.
 * @param flatInput - Flat key/value map of strings (not mutated)
 * @returns `{ flatInput, groups }`: the reordered flat map and the
 * shuffled array of group objects
 */
function groupSimilarValues(flatInput) {
    const groups = [];
    for (const key in flatInput) {
        if (Object.prototype.hasOwnProperty.call(flatInput, key)) {
            const val = flatInput[key];
            const existingGroup = groups.find((group) => Object.values(group).some((entry) => {
                const distPercent = (0, fastest_levenshtein_1.distance)(val, entry) /
                    Math.max(val.length, entry.length);
                return distPercent < 0.3;
            }));
            if (existingGroup) {
                existingGroup[key] = val;
            }
            else {
                groups.push({ [key]: val });
            }
        }
    }
    // Fisher–Yates shuffle of the group order (uses Math.random, so the
    // resulting order differs between runs).
    for (let i = groups.length - 1; i > 0; i--) {
        const j = Math.floor(Math.random() * (i + 1));
        [groups[i], groups[j]] = [groups[j], groups[i]];
    }
    // Rebuild the flat map so that key order follows the shuffled groups.
    const regrouped = {};
    for (const groupObj of groups) {
        for (const [k, v] of Object.entries(groupObj)) {
            regrouped[k] = v;
        }
    }
    return { flatInput: regrouped, groups };
}
/**
 * Create a zeroed statistics record for one phase.
 * @returns A fresh object with every counter set to 0
 */
function startTranslationStatsItem() {
    return {
        batchStartTime: 0,
        enqueuedItems: 0,
        processedItems: 0,
        processedTokens: 0,
        totalItems: 0,
        totalTokens: 0,
    };
}
/**
 * Create the statistics container handed to the generators.
 * @returns An object with independent zeroed `translate` and `verify` records
 */
function startTranslationStats() {
    return {
        translate: startTranslationStatsItem(),
        verify: startTranslationStatsItem(),
    };
}
/**
 * Dispatch to the generator matching `ctx.options.promptMode`.
 * @param ctx - Translation context (`flatInput`, `groups`, `options`,
 * `pool`, `stats`)
 * @returns Whatever the selected generator returns; `translate` treats it
 * as a flat key/translation map
 * @throws Error when the prompt mode is neither JSON nor CSV
 */
async function getTranslation(ctx) {
    if (ctx.options.verbose) {
        (0, utils_1.printInfo)(`Translation prompting mode: ${ctx.options.promptMode}\n`);
    }
    switch (ctx.options.promptMode) {
        case prompt_mode_1.default.JSON: {
            const generateTranslationJSON = new generate_1.default(ctx.options);
            return generateTranslationJSON.translateJSON(ctx);
        }
        case prompt_mode_1.default.CSV:
            return (0, generate_2.default)(ctx);
        default:
            throw new Error("Prompt mode is not set");
    }
}
/**
 * Fill in missing options in place.
 *
 * Every option except `continueOnError` is replaced when falsy;
 * `continueOnError` is only defaulted (to `true`) when it is `undefined`,
 * so an explicit `false` is respected.
 * @param options - The options object to mutate
 */
function setDefaults(options) {
    if (!options.templatedStringPrefix)
        options.templatedStringPrefix = constants_1.DEFAULT_TEMPLATED_STRING_PREFIX;
    if (!options.templatedStringSuffix)
        options.templatedStringSuffix = constants_1.DEFAULT_TEMPLATED_STRING_SUFFIX;
    if (!options.batchMaxTokens)
        options.batchMaxTokens = constants_1.DEFAULT_REQUEST_TOKENS;
    if (!options.batchSize)
        options.batchSize = constants_1.DEFAULT_BATCH_SIZE;
    if (!options.verbose)
        options.verbose = false;
    if (!options.ensureChangedTranslation)
        options.ensureChangedTranslation = false;
    if (!options.skipTranslationVerification)
        options.skipTranslationVerification = false;
    if (!options.skipStylingVerification)
        options.skipStylingVerification = false;
    if (options.continueOnError === undefined)
        options.continueOnError = true;
}
/**
 * Remove entries whose value duplicates an earlier entry's value.
 *
 * The first key seen for a value is kept as the canonical key; every later
 * key with the same value is deleted from `flatInput` and recorded so its
 * translation can be copied from the canonical key afterwards.
 *
 * Buckets live in a Map because the bucket keys are arbitrary source
 * strings: on a plain object, a string such as "constructor" or "toString"
 * resolves to an inherited Object.prototype member and the subsequent
 * `push` throws.
 * @param flatInput - Flat key/value map; duplicate entries are deleted in place
 * @returns Map-like object from canonical key to the array of removed keys
 */
function collectDuplicateValues(flatInput) {
    const valueBuckets = new Map();
    for (const [k, v] of Object.entries(flatInput)) {
        const bucket = valueBuckets.get(v);
        if (bucket) {
            bucket.push(k);
        }
        else {
            valueBuckets.set(v, [k]);
        }
    }
    const canonicalToDupes = {};
    for (const keys of valueBuckets.values()) {
        if (keys.length > 1) {
            const [canonical, ...dupes] = keys;
            canonicalToDupes[canonical] = dupes;
            for (const k of dupes) {
                delete flatInput[k];
            }
        }
    }
    return canonicalToDupes;
}
/**
 * Translate the input JSON to the given language
 *
 * Mutates `options`: defaults are filled in and the language codes are
 * replaced by their resolved form.
 * @param options - The options for the translation
 * @returns The translated JSON, unflattened, with keys sorted
 * @throws Error when either language code is invalid after resolution
 */
async function translate(options) {
    setDefaults(options);
    // Accept both codes and English language names. If a user passed
    // "English" we normalise to "en" and note the substitution so they
    // know it happened.
    const resolvedInput = (0, utils_1.resolveLanguageCode)(options.inputLanguageCode);
    if (resolvedInput !== options.inputLanguageCode) {
        if (options.verbose) {
            (0, utils_1.printInfo)(`Interpreted '${options.inputLanguageCode}' as '${resolvedInput}'`);
        }
        options.inputLanguageCode = resolvedInput;
    }
    const resolvedOutput = (0, utils_1.resolveLanguageCode)(options.outputLanguageCode);
    if (resolvedOutput !== options.outputLanguageCode) {
        if (options.verbose) {
            (0, utils_1.printInfo)(`Interpreted '${options.outputLanguageCode}' as '${resolvedOutput}'`);
        }
        options.outputLanguageCode = resolvedOutput;
    }
    // Validate the input and output languages are valid
    if (!(0, utils_1.isValidLanguageCode)(options.inputLanguageCode)) {
        throw new Error(`Invalid input language code: ${options.inputLanguageCode}`);
    }
    if (!(0, utils_1.isValidLanguageCode)(options.outputLanguageCode)) {
        throw new Error(`Invalid output language code: ${options.outputLanguageCode}`);
    }
    if (options.verbose) {
        (0, utils_1.printInfo)(`Translating from ${options.inputLanguageCode} to ${options.outputLanguageCode}...`);
    }
    const pool = getPool(options);
    let flatInput = (0, flat_1.flatten)(options.inputJSON, {
        delimiter: constants_1.FLATTEN_DELIMITER,
    });
    replaceNewlinesWithPlaceholder(options.templatedStringPrefix, options.templatedStringSuffix, flatInput);
    // In-file de-duplication: translate each distinct source string once.
    const canonicalToDupes = collectDuplicateValues(flatInput);
    if (options.verbose) {
        for (const [canonical, dupes] of Object.entries(canonicalToDupes)) {
            (0, utils_1.printInfo)(`De-duplicating ${canonical}\n=>\n${dupes.join("\n")}\n\n`);
        }
    }
    // Translation memory: pull any source string already in the cache
    // out of the work set so only misses reach the model. This extends
    // the in-file de-duplication above across runs and files. Hits are
    // merged back into the output below; misses are recorded after.
    const { cache } = options;
    const cachedOutput = {};
    const missSourceByKey = {};
    if (cache) {
        for (const [key, source] of Object.entries(flatInput)) {
            const hit = (0, cache_1.getCachedTranslation)(cache, options.inputLanguageCode, options.outputLanguageCode, options.context ?? "", source);
            if (hit !== undefined) {
                cachedOutput[key] = hit;
                delete flatInput[key];
            }
            else {
                missSourceByKey[key] = source;
            }
        }
        if (options.verbose) {
            (0, utils_1.printInfo)(`Cache: ${Object.keys(cachedOutput).length} hit(s), ${Object.keys(missSourceByKey).length} miss(es)`);
        }
    }
    const grouped = groupSimilarValues(flatInput);
    flatInput = grouped.flatInput;
    const translationStats = startTranslationStats();
    const translated = await getTranslation({
        flatInput,
        groups: grouped.groups,
        options,
        pool,
        stats: translationStats,
    });
    // Record freshly translated strings so the next run can reuse them.
    if (cache) {
        for (const [key, source] of Object.entries(missSourceByKey)) {
            const value = translated[key];
            if (value !== undefined) {
                (0, cache_1.setCachedTranslation)(cache, options.inputLanguageCode, options.outputLanguageCode, options.context ?? "", source, value);
            }
        }
    }
    // Fresh translations win over cache hits if a key appears in both.
    const output = { ...cachedOutput, ...translated };
    // Give every removed duplicate the translation of its canonical key.
    for (const [canonical, dupes] of Object.entries(canonicalToDupes)) {
        const canonicalTranslation = output[canonical];
        for (const k of dupes) {
            output[k] = canonicalTranslation;
        }
    }
    const sortedOutput = {};
    for (const key of Object.keys(output).sort()) {
        sortedOutput[key] = output[key];
    }
    replacePlaceholderWithNewLines(options.templatedStringPrefix, options.templatedStringSuffix, sortedOutput);
    const unflattenedOutput = (0, flat_1.unflatten)(sortedOutput, {
        delimiter: constants_1.FLATTEN_DELIMITER,
    });
    if (options.verbose) {
        (0, utils_1.printExecutionTime)(translationStats.translate.batchStartTime, "Total execution time: ");
    }
    return unflattenedOutput;
}
/**
 * Translate the difference of an input JSON to the given languages
 *
 * Keys present in `inputJSONBefore` but missing from `inputJSONAfter` are
 * removed from every language; keys that were added or whose value changed
 * are translated per language, one language at a time, and merged over the
 * existing translations. `options.onLanguageComplete`, when provided, is
 * called after each language with the language code, the unflattened
 * result and the flat result.
 * @param options - The options for the translation
 * @returns An object mapping each language in `toUpdateJSONs` to its
 * updated, unflattened JSON
 */
async function translateDiff(options) {
    const flatInputBefore = (0, flat_1.flatten)(options.inputJSONBefore, {
        delimiter: constants_1.FLATTEN_DELIMITER,
    });
    const flatInputAfter = (0, flat_1.flatten)(options.inputJSONAfter, {
        delimiter: constants_1.FLATTEN_DELIMITER,
    });
    const flatToUpdateJSONs = {};
    for (const lang in options.toUpdateJSONs) {
        if (Object.prototype.hasOwnProperty.call(options.toUpdateJSONs, lang)) {
            const flatToUpdateJSON = (0, flat_1.flatten)(options.toUpdateJSONs[lang], {
                delimiter: constants_1.FLATTEN_DELIMITER,
            });
            flatToUpdateJSONs[lang] = flatToUpdateJSON;
        }
    }
    const addedKeys = [];
    const modifiedKeys = [];
    const deletedKeys = [];
    for (const key in flatInputBefore) {
        if (flatInputBefore[key] !== flatInputAfter[key]) {
            if (flatInputAfter[key] === undefined) {
                deletedKeys.push(key);
            }
            else {
                modifiedKeys.push(key);
            }
        }
    }
    for (const key in flatInputAfter) {
        if (flatInputBefore[key] === undefined) {
            addedKeys.push(key);
        }
    }
    if (options.verbose) {
        (0, utils_1.printInfo)(`Added keys: ${addedKeys.join("\n")}\n`);
        (0, utils_1.printInfo)(`Modified keys: ${modifiedKeys.join("\n")}\n`);
        (0, utils_1.printInfo)(`Deleted keys: ${deletedKeys.join("\n")}\n`);
    }
    for (const key of deletedKeys) {
        for (const lang in flatToUpdateJSONs) {
            if (Object.prototype.hasOwnProperty.call(flatToUpdateJSONs, lang)) {
                delete flatToUpdateJSONs[lang][key];
            }
        }
    }
    const translatedJSONs = {};
    for (const languageCode in flatToUpdateJSONs) {
        if (Object.prototype.hasOwnProperty.call(flatToUpdateJSONs, languageCode)) {
            // Seed with the existing per-language map (minus the keys
            // deleted upstream) so unchanged translations are preserved.
            // Without this the accumulator would hold only the delta and
            // writing it to disk would wipe every pre-existing key.
            translatedJSONs[languageCode] = {
                ...flatToUpdateJSONs[languageCode],
            };
            const addedAndModifiedTranslations = {};
            for (const key of addedKeys) {
                addedAndModifiedTranslations[key] = flatInputAfter[key];
            }
            for (const key of modifiedKeys) {
                addedAndModifiedTranslations[key] = flatInputAfter[key];
            }
            // eslint-disable-next-line no-await-in-loop
            const translated = await translate({
                ...options,
                inputJSON: addedAndModifiedTranslations,
                outputLanguageCode: languageCode,
            });
            const flatTranslated = (0, flat_1.flatten)(translated, {
                delimiter: constants_1.FLATTEN_DELIMITER,
            });
            for (const key in flatTranslated) {
                if (Object.prototype.hasOwnProperty.call(flatTranslated, key)) {
                    translatedJSONs[languageCode][key] = flatTranslated[key];
                }
            }
            // Sort the keys
            translatedJSONs[languageCode] = Object.keys(translatedJSONs[languageCode])
                .sort()
                .reduce((obj, key) => {
                obj[key] = translatedJSONs[languageCode][key];
                return obj;
            }, {});
            if (options.onLanguageComplete) {
                const unflattened = (0, flat_1.unflatten)(translatedJSONs[languageCode], {
                    delimiter: constants_1.FLATTEN_DELIMITER,
                });
                options.onLanguageComplete(languageCode, unflattened, translatedJSONs[languageCode]);
            }
        }
    }
    const unflatToUpdateJSONs = {};
    for (const lang in translatedJSONs) {
        if (Object.prototype.hasOwnProperty.call(translatedJSONs, lang)) {
            unflatToUpdateJSONs[lang] = (0, flat_1.unflatten)(translatedJSONs[lang], {
                delimiter: constants_1.FLATTEN_DELIMITER,
            });
        }
    }
    return unflatToUpdateJSONs;
}
//# sourceMappingURL=translate.js.map