/*
 * i18n-ai-translate
 * Version:
 * AI-powered localization CLI, Node library, and GitHub Action. Translate i18next JSON, Gettext PO, Java .properties, and iOS .strings with ChatGPT, Claude, Gemini, or local Ollama models.
 * 572 lines • 28.9 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
const constants_1 = require("../constants");
const js_tiktoken_1 = require("js-tiktoken");
const types_1 = require("./types");
const utils_1 = require("../utils");
const retry_1 = require("../retry");
const shard_runner_1 = require("../shard_runner");
const prompts_1 = require("./prompts");
/**
 * Translates (and optionally verifies) a flat `key -> string` map through
 * LLM chats using structured JSON prompts.
 *
 * Work is organised around "translate items": one record per key carrying
 * the original text, the placeholders found in it, the current translation,
 * retry counters and token estimates. Items are grouped into batches that
 * fit the configured token/size budget, sent to the chat, and re-queued with
 * a `failure` hint when the answer is unusable (empty string, missing
 * placeholders, or rejected by the verifier).
 */
class GenerateTranslationJSON {
    /** Tokenizer used to estimate how many tokens a prompt or item costs. */
    tikToken;
    /** Regex matching templated placeholders, built from the configured prefix/suffix. */
    templatedStringRegex;

    /**
     * @param options - Reads `templatedStringPrefix` and
     *     `templatedStringSuffix` to build the placeholder regex.
     */
    constructor(options) {
        // js-tiktoken is a pure-JS port — no WASM required, so global
        // installs work on Windows without the tiktoken_bg.wasm dance
        // that plagued the native tiktoken package (issue #428).
        this.tikToken = (0, js_tiktoken_1.getEncoding)("cl100k_base");
        this.templatedStringRegex = (0, utils_1.getTemplatedStringRegex)(options.templatedStringPrefix, options.templatedStringSuffix);
    }

    /**
     * Translate every entry of `ctx.flatInput`, shard by shard.
     *
     * @param ctx - Translation context. Fields read here:
     *     `flatInput` (flat key -> source string map), `options`
     *     (translation options), `pool` (chat pool; its `rateLimiter` is
     *     forwarded to the jobs), `groups` (forwarded to `runAcrossShards`)
     *     and `stats` (`stats.translate` / `stats.verify` counters, mutated
     *     in place).
     * @returns A flat `key -> translated string` object.
     */
    async translateJSON(ctx) {
        const { flatInput, options, pool, groups, stats } = ctx;
        // Seed stats once up front; per-shard work then just increments
        // the shared counters.
        const allItems = this.generateTranslateItemArray(flatInput);
        stats.translate.totalItems = allItems.length;
        stats.translate.totalTokens = allItems.reduce((sum, item) => sum + item.translationTokens, 0);
        stats.translate.batchStartTime = Date.now();
        const perShardResults = await (0, shard_runner_1.runAcrossShards)(flatInput, groups, pool, async (shard, chats) => {
            // Item ids are assigned per shard (1..n within the shard).
            const shardItems = this.generateTranslateItemArray(shard);
            const translated = await this.generateTranslationJSON(shardItems, options, chats, stats.translate, pool.rateLimiter);
            if (options.skipTranslationVerification) {
                return translated;
            }
            return this.generateVerificationJSON(translated, options, chats, stats.verify, pool.rateLimiter);
        });
        const combined = [];
        for (const shardResult of perShardResults) {
            for (const item of shardResult) {
                combined.push(item);
            }
        }
        return this.convertTranslateItemToIndex(combined);
    }

    /**
     * Run the verification step against a source/target pair without
     * writing anything. Returns one entry per invalid key, reporting
     * what the model thought was wrong and what it would have fixed
     * the translation to.
     *
     * This deliberately does NOT go through generateVerificationJSON —
     * that path is designed to *fix* failures and re-verify, so
     * successfully-fixed items come back with failure="" and the check
     * report would miss every issue. Instead we call the verification
     * prompt once per batch, parse the raw schema output, and surface
     * the `valid: false` items directly.
     *
     * @param ctx - Reads `flatSource`, `flatTarget`, `options` and `pool`.
     *     Only keys present in both maps are checked; the first chat set
     *     returned by `pool.all()` is used.
     * @returns Array of `{ issue, key, original, suggestion, translated }`.
     */
    async checkJSON(ctx) {
        const { flatSource, flatTarget, options, pool } = ctx;
        // Build items pre-populated with the on-disk translation — the
        // verify prompt expects both `original` and `translated` to be
        // filled in before it runs.
        const items = [];
        let id = 1;
        for (const key in flatSource) {
            if (!Object.prototype.hasOwnProperty.call(flatSource, key) ||
                !(key in flatTarget)) {
                continue;
            }
            const item = this.generateTranslateItem(id, key, flatSource[key]);
            item.translated = flatTarget[key];
            item.verificationTokens = this.getVerifyItemToken(item);
            items.push(item);
            id++;
        }
        if (items.length === 0) {
            return [];
        }
        const [chats] = pool.all();
        const issues = [];
        // Batch the items to stay within batchSize / batchMaxTokens.
        // getBatchVerifyItemArray already handles token-aware slicing.
        let remaining = items.slice();
        while (remaining.length > 0) {
            const batch = this.getBatchVerifyItemArray(remaining, options);
            if (batch.length === 0) {
                break;
            }
            // Set lookup keeps the removal linear instead of scanning the
            // batch once per remaining item.
            const inBatch = new Set(batch);
            remaining = remaining.filter((it) => !inBatch.has(it));
            const promptText = (0, prompts_1.verificationPromptJSON)(options.inputLanguageCode, options.outputLanguageCode, this.generateVerifyItemsInput(batch), {
                context: options.context,
                glossary: options.glossary,
                overridePrompt: options.overridePrompt,
                templatedStringPrefix: options.templatedStringPrefix,
                templatedStringSuffix: options.templatedStringSuffix,
            });
            // eslint-disable-next-line no-await-in-loop
            const raw = await chats.verifyTranslationChat.sendMessage(promptText, types_1.VerifyItemOutputObjectSchema);
            const parsed = this.parseVerificationToJSON(raw);
            const idToItem = new Map(batch.map((i) => [i.id, i]));
            for (const v of parsed) {
                // Skip malformed entries and entries the verifier accepted.
                if (!this.isValidVerificationItem(v) || v.valid) {
                    continue;
                }
                const item = idToItem.get(v.id);
                if (!item) {
                    continue;
                }
                issues.push({
                    issue: v.issue || "Flagged by verifier",
                    key: item.key,
                    original: item.original,
                    suggestion: v.fixedTranslation ?? "",
                    translated: flatTarget[item.key] ?? "",
                });
            }
        }
        return issues;
    }

    /**
     * Project translate items onto the minimal payload sent in the
     * translation prompt.
     *
     * @param translateItems - Items to serialise.
     * @returns One `{ id, original }` object per item, plus `context` /
     *     `failure` only when they are non-empty.
     */
    generateTranslateItemsInput(translateItems) {
        return translateItems.map((translateItem) => ({
            // Only adds 'context' to the object if it's not empty. Makes the prompt shorter and uses less tokens
            ...(translateItem.context !== ""
                ? { context: translateItem.context }
                : {}),
            ...(translateItem.failure !== ""
                ? { failure: translateItem.failure }
                : {}),
            id: translateItem.id,
            original: translateItem.original,
        }));
    }

    /**
     * Project items onto the payload sent in the verification prompt.
     *
     * @param verifyItems - Items to serialise.
     * @returns One `{ id, original, translated }` object per item, plus
     *     `context` / `failure` only when they are non-empty.
     */
    generateVerifyItemsInput(verifyItems) {
        return verifyItems.map((verifyItem) => ({
            ...(verifyItem.context !== ""
                ? { context: verifyItem.context }
                : {}),
            ...(verifyItem.failure !== ""
                ? { failure: verifyItem.failure }
                : {}),
            id: verifyItem.id,
            original: verifyItem.original,
            translated: verifyItem.translated,
        }));
    }

    /**
     * Create a fresh, untranslated item for one key.
     *
     * @param id - Numeric id used to match model output back to the item.
     * @param key - Key of the entry in the flat input.
     * @param original - Source text to translate.
     * @returns The new item with `templateStrings` and `translationTokens`
     *     filled in and every other counter zeroed.
     */
    generateTranslateItem(id, key, original) {
        const translateItem = {
            context: "",
            failure: "",
            id,
            key,
            original,
            templateStrings: [],
            translated: "",
            translationAttempts: 0,
            translationTokens: 0,
            verificationAttempts: 0,
            verificationTokens: 0,
        };
        // Maps the 'placeholders' in the translated object to make sure that none are missing
        const match = original.match(this.templatedStringRegex);
        if (match) {
            translateItem.templateStrings = match;
        }
        // Tokens here are used to estimate accurately the execution time
        translateItem.translationTokens =
            this.getTranslateItemToken(translateItem);
        return translateItem;
    }

    /**
     * Take the next batch of items to translate from the front of the queue.
     *
     * The batch always contains at least one item. It stops growing once the
     * running token total reaches the input budget, once `options.batchSize`
     * items were taken, or right after an item that has already been
     * attempted more than 5 times.
     *
     * @param translateItemArray - Pending items (not modified).
     * @param options - Reads the language codes, prompt options,
     *     `batchMaxTokens` and `batchSize`.
     * @returns A prefix of `translateItemArray`.
     */
    getBatchTranslateItemArray(translateItemArray, options) {
        const promptTokens = this.tikToken.encode((0, prompts_1.translationPromptJSON)(options.inputLanguageCode, options.outputLanguageCode, [], {
            context: options.context,
            glossary: options.glossary,
            overridePrompt: options.overridePrompt,
            templatedStringPrefix: options.templatedStringPrefix,
            templatedStringSuffix: options.templatedStringSuffix,
        })).length;
        // Remove the tokens used by the prompt, then split what is left in two
        // (half for the input, half for the output) with a 10% margin of error
        const maxInputTokens = ((Number(options.batchMaxTokens) - promptTokens) * 0.9) / 2;
        let currentTokens = 0;
        const batchTranslateItemArray = [];
        for (const translateItem of translateItemArray) {
            // If a failure message is added the tokens for an item change
            currentTokens +=
                translateItem.failure !== ""
                    ? this.getTranslateItemToken(translateItem)
                    : translateItem.translationTokens;
            if (batchTranslateItemArray.length !== 0 &&
                (currentTokens >= maxInputTokens ||
                    batchTranslateItemArray.length >= Number(options.batchSize))) {
                break;
            }
            batchTranslateItemArray.push(translateItem);
            if (translateItem.translationAttempts > 5) {
                // Add a minimum of one items if the item has been tried many times
                // Too many items can cause translations to fail
                break;
            }
        }
        return batchTranslateItemArray;
    }

    /**
     * Take the next batch of items to verify from the front of the queue.
     * Same rules as getBatchTranslateItemArray, but sized with the
     * verification prompt and the items' verification token counts /
     * verification attempts.
     *
     * @param translatedItemArray - Pending items (not modified).
     * @param options - Reads the language codes, prompt options,
     *     `batchMaxTokens` and `batchSize`.
     * @returns A prefix of `translatedItemArray`.
     */
    getBatchVerifyItemArray(translatedItemArray, options) {
        const promptTokens = this.tikToken.encode((0, prompts_1.verificationPromptJSON)(options.inputLanguageCode, options.outputLanguageCode, [], {
            context: options.context,
            glossary: options.glossary,
            overridePrompt: options.overridePrompt,
            templatedStringPrefix: options.templatedStringPrefix,
            templatedStringSuffix: options.templatedStringSuffix,
        })).length;
        const maxInputTokens = ((Number(options.batchMaxTokens) - promptTokens) * 0.9) / 2;
        let currentTokens = 0;
        const batchVerifyItemArray = [];
        for (const translatedItem of translatedItemArray) {
            // A failure message changes the serialised size, so recount.
            currentTokens +=
                translatedItem.failure !== ""
                    ? this.getVerifyItemToken(translatedItem)
                    : translatedItem.verificationTokens;
            if (batchVerifyItemArray.length !== 0 &&
                (currentTokens >= maxInputTokens ||
                    batchVerifyItemArray.length >= Number(options.batchSize))) {
                break;
            }
            batchVerifyItemArray.push(translatedItem);
            if (translatedItem.verificationAttempts > 5) {
                // Add a minimum of one items if the item has been tried many times
                // Too many items can cause translations to fail
                break;
            }
        }
        return batchVerifyItemArray;
    }

    /**
     * Build one translate item per own enumerable key of `flatInput`.
     *
     * @param flatInput - Flat key -> source string map.
     * @returns Items in key order, with ids numbered from 1.
     */
    generateTranslateItemArray(flatInput) {
        // The callback index is the key's position, so no per-key
        // `indexOf` scan over the key list is needed.
        return Object.keys(flatInput).map((key, index) => this.generateTranslateItem(index + 1, key, flatInput[key]));
    }

    /**
     * @param translatedItem - Item to measure.
     * @returns Token count of the item's translation-prompt payload.
     */
    getTranslateItemToken(translatedItem) {
        return this.tikToken.encode(JSON.stringify(this.generateTranslateItemsInput([translatedItem])[0])).length;
    }

    /**
     * @param translatedItem - Item to measure.
     * @returns Token count of the item's verification-prompt payload.
     */
    getVerifyItemToken(translatedItem) {
        return this.tikToken.encode(JSON.stringify(this.generateVerifyItemsInput([translatedItem])[0])).length;
    }

    /**
     * Translate every queued item, retrying items the model failed on.
     *
     * @param translateItemArray - Work queue; items are removed from it as
     *     they are translated or skipped (the array is mutated).
     * @param options - Translation options.
     * @param chats - Chat set forwarded to the translation job.
     * @param translationStats - Shared counters, incremented in place.
     * @param rateLimiter - Forwarded to the translation job.
     * @returns The successfully translated items.
     * @throws Error when an item exceeds `RETRY_ATTEMPTS` and
     *     `options.continueOnError` is not set.
     */
    async generateTranslationJSON(translateItemArray, options, chats, translationStats, rateLimiter) {
        const generatedTranslation = [];
        // totalItems / totalTokens / batchStartTime are set once by the
        // public translateJSON entry point so parallel shards don't
        // clobber one another's stats.
        const skippedItems = [];
        // translate items are removed from 'translateItemArray' when one is generated
        // this is done to avoid 'losing' items if the model doesn't return one
        while (translateItemArray.length > 0) {
            const batchTranslateItemArray = this.getBatchTranslateItemArray(translateItemArray, options);
            for (const batchTranslateItem of batchTranslateItemArray) {
                batchTranslateItem.translationAttempts++;
                if (batchTranslateItem.translationAttempts <= constants_1.RETRY_ATTEMPTS) {
                    continue;
                }
                if (!options.continueOnError) {
                    throw new Error(`Item failed to translate too many times: ${JSON.stringify(batchTranslateItem)}. If this persists try a different model`);
                }
                // Give up on this key but keep going with the rest.
                (0, utils_1.printError)(`Skipping key after ${constants_1.RETRY_ATTEMPTS} failed translation attempts: ${batchTranslateItem.key}`);
                const idx = translateItemArray.findIndex((item) => item.id === batchTranslateItem.id);
                if (idx !== -1) {
                    translateItemArray.splice(idx, 1);
                }
                skippedItems.push(batchTranslateItem);
            }
            const filteredBatch = batchTranslateItemArray.filter((item) => item.translationAttempts <= constants_1.RETRY_ATTEMPTS);
            if (filteredBatch.length === 0) {
                continue;
            }
            translationStats.enqueuedItems += filteredBatch.length;
            // eslint-disable-next-line no-await-in-loop
            const result = await this.runTranslationJob({
                chats,
                context: options.context,
                ensureChangedTranslation: options.ensureChangedTranslation,
                glossary: options.glossary,
                inputLanguageCode: options.inputLanguageCode,
                outputLanguageCode: options.outputLanguageCode,
                overridePrompt: options.overridePrompt,
                rateLimiter,
                skipStylingVerification: options.skipStylingVerification,
                skipTranslationVerification: options.skipTranslationVerification,
                templatedStringPrefix: options.templatedStringPrefix,
                templatedStringSuffix: options.templatedStringSuffix,
                translateItems: filteredBatch,
                verboseLogging: options.verbose,
            });
            if (!result) {
                throw new Error("Translation job failed");
            }
            for (const translatedItem of result) {
                // Check if the translated item exists in the untranslated item array
                const index = translateItemArray.findIndex((item) => item.id === translatedItem.id);
                if (index !== -1) {
                    // If it does remove it from the 'translateItemArray' used to queue items for translation
                    translateItemArray.splice(index, 1);
                    // Prepare the object then add it to results
                    translatedItem.verificationTokens =
                        this.getVerifyItemToken(translatedItem);
                    generatedTranslation.push(translatedItem);
                    translationStats.processedTokens +=
                        translatedItem.translationTokens;
                }
                translationStats.processedItems++;
            }
            (0, utils_1.printProgress)(options.skipTranslationVerification
                ? "Translating"
                : "Step 1/2 - Translating", translationStats.batchStartTime, translationStats.totalTokens, translationStats.processedTokens);
        }
        (0, utils_1.printExecutionTime)(translationStats.batchStartTime, "\nTranslation execution time: ");
        if (skippedItems.length > 0) {
            (0, utils_1.printError)(`Skipped ${skippedItems.length} key(s) that exhausted retries: ${skippedItems
                .map((item) => item.key)
                .join(", ")}`);
        }
        return generatedTranslation;
    }

    /**
     * Verify every queued item, re-queuing items the verifier corrected so
     * the corrected text is checked again.
     *
     * @param verifyItemArray - Work queue; items are removed from it as
     *     they are accepted (the array is mutated).
     * @param options - Translation options.
     * @param chats - Chat set forwarded to the verification job.
     * @param translationStats - Verification counters, updated in place.
     * @param rateLimiter - Forwarded to the verification job.
     * @returns The accepted items. With `options.continueOnError`, items that
     *     exhausted their retries are included unverified.
     * @throws Error when an item exceeds `RETRY_ATTEMPTS` and
     *     `options.continueOnError` is not set.
     */
    async generateVerificationJSON(verifyItemArray, options, chats, translationStats, rateLimiter) {
        const generatedVerification = [];
        // Unlike the translation stats, verification totals are not seeded up
        // front: the start time is stamped by whichever shard gets here first
        // and each shard adds its own items/tokens to the shared totals.
        if (translationStats.batchStartTime === 0) {
            translationStats.batchStartTime = Date.now();
        }
        translationStats.totalItems += verifyItemArray.length;
        translationStats.totalTokens += verifyItemArray.reduce((sum, verifyItem) => sum + verifyItem.verificationTokens, 0);
        while (verifyItemArray.length > 0) {
            const batchVerifyItemArray = this.getBatchVerifyItemArray(verifyItemArray, options);
            for (const batchVerifyItem of batchVerifyItemArray) {
                batchVerifyItem.verificationAttempts++;
                if (batchVerifyItem.verificationAttempts <= constants_1.RETRY_ATTEMPTS) {
                    continue;
                }
                if (!options.continueOnError) {
                    throw new Error(`Item failed to verify too many times: ${JSON.stringify(batchVerifyItem)}. If this persists try a different model`);
                }
                // Keep the latest translation even though it never passed.
                (0, utils_1.printError)(`Skipping key after ${constants_1.RETRY_ATTEMPTS} failed verification attempts; accepting unverified translation: ${batchVerifyItem.key}`);
                const idx = verifyItemArray.findIndex((item) => item.id === batchVerifyItem.id);
                if (idx !== -1) {
                    verifyItemArray.splice(idx, 1);
                }
                generatedVerification.push(batchVerifyItem);
            }
            const filteredVerifyBatch = batchVerifyItemArray.filter((item) => item.verificationAttempts <= constants_1.RETRY_ATTEMPTS);
            if (filteredVerifyBatch.length === 0) {
                continue;
            }
            translationStats.enqueuedItems += filteredVerifyBatch.length;
            // eslint-disable-next-line no-await-in-loop
            const result = await this.runVerificationJob({
                chats,
                context: options.context,
                ensureChangedTranslation: options.ensureChangedTranslation,
                glossary: options.glossary,
                inputLanguageCode: options.inputLanguageCode,
                outputLanguageCode: options.outputLanguageCode,
                overridePrompt: options.overridePrompt,
                rateLimiter,
                skipStylingVerification: options.skipStylingVerification,
                skipTranslationVerification: options.skipTranslationVerification,
                templatedStringPrefix: options.templatedStringPrefix,
                templatedStringSuffix: options.templatedStringSuffix,
                translateItems: filteredVerifyBatch,
                verboseLogging: options.verbose,
            });
            if (!result) {
                throw new Error("Verification job failed");
            }
            for (const translatedItem of result) {
                const index = verifyItemArray.findIndex((item) => item.id === translatedItem.id);
                if (index !== -1) {
                    verifyItemArray.splice(index, 1);
                    generatedVerification.push(translatedItem);
                    translationStats.processedTokens +=
                        translatedItem.verificationTokens;
                }
                translationStats.processedItems++;
            }
            (0, utils_1.printProgress)("Step 2/2 - Verifying", translationStats.batchStartTime, translationStats.totalTokens, translationStats.processedTokens);
        }
        (0, utils_1.printExecutionTime)(translationStats.batchStartTime, "Verification execution time: ");
        return generatedVerification;
    }

    /**
     * Collapse items into a flat `key -> translated` object.
     *
     * @param generatedTranslation - Items carrying `key` and `translated`.
     * @returns Plain object; later items win on duplicate keys.
     */
    convertTranslateItemToIndex(generatedTranslation) {
        return generatedTranslation.reduce((acc, translation) => {
            acc[translation.key] = translation.translated;
            return acc;
        }, {});
    }

    /**
     * Parse raw translation output against the translate-item schema.
     *
     * @param outputText - Raw JSON text returned by the model.
     * @returns The parsed `items`, or `[]` (after logging) when the text is
     *     not valid JSON or does not match the schema.
     */
    parseTranslationToJSON(outputText) {
        try {
            return types_1.TranslateItemOutputObjectSchema.parse(JSON.parse(outputText))
                .items;
        }
        catch (error) {
            (0, utils_1.printError)(`Error parsing JSON: '${error}', output: '${outputText}'\n`);
            return [];
        }
    }

    /**
     * Parse raw verification output against the verify-item schema.
     *
     * @param outputText - Raw JSON text returned by the model.
     * @returns The parsed `items`, or `[]` (after logging) when the text is
     *     not valid JSON or does not match the schema.
     */
    parseVerificationToJSON(outputText) {
        try {
            return types_1.VerifyItemOutputObjectSchema.parse(JSON.parse(outputText))
                .items;
        }
        catch (error) {
            (0, utils_1.printError)(`Error parsing JSON: '${error}', output: '${outputText}'\n`);
            return [];
        }
    }

    /**
     * @param item - One entry of the model's translation output.
     * @returns True when `id` is a positive number and `translated` is a string.
     */
    isValidTranslateItem(item) {
        return (typeof item.id === "number" &&
            typeof item.translated === "string" &&
            item.id > 0);
    }

    /**
     * @param item - One entry of the model's verification output.
     * @returns True when `id` is a positive number, `valid` is a boolean and,
     *     for rejected items, `fixedTranslation` is a string.
     */
    isValidVerificationItem(item) {
        if (typeof item.id !== "number" || typeof item.valid !== "boolean") {
            return false;
        }
        if (item.id <= 0) {
            return false;
        }
        // 'fixedTranslation' should be a translation if valid is false
        if (item.valid === false &&
            typeof item.fixedTranslation !== "string") {
            return false;
        }
        return true;
    }

    /**
     * Merge model output into the queued items and keep the usable ones.
     *
     * Every matched item gets its `translated` overwritten (the input items
     * are mutated). Items whose translation is empty or lost placeholders
     * get a `failure` hint instead of being returned, so the caller retries
     * them; items the model did not return are left untouched.
     *
     * @param untranslatedItems - Items that were sent to the model.
     * @param translatedItems - Validated model output (`id`, `translated`).
     * @returns Copies of the accepted items with `failure` cleared.
     */
    createTranslateItemsWithTranslation(untranslatedItems, translatedItems) {
        const output = [];
        for (const untranslatedItem of untranslatedItems) {
            const translatedItem = translatedItems.find((checkTranslatedItem) => untranslatedItem.id === checkTranslatedItem.id);
            if (!translatedItem) {
                continue;
            }
            untranslatedItem.translated = translatedItem.translated;
            if (translatedItem.translated === "") {
                untranslatedItem.failure =
                    "The translated value cannot be an empty string";
                continue;
            }
            const templateStrings = translatedItem.translated.match(this.templatedStringRegex) ?? [];
            const missingVariables = (0, utils_1.getMissingVariables)(untranslatedItem.templateStrings, templateStrings);
            if (missingVariables.length !== 0) {
                // Item is updated with a failure message. This message gives the LLM a context to help it fix the translation.
                // Without this the same error is made over and over again, with the message the new translation is generally accepted.
                untranslatedItem.failure = `Ensure all variables are included. The following variables are missing from the previous translation and must be added: '${JSON.stringify(missingVariables)}'`;
                continue;
            }
            output.push({
                ...untranslatedItem,
                failure: "",
            });
        }
        return output;
    }

    /**
     * Merge verifier output into the queued items and keep the accepted ones.
     *
     * Items judged valid are returned. Rejected items take the verifier's
     * `fixedTranslation` (the input items are mutated), get a `failure` hint
     * and are NOT returned, so the caller verifies the corrected text again.
     *
     * @param translatedItemArray - Items that were sent to the verifier.
     * @param verifiedItemArray - Validated verifier output.
     * @returns Copies of the accepted items with `failure` cleared.
     */
    createVerifyItemsWithTranslation(translatedItemArray, verifiedItemArray) {
        const output = [];
        for (const translatedItem of translatedItemArray) {
            const verifiedItem = verifiedItemArray.find((checkVerifiedItem) => translatedItem.id === checkVerifiedItem.id);
            if (!verifiedItem) {
                continue;
            }
            if (verifiedItem.valid) {
                output.push({
                    ...translatedItem,
                    failure: "",
                });
                continue;
            }
            translatedItem.translated = verifiedItem.fixedTranslation;
            if (verifiedItem.fixedTranslation === "") {
                translatedItem.failure =
                    "The translated value cannot be an empty string";
                continue;
            }
            const templateStrings = verifiedItem.fixedTranslation.match(this.templatedStringRegex) ?? [];
            const missingVariables = (0, utils_1.getMissingVariables)(translatedItem.templateStrings, templateStrings);
            if (missingVariables.length !== 0) {
                translatedItem.failure = `Must add variables, missing from last translation: '${JSON.stringify(missingVariables)}'`;
                continue;
            }
            // 'translatedItem' is updated and queued again to check if the new fixed translation is valid
            translatedItem.failure = `Previous issue that should be corrected: '${verifiedItem.issue}'`;
        }
        return output;
    }

    /**
     * Send one batch through the translation prompt.
     *
     * A failed request is logged and treated as empty output, so the result
     * is then an empty array rather than a rejection.
     *
     * @param options - Job options; `translateItems` is the batch.
     * @returns The batch items that received an acceptable translation.
     */
    async runTranslationJob(options) {
        const generateState = {
            fixedTranslationMappings: {},
            generationRetries: 0,
            translationToRetryAttempts: {},
        };
        const generationPromptText = (0, prompts_1.translationPromptJSON)(options.inputLanguageCode, options.outputLanguageCode, this.generateTranslateItemsInput(options.translateItems), {
            context: options.context,
            glossary: options.glossary,
            keys: options.translateItems.map((it) => it.key),
            overridePrompt: options.overridePrompt,
            templatedStringPrefix: options.templatedStringPrefix,
            templatedStringSuffix: options.templatedStringSuffix,
        });
        let translated = "";
        try {
            translated = await (0, retry_1.retryWithBackoff)(() => this.generateJob(generationPromptText, options, generateState, types_1.TranslateItemOutputObjectSchema), {
                maxRetries: constants_1.RETRY_ATTEMPTS,
                rateLimiter: options.rateLimiter,
                verbose: options.verboseLogging,
            });
        }
        catch (e) {
            (0, utils_1.printError)(`Failed to translate: ${e}\n`);
        }
        const parsedOutput = this.parseTranslationToJSON(translated);
        // Arrow wrapper keeps `this` bound for the validator.
        const validTranslationObjects = parsedOutput.filter((item) => this.isValidTranslateItem(item));
        return this.createTranslateItemsWithTranslation(options.translateItems, validTranslationObjects);
    }

    /**
     * Send one batch through the verification prompt.
     *
     * A failed request is logged and treated as empty output, so the result
     * is then an empty array rather than a rejection.
     *
     * @param options - Job options; `translateItems` is the batch.
     * @returns The batch items the verifier accepted.
     */
    async runVerificationJob(options) {
        const generateState = {
            fixedTranslationMappings: {},
            generationRetries: 0,
            translationToRetryAttempts: {},
        };
        const generationPromptText = (0, prompts_1.verificationPromptJSON)(options.inputLanguageCode, options.outputLanguageCode, this.generateVerifyItemsInput(options.translateItems), {
            context: options.context,
            glossary: options.glossary,
            overridePrompt: options.overridePrompt,
            templatedStringPrefix: options.templatedStringPrefix,
            templatedStringSuffix: options.templatedStringSuffix,
        });
        let verified = "";
        try {
            verified = await (0, retry_1.retryWithBackoff)(() => this.generateJob(generationPromptText, options, generateState, types_1.VerifyItemOutputObjectSchema), {
                maxRetries: constants_1.RETRY_ATTEMPTS,
                rateLimiter: options.rateLimiter,
                verbose: options.verboseLogging,
            });
        }
        catch (e) {
            // This is the verification step; name it so logs are not misleading.
            (0, utils_1.printError)(`Failed to verify: ${e}\n`);
        }
        const parsedOutput = this.parseVerificationToJSON(verified);
        // Arrow wrapper keeps `this` bound for the validator.
        const validTranslationObjects = parsedOutput.filter((item) => this.isValidVerificationItem(item));
        return this.createVerifyItemsWithTranslation(options.translateItems, validTranslationObjects);
    }

    /**
     * Handle an empty model response: count the failure, clean up the chat
     * and signal the caller to retry.
     *
     * @param generationPromptText - Prompt that produced no output (logged).
     * @param options - Job options; `chats.generateTranslationChat` is used.
     * @param generateState - `generationRetries` is incremented in place.
     * @returns Always a rejected promise. After more than 10 consecutive
     *     empty responses the chat history is reset; otherwise only the last
     *     message is rolled back.
     */
    verifyGenerationAndRetry(generationPromptText, options, generateState) {
        generateState.generationRetries++;
        if (generateState.generationRetries > 10) {
            options.chats.generateTranslationChat.resetChatHistory();
            return Promise.reject(new Error("Failed to generate content due to exception. Resetting history."));
        }
        (0, utils_1.printError)(`Erroring text = ${generationPromptText}\n`);
        options.chats.generateTranslationChat.rollbackLastMessage();
        return Promise.reject(new Error("Failed to generate content due to exception."));
    }

    /**
     * Send a prompt and return the model's raw text.
     *
     * @param generationPromptText - Prompt to send.
     * @param options - Job options; `chats` and `verboseLogging` are read.
     * @param generateState - Retry bookkeeping; reset to 0 on success.
     * @param format - Output schema passed to the chat.
     * @returns The non-empty response text; rejects on an empty response.
     */
    async generateJob(generationPromptText, options, generateState, format) {
        // NOTE(review): verification jobs also go through
        // `generateTranslationChat` here, whereas checkJSON talks to
        // `verifyTranslationChat` — confirm this is intended.
        const text = await options.chats.generateTranslationChat.sendMessage(generationPromptText, format);
        if (!text) {
            return this.verifyGenerationAndRetry(generationPromptText, options, generateState);
        }
        generateState.generationRetries = 0;
        if (options.verboseLogging) {
            (0, utils_1.printWarn)(text);
        }
        return text;
    }
}
exports.default = GenerateTranslationJSON;
//# sourceMappingURL=generate.js.map