i18n-ai-translate
Version:
AI-powered localization CLI, Node library, and GitHub Action. Translate i18next JSON, Gettext PO, Java .properties, and iOS .strings with ChatGPT, Claude, Gemini, or local Ollama models.
273 lines • 12.9 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.default = translateCSV;
exports.splitTranslationLines = splitTranslationLines;
const constants_1 = require("../constants");
const prompts_1 = require("./prompts");
const utils_1 = require("../utils");
const retry_1 = require("../retry");
const shard_runner_1 = require("../shard_runner");
const verify_1 = require("./verify");
/**
 * Translate one batch of newline-separated input lines.
 *
 * Builds the generation prompt, records which input lines contain
 * templated strings (so `generate` can check they survive translation),
 * and then runs `generate` through `retryWithBackoff`.
 * @param options - batch options; reads `input`, the language codes, the
 * templated-string prefix/suffix, `context`, `glossary`, `overridePrompt`,
 * `rateLimiter` and `verboseLogging`, and forwards the whole object to
 * `generate`
 * @returns the value produced by the retry wrapper, or `""` when it
 * rejects (the error is printed rather than rethrown)
 */
async function generateTranslation(options) {
    const { input: batchText, inputLanguageCode: sourceLanguage, outputLanguageCode: targetLanguage, templatedStringPrefix: prefix, templatedStringSuffix: suffix, } = options;
    const promptText = (0, prompts_1.generationPrompt)(sourceLanguage, targetLanguage, batchText, {
        context: options.context,
        glossary: options.glossary,
        overridePrompt: options.overridePrompt,
    });
    const placeholderPattern = (0, utils_1.getTemplatedStringRegex)(prefix, suffix);
    const sourceLines = batchText.split("\n");
    // Map of line index -> templated strings found on that input line.
    const placeholdersByLine = {};
    sourceLines.forEach((sourceLine, lineIndex) => {
        const found = sourceLine.match(placeholderPattern);
        if (found) {
            placeholdersByLine[lineIndex] = found;
        }
    });
    // Shared across retries so `generate` can remember earlier attempts.
    const retryState = {
        fixedTranslationMappings: {},
        generationRetries: 0,
        inputLineToTemplatedString: placeholdersByLine,
        splitInput: sourceLines,
        translationToRetryAttempts: {},
    };
    // eslint-disable-next-line @typescript-eslint/no-use-before-define
    const attempt = () => generate(options, promptText, retryState);
    try {
        return await (0, retry_1.retryWithBackoff)(attempt, {
            maxRetries: constants_1.RETRY_ATTEMPTS,
            rateLimiter: options.rateLimiter,
            verbose: options.verboseLogging,
        });
    }
    catch (failure) {
        (0, utils_1.printError)(`Failed to translate: ${failure}`);
        return "";
    }
}
/**
 * Complete the initial translation of the input text.
 *
 * Hands every shard to `runShard` via `runAcrossShards`; the shards all
 * write into one shared result object.
 * @param ctx - translation context; reads `flatInput` (the flattened
 * key/value input), `options`, `pool` (its `rateLimiter` is handed to each
 * shard), `groups`, and `stats.translate` (its `batchStartTime` is set to
 * the current time before work starts)
 * @returns the object the shards filled in, keyed like `flatInput`
 */
async function translateCSV(ctx) {
    const { flatInput, options, pool, groups } = ctx;
    const translationStats = ctx.stats.translate;
    const translations = {};
    const keyCount = Object.keys(flatInput).length;
    const batchSize = Number(options.batchSize);
    translationStats.batchStartTime = Date.now();
    let completedKeys = 0;
    // Accumulates finished keys and, in verbose mode, prints progress.
    const reportBatch = (count) => {
        completedKeys += count;
        if (!options.verbose) {
            return;
        }
        (0, utils_1.printProgress)("In Progress", translationStats.batchStartTime, keyCount, completedKeys);
    };
    const translateShard = (shard, chats) => runShard(shard, chats, options, pool.rateLimiter, batchSize, translations, { onBatchCompleted: reportBatch });
    await (0, shard_runner_1.runAcrossShards)(flatInput, groups, pool, translateShard);
    return translations;
}
/**
 * Translate one shard of the input in sequential batches, writing each
 * translated value into `output` under its original key.
 *
 * Each batch is sent as one quoted string per line; the response is
 * expected to contain exactly one quoted line per key, in the same order.
 * A batch that comes back empty, or with a different number of lines than
 * keys, is treated as failed: it is skipped when `options.continueOnError`
 * is set, otherwise the rest of the shard is abandoned.
 * @param shardInput - key/value pairs to translate
 * @param chats - chat handles forwarded to `generateTranslation`
 * @param options - translation options (languages, prompts, flags)
 * @param rateLimiter - rate limiter forwarded to `generateTranslation`
 * @param batchSize - number of keys per request; must be a number >= 1
 * @param output - result object, mutated in place
 * @param callbacks - `onBatchCompleted(count)` is called after each
 * successfully stored batch
 * @throws {RangeError} when the shard is non-empty and `batchSize` is not
 * a number >= 1 (such a value previously looped forever or sent empty
 * requests)
 */
async function runShard(shardInput, chats, options, rateLimiter, batchSize, output, callbacks) {
    const shardKeys = Object.keys(shardInput);
    if (shardKeys.length === 0) {
        return;
    }
    const step = Number(batchSize);
    // A zero/negative step never advances the loop; NaN yields empty batches.
    if (Number.isNaN(step) || step < 1) {
        throw new RangeError(`Invalid batch size: ${batchSize}. Expected a number >= 1.`);
    }
    for (let i = 0; i < shardKeys.length; i += step) {
        const keys = shardKeys.slice(i, i + step);
        const input = keys.map((x) => `"${shardInput[x]}"`).join("\n");
        // eslint-disable-next-line no-await-in-loop
        const generatedTranslation = await generateTranslation({
            chats,
            context: options.context,
            ensureChangedTranslation: options.ensureChangedTranslation,
            glossary: options.glossary,
            input,
            inputLanguageCode: options.inputLanguageCode,
            keys,
            outputLanguageCode: options.outputLanguageCode,
            overridePrompt: options.overridePrompt,
            rateLimiter,
            skipStylingVerification: options.skipStylingVerification,
            skipTranslationVerification: options.skipTranslationVerification,
            templatedStringPrefix: options.templatedStringPrefix,
            templatedStringSuffix: options.templatedStringSuffix,
            verboseLogging: options.verbose,
        });
        const isEmpty = generatedTranslation === "";
        const splitLines = isEmpty ? [] : generatedTranslation.split("\n");
        // Lines are matched to keys by position, so a count mismatch would
        // either crash on a missing line or store values under wrong keys.
        const isMisaligned = !isEmpty && splitLines.length !== keys.length;
        if (isEmpty || isMisaligned) {
            if (isMisaligned) {
                (0, utils_1.printError)(`Expected ${keys.length} translated line(s) for ${options.outputLanguageCode}, got ${splitLines.length}`);
            }
            if (options.continueOnError) {
                (0, utils_1.printError)(`Skipping ${keys.length} key(s) after repeated failures for ${options.outputLanguageCode}: ${keys.join(", ")}`);
                continue;
            }
            (0, utils_1.printError)(`Failed to generate translation for ${options.outputLanguageCode}`);
            return;
        }
        for (let j = 0; j < keys.length; j++) {
            // Strip the surrounding quotes added when the batch was built.
            output[keys[j]] = splitLines[j].slice(1, -1);
            if (options.verbose) {
                (0, utils_1.printInfo)(`${keys[j].replaceAll("*", ".")}:\n${shardInput[keys[j]]}\n=>\n${output[keys[j]]}\n`);
            }
        }
        callbacks.onBatchCompleted(keys.length);
    }
}
/**
 * Split a model's CSV translation response into one entry per line,
 * dropping blank lines. The model often pads the response with a
 * trailing newline or blank separator lines; a genuine CSV translation
 * is always a quoted string, so an empty line is never valid output —
 * only noise. Filtering it here keeps the downstream line-count check
 * from rejecting (and ultimately dropping) the whole batch over a stray
 * blank line. See Bug 6.
 *
 * Both LF and CRLF line endings are accepted: splitting on "\n" alone
 * would leave a trailing "\r" on every line of a CRLF response, and such
 * a line no longer ends with a closing quote.
 * @param text - the raw model response
 * @returns the non-empty lines, without line terminators
 */
function splitTranslationLines(text) {
    return text.split(/\r?\n/).filter((line) => line.trim() !== "");
}
/**
 * Run one generation attempt for a batch and validate the response.
 *
 * Sends the generation prompt, then checks the response: the number of
 * non-blank lines must equal `keys.length`, every templated string found
 * in an input line must appear in the matching output line, and every
 * line must be a quoted string. When `ensureChangedTranslation` is set, a
 * line identical to its input is re-requested individually. Finally the
 * translation and (optionally) styling verifiers are consulted.
 *
 * Every failure is reported by rejecting, which lets the caller's retry
 * wrapper decide whether to try again.
 * @param options - batch options (chats, language codes, input, keys,
 * flags and prompt overrides)
 * @param generationPromptText - the prompt to send for this batch
 * @param generateState - state kept across retries of the same batch:
 * `generationRetries`, `inputLineToTemplatedString`,
 * `translationToRetryAttempts`, `fixedTranslationMappings`, `splitInput`
 * @returns the validated response, one quoted line per key joined by "\n"
 */
async function generate(options, generationPromptText, generateState) {
    const { chats, inputLanguageCode: inputLanguage, outputLanguageCode: outputLanguage, input, keys, verboseLogging, ensureChangedTranslation, } = options;
    const { inputLineToTemplatedString, translationToRetryAttempts, fixedTranslationMappings, splitInput, // Fine to destructure here -- we never modify the original
     } = generateState;
    let text = await chats.generateTranslationChat.sendMessage(generationPromptText);
    if (!text) {
        generateState.generationRetries++;
        // After more than 10 consecutive empty responses, start over with a
        // clean chat history instead of rolling back a single message.
        if (generateState.generationRetries > 10) {
            chats.generateTranslationChat.resetChatHistory();
            return Promise.reject(new Error("Failed to generate content due to exception. Resetting history."));
        }
        (0, utils_1.printError)(`Erroring text = ${input}`);
        chats.generateTranslationChat.rollbackLastMessage();
        return Promise.reject(new Error("Failed to generate content due to exception."));
    }
    generateState.generationRetries = 0;
    if (text.startsWith("```\n") && text.endsWith("\n```")) {
        if (verboseLogging) {
            (0, utils_1.printInfo)("\nResponse started and ended with triple backticks");
        }
        text = text.slice(4, -4);
    }
    // Response length matches. Blank lines (a trailing newline, stray
    // separators) are dropped first so they don't trip the strict
    // count check and cause an otherwise-valid batch to be retried into
    // oblivion and silently skipped. See Bug 6.
    const splitText = splitTranslationLines(text);
    if (splitText.length !== keys.length) {
        chats.generateTranslationChat.rollbackLastMessage();
        return Promise.reject(new Error(`Invalid number of lines: expected ${keys.length}, got ${splitText.length}. text = ${text}`));
    }
    // Templated strings match
    for (const i in inputLineToTemplatedString) {
        if (Object.prototype.hasOwnProperty.call(inputLineToTemplatedString, i)) {
            for (const templatedString of inputLineToTemplatedString[i]) {
                if (!splitText[i].includes(templatedString)) {
                    chats.generateTranslationChat.rollbackLastMessage();
                    return Promise.reject(new Error(`Missing templated string: ${templatedString}`));
                }
            }
        }
    }
    // Trim extra quotes if they exist
    for (let i = 0; i < splitText.length; i++) {
        let line = splitText[i];
        while (line.startsWith('""')) {
            line = line.slice(1);
        }
        while (line.endsWith('""')) {
            line = line.slice(0, -1);
        }
        splitText[i] = line;
    }
    // Per-line translation verification
    for (let i = 0; i < splitText.length; i++) {
        let line = splitText[i];
        if (!line.startsWith('"') ||
            !line.endsWith('"') ||
            line.endsWith('\\"')) {
            chats.generateTranslationChat.rollbackLastMessage();
            return Promise.reject(new Error(`Invalid line: ${line}`));
        }
        // Only lines that came back unchanged (and are longer than 4
        // characters, quotes included) need the retry handling below.
        if (!ensureChangedTranslation ||
            line !== splitInput[i] ||
            line.length <= 4) {
            continue;
        }
        if (translationToRetryAttempts[line] === undefined) {
            translationToRetryAttempts[line] = 0;
        }
        else if (fixedTranslationMappings[line]) {
            // Reuse the fix found on an earlier attempt for this line.
            splitText[i] = fixedTranslationMappings[line];
            continue;
        }
        const retryTranslationPromptText = (0, prompts_1.failedTranslationPrompt)(inputLanguage, outputLanguage, splitInput[i], line);
        const fixedText =
        // eslint-disable-next-line no-await-in-loop
        await chats.generateTranslationChat.sendMessage(retryTranslationPromptText);
        // Same emptiness test as the main response above, so a missing
        // reply is reported as a generation failure rather than crashing.
        if (!fixedText) {
            chats.generateTranslationChat.rollbackLastMessage();
            return Promise.reject(new Error("Failed to generate content due to exception."));
        }
        const oldText = line;
        line = fixedText;
        // TODO: Move to helper
        for (const templatedString of inputLineToTemplatedString[i] ?? []) {
            if (!line.includes(templatedString)) {
                chats.generateTranslationChat.rollbackLastMessage();
                return Promise.reject(new Error(`Missing templated string: ${templatedString}`));
            }
        }
        // TODO: Move to helper
        if (!line.startsWith('"') || !line.endsWith('"')) {
            chats.generateTranslationChat.rollbackLastMessage();
            return Promise.reject(new Error(`Invalid line: ${line}`));
        }
        // Collapse doubled wrapping quotes, but keep an empty quoted
        // string ("") intact so the line stays a valid quoted entry.
        while (line.length > 2 &&
            line.startsWith('""') &&
            line.endsWith('""')) {
            line = line.slice(1, -1);
        }
        // Store the normalised line so the returned text matches what is
        // remembered in fixedTranslationMappings.
        splitText[i] = line;
        if (line !== splitInput[i]) {
            if (verboseLogging) {
                (0, utils_1.printInfo)(`Successfully translated: ${oldText} => ${line}`);
            }
            fixedTranslationMappings[oldText] = line;
            continue;
        }
        translationToRetryAttempts[line]++;
        // Give up on changing this line after the third unchanged retry
        // and accept it as-is.
        if (translationToRetryAttempts[line] < 3) {
            chats.generateTranslationChat.rollbackLastMessage();
            return Promise.reject(new Error(`No translation: ${line}`));
        }
    }
    // Re-join once, after every per-line substitution (including cached
    // fixes) has been applied, so the returned text reflects them all.
    text = splitText.join("\n");
    let translationVerificationResponse = "";
    if (!options.skipTranslationVerification) {
        translationVerificationResponse = await (0, verify_1.verifyTranslation)(chats.verifyTranslationChat, inputLanguage, outputLanguage, input, text, {
            context: options.context,
            glossary: options.glossary,
            overridePrompt: options.overridePrompt,
        });
    }
    if ((0, utils_1.isNAK)(translationVerificationResponse)) {
        chats.generateTranslationChat.signalInvalid("translation");
        return Promise.reject(new Error(`Invalid translation. text = ${text}`));
    }
    // Styling is folded into the accuracy prompt by default (the merged
    // rubric above checks both). Only run the standalone styling pass
    // when the user has explicitly supplied a stylingVerificationPrompt
    // override — otherwise we'd be making a wasted API call that just
    // echoes back an ACK to the trivial no-op prompt.
    let stylingVerificationResponse = "";
    const hasStylingOverride = Boolean(options.overridePrompt?.stylingVerificationPrompt);
    if (!options.skipStylingVerification && hasStylingOverride) {
        stylingVerificationResponse = await (0, verify_1.verifyStyling)(chats.verifyStylingChat, inputLanguage, outputLanguage, input, text, {
            context: options.context,
            glossary: options.glossary,
            overridePrompt: options.overridePrompt,
        });
    }
    if ((0, utils_1.isNAK)(stylingVerificationResponse)) {
        chats.generateTranslationChat.signalInvalid("styling");
        return Promise.reject(new Error(`Invalid styling. text = ${text}`));
    }
    return text;
}
//# sourceMappingURL=generate.js.map