UNPKG

i18n-ai-translate

Version:

AI-powered localization CLI, Node library, and GitHub Action. Translate i18next JSON, Gettext PO, Java .properties, and iOS .strings with ChatGPT, Claude, Gemini, or local Ollama models.

440 lines (386 loc) 14.9 kB
import { RETRY_ATTEMPTS } from "../constants";
import { failedTranslationPrompt, generationPrompt } from "./prompts";
import {
    getTemplatedStringRegex,
    isNAK,
    printError,
    printInfo,
    printProgress,
} from "../utils";
import { retryWithBackoff } from "../retry";
import { runAcrossShards } from "../shard_runner";
import { verifyStyling, verifyTranslation } from "./verify";
import type { GenerateStateCSV } from "../types";
import type Chats from "../interfaces/chats";
import type GenerateTranslationOptionsCSV from "../interfaces/generate_translation_options_csv";
import type RateLimiter from "../rate_limiter";
import type TranslateOptions from "../interfaces/translate_options";
import type TranslationContext from "../interfaces/translation_context";

/**
 * Translate one batch of newline-separated, double-quoted lines.
 *
 * Builds the generation prompt, records which input lines contain
 * templated strings, and then runs `generate` through
 * `retryWithBackoff`. The same `generateState` object is passed to
 * every attempt, so retry counters and cached fixes carry over.
 * @param options - the batch input plus language, prompt and verification settings
 * @returns the translated batch, or "" when `retryWithBackoff` rejects
 * (the failure is reported with `printError` rather than rethrown)
 */
async function generateTranslation(
    options: GenerateTranslationOptionsCSV,
): Promise<string> {
    const {
        input,
        inputLanguageCode: inputLanguage,
        outputLanguageCode: outputLanguage,
        templatedStringPrefix,
        templatedStringSuffix,
    } = options;

    const generationPromptText = generationPrompt(
        inputLanguage,
        outputLanguage,
        input,
        {
            context: options.context,
            glossary: options.glossary,
            overridePrompt: options.overridePrompt,
        },
    );

    const templatedStringRegex = getTemplatedStringRegex(
        templatedStringPrefix,
        templatedStringSuffix,
    );

    const splitInput = input.split("\n");

    const generateState: GenerateStateCSV = {
        fixedTranslationMappings: {},
        generationRetries: 0,
        inputLineToTemplatedString: {},
        splitInput,
        translationToRetryAttempts: {},
    };

    // Remember, per input line index, the templated strings that the
    // translated line must still contain.
    for (let i = 0; i < splitInput.length; i++) {
        const match = splitInput[i].match(templatedStringRegex);
        if (match) {
            generateState.inputLineToTemplatedString[i] = match;
        }
    }

    let translated = "";
    try {
        translated = await retryWithBackoff(
            // eslint-disable-next-line @typescript-eslint/no-use-before-define
            () => generate(options, generationPromptText, generateState),
            {
                maxRetries: RETRY_ATTEMPTS,
                rateLimiter: options.rateLimiter,
                verbose: options.verboseLogging,
            },
        );
    } catch (e) {
        // Deliberate best-effort: the caller treats "" as "batch failed".
        printError(`Failed to translate: ${e}`);
    }

    return translated;
}

/**
 * Complete the initial translation of the input text.
 *
 * Hands `ctx.flatInput` to `runAcrossShards`, translating each shard
 * batch by batch with `runShard`, and collects the results in a single
 * output object. Progress is printed after every completed batch when
 * `options.verbose` is set.
 * @param ctx - the translation context; `flatInput`, `options`, `pool`,
 * `groups` and `stats.translate` are read from it
 * @returns a map from key to translated string; keys belonging to
 * batches that failed are absent
 */
export default async function translateCSV(
    ctx: TranslationContext,
): Promise<{ [key: string]: string }> {
    const { flatInput, options, pool, groups } = ctx;
    const translationStats = ctx.stats.translate;

    const output: { [key: string]: string } = {};

    const totalKeys = Object.keys(flatInput).length;
    const batchSize = Number(options.batchSize);

    translationStats.batchStartTime = Date.now();

    let processed = 0;
    await runAcrossShards(flatInput, groups, pool, (shard, chats) =>
        runShard(shard, chats, options, pool.rateLimiter, batchSize, output, {
            onBatchCompleted: (count) => {
                processed += count;
                if (options.verbose) {
                    printProgress(
                        "In Progress",
                        translationStats.batchStartTime,
                        totalKeys,
                        processed,
                    );
                }
            },
        }),
    );

    return output;
}

/**
 * Translate every key of one shard in batches of `batchSize` keys and
 * write the results into `output`.
 *
 * Each batch is sent as one double-quoted line per key. When a batch
 * comes back empty (translation failed) the batch is skipped if
 * `options.continueOnError` is set; otherwise the shard stops there.
 * @param shardInput - key to source-text map for this shard
 * @param chats - chat sessions passed through to `generateTranslation`
 * @param options - translate options (languages, prompts, flags)
 * @param rateLimiter - rate limiter passed through to `generateTranslation`
 * @param batchSize - number of keys per request; must be a finite number >= 1
 * @param output - shared result map, mutated in place
 * @param callbacks - `onBatchCompleted` is called with the key count of each finished batch
 * @throws Error when the shard is non-empty and `batchSize` is not a finite number >= 1
 */
async function runShard(
    shardInput: { [key: string]: string },
    chats: Chats,
    options: TranslateOptions,
    rateLimiter: RateLimiter,
    batchSize: number,
    output: { [key: string]: string },
    callbacks: { onBatchCompleted: (count: number) => void },
): Promise<void> {
    const shardKeys = Object.keys(shardInput);
    if (shardKeys.length === 0) {
        return;
    }

    // A batch size of 0 would never advance the loop below (an endless
    // stream of empty requests when continueOnError is set) and NaN would
    // send a single empty batch, so fail loudly instead.
    if (!Number.isFinite(batchSize) || batchSize < 1) {
        throw new Error(
            `Invalid batchSize: ${batchSize}. Expected a number >= 1.`,
        );
    }

    for (let i = 0; i < shardKeys.length; i += batchSize) {
        const keys = shardKeys.slice(i, i + batchSize);
        const input = keys.map((x) => `"${shardInput[x]}"`).join("\n");

        // eslint-disable-next-line no-await-in-loop
        const generatedTranslation = await generateTranslation({
            chats,
            context: options.context,
            ensureChangedTranslation:
                options.ensureChangedTranslation as boolean,
            glossary: options.glossary,
            input,
            inputLanguageCode: options.inputLanguageCode,
            keys,
            outputLanguageCode: options.outputLanguageCode,
            overridePrompt: options.overridePrompt,
            rateLimiter,
            skipStylingVerification:
                options.skipStylingVerification as boolean,
            skipTranslationVerification:
                options.skipTranslationVerification as boolean,
            templatedStringPrefix: options.templatedStringPrefix as string,
            templatedStringSuffix: options.templatedStringSuffix as string,
            verboseLogging: options.verbose as boolean,
        });

        if (generatedTranslation === "") {
            if (options.continueOnError) {
                printError(
                    `Skipping ${keys.length} key(s) after repeated failures for ${options.outputLanguageCode}: ${keys.join(", ")}`,
                );
                continue;
            }

            printError(
                `Failed to generate translation for ${options.outputLanguageCode}`,
            );
            return;
        }

        const splitLines = generatedTranslation.split("\n");
        for (let j = 0; j < keys.length; j++) {
            // Strip the surrounding double quotes of the CSV line.
            output[keys[j]] = splitLines[j].slice(1, -1);

            if (options.verbose)
                printInfo(
                    `${keys[j].replaceAll("*", ".")}:\n${shardInput[keys[j]]}\n=>\n${output[keys[j]]}\n`,
                );
        }

        callbacks.onBatchCompleted(keys.length);
    }
}

/**
 * Split a model's CSV translation response into one entry per line,
 * dropping blank lines. The model often pads the response with a
 * trailing newline or blank separator lines; a genuine CSV translation
 * is always a quoted string, so an empty line is never valid output —
 * only noise. Filtering it here keeps the downstream line-count check
 * from rejecting (and ultimately dropping) the whole batch over a stray
 * blank line. See Bug 6.
 * @param text - the raw model response
 * @returns the non-empty lines
 */
export function splitTranslationLines(text: string): string[] {
    return text.split("\n").filter((line) => line.trim() !== "");
}

/**
 * Find the first templated string that a translated line no longer contains.
 * @param line - the translated line to inspect
 * @param templatedStrings - templated strings recorded for the matching
 * input line; `undefined` when the input line had none
 * @returns the first missing templated string, or `undefined` when all are present
 */
function findMissingTemplatedString(
    line: string,
    templatedStrings: readonly string[] | undefined,
): string | undefined {
    for (const templatedString of templatedStrings ?? []) {
        if (!line.includes(templatedString)) {
            return templatedString;
        }
    }

    return undefined;
}

/**
 * Run a single generation attempt for one batch and validate the result.
 *
 * Steps: send the generation prompt, strip a surrounding triple-backtick
 * fence, check the line count and the templated strings, normalise
 * redundant quotes, optionally re-ask the model for lines that came back
 * unchanged, and finally run translation/styling verification.
 * @param options - batch options (chats, languages, keys, flags)
 * @param generationPromptText - the prompt sent to the generation chat
 * @param generateState - state shared across attempts; its retry counters
 * and fixed-translation cache are mutated
 * @returns the validated translation, one double-quoted line per key
 * @throws Error when the model returns nothing, the line count is wrong,
 * a templated string is missing, a line is not properly quoted, a line is
 * still untranslated, or verification answers NAK
 */
async function generate(
    options: GenerateTranslationOptionsCSV,
    generationPromptText: string,
    generateState: GenerateStateCSV,
): Promise<string> {
    const {
        chats,
        inputLanguageCode: inputLanguage,
        outputLanguageCode: outputLanguage,
        input,
        keys,
        verboseLogging,
        ensureChangedTranslation,
    } = options;

    const {
        inputLineToTemplatedString,
        translationToRetryAttempts,
        fixedTranslationMappings,
        splitInput, // Fine to destructure here -- we never modify the original
    } = generateState;

    let text =
        await chats.generateTranslationChat.sendMessage(generationPromptText);

    if (!text) {
        generateState.generationRetries++;
        if (generateState.generationRetries > 10) {
            chats.generateTranslationChat.resetChatHistory();
            throw new Error(
                "Failed to generate content due to exception. Resetting history.",
            );
        }

        printError(`Erroring text = ${input}`);
        chats.generateTranslationChat.rollbackLastMessage();
        throw new Error("Failed to generate content due to exception.");
    }

    generateState.generationRetries = 0;

    if (text.startsWith("```\n") && text.endsWith("\n```")) {
        if (verboseLogging) {
            printInfo("\nResponse started and ended with triple backticks");
        }

        text = text.slice(4, -4);
    }

    // Response length matches. Blank lines (a trailing newline, stray
    // separators) are dropped first so they don't trip the strict
    // count check and cause an otherwise-valid batch to be retried into
    // oblivion and silently skipped. See Bug 6.
    const splitText = splitTranslationLines(text);
    if (splitText.length !== keys.length) {
        chats.generateTranslationChat.rollbackLastMessage();
        throw new Error(
            `Invalid number of lines: expected ${keys.length}, got ${splitText.length}. text = ${text}`,
        );
    }

    // Templated strings match. Object.keys yields own enumerable keys
    // only; they are strings, so convert before indexing the arrays.
    for (const key of Object.keys(inputLineToTemplatedString)) {
        const lineIndex = Number(key);
        const missing = findMissingTemplatedString(
            splitText[lineIndex],
            inputLineToTemplatedString[lineIndex],
        );
        if (missing !== undefined) {
            chats.generateTranslationChat.rollbackLastMessage();
            throw new Error(`Missing templated string: ${missing}`);
        }
    }

    // Trim extra quotes if they exist
    for (let i = 0; i < splitText.length; i++) {
        let line = splitText[i];
        while (line.startsWith('""')) {
            line = line.slice(1);
        }

        while (line.endsWith('""')) {
            line = line.slice(0, -1);
        }

        splitText[i] = line;
    }

    // Per-line translation verification
    for (let i = 0; i < splitText.length; i++) {
        let line = splitText[i];
        if (
            !line.startsWith('"') ||
            !line.endsWith('"') ||
            line.endsWith('\\"')
        ) {
            chats.generateTranslationChat.rollbackLastMessage();
            throw new Error(`Invalid line: ${line}`);
        }

        const isUnchanged =
            ensureChangedTranslation &&
            line === splitInput[i] &&
            line.length > 4;
        if (!isUnchanged) {
            continue;
        }

        if (translationToRetryAttempts[line] === undefined) {
            translationToRetryAttempts[line] = 0;
        } else if (fixedTranslationMappings[line]) {
            // Reuse a fix obtained earlier; it reaches the returned text
            // through the join after this loop.
            splitText[i] = fixedTranslationMappings[line];
            continue;
        }

        const retryTranslationPromptText = failedTranslationPrompt(
            inputLanguage,
            outputLanguage,
            splitInput[i],
            line,
        );

        const fixedText =
            // eslint-disable-next-line no-await-in-loop
            await chats.generateTranslationChat.sendMessage(
                retryTranslationPromptText,
            );

        if (!fixedText) {
            chats.generateTranslationChat.rollbackLastMessage();
            throw new Error("Failed to generate content due to exception.");
        }

        const oldText = line;
        line = fixedText;

        const missing = findMissingTemplatedString(
            line,
            inputLineToTemplatedString[i],
        );
        if (missing !== undefined) {
            chats.generateTranslationChat.rollbackLastMessage();
            throw new Error(`Missing templated string: ${missing}`);
        }

        if (!line.startsWith('"') || !line.endsWith('"')) {
            chats.generateTranslationChat.rollbackLastMessage();
            throw new Error(`Invalid line: ${line}`);
        }

        while (line.startsWith('""') && line.endsWith('""')) {
            line = line.slice(1, -1);
        }

        // Keep the normalised line so the returned text and the cached
        // fix agree with each other.
        splitText[i] = line;

        if (line !== splitInput[i]) {
            if (verboseLogging) {
                printInfo(`Successfully translated: ${oldText} => ${line}`);
            }

            fixedTranslationMappings[oldText] = line;
            continue;
        }

        translationToRetryAttempts[line]++;
        if (translationToRetryAttempts[line] < 3) {
            chats.generateTranslationChat.rollbackLastMessage();
            throw new Error(`No translation: ${line}`);
        }
        // After three unsuccessful attempts the unchanged line is accepted.
    }

    // Single join after all per-line fixes (fresh or cached) are applied.
    text = splitText.join("\n");

    let translationVerificationResponse = "";
    if (!options.skipTranslationVerification) {
        translationVerificationResponse = await verifyTranslation(
            chats.verifyTranslationChat,
            inputLanguage,
            outputLanguage,
            input,
            text,
            {
                context: options.context,
                glossary: options.glossary,
                overridePrompt: options.overridePrompt,
            },
        );
    }

    if (isNAK(translationVerificationResponse)) {
        chats.generateTranslationChat.signalInvalid("translation");
        throw new Error(`Invalid translation. text = ${text}`);
    }

    // Styling is folded into the accuracy prompt by default (the merged
    // rubric above checks both). Only run the standalone styling pass
    // when the user has explicitly supplied a stylingVerificationPrompt
    // override — otherwise we'd be making a wasted API call that just
    // echoes back an ACK to the trivial no-op prompt.
    let stylingVerificationResponse = "";
    const hasStylingOverride = Boolean(
        options.overridePrompt?.stylingVerificationPrompt,
    );
    if (!options.skipStylingVerification && hasStylingOverride) {
        stylingVerificationResponse = await verifyStyling(
            chats.verifyStylingChat,
            inputLanguage,
            outputLanguage,
            input,
            text,
            {
                context: options.context,
                glossary: options.glossary,
                overridePrompt: options.overridePrompt,
            },
        );
    }

    if (isNAK(stylingVerificationResponse)) {
        chats.generateTranslationChat.signalInvalid("styling");
        throw new Error(`Invalid styling. text = ${text}`);
    }

    return text;
}