i18n-ai-translate

Version:

AI-powered localization CLI, Node library, and GitHub Action. Translate i18next JSON, Gettext PO, Java .properties, and iOS .strings with ChatGPT, Claude, Gemini, or local Ollama models.

github.com/taahamahdi/i18n-ai-translate

taahamahdi/i18n-ai-translate

440 lines (386 loc) • 14.9 kB

text/typescript

import { RETRY_ATTEMPTS } from "../constants";
import { failedTranslationPrompt, generationPrompt } from "./prompts";
import {
    getTemplatedStringRegex,
    isNAK,
    printError,
    printInfo,
    printProgress,
} from "../utils";
import { retryWithBackoff } from "../retry";
import { runAcrossShards } from "../shard_runner";
import { verifyStyling, verifyTranslation } from "./verify";
import type { GenerateStateCSV } from "../types";
import type Chats from "../interfaces/chats";
import type GenerateTranslationOptionsCSV from "../interfaces/generate_translation_options_csv";
import type RateLimiter from "../rate_limiter";
import type TranslateOptions from "../interfaces/translate_options";
import type TranslationContext from "../interfaces/translation_context";

/**
 * Translate one batch of quoted, newline-separated lines.
 *
 * Builds the generation prompt, records which input lines contain
 * templated strings (placeholders), and then calls `generate` through
 * `retryWithBackoff`. The per-batch state object is created once here and
 * shared by every attempt, so retry counters and cached fixes survive
 * across attempts.
 * @param options - the batch input plus language codes, chats and flags
 * @returns the translated batch, one quoted line per key, or `""` when
 * `retryWithBackoff` ultimately rejects (the error is printed, not thrown)
 */
async function generateTranslation(
    options: GenerateTranslationOptionsCSV,
): Promise<string> {
    const {
        input,
        inputLanguageCode: inputLanguage,
        outputLanguageCode: outputLanguage,
        templatedStringPrefix,
        templatedStringSuffix,
    } = options;

    const generationPromptText = generationPrompt(
        inputLanguage,
        outputLanguage,
        input,
        {
            context: options.context,
            glossary: options.glossary,
            overridePrompt: options.overridePrompt,
        },
    );

    const templatedStringRegex = getTemplatedStringRegex(
        templatedStringPrefix,
        templatedStringSuffix,
    );

    const splitInput = input.split("\n");

    const generateState: GenerateStateCSV = {
        fixedTranslationMappings: {},
        generationRetries: 0,
        inputLineToTemplatedString: {},
        splitInput,
        translationToRetryAttempts: {},
    };

    // Remember, per input line index, the templated strings that the
    // translated line must still contain.
    for (let i = 0; i < splitInput.length; i++) {
        const match = splitInput[i].match(templatedStringRegex);
        if (match) {
            generateState.inputLineToTemplatedString[i] = match;
        }
    }

    let translated = "";
    try {
        translated = await retryWithBackoff(
            // eslint-disable-next-line @typescript-eslint/no-use-before-define
            () => generate(options, generationPromptText, generateState),
            {
                maxRetries: RETRY_ATTEMPTS,
                rateLimiter: options.rateLimiter,
                verbose: options.verboseLogging,
            },
        );
    } catch (e) {
        // Deliberately best-effort: the caller treats "" as "batch failed".
        printError(`Failed to translate: ${e}`);
    }

    return translated;
}

/**
 * Complete the initial translation of the input text.
 *
 * Hands `ctx.flatInput` to `runAcrossShards` and translates each shard it
 * yields in batches of `ctx.options.batchSize` keys, collecting every
 * translated value into a single output object.
 * @param ctx - the translation context; `flatInput`, `options`, `pool`,
 * `groups` and `stats.translate` are read from it, and
 * `stats.translate.batchStartTime` is set to the current time
 * @returns a map from each translated key to its translated value; keys
 * whose batch failed are absent
 * @throws Error when `options.batchSize` is not a positive number
 */
export default async function translateCSV(
    ctx: TranslationContext,
): Promise<{ [key: string]: string }> {
    const { flatInput, options, pool, groups } = ctx;
    const translationStats = ctx.stats.translate;

    const output: { [key: string]: string } = {};

    const totalKeys = Object.keys(flatInput).length;
    const batchSize = Number(options.batchSize);

    // The batching loop advances by `batchSize`: zero or a negative value
    // would never advance (an endless stream of empty batches), and NaN
    // would produce one empty batch. Fail fast instead of burning API calls.
    if (!(batchSize > 0)) {
        throw new Error(
            `Invalid batch size: ${options.batchSize}. Expected a positive number.`,
        );
    }

    translationStats.batchStartTime = Date.now();

    let processed = 0;

    await runAcrossShards(flatInput, groups, pool, (shard, chats) =>
        // eslint-disable-next-line @typescript-eslint/no-use-before-define
        runShard(shard, chats, options, pool.rateLimiter, batchSize, output, {
            onBatchCompleted: (count) => {
                processed += count;
                if (options.verbose) {
                    printProgress(
                        "In Progress",
                        translationStats.batchStartTime,
                        totalKeys,
                        processed,
                    );
                }
            },
        }),
    );

    return output;
}

/**
 * Translate every key of one shard, `batchSize` keys at a time, writing
 * the results into `output`.
 *
 * Each batch is sent as one double-quoted line per key. A batch that comes
 * back as `""` has failed: with `options.continueOnError` its keys are
 * skipped and the next batch is attempted, otherwise the rest of the shard
 * is abandoned.
 * @param shardInput - key to source-text map for this shard
 * @param chats - the chats used for generation and verification
 * @param options - the translate options forwarded to each batch
 * @param rateLimiter - the rate limiter forwarded to each batch
 * @param batchSize - number of keys per batch
 * @param output - shared result map, mutated in place
 * @param callbacks - `onBatchCompleted` is called with the batch's key
 * count after each successful batch
 */
async function runShard(
    shardInput: { [key: string]: string },
    chats: Chats,
    options: TranslateOptions,
    rateLimiter: RateLimiter,
    batchSize: number,
    output: { [key: string]: string },
    callbacks: { onBatchCompleted: (count: number) => void },
): Promise<void> {
    const shardKeys = Object.keys(shardInput);
    for (let i = 0; i < shardKeys.length; i += batchSize) {
        const keys = shardKeys.slice(i, i + batchSize);

        // One quoted line per key.
        // NOTE(review): a source value that itself contains "\n" would
        // span several lines here and could never satisfy the line-count
        // check downstream -- confirm values are single-line upstream.
        const input = keys.map((x) => `"${shardInput[x]}"`).join("\n");

        // eslint-disable-next-line no-await-in-loop
        const generatedTranslation = await generateTranslation({
            chats,
            context: options.context,
            ensureChangedTranslation:
                options.ensureChangedTranslation as boolean,
            glossary: options.glossary,
            input,
            inputLanguageCode: options.inputLanguageCode,
            keys,
            outputLanguageCode: options.outputLanguageCode,
            overridePrompt: options.overridePrompt,
            rateLimiter,
            skipStylingVerification:
                options.skipStylingVerification as boolean,
            skipTranslationVerification:
                options.skipTranslationVerification as boolean,
            templatedStringPrefix: options.templatedStringPrefix as string,
            templatedStringSuffix: options.templatedStringSuffix as string,
            verboseLogging: options.verbose as boolean,
        });

        if (generatedTranslation === "") {
            if (options.continueOnError) {
                printError(
                    `Skipping ${keys.length} key(s) after repeated failures for ${options.outputLanguageCode}: ${keys.join(", ")}`,
                );
                continue;
            }

            printError(
                `Failed to generate translation for ${options.outputLanguageCode}`,
            );
            return;
        }

        const splitLines = generatedTranslation.split("\n");
        for (let j = 0; j < keys.length; j++) {
            // Strip the surrounding double quotes added when batching.
            output[keys[j]] = splitLines[j].slice(1, -1);

            if (options.verbose)
                printInfo(
                    `${keys[j].replaceAll("*", ".")}:\n${shardInput[keys[j]]}\n=>\n${output[keys[j]]}\n`,
                );
        }

        callbacks.onBatchCompleted(keys.length);
    }
}

/**
 * Split a model's CSV translation response into one entry per line,
 * dropping blank lines. The model often pads the response with a
 * trailing newline or blank separator lines; a genuine CSV translation
 * is always a quoted string, so an empty line is never valid output --
 * only noise. Filtering it here keeps the downstream line-count check
 * from rejecting (and ultimately dropping) the whole batch over a stray
 * blank line. See Bug 6.
 * @param text - the raw model response
 * @returns the non-empty lines
 */
export function splitTranslationLines(text: string): string[] {
    return text.split("\n").filter((line) => line.trim() !== "");
}

/**
 * Check that a translated line is a single, properly closed quoted string.
 * @param line - the candidate line
 * @returns true when the line starts and ends with a double quote, the
 * closing quote is not backslash-escaped, and the line has no line break
 */
function isQuotedLine(line: string): boolean {
    return (
        line.startsWith('"') &&
        line.endsWith('"') &&
        !line.endsWith('\\"') &&
        !line.includes("\n")
    );
}

/**
 * Find the first templated string that a translated line dropped.
 * @param line - the translated line
 * @param templatedStrings - the templated strings found in the source
 * line, or undefined when the source line had none
 * @returns the first templated string missing from `line`, or undefined
 * when all of them are present
 */
function findMissingTemplatedString(
    line: string,
    templatedStrings: readonly string[] | undefined,
): string | undefined {
    // for..of visits only the array elements; for..in would also visit
    // the `index`/`input`/`groups` properties of a non-global match result.
    for (const templatedString of templatedStrings ?? []) {
        if (!line.includes(templatedString)) {
            return templatedString;
        }
    }

    return undefined;
}

/**
 * Run one generation attempt for a batch and validate the response.
 *
 * The response must have exactly one line per key, keep every templated
 * string, and consist of quoted lines. When `ensureChangedTranslation` is
 * set, a line identical to its source line is re-requested individually.
 * Unless skipped via options, the accuracy verification (and, with a
 * styling prompt override, the styling verification) then runs on the
 * final text.
 *
 * Every validation failure rejects so that the caller's retry wrapper can
 * try again; most failures first roll back the last chat message.
 * @param options - the batch input plus language codes, chats and flags
 * @param generationPromptText - the prompt sent for this batch
 * @param generateState - per-batch state shared across attempts; its retry
 * counters and cached fixes are mutated
 * @returns the validated translation, one quoted line per key
 * @throws Error when the response is empty, malformed, untranslated, or
 * rejected by verification
 */
async function generate(
    options: GenerateTranslationOptionsCSV,
    generationPromptText: string,
    generateState: GenerateStateCSV,
): Promise<string> {
    const {
        chats,
        inputLanguageCode: inputLanguage,
        outputLanguageCode: outputLanguage,
        input,
        keys,
        verboseLogging,
        ensureChangedTranslation,
    } = options;

    const {
        inputLineToTemplatedString,
        translationToRetryAttempts,
        fixedTranslationMappings,
        splitInput, // Fine to destructure here -- we never modify the original
    } = generateState;

    let text =
        await chats.generateTranslationChat.sendMessage(generationPromptText);

    if (!text) {
        generateState.generationRetries++;
        if (generateState.generationRetries > 10) {
            chats.generateTranslationChat.resetChatHistory();
            throw new Error(
                "Failed to generate content due to exception. Resetting history.",
            );
        }

        printError(`Erroring text = ${input}`);
        chats.generateTranslationChat.rollbackLastMessage();
        throw new Error("Failed to generate content due to exception.");
    }

    generateState.generationRetries = 0;

    if (text.startsWith("```\n") && text.endsWith("\n```")) {
        if (verboseLogging) {
            printInfo("\nResponse started and ended with triple backticks");
        }

        text = text.slice(4, -4);
    }

    // Response length matches. Blank lines (a trailing newline, stray
    // separators) are dropped first so they don't trip the strict
    // count check and cause an otherwise-valid batch to be retried into
    // oblivion and silently skipped. See Bug 6.
    const splitText = splitTranslationLines(text);
    if (splitText.length !== keys.length) {
        chats.generateTranslationChat.rollbackLastMessage();
        throw new Error(
            `Invalid number of lines: expected ${keys.length}, got ${splitText.length}. text = ${text}`,
        );
    }

    // Templated strings match
    for (const i in inputLineToTemplatedString) {
        if (
            Object.prototype.hasOwnProperty.call(inputLineToTemplatedString, i)
        ) {
            const missing = findMissingTemplatedString(
                splitText[i],
                inputLineToTemplatedString[i],
            );

            if (missing !== undefined) {
                chats.generateTranslationChat.rollbackLastMessage();
                throw new Error(`Missing templated string: ${missing}`);
            }
        }
    }

    // Trim extra quotes if they exist
    for (let i = 0; i < splitText.length; i++) {
        let line = splitText[i];
        while (line.startsWith('""')) {
            line = line.slice(1);
        }

        while (line.endsWith('""')) {
            line = line.slice(0, -1);
        }

        splitText[i] = line;
    }

    // Per-line translation verification
    for (let i = 0; i < splitText.length; i++) {
        let line = splitText[i];
        if (!isQuotedLine(line)) {
            chats.generateTranslationChat.rollbackLastMessage();
            throw new Error(`Invalid line: ${line}`);
        }

        // Only lines that came back identical to their source (and are
        // longer than 4 characters, quotes included) are re-requested.
        if (
            !ensureChangedTranslation ||
            line !== splitInput[i] ||
            line.length <= 4
        ) {
            continue;
        }

        if (translationToRetryAttempts[line] === undefined) {
            translationToRetryAttempts[line] = 0;
        } else if (fixedTranslationMappings[line]) {
            // A previous attempt already produced a fix for this line.
            splitText[i] = fixedTranslationMappings[line];
            continue;
        }

        const retryTranslationPromptText = failedTranslationPrompt(
            inputLanguage,
            outputLanguage,
            splitInput[i],
            line,
        );

        // eslint-disable-next-line no-await-in-loop
        const fixedText = await chats.generateTranslationChat.sendMessage(
            retryTranslationPromptText,
        );

        if (fixedText === "") {
            chats.generateTranslationChat.rollbackLastMessage();
            throw new Error("Failed to generate content due to exception.");
        }

        const oldText = line;
        line = fixedText;

        const missing = findMissingTemplatedString(
            line,
            inputLineToTemplatedString[i],
        );

        if (missing !== undefined) {
            chats.generateTranslationChat.rollbackLastMessage();
            throw new Error(`Missing templated string: ${missing}`);
        }

        // Same well-formedness rule as the first pass. A reply spanning
        // several lines is rejected too: it would shift every later key
        // of the batch onto the wrong line.
        if (!isQuotedLine(line)) {
            chats.generateTranslationChat.rollbackLastMessage();
            throw new Error(`Invalid line: ${line}`);
        }

        // Peel doubled wrapping quotes, never going below a bare `""`.
        while (
            line.length >= 4 &&
            line.startsWith('""') &&
            line.endsWith('""')
        ) {
            line = line.slice(1, -1);
        }

        // Store the cleaned reply: the caller strips exactly one quote from
        // each end, so leftover doubled quotes would leak into the output.
        splitText[i] = line;

        if (line !== splitInput[i]) {
            if (verboseLogging) {
                printInfo(`Successfully translated: ${oldText} => ${line}`);
            }

            fixedTranslationMappings[oldText] = line;
            continue;
        }

        translationToRetryAttempts[line]++;
        if (translationToRetryAttempts[line] < 3) {
            chats.generateTranslationChat.rollbackLastMessage();
            throw new Error(`No translation: ${line}`);
        }

        // After three attempts the unchanged line is accepted as-is.
    }

    // Re-join once after the loop so every per-line replacement -- cached
    // fixes included -- reaches both the verifiers and the caller.
    text = splitText.join("\n");

    let translationVerificationResponse = "";
    if (!options.skipTranslationVerification) {
        translationVerificationResponse = await verifyTranslation(
            chats.verifyTranslationChat,
            inputLanguage,
            outputLanguage,
            input,
            text,
            {
                context: options.context,
                glossary: options.glossary,
                overridePrompt: options.overridePrompt,
            },
        );
    }

    if (isNAK(translationVerificationResponse)) {
        chats.generateTranslationChat.signalInvalid("translation");
        throw new Error(`Invalid translation. text = ${text}`);
    }

    // Styling is folded into the accuracy prompt by default (the merged
    // rubric above checks both). Only run the standalone styling pass
    // when the user has explicitly supplied a stylingVerificationPrompt
    // override -- otherwise we'd be making a wasted API call that just
    // echoes back an ACK to the trivial no-op prompt.
    let stylingVerificationResponse = "";
    const hasStylingOverride = Boolean(
        options.overridePrompt?.stylingVerificationPrompt,
    );

    if (!options.skipStylingVerification && hasStylingOverride) {
        stylingVerificationResponse = await verifyStyling(
            chats.verifyStylingChat,
            inputLanguage,
            outputLanguage,
            input,
            text,
            {
                context: options.context,
                glossary: options.glossary,
                overridePrompt: options.overridePrompt,
            },
        );
    }

    if (isNAK(stylingVerificationResponse)) {
        chats.generateTranslationChat.signalInvalid("styling");
        throw new Error(`Invalid styling. text = ${text}`);
    }

    return text;
}