i18n-ai-translate

Version:

AI-powered localization CLI, Node library, and GitHub Action. Translate i18next JSON, Gettext PO, Java .properties, and iOS .strings with ChatGPT, Claude, Gemini, or local Ollama models.

github.com/taahamahdi/i18n-ai-translate

taahamahdi/i18n-ai-translate

512 lines (442 loc) • 16.3 kB

text/typescript

// End-to-end concurrency coverage. Unlike translate.spec.ts (which mocks the
// CSV + JSON pipelines wholesale), this file mocks only ChatFactory so the
// real translate.ts / pipelines / pool / limiter all execute.

import type { ChatParams, Model } from "../types";
import type { ZodType, ZodTypeDef } from "zod";
import type Engine from "../enums/engine";
import type RateLimiter from "../rate_limiter";

// One record per message the fake chat receives. Tests assert over the
// collected records to prove workers got distinct Chats instances.
type ChatCall = {
    chatId: number;
    format:
        | "csv"
        | "csv-verify"
        | "csv-styling"
        | "json-translate"
        | "json-verify"
        | "unknown";
    keys: string[];
};

// Shared state the fake ChatFactory reads from and records into.
const chatCalls: ChatCall[] = [];

let nextChatId = 1;

// Source strings that make a translate call throw a plain Error.
let failKeys: Set<string> | null = null;

// Source strings that trigger a single simulated HTTP 429.
let rejectOn429Once: Set<string> | null = null;

// When true, the fake appends blank lines to the CSV response to
// simulate a model padding its output with a trailing newline (Bug 6).
let csvTrailingBlank = false;

/** Returns the current chat id and advances the counter. */
function mintChatId(): number {
    return nextChatId++;
}

/** Deterministic stand-in for a translation: appends `_fr` to the source. */
function fakeTranslate(s: string): string {
    return s + "_fr";
}

/**
 * Extracts the CSV rows from a prompt.
 *
 * The CSV prompt wraps the input block in triple backticks; each row of the
 * first such block is returned with one leading and one trailing double
 * quote removed. Returns an empty array when the prompt has no such block.
 */
function parseCsvInput(message: string): string[] {
    const fenced = /```\n([\s\S]*?)\n```/.exec(message);
    if (fenced === null) {
        return [];
    }

    const rows: string[] = [];
    for (const row of fenced[1].split("\n")) {
        rows.push(row.replace(/^"|"$/g, ""));
    }

    return rows;
}

/**
 * Extracts the item array from the first ```json block of a prompt.
 *
 * Returns an empty array when there is no such block, when the block is not
 * valid JSON, or when the decoded value is not an array.
 */
function parseJsonItems(
    message: string,
): Array<{ id: number; original: string }> {
    const fenced = /```json\n([\s\S]*?)\n```/.exec(message);
    if (fenced === null) {
        return [];
    }

    let decoded: unknown;
    try {
        decoded = JSON.parse(fenced[1]);
    } catch {
        return [];
    }

    return Array.isArray(decoded) ? decoded : [];
}

/**
 * Classifies a prompt by matching on its wording.
 *
 * A schema (`format`) means JSON mode; its absence means CSV mode.
 */
function detectFormat(
    message: string,
    format?: ZodType<any, ZodTypeDef, any>,
): ChatCall["format"] {
    if (format) {
        // Each JSON-mode prompt has a distinct preamble we can match on.
        if (/Check translations from/.test(message)) {
            return "json-verify";
        }

        return /Translate from/.test(message) ? "json-translate" : "unknown";
    }

    // CSV mode: distinguish the three prompt shapes so tests can
    // count accuracy vs. styling verify calls separately.
    if (/translation reviewer/.test(message)) {
        return "csv-verify";
    }

    return /^Reply with ACK\.$/.test(message.trim()) ? "csv-styling" : "csv";
}

/**
 * Builds a fake chat with a freshly minted `chatId`.
 *
 * `sendMessage` records every call into `chatCalls` and answers according to
 * the detected prompt format; the remaining methods are bare `jest.fn()`s.
 */
function makeFakeChat(): {
    startChat: jest.Mock;
    sendMessage: jest.Mock;
    resetChatHistory: jest.Mock;
    rollbackLastMessage: jest.Mock;
    signalInvalid: jest.Mock;
    chatId: number;
} {
    const chatId = mintChatId();

    const sendMessage = jest.fn(
        async (
            message: string,
            format?: ZodType<any, ZodTypeDef, any>,
        ): Promise<string> => {
            const kind = detectFormat(message, format);

            switch (kind) {
                case "csv": {
                    const sources = parseCsvInput(message);
                    // Record first so failed attempts are visible to tests too.
                    chatCalls.push({ chatId, format: kind, keys: sources });

                    if (sources.some((src) => failKeys?.has(src))) {
                        throw new Error(
                            `simulated failure for: ${sources.join(",")}`,
                        );
                    }

                    if (sources.some((src) => rejectOn429Once?.has(src))) {
                        // Cleared before throwing, so the 429 fires only once.
                        rejectOn429Once = null;
                        throw Object.assign(new Error("rate limited"), {
                            headers: { "retry-after": "0" },
                            status: 429,
                        });
                    }

                    const quoted = sources.map(
                        (src) => `"${fakeTranslate(src)}"`,
                    );
                    const body = quoted.join("\n");
                    if (csvTrailingBlank) {
                        return `${body}\n\n`;
                    }

                    return body;
                }

                case "json-translate": {
                    const items = parseJsonItems(message);
                    const originals = items.map((it) => it.original);
                    chatCalls.push({ chatId, format: kind, keys: originals });

                    if (originals.some((text) => failKeys?.has(text))) {
                        throw new Error(
                            `simulated failure for: ${originals.join(",")}`,
                        );
                    }

                    const translated = items.map((it) => ({
                        id: it.id,
                        translated: fakeTranslate(it.original),
                    }));
                    return JSON.stringify({ items: translated });
                }

                case "json-verify": {
                    const items = parseJsonItems(message);
                    chatCalls.push({
                        chatId,
                        format: kind,
                        keys: items.map((it) => it.original),
                    });

                    // Every item is reported valid with no fix suggested.
                    const verdicts = items.map((it) => ({
                        fixedTranslation: "",
                        id: it.id,
                        issue: "",
                        valid: true,
                    }));
                    return JSON.stringify({ items: verdicts });
                }

                case "csv-verify":
                case "csv-styling":
                    chatCalls.push({ chatId, format: kind, keys: [] });
                    return "ACK";

                default:
                    return "";
            }
        },
    );

    return {
        chatId,
        signalInvalid: jest.fn(),
        resetChatHistory: jest.fn(),
        rollbackLastMessage: jest.fn(),
        sendMessage,
        startChat: jest.fn(),
    };
}

// Every chat the code under test asks for is a fresh fake.
jest.mock("../chats/chat_factory", () => ({
    __esModule: true,
    default: {
        newChat: jest.fn(
            (
                _engine: Engine,
                _model: Model,
                _rateLimiter: RateLimiter,
                _apiKey?: string,
                _host?: string,
                _chatParams?: ChatParams,
            ) => makeFakeChat(),
        ),
    },
}));

// delay() in utils.ts is used by the rate limiter and retry code; short-circuit
// it so tests don't actually sleep. The print helpers are silenced as well.
jest.mock("../utils", () => {
    const realUtils = jest.requireActual("../utils");
    return {
        ...realUtils,
        delay: jest.fn(() => Promise.resolve()),
        printExecutionTime: jest.fn(),
        printInfo: jest.fn(),
        printProgress: jest.fn(),
        printWarn: jest.fn(),
    };
});

// Import AFTER mocks so translate() sees the mocked ChatFactory.
// eslint-disable-next-line import/first
import Engine_ from "../enums/engine";
// eslint-disable-next-line import/first
import PromptMode from "../enums/prompt_mode";
// eslint-disable-next-line import/first
import { translate } from "../translate";

process.env.OPENAI_API_KEY = "test";

// Options shared by every test. Both verification passes are skipped and the
// rate limit is zero here; individual tests override what they need.
const baseOptions = {
    apiKey: "test",
    batchMaxTokens: 4096,
    batchSize: 4,
    chatParams: {},
    continueOnError: true,
    engine: Engine_.ChatGPT,
    host: undefined,
    inputLanguageCode: "en",
    model: "gpt-4.1",
    outputLanguageCode: "fr",
    rateLimitMs: 0,
    skipStylingVerification: true,
    skipTranslationVerification: true,
    templatedStringPrefix: "{{",
    templatedStringSuffix: "}}",
    verbose: false,
};

// Reset all state shared with the fake chat so tests stay independent.
beforeEach(() => {
    chatCalls.length = 0;
    nextChatId = 1;
    failKeys = null;
    rejectOn429Once = null;
    csvTrailingBlank = false;
});

// Eight keys with eight distinct values; returns a fresh object per call.
const toyInput = (): Record<string, string> => ({
    bye1: "Bye",
    bye2: "Goodbye",
    hello1: "Hello",
    hello2: "Hi",
    thanks1: "Thanks",
    thanks2: "Thank you",
    yes1: "Yes",
    yes2: "Yeah",
});

describe.each(Object.values(PromptMode))(
    "concurrency (promptMode=%s)",
    (promptMode) => {
        it("concurrency=1 and concurrency=4 produce the same output", async () => {
            const serial = (await translate({
                ...baseOptions,
                concurrency: 1,
                inputJSON: toyInput(),
                promptMode,
            } as any)) as Record<string, string>;

            // Start the second run from a clean tracker.
            chatCalls.length = 0;
            nextChatId = 1;

            const parallel = (await translate({
                ...baseOptions,
                concurrency: 4,
                inputJSON: toyInput(),
                promptMode,
            } as any)) as Record<string, string>;

            expect(parallel).toEqual(serial);
            expect(serial.hello1).toBe("Hello_fr");
        });

        it("concurrency=2 routes work to two distinct chat instances", async () => {
            await translate({
                ...baseOptions,
                concurrency: 2,
                inputJSON: toyInput(),
                promptMode,
            } as any);

            const translateCalls = chatCalls.filter(
                (c) => c.format === "csv" || c.format === "json-translate",
            );

            const uniqueChatIds = new Set(translateCalls.map((c) => c.chatId));
            expect(uniqueChatIds.size).toBeGreaterThanOrEqual(2);
        });

        it("every input key shows up in some translate chat call exactly once", async () => {
            const input = toyInput();
            await translate({
                ...baseOptions,
                concurrency: 2,
                inputJSON: input,
                promptMode,
            } as any);

            // Count occurrences rather than collecting into a Set: a Set
            // would hide a value that two workers both picked up, which is
            // exactly the double-dispatch this test is meant to catch.
            const timesSent = new Map<string, number>();
            for (const call of chatCalls) {
                if (call.format !== "csv" && call.format !== "json-translate") {
                    continue;
                }

                for (const key of call.keys) {
                    timesSent.set(key, (timesSent.get(key) ?? 0) + 1);
                }
            }

            // toyInput() values are all distinct, so each must be sent once.
            for (const value of Object.values(input)) {
                expect(timesSent.get(value)).toBe(1);
            }
        });

        it("continueOnError skips failing work but keeps the rest of the translation", async () => {
            failKeys = new Set(["Hi"]); // hello2's value

            const out = (await translate({
                ...baseOptions,
                // batchSize 1 so a CSV failure skips just one key, not a batch.
                batchSize: 1,
                concurrency: 2,
                continueOnError: true,
                inputJSON: toyInput(),
                promptMode,
            } as any)) as Record<string, string>;

            // Most keys should translate; the failing key may or may not
            // appear depending on mode.
            const translatedCount = Object.values(out).filter(
                (v) => typeof v === "string" && v.endsWith("_fr"),
            ).length;

            expect(translatedCount).toBeGreaterThanOrEqual(7);
        });

        it("handles empty input at any concurrency", async () => {
            const out = await translate({
                ...baseOptions,
                concurrency: 4,
                inputJSON: {},
                promptMode,
            } as any);

            expect(out).toEqual({});
        });

        it("handles single-key input at concurrency higher than input size", async () => {
            const out = (await translate({
                ...baseOptions,
                concurrency: 4,
                inputJSON: { only: "Single" },
                promptMode,
            } as any)) as Record<string, string>;

            expect(out.only).toBe("Single_fr");
        });
    },
);

describe("rate limit penalty propagates through shared limiter", () => {
    it("a 429 on one worker delays other workers via the limiter", async () => {
        rejectOn429Once = new Set(["Hello"]);

        // Should still succeed thanks to retryWithBackoff.
        const out = (await translate({
            ...baseOptions,
            concurrency: 2,
            inputJSON: toyInput(),
            promptMode: PromptMode.CSV,
        } as any)) as Record<string, string>;

        expect(out.hello1).toBe("Hello_fr");
        // No assertion on the limiter itself here — the unit test in
        // retry.spec.ts already covers the penalize() wiring. What we're
        // proving here is just that a 429 in one worker doesn't kill
        // translation.
    });
});

describe("CSV styling verification", () => {
    it("does NOT fire a standalone styling call when no override is supplied", async () => {
        // Enable styling verification (it's off in baseOptions). Without
        // an overridePrompt.stylingVerificationPrompt, the accuracy prompt
        // already folds in styling, so we should make zero styling-only
        // calls.
        await translate({
            ...baseOptions,
            concurrency: 1,
            inputJSON: toyInput(),
            promptMode: PromptMode.CSV,
            skipStylingVerification: false,
            skipTranslationVerification: false,
        } as any);

        const stylingCalls = chatCalls.filter(
            (c) => c.format === "csv-styling",
        );

        expect(stylingCalls).toHaveLength(0);
    });
});

describe("CSV blank-line tolerance (Bug 6)", () => {
    it("still translates every key when the model pads the response with blank lines", async () => {
        // Before the fix, a trailing blank line made the response line
        // count exceed keys.length; the batch was rejected on every
        // retry and the keys were silently dropped from the output.
        csvTrailingBlank = true;

        const result = (await translate({
            ...baseOptions,
            concurrency: 1,
            inputJSON: { greeting: "Hello", parting: "Bye" },
            promptMode: PromptMode.CSV,
        } as any)) as Record<string, string>;

        expect(result).toEqual({
            greeting: fakeTranslate("Hello"),
            parting: fakeTranslate("Bye"),
        });
    });
});

describe("shared pool across translate() invocations", () => {
    it("reuses the same Chats instances across multiple translate() calls when a pool is supplied", async () => {
        // This is what --language-concurrency relies on: building one
        // pool up front and passing it into every language's translate
        // call, so all languages share one rate-limit / TPM budget
        // instead of each language spinning up its own.
        const ChatPool = (await import("../chat_pool")).default;
        const RateLimiter = (await import("../rate_limiter")).default;

        const rateLimiter = new RateLimiter(0, false);
        const pool = ChatPool.create({
            apiKey: "test",
            chatParams: {} as any,
            concurrency: 2,
            engine: Engine_.ChatGPT,
            model: "gpt-4.1",
            rateLimiter,
        });

        const chatIdsBefore = new Set(
            pool
                .all()
                .map(
                    (triple) =>
                        (triple.generateTranslationChat as any).chatId,
                ),
        );

        // Two back-to-back translate() calls that reuse the same pool.
        await translate({
            ...baseOptions,
            concurrency: 2,
            inputJSON: { a: "A" },
            pool,
            promptMode: PromptMode.CSV,
            rateLimiter,
        } as any);

        await translate({
            ...baseOptions,
            concurrency: 2,
            inputJSON: { b: "B" },
            outputLanguageCode: "es",
            pool,
            promptMode: PromptMode.CSV,
            rateLimiter,
        } as any);

        const chatIdsAfter = new Set(
            pool
                .all()
                .map(
                    (triple) =>
                        (triple.generateTranslationChat as any).chatId,
                ),
        );

        // Same instances — no fresh pool was created on the second
        // translate call.
        expect(chatIdsAfter).toEqual(chatIdsBefore);

        // And every generate call went through one of the N chats in
        // the pool (no orphan fresh chats that somehow weren't
        // registered with the pool).
        const generateCallChatIds = new Set(
            chatCalls.filter((c) => c.format === "csv").map((c) => c.chatId),
        );

        // Guard against a vacuous pass: if no generate call was recorded,
        // the membership loop below would have nothing to check.
        expect(generateCallChatIds.size).toBeGreaterThan(0);

        for (const id of generateCallChatIds) {
            expect(chatIdsBefore.has(id)).toBe(true);
        }
    });
});