UNPKG

i18n-ai-translate

Version:

AI-powered localization CLI, Node library, and GitHub Action. Translate i18next JSON, Gettext PO, Java .properties, and iOS .strings with ChatGPT, Claude, Gemini, or local Ollama models.

306 lines (267 loc) 9.14 kB
import type FormatAdapter from "./format_adapter";

/**
 * printf-style placeholder as used in iOS format strings: `%@`, `%d`,
 * `%1$@`, `%2$d`, `%.2f`, … Capture group (1) is the optional `N$`
 * positional index. `%@` (the Objective-C object specifier) is the
 * distinguishing addition over plain printf.
 *
 * Deliberately tolerant — the contract is "whatever we strip on read we
 * restore on write", so we don't fully validate the printf grammar.
 */
const PRINTF_REGEX =
    /%(?:(\d+)\$)?[-+ #0]*\d*(?:\.\d+)?(?:hh|h|ll|l|j|z|t|L|q)?[@sdifFeEgGxXoucpn%]/g;

/** Exactly four hex digits: the payload of a `\Uxxxx` escape. */
const HEX4_REGEX = /^[0-9a-fA-F]{4}$/;

/** A single whitespace character (used to delimit unquoted keys). */
const WHITESPACE_REGEX = /\s/;

type PlaceholderMap = {
    /** Native tokens, one per arg slot (1-based index → tokens[i-1]). */
    tokens: string[];
    /** Whether tokens were positional (`%1$@`) or bare (`%@`). */
    positional: boolean;
};

/**
 * One chunk of the source file. `raw` chunks (comments, whitespace,
 * keys, `=`, quotes, `;`) are reproduced verbatim; `value` chunks hold
 * a single translatable string's inner content so it can be swapped.
 */
type StringsChunk =
    | { kind: "raw"; text: string }
    | {
          kind: "value";
          key: string;
          /** Original escaped inner text (without the surrounding quotes). */
          rawValue: string;
          /** Placeholder-stripped value; "unchanged" sentinel on write. */
          normalizedValue: string;
          placeholders: PlaceholderMap;
      };

type StringsSidecar = {
    kind: "strings";
    chunks: StringsChunk[];
};

/**
 * Decode `.strings` backslash escapes (`\"`, `\n`, `\Uxxxx`, …) into the
 * literal characters they represent.
 *
 * A `\U`/`\u` escape is only decoded when it is followed by exactly four
 * hex digits; otherwise it is treated like any other unknown escape and
 * collapses to the bare letter, so malformed input never produces a NUL
 * or swallows the characters that follow it.
 * @param s - the escaped source text
 * @returns the decoded literal string
 */
function unescapeStrings(s: string): string {
    let out = "";
    for (let i = 0; i < s.length; i++) {
        const c = s[i];
        if (c !== "\\") {
            out += c;
            continue;
        }

        const n = s[i + 1];
        i++;
        switch (n) {
            case "n":
                out += "\n";
                break;
            case "t":
                out += "\t";
                break;
            case "r":
                out += "\r";
                break;
            case "U":
            case "u": {
                const hex = s.slice(i + 1, i + 5);
                if (HEX4_REGEX.test(hex)) {
                    out += String.fromCharCode(parseInt(hex, 16));
                    i += 4;
                } else {
                    // Malformed escape: keep the letter, consume nothing more.
                    out += n;
                }
                break;
            }
            // `\"`, `\\`, and anything else collapse to the bare char.
            // A lone trailing backslash (n === undefined) is dropped.
            default:
                if (n !== undefined) out += n;
                break;
        }
    }
    return out;
}

/**
 * Encode a translated value into `.strings` inner form. Only structural
 * escapes are emitted; non-ASCII stays UTF-8 (this tool writes UTF-8,
 * not legacy UTF-16). The surrounding quotes are added by the caller.
 * @param s - the literal value to encode
 * @returns the escaped inner string
 */
function escapeStringsValue(s: string): string {
    let out = "";
    for (const ch of s) {
        switch (ch) {
            case "\\":
                out += "\\\\";
                break;
            case "\"":
                out += "\\\"";
                break;
            case "\n":
                out += "\\n";
                break;
            case "\t":
                out += "\\t";
                break;
            case "\r":
                out += "\\r";
                break;
            default:
                out += ch;
        }
    }
    return out;
}

/**
 * Replace every printf-style placeholder in `text` with a neutral
 * `{{argN}}` marker and remember the native token for each slot.
 *
 * `%%` (a literal percent sign) is left untouched. Positional tokens use
 * their own index; bare tokens are numbered in order of appearance. A
 * positional index of 0 is not a valid argument slot, so such a token is
 * left as-is instead of being stored at `tokens[-1]`, where it could not
 * be restored.
 * @param text - the unescaped source value
 * @returns the normalized text and the placeholder map needed to restore it
 */
function stripPlaceholders(text: string): {
    normalized: string;
    map: PlaceholderMap;
} {
    const tokens: string[] = [];
    let positional = false;
    let autoIndex = 0;

    const normalized = text.replace(
        PRINTF_REGEX,
        (match: string, posIdx: string | undefined) => {
            if (match === "%%") return match;

            let index: number;
            if (posIdx) {
                index = Number(posIdx);
                if (index < 1) return match;
                positional = true;
            } else {
                autoIndex++;
                index = autoIndex;
            }

            tokens[index - 1] = match;
            return `{{arg${index}}}`;
        },
    );

    return { map: { positional, tokens }, normalized };
}

/**
 * Put the native printf tokens back in place of `{{argN}}` markers.
 * @param text - translated text containing `{{argN}}` markers
 * @param map - the placeholder map captured by `stripPlaceholders`
 * @returns the text with every known marker replaced by its native token
 */
function restorePlaceholders(text: string, map: PlaceholderMap): string {
    if (map.tokens.length === 0) return text;

    return text.replace(/\{\{arg(\d+)\}\}/g, (_match, idx) => {
        // A model-invented arg reference with no captured token is left
        // literal rather than silently dropped — same stance as the PO
        // and properties adapters; verification guards against it.
        const original = map.tokens[Number(idx) - 1];
        return original ?? `{{arg${idx}}}`;
    });
}

/**
 * Read a double-quoted literal starting at `raw[i]` (which must be `"`),
 * honouring backslash escapes so an escaped quote does not end it.
 * @param raw - the full source text
 * @param i - index of the opening quote
 * @returns the escaped inner content, the index just past the literal
 * (never beyond `raw.length`), and whether a closing quote was found
 */
function readQuoted(
    raw: string,
    i: number,
): { inner: string; end: number; closed: boolean } {
    let j = i + 1;
    let inner = "";
    let closed = false;

    while (j < raw.length) {
        const c = raw[j];
        if (c === "\\") {
            // Keep the escape pair verbatim; decoding happens later.
            inner += c + (raw[j + 1] ?? "");
            j += 2;
            continue;
        }

        if (c === "\"") {
            closed = true;
            break;
        }

        inner += c;
        j++;
    }

    return { closed, end: closed ? j + 1 : raw.length, inner };
}

/**
 * Format adapter for iOS `.strings` files.
 *
 * `read` scans the file once, splitting it into verbatim `raw` chunks and
 * replaceable `value` chunks, and returns the placeholder-normalized
 * values keyed by their string key. `write` replays the chunks,
 * substituting translated values and leaving everything else untouched.
 */
const StringsAdapter: FormatAdapter<StringsSidecar> = {
    extensions: [".strings"] as const,

    name: "strings",

    /**
     * Parse `.strings` source text.
     * @param raw - the full file contents
     * @returns `flat` (key → normalized value) and the chunk sidecar that
     * `write` needs to rebuild the file
     */
    read(raw: string): {
        flat: Record<string, string>;
        sidecar: StringsSidecar;
    } {
        const flat: Record<string, string> = {};
        const chunks: StringsChunk[] = [];
        let buf = "";
        let i = 0;
        let seenEquals = false;
        let currentKey: string | undefined;
        // Unquoted key token of the current statement, e.g. the
        // `CFBundleDisplayName` in `CFBundleDisplayName = "App";`.
        let bareKey = "";

        while (i < raw.length) {
            const c = raw.charAt(i);

            // Block comment — consumed whole so quotes inside it are inert.
            if (c === "/" && raw[i + 1] === "*") {
                const close = raw.indexOf("*/", i + 2);
                const end = close === -1 ? raw.length : close + 2;
                buf += raw.slice(i, end);
                i = end;
                continue;
            }

            // Line comment — runs to (but not including) the newline.
            if (c === "/" && raw[i + 1] === "/") {
                const nl = raw.indexOf("\n", i);
                const end = nl === -1 ? raw.length : nl;
                buf += raw.slice(i, end);
                i = end;
                continue;
            }

            if (c === "\"") {
                const { inner, end, closed } = readQuoted(raw, i);
                if (!seenEquals || currentKey === undefined) {
                    // First quoted string of the statement is the key;
                    // keys aren't translated, so keep them verbatim.
                    buf += raw.slice(i, end);
                    if (!seenEquals) currentKey = unescapeStrings(inner);
                } else {
                    // Value: split the buffer so the inner text becomes
                    // its own replaceable chunk, with the quotes staying
                    // in the surrounding raw chunks.
                    buf += "\"";
                    chunks.push({ kind: "raw", text: buf });
                    const { normalized, map } = stripPlaceholders(
                        unescapeStrings(inner),
                    );
                    flat[currentKey] = normalized;
                    chunks.push({
                        key: currentKey,
                        kind: "value",
                        normalizedValue: normalized,
                        placeholders: map,
                        rawValue: inner,
                    });
                    // Only re-emit a closing quote the source actually had.
                    buf = closed ? "\"" : "";
                }

                i = end;
                continue;
            }

            if (c === "=") {
                // No quoted key seen: fall back to the unquoted token.
                if (currentKey === undefined && bareKey !== "") {
                    currentKey = bareKey;
                }

                seenEquals = true;
                buf += c;
                i++;
                continue;
            }

            if (c === ";") {
                seenEquals = false;
                currentKey = undefined;
                bareKey = "";
                buf += c;
                i++;
                continue;
            }

            // Anything else before `=` that isn't whitespace is part of
            // an unquoted key; it is still reproduced verbatim via `buf`.
            if (!seenEquals && !WHITESPACE_REGEX.test(c)) bareKey += c;

            buf += c;
            i++;
        }

        if (buf) chunks.push({ kind: "raw", text: buf });

        return { flat, sidecar: { chunks, kind: "strings" } };
    },

    /**
     * Rebuild the `.strings` text from translated values.
     * @param translated - key → translated (still `{{argN}}`-normalized) value
     * @param sidecar - the chunk list produced by `read`
     * @returns the full file contents
     */
    write(translated: Record<string, string>, sidecar: StringsSidecar): string {
        let out = "";
        for (const chunk of sidecar.chunks) {
            if (chunk.kind === "raw") {
                out += chunk.text;
                continue;
            }

            // Own-property lookup: a key such as "toString" must not
            // resolve to an inherited Object.prototype member.
            const value = Object.prototype.hasOwnProperty.call(
                translated,
                chunk.key,
            )
                ? translated[chunk.key]
                : undefined;

            // Unchanged (or dropped) values re-emit the original inner
            // bytes, preserving the source's exact escaping.
            if (typeof value !== "string" || value === chunk.normalizedValue) {
                out += chunk.rawValue;
                continue;
            }

            out += escapeStringsValue(
                restorePlaceholders(value, chunk.placeholders),
            );
        }
        return out;
    },
};

export default StringsAdapter;