i18n-ai-translate
Version:
AI-powered localization CLI, Node library, and GitHub Action. Translate i18next JSON, Gettext PO, Java .properties, and iOS .strings with ChatGPT, Claude, Gemini, or local Ollama models.
259 lines • 9.12 kB
JavaScript
;
// Sets `exports.__esModule = true` on this CommonJS module.
// NOTE(review): presumably emitted by the TypeScript/Babel toolchain so that
// import-interop helpers treat `exports.default` as the default export — confirm.
Object.defineProperty(exports, "__esModule", { value: true });
/**
 * Java MessageFormat placeholder: `{0}`, `{1}`, `{0,number}`,
 * `{1,date,short}`, … Capture group (1) is the argument number.
 *
 * Shape of a match: `{`, one or more digits, then optionally a comma
 * followed by any run of characters that are not braces, then `}`.
 *
 * Deliberately tolerant and flat — it does not descend into nested
 * MessageFormat (e.g. `{0,choice,...{1}...}`) or honour single-quote
 * escaping (`'{'`). Because the optional tail excludes braces, the outer
 * `{0,choice,...}` of such a pattern never matches as a whole, while a
 * bare `{1}` inside it still matches on its own. The adapter contract is
 * only "whatever we strip on read, we restore on write", so a token we
 * don't recognise is simply left untouched in both directions.
 *
 * The `g` flag makes the regex match every occurrence when passed to
 * `String.prototype.replace`. It also makes `.test()` / `.exec()`
 * stateful via `lastIndex`, so avoid those methods on this shared
 * instance.
 */
const MESSAGE_FORMAT_REGEX = /\{(\d+)(?:,[^{}]*)?\}/g;
/**
 * True when `line` ends with an odd number of backslashes — i.e. the
 * final backslash is a line-continuation marker rather than an escape.
 *
 * A single trailing `\r` is ignored: callers split the file on `\n`
 * only, so every physical line of a CRLF file still carries its `\r`,
 * and a continuation backslash sits just before it.
 * @param line - the physical line to inspect
 * @returns whether the line continues onto the next
 */
function endsWithOddBackslash(line) {
    // Step over the carriage return left behind by a CRLF terminator.
    const end = line.endsWith("\r") ? line.length - 1 : line.length;
    let count = 0;
    for (let i = end - 1; i >= 0 && line[i] === "\\"; i--) {
        count++;
    }
    // Pairs of backslashes are escaped literals; only a leftover single
    // one marks a continuation.
    return count % 2 === 1;
}
/**
 * Decode `.properties` backslash escapes (`\n`, `\t`, `\r`, `\f`,
 * `\uXXXX`, `\=`, …) into the literal characters they represent.
 *
 * A `\u` that is not followed by exactly four hex digits is handled like
 * any other unknown escape: the backslash is dropped, the `u` is kept,
 * and the following characters are decoded normally. This avoids
 * emitting a NUL (from `NaN`) or a wrong code unit, and avoids skipping
 * over text that was never part of the escape.
 * @param s - the escaped source text
 * @returns the decoded literal string
 */
function unescapeProperties(s) {
    const fourHexDigits = /^[0-9a-fA-F]{4}$/;
    let out = "";
    for (let i = 0; i < s.length; i++) {
        const c = s[i];
        if (c !== "\\") {
            out += c;
            continue;
        }
        // `n` is the character being escaped; advance past it.
        const n = s[i + 1];
        i++;
        switch (n) {
            case "n":
                out += "\n";
                break;
            case "t":
                out += "\t";
                break;
            case "r":
                out += "\r";
                break;
            case "f":
                out += "\f";
                break;
            case "u": {
                const hex = s.slice(i + 1, i + 5);
                if (fourHexDigits.test(hex)) {
                    out += String.fromCharCode(Number.parseInt(hex, 16));
                    i += 4;
                }
                else {
                    // Malformed or truncated escape: keep the bare `u`
                    // and let the loop decode whatever follows.
                    out += "u";
                }
                break;
            }
            // `\=`, `\:`, `\#`, `\!`, `\\`, `\ `, … collapse to the bare
            // character. `undefined` (a trailing lone backslash) drops.
            default:
                if (n !== undefined) {
                    out += n;
                }
                break;
        }
    }
    return out;
}
/**
 * Turn a translated value back into its `.properties` spelling.
 *
 * Only the structural characters are escaped: backslash, newline,
 * carriage return, tab and form feed. Everything else — including
 * non-ASCII text — is emitted unchanged (this tool writes UTF-8 files,
 * not legacy ISO-8859-1). Leading blanks are backslash-escaped so the
 * reader's value-trimming does not remove them.
 * @param s - the literal value to encode
 * @returns the escaped value
 */
function escapePropertiesValue(s) {
    const structural = new Map([
        ["\\", "\\\\"],
        ["\n", "\\n"],
        ["\r", "\\r"],
        ["\t", "\\t"],
        ["\f", "\\f"],
    ]);
    const pieces = [];
    for (const ch of s) {
        const escaped = structural.get(ch);
        pieces.push(escaped === undefined ? ch : escaped);
    }
    const encoded = pieces.join("");
    // Leading whitespace is otherwise stripped by the reader, so prefix
    // each leading blank with a backslash.
    return encoded.replace(/^[ \t\f]+/, (lead) => {
        let protectedLead = "";
        for (const blank of lead) {
            protectedLead += "\\" + blank;
        }
        return protectedLead;
    });
}
/**
 * Split a single physical line into its key, separator, and the index
 * where the value begins. `rawKey` includes any leading whitespace so
 * re-emitting `rawKey + separator` reproduces the original prefix; the
 * logical `key` is the unescaped, trimmed form used in the flat map.
 *
 * A trailing `\r` (CRLF residue, since callers split on `\n` only) is
 * never treated as part of the key or separator, so a bare `key\r` line
 * yields the key `key` rather than `key\r`.
 * @param line - the first physical line of a logical entry
 * @returns `{ key, rawKey, separator, valueStart }`, where `valueStart`
 * never exceeds the length of the line
 */
function splitKeyValue(line) {
    const isBlank = (ch) => ch === " " || ch === "\t" || ch === "\f";
    // Scan up to, but not including, the carriage return of a CRLF line.
    const end = line.endsWith("\r") ? line.length - 1 : line.length;
    let keyStart = 0;
    while (keyStart < end && isBlank(line[keyStart])) {
        keyStart++;
    }
    let keyEnd = keyStart;
    while (keyEnd < end) {
        const c = line[keyEnd];
        if (c === "\\") {
            // An escaped character (e.g. `\=` or `\ `) belongs to the key.
            keyEnd += 2;
            continue;
        }
        if (c === "=" || c === ":" || isBlank(c)) {
            break;
        }
        keyEnd++;
    }
    // A backslash that is the last character would otherwise push keyEnd
    // one past the scanned text.
    keyEnd = Math.min(keyEnd, end);
    // Separator: optional blanks, at most one `=` or `:`, optional blanks.
    let j = keyEnd;
    while (j < end && isBlank(line[j])) {
        j++;
    }
    if (j < end && (line[j] === "=" || line[j] === ":")) {
        j++;
    }
    while (j < end && isBlank(line[j])) {
        j++;
    }
    return {
        key: unescapeProperties(line.slice(keyStart, keyEnd)),
        rawKey: line.slice(0, keyEnd),
        separator: line.slice(keyEnd, j),
        valueStart: j,
    };
}
/**
 * Build the still-escaped value of one logical entry from its physical
 * lines.
 *
 * The first line contributes everything from `valueStart` onward; each
 * later line contributes its text minus leading blanks. A trailing `\r`
 * (CRLF input) is removed from every line, and every line except the
 * last loses exactly one trailing backslash — the continuation marker —
 * so genuine escapes stay intact.
 * @param physLines - the physical lines making up one logical entry
 * @param valueStart - index where the value begins on line one
 * @returns the still-escaped joined value
 */
function assembleValue(physLines, valueStart) {
    const finalIndex = physLines.length - 1;
    const segments = physLines.map((line, idx) => {
        const body = idx === 0
            ? line.slice(valueStart)
            : line.replace(/^[ \t\f]+/, "");
        const withoutCr = body.replace(/\r$/, "");
        // Only non-final lines end in a continuation backslash.
        return idx < finalIndex ? withoutCr.replace(/\\$/, "") : withoutCr;
    });
    return segments.join("");
}
/**
 * Replace MessageFormat placeholders in `text` with neutral
 * `{{argN}}` markers and remember the original token for each argument
 * number so it can be restored after translation.
 *
 * `tokens` is indexed by argument number and may be sparse. Each number
 * maps to a single token text, so if the same number appears again with
 * a different spelling (e.g. `{0,number}` and later `{0}`), the later
 * occurrence is left untouched in the text. Overwriting the stored token
 * would make both occurrences restore to the same form and lose one of
 * the formats.
 * @param text - the unescaped source value
 * @returns `{ map: { tokens }, normalized }`
 */
function stripPlaceholders(text) {
    const tokens = [];
    const normalized = text.replace(MESSAGE_FORMAT_REGEX, (match, num) => {
        const index = Number(num);
        const known = tokens[index];
        if (known !== undefined && known !== match) {
            // Conflicting spelling for an already-captured argument:
            // leave it literal so it round-trips unchanged.
            return match;
        }
        tokens[index] = match;
        return `{{arg${num}}}`;
    });
    return { map: { tokens }, normalized };
}
/**
 * Swap `{{argN}}` markers in translated text back to the original
 * placeholder tokens recorded in `map.tokens` (indexed by N).
 *
 * When no tokens were captured the text is returned as-is. A marker
 * whose number has no recorded token is kept literally rather than
 * removed.
 * @param text - the translated value containing `{{argN}}` markers
 * @param map - placeholder map holding the `tokens` array
 * @returns the text with known markers replaced by their tokens
 */
function restorePlaceholders(text, map) {
    const { tokens } = map;
    if (tokens.length === 0) {
        return text;
    }
    const markerPattern = /\{\{arg(\d+)\}\}/g;
    const resolveMarker = (_whole, idx) => {
        const captured = tokens[Number(idx)];
        if (captured === undefined || captured === null) {
            // A model-invented reference with no captured token stays
            // literal instead of being silently dropped.
            return `{{arg${idx}}}`;
        }
        return captured;
    };
    return text.replace(markerPattern, resolveMarker);
}
/**
 * Format adapter for Java `.properties` files.
 *
 * `read` turns the raw file text into a flat key → value map plus a
 * sidecar of records describing every line of the source; `write` uses
 * that sidecar to rebuild the file, replacing only the values that
 * actually changed.
 */
const PropertiesAdapter = {
    extensions: [".properties"],
    name: "properties",
    /**
     * Parse raw `.properties` text.
     * @param raw - the full file contents
     * @returns `{ flat, sidecar }`. `flat` maps each key to its unescaped
     * value with placeholders normalised. `sidecar.records` holds one
     * `raw` record per blank/comment line and one `entry` record per
     * logical key/value entry. `sidecar.trailingNewline` records whether
     * the input ended in `\n`.
     */
    read(raw) {
        const trailingNewline = raw.endsWith("\n");
        const lines = raw.split("\n");
        // A trailing newline yields a final empty element; it is encoded
        // by `trailingNewline`, not as its own blank record.
        if (trailingNewline) {
            lines.pop();
        }
        const flat = {};
        const records = [];
        let i = 0;
        while (i < lines.length) {
            const physical = lines[i];
            const head = physical.replace(/^[ \t\f]+/, "").replace(/\r$/, "");
            // Blank and comment lines pass through verbatim. Comments
            // (`#` / `!`) are never subject to line continuation.
            if (head === "" || head[0] === "#" || head[0] === "!") {
                records.push({ kind: "raw", text: physical });
                i++;
                continue;
            }
            // Gather continuation lines into one logical entry.
            const physLines = [physical];
            while (endsWithOddBackslash(physLines[physLines.length - 1]) &&
                i + 1 < lines.length) {
                i++;
                physLines.push(lines[i]);
            }
            i++;
            const { key, rawKey, separator, valueStart } = splitKeyValue(physLines[0]);
            const rawValue = assembleValue(physLines, valueStart);
            const { normalized, map } = stripPlaceholders(unescapeProperties(rawValue));
            flat[key] = normalized;
            records.push({
                key,
                kind: "entry",
                normalizedValue: normalized,
                placeholders: map,
                rawBlock: physLines.join("\n"),
                rawKey,
                separator,
            });
        }
        return {
            flat,
            sidecar: { kind: "properties", records, trailingNewline },
        };
    },
    /**
     * Rebuild the file text from translated values and the sidecar
     * produced by `read`.
     * @param translated - map of key → translated (normalised) value
     * @param sidecar - the sidecar returned by `read`
     * @returns the `.properties` file contents
     */
    write(translated, sidecar) {
        const out = [];
        for (const record of sidecar.records) {
            if (record.kind === "raw") {
                out.push(record.text);
                continue;
            }
            // Own properties only: a key such as `constructor` or
            // `toString` that is missing from `translated` must read as
            // "dropped", not resolve to an inherited Object.prototype
            // member.
            const value = Object.prototype.hasOwnProperty.call(translated, record.key)
                ? translated[record.key]
                : undefined;
            // Unchanged (or dropped) values re-emit the original bytes,
            // preserving the source's exact escaping and continuations.
            if (value === undefined || value === record.normalizedValue) {
                out.push(record.rawBlock);
                continue;
            }
            const restored = restorePlaceholders(value, record.placeholders);
            // Source lines are split on `\n` only, so a CRLF entry's raw
            // block ends in `\r`; carry it over to keep line endings
            // consistent, and make sure it never sits inside the prefix.
            const lineEnd = record.rawBlock.endsWith("\r") ? "\r" : "";
            const prefix = record.rawKey.replace(/\r$/, "");
            // An entry that had only a key has no separator to reuse;
            // without one the new value would fuse with the key.
            const separator = record.separator === "" ? "=" : record.separator;
            out.push(prefix +
                separator +
                escapePropertiesValue(restored) +
                lineEnd);
        }
        const body = out.join("\n");
        return sidecar.trailingNewline ? `${body}\n` : body;
    },
};
// Expose the adapter object as this module's default export.
exports.default = PropertiesAdapter;
//# sourceMappingURL=properties_adapter.js.map