UNPKG

armenian-transliteration

Version:

Multi-standard Armenian transliteration (BGN/PCGN, ISO 9985, Hübschmann-Meillet, ALA-LC, Russian geographic profiles, Russian proper names, IPA)

1 lines 138 kB
{"version":3,"sources":["../src/index.ts","../src/armenian/alphabet.ts","../src/armenian/ligatures.ts","../src/armenian/normalize.ts","../src/scanner/scanner.ts","../src/engine/casing.ts","../src/engine/context.ts","../src/engine/punctuation.ts","../src/engine/engine.ts","../src/standards/latin/bgn-pcgn.ts","../src/standards/latin/iso-9985.ts","../src/standards/latin/hubschmann-meillet.ts","../src/standards/latin/ala-lc.ts","../src/standards/cyrillic/ru-geo-kt-1974.ts","../src/standards/cyrillic/ru-geo-ra-2011.ts","../src/standards/cyrillic/ru-proper-vartapetyan-1961.ts","../src/standards/cyrillic/ru-phonetic-eastern.ts","../src/standards/ipa/ipa-eastern.ts","../src/standards/ipa/ipa-western.ts","../src/standards/registry.ts","../src/profiles/metadata.ts","../src/profiles/index.ts"],"sourcesContent":["import type { Standard, TransliterateOptions } from \"./types.js\";\nimport { TransliterationEngine } from \"./engine/engine.js\";\nimport { getStandard } from \"./standards/registry.js\";\n\nexport type {\n Standard,\n LatinStandard,\n CyrillicStandard,\n IpaStandard,\n Direction,\n TransliterateOptions,\n TransliterationStandard,\n CharMapping,\n SequenceMapping,\n ContextCondition,\n ContextRule,\n LetterPosition,\n TargetScript,\n} from \"./types.js\";\n\nexport { listStandards } from \"./standards/registry.js\";\nexport {\n getProfile,\n getProfilesByTargetLanguage,\n listProfiles,\n profiles,\n} from \"./profiles/index.js\";\nexport type {\n ProfileDomain,\n ProfileMetadata,\n ProfileSource,\n ProfileSourceKind,\n ProfileStatus,\n} from \"./profiles/index.js\";\n\nconst engineCache = new Map<Standard, TransliterationEngine>();\n\nfunction assertString(text: string): void {\n if (typeof text !== \"string\") {\n throw new TypeError(\"Expected text to be a string\");\n }\n}\n\nfunction getEngine(standard: Standard): TransliterationEngine {\n let engine = engineCache.get(standard);\n if (!engine) {\n engine = new TransliterationEngine(getStandard(standard));\n engineCache.set(standard, engine);\n }\n return engine;\n}\n\n/**\n * Transliterate Armenian text to the chosen target script.\n *\n * @param text - The Armenian text to transliterate\n * @param options - Standard and direction options\n * @returns Transliterated text\n */\nexport function transliterate(\n text: string,\n options?: TransliterateOptions,\n): string {\n assertString(text);\n return getEngine(options?.standard ?? \"bgn-pcgn\").transliterate(text);\n}\n\n/**\n * Create a reusable transliterator function with fixed options.\n * More efficient for repeated transliterations with the same settings.\n *\n * @param options - Standard and direction options\n * @returns A function that transliterates text\n */\nexport function createTransliterator(\n options: TransliterateOptions = {},\n): (text: string) => string {\n const engine = getEngine(options.standard ?? \"bgn-pcgn\");\n return (text: string) => {\n assertString(text);\n return engine.transliterate(text);\n };\n}\n","export function isArmenianLetter(ch: string): boolean {\n const cp = ch.codePointAt(0);\n if (cp === undefined) return false;\n return (cp >= 0x0531 && cp <= 0x0556) || (cp >= 0x0561 && cp <= 0x0586);\n}\n\nexport function isArmenianUppercase(ch: string): boolean {\n const cp = ch.codePointAt(0);\n if (cp === undefined) return false;\n return cp >= 0x0531 && cp <= 0x0556;\n}\n\nexport function armenianToLower(ch: string): string {\n const cp = ch.codePointAt(0);\n if (cp === undefined) return ch;\n if (cp >= 0x0531 && cp <= 0x0556) {\n return String.fromCodePoint(cp + 0x30);\n }\n return ch;\n}\n\nconst ARMENIAN_PUNCTUATION: ReadonlySet<string> = new Set([\n \"ՙ\", // U+0559 MODIFIER LETTER LEFT HALF RING\n \"՚\", // U+055A ARMENIAN APOSTROPHE\n \"՛\", // U+055B ARMENIAN EMPHASIS MARK\n \"՜\", // U+055C ARMENIAN EXCLAMATION MARK\n \"՝\", // U+055D ARMENIAN COMMA\n \"՞\", // U+055E ARMENIAN QUESTION MARK\n \"՟\", // U+055F ARMENIAN ABBREVIATION MARK\n \"։\", // U+0589 ARMENIAN FULL STOP\n \"֊\", // U+058A ARMENIAN HYPHEN\n]);\n\nexport function isArmenianPunctuation(ch: string): boolean {\n return ARMENIAN_PUNCTUATION.has(ch);\n}\n\n/**\n * Diacritic-like punctuation that appears word-internally and should be\n * transparent to word-boundary tracking. Includes the Armenian emphasis\n * mark (stress diacritic, U+055B), apostrophe (U+055A), modifier letter\n * left half ring (U+0559), and abbreviation mark (U+055F).\n *\n * Excludes terminal punctuation (full stop ։, question ՞, comma ՝,\n * exclamation ՜, hyphen ֊) which DO break words.\n *\n * Scanner emits these as kind:\"punctuation\" so they still map through\n * the standard's punctuation table for output, but they do not break\n * an Armenian-letter run for context-rule purposes (e.g. \"մի՛թե\" must\n * not let ՛ make թ word-initial).\n */\nconst ARMENIAN_INWORD_DIACRITICS: ReadonlySet<string> = new Set([\n \"ՙ\", // U+0559\n \"՚\", // U+055A\n \"՛\", // U+055B\n \"՟\", // U+055F\n]);\n\nexport function isArmenianInWordDiacritic(ch: string): boolean {\n return ARMENIAN_INWORD_DIACRITICS.has(ch);\n}\n","/**\n * Armenian ligatures and their expansions.\n * These are in the Unicode Alphabetic Presentation Forms block (U+FB13-U+FB17).\n */\nexport const ARMENIAN_LIGATURES: ReadonlyMap<string, string> = new Map([\n [\"\\uFB13\", \"\\u0574\\u0576\"], // ﬓ → մն\n [\"\\uFB14\", \"\\u0574\\u0565\"], // ﬔ → մե\n [\"\\uFB15\", \"\\u0574\\u056B\"], // ﬕ → մի\n [\"\\uFB16\", \"\\u057E\\u0576\"], // ﬖ → վն\n [\"\\uFB17\", \"\\u0574\\u056D\"], // ﬗ → մխ\n]);\n","import { ARMENIAN_LIGATURES } from \"./ligatures.js\";\n\n/**\n * Normalize Armenian text before transliteration:\n * 1. Unicode NFC normalization\n * 2. Expand Armenian ligatures to their component letters\n */\nexport function normalizeArmenian(text: string): string {\n let result = text.normalize(\"NFC\");\n for (const [ligature, expansion] of ARMENIAN_LIGATURES) {\n result = result.replaceAll(ligature, expansion);\n }\n return result;\n}\n","import type { Token } from \"./tokens.js\";\nimport type { SequenceMapping } from \"../types.js\";\nimport {\n isArmenianInWordDiacritic,\n isArmenianLetter,\n isArmenianPunctuation,\n} from \"../armenian/alphabet.js\";\n\n/** U+0587 ARMENIAN SMALL LIGATURE EW — treated as an Armenian letter */\nconst ARMENIAN_EW = \"\\u0587\";\n\n/** Offset between Armenian uppercase (U+0531) and lowercase (U+0561) */\nconst ARMENIAN_CASE_OFFSET = 0x30;\n\nfunction armenianCharToLower(ch: string): string {\n const cp = ch.codePointAt(0);\n if (cp === undefined) return ch;\n if (cp >= 0x0531 && cp <= 0x0556) return String.fromCodePoint(cp + ARMENIAN_CASE_OFFSET);\n return ch;\n}\n\n/** Convert an Armenian string to all-lowercase */\nfunction seqToLower(s: string): string {\n return Array.from(s).map(armenianCharToLower).join(\"\");\n}\n\nfunction isArmenianLetterOrEw(ch: string): boolean {\n return ch === ARMENIAN_EW || isArmenianLetter(ch);\n}\n\n/** Check if a character is « (U+00AB) or » (U+00BB) */\nfunction isAngleBracketQuote(ch: string): boolean {\n return ch === \"\\u00AB\" || ch === \"\\u00BB\";\n}\n\ninterface SequencePattern {\n /** Lowercase canonical form used for matching */\n lowercase: string;\n length: number;\n}\n\n/**\n * Scans Armenian text into tokens.\n *\n * The scanner is parameterized by the multi-character sequences defined\n * in a transliteration standard (e.g., \"ու\" for BGN/PCGN, \"յու\"/\"յա\" for Russian).\n *\n * Algorithm:\n * 1. Greedy longest-match for multi-char sequences\n * 2. Single Armenian letters\n * 3. Armenian punctuation\n * 4. Whitespace\n * 5. Everything else accumulated as \"other\"\n */\nexport function scan(text: string, sequences: readonly SequenceMapping[]): Token[] {\n // Build unique set of sequence patterns (lowercase canonical), sorted longest-first.\n // Matching is case-insensitive: input slice is lowercased before comparison.\n const seenPatterns = new Set<string>();\n const patterns: SequencePattern[] = [];\n\n for (const mapping of sequences) {\n const lower = seqToLower(mapping.armenian);\n if (!seenPatterns.has(lower)) {\n seenPatterns.add(lower);\n patterns.push({ lowercase: lower, length: Array.from(lower).length });\n }\n }\n\n patterns.sort((a, b) => b.length - a.length);\n\n const chars = Array.from(text);\n const tokens: Token[] = [];\n\n // Track byte offset alongside char index for the token offset field\n // We report char-index offsets (consistent with Array.from iteration)\n let i = 0;\n let otherStart = -1;\n let otherValue = \"\";\n\n const flushOther = () => {\n if (otherValue.length > 0) {\n tokens.push({ kind: \"other\", value: otherValue, offset: otherStart });\n otherValue = \"\";\n otherStart = -1;\n }\n };\n\n while (i < chars.length) {\n const ch = chars[i];\n if (ch === undefined) break;\n\n // 1. Try greedy longest-match for multi-char sequences\n let matched = false;\n for (const pattern of patterns) {\n if (i + pattern.length > chars.length) continue;\n const slice = chars.slice(i, i + pattern.length).join(\"\");\n const sliceLower = seqToLower(slice);\n if (sliceLower === pattern.lowercase) {\n flushOther();\n tokens.push({ kind: \"armenian_sequence\", value: slice, offset: i });\n i += pattern.length;\n matched = true;\n break;\n }\n }\n if (matched) continue;\n\n // 2. Single Armenian letter (including U+0587)\n if (isArmenianLetterOrEw(ch)) {\n flushOther();\n tokens.push({ kind: \"armenian_letter\", value: ch, offset: i });\n i++;\n continue;\n }\n\n // 3. Armenian punctuation and angle-bracket quotes\n if (isArmenianPunctuation(ch) || isAngleBracketQuote(ch)) {\n flushOther();\n tokens.push({ kind: \"punctuation\", value: ch, offset: i });\n i++;\n continue;\n }\n\n // 4. Whitespace\n if (/\\s/.test(ch)) {\n flushOther();\n tokens.push({ kind: \"whitespace\", value: ch, offset: i });\n i++;\n continue;\n }\n\n // 5. Accumulate \"other\"\n if (otherValue.length === 0) otherStart = i;\n otherValue += ch;\n i++;\n }\n\n flushOther();\n\n return annotateWordPositions(tokens);\n}\n\n/**\n * Annotate each Armenian token with its position within its word.\n *\n * A \"word\" is a consecutive run of armenian_letter and armenian_sequence\n * tokens. Armenian in-word diacritic punctuation (emphasis ՛, apostrophe ՚,\n * left half-ring ՙ, abbreviation ՟) is transparent: it does not break a\n * word run, but does not receive a wordPosition of its own — the\n * surrounding letters are positioned as if the diacritic were absent.\n *\n * All other punctuation, whitespace, and \"other\" tokens close the run.\n *\n * Positions:\n * - \"isolated\": single-letter-token word\n * - \"initial\": first letter token of a multi-token word\n * - \"medial\": middle letter token(s)\n * - \"final\": last letter token\n */\nfunction annotateWordPositions(tokens: Token[]): Token[] {\n const isArmenian = (t: Token) =>\n t.kind === \"armenian_letter\" || t.kind === \"armenian_sequence\";\n const isInWordDiacritic = (t: Token) =>\n t.kind === \"punctuation\" && isArmenianInWordDiacritic(t.value);\n\n // Indices of Armenian-letter/sequence tokens belonging to the current run.\n let runLetterIndices: number[] = [];\n\n const closeRun = () => {\n if (runLetterIndices.length === 0) return;\n if (runLetterIndices.length === 1) {\n const idx = runLetterIndices[0];\n if (idx !== undefined) {\n const tok = tokens[idx];\n if (tok) tok.wordPosition = \"isolated\";\n }\n } else {\n const last = runLetterIndices.length - 1;\n for (let k = 0; k < runLetterIndices.length; k++) {\n const idx = runLetterIndices[k];\n if (idx === undefined) continue;\n const tok = tokens[idx];\n if (!tok) continue;\n if (k === 0) tok.wordPosition = \"initial\";\n else if (k === last) tok.wordPosition = \"final\";\n else tok.wordPosition = \"medial\";\n }\n }\n runLetterIndices = [];\n };\n\n for (let j = 0; j < tokens.length; j++) {\n const tok = tokens[j];\n if (!tok) continue;\n if (isArmenian(tok)) {\n runLetterIndices.push(j);\n } else if (isInWordDiacritic(tok)) {\n // Transparent within a run; if no run is open, it doesn't open one.\n continue;\n } else {\n closeRun();\n }\n }\n closeRun();\n\n return tokens;\n}\n","import {\n isArmenianLetter,\n isArmenianUppercase,\n armenianToLower,\n} from \"../armenian/alphabet.js\";\nimport type { Token } from \"../scanner/tokens.js\";\n\n/** Casing pattern of an Armenian word */\nexport type CasingPattern = \"upper\" | \"lower\" | \"title\";\n\n/** U+0587 ARMENIAN SMALL LIGATURE EW — always lowercase, no uppercase form */\nconst ARMENIAN_EW = \"\\u0587\";\n\n/**\n * Check if a character is an Armenian letter (including U+0587 և).\n */\nfunction isArmenianLetterOrEw(ch: string): boolean {\n return ch === ARMENIAN_EW || isArmenianLetter(ch);\n}\n\n/**\n * Detect the casing pattern of an Armenian token.\n * Looks only at Armenian letters (ignores non-Armenian chars).\n * U+0587 (և) is always lowercase — it has no uppercase form.\n */\nfunction isTokenUppercase(token: Token): boolean {\n const chars = Array.from(token.value);\n const armenianChars = chars.filter(isArmenianLetterOrEw);\n if (armenianChars.length === 0) return false;\n return armenianChars.every(\n (ch) => ch !== ARMENIAN_EW && isArmenianUppercase(ch),\n );\n}\n\n/**\n * Determine the casing pattern of a word from its constituent tokens.\n * A \"word\" is a consecutive run of Armenian tokens, possibly with\n * embedded in-word diacritic punctuation (which has no casing and is\n * skipped here).\n */\nexport function detectWordCasing(wordTokens: readonly Token[]): CasingPattern {\n const letterTokens = wordTokens.filter(\n (t) => t.kind === \"armenian_letter\" || t.kind === \"armenian_sequence\",\n );\n if (letterTokens.length === 0) return \"lower\";\n\n const allUpper = letterTokens.every(isTokenUppercase);\n if (allUpper) return \"upper\";\n\n // Check if first letter token starts with uppercase\n const firstLetter = letterTokens[0];\n if (firstLetter && isTokenUppercase(firstLetter)) return \"title\";\n\n return \"lower\";\n}\n\n/**\n * Apply casing to a transliterated string based on the source Armenian token.\n *\n * For a single token's output:\n * - If the source was uppercase: capitalize the first character of the output\n * - If the source was lowercase: output as-is (lowercase)\n *\n * Word-level ALL-CAPS handling is done separately by the engine.\n */\nexport function applySingleTokenCasing(\n sourceToken: Token,\n transliterated: string,\n): string {\n if (transliterated.length === 0) return transliterated;\n\n // Check if any Armenian char in the source is uppercase\n const hasUppercase = Array.from(sourceToken.value).some(\n (ch) => isArmenianLetter(ch) && isArmenianUppercase(ch),\n );\n\n if (hasUppercase) {\n // Capitalize first character of output\n return transliterated.charAt(0).toUpperCase() + transliterated.slice(1);\n }\n\n return transliterated;\n}\n\n/**\n * Apply ALL-CAPS casing to an entire word's transliterated output.\n */\nexport function applyUpperCasing(text: string): string {\n return text.toUpperCase();\n}\n\n/**\n * Get the lowercase canonical form of an Armenian token value.\n * Converts each Armenian character to lowercase.\n */\nexport function toLowerCanonical(value: string): string {\n return Array.from(value).map(armenianToLower).join(\"\");\n}\n","import type { ContextCondition, CharMapping, SequenceMapping } from \"../types.js\";\nimport type { Token } from \"../scanner/tokens.js\";\nimport { toLowerCanonical } from \"./casing.js\";\n\n/**\n * Evaluate a context condition against a token and its neighbors.\n * All specified conditions must be true (AND logic).\n */\nexport function evaluateCondition(\n condition: ContextCondition,\n token: Token,\n prevToken: Token | undefined,\n nextToken: Token | undefined,\n): boolean {\n // wordInitial: token must be at word start\n if (condition.wordInitial !== undefined) {\n const isInitial =\n token.wordPosition === \"initial\" || token.wordPosition === \"isolated\";\n if (condition.wordInitial !== isInitial) return false;\n }\n\n // position: token must be at one of the specified positions\n if (condition.position !== undefined) {\n const positions = Array.isArray(condition.position)\n ? condition.position\n : [condition.position];\n if (!token.wordPosition || !positions.includes(token.wordPosition))\n return false;\n }\n\n // followedBy: next token's lowercase value must be in the list\n if (condition.followedBy !== undefined) {\n if (!nextToken) return false;\n const nextLower = toLowerCanonical(nextToken.value);\n if (!condition.followedBy.includes(nextLower)) return false;\n }\n\n // notFollowedBy: next token's lowercase value must NOT be in the list\n if (condition.notFollowedBy !== undefined) {\n if (nextToken) {\n const nextLower = toLowerCanonical(nextToken.value);\n if (condition.notFollowedBy.includes(nextLower)) return false;\n }\n // If no next token, notFollowedBy is satisfied (nothing follows)\n }\n\n // precededBy: prev token's lowercase value must be in the list\n if (condition.precededBy !== undefined) {\n if (!prevToken) return false;\n const prevLower = toLowerCanonical(prevToken.value);\n if (!condition.precededBy.includes(prevLower)) return false;\n }\n\n return true;\n}\n\n/**\n * Resolve the transliteration output for a mapping, considering context rules.\n * Evaluates context rules in order; first matching rule wins.\n * Falls back to default target if no rules match.\n */\nexport function resolveMapping(\n mapping: CharMapping | SequenceMapping,\n token: Token,\n prevToken: Token | undefined,\n nextToken: Token | undefined,\n): string {\n if (mapping.contextRules) {\n for (const rule of mapping.contextRules) {\n if (evaluateCondition(rule.condition, token, prevToken, nextToken)) {\n return rule.target;\n }\n }\n }\n return mapping.target;\n}\n","/**\n * Default Armenian punctuation mappings.\n * Standards can override these via their `punctuation` field.\n */\nexport const DEFAULT_PUNCTUATION: Record<string, string> = {\n \"\\u0559\": \"'\", // ՙ ARMENIAN MODIFIER LETTER LEFT HALF RING\n \"\\u055A\": \"'\", // ՚ ARMENIAN APOSTROPHE\n \"\\u055B\": \"'\", // ՛ ARMENIAN EMPHASIS MARK\n \"\\u055C\": \"!\", // ՜ ARMENIAN EXCLAMATION MARK\n \"\\u055D\": \",\", // ՝ ARMENIAN COMMA\n \"\\u055E\": \"?\", // ՞ ARMENIAN QUESTION MARK\n \"\\u055F\": \".\", // ՟ ARMENIAN ABBREVIATION MARK\n \"\\u0589\": \".\", // ։ ARMENIAN FULL STOP\n \"\\u058A\": \"-\", // ֊ ARMENIAN HYPHEN\n \"\\u00AB\": '\"', // « LEFT-POINTING DOUBLE ANGLE QUOTATION MARK\n \"\\u00BB\": '\"', // » RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK\n};\n","import type { TransliterationStandard, CharMapping, SequenceMapping } from \"../types.js\";\nimport type { Token } from \"../scanner/tokens.js\";\nimport { isArmenianInWordDiacritic } from \"../armenian/alphabet.js\";\nimport { normalizeArmenian } from \"../armenian/normalize.js\";\nimport { scan } from \"../scanner/scanner.js\";\nimport { resolveMapping } from \"./context.js\";\nimport {\n detectWordCasing,\n applySingleTokenCasing,\n applyUpperCasing,\n toLowerCanonical,\n} from \"./casing.js\";\nimport { DEFAULT_PUNCTUATION } from \"./punctuation.js\";\n\n/**\n * Armenian in-word diacritic punctuation is transparent to word runs and\n * context-rule prev/next lookups. The token is still emitted (and mapped\n * through punctuation) but it is invisible to phonological context.\n */\nfunction isInWordDiacriticToken(token: Token | undefined): boolean {\n if (!token) return false;\n return (\n token.kind === \"punctuation\" && isArmenianInWordDiacritic(token.value)\n );\n}\n\n/**\n * Core transliteration engine.\n * Processes text through: normalize -> scan -> map -> casing.\n */\nexport class TransliterationEngine {\n private readonly charMap: Map<string, CharMapping>;\n private readonly seqMap: Map<string, SequenceMapping>;\n private readonly punctMap: Map<string, string>;\n\n constructor(private readonly standard: TransliterationStandard) {\n // Build lookup maps keyed by lowercase canonical Armenian\n this.charMap = new Map(\n standard.charMappings.map((m) => [m.armenian, m]),\n );\n this.seqMap = new Map(\n standard.sequenceMappings.map((m) => [m.armenian, m]),\n );\n\n // Merge default punctuation with standard-specific overrides\n this.punctMap = new Map(Object.entries(DEFAULT_PUNCTUATION));\n if (standard.punctuation) {\n for (const [key, value] of Object.entries(standard.punctuation)) {\n this.punctMap.set(key, value);\n }\n }\n }\n\n /** Transliterate Armenian text to the target script */\n transliterate(text: string): string {\n // 1. Normalize: expand ligatures, NFC\n const normalized = normalizeArmenian(text);\n\n // 2. Scan into tokens\n const tokens = scan(normalized, this.standard.sequenceMappings);\n\n // 3. Find word boundaries (consecutive Armenian token runs)\n const wordRuns = this.findWordRuns(tokens);\n\n // 4. For each word, detect casing and transliterate\n const output: string[] = new Array(tokens.length);\n const processedInWord = new Set<number>();\n\n for (const run of wordRuns) {\n const wordTokens = tokens.slice(run.start, run.end);\n const casing = detectWordCasing(wordTokens);\n\n for (let i = run.start; i < run.end; i++) {\n const token = tokens[i]!;\n if (isInWordDiacriticToken(token)) {\n // Diacritic — emit via punctuation map, skip phonological mapping.\n output[i] = this.punctMap.get(token.value) ?? token.value;\n processedInWord.add(i);\n continue;\n }\n const prev = findNeighbor(tokens, i, -1);\n const next = findNeighbor(tokens, i, +1);\n\n const mapped = this.mapToken(token, prev, next);\n output[i] =\n casing === \"upper\"\n ? applyUpperCasing(mapped)\n : applySingleTokenCasing(token, mapped);\n processedInWord.add(i);\n }\n\n if (casing === \"title\") {\n this.applyTitleCasingToFirstOutput(output, run);\n }\n }\n\n // 5. Process non-word tokens (punctuation, whitespace, other)\n for (let i = 0; i < tokens.length; i++) {\n if (processedInWord.has(i)) continue;\n const token = tokens[i]!;\n\n switch (token.kind) {\n case \"punctuation\":\n output[i] = this.punctMap.get(token.value) ?? token.value;\n break;\n default:\n output[i] = token.value;\n break;\n }\n }\n\n return output.join(\"\");\n }\n\n /** Map a single Armenian token to its transliterated form (lowercase) */\n private mapToken(\n token: Token,\n prev: Token | undefined,\n next: Token | undefined,\n ): string {\n const canonical = toLowerCanonical(token.value);\n\n if (token.kind === \"armenian_sequence\") {\n const mapping = this.seqMap.get(canonical);\n if (mapping) return resolveMapping(mapping, token, prev, next);\n }\n\n if (token.kind === \"armenian_letter\") {\n // Handle U+0587 (և) as a special case - it might be in seqMap\n const seqMapping = this.seqMap.get(canonical);\n if (seqMapping) return resolveMapping(seqMapping, token, prev, next);\n\n const charMapping = this.charMap.get(canonical);\n if (charMapping) return resolveMapping(charMapping, token, prev, next);\n }\n\n return token.value;\n }\n\n /** Preserve title case when the first source token maps to an empty string. */\n private applyTitleCasingToFirstOutput(\n output: string[],\n run: { start: number; end: number },\n ): void {\n for (let i = run.start; i < run.end; i++) {\n const value = output[i];\n if (value && value.length > 0) {\n output[i] = value.charAt(0).toUpperCase() + value.slice(1);\n return;\n }\n }\n }\n\n /**\n * Find consecutive runs of Armenian tokens (words). Armenian in-word\n * diacritics (՛ ՚ ՙ ՟) extend a run but cannot start one — they only\n * count as word-internal if surrounded by Armenian letters.\n */\n private findWordRuns(\n tokens: readonly Token[],\n ): Array<{ start: number; end: number }> {\n const runs: Array<{ start: number; end: number }> = [];\n let runStart = -1;\n let lastLetterIndex = -1;\n\n for (let i = 0; i <= tokens.length; i++) {\n const token = i < tokens.length ? tokens[i] : undefined;\n const isLetter =\n token?.kind === \"armenian_letter\" ||\n token?.kind === \"armenian_sequence\";\n const isDiacritic = isInWordDiacriticToken(token);\n\n if (isLetter) {\n if (runStart === -1) runStart = i;\n lastLetterIndex = i;\n } else if (isDiacritic && runStart !== -1) {\n // Extend the open run, but only commit to end:lastLetterIndex+1\n // if no further letter appears.\n continue;\n } else if (runStart !== -1) {\n runs.push({ start: runStart, end: lastLetterIndex + 1 });\n runStart = -1;\n lastLetterIndex = -1;\n }\n }\n\n return runs;\n }\n}\n\n/**\n * Find the nearest non-diacritic token from `tokens[i]` in `direction`\n * (-1 = before, +1 = after). Used for context-rule prev/next so in-word\n * diacritics are skipped during context evaluation.\n */\nfunction findNeighbor(\n tokens: readonly Token[],\n i: number,\n direction: -1 | 1,\n): Token | undefined {\n let j = i + direction;\n while (j >= 0 && j < tokens.length) {\n const candidate = tokens[j];\n if (candidate && !isInWordDiacriticToken(candidate)) return candidate;\n j += direction;\n }\n return undefined;\n}\n","import type { TransliterationStandard } from \"../../types.js\";\n\n// Armenian vowels (lowercase) used in context rules for ո word-initial behaviour\nconst VOWELS_AND_OU = [\"ա\", \"ե\", \"է\", \"ը\", \"ի\", \"ո\", \"ու\", \"օ\"] as const;\n\nexport const bgnPcgn: TransliterationStandard = {\n id: \"bgn-pcgn\",\n name: \"BGN/PCGN Romanization\",\n targetScript: \"latin\",\n reversible: false,\n\n charMappings: [\n // U+0561 ա\n { armenian: \"ա\", target: \"a\" },\n // U+0562 բ\n { armenian: \"բ\", target: \"b\" },\n // U+0563 գ\n { armenian: \"գ\", target: \"g\" },\n // U+0564 դ\n { armenian: \"դ\", target: \"d\" },\n // U+0565 ե — maps to \"ye\" initially and after vowels\n {\n armenian: \"ե\",\n target: \"e\",\n reverseDefault: true,\n contextRules: [\n { condition: { wordInitial: true }, target: \"ye\" },\n { condition: { precededBy: [...VOWELS_AND_OU] }, target: \"ye\" },\n ],\n },\n // U+0566 զ\n { armenian: \"զ\", target: \"z\" },\n // U+0567 է — same Latin \"e\" as ե, not the reverse default\n { armenian: \"է\", target: \"e\", reverseDefault: false },\n // U+0568 ը\n { armenian: \"ը\", target: \"y\", reverseDefault: false },\n // U+0569 թ — aspirated T\n { armenian: \"թ\", target: \"t\\u2019\", reverseDefault: false },\n // U+056A ժ\n { armenian: \"ժ\", target: \"zh\" },\n // U+056B ի\n { armenian: \"ի\", target: \"i\" },\n // U+056C լ\n { armenian: \"լ\", target: \"l\" },\n // U+056D խ\n { armenian: \"խ\", target: \"kh\" },\n // U+056E ծ — same Latin \"ts\" as ձ, not the reverse default\n { armenian: \"ծ\", target: \"ts\", reverseDefault: false },\n // U+056F կ\n { armenian: \"կ\", target: \"k\", reverseDefault: true },\n // U+0570 հ\n { armenian: \"հ\", target: \"h\" },\n // U+0571 ձ\n { armenian: \"ձ\", target: \"dz\" },\n // U+0572 ղ\n { armenian: \"ղ\", target: \"gh\" },\n // U+0573 ճ — same Latin \"ch\" as չ, not the reverse default\n { armenian: \"ճ\", target: \"ch\", reverseDefault: false },\n // U+0574 մ\n { armenian: \"մ\", target: \"m\" },\n // U+0575 յ\n { armenian: \"յ\", target: \"y\", reverseDefault: true },\n // U+0576 ն\n { armenian: \"ն\", target: \"n\" },\n // U+0577 շ\n { armenian: \"շ\", target: \"sh\" },\n // U+0578 ո — word-initial maps to \"vo\" except in ով\n {\n armenian: \"ո\",\n target: \"o\",\n reverseDefault: true,\n contextRules: [\n {\n condition: { wordInitial: true, followedBy: [\"վ\"] },\n target: \"o\",\n },\n {\n condition: { wordInitial: true },\n target: \"vo\",\n },\n ],\n },\n // U+0579 չ — aspirated CH\n { armenian: \"չ\", target: \"ch\\u2019\", reverseDefault: true },\n // U+057A պ — aspirated P counterpart; reverse default\n { armenian: \"պ\", target: \"p\", reverseDefault: true },\n // U+057B ջ\n { armenian: \"ջ\", target: \"j\" },\n // U+057C ռ — trilled R\n { armenian: \"ռ\", target: \"rr\" },\n // U+057D ս\n { armenian: \"ս\", target: \"s\" },\n // U+057E վ\n { armenian: \"վ\", target: \"v\" },\n // U+057F տ — reverse default \"t\"\n { armenian: \"տ\", target: \"t\", reverseDefault: true },\n // U+0580 ր — non-trilled R\n { armenian: \"ր\", target: \"r\" },\n // U+0581 ց — aspirated TS\n { armenian: \"ց\", target: \"ts\\u2019\", reverseDefault: true },\n // U+0582 ւ — not romanized standalone in BGN/PCGN; handled in ու/եւ sequences\n { armenian: \"ւ\", target: \"\" },\n // U+0583 փ — aspirated P; not reverse default\n { armenian: \"փ\", target: \"p\\u2019\", reverseDefault: false },\n // U+0584 ք — aspirated K; not reverse default\n { armenian: \"ք\", target: \"k\\u2019\", reverseDefault: false },\n // U+0585 օ — not reverse default (ո already maps to \"o\" with reverseDefault)\n { armenian: \"օ\", target: \"o\", reverseDefault: false },\n // U+0586 ֆ\n { armenian: \"ֆ\", target: \"f\" },\n ],\n\n sequenceMappings: [\n // ու digraph (U+0578 + U+0582) → \"u\"\n { armenian: \"ու\", target: \"u\" },\n // եվ sequence (ե + վ) as alternative spelling of \"ev\" — maps to \"yev\" initially and after vowels\n {\n armenian: \"եվ\",\n target: \"ev\",\n contextRules: [\n { condition: { wordInitial: true }, target: \"yev\" },\n { condition: { precededBy: [...VOWELS_AND_OU] }, target: \"yev\" },\n ],\n },\n // եւ traditional spelling (U+0565 + U+0582) — alternative spelling of \"ev\"\n {\n armenian: \"եւ\",\n target: \"ev\",\n contextRules: [\n { condition: { wordInitial: true }, target: \"yev\" },\n { condition: { precededBy: [...VOWELS_AND_OU] }, target: \"yev\" },\n ],\n },\n // և ligature (U+0587) — maps to \"yev\" initially and after vowels\n {\n armenian: \"և\",\n target: \"ev\",\n contextRules: [\n { condition: { wordInitial: true }, target: \"yev\" },\n { condition: { precededBy: [...VOWELS_AND_OU] }, target: \"yev\" },\n ],\n },\n ],\n\n punctuation: {\n \"։\": \".\", // Armenian full stop → period\n \"՞\": \"?\", // Armenian question mark → question mark\n \"՝\": \",\", // Armenian comma → comma\n \"՜\": \"!\", // Armenian exclamation mark → exclamation mark\n \"«\": '\"', // Armenian left guillemet → double quote\n \"»\": '\"', // Armenian right guillemet → double quote\n },\n};\n","import type { TransliterationStandard } from \"../../types.js\";\n\nexport const iso9985: TransliterationStandard = {\n id: \"iso-9985\",\n name: \"ISO 9985:1996\",\n targetScript: \"latin\",\n reversible: true,\n\n charMappings: [\n // U+0561 ա\n { armenian: \"ա\", target: \"a\" },\n // U+0562 բ\n { armenian: \"բ\", target: \"b\" },\n // U+0563 գ\n { armenian: \"գ\", target: \"g\" },\n // U+0564 դ\n { armenian: \"դ\", target: \"d\" },\n // U+0565 ե\n { armenian: \"ե\", target: \"e\" },\n // U+0566 զ\n { armenian: \"զ\", target: \"z\" },\n // U+0567 է → ē (e with macron, U+0113)\n { armenian: \"է\", target: \"ē\" },\n // U+0568 ը → ë (e with diaeresis, U+00EB)\n { armenian: \"ը\", target: \"ë\" },\n // U+0569 թ → tʿ (t + modifier letter left half ring U+02BF)\n { armenian: \"թ\", target: \"tʿ\" },\n // U+056A ժ → ž (U+017E)\n { armenian: \"ժ\", target: \"ž\" },\n // U+056B ի\n { armenian: \"ի\", target: \"i\" },\n // U+056C լ\n { armenian: \"լ\", target: \"l\" },\n // U+056D խ\n { armenian: \"խ\", target: \"x\" },\n // U+056E ծ → c̣ (c + combining dot below U+0323)\n { armenian: \"ծ\", target: \"c\\u0323\" },\n // U+056F կ\n { armenian: \"կ\", target: \"k\" },\n // U+0570 հ\n { armenian: \"հ\", target: \"h\" },\n // U+0571 ձ → j\n { armenian: \"ձ\", target: \"j\" },\n // U+0572 ղ → ġ (g + combining dot above U+0307)\n { armenian: \"ղ\", target: \"g\\u0307\" },\n // U+0573 ճ → č̣ (č U+010D + combining dot below U+0323)\n { armenian: \"ճ\", target: \"\\u010D\\u0323\" },\n // U+0574 մ\n { armenian: \"մ\", target: \"m\" },\n // U+0575 յ\n { armenian: \"յ\", target: \"y\" },\n // U+0576 ն\n { armenian: \"ն\", target: \"n\" },\n // U+0577 շ → š (U+0161)\n { armenian: \"շ\", target: \"š\" },\n // U+0578 ո\n { armenian: \"ո\", target: \"o\" },\n // U+0579 չ → č (U+010D)\n { armenian: \"չ\", target: \"č\" },\n // U+057A պ\n { armenian: \"պ\", target: \"p\" },\n // U+057B ջ → ǰ (j + combining caron, U+01F0)\n { armenian: \"ջ\", target: \"\\u01F0\" },\n // U+057C ռ → ṙ (r + combining dot above U+0307 applied to r... use ṙ U+1E59)\n { armenian: \"ռ\", target: \"\\u1E59\" },\n // U+057D ս\n { armenian: \"ս\", target: \"s\" },\n // U+057E վ\n { armenian: \"վ\", target: \"v\" },\n // U+057F տ\n { armenian: \"տ\", target: \"t\" },\n // U+0580 ր\n { armenian: \"ր\", target: \"r\" },\n // U+0581 ց → cʿ (c + modifier letter left half ring U+02BF)\n { armenian: \"ց\", target: \"c\\u02BF\" },\n // U+0582 ւ → w\n { armenian: \"ւ\", target: \"w\" },\n // U+0583 փ → pʿ (p + modifier letter left half ring U+02BF)\n { armenian: \"փ\", target: \"p\\u02BF\" },\n // U+0584 ք → kʿ (k + modifier letter left half ring U+02BF)\n { armenian: \"ք\", target: \"k\\u02BF\" },\n // U+0585 օ → ò (o with grave, U+00F2)\n { armenian: \"օ\", target: \"ò\" },\n // U+0586 ֆ\n { armenian: \"ֆ\", target: \"f\" },\n ],\n\n sequenceMappings: [\n // ու digraph (U+0578 + U+0582) — in ISO 9985 each letter maps independently (ո→o, ւ→w)\n // but the digraph \"ow\" is the natural reversible representation; no override needed.\n // և ligature (U+0587) → ew\n { armenian: \"և\", target: \"ew\" },\n ],\n\n punctuation: {\n \"։\": \".\",\n \"՞\": \"?\",\n \"՝\": \",\",\n \"՜\": \"!\",\n \"«\": '\"',\n \"»\": '\"',\n },\n};\n","import type { TransliterationStandard } from \"../../types.js\";\n\nexport const hubschmannMeillet: TransliterationStandard = {\n id: \"hubschmann-meillet\",\n name: \"Hübschmann-Meillet Transliteration\",\n targetScript: \"latin\",\n reversible: true,\n\n charMappings: [\n // U+0561 ա\n { armenian: \"ա\", target: \"a\" },\n // U+0562 բ\n { armenian: \"բ\", target: \"b\" },\n // U+0563 գ\n { armenian: \"գ\", target: \"g\" },\n // U+0564 դ\n { armenian: \"դ\", target: \"d\" },\n // U+0565 ե\n { armenian: \"ե\", target: \"e\" },\n // U+0566 զ\n { armenian: \"զ\", target: \"z\" },\n // U+0567 է → ē (e with macron, U+0113)\n { armenian: \"է\", target: \"ē\" },\n // U+0568 ը → ə (schwa, U+0259)\n { armenian: \"ը\", target: \"ə\" },\n // U+0569 թ → tʿ (t + modifier letter left half ring U+02BF)\n { armenian: \"թ\", target: \"tʿ\" },\n // U+056A ժ → ž (U+017E)\n { armenian: \"ժ\", target: \"ž\" },\n // U+056B ի\n { armenian: \"ի\", target: \"i\" },\n // U+056C լ\n { armenian: \"լ\", target: \"l\" },\n // U+056D խ\n { armenian: \"խ\", target: \"x\" },\n // U+056E ծ → c (plain c; H-M uses c without diacritic)\n { armenian: \"ծ\", target: \"c\" },\n // U+056F կ\n { armenian: \"կ\", target: \"k\" },\n // U+0570 հ\n { armenian: \"հ\", target: \"h\" },\n // U+0571 ձ → j\n { armenian: \"ձ\", target: \"j\" },\n // U+0572 ղ → ł (l with stroke, U+0142)\n { armenian: \"ղ\", target: \"ł\" },\n // U+0573 ճ → č (c with caron, U+010D)\n { armenian: \"ճ\", target: \"č\" },\n // U+0574 մ\n { armenian: \"մ\", target: \"m\" },\n // U+0575 յ\n { armenian: \"յ\", target: \"y\" },\n // U+0576 ն\n { armenian: \"ն\", target: \"n\" },\n // U+0577 շ → š (U+0161)\n { armenian: \"շ\", target: \"š\" },\n // U+0578 ո\n { armenian: \"ո\", target: \"o\" },\n // U+0579 չ → čʿ (č + modifier letter left half ring U+02BF)\n { armenian: \"չ\", target: \"č\\u02BF\" },\n // U+057A պ\n { armenian: \"պ\", target: \"p\" },\n // U+057B ջ → ǰ (j with combining caron, U+01F0)\n { armenian: \"ջ\", target: \"\\u01F0\" },\n // U+057C ռ → ṙ (r with dot above, U+1E59)\n { armenian: \"ռ\", target: \"\\u1E59\" },\n // U+057D ս\n { armenian: \"ս\", target: \"s\" },\n // U+057E վ\n { armenian: \"վ\", target: \"v\" },\n // U+057F տ\n { armenian: \"տ\", target: \"t\" },\n // U+0580 ր\n { armenian: \"ր\", target: \"r\" },\n // U+0581 ց → cʿ (c + modifier letter left half ring U+02BF)\n { armenian: \"ց\", target: \"c\\u02BF\" },\n // U+0582 ւ → w\n { armenian: \"ւ\", target: \"w\" },\n // U+0583 փ → pʿ (p + modifier letter left half ring U+02BF)\n { armenian: \"փ\", target: \"pʿ\" },\n // U+0584 ք → kʿ (k + modifier letter left half ring U+02BF)\n { armenian: \"ք\", target: \"kʿ\" },\n // U+0585 օ → ō (o with macron, U+014D)\n { armenian: \"օ\", target: \"ō\" },\n // U+0586 ֆ\n { armenian: \"ֆ\", target: \"f\" },\n ],\n\n sequenceMappings: [\n // ու digraph (U+0578 + U+0582) → u (per Pedersen H-M column 35)\n { armenian: \"ու\", target: \"u\" },\n // և ligature (U+0587) → ew\n { armenian: \"և\", target: \"ew\" },\n ],\n\n punctuation: {\n \"։\": \".\",\n \"՞\": \"?\",\n \"՝\": \",\",\n \"՜\": \"!\",\n \"«\": '\"',\n \"»\": '\"',\n },\n};\n","import type { TransliterationStandard } from \"../../types.js\";\n\n/**\n * ALA-LC (Library of Congress) Armenian Romanization, 2022 version.\n *\n * Source: https://www.loc.gov/catdir/cpso/romanization/armenian.pdf\n *\n * Notes encoded in this profile:\n *\n * - Note 1 (West Armenian bracketed alternates): the package emits only the\n * East/Classical default value; the West Armenian \"[p] / [k] / [t] / …\"\n * references are not produced.\n * - Note 2 (ե → y initial+vowel, Classical orthography): word-initial ե\n * followed by a vowel romanizes as \"y\" (replacing \"e\"), e.g. classical\n * \"Եա...\" → \"ya…\". Initial ե + consonant stays \"e\" (so \"Երևան\" → \"Erevan\").\n * - Note 3 (soft-sign disambiguation): inserts U+02B9 MODIFIER LETTER PRIME\n * between letter pairs whose romanization would otherwise be read as a\n * digraph (գհ → gʹh, դզ → dʹz, կհ → kʹh, սհ → sʹh, տս → tʹs).\n * - Note 4 (յ → ḥ initial-of-word-or-stem, Classical orthography): the\n * simple \"initial of a word\" form is implemented; \"initial of a stem in\n * a compound\" is morphology-dependent and cannot be detected automatically.\n * - Note 5 (եւ → ew, Classical): treated as a sequence; the modern ligature\n * և (U+0587) maps separately to \"ev\" per note 6.\n * - Note 6 (և → ev, with eʹv exception): the soft-sign exception for\n * lowercase \"եվ\" at word start has two lexical exceptions (ևեթ, ևս) and\n * is therefore NOT implemented; \"եվ\" maps to \"ev\" unconditionally.\n *\n * Diacritic / modifier characters used:\n * - U+02BB MODIFIER LETTER TURNED COMMA (ʻ): aspirate mark (թ, չ, ց, փ, ք)\n * - U+02B9 MODIFIER LETTER PRIME (ʹ): soft sign (note 3)\n * - U+0113 LATIN SMALL LETTER E WITH MACRON (ē): է\n * - U+011B LATIN SMALL LETTER E WITH CARON (ě): ը\n * - U+1E25 LATIN SMALL LETTER H WITH DOT BELOW (ḥ): word-initial յ\n * - U+1E5B LATIN SMALL LETTER R WITH DOT BELOW (ṛ): ռ\n * - U+014D LATIN SMALL LETTER O WITH MACRON (ō): օ\n */\n\n// Classical Armenian vowels used by note 2 (ե → y when followed by a vowel).\n// Includes the ու digraph token because the scanner emits ու as one sequence.\nconst ALA_LC_VOWELS = [\"ա\", \"ե\", \"է\", \"ը\", \"ի\", \"ո\", \"ու\", \"օ\"] as const;\n\nexport const alaLc: TransliterationStandard = {\n id: \"ala-lc\",\n name: \"ALA-LC Romanization (2022)\",\n targetScript: \"latin\",\n reversible: false,\n\n charMappings: [\n // U+0561 ա\n { armenian: \"ա\", target: \"a\" },\n // U+0562 բ\n { armenian: \"բ\", target: \"b\" },\n // U+0563 գ — note 3: gʹh when followed by հ\n {\n armenian: \"գ\",\n target: \"g\",\n contextRules: [\n { condition: { followedBy: [\"հ\"] }, target: \"gʹ\" },\n ],\n },\n // U+0564 դ — note 3: dʹz when followed by զ\n {\n armenian: \"դ\",\n target: \"d\",\n contextRules: [\n { condition: { followedBy: [\"զ\"] }, target: \"dʹ\" },\n ],\n },\n // U+0565 ե — note 2: y (replaces e) word-initial + followed by vowel\n {\n armenian: \"ե\",\n target: \"e\",\n reverseDefault: true,\n contextRules: [\n {\n condition: { wordInitial: true, followedBy: [...ALA_LC_VOWELS] },\n target: \"y\",\n },\n ],\n },\n // U+0566 զ\n { armenian: \"զ\", target: \"z\" },\n // U+0567 է → ē (e with macron, U+0113)\n { armenian: \"է\", target: \"ē\" },\n // U+0568 ը → ě (e with caron, U+011B)\n { armenian: \"ը\", target: \"ě\" },\n // U+0569 թ → tʻ (t + U+02BB modifier letter turned comma)\n { armenian: \"թ\", target: \"tʻ\" },\n // U+056A ժ → zh\n { armenian: \"ժ\", target: \"zh\" },\n // U+056B ի\n { armenian: \"ի\", target: \"i\" },\n // U+056C լ\n { armenian: \"լ\", target: \"l\" },\n // U+056D խ → kh\n { armenian: \"խ\", target: \"kh\" },\n // U+056E ծ → ts (note 1: West Armenian alternative [dz] not emitted)\n { armenian: \"ծ\", target: \"ts\", reverseDefault: true },\n // U+056F կ — note 3: kʹh when followed by հ\n {\n armenian: \"կ\",\n target: \"k\",\n reverseDefault: true,\n contextRules: [\n { condition: { followedBy: [\"հ\"] }, target: \"kʹ\" },\n ],\n },\n // U+0570 հ\n { armenian: \"հ\", target: \"h\" },\n // U+0571 ձ → dz (note 1: West Armenian alternative [ts] not emitted)\n { armenian: \"ձ\", target: \"dz\", reverseDefault: true },\n // U+0572 ղ → gh\n { armenian: \"ղ\", target: \"gh\" },\n // U+0573 ճ → ch (note 1: West Armenian alternative [j] not emitted)\n { armenian: \"ճ\", target: \"ch\", reverseDefault: true },\n // U+0574 մ\n { armenian: \"մ\", target: \"m\" },\n // U+0575 յ — note 4: ḥ word-initial (Classical orthography)\n {\n armenian: \"յ\",\n target: \"y\",\n reverseDefault: true,\n contextRules: [\n { condition: { wordInitial: true }, target: \"ḥ\" },\n ],\n },\n // U+0576 ն\n { armenian: \"ն\", target: \"n\" },\n // U+0577 շ → sh\n { armenian: \"շ\", target: \"sh\" },\n // U+0578 ո → o (NO word-initial vo rule)\n { armenian: \"ո\", target: \"o\", reverseDefault: true },\n // U+0579 չ → chʻ (ch + U+02BB)\n { armenian: \"չ\", target: \"chʻ\", reverseDefault: false },\n // U+057A պ\n { armenian: \"պ\", target: \"p\", reverseDefault: true },\n // U+057B ջ → j (note 1: West Armenian alternative [ch] not emitted)\n { armenian: \"ջ\", target: \"j\" },\n // U+057C ռ → ṙ (r with dot below, U+1E5B)\n { armenian: \"ռ\", target: \"ṛ\", reverseDefault: true },\n // U+057D ս — note 3: sʹh when followed by հ\n {\n armenian: \"ս\",\n target: \"s\",\n contextRules: [\n { condition: { followedBy: [\"հ\"] }, target: \"sʹ\" },\n ],\n },\n // U+057E վ\n { armenian: \"վ\", target: \"v\" },\n // U+057F տ — note 3: tʹs when followed by ս\n {\n armenian: \"տ\",\n target: \"t\",\n reverseDefault: true,\n contextRules: [\n { condition: { followedBy: [\"ս\"] }, target: \"tʹ\" },\n ],\n },\n // U+0580 ր\n { armenian: \"ր\", target: \"r\", reverseDefault: false },\n // U+0581 ց → tsʻ (ts + U+02BB)\n { armenian: \"ց\", target: \"tsʻ\", reverseDefault: false },\n // U+0582 ւ → w (yiwn; the ու digraph below overrides for the o+w pair)\n { armenian: \"ւ\", target: \"w\" },\n // U+0583 փ → pʻ (p + U+02BB)\n { armenian: \"փ\", target: \"pʻ\", reverseDefault: false },\n // U+0584 ք → kʻ (k + U+02BB)\n { armenian: \"ք\", target: \"kʻ\", reverseDefault: false },\n // U+0585 օ → ō (o with macron, U+014D)\n { armenian: \"օ\", target: \"ō\", reverseDefault: false },\n // U+0586 ֆ\n { armenian: \"ֆ\", target: \"f\" },\n ],\n\n sequenceMappings: [\n // ու digraph (U+0578 + U+0582) → \"u\"\n { armenian: \"ու\", target: \"u\" },\n // եւ (U+0565 + U+0582), Classical orthography → \"ew\" (note 5)\n { armenian: \"եւ\", target: \"ew\" },\n // եվ (U+0565 + U+057E), modern Eve sequence → \"ev\"\n { armenian: \"եվ\", target: \"ev\" },\n // և (U+0587) modern ligature → \"ev\" (note 6, soft-sign exception not implemented)\n { armenian: \"և\", target: \"ev\" },\n ],\n\n punctuation: {\n \"։\": \".\",\n \"՞\": \"?\",\n \"՝\": \",\",\n \"՜\": \"!\",\n \"«\": '\"',\n \"»\": '\"',\n },\n};\n","import type { TransliterationStandard } from \"../../types.js\";\n\n/**\n * Armenian vowels/digraphs used for Russian context rules.\n * \"ու\" can be a sequence token, so keep it in the same list as letters.\n */\nconst VOWELS_AND_U = [\"ա\", \"ե\", \"է\", \"ը\", \"ի\", \"ո\", \"ու\", \"օ\"] as const;\n\n/**\n * Russian transfer of Armenian SSR geographic names.\n *\n * Source: Инструкция по русской передаче географических названий Армянской ССР\n * / Сост. Г. Г. Кузьмина; Ред. Э. Г. Туманян. — М., 1974.\n *\n * This is a geographic-name profile, not a general phonetic Cyrillic\n * transcription. Official/traditional names may still override the table.\n */\nexport const ruGeoKt1974: TransliterationStandard = {\n id: \"ru-geo-kt-1974\",\n name: \"Russian Geographic Transliteration (Kuzmina-Tumanyan 1974)\",\n targetScript: \"cyrillic\",\n reversible: false,\n\n charMappings: [\n { armenian: \"ա\", target: \"а\" },\n { armenian: \"բ\", target: \"б\" },\n { armenian: \"գ\", target: \"г\" },\n { armenian: \"դ\", target: \"д\" },\n { armenian: \"ե\", target: \"е\" },\n { armenian: \"զ\", target: \"з\" },\n { armenian: \"է\", target: \"э\" },\n { armenian: \"ը\", target: \"ы\" },\n { armenian: \"թ\", target: \"т\", reverseDefault: false },\n { armenian: \"ժ\", target: \"ж\" },\n { armenian: \"ի\", target: \"и\" },\n { armenian: \"լ\", target: \"л\" },\n { armenian: \"խ\", target: \"х\", reverseDefault: true },\n { armenian: \"ծ\", target: \"ц\", reverseDefault: false },\n { armenian: \"կ\", target: \"к\", reverseDefault: true },\n {\n armenian: \"հ\",\n target: \"х\",\n reverseDefault: false,\n contextRules: [{ condition: { wordInitial: true }, target: \"\" }],\n },\n { armenian: \"ձ\", target: \"дз\" },\n {\n armenian: \"ղ\",\n target: \"х\",\n reverseDefault: false,\n contextRules: [{ condition: { wordInitial: true }, target: \"к\" }],\n },\n { armenian: \"ճ\", target: \"ч\", reverseDefault: false },\n { armenian: \"մ\", target: \"м\" },\n { armenian: \"յ\", target: \"й\" },\n { armenian: \"ն\", target: \"н\" },\n { armenian: \"շ\", target: \"ш\" },\n {\n armenian: \"ո\",\n target: \"о\",\n reverseDefault: true,\n contextRules: [\n { condition: { wordInitial: true, followedBy: [\"վ\"] }, target: \"о\" },\n { condition: { wordInitial: true }, target: \"во\" },\n ],\n },\n { armenian: \"չ\", target: \"ч\", reverseDefault: true },\n { armenian: \"պ\", target: \"п\", reverseDefault: true },\n { armenian: \"ջ\", target: \"дж\" },\n { armenian: \"ռ\", target: \"р\", reverseDefault: true },\n { armenian: \"ս\", target: \"с\" },\n { armenian: \"վ\", target: \"в\" },\n { armenian: \"տ\", target: \"т\", reverseDefault: true },\n { armenian: \"ր\", target: \"р\", reverseDefault: false },\n { armenian: \"ց\", target: \"ц\", reverseDefault: true },\n { armenian: \"ւ\", target: \"у\" },\n { armenian: \"փ\", target: \"п\", reverseDefault: false },\n { armenian: \"ք\", target: \"к\", reverseDefault: false },\n { armenian: \"օ\", target: \"о\", reverseDefault: false },\n { armenian: \"ֆ\", target: \"ф\" },\n ],\n\n sequenceMappings: [\n { armenian: \"յու\", target: \"ю\" },\n { armenian: \"յա\", target: \"я\" },\n { armenian: \"յո\", target: \"ё\" },\n { armenian: \"յե\", target: \"е\" },\n { armenian: \"յի\", target: \"и\" },\n { armenian: \"ու\", target: \"у\" },\n { armenian: \"եվ\", target: \"ев\" },\n { armenian: \"եւ\", target: \"ев\" },\n { armenian: \"և\", target: \"ев\" },\n ],\n\n punctuation: {},\n};\n\n/**\n * Re-exported for sibling Russian geographic profiles that share the same\n * basic vowel contexts.\n */\nexport const russianVowelsAndU = VOWELS