UNPKG

@naandalist/honocoroko

Version:

A TypeScript library for transliterating text between Latin and Javanese script (Aksara Jawa/Hanacaraka)

github.com/Naandalist/honocoroko

Naandalist/honocoroko

225 lines (224 loc) • 8.16 kB

JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.toHonocoroko = toHonocoroko;
exports.fromHonocoroko = fromHonocoroko;
exports.transliterate = transliterate;
const mappings_js_1 = require("./mappings.js");

// Reverse lookup (Javanese sequence -> Latin) used by fromHonocoroko.
// It is queried with .has()/.get(), so it is presumably a Map — confirm in mappings.js.
const reverseMapping = (0, mappings_js_1.createReverseMappings)(mappings_js_1.allMappings);

// Default characters to preserve (not convert to Hanacaraka).
// Excludes characters that already have proper Javanese equivalents.
// Stored as a Set because membership is tested once per input character.
const DEFAULT_PRESERVE_CHARS = new Set([
    '?', '!', '@', '#', '$', '%', '^', '&', '*',
    '-', '_', '=', '+', '[', ']', '{', '}',
    '|', '\\', ';', "'", '<', '>', '/',
    '`', '~',
]);

// Pangkon: appended after a consonant that is directly followed by another consonant.
const PANGKON = '꧀';

// Vowel marks (sandhangan) keyed by lowercase Latin vowel.
// 'a' is intentionally absent: callers emit no mark for it.
const VOWEL_MARKS = Object.freeze({
    'i': 'ꦶ', // wulu
    'u': 'ꦸ', // suku
    'é': 'ꦺ', // taling
    'e': 'ꦼ', // pepet
    'o': 'ꦺꦴ', // taling + tarung
});

// Longest sequence (in UTF-16 code units) tried against the reverse mapping.
const MAX_REVERSE_KEY_LENGTH = 4;

/**
 * Returns the first entry of a mapping table whose `latin` field equals `latin`.
 * @param {Array<{latin: string, javanese: string}>} table - Mapping table to search
 * @param {string} latin - Exact (case-sensitive) Latin key
 * @returns {{latin: string, javanese: string} | undefined} The entry, or undefined
 */
function findByLatin(table, latin) {
    return table.find((entry) => entry.latin === latin);
}

/**
 * Looks up the consonant entry for a single Latin character, preferring the
 * entry keyed as `char + 'a'` and falling back to the one keyed as `char`.
 * @param {string} char - A single Latin character
 * @returns {{latin: string, javanese: string} | undefined} The entry, or undefined
 */
function findBaseConsonant(char) {
    return findByLatin(mappings_js_1.consonants, char + 'a')
        ?? findByLatin(mappings_js_1.consonants, char);
}

/**
 * Checks whether a character counts as a consonant: either it is a key of the
 * consonant table (as `char` or `char + 'a'`) or it is an ASCII consonant letter
 * (case-insensitive).
 * @param {string} char - The character to test
 * @returns {boolean} True when the character is treated as a consonant
 */
function isConsonant(char) {
    const inTable = mappings_js_1.consonants.some(
        (c) => c.latin === char || c.latin === char + 'a'
    );
    return inTable || /^[bcdfghjklmnpqrstvwxyz]$/i.test(char);
}

/**
 * Checks whether a character is one of the vowels a, i, u, e, é, o
 * (case-insensitive).
 * @param {string} char - The character to test
 * @returns {boolean} True when the character is a vowel
 */
function isVowel(char) {
    return ['a', 'i', 'u', 'e', 'é', 'o'].includes(char.toLowerCase());
}

/**
 * Returns the vowel mark (sandhangan) for a vowel.
 * @param {string} vowel - The Latin vowel (case-insensitive)
 * @returns {string} The mark, or '' when the vowel has none (for example 'a')
 */
function getVowelMark(vowel) {
    return VOWEL_MARKS[vowel.toLowerCase()] || '';
}

/**
 * Transliterates Latin text to Javanese script (Honocoroko/Hanacaraka).
 *
 * The input is scanned left to right. At each position the first matching rule wins:
 * preserved special character, whitespace, number, punctuation, three-character
 * consonant key, two-character consonant key, single consonant (with optional
 * vowel mark or pangkon), standalone vowel, phonetic approximation. Anything else
 * is copied through and reported with console.warn.
 *
 * @param {string} text - The Latin text to transliterate
 * @param {{convertSpecialChars?: boolean}} [options] - Optional transliteration
 *   options. Unless `convertSpecialChars` is set, the characters in
 *   DEFAULT_PRESERVE_CHARS are copied through unchanged.
 * @returns {string} The transliterated Javanese text ('' for empty or missing input)
 */
function toHonocoroko(text, options) {
    if (!text) {
        return '';
    }
    const convertSpecialChars = options?.convertSpecialChars ?? false;
    const { consonants, numbers, punctuation, vowels, phoneticApproximations } = mappings_js_1;
    let result = '';
    let i = 0;
    while (i < text.length) {
        const char = text[i];
        const hasNext = i + 1 < text.length;

        // Preserved special characters and whitespace are copied verbatim.
        if ((!convertSpecialChars && DEFAULT_PRESERVE_CHARS.has(char)) || /\s/.test(char)) {
            result += char;
            i++;
            continue;
        }

        // Numbers take precedence over punctuation.
        const symbol = findByLatin(numbers, char) ?? findByLatin(punctuation, char);
        if (symbol) {
            result += symbol.javanese;
            i++;
            continue;
        }

        // Three-character consonant keys (the original comment cites nga, nya, dha, tha).
        // Only attempted when three characters remain, so a shorter tail cannot match here.
        if (i + 2 < text.length) {
            const cluster = findByLatin(consonants, text.slice(i, i + 3));
            if (cluster) {
                result += cluster.javanese;
                i += 3;
                continue;
            }
        }

        // Two-character consonant keys (ka, ba, etc.).
        if (hasNext) {
            const syllable = findByLatin(consonants, text.slice(i, i + 2));
            if (syllable) {
                result += syllable.javanese;
                i += 2;
                continue;
            }
        }

        // Single consonant, followed by a vowel, another consonant, or neither.
        const base = findBaseConsonant(char);
        if (base) {
            result += base.javanese;
            const nextChar = text[i + 1];
            if (hasNext && isVowel(nextChar)) {
                // No mark is emitted for a lowercase 'a'.
                if (nextChar !== 'a') {
                    result += getVowelMark(nextChar);
                }
                i += 2;
            } else {
                // NOTE(review): pangkon is added only before another consonant; a
                // consonant at end of input or before whitespace gets none. Confirm
                // that this is the intended orthography.
                if (hasNext && isConsonant(nextChar)) {
                    result += PANGKON;
                }
                i++;
            }
            continue;
        }

        // Standalone vowels.
        const vowel = findByLatin(vowels, char);
        if (vowel) {
            result += vowel.javanese;
            i++;
            continue;
        }

        // Phonetic approximations are the only case-insensitive lookup.
        const lowered = char.toLowerCase();
        const approx = phoneticApproximations.find((p) => p.latin.toLowerCase() === lowered);
        if (approx) {
            result += approx.javanese;
            i++;
            continue;
        }

        // If no match found, keep as-is.
        console.warn(`No mapping found for character: ${char}`);
        result += char;
        i++;
    }
    return result;
}

/**
 * Transliterates Javanese script (Honocoroko/Hanacaraka) to Latin text.
 *
 * At each position the longest sequence (4 down to 1 UTF-16 code units) found in
 * the reverse mapping is replaced by its Latin value. Unmatched characters are
 * copied through; non-whitespace ones are also reported with console.warn.
 *
 * @param {string} text - The Javanese text to transliterate
 * @param {{convertSpecialChars?: boolean}} [options] - Optional transliteration
 *   options. Unless `convertSpecialChars` is set, the characters in
 *   DEFAULT_PRESERVE_CHARS are copied through before any lookup.
 * @returns {string} The transliterated Latin text ('' for empty or missing input)
 */
function fromHonocoroko(text, options) {
    if (!text) {
        return '';
    }
    const convertSpecialChars = options?.convertSpecialChars ?? false;
    let result = '';
    let i = 0;
    while (i < text.length) {
        const char = text[i];

        // Preserved special characters are copied verbatim (default behavior).
        if (!convertSpecialChars && DEFAULT_PRESERVE_CHARS.has(char)) {
            result += char;
            i++;
            continue;
        }

        // Try longer sequences first (some Javanese characters are multi-codepoint).
        let matchedLength = 0;
        for (let len = MAX_REVERSE_KEY_LENGTH; len >= 1; len--) {
            if (i + len > text.length) {
                continue;
            }
            const candidate = text.slice(i, i + len);
            if (reverseMapping.has(candidate)) {
                result += reverseMapping.get(candidate);
                matchedLength = len;
                break;
            }
        }
        if (matchedLength > 0) {
            i += matchedLength;
            continue;
        }

        // No match: keep the character as-is, warning unless it is whitespace.
        if (!/\s/.test(char)) {
            console.warn(`No reverse mapping found for character: ${char} (U+${char.charCodeAt(0).toString(16).toUpperCase().padStart(4, '0')})`);
        }
        result += char;
        i++;
    }
    return result;
}

/**
 * Generic transliteration function that can go either direction.
 * @param {string} text - The text to transliterate
 * @param {string} direction - 'toHonocoroko' converts Latin to Javanese; any other
 *   value converts Javanese to Latin
 * @param {{convertSpecialChars?: boolean}} [options] - Optional transliteration options
 * @returns {string} The transliterated text
 */
function transliterate(text, direction, options) {
    return direction === 'toHonocoroko'
        ? toHonocoroko(text, options)
        : fromHonocoroko(text, options);
}