@naandalist/honocoroko
Version:
A TypeScript library for transliterating text between Latin and Javanese script (Aksara Jawa/Hanacaraka)
220 lines (219 loc) • 7.88 kB
JavaScript
import { allMappings, createReverseMappings, consonants, vowels, numbers, punctuation, phoneticApproximations, } from './mappings.js';
// Reverse lookup used by fromHonocoroko to turn Javanese sequences back into Latin text.
// Built once at module load from allMappings. fromHonocoroko queries it with
// .has()/.get(), so it is presumably a Map keyed by the Javanese string
// (verify against createReverseMappings in ./mappings.js).
const reverseMapping = createReverseMappings(allMappings);
// Characters that are copied through untouched unless the caller opts in with
// `convertSpecialChars`. Characters that have a proper Javanese equivalent are
// deliberately left out of this list so they still get converted.
// Each string below is split into its individual characters.
const DEFAULT_PRESERVE_CHARS = Array.from(
    "?!@#$%^&*" +
    "-_=+[]{}|\\" +
    ";'<>/`~"
);
// Reports whether `char` counts as a consonant: either the consonant table has an
// entry spelled exactly `char` or `char` plus the inherent "a", or `char` is a
// single non-vowel ASCII letter (upper or lower case).
function isConsonant(char) {
    const withInherentA = char + 'a';
    const listedInTable = consonants.some((entry) => {
        const { latin } = entry;
        return latin === char || latin === withInherentA;
    });
    if (listedInTable) {
        return true;
    }
    return /^[bcdfghjklmnpqrstvwxyz]$/i.test(char);
}
// Reports whether `char` is one of the Latin vowels a, i, u, e, é, o.
// The comparison is case-insensitive.
function isVowel(char) {
    switch (char.toLowerCase()) {
        case 'a':
        case 'i':
        case 'u':
        case 'e':
        case 'é':
        case 'o':
            return true;
        default:
            return false;
    }
}
// Vowel marks (sandhangan) keyed by lowercase Latin vowel.
// A Map is used instead of an object literal so that inherited property names
// such as "constructor" or "__proto__" can never be returned as if they were marks.
const VOWEL_MARKS = new Map([
    ['i', 'ꦶ'], // wulu
    ['u', 'ꦸ'], // suku
    ['é', 'ꦺ'], // taling
    ['e', 'ꦼ'], // pepet
    ['o', 'ꦺꦴ'], // taling + tarung
]);
/**
 * Returns the vowel mark (sandhangan) to append after a consonant.
 * @param vowel - A Latin vowel; matched case-insensitively
 * @returns The mark for i, u, é, e or o; an empty string for anything else
 *          (including "a", which has no entry in the table)
 */
function getVowelMark(vowel) {
    return VOWEL_MARKS.get(vowel.toLowerCase()) ?? '';
}
/**
 * Transliterates Latin text to Javanese script (Honocoroko/Hanacaraka).
 *
 * The input is scanned left to right; at each position the first rule that
 * applies wins:
 *   1. characters in DEFAULT_PRESERVE_CHARS are copied unchanged
 *      (skipped when `options.convertSpecialChars` is true);
 *   2. whitespace is copied unchanged;
 *   3. digits and punctuation are replaced via the `numbers` / `punctuation` tables;
 *   4. a three-letter entry of `consonants` (e.g. "nga") is replaced as a unit;
 *   5. a two-letter consonant digraph followed by a vowel (e.g. "ngi") becomes the
 *      digraph's aksara plus the vowel mark;
 *   6. a two-letter entry of `consonants` (e.g. "ka") is replaced as a unit;
 *   7. a single consonant becomes its aksara, followed by a vowel mark when a vowel
 *      follows, or by a pangkon when another consonant follows;
 *   8. standalone vowels, then phonetic approximations, are looked up in their tables;
 *   9. anything else is copied unchanged and reported with console.warn.
 *
 * @param text - The Latin text to transliterate
 * @param options - Optional transliteration options
 * @param options.convertSpecialChars - When true, the characters in
 *        DEFAULT_PRESERVE_CHARS go through the normal rules instead of being
 *        copied as-is. Defaults to false.
 * @returns The transliterated Javanese text; '' when `text` is empty or nullish
 */
export function toHonocoroko(text, options) {
    if (!text)
        return '';
    const convertSpecialChars = options?.convertSpecialChars ?? false;
    const PANGKON = '꧀';
    const lookup = (table, latin) => table.find((entry) => entry.latin === latin);
    // A consonant letter resolves to the aksara registered with the inherent "a"
    // (e.g. "k" -> "ka"); a bare entry is used only when no such aksara exists.
    const lookupConsonant = (letters) => lookup(consonants, letters + 'a') ?? lookup(consonants, letters);
    // The inherent "a" is carried by the aksara itself and needs no mark.
    const markFor = (vowel) => (vowel === 'a' ? '' : getVowelMark(vowel));
    let result = '';
    let i = 0;
    while (i < text.length) {
        const char = text[i];
        // Preserved special characters (default behavior)
        if (!convertSpecialChars && DEFAULT_PRESERVE_CHARS.includes(char)) {
            result += char;
            i++;
            continue;
        }
        // Whitespace
        if (/\s/.test(char)) {
            result += char;
            i++;
            continue;
        }
        // Digits first, then punctuation
        const symbol = lookup(numbers, char) ?? lookup(punctuation, char);
        if (symbol) {
            result += symbol.javanese;
            i++;
            continue;
        }
        if (i + 2 < text.length) {
            // Three-letter consonant entries (nga, nya, dha, tha)
            const cluster = lookup(consonants, text.slice(i, i + 3));
            if (cluster) {
                result += cluster.javanese;
                i += 3;
                continue;
            }
            // Digraph + vowel other than the inherent "a" ("ngi", "nyo", "thu", ...).
            // Without this rule the digraph was split into two separate aksara
            // joined by a pangkon (e.g. "ngi" came out as n + pangkon + gi).
            const second = text[i + 1];
            const third = text[i + 2];
            if (!isVowel(char) && !isVowel(second) && isVowel(third)) {
                const digraph = lookup(consonants, char + second + 'a');
                if (digraph) {
                    result += digraph.javanese + markFor(third);
                    i += 3;
                    continue;
                }
            }
        }
        const next = text[i + 1]; // undefined at the end of the text
        // Two-letter consonant entries (ka, ba, etc.)
        if (next !== undefined) {
            const pair = lookup(consonants, char + next);
            if (pair) {
                result += pair.javanese;
                i += 2;
                continue;
            }
        }
        // Single consonant, optionally followed by a vowel
        const consonant = lookupConsonant(char);
        if (consonant) {
            result += consonant.javanese;
            if (next !== undefined && isVowel(next)) {
                result += markFor(next);
                i += 2;
            }
            else {
                // NOTE(review): a consonant at the end of the text or before a
                // non-consonant gets no pangkon, so it presumably reads with the
                // inherent "a" - confirm whether that is intended.
                if (next !== undefined && isConsonant(next)) {
                    result += PANGKON;
                }
                i++;
            }
            continue;
        }
        // Standalone vowels, then phonetic approximations (matched case-insensitively)
        const lowered = char.toLowerCase();
        const fallback = lookup(vowels, char) ??
            phoneticApproximations.find((p) => p.latin.toLowerCase() === lowered);
        if (fallback) {
            result += fallback.javanese;
            i++;
            continue;
        }
        // If no match found, keep as-is
        console.warn(`No mapping found for character: ${char}`);
        result += char;
        i++;
    }
    return result;
}
/**
 * Transliterates Javanese script (Honocoroko/Hanacaraka) to Latin text.
 *
 * At each position the longest sequence (up to four UTF-16 code units) that has
 * an entry in `reverseMapping` is replaced by its Latin value. Characters in
 * DEFAULT_PRESERVE_CHARS are copied unchanged unless `options.convertSpecialChars`
 * is true. Anything without a mapping is copied unchanged; non-whitespace
 * characters of that kind are also reported with console.warn.
 *
 * @param text - The Javanese text to transliterate
 * @param options - Optional transliteration options
 * @param options.convertSpecialChars - When true, the characters in
 *        DEFAULT_PRESERVE_CHARS are looked up like any other character instead of
 *        being copied as-is. Defaults to false.
 * @returns The transliterated Latin text; '' when `text` is empty or nullish
 */
export function fromHonocoroko(text, options) {
    if (!text)
        return '';
    const convertSpecialChars = options?.convertSpecialChars ?? false;
    // Longest mapped sequence, in UTF-16 code units, tried at each position.
    const MAX_SEQUENCE_LENGTH = 4;
    let result = '';
    let i = 0;
    while (i < text.length) {
        const char = text[i];
        // Check if this character should be preserved unchanged (default behavior)
        if (!convertSpecialChars && DEFAULT_PRESERVE_CHARS.includes(char)) {
            result += char;
            i++;
            continue;
        }
        // Try longer sequences first (some Javanese characters are multi-codepoint)
        let matchedLength = 0;
        for (let len = Math.min(MAX_SEQUENCE_LENGTH, text.length - i); len >= 1; len--) {
            const candidate = text.slice(i, i + len);
            if (reverseMapping.has(candidate)) {
                result += reverseMapping.get(candidate);
                matchedLength = len;
                break;
            }
        }
        if (matchedLength > 0) {
            i += matchedLength;
            continue;
        }
        // No mapping: keep the character as-is. Work on the whole code point so a
        // character outside the BMP (e.g. an emoji) is reported once with its real
        // code point rather than as two lone surrogate halves.
        const codePoint = text.codePointAt(i);
        const unmapped = String.fromCodePoint(codePoint);
        if (!/\s/.test(unmapped)) {
            console.warn(`No reverse mapping found for character: ${unmapped} (U+${codePoint.toString(16).toUpperCase().padStart(4, '0')})`);
        }
        result += unmapped;
        i += unmapped.length;
    }
    return result;
}
/**
 * Direction-agnostic entry point for transliteration.
 * @param text - The text to transliterate
 * @param direction - 'toHonocoroko' converts Latin to Javanese; any other value
 *        converts Javanese to Latin
 * @param options - Optional transliteration options, forwarded unchanged
 * @returns The transliterated text
 */
export function transliterate(text, direction, options) {
    const convert = direction === 'toHonocoroko' ? toHonocoroko : fromHonocoroko;
    return convert(text, options);
}