UNPKG

romanize-string

Version:

A fully typed, general-purpose utility for unidirectional string transliteration (non-Latin script => Latin script).

github.com/rejyoung/romanize-string

rejyoung/romanize-string

31 lines (30 loc) • 1.47 kB

JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.romanizeKorean = void 0;
const oktjs_1 = require("oktjs");
const index_js_1 = require("../vendor/romanize/korean/src/index.js");

/**
 * Romanize a Korean string token by token.
 *
 * The input is normalized and tokenized with oktjs, each token's text is
 * passed through the vendored `romanize` function, and any token tagged
 * as a josa (Korean case marker) is attached to the preceding token with
 * a hyphen. The pieces are then joined with single spaces, and white
 * space in front of punctuation is removed.
 *
 * @param {string} string - Text to romanize.
 * @returns {string} The romanized text.
 */
const romanizeKorean = (string) => {
    // Normalize first, then tokenize; tokens whose text is empty or
    // white space only are discarded.
    const cleaned = (0, oktjs_1.normalize)(string);
    const words = (0, oktjs_1.tokenize)(cleaned).filter((entry) => entry.text.trim());

    // Romanize every remaining token. A josa is glued onto the previous
    // piece with a hyphen; a josa with nothing before it is kept as a
    // stand-alone piece, like any other token.
    const pieces = [];
    for (const word of words) {
        const latin = (0, index_js_1.romanize)(word.text);
        const attachesToPrevious = word.pos === "Josa" && pieces.length;
        if (attachesToPrevious) {
            pieces[pieces.length - 1] += `-${latin}`;
        }
        else {
            pieces.push(latin);
        }
    }

    // Join with spaces, collapse any runs of white space that crept in,
    // and pull punctuation back against the word it follows.
    const spaced = pieces.join(" ");
    const collapsed = spaced.replace(/\s+/g, " ");
    return collapsed.replace(/\s+([.,!?！？。、])/g, "$1");
};
exports.romanizeKorean = romanizeKorean;