romanize-string
Version:
A fully typed, general-purpose utility for unidirectional string transliteration (non-Latin script => Latin script).
31 lines (30 loc) • 1.47 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.romanizeKorean = void 0;
const oktjs_1 = require("oktjs");
const index_js_1 = require("../vendor/romanize/korean/src/index.js");
/**
 * Romanize Korean text token by token.
 *
 * The input is normalized and tokenized with oktjs, each token's text is
 * romanized on its own, and tokens tagged as josa (Korean case markers) are
 * attached to the previous output entry with a hyphen.
 *
 * @param string - Text to romanize; it is handed to the oktjs `normalize` function as-is.
 * @returns {string} The romanized pieces joined by single spaces, with any
 *   white space directly before the listed punctuation marks removed.
 */
const romanizeKorean = (string) => {
    // Normalize first, then tokenize; tokens whose text is empty or
    // whitespace-only are dropped.
    const cleaned = (0, oktjs_1.normalize)(string);
    const words = (0, oktjs_1.tokenize)(cleaned).filter((word) => word.text.trim());
    // Walk the tokens once, romanizing each one and deciding whether it starts
    // a new output entry or extends the previous one.
    const pieces = [];
    for (const word of words) {
        const latin = (0, index_js_1.romanize)(word.text);
        if (word.pos === "Josa" && pieces.length > 0) {
            // A josa never stands alone when something precedes it:
            // hyphenate it onto the previous entry.
            pieces[pieces.length - 1] += `-${latin}`;
        }
        else {
            // Anything else (or a josa with nothing before it) becomes its own entry.
            pieces.push(latin);
        }
    }
    // Join with spaces, then collapse any run of white space to a single space.
    const spaced = pieces.join(" ");
    const collapsed = spaced.replace(/\s+/g, " ");
    // Finally drop white space that ended up directly before punctuation.
    // NOTE(review): `!?` is listed twice in the character class below, which is
    // redundant as written; possibly fullwidth forms were intended. Confirm
    // against the original source before changing.
    return collapsed.replace(/\s+([.,!?!?。、])/g, "$1");
};
exports.romanizeKorean = romanizeKorean;