romanize-string
Version:
A fully typed, general-purpose utility for unidirectional string transliteration (non-Latin script => Latin script).
43 lines (42 loc) • 1.65 kB
JavaScript
;
/**
 * Interop helper for default imports of CommonJS modules.
 *
 * Reuses an `__importDefault` already present on `this` when there is one;
 * otherwise falls back to the local implementation below.
 *
 * @param {*} loadedModule - The value returned by `require(...)`.
 * @returns {*} `loadedModule` itself when it carries a truthy `__esModule`
 *     flag, otherwise a wrapper object exposing it under the `default` key.
 */
var __importDefault = (this && this.__importDefault) || function (loadedModule) {
    // Modules flagged as ES modules already expose their own `default` binding.
    if (loadedModule && loadedModule.__esModule) {
        return loadedModule;
    }
    // Everything else (including falsy values) is wrapped so `.default` works.
    return { "default": loadedModule };
};
// Flag this module's exports with `__esModule`; the interop helper above
// returns modules carrying that flag unchanged instead of wrapping them.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder for the named export; the real function is assigned at the end of the file.
exports.romanizeThai = void 0;
const child_process_1 = require("child_process");
const tnthai_1 = __importDefault(require("tnthai"));
const ensure_python_with_thai_lib_js_1 = require("../utils/ensure-python-with-thai-lib.js");
// Single analyzer instance created at module load and shared by every
// romanizeThai call (used there for its `segmenting` method).
const analyzer = new tnthai_1.default();
/**
 * Romanizes Thai text: segments the input into words with the shared
 * analyzer, then passes the segmented string to the Python script at
 * `src/transliterators/python-thai-romanization.py` and returns its output.
 *
 * This function is best-effort and never throws: any failure inside the
 * pipeline (environment check, segmentation, spawning Python, abnormal exit)
 * is logged through `console.error` and the original input is returned as-is.
 *
 * @param {string} input - Text to romanize.
 * @returns {string} The trimmed stdout of the Python script, or `input`
 *     unchanged when it is empty or when any step fails.
 */
const romanizeThai = (input) => {
    // Nothing to transliterate: skip the environment check and avoid spawning
    // a Python process just to get an empty result back.
    if (input === "") {
        return input;
    }
    try {
        (0, ensure_python_with_thai_lib_js_1.ensurePythonWithThaiLib)();
        const { solution } = analyzer.segmenting(input);
        const segmentedString = solution
            // Drop whitespace-only segments so the join below does not double spaces.
            .filter((word) => word.trim().length > 0)
            .join(" ")
            // Re-attach punctuation that the join separated from the preceding word.
            .replace(/\s+([.,!?;:])/g, "$1");
        // NOTE(review): the script path is relative, so it is resolved against the
        // working directory of the process -- confirm callers always run from the
        // package root, or resolve it against this module's location instead.
        const result = (0, child_process_1.spawnSync)("python3", [
            "src/transliterators/python-thai-romanization.py",
            segmentedString,
        ], {
            encoding: "utf-8",
            // Empty stdin, so the child never blocks waiting for piped input.
            input: "",
        });
        if (result.error) {
            throw new Error(`Python error: ${result.error.message}`);
        }
        // spawnSync reports a null status when the child was killed by a signal;
        // name the signal instead of printing "exited with code null".
        if (result.status === null) {
            throw new Error(`Python script was terminated by signal ${result.signal}: ${result.stderr}`);
        }
        if (result.status !== 0) {
            throw new Error(`Python script exited with code ${result.status}: ${result.stderr}`);
        }
        return result.stdout
            .replace(/\b(\w{1,10})\s*\/\s*(\w{1,10})\b/g, "$1/$2") // Remove spaces around polite suffix separators
            .trim();
    }
    catch (err) {
        // Deliberate fallback: report the failure and hand the caller back its input.
        console.error("Thai transliteration failed.");
        console.error(err);
        return input;
    }
};
// Publish the function as the module's named CommonJS export, replacing the
// `void 0` placeholder assigned near the top of the file.
exports.romanizeThai = romanizeThai;