UNPKG

romanize-string

Version:

A fully typed, general-purpose utility for unidirectional string transliteration (non-Latin script => Latin script).

43 lines (42 loc) 1.65 kB
"use strict";
// Compiler-emitted interop helper: wraps a CommonJS export in `{ default: ... }`
// unless the module already flags itself as an ES module via `__esModule`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.romanizeThai = void 0;
const child_process_1 = require("child_process");
const tnthai_1 = __importDefault(require("tnthai"));
const ensure_python_with_thai_lib_js_1 = require("../utils/ensure-python-with-thai-lib.js");
// One segmenter instance, created at module load and shared by every call.
const analyzer = new tnthai_1.default();
/**
 * Romanizes a Thai string by segmenting it into words with `tnthai` and
 * passing the space-joined words to a Python helper script run via `python3`.
 *
 * This function never throws: any failure (environment check, segmentation,
 * spawning Python, abnormal exit of the script) is logged with
 * `console.error` and the original `input` is returned unchanged.
 *
 * @param {string} input - Text to transliterate.
 * @returns {string} The script's trimmed stdout on success, otherwise `input`.
 */
const romanizeThai = (input) => {
    try {
        // Presumably verifies that python3 and the Thai romanization library
        // are available (helper body not visible here). It runs inside the
        // `try`, so an exception from it triggers the fallback below.
        (0, ensure_python_with_thai_lib_js_1.ensurePythonWithThaiLib)();
        const { solution } = analyzer.segmenting(input);
        const segmentedString = solution
            // Drop whitespace-only segments so the join does not double spaces.
            .filter((word) => word.trim().length > 0)
            .join(" ")
            // Re-attach punctuation that the join separated from its word.
            .replace(/\s+([.,!?;:])/g, "$1");
        // NOTE(review): the script path is relative and no `cwd` is passed, so
        // it is resolved against the working directory of the calling process.
        // Confirm this is intended for consumers running from another directory.
        const result = (0, child_process_1.spawnSync)("python3", [
            "src/transliterators/python-thai-romanization.py",
            segmentedString,
        ], {
            encoding: "utf-8",
            // Give the child an empty stdin.
            input: "",
        });
        if (result.error) {
            throw new Error(`Python error: ${result.error.message}`);
        }
        // `status` is null when the child was killed by a signal; report the
        // signal instead of the misleading "exited with code null".
        if (result.status === null) {
            throw new Error(`Python script was terminated by signal ${result.signal}: ${result.stderr}`);
        }
        if (result.status !== 0) {
            throw new Error(`Python script exited with code ${result.status}: ${result.stderr}`);
        }
        return result.stdout
            // Remove spaces around polite suffix separators
            .replace(/\b(\w{1,10})\s*\/\s*(\w{1,10})\b/g, "$1/$2")
            .trim();
    }
    catch (err) {
        // Deliberate best-effort: log the failure and fall back to the input.
        console.error("Thai transliteration failed.");
        console.error(err);
        return input;
    }
};
exports.romanizeThai = romanizeThai;