UNPKG

@zsnout/ithkuil

Version:

A set of tools which can generate and parse romanized Ithkuil text and which can generate Ithkuil script from text and JSON data.

131 lines (130 loc) 3.98 kB
import { has, join } from "../../generate/index.js"; import { anyText, seq } from "../lex/builder.js"; import { geminate, H, R, V } from "../lex/forms.js"; /** * Capturing groups: * * 1. Cc? * 2. Vv? * 3. Cr * 4. (Vx Cs)* * 5. Vn? * 6. Cn? * 7. (Vx Cs)* * 8. Vc? */ const main = /* @__PURE__ */ seq( /* @__PURE__ */ seq( /* @__PURE__ */ H.asGroup().optional(), // Cc /* @__PURE__ */ V.asGroup()).optional(), /* @__PURE__ */ R.asGroup(), // Cr /* @__PURE__ */ seq(V, R).zeroOrMore().asGroup(), // VxCs /* @__PURE__ */ seq( /* @__PURE__ */ V.asGroup(), // Vn /* @__PURE__ */ H.asGroup(), // Cn /* @__PURE__ */ seq(V, R).zeroOrMore().asGroup()).optional(), /* @__PURE__ */ V.optional().asGroup()) .matchEntireText() .compile(); const ccShortcut = /* @__PURE__ */ anyText("w", "y", "hl", "hr", "hm", "hn") .matchEntireText() .compile(); const cnShortcut = /* @__PURE__ */ anyText("hl", "hr", "hm", "hn", "hň") .matchEntireText() .compile(); const vc = seq(V.asGroup(), R.asGroup()).compile("g"); function getForms(v) { const output = []; let match; while ((match = vc.exec(v))) { output.push([match[0], match[1], match[2]]); } return output; } function mapForms(forms) { return forms.map(([, vx, cs]) => ({ vx, cs })); } function extractForms(forms) { return mapForms(getForms(forms)); } function tokenizeStandardInner(vx1) { const forms = getForms(vx1); if (forms.length == 0) return; const vr = forms[0][1]; const geminateIndex = forms.findIndex((x) => geminate.test(x[2])); if (geminateIndex == -1) { const ca = forms[0][2]; if (ca == "x" || ca == "xt" || ca == "xp") { return; } return { vr, ca, vx: { 5: [], 7: mapForms(forms.slice(1)) }, }; } const ca = forms[geminateIndex][2]; if (ca == "xx") { return; } return { vr, ca, vx: { 5: Array.from({ length: geminateIndex }, (_, i) => ({ vx: forms[i + 1][1], cs: forms[i][2], })), 7: mapForms(forms.slice(geminateIndex + 1)), }, }; } export function tokenize(word) { const match = word.match(main); if (!match) return; const [, cc, vv, cr, vx1, vn, cn, vx2, vc] = match; if (!cr) { return; } if (has(["w", "y", "hl", "hr", "hm", "hn"], cc)) { if (vx2) { return; } const vxs = getForms(vx1); const glottalStopIndex = vxs.findIndex((x) => x[1].includes("'")); const v5 = vxs.slice(0, glottalStopIndex + 1); const v7 = vxs.slice(glottalStopIndex + 1); const vx = { 5: mapForms(v5), 7: mapForms(v7) }; return { shortcut: "iv/vi", cc, vv: vv, cr, vx, vn, cn, vc }; } if (!(cc == null || cc == "h" || cc == "hw")) return; const cc2 = cc; if (!vx1 && cn) { const vx = { 5: [], 7: extractForms(vx2) }; return { shortcut: "mcs", cc: cc2, vv, cr, vr: vn, vx, cn, vc }; } if (vx2) return; const inner = tokenizeStandardInner(vx1); if (!inner) return; return { shortcut: null, cc: cc2, vv, cr, ...inner, vn, cn, vc }; } export function testNeo(word) { const tokens = tokenize(word); if (!tokens) return; if (tokens.shortcut == "iv/vi") { const { cc, vv, cr, vx, vn, cn, vc } = tokens; return join(cc, vv, cr, vx[5].flatMap(({ vx, cs }) => [vx, cs]), vx[7].flatMap(({ vx, cs }) => [vx, cs]), vn ?? "", cn ?? "", vc); } if (tokens.shortcut == "mcs") { const { cc, vv, cr, vr, cn, vx, vc } = tokens; return join(cc, vv, cr, vr, cn, vx[7].flatMap(({ vx, cs }) => [vx, cs]), vc); } const { cc, vv, cr, vr, vx, ca, vn, cn, vc } = tokens; return join(cc, vv, cr, vr, vx[5].flatMap(({ vx, cs }) => [cs, vx]), ca, vx[7].flatMap(({ vx, cs }) => [vx, cs]), vn, cn, vc); }