UNPKG

@zsnout/ithkuil

Version:

A set of tools which can generate and parse romanized Ithkuil text and which can generate Ithkuil script from text and JSON data.

151 lines (150 loc) 5.5 kB
import { Searcher } from "fast-fuzzy";
import {} from "../data/index.js";

// Segment patterns, hoisted so they are compiled once rather than per segment.
// None of them carry the `g` or `y` flag, so sharing them is stateless.

/** A root label wrapped in straight or curly double quotes. */
const QUOTED_ROOT = /^["“”](.+)["“”]$/;

/** A stem marker segment (`S0` through `S3`). */
const STEM_MARKER = /^S([0-3])$/;

/**
 * An affix degree label wrapped in straight or curly single quotes, followed
 * by an optional `1`/`2`/`3` (or subscript) suffix.
 */
const QUOTED_AFFIX_DEGREE = /^['‘’](.+)['‘’]([123₁₂₃]?)$/;

/** Same as above, but with an explicit `/degree` digit after the quotes. */
const QUOTED_AFFIX_LABEL = /^['‘’](.+)['‘’]\/([0-9])([123₁₂₃]?)$/;

/** A three-character uppercase abbreviation followed by `/degree`. */
const AFFIX_ABBREVIATION = /^([A-Z][A-Z0-9]{2})\/([0-9])([123₁₂₃]?)$/;

/**
 * Creates a function which recognizes roots and affixes in a gloss. This is
 * designed to transform user input before passing to `ungloss`.
 *
 * @param affixes Affix entries. This function reads `cs`, `abbreviation`,
 *   `description`, and the `degrees` array of each entry.
 * @param roots Root entries. This function reads `cr` and the first four
 *   elements of `stems` of each entry.
 * @returns A function taking a gloss string and returning
 *   `{ source, gloss, issues, replacements }`, where `gloss` is the input with
 *   every recognized segment rewritten, `replacements` lists each successful
 *   lookup (best match in `actual`, remaining matches in `alts`), and `issues`
 *   lists each segment that looked like a lookup but matched nothing.
 */
export function createRecognizer(affixes, roots) {
  // One searchable entry per (root, stem) pair; stems without a label are
  // dropped.
  const rootsByStem = roots
    .flatMap((root) =>
      [0, 1, 2, 3].map((stem) => ({
        stem,
        label: root.stems[stem],
        cr: root.cr,
      })),
    )
    .filter((entry) => !!entry.label);

  // One searchable entry per (affix, degree) pair. Missing degrees (`null` or
  // `undefined`) are skipped so the key selector always yields a value.
  const affixDegrees = affixes.flatMap((affix) =>
    affix.degrees
      .map((value, degree) => ({
        cs: affix.cs,
        abbr: affix.abbreviation,
        degree,
        value,
      }))
      .filter((entry) => entry.value != null),
  );

  const searcherAffixByDegree = new Searcher(affixDegrees, {
    keySelector(affix) {
      return affix.value;
    },
  });

  const searcherAffixByAbbr = new Searcher(affixes, {
    keySelector(affix) {
      return affix.abbreviation;
    },
  });

  const searcherAffixByLabel = new Searcher(affixes, {
    keySelector(affix) {
      return [affix.description, ...affix.degrees].filter((key) => !!key);
    },
  });

  const searcherRoots = new Searcher(rootsByStem, {
    keySelector(root) {
      return root.label;
    },
  });

  return function (source) {
    const replacements = [];
    const issues = [];

    /**
     * Rewrites a single hyphen-delimited segment, recording the outcome in
     * `replacements` or `issues`. Segments matching no pattern, and segments
     * whose lookup finds nothing, are returned unchanged.
     */
    const recognizeSegment = (segment, index, segments) => {
      // #region roots
      const rootMatch = segment.match(QUOTED_ROOT);
      if (rootMatch) {
        const label = rootMatch[1];

        // If the previous segment is a stem marker, only roots of that stem
        // are eligible and the marker is not emitted again.
        const stemDigit = segments[index - 1]?.match(STEM_MARKER)?.[1];
        const forcedStem =
          stemDigit === undefined ? undefined : Number(stemDigit);

        let items = searcherRoots.search(label);
        if (forcedStem !== undefined) {
          items = items.filter((item) => item.stem === forcedStem);
        }

        const best = items[0];
        if (!best) {
          issues.push({ kind: "root", source: label });
          return segment;
        }

        replacements.push({
          kind: "root",
          source: label,
          actual: best,
          alts: items.slice(1),
        });
        return forcedStem === undefined ? `S${best.stem}-${best.cr}` : best.cr;
      }
      // #endregion

      // #region affix by degree
      const degreeMatch = segment.match(QUOTED_AFFIX_DEGREE);
      if (degreeMatch) {
        const label = degreeMatch[1];
        const items = searcherAffixByDegree.search(label);
        const best = items[0];
        if (!best) {
          issues.push({ kind: "affix by degree", source: label });
          return segment;
        }

        replacements.push({
          kind: "affix by degree",
          source: label,
          actual: best,
          alts: items.slice(1),
        });
        return `${best.cs}/${best.degree}${degreeMatch[2]}`;
      }
      // #endregion

      // #region affix by label/degree
      const labelMatch = segment.match(QUOTED_AFFIX_LABEL);
      if (labelMatch) {
        const items = searcherAffixByLabel.search(labelMatch[1]);
        const best = items[0];
        if (!best) {
          // Reported as "affix by label" so that a failed lookup carries the
          // same kind as a successful one from this branch.
          issues.push({ kind: "affix by label", source: segment });
          return segment;
        }

        replacements.push({
          kind: "affix by label",
          source: segment,
          actual: best,
          alts: items.slice(1),
          degree: +labelMatch[2],
        });
        return `${best.cs}/${labelMatch[2]}${labelMatch[3]}`;
      }
      // #endregion

      // #region affix by abbreviation
      const abbrMatch = segment.match(AFFIX_ABBREVIATION);
      if (abbrMatch) {
        const degree = +abbrMatch[2];
        const items = searcherAffixByAbbr.search(abbrMatch[1]);
        const best = items[0];
        if (!best) {
          issues.push({ kind: "affix by abbreviation", source: segment });
          return segment;
        }

        replacements.push({
          kind: "affix by abbreviation",
          source: segment,
          actual: best,
          alts: items.slice(1),
          degree,
        });
        return `${best.cs}/${degree}${abbrMatch[3]}`;
      }
      // #endregion

      return segment;
    };

    // NOTE(review): splitting on every "-" also splits quoted labels that
    // themselves contain a hyphen; such labels will not be recognized.
    const gloss = source.split("-").map(recognizeSegment).join("-");

    return { source, gloss, issues, replacements };
  };
}