@zsnout/ithkuil
Version:
A set of tools which can generate and parse romanized Ithkuil text and which can generate Ithkuil script from text and JSON data.
151 lines (150 loc) • 5.5 kB
JavaScript
import { Searcher } from "fast-fuzzy";
import {} from "../data/index.js";
/**
 * Creates a function which recognizes roots and affixes in a gloss. This is
 * designed to transform user input before passing to `ungloss`.
 *
 * The returned function splits its input on `-` and rewrites each segment that
 * has one of these shapes (anything else is passed through unchanged):
 *
 * - `"label"`: looked up among root stem labels and replaced with
 *   `S<stem>-<cr>`, or with just `<cr>` when the preceding segment is already
 *   a stem marker `S0`..`S3` (in which case only that stem is considered).
 * - `'label'`: looked up among individual affix degree labels and replaced
 *   with `<cs>/<degree>`.
 * - `'label'/N`: looked up among affix descriptions and degree labels and
 *   replaced with `<cs>/N`.
 * - `ABC/N`: looked up among affix abbreviations and replaced with `<cs>/N`.
 *
 * For the three affix shapes, an optional trailing `1`/`2`/`3` (or subscript
 * `₁`/`₂`/`₃`) is copied through to the output unchanged.
 *
 * @param affixes Affix entries. The properties read are `cs`, `abbreviation`,
 *   `description`, and `degrees` (an array whose index is used as the degree
 *   number; `null` entries are skipped).
 * @param roots Root entries. The properties read are `cr` and `stems`
 *   (indices 0 through 3; falsy labels are skipped).
 * @returns A function taking the gloss `source` and returning
 *   `{ source, gloss, issues, replacements }`, where `gloss` is the rewritten
 *   text, `replacements` lists every successful lookup (first search result
 *   as `actual`, remaining results as `alts`), and `issues` lists every
 *   lookup that produced no result.
 */
export function createRecognizer(affixes, roots) {
  const STEM_INDICES = [0, 1, 2, 3];

  // Segment shapes. None of these carry the `g` flag, so they hold no state
  // between calls and can safely be shared by every invocation.
  const QUOTED_ROOT = /^["“”](.+)["“”]$/;
  const STEM_MARKER = /^S([0-3])$/;
  const AFFIX_BY_DEGREE = /^['‘’](.+)['‘’]([123₁₂₃]?)$/;
  const AFFIX_BY_LABEL = /^['‘’](.+)['‘’]\/([0-9])([123₁₂₃]?)$/;
  const AFFIX_BY_ABBREVIATION = /^([A-Z][A-Z0-9]{2})\/([0-9])([123₁₂₃]?)$/;

  // One searchable entry per (root, stem) pair that actually has a label.
  const rootsByStem = roots
    .flatMap((root) =>
      STEM_INDICES.map((stem) => ({
        stem,
        label: root.stems[stem],
        cr: root.cr,
      })),
    )
    .filter((entry) => Boolean(entry.label));

  // One searchable entry per (affix, degree) pair that has a label. `!= null`
  // drops both `null` and `undefined` so the searcher never gets a missing key.
  const searcherAffixByDegree = new Searcher(
    affixes.flatMap((affix) =>
      affix.degrees
        .map((value, degree) => ({
          cs: affix.cs,
          abbr: affix.abbreviation,
          degree,
          value,
        }))
        .filter((entry) => entry.value != null),
    ),
    { keySelector: (entry) => entry.value },
  );

  const searcherAffixByAbbr = new Searcher(affixes, {
    keySelector: (affix) => affix.abbreviation,
  });

  const searcherAffixByLabel = new Searcher(affixes, {
    keySelector: (affix) =>
      [affix.description, ...affix.degrees].filter((key) => Boolean(key)),
  });

  const searcherRoots = new Searcher(rootsByStem, {
    keySelector: (root) => root.label,
  });

  return function (source) {
    const replacements = [];
    const issues = [];

    // Records the outcome of one lookup and returns the chosen result, or
    // `undefined` when the search produced nothing.
    const resolve = (kind, text, items, extra = {}) => {
      const [actual, ...alts] = items;
      if (!actual) {
        issues.push({ kind, source: text });
        return undefined;
      }
      replacements.push({ kind, source: text, actual, alts, ...extra });
      return actual;
    };

    const gloss = source
      .split("-")
      .map((segment, index, segments) => {
        let match;

        // #region roots
        if ((match = segment.match(QUOTED_ROOT))) {
          // A preceding `S0`..`S3` segment pins the stem; it stays in the
          // output as-is, so only the root's `cr` is emitted here.
          const marker = segments[index - 1]?.match(STEM_MARKER)?.[1];
          const forcedStem = marker === undefined ? undefined : Number(marker);

          let items = searcherRoots.search(match[1]);
          if (forcedStem !== undefined) {
            items = items.filter((item) => item.stem === forcedStem);
          }

          const best = resolve("root", match[1], items);
          if (!best) {
            return segment;
          }
          return forcedStem === undefined ?
              `S${best.stem}-${best.cr}`
            : best.cr;
        }
        // #endregion

        // #region affix by degree
        if ((match = segment.match(AFFIX_BY_DEGREE))) {
          const best = resolve(
            "affix by degree",
            match[1],
            searcherAffixByDegree.search(match[1]),
          );
          return best ? `${best.cs}/${best.degree}${match[2]}` : segment;
        }
        // #endregion

        // #region affix by label/degree
        if ((match = segment.match(AFFIX_BY_LABEL))) {
          // Failures here are reported as "affix by label", matching the
          // kind used for successful replacements from this branch.
          const best = resolve(
            "affix by label",
            segment,
            searcherAffixByLabel.search(match[1]),
            { degree: Number(match[2]) },
          );
          return best ? `${best.cs}/${match[2]}${match[3]}` : segment;
        }
        // #endregion

        // #region affix by abbreviation
        if ((match = segment.match(AFFIX_BY_ABBREVIATION))) {
          const degree = Number(match[2]);
          const best = resolve(
            "affix by abbreviation",
            segment,
            searcherAffixByAbbr.search(match[1]),
            { degree },
          );
          return best ? `${best.cs}/${degree}${match[3]}` : segment;
        }
        // #endregion

        return segment;
      })
      .join("-");

    return { source, gloss, issues, replacements };
  };
}