@zsnout/ithkuil
Version:
A set of tools which can generate and parse romanized Ithkuil text and which can generate Ithkuil script from text and JSON data.
418 lines (417 loc) • 13.6 kB
JavaScript
import { cnShortcutFormative, n, nonShortcutFormative, shortcutFormative, } from "../lex/index.js";
import { parseReferentList } from "../referential/referent-list.js";
import {} from "../transform.js";
import { VOWEL_FORM_TO_OBJECT_MAP, VowelForm } from "../vowel-form.js";
import { parseAffix } from "./affix.js";
import { parseCa, parseGeminatedCa } from "./ca.js";
import { parseCaseScope } from "./case-scope.js";
import { parseCase } from "./case.js";
import { parseIllocutionValidation } from "./illocution-validation.js";
import { parseMood } from "./mood.js";
import { parseAspect, parseNonAspectualVn } from "./vn.js";
const VV_TO_STEM = [undefined, 1, 1, 2, 2, undefined, 0, 0, 3, 3];
const VV_TO_VERSION = [
undefined,
"PRC",
"CPT",
"PRC",
"CPT",
undefined,
"CPT",
"PRC",
"CPT",
"PRC",
];
const VV_TO_CA_SHORTCUT = {
w: [
undefined,
{},
{ perspective: "G" },
{ perspective: "N" },
{ perspective: "G", essence: "RPV" },
],
y: [
undefined,
{ extension: "PRX" },
{ essence: "RPV" },
{ perspective: "A" },
{ extension: "PRX", essence: "RPV" },
],
};
const VV_TO_VII_SHORTCUT = [
undefined,
undefined,
{ cs: "r", type: 1, degree: 4 },
{ cs: "t", type: 1, degree: 4 },
{ cs: "t", type: 1, degree: 5 },
];
const VR_SEQUENCE_TO_CONTEXT = [undefined, "EXS", "FNC", "RPS", "AMG"];
const VR_TO_SPECIFICATION = [
undefined,
"BSC",
"CTE",
"CSV",
"OBJ",
undefined,
"OBJ",
"CSV",
"CTE",
"BSC",
];
const AFFIX_REGEX = /([aeiouäëöü']+)([^aeiouäëöü']+)/g;
function parseAffixes(text) {
if (text == "") {
return [];
}
const output = [];
let match;
while ((match = AFFIX_REGEX.exec(text))) {
output.push(parseAffix(VowelForm.parseOrThrow(match[1]), match[2], output.length == 0 && AFFIX_REGEX.lastIndex == text.length));
}
return output;
}
const REVERSED_AFFIX_REGEX = /([^aeiouäëöü']+)([aeiouäëöü']+)/g;
function parseReversedAffixes(text) {
if (text == "") {
return [];
}
const output = [];
let match;
while ((match = REVERSED_AFFIX_REGEX.exec(text))) {
output.push(parseAffix(VowelForm.parseOrThrow(match[2]), match[1], output.length == 0 && AFFIX_REGEX.lastIndex == text.length));
}
return output;
}
/**
* Builds a non-shortcut formative.
*
* @param word The word to be built.
* @param stress The stress of the formative.
* @returns Either a parsed `PartialFormative` indicating a success, or
* `undefined` indicating a tokenization failure. Throws if the formative was
* successfully tokenized but had another error in it (e.g. invalid Vn slot,
* invalid referential affix, etc.).
*/
export function buildNonShortcutFormative(word, stress) {
const match = nonShortcutFormative.exec(word);
if (!match) {
return;
}
const concatenationType = match[1] == "h" ? 1
: match[1] == "hw" ? 2
: undefined;
const type = concatenationType ? "UNF/C"
: stress == "ultimate" || stress == "monosyllabic" ? "UNF/K"
: stress == "antepenultimate" ? "FRM"
: "UNF/C";
const vv = match[2] ? VowelForm.of(match[2]) : undefined;
if (match[2] && vv == null) {
throw new Error("Invalid Vv slot: " + match[2] + ".");
}
let root;
let affixShortcut;
const vr = VowelForm.of(match[4]);
if (vr == null) {
throw new Error("Invalid Vr slot: " + match[4] + ".");
}
if (vv?.degree == 5) {
root = {
cs: n.test(match[3]) ? BigInt(match[3].replace(/_/g, "")) : match[3],
degree: vr.degree,
};
}
else if (vv?.degree == 0 && (vv.sequence == 1 || vv.sequence == 2)) {
root = parseReferentList(match[3]);
}
else if (vv?.degree == 0) {
throw new Error("Invalid Vv slot: " + vv + ".");
}
else if (n.test(match[3])) {
root = BigInt(match[3].replace(/_/g, ""));
}
else {
root = match[3];
if (vv) {
affixShortcut = VV_TO_VII_SHORTCUT[vv.sequence];
}
}
const vn_ = match[9];
const cn = match[10];
let mood, caseScope, vn;
if (cn && vn_) {
let isAspectual = false;
if (type == "UNF/K") {
;
[mood, isAspectual] = parseMood(cn);
}
else {
;
[caseScope, isAspectual] = parseCaseScope(cn);
}
const form = VowelForm.of(vn_);
if (form == null) {
throw new Error("Invalid Vn form: " + form + ".");
}
if (isAspectual) {
vn = parseAspect(form);
}
else {
vn = parseNonAspectualVn(form);
}
}
let slotVIIAffixes = match[8] ? parseAffixes(match[8]) : undefined;
if (affixShortcut) {
if (slotVIIAffixes) {
slotVIIAffixes.push(affixShortcut);
}
else {
slotVIIAffixes = [affixShortcut];
}
}
return {
type,
concatenationType,
shortcut: affixShortcut ? "VII" : false,
stem: typeof root == "object" ? undefined
: vv ? VV_TO_STEM[vv.degree]
: 1,
version: vv ?
Array.isArray(root) ?
vv.sequence == 1 ?
"PRC"
: "CPT"
: typeof root == "object" ?
vv.sequence == 1 || vv.sequence == 3 ?
"PRC"
: "CPT"
: VV_TO_VERSION[vv.degree]
: undefined,
root,
context: VR_SEQUENCE_TO_CONTEXT[vr.sequence],
specification: root.cs ? undefined : VR_TO_SPECIFICATION[vr.degree],
function: root.cs ?
vv ?
vv.sequence <= 2 ?
"STA"
: "DYN"
: undefined
: vr.degree < 5 ? "STA"
: "DYN",
slotVAffixes: match[5] ? parseReversedAffixes(match[5]) : [],
ca: match[6] ? parseGeminatedCa(match[6]) : parseCa(match[7]),
slotVIIAffixes,
mood,
caseScope,
vn,
case: type == "UNF/K" ? undefined : (parseCase(match[11] ?
VowelForm.parseOrThrow(match[11])
: VOWEL_FORM_TO_OBJECT_MAP.a, concatenationType ?
stress == "ultimate"
: match[11]?.includes("'") ||
match[5]?.includes("'") ||
match[8]?.includes("'") ||
vn_?.includes("'") ||
vr.hasGlottalStop)),
illocutionValidation: type != "UNF/K" ? undefined
: match[11] ? parseIllocutionValidation(VowelForm.parseOrThrow(match[11]))
: undefined,
};
}
/**
* Builds a formative with a Cn shortcut (where Cn replaces Ca).
*
* @param word The word to be built.
* @param stress The stress of the formative.
* @returns Either a parsed `PartialFormative` indicating a success, or
* `undefined` indicating a tokenization failure. Throws if the formative was
* successfully tokenized but had another error in it (e.g. invalid Vn slot,
* invalid referential affix, etc.).
*/
export function buildCnShortcutFormative(word, stress) {
const match = cnShortcutFormative.exec(word);
if (!match) {
return;
}
const concatenationType = match[1] == "h" ? 1
: match[1] == "hw" ? 2
: undefined;
const type = concatenationType ? "UNF/C"
: stress == "ultimate" || stress == "monosyllabic" ? "UNF/K"
: stress == "antepenultimate" ? "FRM"
: "UNF/C";
const vv = match[2] ? VowelForm.of(match[2]) : undefined;
if (match[2] && vv == null) {
throw new Error("Invalid Vv slot: " + match[2] + ".");
}
let root;
let affixShortcut;
const vr = VowelForm.of(match[4]);
if (vr == null) {
throw new Error("Invalid Vr slot: " + match[4] + ".");
}
if (vv?.degree == 5) {
root = {
cs: match[3],
degree: vr.degree,
};
}
else if (vv?.degree == 0 && (vv.sequence == 1 || vv.sequence == 2)) {
root = parseReferentList(match[3]);
}
else if (vv?.degree == 0) {
throw new Error("Invalid Vv slot: " + vv + ".");
}
else if (n.test(match[3])) {
root = BigInt(match[3].replace(/_/g, ""));
}
else {
root = match[3];
if (vv) {
affixShortcut = VV_TO_VII_SHORTCUT[vv.sequence];
}
}
const cn = match[5];
const mood = type == "UNF/K" ? parseMood(cn)[0] : undefined;
const caseScope = type != "UNF/K" ? parseCaseScope(cn)[0] : undefined;
let slotVIIAffixes = match[6] ? parseAffixes(match[6]) : undefined;
if (affixShortcut) {
if (slotVIIAffixes) {
slotVIIAffixes.push(affixShortcut);
}
else {
slotVIIAffixes = [affixShortcut];
}
}
return {
type,
concatenationType,
shortcut: affixShortcut ? "VII+VIII" : "VIII",
stem: typeof root == "object" ? undefined
: vv ? VV_TO_STEM[vv.degree]
: undefined,
version: vv ?
Array.isArray(root) ?
vv.sequence == 1 ?
"PRC"
: "CPT"
: typeof root == "object" ?
vv.sequence == 1 || vv.sequence == 3 ?
"PRC"
: "CPT"
: VV_TO_VERSION[vv.degree]
: undefined,
root,
context: VR_SEQUENCE_TO_CONTEXT[vr.sequence],
specification: root.cs ? undefined : VR_TO_SPECIFICATION[vr.degree],
function: root.cs ?
vv.sequence <= 2 ?
"STA"
: "DYN"
: vr.degree < 5 ? "STA"
: "DYN",
slotVIIAffixes,
mood,
caseScope,
case: type == "UNF/K" ? undefined : (parseCase(match[7] ?
VowelForm.parseOrThrow(match[7])
: VOWEL_FORM_TO_OBJECT_MAP.a, concatenationType ?
stress == "ultimate"
: match[7]?.includes("'") ||
match[6]?.includes("'") ||
vr.hasGlottalStop)),
illocutionValidation: type != "UNF/K" ? undefined
: match[7] ? parseIllocutionValidation(VowelForm.parseOrThrow(match[7]))
: undefined,
};
}
/**
* Builds a shortcut formative.
*
* @param word The word to be built.
* @param stress The stress of the formative.
* @returns Either a parsed `PartialFormative` indicating a success, or
* `undefined` indicating a tokenization failure. Throws if the formative was
* successfully tokenized but had another error in it (e.g. invalid Vn slot,
* invalid referential affix, etc.).
*/
export function buildShortcutFormative(word, stress) {
const match = shortcutFormative.exec(word);
if (!match) {
return;
}
const concatenationType = match[1] == "hl" || match[1] == "hm" ? 1
: match[1] == "hr" || match[1] == "hn" ? 2
: undefined;
const shortcutType = match[1] == "w" || match[1] == "hl" || match[1] == "hr" ? "w" : "y";
const type = concatenationType ? "UNF/C"
: stress == "ultimate" || stress == "monosyllabic" ? "UNF/K"
: stress == "antepenultimate" ? "FRM"
: "UNF/C";
const vv = VowelForm.of(match[2]);
if (vv == null ||
vv.degree == 5 ||
(vv.degree == 0 && (vv.sequence == 3 || vv.sequence == 4))) {
throw new Error("Invalid Vv slot: " + match[2] + ".");
}
let root;
if (vv.degree == 0) {
root = parseReferentList(match[3]);
}
else if (n.test(match[3])) {
root = BigInt(match[3]);
}
else {
root = match[3];
}
const vn_ = match[6];
const cn = match[7];
let mood, caseScope, vn;
if (cn && vn_) {
let isAspectual = false;
if (type == "UNF/K") {
;
[mood, isAspectual] = parseMood(cn);
}
else {
;
[caseScope, isAspectual] = parseCaseScope(cn);
}
const form = VowelForm.of(vn_);
if (form == null) {
throw new Error("Invalid Vn form: " + form + ".");
}
if (isAspectual) {
vn = parseAspect(form);
}
else {
vn = parseNonAspectualVn(form);
}
}
const slotVIIAffixes = match[5] ? parseAffixes(match[5]) : undefined;
return {
type,
concatenationType,
shortcut: "IV/VI",
stem: Array.isArray(root) ? undefined : VV_TO_STEM[vv.degree],
version: Array.isArray(root) ?
vv.sequence == 1 ?
"PRC"
: "CPT"
: VV_TO_VERSION[vv.degree],
root,
slotVAffixes: match[4] ? parseAffixes(match[4]) : [],
ca: {
...VV_TO_CA_SHORTCUT[shortcutType][Array.isArray(root) ? 1 : vv.sequence],
},
slotVIIAffixes,
mood,
caseScope,
vn,
case: type == "UNF/K" ? undefined : (parseCase(match[8] ?
VowelForm.parseOrThrow(match[8])
: VOWEL_FORM_TO_OBJECT_MAP.a, concatenationType ? stress == "ultimate" : match[8]?.includes("'"))),
illocutionValidation: type != "UNF/K" ? undefined
: match[8] ? parseIllocutionValidation(VowelForm.parseOrThrow(match[8]))
: undefined,
};
}