@unglish/word-generator
Version:
A simple generator for creating unglish words.
214 lines (213 loc) • 8.91 kB
JavaScript
import { overrideRand, getRand } from "../utils/random.js";
import { createSeededRandom } from "../utils/createSeededRandom.js";
import pick from "../utils/pick.js";
import getWeightedOption from "../utils/getWeightedOption.js";
import { phonemes, invalidOnsetClusters, invalidBoundaryClusters, invalidCodaClusters, sonority } from "../elements/phonemes.js";
import pronounce from "./pronounce.js";
import { write } from "./write.js";
/**
 * Looks up the sonority rank of a phoneme.
 *
 * @param {{type: string}} phoneme - Phoneme whose `type` keys into the `sonority` table.
 * @returns {*} The table entry for that type, or 0 when the entry is missing or falsy.
 */
function getSonority(phoneme) {
  const rank = sonority[phoneme.type];
  return rank ? rank : 0;
}
/**
 * Builds an onset or coda cluster by repeatedly picking one more phoneme
 * until `maxLength` is reached or no phoneme is eligible for the next slot.
 *
 * A candidate for the slot being filled must:
 *  - have a truthy `onset` value (onsets) or `coda` value (codas);
 *  - pass the positional restriction: `start` is checked only for the first
 *    slot of an onset, `end` only for the final slot of a coda;
 *  - not be listed in `ignore` and not already be in the cluster;
 *  - rise in sonority (onsets) or fall in sonority (codas) relative to the
 *    previous phoneme, except for the English "s" + t/p/k onset;
 *  - not make the cluster match any pattern in the invalid-cluster list.
 *
 * @param {string} type - "onset" builds an onset; any other value uses the coda rules.
 * @param {number} [maxLength=3] - Upper bound on the number of phonemes.
 * @param {string[]} [ignore=[]] - Sounds that must not appear in the cluster.
 * @returns {object[]} The phonemes chosen; may be shorter than `maxLength` or empty.
 */
function buildCluster(type, maxLength = 3, ignore = []) {
  const isOnset = type === "onset";
  const isCoda = type === "coda";
  // NOTE(review): assumes these patterns carry no g/y flag; test() on such a
  // regex is stateful and would give order-dependent results.
  const invalidClusters = isOnset ? invalidOnsetClusters : invalidCodaClusters;
  const cluster = [];
  while (cluster.length < maxLength) {
    const isFirstSlot = cluster.length === 0;
    const isFinalSlot = cluster.length === maxLength - 1;
    const previous = cluster[cluster.length - 1];
    const clusterSounds = cluster.map(ph => ph.sound).join('');
    const candidatePhonemes = phonemes.filter(p => {
      // Positional restrictions are implications: they constrain only the
      // slot they are about. Requiring every coda slot to BE the final slot
      // would leave the first slot of any multi-phoneme coda without
      // candidates, so such codas could never be built.
      // A missing or zero `start`/`end` imposes no restriction
      // (NOTE(review): confirm whether 0 was meant to exclude the phoneme).
      const isAllowedToStartWord = !isOnset || !isFirstSlot || !p.start || p.start > 0;
      const isAllowedToEndWord = !isCoda || !isFinalSlot || !p.end || p.end > 0;
      // Parenthesised so the positional checks apply to both branches.
      const isValidPosition = Boolean(isOnset ? p.onset : p.coda)
        && isAllowedToStartWord
        && isAllowedToEndWord;
      const isNotIgnored = !ignore.includes(p.sound);
      const isNotDuplicate = !cluster.some(existingP => existingP.sound === p.sound);
      // English allows "s" to be followed by t/p/k in an onset even though
      // that step does not rise in sonority.
      const isSpecialS = isOnset
        && cluster.length === 1
        && previous.sound === 's'
        && ['t', 'p', 'k'].includes(p.sound);
      const hasSuitableSonority = isFirstSlot || isSpecialS ||
        (isOnset
          ? getSonority(p) > getSonority(previous)
          : getSonority(p) < getSonority(previous));
      // Reject anything that would form a listed invalid cluster.
      const potentialCluster = clusterSounds + p.sound;
      const isValidCluster = !invalidClusters.some(regex => regex.test(potentialCluster));
      return isValidPosition && isNotIgnored && isNotDuplicate && hasSuitableSonority && isValidCluster;
    });
    if (candidatePhonemes.length === 0) {
      break;
    }
    const newPhoneme = pick(candidatePhonemes);
    cluster.push(newPhoneme);
    // Special case for English: an onset stops growing once its second
    // phoneme is a liquid or a nasal.
    if (isOnset
      && cluster.length === 2
      && ['liquid', 'nasal'].includes(newPhoneme.type)) {
      break;
    }
  }
  return cluster;
}
/**
 * Draws an onset length from a weighted table and builds an onset cluster of
 * at most that length.
 *
 * When the previous syllable exists and has an empty coda, the zero-length
 * option is given weight 0. The sounds of the previous syllable's coda are
 * passed to `buildCluster` as its ignore list.
 *
 * @param {{coda: {sound: string}[]}} [prevSyllable] - The syllable before this one, if any.
 * @returns {object[]} The onset returned by `buildCluster`.
 */
function pickOnset(prevSyllable) {
  const followsOpenSyllable = prevSyllable && prevSyllable.coda.length === 0;
  const emptyOnsetWeight = followsOpenSyllable ? 0 : 150;
  const onsetLength = getWeightedOption([
    [0, emptyOnsetWeight],
    [1, 675],
    [2, 125],
    [3, 15],
  ]);
  let soundsToAvoid = [];
  if (prevSyllable) {
    soundsToAvoid = prevSyllable.coda.map(({ sound }) => sound);
  }
  return buildCluster("onset", onsetLength, soundsToAvoid);
}
/**
 * Picks one nucleus phoneme by weighted draw.
 *
 * Every phoneme with a truthy `nucleus` value is offered to
 * `getWeightedOption`, using that value as its weight.
 *
 * @param {object} [prevSyllable] - Not used by this function.
 * @returns {object[]} A one-element array holding the chosen phoneme.
 */
function pickNucleus(prevSyllable) {
  const weightedNuclei = phonemes.flatMap((candidate) => (
    candidate.nucleus ? [[candidate, candidate.nucleus ?? 0]] : []
  ));
  return [getWeightedOption(weightedNuclei)];
}
/**
 * Draws a coda length from a weighted table and builds a coda cluster, then
 * usually avoids ending the coda on the same sound the onset starts with.
 *
 * @param {{sound: string}[]} onset - Onset of the syllable being built.
 * @param {object[]} nucleus - Not used by this function.
 * @param {boolean} [isLastSyllable=false] - Selects the length table used for a final syllable.
 * @returns {object[]} The coda; empty when length 0 is drawn.
 */
function pickCoda(onset, nucleus, isLastSyllable = false) {
  const lengthWeights = isLastSyllable
    ? [
      [0, 500],
      [1, 3000],
      [2, 900],
      [3, 400],
    ]
    : [
      [0, 6000],
      [1, 3000],
      [2, 900],
      [3, 100],
    ];
  const codaLength = getWeightedOption(lengthWeights);
  if (codaLength === 0) {
    return [];
  }
  const coda = buildCluster("coda", codaLength, []);

  // Nothing more to do unless the coda ends on the sound the onset starts with.
  const repeatsOnsetSound = onset.length > 0
    && coda.length > 0
    && onset[0].sound === coda[coda.length - 1].sound;
  if (!repeatsOnsetSound) {
    return coda;
  }

  const shouldAvoidRepetition = getWeightedOption([
    [true, 98],
    [false, 2], // the repetition is let through 2 times in 100
  ]);
  if (!shouldAvoidRepetition) {
    return coda;
  }

  // Look for a coda-capable phoneme of the same type with a different sound.
  const lastIndex = coda.length - 1;
  const repeatedType = coda[lastIndex].type;
  const alternativeCodas = phonemes.filter(
    (p) => p.coda && p.sound !== onset[0].sound && p.type === repeatedType,
  );
  if (alternativeCodas.length > 0) {
    coda[lastIndex] = getWeightedOption(alternativeCodas.map((p) => [p, p.coda ?? 0]));
  } else {
    // No replacement available: drop the repeated phoneme instead.
    coda.pop();
  }
  return coda;
}
/**
 * Checks the sonority step across a syllable boundary.
 *
 * The boundary is accepted when either side of it is empty, or when the
 * first onset phoneme is at least as sonorous as the last coda phoneme
 * (equal sonority is allowed).
 *
 * @param {{coda: object[]}} prevSyllable - Syllable before the boundary.
 * @param {{onset: object[]}} currentSyllable - Syllable after the boundary.
 * @returns {boolean} True when the boundary is acceptable.
 */
function checkCrossSyllableSonority(prevSyllable, currentSyllable) {
  const precedingCoda = prevSyllable.coda;
  if (!precedingCoda.length) {
    return true; // nothing on the left of the boundary
  }
  const followingOnset = currentSyllable.onset;
  if (!followingOnset.length) {
    return true; // nothing on the right of the boundary
  }
  const onsetRank = getSonority(followingOnset[0]);
  const codaRank = getSonority(precedingCoda[precedingCoda.length - 1]);
  return onsetRank >= codaRank;
}
/**
 * Attempts to repair a syllable boundary by adjusting the previous coda.
 *
 * When both the previous coda and the current onset are non-empty:
 *  - if the onset's first phoneme is more sonorous than the coda's last
 *    phoneme and prepending that coda phoneme to the onset matches no
 *    invalid boundary pattern, the phoneme is moved from coda to onset;
 *  - if the two are equally sonorous, the coda's last phoneme is dropped
 *    when a 90/10 weighted draw says so.
 * Both syllables are modified in place.
 *
 * @param {{coda: object[]}} prevSyllable - Syllable before the boundary.
 * @param {{onset: object[]}} currentSyllable - Syllable after the boundary.
 * @returns {object[]} A new two-element array `[prevSyllable, currentSyllable]`.
 */
function tryResyllabify(prevSyllable, currentSyllable) {
  const syllablePair = [prevSyllable, currentSyllable];
  if (!(prevSyllable.coda.length && currentSyllable.onset.length)) {
    return syllablePair;
  }

  const movablePhoneme = prevSyllable.coda[prevSyllable.coda.length - 1];
  const codaRank = getSonority(movablePhoneme);
  const onsetRank = getSonority(currentSyllable.onset[0]);

  // Would the onset still be acceptable with the coda phoneme in front of it?
  const candidateSounds = [movablePhoneme, ...currentSyllable.onset]
    .map((p) => p.sound)
    .join('');
  const formsAllowedCluster = !invalidBoundaryClusters.some((regex) => regex.test(candidateSounds));

  if (onsetRank > codaRank && formsAllowedCluster) {
    // Shift the phoneme across the boundary.
    prevSyllable.coda.pop();
    currentSyllable.onset.unshift(movablePhoneme);
  } else if (onsetRank === codaRank) {
    const shouldDropCoda = getWeightedOption([
      [true, 90],
      [false, 10], // kept 10 times in 100
    ]);
    if (shouldDropCoda) {
      prevSyllable.coda.pop();
    }
  }
  return syllablePair;
}
/**
 * Generates one syllable by picking its onset, nucleus and coda, in that order.
 *
 * @param {number} [syllablePosition=0] - Zero-based index of the syllable in the word.
 * @param {number} [syllableCount=1] - Total number of syllables in the word.
 * @param {object} [prevSyllable] - The preceding syllable, if any; forwarded to the onset and nucleus pickers.
 * @returns {{onset: object[], nucleus: object[], coda: object[]}} The new syllable.
 */
function generateSyllable(syllablePosition = 0, syllableCount = 1, prevSyllable) {
  const lastPosition = syllableCount - 1;
  const syllable = { onset: pickOnset(prevSyllable) };
  syllable.nucleus = pickNucleus(prevSyllable);
  // The coda picker uses a different length table for the final syllable.
  syllable.coda = pickCoda(syllable.onset, syllable.nucleus, syllablePosition === lastPosition);
  return syllable;
}
/**
 * Generates a word as a list of syllables plus its written and pronounced forms.
 *
 * @param {object} [options={}]
 * @param {*} [options.seed] - When not `undefined`, a generator created by
 *   `createSeededRandom(seed)` is installed with `overrideRand` for the call.
 * @param {number} [options.syllableCount] - Number of syllables; when falsy,
 *   the count is drawn from a weighted table instead.
 * @returns {{syllables: object[], pronunciation: *, written: *}} The syllables
 *   and the results of `pronounce(syllables)` and `write(syllables)`.
 */
export const generateWord = (options = {}) => {
  const { seed, syllableCount: specifiedSyllableCount } = options;
  // Captured up front so it can be reinstalled whatever happens below.
  const originalRand = getRand();
  try {
    if (seed !== undefined) {
      overrideRand(createSeededRandom(seed));
    }

    let syllableCount = specifiedSyllableCount;
    if (!syllableCount) {
      syllableCount = getWeightedOption([
        [1, 35000],
        [2, 35000],
        [3, 15000],
        [4, 5000],
        [5, 1000],
        [6, 500],
        [7, 10],
        [8, 5],
        [9, 3],
        [10, 2],
        [11, 1],
        [12, 1],
        [13, 1],
        [14, 1],
        [15, 1], // extremely rare
      ]);
    }

    const syllables = [];
    for (let index = 0; index < syllableCount; index++) {
      let candidate;
      let isAccepted;
      // Keep generating candidates until one fits after the previous syllable.
      do {
        candidate = generateSyllable(index, syllableCount, index > 0 ? syllables[index - 1] : undefined);
        // The first syllable has no boundary to check.
        isAccepted = index === 0 || checkCrossSyllableSonority(syllables[index - 1], candidate);
        if (!isAccepted) {
          // NOTE(review): the check above fails only when the onset is strictly
          // less sonorous than the coda, while tryResyllabify acts only when it
          // is more or equally sonorous, so this retry appears unable to change
          // the outcome — confirm the intended conditions.
          const [adjustedPrevious, adjustedCandidate] = tryResyllabify(syllables[index - 1], candidate);
          syllables[index - 1] = adjustedPrevious;
          candidate = adjustedCandidate;
          isAccepted = checkCrossSyllableSonority(syllables[index - 1], candidate);
        }
      } while (!isAccepted);
      syllables.push(candidate);
    }

    const written = write(syllables);
    const pronunciation = pronounce(syllables);
    return { syllables, pronunciation, written };
  } finally {
    // Always reinstall the randomness function that was active on entry.
    overrideRand(originalRand);
  }
};
export default generateWord;