UNPKG

@unglish/word-generator

Version:

A simple generator for creating unglish words.

214 lines (213 loc) 8.91 kB
import { overrideRand, getRand } from "../utils/random.js";
import { createSeededRandom } from "../utils/createSeededRandom.js";
import pick from "../utils/pick.js";
import getWeightedOption from "../utils/getWeightedOption.js";
import { phonemes, invalidOnsetClusters, invalidBoundaryClusters, invalidCodaClusters, sonority } from "../elements/phonemes.js";
import pronounce from "./pronounce.js";
import { write } from "./write.js";

/**
 * Look up the sonority rank for a phoneme by its `type`.
 *
 * @param {object} phoneme - Phoneme record; only its `type` property is read.
 * @returns {number} The value from the imported `sonority` table, or 0 when
 *   the type has no (truthy) entry.
 */
function getSonority(phoneme) {
  return sonority[phoneme.type] || 0;
}

/**
 * Build a consonant cluster for a syllable onset or coda, one phoneme at a
 * time, until `maxLength` is reached or no phoneme qualifies.
 *
 * A candidate must: be usable in the requested position (`p.onset` /
 * `p.coda`), not be in `ignore`, not already be in the cluster, respect the
 * sonority direction (rising for onsets, falling for codas, with the
 * s + t/p/k onset exception), and not match any of the invalid-cluster
 * patterns for the position.
 *
 * The `start` / `end` restriction is applied only at the edge slot of the
 * cluster (first slot of an onset, last slot of a coda). Previously the coda
 * check demanded "is the last slot" for every candidate, so any coda with
 * `maxLength` > 1 had no candidates for its first slot and came back empty;
 * and the onset check was computed but dropped by ternary precedence.
 *
 * @param {string} type - "onset" for an onset cluster; any other value is
 *   treated as a coda.
 * @param {number} [maxLength=3] - Maximum number of phonemes in the cluster.
 * @param {string[]} [ignore=[]] - Phoneme sounds that must not be picked.
 * @returns {object[]} The phonemes chosen, in order (possibly empty).
 */
function buildCluster(type, maxLength = 3, ignore = []) {
  const cluster = [];
  const isOnset = type === "onset";
  const isCoda = type === "coda";
  const invalidClusters = isOnset ? invalidOnsetClusters : invalidCodaClusters;

  while (cluster.length < maxLength) {
    const isFirstSlot = cluster.length === 0;
    const isLastSlot = cluster.length === maxLength - 1;
    const previous = cluster[cluster.length - 1];
    const prefix = cluster.map((ph) => ph.sound).join('');

    // Every predicate is evaluated for every phoneme (no short-circuiting of
    // the regex tests), matching the original evaluation pattern in case the
    // imported patterns carry stateful flags.
    const candidatePhonemes = phonemes.filter((p) => {
      // Edge restrictions only constrain the slot that sits on the edge.
      const isAllowedToStartWord = !(isOnset && isFirstSlot) || !p.start || p.start > 0;
      const isAllowedToEndWord = !(isCoda && isLastSlot) || !p.end || p.end > 0;
      const isValidPosition = (isOnset ? p.onset : p.coda) && isAllowedToStartWord && isAllowedToEndWord;

      const isNotIgnored = !ignore.includes(p.sound);
      const isNotDuplicate = !cluster.some((existingP) => existingP.sound === p.sound);

      // There are special cases for "s" in English where it can be followed
      // by something that does not rise in sonority (st, sp, sk).
      const isSpecialS = isOnset
        && cluster.length === 1
        && cluster[0].sound === 's'
        && ['t', 'p', 'k'].includes(p.sound);

      const hasSuitableSonority = isFirstSlot
        || isSpecialS
        || (isOnset
          ? getSonority(p) > getSonority(previous)
          : getSonority(p) < getSonority(previous));

      // Check against invalid clusters
      const potentialCluster = prefix + p.sound;
      const isValidCluster = !invalidClusters.some((regex) => regex.test(potentialCluster));

      return isValidPosition && isNotIgnored && isNotDuplicate && hasSuitableSonority && isValidCluster;
    });

    if (!candidatePhonemes.length) break;

    cluster.push(pick(candidatePhonemes));

    // Special case for English: an onset whose second phoneme is a liquid or
    // nasal is not extended further.
    if (isOnset && cluster.length === 2 && ['liquid', 'nasal'].includes(cluster[1].type)) {
      break;
    }
  }

  return cluster;
}

/**
 * Choose an onset cluster for a syllable.
 *
 * The length is drawn from a weighted table; an empty onset is given weight 0
 * when the previous syllable has no coda. Sounds in the previous syllable's
 * coda are excluded from the new onset.
 *
 * @param {object} [prevSyllable] - The preceding syllable, if any.
 * @returns {object[]} Onset phonemes (possibly empty).
 */
function pickOnset(prevSyllable) {
  const isFollowingNucleus = prevSyllable && prevSyllable.coda.length === 0;

  const length = getWeightedOption([
    [0, isFollowingNucleus ? 0 : 150],
    [1, 675],
    [2, 125],
    [3, 15],
  ]);

  const ignoredSounds = prevSyllable ? prevSyllable.coda.map((coda) => coda.sound) : [];
  return buildCluster("onset", length, ignoredSounds);
}

/**
 * Choose a nucleus, weighting each phoneme by its `nucleus` value.
 *
 * @param {object} [prevSyllable] - Currently unused.
 * @returns {object[]} A single-element array containing the chosen phoneme.
 */
function pickNucleus(prevSyllable) {
  const nuclei = phonemes.filter((p) => !!p.nucleus);
  const mappedNuclei = nuclei.map((p) => [p, p.nucleus ?? 0]);
  const nucleus = getWeightedOption(mappedNuclei);
  return [nucleus];
}

/**
 * Choose a coda cluster for a syllable.
 *
 * The length is drawn from one of two weighted tables depending on whether
 * this is the last syllable. If the coda ends in the same sound the onset
 * starts with, the final coda phoneme is usually swapped for another coda
 * phoneme of the same type, or dropped when there is no alternative.
 *
 * @param {object[]} onset - Onset phonemes of the same syllable.
 * @param {object[]} nucleus - Nucleus phonemes; currently unused.
 * @param {boolean} [isLastSyllable=false] - Selects the weight table.
 * @returns {object[]} Coda phonemes (possibly empty).
 */
function pickCoda(onset, nucleus, isLastSyllable = false) {
  const length = getWeightedOption(isLastSyllable
    ? [
      [0, 500],
      [1, 3000],
      [2, 900],
      [3, 400],
    ]
    : [
      [0, 6000],
      [1, 3000],
      [2, 900],
      [3, 100],
    ]);

  if (length === 0) return [];

  const coda = buildCluster("coda", length, []);

  // Check for onset-coda repetition
  if (onset.length > 0 && coda.length > 0 && onset[0].sound === coda[coda.length - 1].sound) {
    const shouldAvoidRepetition = getWeightedOption([
      [true, 98],
      [false, 2], // 2% chance to allow repetition
    ]);

    if (shouldAvoidRepetition) {
      // Try to replace the last coda phoneme with one of the same type
      const alternativeCodas = phonemes.filter((p) => p.coda
        && p.sound !== onset[0].sound
        && p.type === coda[coda.length - 1].type);

      if (alternativeCodas.length > 0) {
        coda[coda.length - 1] = getWeightedOption(alternativeCodas.map((p) => [p, p.coda ?? 0]));
      } else {
        // If no suitable alternative, remove the last coda phoneme
        coda.pop();
      }
    }
  }

  return coda;
}

/**
 * Check the sonority contour across a syllable boundary.
 *
 * @param {object} prevSyllable - Syllable before the boundary.
 * @param {object} currentSyllable - Syllable after the boundary.
 * @returns {boolean} True when either side of the boundary has no consonant,
 *   or when the first onset phoneme's sonority is greater than or equal to
 *   that of the last coda phoneme.
 */
function checkCrossSyllableSonority(prevSyllable, currentSyllable) {
  if (!prevSyllable.coda.length || !currentSyllable.onset.length) {
    return true; // No cross-syllable cluster, so it's valid
  }

  const lastCodaPhoneme = prevSyllable.coda[prevSyllable.coda.length - 1];
  const firstOnsetPhoneme = currentSyllable.onset[0];

  // Equal sonority across the boundary is allowed (a simplification).
  return getSonority(firstOnsetPhoneme) >= getSonority(lastCodaPhoneme);
}

/**
 * Attempt to repair a syllable boundary by moving or dropping the last coda
 * phoneme of the previous syllable. Mutates both syllables in place.
 *
 * - Onset sonority greater than coda sonority, and the combined sounds match
 *   no invalid boundary pattern: the coda phoneme is moved to the front of
 *   the onset.
 * - Equal sonority: the coda phoneme is dropped with weight 90 vs 10.
 *
 * NOTE(review): generateWord only calls this after
 * checkCrossSyllableSonority returned false, i.e. when the onset sonority is
 * strictly lower than the coda sonority. In that situation neither branch
 * fires and the syllables come back unchanged - confirm whether a
 * "lower sonority" branch was intended.
 *
 * @param {object} prevSyllable - Syllable before the boundary.
 * @param {object} currentSyllable - Syllable after the boundary.
 * @returns {object[]} `[prevSyllable, currentSyllable]` (the same objects).
 */
function tryResyllabify(prevSyllable, currentSyllable) {
  if (prevSyllable.coda.length && currentSyllable.onset.length) {
    const lastCodaPhoneme = prevSyllable.coda[prevSyllable.coda.length - 1];
    const firstOnsetPhoneme = currentSyllable.onset[0];
    const lastCodaSonority = getSonority(lastCodaPhoneme);
    const firstOnsetSonority = getSonority(firstOnsetPhoneme);

    // Check if moving the coda to the onset would create a valid onset cluster
    const potentialOnset = [lastCodaPhoneme, ...currentSyllable.onset];
    const potentialOnsetSounds = potentialOnset.map((p) => p.sound).join('');
    const isValidBoundaryCluster = !invalidBoundaryClusters.some((regex) => regex.test(potentialOnsetSounds));

    if (firstOnsetSonority > lastCodaSonority && isValidBoundaryCluster) {
      // Move the last coda phoneme to the onset of the next syllable
      prevSyllable.coda.pop();
      currentSyllable.onset.unshift(lastCodaPhoneme);
    } else if (firstOnsetSonority === lastCodaSonority) {
      // When sonority is equal, use getWeightedOption to decide
      const shouldDropCoda = getWeightedOption([
        [true, 90],
        [false, 10], // 10% chance to keep it
      ]);
      if (shouldDropCoda) {
        prevSyllable.coda.pop();
      }
    }
  }

  return [prevSyllable, currentSyllable];
}

/**
 * Generate one syllable (onset, nucleus, coda - picked in that order).
 *
 * @param {number} [syllablePosition=0] - Zero-based index within the word.
 * @param {number} [syllableCount=1] - Total syllables in the word.
 * @param {object} [prevSyllable] - The preceding syllable, if any.
 * @returns {{onset: object[], nucleus: object[], coda: object[]}}
 */
function generateSyllable(syllablePosition = 0, syllableCount = 1, prevSyllable) {
  // Build the syllable structure
  const isLastSyllable = syllablePosition === syllableCount - 1;
  const onset = pickOnset(prevSyllable);
  const nucleus = pickNucleus(prevSyllable);
  const coda = pickCoda(onset, nucleus, isLastSyllable);

  return {
    onset,
    nucleus,
    coda,
  };
}

/**
 * Generate a word.
 *
 * When `options.seed` is provided, the random source is replaced with a
 * seeded one for the duration of the call; the previous random source is
 * always restored afterwards, even if generation throws.
 *
 * @param {object} [options={}]
 * @param {*} [options.seed] - Seed passed to `createSeededRandom`.
 * @param {number} [options.syllableCount] - Syllable count; when falsy, a
 *   count from 1 to 15 is drawn from a weighted table.
 * @returns {{syllables: object[], pronunciation: *, written: *}} The
 *   syllables plus the results of `pronounce(syllables)` and
 *   `write(syllables)`.
 */
export const generateWord = (options = {}) => {
  const { seed, syllableCount: specifiedSyllableCount } = options;
  const originalRand = getRand();

  try {
    if (seed !== undefined) {
      const seededRand = createSeededRandom(seed);
      overrideRand(seededRand);
    }

    const syllableCount = specifiedSyllableCount || getWeightedOption([
      [1, 35000],
      [2, 35000],
      [3, 15000],
      [4, 5000],
      [5, 1000],
      [6, 500],
      [7, 10],
      [8, 5],
      [9, 3],
      [10, 2],
      [11, 1],
      [12, 1],
      [13, 1],
      [14, 1],
      [15, 1], // Extremely rare
    ]);

    const syllables = [];

    for (let i = 0; i < syllableCount; i++) {
      let newSyllable;
      let isValid = false;

      // Keep generating candidates until the boundary with the previous
      // syllable passes the sonority check.
      while (!isValid) {
        newSyllable = generateSyllable(i, syllableCount, i > 0 ? syllables[i - 1] : undefined);

        if (i === 0) {
          isValid = true; // First syllable is always valid
        } else {
          isValid = checkCrossSyllableSonority(syllables[i - 1], newSyllable);
        }

        // If not valid, try to resyllabify before discarding the candidate
        if (!isValid) {
          [syllables[i - 1], newSyllable] = tryResyllabify(syllables[i - 1], newSyllable);
          isValid = checkCrossSyllableSonority(syllables[i - 1], newSyllable);
        }
      }

      // @ts-expect-error
      syllables.push(newSyllable);
    }

    const written = write(syllables);
    const pronunciation = pronounce(syllables);

    return {
      syllables,
      pronunciation,
      written,
    };
  } finally {
    // Ensure the original randomness function is restored
    overrideRand(originalRand);
  }
};

export default generateWord;