UNPKG

fiction-word

Version:
107 lines (106 loc) 3.12 kB
"use strict"; /* * This file contains functions for generating random word lengths based on a predefined distribution. * The distribution is based on the frequency of word lengths in default linux English dictionary. */ Object.defineProperty(exports, "__esModule", { value: true }); exports.getRandomWordLength = exports.generateDistribution = void 0; var dictionaryDistribution = [ [1, 52], [2, 488], [3, 1385], [4, 3688], [5, 6717], [6, 10268], [7, 13451], [8, 13869], [9, 12363], [10, 9823], [11, 6922], [12, 4454], [13, 2549], [14, 1284], [15, 629], [16, 260], [17, 124], [18, 38], [19, 18], [20, 13], // 20+ ]; var corpusDistribution = [ [1, 0.03], [2, 0.06], [3, 0.18], [4, 0.19], [5, 0.16], [6, 0.13], [7, 0.1], [8, 0.07], [9, 0.04], [10, 0.02], [11, 0.02], // 11+ ]; /** * @description Computes the cumulative distribution of word lengths. * @returns An array of cumulative probabilities for each word length. */ function generateDistribution(mode) { if (mode === void 0) { mode = "dictionary"; } // Distribution of word lengths var distribution = mode === "dictionary" ? dictionaryDistribution : corpusDistribution; var total = distribution.reduce(function (sum, _a) { var count = _a[1]; return sum + count; }, 0); // Precompute cumulative probabilities var cumulative = []; var sum = 0; for (var _i = 0, distribution_1 = distribution; _i < distribution_1.length; _i++) { var _a = distribution_1[_i], length = _a[0], count = _a[1]; // Don't include 20+ as it's a tail distribution if (length === 20) break; sum += count; cumulative.push([length, sum / total]); } return cumulative; } exports.generateDistribution = generateDistribution; /** * Sample a word length from the long tail distribution. * @param min The minimum word length. * @param max The maximum word length. * @param decay The decay rate. * @returns A random word length. */ function sampleLongTail(min, max, decay) { if (min === void 0) { min = 20; } if (max === void 0) { max = 30; } if (decay === void 0) { decay = 0.6; } // A geometric decay for 20+ length var r = Math.random(); var cumulative = 0; for (var i = 0; i <= max - min; i++) { var prob = Math.pow(decay, i) * (1 - decay); cumulative += prob; if (r < cumulative) return min + i; } return max; // fallback } /** * Sample a word length from the distribution. * @returns A random word length. */ function getRandomWordLength(distribution) { distribution = distribution || generateDistribution(); var rand = Math.random(); for (var _i = 0, distribution_2 = distribution; _i < distribution_2.length; _i++) { var _a = distribution_2[_i], length = _a[0], prob = _a[1]; if (rand < prob) return length; } // If it falls into the 20+ tail return sampleLongTail(); } exports.getRandomWordLength = getRandomWordLength;