UNPKG

foswig

Version:

A library that generates legible pseudo-random words from an input dictionary using Markov chains.

108 lines (107 loc) 4.24 kB
/**
 * @license Foswig.js | (c) Glenn Conner. | https://github.com/mrsharpoblunto/foswig.js/blob/master/LICENSE
 * @format
 */

/**
 * Generates pseudo random words from an input dictionary using a character
 * level markov chain.
 */
export default class MarkovChain {
  /**
   * @param {number} order Indicates how many previous characters to take into
   * account when picking the next. A lower number represents more random
   * words, whereas a higher number will result in words that match the input
   * words more closely.
   * @param {Iterable<string>} words The input dictionary used to build the
   * chain.
   */
  constructor(order, words) {
    this.order = order;
    this.duplicates = { children: {} };
    this.start = { character: '', neighbors: [] };
    this.init(words);
  }

  /**
   * Builds the chain graph (rooted at `this.start`) and the duplicates trie
   * from the input dictionary.
   * @param {Iterable<string>} words The input dictionary.
   */
  init(words) {
    // A Map is used rather than a plain object because keys are arbitrary
    // dictionary substrings: with a plain object, keys such as 'constructor',
    // 'valueOf' or '__proto__' would resolve to inherited Object.prototype
    // members and be mistaken for existing chain nodes.
    const nodesByKey = new Map();

    for (const word of words) {
      this.addToDuplicatesTrie(word.toLowerCase());

      let previous = this.start;
      let key = '';
      for (let i = 0; i < word.length; ++i) {
        const ch = word[i];
        key += ch;
        // the key only ever holds the most recent `order` characters.
        if (key.length > this.order) {
          key = key.slice(1);
        }

        let node = nodesByKey.get(key);
        if (!node) {
          node = { character: ch, neighbors: [] };
          nodesByKey.set(key, node);
        }

        previous.neighbors.push(node);
        previous = node;
      }

      //link to end node.
      previous.neighbors.push(null);
    }
  }

  /**
   * Adds a word and all its suffixes to a duplicates trie to ensure that
   * generated words are never an exact match or substring of a word in the
   * input dictionary. Building a trie allows us to efficiently search for
   * these duplicates later without having to do O(N) comparison checks over
   * the entire dictionary.
   * @param {string} word The (already lowercased) word to add.
   */
  addToDuplicatesTrie(word) {
    // Insert the shortest suffix first and the whole word last. Doing this
    // with a loop instead of recursion keeps the stack depth constant no
    // matter how long the word is.
    for (let start = word.length - 1; start >= 0; --start) {
      let currentNode = this.duplicates;
      for (let i = start; i < word.length; ++i) {
        const ch = word[i];
        let childNode = currentNode.children[ch];
        if (!childNode) {
          childNode = { children: {} };
          currentNode.children[ch] = childNode;
        }
        currentNode = childNode;
      }
    }
  }

  /**
   * Check to see if a word is a match to any substring in the input
   * dictionary in O(N) time, where N is the number of characters in the
   * word rather than the number of words in the dictionary.
   * @param {string} word The word we want to find out whether it is a
   * duplicate of a substring in the input dictionary. The comparison is
   * case insensitive.
   * @returns {boolean} true if every character of the word could be followed
   * through the duplicates trie (which is always the case for an empty word).
   */
  isDuplicate(word) {
    let currentNode = this.duplicates;
    for (const ch of word.toLowerCase().split('')) {
      const childNode = currentNode.children[ch];
      if (!childNode) {
        return false;
      }
      currentNode = childNode;
    }
    return true;
  }

  /**
   * Generates a word by randomly walking the chain from the start node until
   * an end link is reached.
   * @param {Object} [options] Generation constraints. May be omitted entirely.
   * @param {number} [options.minLength=0] Words shorter than this are
   * rejected.
   * @param {number} [options.maxLength=0] Words longer than this are rejected
   * (not truncated). Zero or less means no upper limit.
   * @param {boolean} [options.allowDuplicates=true] When false, words that
   * are a substring of a dictionary word are rejected.
   * @param {number} [options.maxAttempts=25] How many rejected words are
   * tolerated before giving up. Zero or less means keep trying indefinitely.
   * @param {function(): number} [options.random=Math.random] Source of
   * randomness; it is multiplied by a neighbor count and floored to pick an
   * index, so it is expected to behave like Math.random.
   * @returns {string} The generated word.
   * @throws {Error} If no acceptable word was produced within maxAttempts.
   */
  generate({
    minLength = 0,
    maxLength = 0,
    allowDuplicates = true,
    maxAttempts = 25,
    random = Math.random,
  } = {}) {
    let word;
    let repeat;
    let attempts = 0;

    do {
      repeat = false;
      word = '';

      const firstIndex = Math.floor(random() * this.start.neighbors.length);
      let currentNode = this.start.neighbors[firstIndex];

      // Walk until the end link (null) is hit. When a maxLength is set the
      // walk is allowed to overshoot it by one character so that over-long
      // words can be detected and rejected below.
      while (currentNode && (maxLength <= 0 || word.length <= maxLength)) {
        word += currentNode.character;
        const nextIndex = Math.floor(random() * currentNode.neighbors.length);
        currentNode = currentNode.neighbors[nextIndex];
      }

      if (
        (maxLength > 0 && word.length > maxLength) ||
        word.length < minLength
      ) {
        repeat = true;
      }
    } while (
      // we don't want to output any exact replicas from the input dictionary
      (repeat || (!allowDuplicates && this.isDuplicate(word))) &&
      // attempts is only incremented after a rejected word.
      (maxAttempts <= 0 || ++attempts < maxAttempts)
    );

    if (maxAttempts > 0 && attempts >= maxAttempts) {
      throw new Error(
        `Unable to generate a word with the given parameters after ${attempts} attempts`,
      );
    }
    return word;
  }
}