UNPKG

purpl-markov-chain

Version:

A simple Markov chain class supporting different n-grams

266 lines (232 loc) 6.63 kB
/**
 * @typedef {Object} MarkovChainJSON - A JSON representation of a Markov chain object.
 * @property {number[][]} corpus - The corpus: one array per learnt sentence, each holding indices into `dictionary`.
 * @property {string[]} dictionary - The dictionary.
 * @property {GenerationConfig} config - The default config used for sentence generation.
 */

/**
 * @typedef {MarkovChainJSON | string} MarkovChainResolvable
 */

/**
 * @typedef {Object} GenerationConfig - A config object used to generate sentences.
 * @property {string} from - The word to start generating from
 * @property {number} grams - The sequence length to use.
 * @property {boolean} backward - Whether to generate backward or not.
 */

/**
 * A Markov chain.
 *
 * NOTE: errors are thrown as plain strings (not Error objects). This is kept
 * on purpose so that existing callers comparing the thrown value keep working.
 */
class MarkovChain {
  /**
   * Create a new MarkovChain.
   *
   * @example
   * // Create a new chain
   * const chain = new MarkovChain()
   *
   * // Add text
   * chain.update('some text')
   *
   * // Export it to string or to JSON
   * const str = chain.toString()
   * const json = chain.toJSON()
   *
   * // Create new chains based on our saves
   * const fromStr = new MarkovChain(str)
   * const fromJSON = new MarkovChain(json)
   *
   * @param {MarkovChainResolvable} [base] - A saved chain from .toJSON or .toString.
   * @throws {string} If `base` cannot be serialized/parsed as JSON or does not resolve to an object.
   */
  constructor(base = {}) {
    const invalidBase = 'Invalid \'base\' argument. Needs type MarkovChainResolvable.'
    let parsed

    // Objects are round-tripped through JSON so the chain never shares
    // references with the caller's data. Parsing happens inside the try so a
    // malformed string yields the documented error instead of a SyntaxError.
    try {
      parsed = JSON.parse(typeof base === 'string' ? base : JSON.stringify(base))
    } catch {
      throw invalidBase
    }

    if (parsed === null || typeof parsed !== 'object') throw invalidBase

    /**
     * The sentences learnt by the Markov chain, stored as dictionary indices.
     *
     * @type {number[][]}
     */
    this.corpus = parsed.corpus || []

    /**
     * A list of all the words learnt.
     *
     * @type {string[]}
     */
    this.dictionary = parsed.dictionary || []

    /**
     * The default config to use for generation.
     * A partial saved config is merged over the built-in defaults so that
     * every field is always present.
     *
     * @type {GenerationConfig}
     */
    this.config = { from: '', grams: 2, backward: false, ...(parsed.config || {}) }
  }

  /**
   * Add a sentence to the Markov chain.
   *
   * The sentence is split on single spaces; each word is appended to the
   * dictionary when first seen, and the sentence is stored in the corpus as
   * the list of its words' dictionary indices.
   *
   * @example
   * const chain = new MarkovChain()
   *
   * chain.update('Some text')
   *
   * @param {string} sentence - The sentence to add.
   * @throws {string} If `sentence` is not a string.
   */
  update(sentence) {
    if (typeof sentence !== 'string') throw 'The argument should be a string'

    const indices = []

    for (const word of sentence.split(' ')) {
      let index = this.dictionary.indexOf(word)

      if (index === -1) {
        // Array#push returns the new length, so the new word sits at length - 1
        index = this.dictionary.push(word) - 1
      }

      indices.push(index)
    }

    this.corpus.push(indices)
  }

  /**
   * Search if a word is in the corpus.
   *
   * @example
   * const chain = new MarkovChain()
   *
   * console.log(chain.contains('OwO')) // false
   *
   * chain.update('hi OwO')
   *
   * console.log(chain.contains('OwO')) // true
   *
   * @param {string} word - The word to search for.
   * @return {boolean}
   */
  contains(word) {
    return this.dictionary.includes(word)
  }

  /**
   * Generate a new sentence.
   *
   * Every option is optional; a missing (undefined/null) `from` or `backward`
   * falls back to `this.config`, so `backward: false` and `from: ''` can
   * override a truthy instance default. A falsy `grams` falls back to
   * `this.config.grams`.
   *
   * @example <caption>Generate a sentence</caption>
   * // Create a Markov chain
   * const chain = new MarkovChain()
   *
   * // Add some words to it
   * chain.update('Some words')
   * chain.update('Some more words')
   * chain.update('Some OwO')
   *
   * // Generate a sentence
   * const sentence = chain.generate()
   * console.log(sentence)
   *
   * // Generate a sentence starting with 'OwO'
   * const owo = chain.generate({ from: 'OwO' })
   * console.log(owo)
   *
   * // Generate a sentence backward
   * const backwardOwO = chain.generate({ from: 'OwO', backward: true })
   * console.log(backwardOwO)
   *
   * @param {GenerationConfig} [config=this.config] - The config to use for the generation.
   * @return {string} The generated sentence, or '' when `from` is unknown or nothing has been learnt.
   * @throws {string} If the resolved `grams` is not a number >= 1.
   */
  generate(config = {}) {
    const from = config.from != null ? config.from : this.config.from
    const grams = Number(config.grams || this.config.grams)
    const backward = Boolean(config.backward != null ? config.backward : this.config.backward)

    if (from !== '' && !this.contains(from)) return ''

    // A step shorter than one word can never reach the end of a sentence
    if (!(grams >= 1)) throw 'The \'grams\' option should be a number greater than or equal to 1'

    const sentence = []

    if (from === '') {
      // No 'from' set: start from the first (or, backward, the last) word of
      // a random non-empty sentence
      const candidates = this.corpus.filter(s => s.length > 0)

      if (!candidates.length) return ''

      const start = this._getRandom(candidates)
      sentence.push(backward ? start[start.length - 1] : start[0])
    } else {
      sentence.push(this.dictionary.indexOf(from))
    }

    // Get next words until a sentence boundary is reached
    let done = false

    while (!done) {
      const last = sentence[sentence.length - 1]

      // Get the possible chains
      const possibleChains = this.corpus.filter(s => s.includes(last))

      // The word is in the dictionary but in no sentence: nothing to follow
      if (!possibleChains.length) break

      // Select a random one
      const chain = this._getRandom(possibleChains)

      // Pick a random occurrence of the word inside the chain. Always using
      // the first occurrence would loop forever on sentences that repeat a
      // word (e.g. 'a b a c'), because generation would keep re-entering the
      // same prefix.
      const occurrences = []
      chain.forEach((word, index) => {
        if (word === last) occurrences.push(index)
      })
      const wordIndex = this._getRandom(occurrences)

      // Keep the ngram length of it, and stop once the boundary is included
      if (backward) {
        const start = Math.max(wordIndex - grams, 0)
        sentence.push(...chain.slice(start, wordIndex).reverse())
        done = wordIndex - grams <= 0
      } else {
        sentence.push(...chain.slice(wordIndex + 1, wordIndex + grams + 1))
        done = wordIndex + grams >= chain.length - 1
      }
    }

    // Translate and return the result (backward output was built end-first)
    const ordered = backward ? sentence.reverse() : sentence

    return ordered.map(word => this.dictionary[word]).join(' ')
  }

  /**
   * Get a JSON version of the chain object.
   * The returned arrays and config are copies of the chain's own.
   *
   * @return {MarkovChainJSON}
   */
  toJSON() {
    return {
      corpus: this.corpus.map(s => [...s]),
      dictionary: [...this.dictionary],
      config: { ...this.config }
    }
  }

  /**
   * Get a string version of the chain object.
   *
   * @return {string}
   */
  toString() {
    return JSON.stringify(this.toJSON())
  }

  /**
   * Get a random element from an array.
   *
   * @param {Array} arr - The array to get a random element from.
   * @return {*} A random element, or undefined when the array is empty.
   * @private
   */
  _getRandom(arr) {
    return arr[Math.floor(Math.random() * arr.length)]
  }
}

// Export for CommonJS consumers. The check is on `module` itself so that
// environments with neither `window` nor `module` (Web Workers, ES modules)
// do not hit a ReferenceError.
if (typeof module !== 'undefined' && module.exports) {
  module.exports = MarkovChain;
}