UNPKG

predictionary

Version:

JavaScript dictionary-based word prediction library.

542 lines (508 loc) 25 kB
import Dictionary from "./dictionary.mjs";

// Character classes, kept as RegExp source strings so they can be compiled with different flags.
// Characters that separate words (whitespace and punctuation).
const INBETWEEN_CHARS_REGEX = "[\\s\\.\\?!,]";
// Punctuation after which a space is inserted before an applied prediction.
const PHRASE_END_CHARS_REGEX = "[\\.\\?!,]";
// Punctuation used to split a text into sentences.
const SENTENCE_END_CHARS_REGEX = "[\\.\\?!]";

/**
 * Constructs a Predictionary word prediction class. It's possible to manage multiple internal dictionaries,
 * retrieve predictions (suggestions) for a given input and learn/refine the dictionaries from user input.
 *
 * Note: argument validation errors are thrown as plain strings (not Error objects); this is kept for
 * backward compatibility with callers that compare against the thrown value.
 *
 * @constructor
 */
function Predictionary() {
    /**
     * Default dictionary key, if no key is specified.
     * @type {string}
     */
    this.DEFAULT_DICTIONARY_KEY = 'DEFAULT_DICTIONARY_KEY';

    // Stable reference to the instance, so methods keep working when they are detached from it.
    const thiz = this;
    const PREDICT_METHOD_COMPLETE_WORD = 'PREDICT_METHOD_COMPLETE_WORD';
    const PREDICT_METHOD_NEXT_WORD = 'PREDICT_METHOD_NEXT_WORD';
    // Map dictionaryKey -> Dictionary instance.
    let _dicts = {};
    // Last word passed to learn() by learnFromInput(), used to avoid learning the same word repeatedly.
    let _lastChosenWord = null;

    /**
     * Loads a single dictionary from a JSON string that was previously exported by
     * {@link Predictionary#dictionaryToJSON}. If the given dictionaryKey already exists, the existing
     * dictionary is replaced.
     *
     * @param {string} dictionaryJSON json string representing a dictionary, exported by {@link Predictionary#dictionaryToJSON}
     * @param {string} [dictionaryKey={@link Predictionary#DEFAULT_DICTIONARY_KEY}] the key for which the dictionary should
     *        be imported.
     * @throws {string} if dictionaryJSON is not specified
     */
    this.loadDictionary = function (dictionaryJSON, dictionaryKey) {
        const dictionary = createDictionaryFromJSON(dictionaryJSON);
        _dicts[dictionaryKey || thiz.DEFAULT_DICTIONARY_KEY] = dictionary;
    };

    /**
     * Loads all dictionaries from a JSON string that was previously exported by
     * {@link Predictionary#dictionariesToJSON}.
     * This method replaces/deletes all currently loaded dictionaries! The replacement only happens after the
     * complete input was parsed and loaded successfully, so a failing import leaves the current dictionaries intact.
     *
     * @param {string} dictionariesJSON json string representing dictionaries, exported by {@link Predictionary#dictionariesToJSON}
     * @throws {string} if dictionariesJSON (or the json of one contained dictionary) is not specified
     */
    this.loadDictionaries = function (dictionariesJSON) {
        if (!dictionariesJSON) {
            throw 'dictionariesJSON must be specified.';
        }
        const list = JSON.parse(dictionariesJSON);
        const loaded = {};
        list.forEach(element => {
            loaded[element.key || thiz.DEFAULT_DICTIONARY_KEY] = createDictionaryFromJSON(element.json);
        });
        _dicts = loaded;
    };

    /**
     * Exports a single dictionary to a JSON string. Subsequently the dictionary can be imported using the
     * resulting string and {@link Predictionary#loadDictionary}.
     *
     * @param {string} [dictionaryKey={@link Predictionary#DEFAULT_DICTIONARY_KEY}] the key of the dictionary to export.
     * @return {string|null} JSON string representing the dictionary with the specified dictionaryKey, or null if
     *         no dictionary with this key is loaded.
     */
    this.dictionaryToJSON = function (dictionaryKey) {
        const dict = _dicts[dictionaryKey || thiz.DEFAULT_DICTIONARY_KEY];
        return dict ? dict.toJSON() : null;
    };

    /**
     * Exports all dictionaries to a JSON string. Subsequently the dictionaries can be imported using the
     * resulting string and {@link Predictionary#loadDictionaries}.
     *
     * @return {string} JSON string representing all currently loaded dictionaries.
     */
    this.dictionariesToJSON = function () {
        const list = Object.keys(_dicts).map(key => ({
            key: key,
            json: _dicts[key].toJSON()
        }));
        return JSON.stringify(list);
    };

    /**
     * Use only a single loaded dictionary for predictions; all other dictionaries are disabled.
     *
     * @param {string} dictionaryKey the key of the dictionary to use
     * @throws {string} if dictionaryKey is not specified
     */
    this.useDictionary = function (dictionaryKey) {
        if (!dictionaryKey) {
            throw 'dictionaryKey must be specified.';
        }
        Object.keys(_dicts).forEach(key => {
            _dicts[key].disabled = dictionaryKey !== key;
        });
    };

    /**
     * Use a set of dictionaries for predictions, specified by an array of dictionaryKeys. The default
     * dictionary (key {@link Predictionary#DEFAULT_DICTIONARY_KEY}) always stays enabled.
     *
     * @param {string[]} dictionaryKeys an array of strings, specifying the dictionaryKeys to use
     * @throws {string} if dictionaryKeys is not an array
     */
    this.useDictionaries = function (dictionaryKeys) {
        if (!Array.isArray(dictionaryKeys)) {
            throw 'dictionaryKeys must be specified and of type Array.';
        }
        Object.keys(_dicts).forEach(key => {
            const isRequested = dictionaryKeys.indexOf(key) !== -1;
            _dicts[key].disabled = !isRequested && key !== thiz.DEFAULT_DICTIONARY_KEY;
        });
    };

    /**
     * Use all currently loaded dictionaries for predictions.
     */
    this.useAllDictionaries = function () {
        Object.keys(_dicts).forEach(key => {
            _dicts[key].disabled = false;
        });
    };

    /**
     * Add a new internal dictionary for predictions.
     *
     * @param {string} dictionaryKey the key of the dictionary to add
     * @param {Array} [words] Optional array of words (string) that should be added to the new dictionary.
     * @throws {string} if dictionaryKey is not specified or a dictionary with this key already exists
     */
    this.addDictionary = function (dictionaryKey, words) {
        if (!dictionaryKey) {
            throw 'dictionaryKey must be specified.';
        }
        if (_dicts[dictionaryKey]) {
            throw 'dictionary already existing.';
        }
        _dicts[dictionaryKey] = new Dictionary();
        if (Array.isArray(words)) {
            words.forEach(element => {
                thiz.addWord(element, dictionaryKey);
            });
        }
    };

    /**
     * Add a single new word/element to a dictionary. The dictionary is created if it does not exist yet.
     *
     * @param {string|Object} element the element to add, can be either a plain word as a string or an object containing
     *        the properties object.word (word to add as string) and object.rank (number specifying the rank of the word,
     *        a lower rank causes the word to be ranked to front as a suggestion). Objects without a string
     *        property "word" are silently ignored.
     * @param {string} [dictionaryKey={@link Predictionary#DEFAULT_DICTIONARY_KEY}] optional key of the dictionary to add the element.
     * @throws {string} if element is not specified
     */
    this.addWord = function (element, dictionaryKey) {
        dictionaryKey = dictionaryKey || thiz.DEFAULT_DICTIONARY_KEY;
        if (!element) {
            throw 'element to add not specified.';
        }
        if (!_dicts[dictionaryKey]) {
            thiz.addDictionary(dictionaryKey);
        }
        const dict = _dicts[dictionaryKey];
        if (typeof element === 'string') {
            dict.addWord(sanitize(element));
        } else if (element.word && typeof element.word === 'string') {
            dict.addWord(sanitize(element.word), element.rank);
        }
    };

    /**
     * Add multiple new words/elements to a dictionary.
     *
     * @param {Array<string|Object>} elements the elements to add, an Array of either plain words (string) or of objects
     *        containing the properties object.word (word to add as string) and object.rank (number specifying the
     *        rank of the word, a lower rank causes the word to be ranked to front as a suggestion).
     * @param {string} [dictionaryKey={@link Predictionary#DEFAULT_DICTIONARY_KEY}] optional key of the dictionary to add the elements.
     * @throws {string} if elements is not an array
     */
    this.addWords = function (elements, dictionaryKey) {
        if (!Array.isArray(elements)) {
            throw 'elements to add must be instance of array specified.';
        }
        elements.forEach(element => {
            thiz.addWord(element, dictionaryKey);
        });
    };

    /**
     * Deletes a single word from one or all dictionaries.
     *
     * @param {string} inputOrWord a single word or longer string where the last word will be deleted in the dictionaries.
     * @param {Object} [options] Object for options
     * @param {string} [options.dictionaryKey] the key of the dictionary where the word should be deleted. If not
     *        specified the word is deleted in all dictionaries.
     * @param {boolean} [options.ignoreCase] if false or undefined (default) only words with matching cases are
     *        deleted, otherwise also words with non-matching case.
     */
    this.delete = function (inputOrWord, options) {
        const word = getLastWord(inputOrWord);
        options = options || {};
        if (!word) {
            return;
        }
        if (!options.dictionaryKey) {
            thiz.getDictionaryKeys().forEach(key => {
                _dicts[key].deleteWord(word, options.ignoreCase);
            });
        } else if (_dicts[options.dictionaryKey]) {
            _dicts[options.dictionaryKey].deleteWord(word, options.ignoreCase);
        }
    };

    /**
     * Import words from a plain string (e.g. text file).
     *
     * @param {string} importString a plain text string (e.g. from a text file)
     * @param {Object} [options] options object containing additional properties. The default properties are suited for
     *        a plain text string in format: "word1;word2;word3;...", setting rankPosition=1 would be suited for
     *        a plain text in format: "word1 rank1;word2 rank2;word3 rank3;...".
     * @param {string} [options.elementSeparator=;] separator to split the elements from the importString
     * @param {string} [options.rankSeparator=<space>] separator to split a single element into its parts (word, rank, ...)
     * @param {number} [options.wordPosition=0] position of the word in the element (0-based)
     * @param {number} [options.wordPosition2] position of a second word in the element (0-based). If set, each element
     *        is treated as a pair of words and the second word is learned as follower of the first one
     *        (see {@link Predictionary#learn}) instead of adding a single word.
     * @param {number} [options.rankPosition] position of the rank in the element (0-based)
     * @param {boolean} [options.rankIsIndex] if true, the rank of the word is determined by the index in the importString
     *        (e.g. import ordered list of most frequent words without explicit given rank)
     * @param {string} [options.addToDictionary={@link Predictionary#DEFAULT_DICTIONARY_KEY}] key of the dictionary where
     *        the words should be added.
     */
    this.parseWords = function (importString, options) {
        options = options || {};
        const elementSeparator = options.elementSeparator || ';';
        const rankSeparator = options.rankSeparator || ' ';
        const wordPosition = options.wordPosition || 0;
        const wordPosition2 = options.wordPosition2;
        const rankPosition = options.rankPosition;
        const rankIsIndex = !!options.rankIsIndex;
        const addToDictionary = options.addToDictionary || thiz.DEFAULT_DICTIONARY_KEY;

        importString.split(elementSeparator).forEach((line, index) => {
            const elems = line.split(rankSeparator);
            if (wordPosition2 !== undefined) {
                // word-pair mode: learn the transition word1 -> word2
                const word1 = elems[wordPosition];
                const word2 = elems[wordPosition2];
                if (word1 && word2) {
                    thiz.learn(word2, word1, addToDictionary);
                }
                return;
            }
            if (!elems[wordPosition]) {
                return;
            }
            // explicit radix, so ranks are always read as decimal numbers
            const rank = rankIsIndex ? (index + 1) : parseInt(elems[rankPosition], 10);
            const elementToAdd = {
                word: elems[wordPosition].trim()
            };
            if (!isNaN(rank)) {
                elementToAdd.rank = rank;
            }
            thiz.addWord(elementToAdd, addToDictionary);
        });
    };

    /**
     * Retrieves saved words of a single or all loaded dictionaries.
     *
     * @param {string} [dictionaryKey] key of the dictionary from which the words should be retrieved, if not set all
     *        dictionaries are used.
     * @return {string[]} array of saved words (string) for the dictionary with the given key; empty if no
     *         dictionary with the given key exists.
     */
    this.getWords = function (dictionaryKey) {
        if (!dictionaryKey) {
            return thiz.getDictionaryKeys().reduce((words, key) => words.concat(_dicts[key].getWords()), []);
        }
        return _dicts[dictionaryKey] ? _dicts[dictionaryKey].getWords() : [];
    };

    /**
     * Test if a given word exists in a single or in all loaded dictionaries. The word is compared as a whole
     * against each saved word, so characters with a special meaning in regular expressions are safe to use.
     *
     * @param {string} word to test if existing
     * @param {string} [dictionaryKey] key of the dictionary to check, if not set all dictionaries are used
     * @param {boolean} [matchCase] if set (true) the word is searched case-sensitive, otherwise case-insensitive (default)
     * @return {boolean} true if the given word is existing
     */
    this.hasWord = function (word, dictionaryKey, matchCase) {
        if (typeof word !== 'string') {
            return false;
        }
        const words = thiz.getWords(dictionaryKey);
        if (matchCase) {
            return words.indexOf(word) !== -1;
        }
        const lowerCaseWord = word.toLowerCase();
        return words.some(saved => typeof saved === 'string' && saved.toLowerCase() === lowerCaseWord);
    };

    /**
     * Returns word suggestions for a given input. Automatically detects if the last word should be completed (last
     * character is not space) or if a next word should be suggested (last character is space).
     *
     * @param {string} input string for which the predictions should be calculated, e.g. the value of a text input
     *        where the user is typing.
     * @param {Object} [options] options object containing additional properties.
     * @param {number} [options.maxPredictions=10] number of suggestions that should be retrieved maximally
     * @param {boolean} [options.applyToInput] if true the suggestions are applied to the original input before being returned
     * @return {string[]} list of words that are predictions/suggestions for the given input, ordered by relevance.
     */
    this.predict = function (input, options) {
        return predictInternal(input, options);
    };

    /**
     * Returns word suggestions for a given input. Last word is assumed to be incomplete and has to be completed.
     *
     * @param {string} input string for which the predictions should be calculated, e.g. the value of a text input
     *        where the user is typing.
     * @param {Object} [options] options object containing additional properties.
     * @param {number} [options.maxPredictions=10] number of suggestions that should be retrieved maximally
     * @param {boolean} [options.applyToInput] if true the suggestions are applied to the original input before being returned
     * @return {string[]} list of words that are predictions/suggestions for the given input, ordered by relevance.
     */
    this.predictCompleteWord = function (input, options) {
        return predictInternal(input, options, PREDICT_METHOD_COMPLETE_WORD);
    };

    /**
     * Returns word suggestions for a given input. Last word is assumed to be complete and suggestions for the
     * next words are calculated.
     *
     * @param {string} input string for which the predictions should be calculated, e.g. the value of a text input
     *        where the user is typing.
     * @param {Object} [options] options object containing additional properties.
     * @param {number} [options.maxPredictions=10] number of suggestions that should be retrieved maximally
     * @param {boolean} [options.applyToInput] if true the suggestions are applied to the original input before being returned
     * @return {string[]} list of words that are predictions/suggestions for the given input, ordered by relevance.
     */
    this.predictNextWord = function (input, options) {
        return predictInternal(input, options, PREDICT_METHOD_NEXT_WORD);
    };

    /**
     * Apply chosen suggestion to a given input, e.g. input = "this is an ap" and chosenPrediction = "Apple" results in
     * "this is an Apple ". Unless options.dontLearn is set, calling this function passes the chosen word to
     * {@link Predictionary#learn}, making it more likely to be suggested in the future.
     *
     * @param {string} input the current input string (e.g. from textfield)
     * @param {string} chosenPrediction the chosen prediction which should be applied to the input string
     * @param {Object} [options] options object containing additional properties.
     * @param {string} [options.addToDictionary] the key of the dictionary where new learned words should be added. If not
     *        set the dictionary to add is automatically determined.
     * @param {boolean} [options.shouldCompleteLastWord] if true the last word is completed, if false the chosen prediction
     *        is added as new word. If not set this decision is done automatically (last character is space -> new word).
     * @param {boolean} [options.dontLearn] if true the chosen predictions are not added or their frequencies updated
     * @return {string} the given input with the applied suggestion, followed by a space
     */
    this.applyPrediction = function (input, chosenPrediction, options) {
        options = options || {};
        const addToDictionary = options.addToDictionary ||
            (thiz.isUsingOnlyDefaultDictionary() ? thiz.DEFAULT_DICTIONARY_KEY : null);
        const shouldCompleteLastWord = options.shouldCompleteLastWord !== undefined
            ? options.shouldCompleteLastWord
            : !isLastWordCompleted(input);
        const lastWord = getLastWord(input);
        const preLastWord = getLastWord(input, 2);

        // When completing, cut off the partially typed last word; otherwise keep the whole input.
        let prefix = shouldCompleteLastWord ? input.substring(0, input.lastIndexOf(lastWord)) : input;
        // Make sure the prediction is separated by a space from the preceding word or punctuation.
        const endsWithPhraseEnd = new RegExp(PHRASE_END_CHARS_REGEX).test(prefix[prefix.length - 1]);
        if (prefix.length > 0 && (!isLastWordCompleted(prefix) || endsWithPhraseEnd)) {
            prefix += ' ';
        }
        if (!options.dontLearn) {
            // The word before the chosen one is the last word if a new word is appended,
            // otherwise (completion) the word before the one being completed.
            thiz.learn(chosenPrediction, shouldCompleteLastWord ? preLastWord : lastWord, addToDictionary);
        }
        return prefix + chosenPrediction + ' ';
    };

    /**
     * Updates the frequencies for given words, making them to be more likely suggested in the future.
     * All enabled dictionaries learn from the words; a new word is only added to the target dictionary.
     *
     * @param {string} chosenWord a suggestion/word the user has chosen
     * @param {string} [previousWord] the previous word of the chosen suggestion
     * @param {string} [addToDictionary] the key of the dictionary where new words should be added. Automatically
     *        determined, if not specified. If exactly one dictionary is enabled, this dictionary is always used.
     */
    this.learn = function (chosenWord, previousWord, addToDictionary) {
        chosenWord = sanitize(chosenWord);
        previousWord = sanitize(previousWord);
        const enabledKeys = thiz.getDictionaryKeys(true);
        if (enabledKeys.length === 1) {
            addToDictionary = enabledKeys[0];
        }
        if (enabledKeys.length > 0 && (!addToDictionary || !_dicts[addToDictionary])) {
            // No valid target given: choose the enabled dictionary that already knows the words best.
            // Knowing the chosen word counts 2, knowing the previous word counts 1; on ties the later key wins.
            let currentHighscore = 0;
            enabledKeys.forEach(key => {
                let score = 0;
                if (thiz.hasWord(chosenWord, key)) {
                    score += 2;
                }
                if (thiz.hasWord(previousWord, key)) {
                    score++;
                }
                if (score > 0 && score >= currentHighscore) {
                    currentHighscore = score;
                    addToDictionary = key;
                }
            });
        }
        addToDictionary = addToDictionary || thiz.DEFAULT_DICTIONARY_KEY;
        if (!_dicts[addToDictionary]) {
            thiz.addDictionary(addToDictionary);
        }
        Object.keys(_dicts).forEach(key => {
            const dict = _dicts[key];
            if (!dict.disabled) {
                dict.learn(chosenWord, previousWord, addToDictionary === key);
            }
        });
    };

    /**
     * Learns from input text while the user is typing. This method can be called with e.g. the value of a text input
     * field for every character the user is typing. Learning only happens if the last word of the input is
     * completed and the word to learn differs from the word learned by the previous call.
     *
     * @param {string} input the text string to learn with. The second last and third last words are learned.
     * @param {string} [dictionaryKey] the key of the dictionary where new words should be added. Automatically determined, if not specified.
     * @return {boolean} true if something was learned, false if not
     */
    this.learnFromInput = function (input, dictionaryKey) {
        if (!isLastWordCompleted(input)) {
            return false;
        }
        const chosenWord = getLastWord(input, 2);
        const previousWord = getLastWord(input, 3);
        if (!chosenWord || chosenWord === _lastChosenWord) {
            return false;
        }
        _lastChosenWord = chosenWord;
        thiz.learn(chosenWord, previousWord, dictionaryKey);
        return true;
    };

    /**
     * Learns words and transitions from a given text/phrase. The text is split into sentences and each pair of
     * consecutive words within a sentence is passed to {@link Predictionary#learn}.
     *
     * @param {string} text the text to learn from
     * @param {string} [dictionaryKey] the key of the dictionary where the words should be learned/added.
     *        Automatically determined by {@link Predictionary#learn}, if not specified.
     */
    this.learnFromText = function (text, dictionaryKey) {
        // Collapse every run of whitespace (including line breaks) to a single space, so that
        // splitting by space does not produce empty words or words glued together across lines.
        const normalized = text.replace(/\s+/g, ' ');
        const sentences = normalized.split(new RegExp(SENTENCE_END_CHARS_REGEX));
        sentences.forEach(sentence => {
            const words = sentence.split(' ');
            for (let i = 0; i < words.length - 1; i++) {
                // use thiz instead of this, so the method also works when called detached from the instance
                thiz.learn(words[i + 1], words[i], dictionaryKey);
            }
        });
    };

    /**
     * Returns a list of currently loaded dictionary keys.
     *
     * @param {boolean} [onlyEnabled] if true only keys of dictionaries that are enabled are returned. See e.g.
     *        {@link Predictionary#useDictionaries}
     * @return {string[]}
     */
    this.getDictionaryKeys = function (onlyEnabled) {
        const keys = Object.keys(_dicts);
        return onlyEnabled ? keys.filter(key => !_dicts[key].disabled) : keys;
    };

    /**
     * Returns true if only the default dictionary is used (key {@link Predictionary#DEFAULT_DICTIONARY_KEY}),
     * or if no dictionary is loaded at all.
     * @return {boolean}
     */
    this.isUsingOnlyDefaultDictionary = function () {
        const keys = thiz.getDictionaryKeys();
        return keys.length === 0 || (keys.length === 1 && keys[0] === thiz.DEFAULT_DICTIONARY_KEY);
    };

    /**
     * Creates a new Dictionary and loads the given JSON into it.
     *
     * @param {string} dictionaryJSON json string representing a dictionary
     * @return {Dictionary} the loaded dictionary
     * @throws {string} if dictionaryJSON is not specified
     */
    function createDictionaryFromJSON(dictionaryJSON) {
        if (!dictionaryJSON) {
            throw 'dictionaryJSON must be specified.';
        }
        const dictionary = new Dictionary();
        dictionary.load(dictionaryJSON);
        return dictionary;
    }

    /**
     * Collects the predictions of all enabled dictionaries, sorts them by relevance and returns the best ones.
     *
     * @param {string} input the input to calculate predictions for
     * @param {Object} [options] see {@link Predictionary#predict}; the passed object is not modified.
     * @param {string} [predictType] PREDICT_METHOD_COMPLETE_WORD or PREDICT_METHOD_NEXT_WORD; if not set the
     *        method is chosen depending on whether the last word of the input is completed.
     * @return {string[]} de-duplicated predictions, at most options.maxPredictions elements
     */
    function predictInternal(input, options, predictType) {
        options = options || {};
        // Work on a copy so the caller's options object is left untouched.
        // "maxPredicitons" (sic) is still accepted as a misspelled alias of maxPredictions.
        const effectiveOptions = Object.assign({}, options, {
            maxPredictions: options.maxPredictions || options.maxPredicitons || 10,
            applyToInput: options.applyToInput || false
        });
        const lastWord = getLastWord(input);
        const lastWordCompleted = isLastWordCompleted(input);

        let predictions = [];
        Object.keys(_dicts).forEach(key => {
            const dict = _dicts[key];
            if (dict.disabled) {
                return;
            }
            let predictFn = null;
            if (predictType === PREDICT_METHOD_NEXT_WORD) {
                predictFn = dict.predictNextWord;
            } else if (predictType === PREDICT_METHOD_COMPLETE_WORD) {
                predictFn = dict.predictCompleteWord;
            }
            predictFn = predictFn || (lastWordCompleted ? dict.predictNextWord : dict.predictCompleteWord);
            // Invoke with dict as receiver, so the call does not depend on how Dictionary references itself.
            predictions = predictions.concat(predictFn.call(dict, lastWord, effectiveOptions));
        });

        // Order: exact matches before fuzzy matches, then higher frequency first,
        // then lower rank first (a defined rank precedes an undefined one).
        predictions.sort((a, b) => {
            if (a.fuzzyMatch !== b.fuzzyMatch) {
                return a.fuzzyMatch ? 1 : -1;
            }
            if (a.frequency !== b.frequency) {
                return (a.frequency < b.frequency) ? 1 : -1;
            }
            if (a.rank !== b.rank) {
                if (a.rank && b.rank === undefined) return -1;
                if (b.rank && a.rank === undefined) return 1;
                return (a.rank < b.rank) ? -1 : 1;
            }
            return 0;
        });

        // De-duplicate by the predicted word itself. The result array cannot be used for this check,
        // because with applyToInput it contains the complete applied input instead of the word.
        const seenWords = new Set();
        const returnArray = [];
        for (let i = 0; i < predictions.length && returnArray.length < effectiveOptions.maxPredictions; i++) {
            const word = predictions[i].word;
            if (seenWords.has(word)) {
                continue;
            }
            seenWords.add(word);
            if (effectiveOptions.applyToInput) {
                returnArray.push(thiz.applyPrediction(input, word, {dontLearn: true}));
            } else {
                returnArray.push(word);
            }
        }
        return returnArray;
    }
}

/**
 * Returns a word of the given text, counted from the end.
 *
 * @param {string} text the text to retrieve the word from; a missing text is treated as empty string
 * @param {number} [index=1] 1-based position of the word counted from the end (1 = last word, 2 = second last, ...)
 * @return {string} the word without any separator characters, or an empty string if there is no such word
 */
function getLastWord(text, index) {
    index = index || 1;
    const separator = new RegExp(INBETWEEN_CHARS_REGEX);
    const words = (text || '').trim().split(separator).filter(word => !!word);
    const returnWord = words[words.length - index] || '';
    return returnWord.replace(new RegExp(INBETWEEN_CHARS_REGEX, 'g'), '');
}

/**
 * Tests if the last word of the given text is completed, i.e. if the text ends with a separator character
 * (whitespace or one of ".?!,").
 *
 * @param {string} text the text to test
 * @return {boolean} true if the last character is a separator, false otherwise (also for empty or missing text)
 */
function isLastWordCompleted(text) {
    if (!text) {
        return false;
    }
    return new RegExp(INBETWEEN_CHARS_REGEX).test(text[text.length - 1]);
}

/**
 * Removes punctuation characters (!?,;.:), line feeds and surrounding whitespace from a word.
 *
 * @param {string} [word] the word to sanitize; a missing word is treated as empty string
 * @return {string} the sanitized word
 */
function sanitize(word) {
    word = word || '';
    return word.replace(/[!?,;.:\n]/g, '').trim();
}

/**
 * Constructs a new instance of Predictionary
 *
 * @return {Predictionary}
 */
Predictionary.instance = function () {
    return new Predictionary();
};

export default Predictionary;

/**
 * Constructs a new instance of Predictionary (named export equivalent of Predictionary.instance).
 *
 * @return {Predictionary}
 */
export function instance() {
    return new Predictionary();
}