UNPKG

literant-search

Version:

Efficient searches for high scoring words on a Scrabble-like board using either a Trie or compressed DAWG.

262 lines (244 loc) 10.3 kB
/**
 * This is a Trie, which can be compressed into a DAWG by calling the minimize method after instantiation.
 * While the compression did achieve an 87% node reduction rate on my wordlist, it does require additional
 * preprocessing and doesn't speed up the search.
 *
 * Node layout: every node is a plain object whose keys are single characters (edges to child nodes);
 * the special key "." with value 1 marks the end of a word.
 */
export default class DirectedAcyclicWordGraph {
  /**
   * @param {string[]} [allWords] words to insert; skipped when missing or empty
   */
  constructor(allWords) {
    this.root = {};
    if (allWords?.length) {
      allWords.forEach((word) => {
        this.insert(word);
      });
    }
  }

  /**
   * Adds a word to the trie, creating any missing nodes along the way.
   * @param {string} word word to insert; iterated character by character
   */
  insert(word) {
    let node = this.root;
    for (const char of word) {
      if (!node[char]) {
        node[char] = {}; // Create a new child node
      }
      node = node[char];
    }
    node["."] = 1; // Mark the end of the word
  }

  /**
   * Builds a string identifying a node by its terminal flag and the ids of its children.
   * Only meaningful while minimize() is running, because it reads this.nodeIds.
   * @param {object} node trie node whose children have already been given ids
   * @return {string} signature such as ".|S:0"
   */
  _getNodeSignature(node) {
    const parts = [];
    for (const key of Object.keys(node).sort()) {
      if (key === ".") {
        parts.push(".");
      } else {
        // store a reference to the child node instead of its contents
        parts.push(key + ":" + this.nodeIds.get(node[key]));
      }
    }
    return parts.join("|");
  }

  /**
   * Merges nodes with identical signatures so that equivalent suffixes are shared.
   * Needs to be called after building, otherwise we just have an ordinary trie.
   * @return {DirectedAcyclicWordGraph} this instance, for chaining
   */
  minimize() {
    this.nodeIds = new Map();
    this.nodesBySignature = new Map();
    let nextId = 0;

    const isChildEdge = (key, value) => key !== "." && typeof value === "object";

    // Collects every node reachable in exactly `depth` edges, together with the
    // parent and edge key it was reached through (so it can be re-pointed later).
    const collectLevel = (depth) => {
      const found = [];
      const walk = (node, parent, key, level) => {
        if (level === depth) {
          found.push({ parent, key, node });
          return;
        }
        for (const [char, childNode] of Object.entries(node)) {
          if (isChildEdge(char, childNode)) {
            walk(childNode, node, char, level + 1);
          }
        }
      };
      walk(this.root, null, null, 0);
      return found;
    };

    const processLevel = (depth) => {
      for (const { parent, key, node } of collectLevel(depth)) {
        const signature = this._getNodeSignature(node);
        if (!this.nodesBySignature.has(signature)) {
          // unique: becomes the canonical node for this signature
          this.nodesBySignature.set(signature, node);
          this.nodeIds.set(node, nextId++);
        } else if (parent !== null) {
          // duplicate: point the parent at the canonical node (the root has no parent)
          parent[key] = this.nodesBySignature.get(signature);
        }
      }
    };

    const getMaxDepth = (node, depth = 0) => {
      let maxDepth = depth;
      for (const [char, childNode] of Object.entries(node)) {
        if (isChildEdge(char, childNode)) {
          maxDepth = Math.max(maxDepth, getMaxDepth(childNode, depth + 1));
        }
      }
      return maxDepth;
    };

    // bottom-up processing: children must have ids before their parents are signed
    for (let depth = getMaxDepth(this.root); depth >= 0; depth--) {
      processLevel(depth);
    }

    this.nodeIds = null;
    this.nodesBySignature = null;
    return this;
  }

  /**
   * For testing minimize: counts distinct nodes, minimizes, then counts again.
   * Note that this minimizes the graph as a side effect.
   * @return {{nodesBeforeMinimization: number, nodesAfterMinimization: number, reduction: string}}
   */
  testCompression() {
    const countNodes = (node, visited = new Set()) => {
      if (visited.has(node)) return 0;
      visited.add(node);
      let count = 1;
      for (const [char, childNode] of Object.entries(node)) {
        if (char !== "." && typeof childNode === "object") {
          count += countNodes(childNode, visited);
        }
      }
      return count;
    };

    const beforeCount = countNodes(this.root);
    this.minimize();
    const afterCount = countNodes(this.root);

    return {
      nodesBeforeMinimization: beforeCount,
      nodesAfterMinimization: afterCount,
      reduction: `${((beforeCount - afterCount) / beforeCount * 100).toFixed(1)}%`
    };
  }

  /**
   * Looks a word up by following one edge per character, the same way insert() stores it.
   * @param {string} word word to look up (case-sensitive)
   * @return {number|boolean|undefined} 1 when the word is stored; false when an edge is
   *         missing; undefined when the path exists but is not marked as a word end
   */
  wordExists(word) {
    let node = this.root;
    for (const edge of word) {
      if (!node[edge]) return false;
      node = node[edge];
    }
    return node["."];
  }

  /**
   * Checks the word formed perpendicular to the row when `ch` is placed at index `at` of `slice`.
   * @param rowOffset {number} value copied into the result as rowOffset (findWords passes the letter's offset within the word)
   * @param ch {string} letter being placed
   * @param at {number} index within slice where the letter is placed
   * @param slice {string|string[]} perpendicular line of the board; " " marks an empty square
   * @return {boolean|object} true when no perpendicular word is formed; false when the word formed
   *         is not in the word list; otherwise {colOffset, rowOffset, word}
   */
  _perpendicularWord(rowOffset, ch, at, slice) {
    // Gather the contiguous letters immediately before the placement square.
    let lettersBefore = "";
    for (let i = at - 1; i >= 0 && slice[i] !== " "; i--) {
      lettersBefore = slice[i] + lettersBefore;
    }

    // Gather the contiguous letters immediately after the placement square.
    let lettersAfter = "";
    for (let i = at + 1; i < slice.length && slice[i] !== " "; i++) {
      lettersAfter += slice[i];
    }

    const word = lettersBefore + ch + lettersAfter;
    const colOffset = 0 - lettersBefore.length;

    if (word.length === 1) return true; // OK, no perpendicular word
    if (!this.wordExists(word.toUpperCase())) return false; // Invalid perpendicular word
    return { colOffset, rowOffset, word }; // Valid perpendicular word
  }

  /**
   * Using letters provided and from the word start index, finds all words in wordlist on row that match the following:
   * 1) Fit the letters placed on the row defined in constraints object;
   * 2) Straddle the placement index defined by pointIndex;
   * 3) Are within length bounds set by minLength and maxLength;
   * 4) If any perpendicular words exist, they are all in the wordlist.
   * Letters supplied by a blank tile appear in lower case in the returned word.
   *
   * @param letters {string} rack letters; "_" (or a lower-case letter) counts as a blank
   * @param startIndex {number} row index at which every candidate word starts
   * @param pointIndex {number} placement index the word has to reach past
   * @param pointRow {number} index of the current row inside each perpRows slice
   * @param minLength {number} minimum word length (inclusive)
   * @param maxLength {number} maximum word length (inclusive)
   * @param constraints {Array|object} indexed by row position; a truthy entry is a letter already on the board
   * @param perpRows {Array} indexed by row position; each entry is the perpendicular slice through that square
   * @return {Array<{at: number, word: string, perp: object[]}>} word start index, word found, and any perpendicular words
   */
  findWords(letters, startIndex, pointIndex, pointRow, minLength, maxLength, constraints, perpRows) {
    const results = [];
    const letterCounts = this._countLetters(letters);

    const traverse = (node, prefix, fixedLettersFound, pWords) => {
      const depth = prefix.length;
      const index = startIndex + depth;

      // If there's a board letter here then we just need evaluate it and traverse to the next tile
      const boardLetter = constraints[index];
      if (boardLetter) {
        const child = node[boardLetter];
        if (child == null) return;

        const length = depth + 1;
        // NOTE(review): the straddle test here uses startIndex + depth, whereas the rack-letter
        // branch below uses one more (startIndex + newPrefix.length). The two agree only if
        // pointIndex never refers to a square that already holds a letter - confirm with caller.
        const isValid =
          child["."] != null &&                         // terminal node
          length >= minLength && length <= maxLength && // word length is within bounds
          pointIndex < index &&                         // word straddles placement point
          !constraints[index + 1];                      // another board letter isn't following
        if (isValid) {
          results.push({ at: startIndex, word: prefix + boardLetter, perp: pWords });
        }

        traverse(child, prefix + boardLetter, fixedLettersFound + boardLetter, pWords); // go to next letter
        return;
      }

      // Otherwise we need to check each letter we have left against our trie to see what our possibilities are.
      // These counts do not depend on the edge being tried, so compute them once per node.
      const prefixCounts = this._countLetters(prefix);
      const fixedCounts = this._countLetters(fixedLettersFound);

      for (const char of Object.keys(node)) {
        if (char === ".") continue; // Skip the terminal marker
        // NOTE(review): the limit comes from the length of the first perpendicular slice, which
        // matches the row width only on a square board - confirm.
        if (index >= perpRows[0].length) return;

        const formation = this._canFormWord(letterCounts, this._countLetters(char), prefixCounts, fixedCounts);
        if (!formation) continue;

        const toAppend = formation === 2 ? char.toLowerCase() : char; // lower case marks a blank tile
        const newPrefix = prefix + toAppend;

        const perpendicular = this._perpendicularWord(depth, toAppend, pointRow, perpRows[index]);
        if (!perpendicular) continue;
        // add to perpendicular words so far
        const nextPWords = typeof perpendicular === "object" ? [...pWords, perpendicular] : pWords;

        const child = node[char];
        const end = startIndex + newPrefix.length;
        const isValid =
          child["."] != null &&                                               // terminal node
          newPrefix.length >= minLength && newPrefix.length <= maxLength &&   // within bounds
          pointIndex < end &&                                                 // straddles placement point
          !constraints[end];                                                  // no board letter following
        if (isValid) {
          results.push({ at: startIndex, word: newPrefix, perp: nextPWords });
        }

        traverse(child, newPrefix, fixedLettersFound, nextPWords);
      }
    };

    traverse(this.root, "", "", []);
    return results;
  }

  /**
   * Counts the occurrences of each letter. Lower-case a-z and "_" are all tallied under "_",
   * i.e. they are treated as blank tiles.
   * @param word {string} letters to count
   * @return {object} map of letter (or "_") to count
   */
  _countLetters(word) {
    const counts = {};
    const isLowerCase = (ch) => ch >= "a" && ch <= "z";
    for (const char of word) {
      const key = isLowerCase(char) || char === "_" ? "_" : char;
      counts[key] = (counts[key] || 0) + 1;
    }
    return counts;
  }

  /**
   * Checks to see if a word can be formed using letter counts of component parts provided.
   * The available pool consists of the letters on rack and letters previously placed on board.
   * The letters needed consist of the letters in current prefix and the current edge.
   * Blanks already spent in the prefix (counted under "_" in `used`) only reduce the number of
   * blanks still available; they do not consume any real letter from the pool.
   * @param available {object} counts of all letters on rack
   * @param required {object} letter counts for current edge, in a trie this is a single character
   * @param used {object} letter counts from the current prefix
   * @param fixed {object} letter counts of existing letters on the board
   * @return {number} 0 on failure; 1 found no wildcard used; 2 found with wildcard use.
   */
  _canFormWord(available, required, used, fixed) {
    const num = (val) => val || 0;
    const wildcards = num(available["_"]) - num(used["_"]); // blanks not yet spent
    let usedWildcard = false;

    for (const char of Object.keys(required)) {
      const pool = num(available[char]) + num(fixed[char]);
      const needed = num(required[char]) + num(used[char]);
      const remainder = pool - needed;
      if (remainder < 0) {
        // One missing letter may be covered by a single remaining blank.
        if (wildcards > 0 && remainder === -1 && !usedWildcard) {
          usedWildcard = true;
        } else {
          return 0;
        }
      }
    }
    return usedWildcard ? 2 : 1;
  }
}