UNPKG

word-match-helper

Version:

Aho-Corasick based word-matching class. Match & filter words.

145 lines (144 loc) 4.38 kB
(function (global, factory) {
  // UMD loader: CommonJS first, then AMD, otherwise attach a `WordMatcher`
  // namespace object to the global object.
  if (typeof exports === "object" && typeof module !== "undefined") {
    factory(exports);
  } else if (typeof define === "function" && define.amd) {
    define(["exports"], factory);
  } else {
    const host = typeof globalThis !== "undefined" ? globalThis : global || self;
    host.WordMatcher = {};
    factory(host.WordMatcher);
  }
})(this, function (exports2) {
  "use strict";

  /**
   * Returns a new array holding the distinct entries of `arr`, in first-seen
   * order. `undefined`/`null` yields an empty array (that is how `new Set`
   * treats them).
   * @param {Iterable<*>} [arr]
   * @returns {Array<*>}
   */
  function arrayDeduplication(arr) {
    return Array.from(new Set(arr));
  }

  /**
   * @param {*} v
   * @returns {boolean} true unless `v` is `undefined` or `null`.
   */
  function isDef(v) {
    return typeof v !== "undefined" && v !== null;
  }

  /**
   * One state of the keyword trie.
   *
   * - `char`     – the element that leads from `parent` to this node
   *                (`null` for the root).
   * - `isMatch`  – true when the path root→node spells a registered word.
   * - `parent`   – the node one step closer to the root, or `null`.
   * - `backNode` – failure link: the node to fall back to when no child
   *                matches the next input element.
   * - `children` – map from element to child node.
   */
  class Node {
    /**
     * @param {{char: *, is_match?: boolean, parent?: Node, back_node?: Node}} params
     */
    constructor(params) {
      this.char = params.char;
      this.isMatch = isDef(params.is_match) ? params.is_match : false;
      this.parent = isDef(params.parent) ? params.parent : null;
      this.backNode = isDef(params.back_node) ? params.back_node : null;
      // Prototype-less map so that a key such as "constructor" can never
      // resolve to a member inherited from Object.prototype.
      this.children = Object.create(null);
    }

    /**
     * @returns {Node[]} the direct children, in key insertion order as
     *   reported by `Object.keys`.
     */
    getChildNodeList() {
      return Object.keys(this.children).map((key) => this.children[key]);
    }

    /**
     * Returns the child reached via `char`, creating it when absent. A newly
     * created child starts with its failure link pointing at `root`;
     * `is_match` is only applied when the child is created here.
     * @param {{char: *, root: Node, is_match?: boolean}} params
     * @returns {Node}
     */
    setChildren(params) {
      const { char, root, is_match } = params;
      if (!isDef(this.children[char])) {
        this.children[char] = new Node({
          char,
          is_match,
          parent: this,
          back_node: root,
        });
      }
      return this.children[char];
    }
  }

  /**
   * Multi-pattern matcher: builds a trie with failure links from a word list
   * and reports every registered word that occurs in a text.
   */
  class AhoCorasick {
    /**
     * @param {{targets?: Iterable<string>}} [config] `targets` is the initial
     *   word list. A missing or `null` config yields an empty matcher.
     */
    constructor(config) {
      // Neither field is read or updated anywhere else in this bundle; both
      // are kept because they are visible on instances.
      this.currentState = 0;
      this.wordset = new Set();

      const { targets } = isDef(config) ? config : {};
      this.root = new Node({ char: null });
      this.initACStateTree(targets);
    }

    /**
     * Registers additional words and recomputes the failure links.
     * @param {Iterable<string>} wordlist
     */
    addWord(wordlist) {
      this.initACStateTree(wordlist);
    }

    /**
     * Inserts every distinct entry of `wordlist` into the trie, then rebuilds
     * all failure links. Entries that are `null`/`undefined` or have no
     * positive `length` (for example the empty string) are skipped, so they
     * can neither throw nor flag the root itself as a match.
     * @param {Iterable<string>} [wordlist]
     */
    initACStateTree(wordlist) {
      arrayDeduplication(wordlist).forEach((word) => {
        if (!isDef(word) || !(word.length > 0)) {
          return;
        }
        const lastIndex = word.length - 1;
        let currentNode = this.root;
        for (let i = 0; i <= lastIndex; i++) {
          currentNode = currentNode.setChildren({
            char: word[i],
            root: this.root,
            is_match: i === lastIndex,
          });
        }
        // The final node may have existed already (as a prefix of another
        // word), in which case setChildren did not apply `is_match`.
        currentNode.isMatch = true;
      });
      this.setBackNode(this.root);
    }

    /**
     * Computes failure links for all descendants of `node`, level by level,
     * so a parent's link is final before its children are processed.
     * @param {Node} node
     */
    setBackNode(node) {
      let currentNodeArr = node.getChildNodeList();
      while (currentNodeArr.length > 0) {
        const childNodeArr = [];
        for (let i = 0; i < currentNodeArr.length; i++) {
          const currentNode = currentNodeArr[i];
          childNodeArr.push(...currentNode.getChildNodeList());

          const parentNode = currentNode.parent;
          if (!parentNode) {
            continue;
          }
          // Walk the parent's failure chain until some state has a child for
          // this node's element. When none does, the link keeps its current
          // value (the root for a freshly created node).
          let backNode = parentNode.backNode;
          while (backNode) {
            const child = backNode.children[currentNode.char || ""];
            if (child) {
              currentNode.backNode = child;
              break;
            }
            backNode = backNode.backNode;
          }
        }
        currentNodeArr = childNodeArr;
      }
    }

    /**
     * Scans `text` once and reports every occurrence of a registered word.
     * @param {string} text
     * @returns {Array<{pos: number, word: string}>} one entry per occurrence;
     *   `pos` is the index just past the last element of the occurrence.
     *   Words ending at the same index are listed longest first.
     */
    search(text) {
      const words = [];
      let currentNode = this.root;
      for (let i = 0; i < text.length; i++) {
        const char = text[i];
        let child = currentNode.children[char];
        if (!child) {
          // No direct transition: try the states on the failure chain.
          let backNode = currentNode.backNode;
          while (backNode) {
            child = backNode.children[char];
            if (child) {
              break;
            }
            backNode = backNode.backNode;
          }
        }
        if (child) {
          // Every state on the new state's failure chain is a suffix of the
          // text read so far, so each matching one is a word ending here.
          let backNode = child;
          while (backNode && backNode !== this.root) {
            if (backNode.isMatch) {
              words.push({ pos: i + 1, word: this.getWord(backNode) });
            }
            backNode = backNode.backNode;
          }
          currentNode = child;
        } else {
          currentNode = this.root;
        }
      }
      return words;
    }

    /**
     * Rebuilds the word spelled by the path from the root to `node` by
     * following `parent` links and prepending each node's `char`.
     * @param {Node} node
     * @returns {string}
     */
    getWord(node) {
      let word = "";
      while (node.parent && node.char) {
        word = node.char + word;
        node = node.parent;
      }
      return word;
    }
  }

  exports2.WordMatcher = AhoCorasick;
  exports2.default = AhoCorasick;
  Object.defineProperties(exports2, {
    __esModule: { value: true },
    [Symbol.toStringTag]: { value: "Module" },
  });
});