word-match-helper
Version:
Aho-Corasick based word-matching class. Match & filter words.
145 lines (144 loc) • 4.38 kB
JavaScript
(function(global, factory) {
typeof exports === "object" && typeof module !== "undefined" ? factory(exports) : typeof define === "function" && define.amd ? define(["exports"], factory) : (global = typeof globalThis !== "undefined" ? globalThis : global || self, factory(global.WordMatcher = {}));
})(this, function(exports2) {
"use strict";
/**
 * Return a new array holding the distinct values of `arr`, in first-seen order.
 *
 * @param {Iterable<*>} [arr] - Values to de-duplicate.
 * @returns {Array<*>} A fresh array without repeated values.
 */
function arrayDeduplication(arr) {
  return [...new Set(arr)];
}
/**
 * Tell whether a value is "defined", i.e. neither `undefined` nor `null`.
 *
 * @param {*} v - Value to inspect.
 * @returns {boolean} `true` unless `v` is `undefined` or `null`.
 */
function isDef(v) {
  // Loose comparison against null matches exactly `null` and `undefined`.
  return v != null;
}
/**
 * A single node of the character trie.
 *
 * Fields:
 *  - `char`     the character stored on this node (taken from `params.char`)
 *  - `isMatch`  whether a word ends at this node
 *  - `parent`   the node this one hangs under, or `null`
 *  - `backNode` the fallback node, or `null`
 *  - `children` plain object mapping a character to its child node
 */
class Node {
  /**
   * @param {{char: *, is_match?: boolean, parent?: Node, back_node?: Node}} params
   *   Missing (`undefined`/`null`) optional fields fall back to
   *   `false` / `null` / `null` respectively.
   */
  constructor(params) {
    const { char, is_match, parent, back_node } = params;
    const orElse = (value, fallback) =>
      value === null || value === undefined ? fallback : value;

    this.isMatch = orElse(is_match, false);
    this.parent = orElse(parent, null);
    this.backNode = orElse(back_node, null);
    this.children = {};
    this.char = char;
  }

  /**
   * List the direct children of this node.
   *
   * @returns {Node[]} Child nodes, in the key order of `children`.
   */
  getChildNodeList() {
    return Object.keys(this.children).map((key) => this.children[key]);
  }

  /**
   * Fetch the child stored under `params.char`, creating it first when absent.
   * A newly created child gets this node as parent and `params.root` as its
   * fallback node; an existing child is returned untouched.
   *
   * @param {{char: string, root: Node, is_match?: boolean}} params
   * @returns {Node} The existing or newly created child.
   */
  setChildren(params) {
    const { char, root, is_match } = params;
    const existing = this.children[char];
    if (isDef(existing)) {
      return existing;
    }
    const created = new Node({
      char,
      is_match,
      parent: this,
      back_node: root
    });
    this.children[char] = created;
    return created;
  }
}
/**
 * Multi-word matcher backed by a character trie with fallback links.
 *
 * Words are inserted character by character under `this.root`; every node
 * also carries a `backNode` pointing at the node to fall back to when the
 * next input character has no matching child. `search` walks the text once
 * and reports every stored word that ends at each position.
 */
class AhoCorasick {
  /**
   * @param {{targets?: Iterable<string>|string}} [config]
   *   `targets` is the initial word list. A missing config or missing
   *   `targets` yields an empty matcher instead of throwing.
   */
  constructor(config) {
    // Public fields kept for backward compatibility; nothing in this class reads them.
    this.currentState = 0;
    this.wordset = /* @__PURE__ */ new Set();
    const { targets } = config || {};
    this.root = new Node({
      char: null
    });
    this.initACStateTree(targets);
  }

  /**
   * Add more words to the matcher and refresh the fallback links.
   *
   * @param {Iterable<string>|string} wordlist - Words to add; a bare string
   *   is treated as one word.
   */
  addWord(wordlist) {
    this.initACStateTree(wordlist);
  }

  /**
   * Insert every word of `wordlist` into the trie, then recompute the
   * fallback links for the whole trie.
   *
   * @param {Iterable<string>|string} [wordlist]
   */
  initACStateTree(wordlist) {
    // A bare string would otherwise be iterated character by character and
    // registered as a set of one-letter words.
    const source = typeof wordlist === "string" ? [wordlist] : wordlist;
    for (const word of arrayDeduplication(source)) {
      // Entries without characters (empty string, null, numbers, ...) must be
      // skipped: inserting them would flag the root itself as a match or throw.
      if (word == null || !(word.length > 0)) {
        continue;
      }
      const lastIndex = word.length - 1;
      let currentNode = this.root;
      for (let i = 0; i <= lastIndex; i++) {
        currentNode = currentNode.setChildren({
          char: word[i],
          root: this.root,
          is_match: i === lastIndex
        });
      }
      // setChildren ignores is_match for nodes that already existed, so the
      // terminal node is flagged explicitly.
      currentNode.isMatch = true;
    }
    this.setBackNode(this.root);
  }

  /**
   * Assign fallback links to every descendant of `node`, level by level.
   *
   * For each node the fallback chain of its parent is followed until a node
   * with a child for the same character is found; that child becomes the
   * fallback. When none is found the node keeps its current `backNode`
   * (the root for freshly created nodes).
   *
   * @param {Node} node - Node whose descendants are processed.
   */
  setBackNode(node) {
    let level = node.getChildNodeList();
    while (level.length > 0) {
      const nextLevel = [];
      for (const currentNode of level) {
        nextLevel.push(...currentNode.getChildNodeList());
        const parentNode = currentNode.parent;
        if (!parentNode) {
          continue;
        }
        let candidate = parentNode.backNode;
        while (candidate) {
          const child = candidate.children[currentNode.char || ""];
          if (child) {
            currentNode.backNode = child;
            break;
          }
          candidate = candidate.backNode;
        }
      }
      level = nextLevel;
    }
  }

  /**
   * Find every stored word occurring in `text`.
   *
   * @param {string} text - Text to scan; `null`/`undefined` yields `[]`.
   * @returns {{pos: number, word: string}[]} One entry per occurrence, where
   *   `pos` is the index just past the last character of the occurrence.
   *   Entries are ordered by end position; words ending at the same position
   *   are listed longest first.
   */
  search(text) {
    const words = [];
    if (text == null) {
      return words;
    }
    let currentNode = this.root;
    for (let i = 0; i < text.length; i++) {
      const char = text[i];
      let child = currentNode.children[char];
      // No direct transition: retry from each fallback node in turn.
      let fallback = currentNode.backNode;
      while (!child && fallback) {
        child = fallback.children[char];
        fallback = fallback.backNode;
      }
      if (!child) {
        currentNode = this.root;
        continue;
      }
      // Every node on the fallback chain is a suffix of the current path,
      // so each flagged one is a word ending at this position.
      for (let hit = child; hit && hit !== this.root; hit = hit.backNode) {
        if (hit.isMatch) {
          words.push({
            pos: i + 1,
            word: this.getWord(hit)
          });
        }
      }
      currentNode = child;
    }
    return words;
  }

  /**
   * Rebuild the word that ends at `node` by walking up its parent links.
   *
   * @param {Node} node - A trie node.
   * @returns {string} Characters from just below the root down to `node`.
   */
  getWord(node) {
    let word = "";
    for (let cursor = node; cursor.parent && cursor.char; cursor = cursor.parent) {
      word = cursor.char + word;
    }
    return word;
  }
}
// Publish the matcher class both as the named export `WordMatcher` and as the default export.
exports2.WordMatcher = AhoCorasick;
exports2.default = AhoCorasick;
// Tag the exports object with `__esModule: true` and a "Module" string tag.
Object.defineProperties(exports2, { __esModule: { value: true }, [Symbol.toStringTag]: { value: "Module" } });
});