UNPKG

sensitive-word-helper-plus

Version:

基于DFA算法的一个敏感词助手, 可以做步长控制

267 lines (232 loc) 7.67 kB
/* * @Author: maoguiun * @Date: 2019-09-23 14:35:38 * @LastEditors: maoguiun * @LastEditTime: 2019-11-05 12:51:55 * @FilePath: \sensitive-word-helper\src\sensitiveWithStep.ts */ import { Node, Tree } from './core'; interface FilterValue { text?: string | boolean; filter: Array<string>; sensitiveWordIndexs?: Array<number>; pass?: boolean; } interface SwhpConstructor { keywords: Array<string>; replacement?: string; step?: number } // 判定对象 interface JudgeObject { startIndex: number; // 这个对象生成的 index lastFindNodeIndex: number; // 找到上个节点 的index findNodeIndexArr: Array<number>; // 记录找到的节点的 index startNode: Node; // 存放开始节点 prevNode?: Node; // 上一次找到的节点 currNode?: Node | undefined; // 当前节点 filterKey: string; // 存储当前敏感词,每次匹配到则自动合并过来 } class SensitiveWithStep extends Tree { static default: any; // 是否替换原文本敏感词 constructor(obj: SwhpConstructor) { super(); const { keywords, step, replacement } = obj; if (!(keywords instanceof Array && keywords.length >= 1)) { console.error('Sensitive-word:未将过滤词数组传入!'); return; } if (typeof replacement === 'string') { this.replacement = replacement; } if (Number.isInteger(step) && step >= 0) { this.step = step; } else { console.error( 'Sensitive-word:step 必须是非负整数, 传入数据有误,目前 step 已初始化为 3 !' ); } // 创建Trie树 for (let item of keywords) { if (!item) continue; this.insert(item.toLocaleUpperCase()); } this._createFailureTable(); } _filterFn( word: string, every: boolean = false, replace: boolean = true ): FilterValue { if (typeof word !== 'string') { console.error('word must be String !') word = '' } let startIndex = 0; let endIndex = startIndex; const wordLen = word.length; let originalWord: string = word; let filterKeywords: Array<string> = []; word = word.toLocaleUpperCase(); let sensitiveWordIndexs: Array<number> = []; let judgeObjectList: { [key: string]: JudgeObject; } = {}; // 保存过滤文本 let filterTextArr: Array<string> = originalWord.split(''); // 是否通过,无敏感词 let isPass = true; // // 上一个Node与当前Node // let prevNode: Node = this.root; let currNode: Node | undefined; for (endIndex; endIndex <= wordLen; endIndex++) { let key: string = word[endIndex]; let originalKey: string = originalWord[endIndex]; // currNode = this.search(key, prevNode.children); // 分词数组如果是空的,就表示目前没有未处理的敏感词 const judgeObjectListLeng = Object.keys(judgeObjectList).length; if (!judgeObjectListLeng) { currNode = this.search(key, this.root.children); if (!currNode) { // 没有找到就直接拼接 if (!replace) { continue; } filterTextArr[endIndex] = originalKey; continue; } // 判断这个节点是否就是最后一个节点,针对的是单个字符的敏感词 if (currNode.word) { filterTextArr[endIndex] = this.replacement; sensitiveWordIndexs.push(endIndex) continue; } let judgeObject: JudgeObject = { startIndex: endIndex, lastFindNodeIndex: endIndex, findNodeIndexArr: [endIndex], startNode: currNode, prevNode: currNode, filterKey: originalKey // 存储当前敏感词,每次匹配到则自动合并过来 }; judgeObjectList[String(endIndex)] = judgeObject; continue; } // 分词数组中不为空 for (let judgeKey in judgeObjectList) { const judgeObject = judgeObjectList[judgeKey]; // 先判断有没有节点 const currNode = this.search(key, judgeObject.prevNode.children); // 如果没有找到就去上一级 if (!currNode) { let failure: Node = judgeObject.prevNode.failure; let cruNode: Node | undefined; while (failure) { cruNode = this.search(key, failure.children); if (currNode) { break; } failure = failure.failure; } if (cruNode) { // 找到了就又往list 中插入一个分析对象 let judgeObject_: JudgeObject = { startIndex: endIndex, lastFindNodeIndex: endIndex, findNodeIndexArr: [endIndex], startNode: cruNode, prevNode: cruNode, filterKey: originalKey // 存储当前敏感词,每次匹配到则自动合并过来 }; judgeObjectList[String(endIndex)] = judgeObject_; } } // 先判断这个划词对象是否过期了 if (endIndex - judgeObject.lastFindNodeIndex > this.step) { // 那么跳过 ,并删除这个对象 delete judgeObjectList[judgeKey]; } // 如果没有找到currNode 或者 超过了step 那么就跳出当前循环 if (!currNode || endIndex - judgeObject.lastFindNodeIndex > this.step) { continue; } // 如果找到了 // 判断word 是否是 true const findNodeIndexArr = judgeObject.findNodeIndexArr.concat(endIndex); const filterKey = judgeObject.filterKey + originalKey; if (currNode.word) { // 这里不清理这个对象的原因是可能后面还有word 为 true 的情况 judgeObjectList[judgeKey] = { ...judgeObject, findNodeIndexArr, lastFindNodeIndex: endIndex, prevNode: currNode, filterKey }; isPass = false; if (every) { break; } filterKeywords.push(filterKey); if (!replace) { continue; } findNodeIndexArr.forEach(index => { filterTextArr[index] = this.replacement; sensitiveWordIndexs.push(index) }); continue; } // word 不是true 的情况 if (!currNode.word) { judgeObjectList[judgeKey] = { ...judgeObject, findNodeIndexArr, lastFindNodeIndex: endIndex, prevNode: currNode, filterKey }; } } } return { text: replace ? filterTextArr.join('') : originalWord, filter: [...new Set(filterKeywords)], sensitiveWordIndexs, pass: isPass }; } /** * 异步快速检测字符串是否无敏感词 * @param word */ every(word: string): Promise<boolean> { return Promise.resolve(this._filterFn(word, true).pass); } /** * 同步快速检测字符串是否无敏感词 * @param word */ everySync(word: string): boolean { return this._filterFn(word, true).pass; } /** * 同步过滤方法 * @param word * @param replace */ filterSync(word: string, replace: boolean = true): FilterValue { return this._filterFn(word, false, replace); } /** * 异步过滤方法 * @param word * @param replace */ async filter(word: string, replace: boolean = true): Promise<FilterValue> { return Promise.resolve(this._filterFn(word, false, replace)); } } SensitiveWithStep.default = SensitiveWithStep; export = SensitiveWithStep;