UNPKG

@zhin.js/sensitive-filter

Version:

敏感词过滤插件,过滤不符合中国国情的敏感内容

199 lines (159 loc) 6.29 kB
import { describe, it, expect } from 'vitest' import { createSensitiveWordRegex, getEnabledWords, type SensitiveFilterOptions, politicalWords, violenceWords, pornWords, homophoneWords, } from '../src/sensitive-words' describe('敏感词过滤测试', () => { describe('敏感词正则表达式', () => { it('应该创建有效的正则表达式', () => { const words = ['测试', '敏感词'] const regex = createSensitiveWordRegex(words) expect(regex).toBeInstanceOf(RegExp) // 使用 match 方法测试,因为正则表达式带有 'g' 标志,test() 方法会有状态问题 expect('这是测试'.match(regex)).toBeTruthy() expect('这是敏感词'.match(regex)).toBeTruthy() expect('这是正常内容'.match(regex)).toBeNull() }) it('应该转义特殊字符', () => { const words = ['$test', '.*word'] const regex = createSensitiveWordRegex(words) expect('$test'.match(regex)).toBeTruthy() expect('.*word'.match(regex)).toBeTruthy() }) it('应该不区分大小写', () => { const words = ['Test'] const regex = createSensitiveWordRegex(words) expect('test'.match(regex)).toBeTruthy() expect('TEST'.match(regex)).toBeTruthy() expect('TeSt'.match(regex)).toBeTruthy() }) }) describe('获取启用的敏感词', () => { it('应该返回所有启用的敏感词', () => { const options: SensitiveFilterOptions = { political: true, violence: true, porn: true, homophone: true, } const words = getEnabledWords(options) expect(words.length).toBeGreaterThan(0) expect(words).toContain(politicalWords[0]) expect(words).toContain(violenceWords[0]) expect(words).toContain(pornWords[0]) expect(words).toContain(homophoneWords[0]) }) it('应该只返回启用类别的敏感词', () => { const options: SensitiveFilterOptions = { political: true, violence: false, porn: false, homophone: false, } const words = getEnabledWords(options) expect(words).toContain(politicalWords[0]) expect(words).not.toContain(violenceWords[0]) expect(words).not.toContain(pornWords[0]) }) it('应该包含自定义敏感词', () => { const options: SensitiveFilterOptions = { custom: ['自定义词1', '自定义词2'], } const words = getEnabledWords(options) expect(words).toContain('自定义词1') expect(words).toContain('自定义词2') }) it('应该正确过滤谐音词', () => { const options: SensitiveFilterOptions = { political: false, violence: false, porn: false, prohibited: false, fraud: false, illegal: false, homophone: true, } const words = getEnabledWords(options) expect(words).toContain('GCD') expect(words).toContain('sb') expect(words).toContain('cao') }) }) describe('敏感词检测', () => { it('应该检测出敏感词', () => { const words = ['测试', '敏感'] const regex = createSensitiveWordRegex(words) const text = '这是一个测试敏感词的文本' const matches = text.match(regex) expect(matches).not.toBeNull() expect(matches?.length).toBe(2) expect(matches).toContain('测试') expect(matches).toContain('敏感') }) it('应该去重检测结果', () => { const words = ['测试'] const regex = createSensitiveWordRegex(words) const text = '测试 测试 测试' const matches = text.match(regex) const unique = [...new Set(matches || [])] expect(unique.length).toBe(1) }) }) describe('敏感词替换', () => { it('应该替换敏感词为指定字符', () => { const words = ['敏感词'] const regex = createSensitiveWordRegex(words) const text = '这是一个敏感词测试' const result = text.replace(regex, (match) => '*'.repeat(match.length)) expect(result).toBe('这是一个***测试') }) it('应该替换所有匹配的敏感词', () => { const words = ['测试', '敏感'] const regex = createSensitiveWordRegex(words) const text = '测试敏感测试敏感' const result = text.replace(regex, (match) => '*'.repeat(match.length)) // 文本共8个字符(每个中文字符算1个):"测试"(2) + "敏感"(2) + "测试"(2) + "敏感"(2) = 8 expect(result).toBe('********') }) }) describe('谐音词和变体词过滤', () => { it('应该检测拼音缩写', () => { const words = ['GCD', 'FLG', 'CNM'] const regex = createSensitiveWordRegex(words) expect('这是GCD的东西'.match(regex)).toBeTruthy() expect('flg是邪教'.match(regex)).toBeTruthy() expect('cnm骂人'.match(regex)).toBeTruthy() }) it('应该检测谐音变体', () => { const words = ['共惨党', '政腐', '尼玛'] const regex = createSensitiveWordRegex(words) expect('共惨党说法'.match(regex)).toBeTruthy() expect('政腐机关'.match(regex)).toBeTruthy() expect('尼玛真是'.match(regex)).toBeTruthy() }) it('应该检测符号分隔词', () => { const words = ['法_轮_功', '六_四', '毒_品'] const regex = createSensitiveWordRegex(words) expect('法_轮_功邪教'.match(regex)).toBeTruthy() expect('六_四事件'.match(regex)).toBeTruthy() expect('毒_品交易'.match(regex)).toBeTruthy() }) it('应该替换谐音词', () => { const words = ['sb', 'cao', 'fuck'] const regex = createSensitiveWordRegex(words) const text = '你个sb,去cao,fuck off' const result = text.replace(regex, (match) => '*'.repeat(match.length)) expect(result).toContain('**') expect(result).toContain('***') expect(result).toContain('****') expect(result).not.toContain('sb') expect(result).not.toContain('cao') expect(result).not.toContain('fuck') }) }) })