@coffeeandfun/google-profanity-words
Version:
Real profanity words banned by Google, extracted from their hidden API before shutdown. Now available as an easy-to-use Node.js library for content filtering.
235 lines (193 loc) • 8.66 kB
JavaScript
import { ProfanityEngine } from '../index.js';
/**
 * Engine instance shared by every test in this file.
 * It is assigned in the suite's `beforeAll` hook and `reset()` is called on it after each test.
 */
let profanity;
describe('English Profanity tests', () => {
// Build the shared engine once for the whole suite, pinned to the English list.
beforeAll(async () => {
  const engineOptions = { language: 'en', testMode: true };
  profanity = new ProfanityEngine(engineOptions);
});

// Call reset() on the shared engine after every test.
afterEach(() => {
  profanity.reset();
});
// Baseline checks for the three lookups used throughout this file:
// all(), search() and hasCurseWords().
describe('Core English functionality', () => {
  it('Should get all the profanity words in an array', async () => {
    const wordList = await profanity.all();

    // 958 is the expected size of the English list; update it whenever the list changes.
    expect(wordList.length).toEqual(958);
    expect(Array.isArray(wordList)).toBe(true);
    expect(wordList.length).toBeGreaterThan(0);
  });

  it('Should return true for profanity words', async () => {
    expect(await profanity.search('shit')).toEqual(true);
  });

  it('Should return false for normal words', async () => {
    expect(await profanity.search('ka')).toEqual(false);
  });

  it('Should return false for any empty string', async () => {
    expect(await profanity.search('')).toEqual(false);
  });

  it('Should return true for a sentence containing a profanity word', async () => {
    const dirtySentence = 'Do not use bad words like shit or asshole.';

    expect(await profanity.hasCurseWords(dirtySentence)).toEqual(true);
  });

  it('Should return false for a sentence with no profanity word', async () => {
    const cleanSentence = 'This is a clean and polite sentence.';

    expect(await profanity.hasCurseWords(cleanSentence)).toEqual(false);
  });
});
// Edge cases for the English list: casing, surrounding whitespace, punctuation,
// de-duplication of results, and words embedded in longer words.
describe('English-specific edge cases', () => {
  it('Should handle case sensitivity correctly', async () => {
    expect(await profanity.search('SHIT')).toBe(true);
    expect(await profanity.search('Shit')).toBe(true);
    expect(await profanity.search('shit')).toBe(true);
  });

  it('Should handle whitespace around words', async () => {
    expect(await profanity.search(' shit ')).toBe(true);
    expect(await profanity.search('\tshit\n')).toBe(true);
  });

  it('Should detect profanity with punctuation in sentences', async () => {
    const testSentences = [
      'What the shit!',
      'Oh, shit.',
      'Shit? Really?',
      'This is shit, man.',
      '"Shit," he said.',
      'Absolute shit-show.',
    ];

    for (const sentence of testSentences) {
      expect(await profanity.hasCurseWords(sentence)).toBe(true);
    }
  });

  it('Should return correct curse words from sentences', async () => {
    const sentence = 'Do not use bad words like shit or asshole.';
    const foundWords = await profanity.getCurseWords(sentence);

    expect(foundWords).toContain('shit');
    expect(foundWords).toContain('asshole');
    expect(foundWords.length).toBe(2);
  });

  it('Should handle multiple instances of same word', async () => {
    const sentence = 'shit shit shit everywhere';
    const foundWords = await profanity.getCurseWords(sentence);

    // Repeated occurrences must collapse to a single entry.
    expect(foundWords).toContain('shit');
    expect(foundWords.length).toBe(1);
  });

  it('Should validate specific English profanity words exist', async () => {
    // A selection of words expected to be present in the English list.
    const commonProfanityWords = [
      'shit', 'fuck', 'damn', 'hell', 'ass', 'bitch',
    ];

    for (const word of commonProfanityWords) {
      expect(await profanity.search(word)).toBe(true);
    }
  });

  it('Should not flag common English words', async () => {
    const commonWords = [
      'hello', 'world', 'computer', 'test', 'function', 'javascript',
      'english', 'language', 'sentence', 'word', 'clean', 'polite',
    ];

    for (const word of commonWords) {
      expect(await profanity.search(word)).toBe(false);
    }
  });

  it('Should handle contractions and apostrophes', async () => {
    const sentence = "Don't say shit, it's not appropriate.";

    expect(await profanity.hasCurseWords(sentence)).toBe(true);
  });

  it('Should handle hyphenated words', async () => {
    const sentence = 'This is a shit-storm.';

    expect(await profanity.hasCurseWords(sentence)).toBe(true);
  });

  it('Should handle mixed case in sentences', async () => {
    const sentence = 'This SENTENCE has SHIT and damn IN it';

    expect(await profanity.hasCurseWords(sentence)).toBe(true);

    // Matches are expected back in lowercase even when the input is upper-case.
    const foundWords = await profanity.getCurseWords(sentence);
    expect(foundWords).toContain('shit');
    expect(foundWords).toContain('damn');
  });

  it('Should handle words at sentence boundaries', async () => {
    expect(await profanity.hasCurseWords('shit')).toBe(true);
    expect(await profanity.hasCurseWords('shit is bad')).toBe(true);
    expect(await profanity.hasCurseWords('that is shit')).toBe(true);
    expect(await profanity.hasCurseWords('the shit word')).toBe(true);
  });

  it('Should not detect partial word matches', async () => {
    // Inputs where a listed word is only a fragment or near-miss of a longer, harmless word.
    const sentences = [
      'The weather is hellish today', // "hell" inside "hellish"
      'I love my shirty shirt', // "shirty"/"shirt" are near-misses of "shit", not substrings of it
      'Assessment is important', // "ass" inside "Assessment"
    ];

    for (const sentence of sentences) {
      const result = await profanity.hasCurseWords(sentence);

      // The expected verdict depends on the engine's word-boundary handling, which this
      // suite does not pin down, so only require that the call resolves to a boolean.
      // TODO: tighten to `toBe(false)` once substring matching is confirmed to be off.
      expect(typeof result).toBe('boolean');
    }
  });
});
// Coarse timing guards for the English list. Durations are measured with the
// monotonic process.hrtime.bigint() clock so that wall-clock adjustments and
// millisecond rounding cannot skew the measured interval.
describe('Performance tests for English dataset', () => {
  /**
   * Milliseconds elapsed since a previous process.hrtime.bigint() reading.
   * @param {bigint} startNs - Start reading in nanoseconds.
   * @returns {number} Elapsed time in (fractional) milliseconds.
   */
  const elapsedMsSince = (startNs) => Number(process.hrtime.bigint() - startNs) / 1e6;

  it('Should handle large English text efficiently', async () => {
    // One curse word placed between two 1000-sentence runs of clean text.
    const largeText = `${'This is a test sentence. '.repeat(1000)}shit ${'Clean text. '.repeat(1000)}`;

    const startNs = process.hrtime.bigint();
    const result = await profanity.hasCurseWords(largeText);
    const elapsedMs = elapsedMsSince(startNs);

    expect(result).toBe(true);
    expect(elapsedMs).toBeLessThan(100); // Should complete in under 100ms
  });

  it('Should efficiently search through all English terms', async () => {
    // Fetch the list before starting the clock so only the searches are timed.
    const allWords = await profanity.all();

    const startNs = process.hrtime.bigint();
    for (let i = 0; i < 100; i++) {
      await profanity.search(allWords[i % allWords.length]);
    }
    const elapsedMs = elapsedMsSince(startNs);

    expect(elapsedMs).toBeLessThan(50); // Should be very fast with Set lookup
  });

  it('Should handle concurrent operations on English dataset', async () => {
    // Start every call before awaiting any of them.
    const promises = [
      profanity.search('shit'),
      profanity.hasCurseWords('this is shit'),
      profanity.getCurseWords('damn shit'),
      profanity.all(),
      profanity.search('fuck'),
    ];

    const results = await Promise.all(promises);

    expect(results[0]).toBe(true); // search shit
    expect(results[1]).toBe(true); // hasCurseWords
    expect(results[2]).toContain('shit'); // getCurseWords
    expect(results[3].length).toBe(958); // all words
    expect(results[4]).toBe(true); // search fuck
  });
});
// Guards against callers corrupting the engine's word list and against
// non-deterministic detection results.
describe('Data integrity for English', () => {
  it('Should not allow modification of English word list', async () => {
    const terms1 = await profanity.all();
    const originalLength = terms1.length;

    // Mutate the returned array and leave the mutations in place: if all() handed out
    // its internal storage, the next call would expose the extra entry and the
    // overwritten first element.
    terms1.push('fake-word');
    terms1[0] = 'modified';

    // A fresh call must be unaffected by the changes made to the earlier result.
    const terms2 = await profanity.all();
    expect(terms2.length).toBe(originalLength);
    expect(terms2).not.toContain('fake-word');
    expect(terms2[0]).not.toBe('modified');
  });

  it('Should provide consistent results for English detection', async () => {
    const sentence = 'This sentence has shit and damn';

    // The same input must produce the same word list on repeated calls.
    const result1 = await profanity.getCurseWords(sentence);
    const result2 = await profanity.getCurseWords(sentence);
    const result3 = await profanity.hasCurseWords(sentence);

    expect(result1).toEqual(result2);
    expect(result3).toBe(true);
  });
});
});