@coffeeandfun/google-profanity-words
Version:
Real profanity words banned by Google, extracted from their hidden API before shutdown. Now available as an easy-to-use Node.js library for content filtering.
493 lines (422 loc) • 17.8 kB
JavaScript
import { ProfanityEngine } from '../index.js';
// Engine instance shared by every test in this file. It is assigned in the
// suite's `beforeAll` hook, which always pins the language to Irish ('ga'),
// so no environment-driven language selection is needed here.
let profanity;
describe('Irish (Gaeilge) Profanity tests', () => {
// Create a single engine for the whole suite, pinned to Irish and in test mode.
beforeAll(async () => {
  const irishEngineOptions = {
    language: 'ga', // ISO 639-1 code for Irish (Gaeilge)
    testMode: true,
  };
  profanity = new ProfanityEngine(irishEngineOptions);
});

// Reset the shared engine once each test case has finished.
afterEach(() => {
  profanity.reset();
});
describe('Core Irish functionality', () => {
  /**
   * Picks a word from the loaded list to use as a positive fixture.
   *
   * The word must already be trimmed and lower-cased, and consist of letters
   * only, so that it stays a single token when dropped into a sentence.
   * NOTE(review): this assumes the engine matches whole, case-normalised
   * tokens — confirm against the engine implementation.
   *
   * @returns {Promise<string|undefined>} A suitable word, or `undefined` if
   *   the loaded list contains none.
   */
  const pickKnownWord = async () => {
    const allWords = await profanity.all();
    return allWords.find(
      (word) => word === word.trim().toLowerCase() && /^\p{L}+$/u.test(word),
    );
  };

  it('Should get all the profanity words in an array', async () => {
    const allWords = await profanity.all();
    expect(Array.isArray(allWords)).toBe(true);
    expect(allWords.length).toBeGreaterThan(0);
    // TODO: assert the exact word count once the Irish list is finalised.
  });

  it('Should return true for Irish profanity words', async () => {
    // Use a word taken from the loaded list itself, so the assertion really
    // exercises a positive match instead of a placeholder.
    const knownWord = await pickKnownWord();
    expect(knownWord).toBeDefined();
    expect(await profanity.search(knownWord)).toBe(true);
  });

  it('Should return false for normal Irish words', async () => {
    const normalWords = [
      'dia duit', // Hello (literally "God to you")
      'go raibh maith agat', // Thank you
      'teach', // House
      'leabhar', // Book
      'uisce', // Water
      'Gaeilge', // Irish language
      'ríomhaire', // Computer
      'teaghlach', // Family
      'cara', // Friend
      'scoil', // School
      'céad míle fáilte', // A hundred thousand welcomes
    ];
    for (const word of normalWords) {
      expect(await profanity.search(word)).toBe(false);
    }
  });

  it('Should return false for any empty string', async () => {
    const searchWord = await profanity.search('');
    expect(searchWord).toEqual(false);
  });

  it('Should return true for a sentence containing a profanity word', async () => {
    const knownWord = await pickKnownWord();
    expect(knownWord).toBeDefined();
    // "There is <word> in this sentence". The word sits mid-sentence with a
    // space on both sides, so no punctuation can cling to it.
    const sentence = `Tá ${knownWord} sa abairt seo.`;
    expect(await profanity.hasCurseWords(sentence)).toBe(true);
  });

  it('Should return false for a sentence with no profanity word', async () => {
    // "This is a clean, well-mannered sentence"
    const sentence = 'Is abairt ghlan dea-bhéasach í seo.';
    const hasCurseWords = await profanity.hasCurseWords(sentence);
    expect(hasCurseWords).toEqual(false);
  });
});
describe('Irish-specific edge cases', () => {
  /**
   * Runs `profanity.search` on each term, in order, and expects `false`
   * every time.
   *
   * @param {string[]} terms - Words or phrases that must be reported clean.
   * @returns {Promise<void>}
   */
  const expectSearchClean = async (terms) => {
    for (const term of terms) {
      expect(await profanity.search(term)).toBe(false);
    }
  };

  it('Should handle Irish fadas (accent marks)', async () => {
    // Everyday words carrying a fada (acute accent over a vowel).
    await expectSearchClean([
      'fáilte', // Welcome
      'tír', // Country/land
      'óg', // Young
      'mór', // Big
      'úr', // Fresh/new
      'éan', // Bird
      'íoc', // Pay
      'bádóir', // Boatman
      'cúpla', // Couple
      'lón', // Lunch
    ]);
  });

  it('Should handle Irish initial mutations (séimhiú/urú)', async () => {
    const mutationPairs = [
      // Séimhiú (lenition): an 'h' follows the initial consonant
      ['bean', 'bhean'], // Woman
      ['fear', 'fhear'], // Man
      ['cat', 'chat'], // Cat
      ['doras', 'dhoras'], // Door
      // Urú (eclipsis): a letter is prefixed to the initial consonant
      ['bean', 'mbean'], // Woman
      ['fear', 'bhfear'], // Man
      ['cat', 'gcat'], // Cat
      ['doras', 'ndoras'], // Door
    ];
    // Flattening keeps the check order: root, mutated, root, mutated, ...
    await expectSearchClean(mutationPairs.flat());
  });

  it('Should handle Irish case sensitivity correctly', async () => {
    // The same clean word in four different casings.
    await expectSearchClean(['GAEILGE', 'Gaeilge', 'gaeilge', 'gAeIlGe']);
  });

  it('Should handle Irish verb conjugations', async () => {
    await expectSearchClean([
      // Forms of "bí" (to be)
      'tá', // is/are (present)
      'bhí', // was/were (past)
      'beidh', // will be (future)
      // Forms of "déan" (to do/make)
      'déanaim', // I do
      'déanann', // does (present)
      'rinne', // did (past)
      'déanfaidh', // will do (future)
    ]);
  });

  it('Should handle Irish noun declensions', async () => {
    await expectSearchClean([
      // Forms of "fear" (man)
      'fear', // Nominative singular
      'fir', // Nominative plural
      'fhear', // Lenited form of "fear"
      'bhfear', // Eclipsed form of "fear"
      // Forms of "bean" (woman)
      'bean', // Nominative singular
      'mná', // Nominative plural
      'mhná', // Lenited form of "mná"
    ]);
  });

  it('Should handle Irish compound words', async () => {
    await expectSearchClean([
      'ríomhaire', // Computer
      'teilifís', // Television
      'rothar', // Bicycle
      'ospidéal', // Hospital
      'ollscoil', // University
      'leabharlann', // Library
    ]);
  });

  it('Should handle Irish numbers and counting', async () => {
    const irishNumbers = [
      'a haon', // One
      'a dó', // Two
      'a trí', // Three
      'a ceathair', // Four
      'a cúig', // Five
      'a sé', // Six
      'a seacht', // Seven
      'a hocht', // Eight
      'a naoi', // Nine
      'a deich', // Ten
    ];
    // These are two-word phrases, so they go through the sentence API.
    for (const phrase of irishNumbers) {
      expect(await profanity.hasCurseWords(phrase)).toBe(false);
    }
  });

  it('Should handle Irish prepositional pronouns', async () => {
    // Irish fuses a preposition (here "ag", at) with a pronoun.
    await expectSearchClean([
      'agam', // At me (ag + mé)
      'agat', // At you (ag + tú)
      'aige', // At him (ag + é)
      'aici', // At her (ag + í)
      'againn', // At us (ag + muid)
      'agaibh', // At you, plural (ag + sibh)
      'acu', // At them (ag + iad)
    ]);
  });

  it('Should handle whitespace around Irish words', async () => {
    const irishWord = 'fáilte';
    await expectSearchClean([` ${irishWord} `, `\t${irishWord}\n`]);
  });

  it('Should handle mixed Irish and English text', async () => {
    const mixedSentences = [
      'I love Gaeilge', // I love Irish
      'Tá mé ag foghlaim English', // I am learning English
      'Hello agus dia duit', // Hello and "God to you"
      'Go raibh maith agat very much', // Thank you very much
    ];
    // Only the return type is checked here, not the verdict.
    for (const sentence of mixedSentences) {
      expect(typeof (await profanity.hasCurseWords(sentence))).toBe('boolean');
    }
  });

  it('Should return unique words only in Irish text', async () => {
    // The same word is repeated three times on purpose.
    const sentence = 'fáilte fáilte fáilte go hÉireann';
    const foundWords = await profanity.getCurseWords(sentence);
    expect(Array.isArray(foundWords)).toBe(true);
    // Whatever is reported must contain no duplicates.
    expect(new Set(foundWords).size).toBe(foundWords.length);
  });

  it('Should handle Irish dialectal variations', async () => {
    // NOTE(review): glosses and dialect labels are the original author's
    // annotations and have not been independently verified.
    const dialectalWords = [
      'pótaí', // Potatoes (Munster)
      'prátaí', // Potatoes (Connacht/Ulster)
      'gasúr', // Boy (Munster)
      'buachaill', // Boy (Connacht/Ulster)
      'cailín', // Girl (general)
      'girseach', // Girl (Ulster)
    ];
    // Only the return type is checked here, not the verdict.
    for (const word of dialectalWords) {
      expect(typeof (await profanity.search(word))).toBe('boolean');
    }
  });

  it('Should handle Irish traditional vs modern spelling', async () => {
    // Pairs of [pre-reform spelling, modern spelling].
    const spellingPairs = [
      ['Gaedhilg', 'Gaeilge'], // Irish language
      ['oidhche', 'oíche'], // Night
      ['ceathramhadh', 'ceathrú'], // Quarter/fourth
    ];
    // Flattening keeps the check order: traditional, modern, ...
    await expectSearchClean(spellingPairs.flat());
  });
});
describe('Performance tests for Irish dataset', () => {
  it('Should handle large Irish text efficiently', async () => {
    const filler = 'Seo abairt tástála. '.repeat(1000);
    const trailer = 'Téacs glan. '.repeat(1000);
    const largeText = `${filler}téacs Gaeilge ${trailer}`;

    // Prime the engine before starting the clock, so that any lazy loading of
    // the word list is not counted as part of the scan being measured.
    await profanity.all();

    const startedAt = Date.now();
    const result = await profanity.hasCurseWords(largeText);
    const elapsedMs = Date.now() - startedAt;

    expect(typeof result).toBe('boolean');
    expect(elapsedMs).toBeLessThan(100); // Budget: under 100ms
  });

  it('Should efficiently search through all Irish terms', async () => {
    const allWords = await profanity.all();
    if (allWords.length > 0) {
      // Look up at most the first 100 words from the list.
      const sample = allWords.slice(0, 100);

      const startedAt = Date.now();
      for (const word of sample) {
        await profanity.search(word);
      }
      const elapsedMs = Date.now() - startedAt;

      expect(elapsedMs).toBeLessThan(50); // Budget: under 50ms
    }
  });

  it('Should handle concurrent operations on Irish dataset', async () => {
    // Start all five calls before awaiting any of them.
    const [
      failteIsCurse,
      sentenceHasCurse,
      foundWords,
      allWords,
      slanIsCurse,
    ] = await Promise.all([
      profanity.search('fáilte'),
      profanity.hasCurseWords('seo téacs Gaeilge'),
      profanity.getCurseWords('an téacs Gaeilge'),
      profanity.all(),
      profanity.search('slán'),
    ]);

    expect(failteIsCurse).toBe(false); // "fáilte" should be clean
    expect(sentenceHasCurse).toBe(false); // sentence should be clean
    expect(Array.isArray(foundWords)).toBe(true);
    expect(Array.isArray(allWords)).toBe(true);
    expect(slanIsCurse).toBe(false); // "slán" should be clean
  });
});
describe('Irish language specificity', () => {
  it('Should load Irish words correctly or fallback to English', async () => {
    // Either the Irish list or a fallback list must yield at least one word.
    // (Original author's note: the English fallback holds 958 words.)
    const loadedWords = await profanity.all();
    expect(loadedWords.length).toBeGreaterThan(0);
  });

  it('Should handle Irish-specific character encoding (UTF-8)', async () => {
    // The traditional Irish alphabet (no j, k, q, v, w, x, y, z), with each
    // vowel also listed in its fada form.
    const irishChars = [
      'a', 'á', 'b', 'c', 'd', 'e', 'é', 'f', 'g', 'h',
      'i', 'í', 'l', 'm', 'n', 'o', 'ó', 'p', 'r', 's',
      't', 'u', 'ú',
    ];
    // Only the return type is checked for single letters.
    for (const letter of irishChars) {
      const verdict = await profanity.search(letter);
      expect(typeof verdict).toBe('boolean');
    }
  });

  it('Should handle Irish place names', async () => {
    const placeNames = [
      'Éire', // Ireland
      'Baile Átha Cliath', // Dublin
      'Corcaigh', // Cork
      'Gaillimh', // Galway
      'Luimneach', // Limerick
      'Port Láirge', // Waterford
      'An Clár', // Clare
      'Ciarraí', // Kerry
    ];
    for (const placeName of placeNames) {
      const verdict = await profanity.search(placeName);
      expect(verdict).toBe(false);
    }
  });

  it('Should handle Irish Celtic cultural terms', async () => {
    const culturalTerms = [
      'céilí', // Social gathering with music/dance
      'seisiún', // Music session
      'bodhrán', // Traditional drum
      'uilleann', // Irish pipes
      'fleadh', // Festival
      'comhrá', // Conversation
      'craic', // Fun/good time
      'sláinte', // Health/cheers
    ];
    for (const culturalTerm of culturalTerms) {
      const verdict = await profanity.search(culturalTerm);
      expect(verdict).toBe(false);
    }
  });
});
describe('Data integrity for Irish', () => {
  it('Should not allow modification of Irish word list', async () => {
    const returnedTerms = await profanity.all();
    // Independent copy taken before tampering; this is the reference state.
    const snapshot = [...returnedTerms];

    // Tamper with the array handed back by the engine. The fake word is left
    // in place (not popped again) so the checks below can actually detect an
    // engine that shares its internal array with callers.
    if (returnedTerms.length > 0) {
      returnedTerms[0] = 'athraithe';
    }
    returnedTerms.push('focal-bréige');

    // A fresh read must be unaffected by the tampering above.
    const freshTerms = await profanity.all();
    expect(freshTerms.length).toBe(snapshot.length);
    expect(freshTerms).not.toContain('focal-bréige');
    expect(freshTerms).toEqual(snapshot);
  });

  it('Should provide consistent results for Irish detection', async () => {
    const sentence = 'Seo abairt i nGaeilge';
    const firstRun = await profanity.getCurseWords(sentence);
    const secondRun = await profanity.getCurseWords(sentence);
    const hasCurse = await profanity.hasCurseWords(sentence);
    // Two identical calls must agree with each other.
    expect(firstRun).toEqual(secondRun);
    expect(typeof hasCurse).toBe('boolean');
  });
});
describe('Configuration and fallback for Irish', () => {
  it('Should handle missing Irish language file gracefully', async () => {
    // Whether or not an Irish word file is present, a freshly built engine
    // must still return a non-empty list.
    const irishProfanity = new ProfanityEngine({
      language: 'ga',
      testMode: true,
    });
    const terms = await irishProfanity.all();
    expect(terms.length).toBeGreaterThan(0);
  });

  it('Should suppress warnings in test mode for Irish', async () => {
    const originalWarn = console.warn;
    let warnCalled = false;
    // Replace console.warn with a probe that records any call.
    console.warn = () => {
      warnCalled = true;
    };
    try {
      const irishProfanity = new ProfanityEngine({
        language: 'ga',
        testMode: true,
      });
      // Only warnings raised while loading the list are under test, so
      // anything flagged during construction is discarded here.
      warnCalled = false;
      await irishProfanity.all();
      expect(warnCalled).toBe(false);
    } finally {
      // Always restore the real console.warn, even when the assertion fails
      // or loading rejects, so the probe cannot leak into other tests.
      console.warn = originalWarn;
    }
  });
});
describe('Irish grammar and linguistics', () => {
  /**
   * Feeds each sentence to `profanity.hasCurseWords`, in order, and expects
   * `false` for every one.
   *
   * @param {string[]} sentences - Sentences that must be reported clean.
   * @returns {Promise<void>}
   */
  const expectCleanSentences = async (sentences) => {
    for (const text of sentences) {
      const flagged = await profanity.hasCurseWords(text);
      expect(flagged).toBe(false);
    }
  };

  it('Should handle Irish syntax patterns (VSO order)', async () => {
    // Irish sentences normally run Verb-Subject-Object.
    await expectCleanSentences([
      'Tá Seán ag rith', // Seán is running (literally: Is Seán at running)
      'Chonaic mé an madra', // I saw the dog (literally: Saw I the dog)
      'Léann sí leabhar', // She reads a book (literally: Reads she book)
    ]);
  });

  it('Should handle Irish copula vs substantive verb', async () => {
    // Irish has two verbs for "to be": the copula "is" and substantive "tá".
    await expectCleanSentences([
      'Is múinteoir mé', // I am a teacher (copula)
      'Tá mé ag obair', // I am working (substantive verb)
      'Is maith liom tae', // I like tea (copula)
      'Tá tae agam', // I have tea (substantive verb)
    ]);
  });

  it('Should handle Irish conditional and subjunctive moods', async () => {
    await expectCleanSentences([
      'Dá mbeinn saibhir', // If I were rich (conditional)
      'Go raibh maith agat', // Thank you (subjunctive: "that good be at you")
      'Ar mhaith leat tae?', // Would you like tea? (conditional)
    ]);
  });
});
});