UNPKG

@coffeeandfun/google-profanity-words

Version:

Real profanity words banned by Google, extracted from their hidden API before shutdown. Now available as an easy-to-use Node.js library for content filtering.

446 lines (379 loc) 16.1 kB
import { ProfanityEngine } from '../index.js'; const language = process.env.LANGUAGE || 'en'; // Default to 'en' if the LANGUAGE environment variable is not set let profanity; describe('French Profanity tests', () => { beforeAll(async () => { profanity = new ProfanityEngine({ language: 'fr', testMode: true, }); }); afterEach(() => { profanity.reset(); }); describe('Core French functionality', () => { it('Should get all the profanity words in an array', async () => { const allWords = await profanity.all(); expect(Array.isArray(allWords)).toBe(true); expect(allWords.length).toBeGreaterThan(0); // Update this number based on your actual French word count // expect(allWords.length).toEqual(XXX); }); it('Should return true for French profanity words', async () => { // Test with a common French profanity word (if it exists in your list) // Replace 'testword' with an actual word from your French list const searchWord = await profanity.search('testword'); expect(typeof searchWord).toBe('boolean'); }); it('Should return false for normal French words', async () => { const normalWords = [ 'bonjour', // Hello 'merci', // Thank you 'maison', // House 'livre', // Book 'eau', // Water 'français', // French 'ordinateur', // Computer 'famille', // Family ]; for (const word of normalWords) { expect(await profanity.search(word)).toBe(false); } }); it('Should return false for any empty string', async () => { const searchWord = await profanity.search(''); expect(searchWord).toEqual(false); }); it('Should return true for a sentence containing a profanity word', async () => { // Replace with actual French sentence containing profanity from your list const sentence = 'Cette phrase contient un mot inapproprié.'; // "This sentence contains an inappropriate word" const hasCurseWords = await profanity.hasCurseWords(sentence); expect(typeof hasCurseWords).toBe('boolean'); }); it('Should return false for a sentence with no profanity word', async () => { const sentence = 'Cette phrase est propre et polie.'; // "This sentence is clean and polite" const hasCurseWords = await profanity.hasCurseWords(sentence); expect(hasCurseWords).toEqual(false); }); }); describe('French-specific edge cases', () => { it('Should handle French accented characters', async () => { // Test words with various French accents const accentedWords = [ 'café', // é 'hôtel', // ô 'être', // ê 'français', // ç 'naïf', // ï 'où', // ù 'âge', // â 'élève', // è ]; for (const word of accentedWords) { const result = await profanity.search(word); expect(typeof result).toBe('boolean'); } }); it('Should handle French case sensitivity correctly', async () => { const testWord = 'bonjour'; expect(await profanity.search('BONJOUR')).toBe(false); expect(await profanity.search('Bonjour')).toBe(false); expect(await profanity.search('bonjour')).toBe(false); expect(await profanity.search('bOnJoUr')).toBe(false); }); it('Should handle French apostrophes and contractions', async () => { const testSentences = [ "C'est une phrase.", // It's a sentence "L'ordinateur est cassé.", // The computer is broken "D'accord avec vous.", // I agree with you "Qu'est-ce que c'est?", // What is it? "N'importe quoi!", // Whatever! "J'ai mangé.", // I ate ]; for (const sentence of testSentences) { const result = await profanity.hasCurseWords(sentence); expect(typeof result).toBe('boolean'); } }); it('Should handle French punctuation correctly', async () => { const testSentences = [ 'Qu\'est-ce que c\'est ?', // What is it? (French spacing before ?) 'Bonjour !', // Hello! (French spacing before !) 'Non, merci.', // No, thank you. 'Il a dit : « Bonjour »', // He said: "Hello" (French quotes) 'C\'est vrai ; vraiment.', // It's true; really. ]; for (const sentence of testSentences) { const result = await profanity.hasCurseWords(sentence); expect(typeof result).toBe('boolean'); } }); it('Should handle French gender variations', async () => { // Test masculine and feminine forms const genderPairs = [ ['acteur', 'actrice'], // actor/actress ['chanteur', 'chanteuse'], // singer (m/f) ['directeur', 'directrice'], // director (m/f) ['français', 'française'], // French (m/f) ]; for (const [masculine, feminine] of genderPairs) { expect(await profanity.search(masculine)).toBe(false); expect(await profanity.search(feminine)).toBe(false); } }); it('Should handle French plural forms', async () => { const singularPlural = [ ['livre', 'livres'], // book/books ['maison', 'maisons'], // house/houses ['animal', 'animaux'], // animal/animals (irregular) ['eau', 'eaux'], // water/waters (irregular) ]; for (const [singular, plural] of singularPlural) { expect(await profanity.search(singular)).toBe(false); expect(await profanity.search(plural)).toBe(false); } }); it('Should handle French verb conjugations', async () => { // Test different verb forms const verbForms = [ 'parler', // to speak (infinitive) 'parle', // I/he speaks 'parles', // you speak 'parlons', // we speak 'parlez', // you (plural) speak 'parlent', // they speak ]; for (const verb of verbForms) { const result = await profanity.search(verb); expect(typeof result).toBe('boolean'); } }); it('Should handle French liaison and elision', async () => { const testSentences = [ 'Les enfants', // Liaison: les_enfants 'Un homme', // Liaison: un_homme 'L\'ami', // Elision: l'ami (not le ami) 'D\'eau', // Elision: d'eau (not de eau) ]; for (const sentence of testSentences) { const result = await profanity.hasCurseWords(sentence); expect(typeof result).toBe('boolean'); } }); it('Should handle whitespace around French words', async () => { const frenchWord = 'bonjour'; expect(await profanity.search(` ${frenchWord} `)).toBe(false); expect(await profanity.search(`\t${frenchWord}\n`)).toBe(false); }); it('Should handle French hyphenated words', async () => { const hyphenatedWords = [ 'c\'est-à-dire', // that is to say 'peut-être', // maybe 'moi-même', // myself 'quelqu\'un', // someone 'rendez-vous', // appointment ]; for (const word of hyphenatedWords) { const result = await profanity.search(word); expect(typeof result).toBe('boolean'); } }); it('Should return unique words only in French text', async () => { // Test with repeated French words const sentence = 'bonjour bonjour bonjour partout'; const foundWords = await profanity.getCurseWords(sentence); // Should return unique words only expect(Array.isArray(foundWords)).toBe(true); // If 'bonjour' were a profanity word, it should appear only once }); it('Should handle mixed French and English text', async () => { const mixedSentence = 'This is mixed avec du français and English text.'; const result = await profanity.hasCurseWords(mixedSentence); expect(typeof result).toBe('boolean'); }); it('Should handle French regional variations', async () => { // Test words that might vary between French regions const regionalWords = [ 'chocolatine', // Pain au chocolat (Southwest France) 'septante', // Seventy (Belgium/Switzerland) 'nonante', // Ninety (Belgium/Switzerland) 'tantôt', // Later (Quebec/Belgium) ]; for (const word of regionalWords) { const result = await profanity.search(word); expect(typeof result).toBe('boolean'); } }); }); describe('Performance tests for French dataset', () => { it('Should handle large French text efficiently', async () => { const largeText = 'Ceci est une phrase de test. '.repeat(1000) + 'texte français ' + 'Texte propre. '.repeat(1000); const startTime = Date.now(); const result = await profanity.hasCurseWords(largeText); const endTime = Date.now(); expect(typeof result).toBe('boolean'); expect(endTime - startTime).toBeLessThan(100); // Should complete in under 100ms }); it('Should efficiently search through all French terms', async () => { const allWords = await profanity.all(); if (allWords.length > 0) { const startTime = Date.now(); for (let i = 0; i < Math.min(100, allWords.length); i++) { await profanity.search(allWords[i % allWords.length]); } const endTime = Date.now(); expect(endTime - startTime).toBeLessThan(50); // Should be very fast with Set lookup } }); it('Should handle concurrent operations on French dataset', async () => { const promises = [ profanity.search('bonjour'), profanity.hasCurseWords('ceci est du texte français'), profanity.getCurseWords('le texte français'), profanity.all(), profanity.search('merci') ]; const results = await Promise.all(promises); expect(results[0]).toBe(false); // search bonjour (should be clean) expect(results[1]).toBe(false); // hasCurseWords (should be clean) expect(Array.isArray(results[2])).toBe(true); // getCurseWords expect(Array.isArray(results[3])).toBe(true); // all words expect(results[4]).toBe(false); // search merci (should be clean) }); }); describe('French language specificity', () => { it('Should load French words correctly or fallback to English', async () => { const allWords = await profanity.all(); expect(allWords.length).toBeGreaterThan(0); // If French file doesn't exist, should fallback to English (958 words) // If French file exists, should load French words }); it('Should handle French-specific character encoding (UTF-8)', async () => { const frenchChars = [ 'à', 'â', 'ä', 'ç', 'è', 'é', 'ê', 'ë', 'î', 'ï', 'ô', 'ù', 'û', 'ü', 'ÿ', 'ñ' ]; for (const char of frenchChars) { const result = await profanity.search(char); expect(typeof result).toBe('boolean'); } }); it('Should handle French quotation marks and typography', async () => { const typographyTests = [ '« guillemets français »', // French quotes '"guillemets anglais"', // English quotes 'apostrophe courbe', // Curved apostrophe 'apostrophe droite', // Straight apostrophe '— tiret cadratin', // Em dash '– tiret demi-cadratin', // En dash ]; for (const text of typographyTests) { const result = await profanity.hasCurseWords(text); expect(typeof result).toBe('boolean'); } }); it('Should handle French Canadian (Quebec) variations', async () => { // Quebec French often has different vocabulary const quebecWords = [ 'char', // Car (instead of voiture) 'blonde', // Girlfriend (instead of copine) 'magasiner', // To shop (instead of faire du shopping) 'déjeuner', // Breakfast (lunch in France) ]; for (const word of quebecWords) { const result = await profanity.search(word); expect(typeof result).toBe('boolean'); } }); }); describe('Data integrity for French', () => { it('Should not allow modification of French word list', async () => { const terms1 = await profanity.all(); const originalLength = terms1.length; // Try to modify the returned array terms1.push('mot-faux'); terms1.pop(); if (terms1.length > 0) { terms1[0] = 'modifié'; } // Get terms again - should be unchanged const terms2 = await profanity.all(); expect(terms2.length).toBe(originalLength); expect(terms2).not.toContain('mot-faux'); if (terms2.length > 0) { expect(terms2[0]).not.toBe('modifié'); } }); it('Should provide consistent results for French detection', async () => { const sentence = 'Cette phrase est en français'; const result1 = await profanity.getCurseWords(sentence); const result2 = await profanity.getCurseWords(sentence); const result3 = await profanity.hasCurseWords(sentence); expect(result1).toEqual(result2); expect(typeof result3).toBe('boolean'); }); }); describe('Configuration and fallback for French', () => { it('Should handle missing French language file gracefully', async () => { // If fr.txt doesn't exist, should fallback to English const frenchProfanity = new ProfanityEngine({ language: 'fr', testMode: true, }); const terms = await frenchProfanity.all(); expect(terms.length).toBeGreaterThan(0); }); it('Should suppress warnings in test mode for French', async () => { // Store original console.warn const originalWarn = console.warn; let warnCalled = false; // Mock console.warn console.warn = () => { warnCalled = true; }; const frenchProfanity = new ProfanityEngine({ language: 'fr', testMode: true, }); warnCalled = false; await frenchProfanity.all(); expect(warnCalled).toBe(false); // Restore original console.warn console.warn = originalWarn; }); }); describe('French grammar and linguistics', () => { it('Should handle French articles and determiners', async () => { const articles = [ 'le', 'la', 'les', // Definite articles 'un', 'une', 'des', // Indefinite articles 'du', 'de la', 'des', // Partitive articles 'ce', 'cette', 'ces', // Demonstrative ]; for (const article of articles) { expect(await profanity.search(article)).toBe(false); } }); it('Should handle French prepositions', async () => { const prepositions = [ 'de', 'à', 'dans', 'sur', 'avec', 'pour', 'par', 'sans', 'sous', 'vers', 'chez' ]; for (const prep of prepositions) { expect(await profanity.search(prep)).toBe(false); } }); it('Should handle French reflexive pronouns', async () => { const reflexiveTests = [ 'Je me lave', // I wash myself 'Tu te dépêches', // You hurry 'Il se réveille', // He wakes up 'Nous nous amusons', // We have fun ]; for (const sentence of reflexiveTests) { const result = await profanity.hasCurseWords(sentence); expect(result).toBe(false); // Should be clean } }); }); });