UNPKG

turkish-profanity-filter

Version:

A configurable Turkish profanity filter for text content

github.com/derdogant/turkish-profanity-filter

derdogant/turkish-profanity-filter

138 lines (115 loc) • 4.11 kB

JavaScript

// src/index.js
// Main library functionality

const { defaultWordList } = require('./wordList');
const { createRegExp } = require('./utils');

/**
 * Configurable profanity filter.
 *
 * The matching pattern is produced by `createRegExp` from the current word
 * list and is rebuilt whenever the list changes.
 */
class TurkishProfanityFilter {
  /**
   * @param {Object} [options] - Filter configuration. Unknown keys are kept on `this.options`.
   * @param {string[]} [options.wordList] - Words to match (defaults to `defaultWordList`).
   * @param {boolean} [options.wholeWords=true] - Match whole words only.
   * @param {boolean} [options.caseSensitive=false] - Match case-sensitively.
   * @param {string} [options.replacement='***'] - Text substituted for each matched word.
   *   An empty string is honoured and simply removes the word.
   */
  constructor(options = {}) {
    // Only a missing (undefined/null) setting falls back to its default, so
    // legitimate falsy values such as `false` or '' are respected.
    const pick = (value, fallback) =>
      (value !== undefined && value !== null ? value : fallback);

    // Spread the caller's options FIRST: spreading them last would let an
    // explicitly-undefined key overwrite the default computed for it.
    this.options = {
      ...options,
      wordList: pick(options.wordList, defaultWordList),
      wholeWords: pick(options.wholeWords, true),
      caseSensitive: pick(options.caseSensitive, false),
      replacement: pick(options.replacement, '***')
    };

    this._buildRegExp();
  }

  /**
   * Build the regular expression used for matching from the current options.
   * @private
   */
  _buildRegExp() {
    const { wordList, wholeWords, caseSensitive } = this.options;
    this.regexp = createRegExp(wordList, { wholeWords, caseSensitive });
  }

  /**
   * Locate every matched word in `text`.
   *
   * NOTE(review): in whole-word mode this assumes the pattern from
   * `createRegExp` has three capture groups (before, word, after) and that the
   * full match is exactly their concatenation -- the same assumption the
   * original `censor` callback made. Confirm against `./utils`.
   *
   * @private
   * @param {string} text - The text to scan
   * @returns {{word: string, start: number, end: number}[]} - Matched words
   *   with their [start, end) offsets in `text`, in order of appearance
   */
  _findMatches(text) {
    const { source, flags } = this.regexp;
    // Work on a private copy so the shared regexp's lastIndex is untouched,
    // and force the global flag: exec() in a loop never terminates without it.
    const scanner = new RegExp(source, flags.includes('g') ? flags : `${flags}g`);
    const found = [];

    let match;
    while ((match = scanner.exec(text)) !== null) {
      let start = match.index;
      let word = match[0];

      if (this.options.wholeWords) {
        const before = match[1] !== undefined ? match[1] : '';
        word = match[2] !== undefined ? match[2] : '';
        start += before.length;
      }

      const end = start + word.length;
      if (word) {
        found.push({ word, start, end });
      }

      // Resume right after the word rather than after the whole match: the
      // trailing delimiter must stay available as the leading delimiter of the
      // next word, otherwise the second of two adjacent words is skipped.
      // Always move past match.index so zero-length matches cannot loop forever.
      scanner.lastIndex = end > match.index ? end : match.index + 1;
    }

    return found;
  }

  /**
   * Check if text contains profanity
   * @param {string} text - The text to check
   * @returns {boolean} - True if profanity found, false otherwise (also false for non-strings)
   */
  check(text) {
    if (typeof text !== 'string') {
      return false;
    }

    // The regexp is reused across calls; a stale lastIndex would make test()
    // start mid-string, so reset it first.
    this.regexp.lastIndex = 0;
    return this.regexp.test(text);
  }

  /**
   * Get all profanity words found in text
   * @param {string} text - The text to analyze
   * @returns {string[]} - Found words in order of appearance (empty for non-strings)
   */
  getWords(text) {
    if (typeof text !== 'string') {
      return [];
    }

    return this._findMatches(text).map(({ word }) => word);
  }

  /**
   * Censor profanity in text
   * @param {string} text - The text to censor
   * @returns {string} - Censored text; non-string input is returned unchanged
   */
  censor(text) {
    if (typeof text !== 'string') {
      return text;
    }

    const { replacement, wholeWords } = this.options;

    if (!wholeWords) {
      // Fresh copy to avoid lastIndex issues on the shared regexp.
      const { source, flags } = this.regexp;
      const replaceRegexp = new RegExp(source, flags.includes('g') ? flags : `${flags}g`);
      return text.replace(replaceRegexp, replacement);
    }

    // Whole-word mode: swap out only the word itself so the surrounding
    // characters captured by the pattern are preserved.
    let censored = '';
    let cursor = 0;
    for (const { start, end } of this._findMatches(text)) {
      censored += text.slice(cursor, start) + replacement;
      cursor = end;
    }
    return censored + text.slice(cursor);
  }

  /**
   * Add words to the filter
   * @param {string|string[]} words - Word(s) to add; non-string and empty entries are ignored
   */
  addWords(words) {
    const wordsArray = Array.isArray(words) ? words : [words];
    const validWords = wordsArray.filter(word => word && typeof word === 'string');

    // A Set removes duplicates while keeping first-seen order.
    this.options.wordList = [...new Set([...this.options.wordList, ...validWords])];
    this._buildRegExp();
  }

  /**
   * Remove words from the filter
   * @param {string|string[]} words - Word(s) to remove (exact matches only)
   */
  removeWords(words) {
    const toRemove = new Set(Array.isArray(words) ? words : [words]);

    this.options.wordList = this.options.wordList.filter(word => !toRemove.has(word));
    this._buildRegExp();
  }
}

module.exports = TurkishProfanityFilter;