turkish-profanity-filter
Version:
A configurable Turkish profanity filter for text content
33 lines (26 loc) • 1.2 kB
JavaScript
// src/utils.js
// Helper functions for the library
exports.escapeRegExp = (string) => {
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
};
exports.createRegExp = (wordList, options = {}) => {
const { wholeWords = true, caseSensitive = false } = options;
// Filter out any empty strings to avoid regex errors
const filteredList = wordList.filter(word => word && typeof word === 'string');
if (filteredList.length === 0) {
// Return a regex that will never match if the word list is empty
return new RegExp('(?!)');
}
const escapedWords = filteredList.map(word => exports.escapeRegExp(word));
// For non-ASCII characters like Turkish special characters,
// we need a different approach for whole words
let pattern;
if (wholeWords) {
// This pattern matches whole words by checking for spaces, punctuation,
// or string boundaries before and after the word
pattern = `(^|[^a-zA-ZğüşıöçĞÜŞİÖÇ])(${escapedWords.join('|')})($|[^a-zA-ZğüşıöçĞÜŞİÖÇ])`;
} else {
pattern = `(${escapedWords.join('|')})`;
}
return new RegExp(pattern, caseSensitive ? 'g' : 'gi');
};