text-moderate
Version:
A comprehensive JavaScript library for content moderation, including profanity filtering, sentiment analysis, and toxicity detection. Leveraging advanced algorithms and external APIs, TextModerate provides developers with tools to create safer and more positive online environments.
352 lines (300 loc) • 11.7 kB
JavaScript
/**
* TextModerate module for comprehensive text moderation
* @module TextModerate
*/
const { google } = require('googleapis');
const localList = require('./lang.json').words;
const baseList = require('./badwords-en');
const frenchList = require('./badwords-fr');
const tokenize = require('./tokenize');
const languageProcessor = require('./language-processor');
/**
 * TextModerate class for comprehensive text moderation.
 * Combines profanity filtering (blacklist / whitelist), lexicon-based
 * sentiment analysis, and toxicity detection through the Perspective API.
 */
class TextModerate {
  /**
   * TextModerate constructor.
   * Combines functionalities of word filtering and sentiment analysis.
   * @constructor
   * @param {Object} [options={}] - TextModerate instance options.
   * @param {boolean} [options.emptyList=false] - Instantiate filter with no blacklist (options.list is ignored too).
   * @param {Array<string>} [options.list=[]] - Custom words appended to the bundled blacklists.
   * @param {Array<string>} [options.exclude=[]] - Words that must never be reported as profane (compared case-insensitively).
   * @param {string} [options.placeHolder='*'] - Character used to replace profane words.
   * @param {RegExp} [options.regex=/[^a-zA-Z0-9|\$|\@]|\^/g] - Regular expression used to sanitize words before replacing them.
   * @param {RegExp} [options.replaceRegex=/\w/g] - Regular expression used to replace profane words with placeHolder.
   * @param {RegExp|string} [options.splitRegex=/\b/] - Separator used to split a string into words.
   * @param {Object} [options.sentimentOptions={}] - Options for sentiment analysis.
   * @throws {TypeError} If options is not an object.
   */
  constructor(options = {}) {
    if (options !== null && typeof options !== 'object') {
      throw new TypeError('Options must be an object');
    }
    const opts = options || {};

    // Filter properties
    this.list = opts.emptyList
      ? []
      : Array.prototype.concat.call(localList, baseList, frenchList, opts.list || []);
    // Copy the whitelist so addWords()/removeWords() never mutate the caller's
    // array, and lower-case it because isProfane() looks words up lower-cased.
    this.exclude = [].concat(opts.exclude || []).map(
      (word) => (typeof word === 'string' ? word.toLowerCase() : word)
    );
    this.splitRegex = opts.splitRegex || /\b/;
    this.placeHolder = opts.placeHolder || '*';
    this.regex = opts.regex || /[^a-zA-Z0-9|\$|\@]|\^/g;
    this.replaceRegex = opts.replaceRegex || /\w/g;
    // Compiled blacklist patterns keyed by word, filled lazily by getWordPattern().
    this.wordPatterns = new Map();

    // Sentiment properties
    this.sentimentOptions = opts.sentimentOptions || {};
  }

  // Filter methods

  /**
   * Return the (cached) case-insensitive pattern that detects one blacklist word.
   * A word-boundary anchor is only added on a side where the word starts/ends
   * with a word character: `\b` next to punctuation (e.g. the end of "a$$")
   * would demand an adjacent letter and miss the word before a space.
   * @private
   * @param {string} word - Blacklist entry.
   * @returns {RegExp} Stateless pattern (no `g` flag) matching the word.
   */
  getWordPattern(word) {
    if (!this.wordPatterns) {
      this.wordPatterns = new Map();
    }
    let pattern = this.wordPatterns.get(word);
    if (!pattern) {
      const escaped = word.replace(/(\W)/g, '\\$1');
      const leading = /^\w/.test(word) ? '\\b' : '';
      const trailing = /\w$/.test(word) ? '\\b' : '';
      pattern = new RegExp(`${leading}${escaped}${trailing}`, 'i');
      this.wordPatterns.set(word, pattern);
    }
    return pattern;
  }

  /**
   * Determine if a string contains profane language.
   * @param {string} string - String to evaluate for profanity.
   * @returns {boolean} True if the string contains a blacklisted, non-whitelisted word.
   * @throws {TypeError} If the input is not a string.
   */
  isProfane(string) {
    if (typeof string !== 'string') {
      throw new TypeError('Input must be a string');
    }
    if (!string) {
      return false;
    }
    // some() stops at the first hit instead of scanning the whole blacklist.
    return this.list.some((word) => {
      if (!word || typeof word !== 'string') {
        return false;
      }
      if (this.exclude.includes(word.toLowerCase())) {
        return false;
      }
      return this.getWordPattern(word).test(string);
    });
  }

  /**
   * Replace a word with placeHolder characters.
   * The word is first stripped with `this.regex`, then every match of
   * `this.replaceRegex` is swapped for the placeholder.
   * @param {string} string - String to replace.
   * @returns {string} The masked string.
   * @throws {TypeError} If the input is not a string.
   */
  replaceWord(string) {
    if (typeof string !== 'string') {
      throw new TypeError('Input must be a string');
    }
    if (!string) {
      return '';
    }
    // A replacer function keeps placeholders such as '$&' literal; as a plain
    // replacement string they would be expanded as replacement patterns.
    const replacer = typeof this.placeHolder === 'function'
      ? this.placeHolder
      : () => this.placeHolder;
    return string
      .replace(this.regex, '')
      .replace(this.replaceRegex, replacer);
  }

  /**
   * Evaluate a string for profanity and return an edited version.
   * The string is split with `this.splitRegex`, profane pieces are masked with
   * replaceWord(), and the pieces are re-joined with the first separator found.
   * @param {string} string - Sentence to filter.
   * @returns {string} The filtered string; the original string if filtering fails.
   * @throws {TypeError} If the input is not a string.
   */
  clean(string) {
    if (typeof string !== 'string') {
      throw new TypeError('Input must be a string');
    }
    if (!string) {
      return '';
    }
    try {
      const splitter = this.splitRegex;
      let joinChar = '';
      if (typeof splitter === 'string') {
        // A plain string separator has no exec(); it is its own join text.
        joinChar = string.includes(splitter) ? splitter : '';
      } else {
        // Global/sticky regexes remember lastIndex between exec() calls, which
        // would make the join separator differ from one clean() call to the next.
        splitter.lastIndex = 0;
        const firstSeparator = splitter.exec(string);
        splitter.lastIndex = 0;
        joinChar = firstSeparator ? firstSeparator[0] : '';
      }
      return string
        .split(splitter)
        .map((word) => (this.isProfane(word) ? this.replaceWord(word) : word))
        .join(joinChar);
    } catch (error) {
      console.error('Error in clean method:', error);
      return string; // Return original string if an error occurs
    }
  }

  /**
   * Add word(s) to blacklist filter / remove words from whitelist filter.
   * @param {...string} words - Word(s) to add to blacklist.
   * @returns {void}
   * @throws {TypeError} If any of the words is not a string (nothing is added in that case).
   */
  addWords(...words) {
    if (!words || !words.length) {
      return;
    }
    // Validate everything up front so a bad argument leaves the filter untouched.
    words.forEach((word) => {
      if (typeof word !== 'string') {
        throw new TypeError('All words must be strings');
      }
    });

    this.list.push(...words);

    // Drop every whitelist occurrence (not just the first) so a word that was
    // whitelisted more than once really becomes profane again.
    const added = new Set(words.map((word) => word.toLowerCase()));
    for (let i = this.exclude.length - 1; i >= 0; i--) {
      if (added.has(this.exclude[i])) {
        this.exclude.splice(i, 1);
      }
    }
  }

  /**
   * Add words to whitelist filter (exclude from profanity detection).
   * @param {...string} words - Word(s) to add to whitelist.
   * @returns {void}
   * @throws {TypeError} If any of the words is not a string (nothing is added in that case).
   */
  removeWords(...words) {
    if (!words || !words.length) {
      return;
    }
    words.forEach((word) => {
      if (typeof word !== 'string') {
        throw new TypeError('All words must be strings');
      }
    });
    // Store lower-cased entries, skipping ones that are already whitelisted.
    words.forEach((word) => {
      const lowered = word.toLowerCase();
      if (!this.exclude.includes(lowered)) {
        this.exclude.push(lowered);
      }
    });
  }

  // Sentiment methods

  /**
   * Registers the specified language for sentiment analysis.
   * Delegates to languageProcessor.addLanguage after validating the arguments.
   * @param {String} languageCode - Code of the language to register.
   * @param {Object} language - The language module to register.
   * @returns {void}
   * @throws {TypeError} If the language code is not a non-empty string or the language is not an object.
   */
  registerLanguage(languageCode, language) {
    if (!languageCode || typeof languageCode !== 'string') {
      throw new TypeError('Language code must be a string');
    }
    if (!language || typeof language !== 'object') {
      throw new TypeError('Language must be a valid object');
    }
    languageProcessor.addLanguage(languageCode, language);
  }

  /**
   * Performs sentiment analysis on the provided input 'phrase'.
   * Tokens are visited from last to first, so `words`, `positive`, `negative`
   * and `calculation` list matches in reverse order of appearance.
   * @param {String} [phrase] - Input phrase; null/undefined is treated as an empty phrase.
   * @param {Object|function} [opts={}] - Options for sentiment analysis, or the callback.
   * @param {String} [opts.language='en'] - Language code for analysis.
   * @param {Object} [opts.extras] - Additional word-score pairs to use in analysis.
   * @param {function} [callback] - Optional node-style callback `(err, result)`.
   * @return {Object|undefined} Result object, or undefined when a callback is used.
   * @throws {TypeError} If parameters are of incorrect types.
   */
  analyzeSentiment(phrase, opts = {}, callback) {
    // Parameter validation and normalization
    let normalizedPhrase = '';
    let options = opts;
    let cb = callback;

    if (phrase !== undefined && phrase !== null) {
      if (typeof phrase !== 'string') {
        throw new TypeError('Phrase must be a string');
      }
      normalizedPhrase = phrase;
    }

    if (typeof options === 'function') {
      cb = options;
      options = {};
    } else if (options === null) {
      // null is accepted like "no options"; reading options.language would crash otherwise.
      options = {};
    } else if (typeof options !== 'object') {
      throw new TypeError('Options must be an object');
    }

    if (cb !== undefined && typeof cb !== 'function') {
      throw new TypeError('Callback must be a function');
    }

    // Get language and labels
    const languageCode = options.language || 'en';
    let labels = languageProcessor.getLabels(languageCode);

    // Merge extra labels if provided (extras win over the language labels)
    if (options.extras && typeof options.extras === 'object') {
      labels = Object.assign({}, labels, options.extras);
    }

    // Storage objects for analysis
    const tokens = tokenize(normalizedPhrase);
    let score = 0;
    const words = [];
    const positive = [];
    const negative = [];
    const calculation = [];

    // Iterate over tokens for sentiment analysis
    for (let i = tokens.length - 1; i >= 0; i--) {
      const token = tokens[i];

      // Skip tokens that don't have sentiment values
      if (!Object.prototype.hasOwnProperty.call(labels, token)) {
        continue;
      }
      words.push(token);

      // Let the language's scoring strategy adjust the raw label score
      const tokenScore = languageProcessor.applyScoringStrategy(
        languageCode,
        tokens,
        i,
        labels[token]
      );

      // Categorize token based on score
      if (tokenScore > 0) {
        positive.push(token);
      }
      if (tokenScore < 0) {
        negative.push(token);
      }
      score += tokenScore;

      // Record calculation for this token
      calculation.push({ [token]: tokenScore });
    }

    // Prepare result object
    const result = {
      score,
      comparative: tokens.length > 0 ? score / tokens.length : 0,
      calculation,
      tokens,
      words,
      positive,
      negative
    };

    // Handle optional async interface
    if (typeof cb === 'function') {
      process.nextTick(() => {
        cb(null, result);
      });
      return undefined;
    }
    return result;
  }

  /**
   * Analyzes the toxicity of a given text using the Perspective API.
   * All failures, including invalid arguments, are reported by rejecting the
   * returned promise.
   * @param {string} text - Text to analyze.
   * @param {string} apiKey - API key for the Perspective API.
   * @param {Object} [options={}] - Additional options for toxicity analysis.
   * @param {Array<string>|string} [options.attributes=['TOXICITY']] - Attributes to analyze (e.g., 'TOXICITY', 'SEVERE_TOXICITY', 'IDENTITY_ATTACK', etc.)
   * @param {Array<string>} [options.languages=['en']] - Languages sent with the request.
   * @param {boolean} [options.doNotStore=true] - Sent as the request's doNotStore flag; only an explicit `false` disables it.
   * @return {Promise} - A promise that resolves with the `data` of the API response.
   */
  analyzeToxicity(text, apiKey, options = {}) {
    if (!text) {
      return Promise.reject(new Error('Text to analyze is required'));
    }
    if (!apiKey) {
      return Promise.reject(new Error('API key is required for toxicity analysis'));
    }

    // null / non-object options behave like "no options" instead of throwing synchronously.
    const opts = options !== null && typeof options === 'object' ? options : {};

    // Default to TOXICITY if no attributes specified; accept a single name too.
    let attributes = opts.attributes || ['TOXICITY'];
    if (typeof attributes === 'string') {
      attributes = [attributes];
    }
    if (!Array.isArray(attributes)) {
      return Promise.reject(new TypeError('options.attributes must be an array of attribute names'));
    }
    if (attributes.length === 0) {
      attributes = ['TOXICITY'];
    }

    const DISCOVERY_URL = 'https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1';
    const requestedAttributes = {};
    attributes.forEach((attr) => {
      requestedAttributes[attr] = {};
    });

    return google.discoverAPI(DISCOVERY_URL)
      .then((client) => {
        const analyzeRequest = {
          comment: { text },
          requestedAttributes,
          languages: opts.languages || ['en'],
          doNotStore: opts.doNotStore !== false // Default to true for privacy
        };
        return client.comments.analyze({
          key: apiKey,
          resource: analyzeRequest,
        });
      })
      .then((response) => response.data)
      .catch((err) => {
        const errorMessage = err.message || 'Unknown error during toxicity analysis';
        const error = new Error(`Toxicity analysis failed: ${errorMessage}`);
        error.originalError = err;
        throw error;
      });
  }
}
module.exports = TextModerate;