text-moderate
Version:
A comprehensive JavaScript library for content moderation, including profanity filtering, sentiment analysis, and toxicity detection. Leveraging advanced algorithms and external APIs, TextModerate provides developers with tools to create safer and more po
120 lines (103 loc) • 3.99 kB
JavaScript
/**
* Language processor module for handling multi-language support and scoring strategies
* @module language-processor
*/
const emojis = require('../build/emoji.json');
// English is loaded by default
const enLanguage = require('../languages/en/index');
// Add emojis to English language labels.
// Object.assign copies into enLanguage.labels in place, so the loaded English
// module itself carries the emoji entries from here on.
Object.assign(enLanguage.labels, emojis);
// Cache of loaded languages, keyed by language code.
// The cache is created without a prototype so that keys inherited from
// Object.prototype (e.g. "constructor", "toString") are never mistaken for a
// cached language when the cache is indexed with a caller-supplied code.
const languages = Object.assign(Object.create(null), {
  en: enLanguage
});
/**
 * Fallback scoring strategy, used for languages that do not supply
 * their own `scoringStrategy`.
 */
const defaultScoringStrategy = {
  /**
   * Pass-through strategy: the token's score is returned untouched.
   * `tokens` and `cursor` are accepted only so the signature matches
   * language-specific strategies; they are not read here.
   *
   * @param {Array} tokens - All tokens in the analyzed text (unused)
   * @param {number} cursor - Position of the current token (unused)
   * @param {number} tokenScore - Score of the current token
   * @returns {number} `tokenScore`, unchanged
   */
  apply(tokens, cursor, tokenScore) {
    return tokenScore;
  }
};
module.exports = {
/**
* Registers the specified language
*
* @param {String} languageCode - Two-digit code for the language to register
* @param {Object} language - The language module to register
* @throws {Error} If language code is missing or invalid, or if language.labels is not defined
*/
addLanguage(languageCode, language) {
if (!languageCode || typeof languageCode !== 'string' || languageCode.length !== 2) {
throw new Error('Language code must be a valid two-digit code');
}
if (!language || typeof language !== 'object') {
throw new Error('Language must be a valid object');
}
if (!language.labels || typeof language.labels !== 'object') {
throw new Error('language.labels must be defined as an object');
}
// Add emojis to the language labels
Object.assign(language.labels, emojis);
languages[languageCode.toLowerCase()] = language;
},
/**
* Retrieves a language object from the cache,
* or tries to load it from the set of supported languages
*
* @param {String} languageCode - Two-digit code for the language to fetch
* @returns {Object} The language object with labels and scoring strategy
* @throws {Error} If the language cannot be found or loaded
*/
getLanguage(languageCode) {
// Default to English if no language was specified
if (!languageCode) {
return languages.en;
}
const langCode = languageCode.toLowerCase();
if (!languages[langCode]) {
// Try to load specified language
try {
const language = require(`../languages/${langCode}/index`);
// Add language to in-memory cache
this.addLanguage(langCode, language);
} catch (err) {
throw new Error(`No language found: ${langCode}. Error: ${err.message}`);
}
}
return languages[langCode];
},
/**
* Returns AFINN-165 weighted labels for the specified language
*
* @param {String} languageCode - Two-digit language code
* @return {Object} The labels object containing words and their sentiment scores
*/
getLabels(languageCode) {
const language = this.getLanguage(languageCode);
return language.labels;
},
/**
* Applies a scoring strategy for the current token
*
* @param {String} languageCode - Two-digit language code
* @param {Array} tokens - Tokens of the phrase to analyze
* @param {number} cursor - Cursor of the current token being analyzed
* @param {number} tokenScore - The score of the current token being analyzed
* @returns {number} The modified score after applying the scoring strategy
*/
applyScoringStrategy(languageCode, tokens, cursor, tokenScore) {
const language = this.getLanguage(languageCode);
// Fallback to default strategy if none was specified
const scoringStrategy = language.scoringStrategy || defaultScoringStrategy;
return scoringStrategy.apply(tokens, cursor, tokenScore);
}
};
// Default strategy is defined at the top of the file