content-guard
Version:
🛡️ Advanced content analysis and moderation system with multi-variant optimization. Features context-aware detection, harassment prevention, and ML-powered toxicity analysis. Pre-1.0 development version.
615 lines (508 loc) • 18.2 kB
JavaScript
/**
* 🎹 Modular Keyboard & Pattern Spam Detection Plugin
*
* Configurable detection categories:
* - keyboardSequences: qwerasdf, asdfgh, etc.
* - randomKeyMashing: fjdsfkdsjlkj, fdjferlkv, etc.
* - characterRepetition: aaaaaaa, hiiiiii, etc.
* - keyboardRolling: smooth finger patterns
* - alternatingHands: left-right hand patterns
* - lowEffortSpam: basic low-quality content
*
* Each category can be independently enabled/disabled for fine-grained control.
*/
class KeyboardSpamPlugin {
  /**
   * Set up the static detection data (default categories, keyboard layout,
   * legitimate-content patterns). Runtime options are filled in by init().
   */
  constructor() {
    this.name = 'keyboardSpam'
    this.version = '2.0.0'
    this.initialized = false
    this.options = {}

    // Default detection categories - all can be individually configured
    this.defaultCategories = {
      keyboardSequences: {
        enabled: true,
        weight: 1.0,
        description: 'Sequential keyboard patterns (qwerty, asdf, etc.)'
      },
      randomKeyMashing: {
        enabled: true,
        weight: 1.2,
        description: 'Random character sequences that look like keyboard mashing'
      },
      characterRepetition: {
        enabled: false,
        weight: 0.8,
        description: 'Excessive character repetition (aaaa, hiiii, etc.)'
      },
      keyboardRolling: {
        enabled: true,
        weight: 0.9,
        description: 'Smooth finger rolling patterns'
      },
      alternatingHands: {
        enabled: true,
        weight: 0.7,
        description: 'Left-right hand alternating patterns'
      },
      lowEffortSpam: {
        enabled: true,
        weight: 1.1,
        description: 'Low-effort spam patterns'
      }
    }

    // Seed the active category table so runEnabledDetections() does not throw
    // when it is called before init(); init() rebuilds it from user config.
    this.categories = { ...this.defaultCategories }

    // QWERTY keyboard layout mapping
    this.keyboardLayout = {
      topRow: 'qwertyuiop[]',
      middleRow: 'asdfghjkl;\'',
      bottomRow: 'zxcvbnm,./',
      numberRow: '1234567890-=',
      leftHand: 'qwertasdfgzxcvb',
      rightHand: 'yuiophjklnm',
      middleKeys: 'asdfghjkl',
      homeRow: 'asdfghjkl'
    }

    // Patterns that mark a word as legitimate content (skipped by detection).
    // None of these use the /g or /y flag, so .test() is stateless.
    this.legitimatePatterns = {
      // Technical content
      hexCodes: /^(0x|#)?[0-9a-f]{6,}$/i,
      base64: /^[A-Za-z0-9+/]{16,}={0,2}$/,
      urls: /https?:\/\/|www\.|ftp:\/\/|\.com|\.org|\.net|\.edu|\.gov/i,
      emails: /@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/,
      // Structured data
      uuids: /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i,
      licenseKeys: /^[A-Z0-9]{4,5}-[A-Z0-9]{4,5}-[A-Z0-9]{4,5}/i,
      serialNumbers: /^[A-Z0-9]{8,}$/,
      // File paths and extensions
      filePaths: /\.[a-z]{2,4}$|[\/\\]|^[a-z]:\\/i,
      // Gaming and usernames
      gamingUsernames: /^\w*\d+\w*$|^[a-z]+_[a-z]+$/i,
      // Abbreviations (more specific)
      abbreviations: /^[A-Z]{2,6}$|^[a-z]{2,6}$/,
      // Foreign languages with accents
      foreignChars: /[àáäâèéëêìíïîòóöôùúüûñçßščćžđ]/i,
      // Common expressions and emotions
      emotionalExpressions: /^(ha){3,}$|^(he){3,}$|^(hi){3,}$|^(ho){3,}$/i,
      laughter: /^(lol){2,}$|^(lmao){2,}$|^ahaha+$/i
    }
  }

  /**
   * Initialize the plugin with configuration.
   *
   * @param {object} [config] - Optional settings. Recognised keys: minLength,
   *   maxLegitLength, sensitivityLevel, minScoreThreshold, maxSingleWordScore,
   *   contextAware, weight, debug, categories. Any other keys are copied onto
   *   this.options unchanged.
   * @returns {Promise<void>}
   */
  async init(config) {
    this.config = config || {}
    const userConfig = this.config

    // Merge per-category overrides onto the defaults. Only own, known category
    // names are accepted so inherited names like "toString" are not treated
    // as categories, and non-object settings are ignored.
    this.categories = { ...this.defaultCategories }
    const overrides = userConfig.categories
    if (overrides && typeof overrides === 'object') {
      for (const [category, settings] of Object.entries(overrides)) {
        const isKnown = Object.prototype.hasOwnProperty.call(this.defaultCategories, category)
        if (isKnown && settings && typeof settings === 'object') {
          this.categories[category] = { ...this.categories[category], ...settings }
        }
      }
    }

    // User config is spread FIRST so that the normalised values below win.
    // `??` keeps explicit 0 / '' / false from the caller but falls back to the
    // default when a key is present with undefined or null.
    this.options = {
      ...userConfig,
      // Basic settings
      minLength: userConfig.minLength ?? 5,
      maxLegitLength: userConfig.maxLegitLength ?? 25,
      sensitivityLevel: userConfig.sensitivityLevel ?? 'medium',
      // Scoring thresholds
      minScoreThreshold: userConfig.minScoreThreshold ?? 4, // Minimum score to flag
      maxSingleWordScore: userConfig.maxSingleWordScore ?? 8, // Cap per word
      // Context and behavior
      contextAware: userConfig.contextAware !== false,
      weight: userConfig.weight ?? 1.0,
      debug: userConfig.debug ?? false,
      // Always expose the merged table, never the raw partial user object
      categories: this.categories
    }

    this.initialized = true

    if (this.options.debug) {
      console.log('🎹 Modular keyboard spam detection initialized')
      console.log('📊 Enabled categories:', Object.entries(this.categories)
        .filter(([, settings]) => settings.enabled)
        .map(([name]) => name))
    }
  }

  /**
   * Analyze content for spam patterns.
   *
   * Reads the `name`, `subject`, `message` and `allText` fields of `content`,
   * scores each one that is a string of at least `minLength` trimmed
   * characters, and applies the final sensitivity/weight adjustments.
   * Calls init() with no config if the plugin has not been initialized.
   *
   * @param {object} content - Object carrying the text fields listed above.
   *   A null/undefined content or a non-string field is skipped.
   * @param {*} input - Not used by this method.
   * @param {object} [globalOptions] - `debug: true` enables console logging.
   * @returns {Promise<{score: number, flags: string[]}>}
   */
  async analyze(content, input, globalOptions = {}) {
    if (!this.initialized) {
      await this.init()
    }

    const fields = content ?? {}
    const debug = globalOptions?.debug || this.options.debug
    const flags = []
    let score = 0

    if (debug) {
      console.log('🔍 KEYBOARD-SPAM: Running modular analysis...')
    }

    // Analyze different content fields with adjusted weights
    const checks = [
      { field: 'name', text: fields.name, weight: 1.3 },
      { field: 'subject', text: fields.subject, weight: 1.1 },
      { field: 'message', text: fields.message, weight: 0.9 },
      { field: 'combined', text: fields.allText, weight: 0.8 }
    ]

    for (const { field, text, weight } of checks) {
      // Non-string values (numbers, objects, null) cannot be scanned; skip them
      // instead of throwing on text.trim().
      if (typeof text !== 'string' || text.length === 0) continue
      if (text.trim().length < this.options.minLength) continue

      const analysis = this.analyzeTextForSpamPatterns(text, field, debug)
      if (analysis.score <= 0) continue

      const fieldScore = Math.round(analysis.score * weight)
      score += fieldScore
      flags.push(...analysis.flags.map(flag => `${field}: ${flag}`))

      if (debug) {
        console.log(`🚨 KEYBOARD-SPAM: ${field} detected patterns (+${fieldScore} points)`)
      }
    }

    // Apply sensitivity and final adjustments
    score = this.applyFinalAdjustments(score, fields, debug)

    if (debug) {
      console.log(`📊 KEYBOARD-SPAM Plugin Final Score: ${score}`)
    }

    return { score, flags }
  }

  /**
   * Analyze text for spam patterns using enabled categories.
   *
   * The text is split on whitespace; each word is stripped to [a-z0-9] and
   * lowercased, words recognised as legitimate are skipped, and the remaining
   * words are scored with a per-word cap of `options.maxSingleWordScore`.
   *
   * @param {string} text - Text to scan.
   * @param {string} field - Name of the source field (not used for scoring).
   * @param {boolean} [debug=false] - Enables console logging.
   * @returns {{score: number, flags: string[]}}
   */
  analyzeTextForSpamPatterns(text, field, debug = false) {
    const { minLength, maxSingleWordScore } = this.options
    const flags = []
    let score = 0

    // Split into words for analysis
    const words = text.split(/\s+/).filter(word => word.length >= minLength)

    for (const word of words) {
      const cleanWord = word.replace(/[^a-zA-Z0-9]/g, '').toLowerCase()
      if (cleanWord.length < minLength) continue

      // Skip legitimate content first
      if (this.isLegitimateContent(cleanWord, word)) {
        if (debug) {
          console.log(`✅ KEYBOARD-SPAM: Skipping legitimate: "${word}"`)
        }
        continue
      }

      // Run enabled detection categories
      const detectionResults = this.runEnabledDetections(cleanWord, word)
      if (detectionResults.totalScore <= 0) continue

      // Cap score per word to prevent single word from dominating
      const cappedScore = Math.min(detectionResults.totalScore, maxSingleWordScore)
      score += cappedScore
      flags.push(...detectionResults.flags.map(flag => `"${word}" - ${flag}`))

      if (debug) {
        console.log(`🚨 KEYBOARD-SPAM: Detected in "${word}": ${detectionResults.flags.join(', ')} (score: ${cappedScore})`)
      }
    }

    return { score, flags }
  }

  /**
   * Run all enabled detection categories on a word.
   *
   * Each enabled detector's score is multiplied by its category weight and
   * rounded before being added to the total.
   *
   * @param {string} cleanWord - Lowercased alphanumeric form of the word.
   * @param {string} originalWord - Word as it appeared in the text (unused).
   * @returns {{totalScore: number, flags: string[]}}
   */
  runEnabledDetections(cleanWord, originalWord) {
    // Order matters: it fixes the order of the returned flags.
    const detectors = [
      {
        category: 'keyboardSequences',
        run: word => this.detectKeyboardSequences(word),
        describe: result => `Keyboard sequence: ${result.pattern}`
      },
      {
        category: 'randomKeyMashing',
        run: word => this.detectRandomKeyMashing(word),
        describe: () => 'Random key mashing'
      },
      {
        // Disabled in the default category table
        category: 'characterRepetition',
        run: word => this.detectCharacterRepetition(word),
        describe: () => 'Character repetition'
      },
      {
        category: 'keyboardRolling',
        run: word => this.detectKeyboardRolling(word),
        describe: () => 'Keyboard rolling'
      },
      {
        category: 'alternatingHands',
        run: word => this.detectAlternatingHands(word),
        describe: () => 'Alternating hands'
      },
      {
        category: 'lowEffortSpam',
        run: word => this.detectLowEffortSpam(word),
        describe: () => 'Low effort spam'
      }
    ]

    const flags = []
    let totalScore = 0

    for (const { category, run, describe } of detectors) {
      const settings = this.categories[category]
      if (!settings.enabled) continue

      const result = run(cleanWord)
      if (result.score > 0) {
        totalScore += Math.round(result.score * settings.weight)
        flags.push(describe(result))
      }
    }

    return { totalScore, flags }
  }

  /**
   * Detect sequential keyboard patterns (qwerty, asdf, etc.).
   *
   * Scans each keyboard row left to right for a 4-character run contained in
   * the word and returns on the first hit.
   *
   * @param {string} word
   * @returns {{score: number, pattern?: string}} score 4-6 on a hit (with the
   *   4-character pattern that matched), otherwise `{ score: 0 }`.
   */
  detectKeyboardSequences(word) {
    const { topRow, middleRow, bottomRow, numberRow } = this.keyboardLayout

    for (const row of [topRow, middleRow, bottomRow, numberRow]) {
      // Check for sequences of 4+ characters (more strict)
      for (let start = 0; start <= row.length - 4; start++) {
        const sequence = row.substring(start, start + 4)
        if (!word.includes(sequence)) continue

        const sequenceLength = this.getLongestSequence(word, row, start)
        return {
          score: Math.min(sequenceLength, 6), // 4-6 points based on length
          pattern: sequence
        }
      }
    }

    return { score: 0 }
  }

  /**
   * Get the length of the longest row run starting at `startPos` that the
   * word contains. Probing starts at 4 characters and stops at 8 or at the
   * end of the row.
   *
   * @param {string} word
   * @param {string} row - Keyboard row string.
   * @param {number} startPos - Index in `row` where the run starts.
   * @returns {number} Longest contained length (3 if even the 4-run is absent).
   */
  getLongestSequence(word, row, startPos) {
    let length = 4
    while (length <= 8 && startPos + length <= row.length) {
      if (!word.includes(row.substring(startPos, startPos + length))) break
      length++
    }
    // `length` is one past the last run that matched
    return length - 1
  }

  /**
   * Detect random key mashing patterns.
   *
   * Adds 3 for a high unique-character ratio (8+ chars), 2 for almost no
   * vowels (6+ chars) and 2 for home-row concentration (6+ chars); capped at 5.
   *
   * @param {string} word
   * @returns {{score: number}}
   */
  detectRandomKeyMashing(word) {
    const chars = word.split('')
    let score = 0

    // Check for high unique character ratio (randomness indicator)
    const uniqueRatio = new Set(chars).size / word.length
    if (word.length >= 8 && uniqueRatio > 0.75) {
      score += 3
    }

    // Check for lack of vowel structure
    const vowelRatio = chars.filter(char => 'aeiou'.includes(char)).length / word.length
    if (word.length >= 6 && vowelRatio < 0.1) {
      score += 2
    }

    // Check for middle-row concentration (common in random mashing)
    const { middleKeys } = this.keyboardLayout
    const middleRatio = chars.filter(char => middleKeys.includes(char)).length / word.length
    if (middleRatio > 0.6 && word.length >= 6) {
      score += 2
    }

    return { score: Math.min(score, 5) }
  }

  /**
   * Detect excessive character repetition.
   *
   * Only runs of 5+ identical consecutive characters count; each run adds
   * 1-4 points and the total is capped at 6.
   *
   * @param {string} word
   * @returns {{score: number}}
   */
  detectCharacterRepetition(word) {
    const runs = word.match(/(.)\1{4,}/g) || []
    // Every match is at least 5 characters long by construction of the regex
    const score = runs.reduce((sum, run) => sum + Math.min(run.length - 4, 4), 0)
    return { score: Math.min(score, 6) }
  }

  /**
   * Detect keyboard rolling patterns.
   *
   * Adds 2 points for each listed 4-key roll contained in the word, capped at 4.
   *
   * @param {string} word
   * @returns {{score: number}}
   */
  detectKeyboardRolling(word) {
    const rollingPatterns = [
      'asdf', 'sdfg', 'dfgh', 'fghj', 'ghjk', 'hjkl',
      'qwer', 'wert', 'erty', 'rtyu', 'tyui', 'yuio', 'uiop',
      'zxcv', 'xcvb', 'cvbn', 'vbnm'
    ]

    const hits = rollingPatterns.filter(pattern => word.includes(pattern)).length
    return { score: Math.min(hits * 2, 4) }
  }

  /**
   * Detect alternating hand patterns.
   *
   * Counts adjacent character pairs typed by different hands. A pair only
   * counts as a switch when BOTH characters belong to a known hand, so digits
   * and other characters outside the left/right key sets no longer register
   * as "right hand". The ratio is taken over all adjacent pairs.
   *
   * @param {string} word
   * @returns {{score: number}} 3 when more than 70% of pairs alternate, 2 when
   *   more than 50% do, otherwise 0 (always 0 for words shorter than 6).
   */
  detectAlternatingHands(word) {
    if (word.length < 6) return { score: 0 }

    const { leftHand, rightHand } = this.keyboardLayout
    const handOf = char => {
      if (leftHand.includes(char)) return 'left'
      if (rightHand.includes(char)) return 'right'
      return null
    }

    let alternatingCount = 0
    for (let i = 0; i < word.length - 1; i++) {
      const current = handOf(word[i])
      const next = handOf(word[i + 1])
      if (current !== null && next !== null && current !== next) {
        alternatingCount++
      }
    }

    const alternatingRatio = alternatingCount / (word.length - 1)
    if (alternatingRatio > 0.7) return { score: 3 }
    if (alternatingRatio > 0.5) return { score: 2 }
    return { score: 0 }
  }

  /**
   * Detect low-effort spam patterns.
   *
   * Adds 2 for 8+ character words with under 5% vowels and 1 for 6+ character
   * words whose most frequent character is less than 1.5x the average
   * frequency; capped at 3.
   *
   * @param {string} word
   * @returns {{score: number}}
   */
  detectLowEffortSpam(word) {
    const chars = word.split('')
    let score = 0

    // Check for very low vowel content combined with randomness
    const vowelRatio = chars.filter(char => 'aeiou'.includes(char)).length / word.length
    if (word.length >= 8 && vowelRatio < 0.05) {
      score += 2
    }

    // Check for character frequency anomalies
    const charCounts = {}
    for (const char of chars) {
      charCounts[char] = (charCounts[char] || 0) + 1
    }
    const counts = Object.values(charCounts)
    const maxCount = Math.max(...counts)
    const avgCount = counts.reduce((a, b) => a + b, 0) / counts.length

    // If character distribution is too uniform (indicates randomness)
    if (word.length >= 6 && maxCount / avgCount < 1.5) {
      score += 1
    }

    return { score: Math.min(score, 3) }
  }

  /**
   * Decide whether a word should be exempt from spam detection.
   *
   * A word is legitimate when any entry of `legitimatePatterns` matches either
   * form of the word, when it passes looksLikeRealWord(), or when it passes
   * isTechnicalContent().
   *
   * @param {string} cleanWord - Lowercased alphanumeric form of the word.
   * @param {string} originalWord - Word as it appeared in the text.
   * @returns {boolean}
   */
  isLegitimateContent(cleanWord, originalWord) {
    const matchesKnownPattern = Object.values(this.legitimatePatterns)
      .some(pattern => pattern.test(originalWord) || pattern.test(cleanWord))

    return matchesKnownPattern ||
      this.looksLikeRealWord(cleanWord) ||
      this.isTechnicalContent(originalWord, cleanWord)
  }

  /**
   * Heuristic real-word check.
   *
   * Returns true for words under 3 characters, for words with a listed
   * ending or prefix plus more than 2 further characters, and otherwise for
   * words with at least one vowel, a vowel ratio between 10% and 60%, and no
   * single character making up more than half of the word.
   *
   * @param {string} word
   * @returns {boolean}
   */
  looksLikeRealWord(word) {
    if (word.length < 3) return true

    // Common word patterns
    const commonEndings = ['ing', 'tion', 'sion', 'ed', 'er', 'est', 'ly', 'ness', 'ment', 'able', 'ible', 'ous', 'ful', 'less', 'ize', 'ise']
    const commonPrefixes = ['un', 'pre', 're', 'in', 'im', 'dis', 'mis', 'over', 'under', 'out', 'sub', 'inter', 'anti', 'pro', 'de']

    const hasEnding = commonEndings.some(ending =>
      word.endsWith(ending) && word.length > ending.length + 2)
    if (hasEnding) return true

    const hasPrefix = commonPrefixes.some(prefix =>
      word.startsWith(prefix) && word.length > prefix.length + 2)
    if (hasPrefix) return true

    // Vowel-consonant analysis
    const chars = word.split('')
    const vowelCount = chars.filter(char => 'aeiou'.includes(char)).length
    const vowelRatio = vowelCount / word.length

    // Must have reasonable vowel ratio (10-60% for English)
    if (vowelCount === 0 || vowelRatio > 0.6 || vowelRatio < 0.1) {
      return false
    }

    // Check for reasonable character distribution
    const charCounts = {}
    for (const char of chars) {
      charCounts[char] = (charCounts[char] || 0) + 1
    }
    const maxCharFreq = Math.max(...Object.values(charCounts)) / word.length

    // If a single character dominates too much, it's probably not a real word
    return maxCharFreq <= 0.5
  }

  /**
   * Technical content detection (hashes, version numbers, IDs, long tokens).
   *
   * @param {string} originalWord - Word as it appeared in the text.
   * @param {string} cleanWord - Lowercased alphanumeric form of the word.
   * @returns {boolean}
   */
  isTechnicalContent(originalWord, cleanWord) {
    // Hash-like patterns (the {8,} quantifier already enforces the length)
    if (/^[a-f0-9]{8,}$/i.test(cleanWord)) return true

    // Version numbers
    if (/\d+\.\d+/.test(originalWord)) return true

    // Mixed alphanumeric IDs: 8+ chars containing both a digit and a letter
    const isMixedId = /^[a-z0-9]{8,}$/i.test(cleanWord) &&
      /\d/.test(cleanWord) &&
      /[a-z]/i.test(cleanWord)
    if (isMixedId) return true

    // API keys, tokens, etc.
    return cleanWord.length >= 20 && /^[a-z0-9]+$/i.test(cleanWord)
  }

  /**
   * Apply final scoring adjustments based on sensitivity and context.
   *
   * Scores below `options.minScoreThreshold` become 0. Otherwise the score is
   * scaled by sensitivity ('low': x0.7 and only when above 6, 'high': x1.2,
   * anything else: x0.9), then by `options.weight`, rounding after each step.
   *
   * @param {number} score - Raw accumulated score.
   * @param {object} content - Analyzed content (not used by this method).
   * @param {boolean} [debug=false] - Enables console logging.
   * @returns {number} Adjusted score, never negative.
   */
  applyFinalAdjustments(score, content, debug = false) {
    // Apply minimum threshold first
    if (score < this.options.minScoreThreshold) {
      return 0
    }

    // Apply sensitivity adjustments
    const sensitivity = this.options.sensitivityLevel
    let adjusted
    if (sensitivity === 'low') {
      // More conservative - require higher confidence
      adjusted = score > 6 ? Math.round(score * 0.7) : 0
    } else if (sensitivity === 'high') {
      // More aggressive detection
      adjusted = Math.round(score * 1.2)
    } else {
      // 'medium' and any unrecognised level: standard, slight reduction
      adjusted = Math.round(score * 0.9)
    }

    // Apply final weight
    adjusted = Math.round(adjusted * this.options.weight)

    if (debug) {
      console.log(`🎯 KEYBOARD-SPAM: Applied ${sensitivity} sensitivity, final score: ${adjusted}`)
    }

    return Math.max(adjusted, 0)
  }
}
// Expose the plugin class as this module's CommonJS export.
module.exports = KeyboardSpamPlugin