@dollhousemcp/mcp-server
Version:
DollhouseMCP - A Model Context Protocol (MCP) server that enables dynamic AI persona management from markdown files, allowing Claude and other compatible AI assistants to activate and switch between different behavioral personas.
97 lines • 3.57 kB
TypeScript
/**
* Unicode Validator for DollhouseMCP
*
* Prevents Unicode-based bypass attacks including:
* - Homograph attacks (visually similar characters)
* - Direction override attacks (RLO/LRO)
* - Mixed script attacks
* - Zero-width character injection
* - Unicode normalization bypasses
*
* Security: SEC-001 - Unicode attack prevention
*/
/**
* Result object returned by {@link UnicodeValidator.normalize}.
*
* Only the field names and types are visible in this declaration file; the
* notes below on when each field is populated are assumptions to confirm
* against the implementation.
*/
export interface UnicodeValidationResult {
/** Overall validation verdict. Presumably `false` when issues were detected - TODO confirm. */
isValid: boolean;
/** The content after normalization. Always present, whatever the verdict. */
normalizedContent: string;
/** Optional list of issue descriptions. Presumably omitted when nothing was detected - TODO confirm. */
detectedIssues?: string[];
/** Optional severity rating; one of the four listed levels when present. */
severity?: 'low' | 'medium' | 'high' | 'critical';
}
/**
* Static utility class for validating and normalizing Unicode text.
*
* Every member is declared `static` and no constructor is declared, so the
* class is used through `UnicodeValidator.<member>` rather than instantiated.
* The public surface is {@link UnicodeValidator.normalize},
* {@link UnicodeValidator.containsDangerousUnicode} and
* {@link UnicodeValidator.getSafePreview}.
*
* Private members appear here without types or initializers because this is
* a declaration file; their actual values and signatures live in the
* implementation.
*/
export declare class UnicodeValidator {
// ---- Unicode attack patterns and confusable characters ----
/**
* Direction override characters that can hide or reverse text display
* @see https://unicode.org/reports/tr9/#Directional_Formatting_Characters
* U+202A-U+202E: Left/Right embedding and override marks (LRE, RLE, PDF, LRO, RLO)
* U+2066-U+2069: Isolate formatting characters (LRI, RLI, FSI, PDI)
*/
private static readonly DIRECTION_OVERRIDE_CHARS;
/**
* Zero-width and invisible formatting characters often used to hide payloads
* U+200B-U+200F: Zero-width spaces and directional marks
* U+2028-U+202F: Line/paragraph separators and formatting characters
* U+FEFF: Zero-width no-break space (Byte Order Mark)
*
* NOTE(review): as documented, the U+2028-U+202F range overlaps the
* U+202A-U+202E range listed for DIRECTION_OVERRIDE_CHARS. Confirm against
* the implementation whether the overlap is intentional and which check
* reports those code points.
*/
private static readonly ZERO_WIDTH_CHARS;
/**
* Non-printable control characters that should not appear in normal text
* U+0000-U+0008, U+000B-U+000C, U+000E-U+001F: C0 control codes (except TAB, LF, CR)
* U+007F-U+009F: Delete and C1 control codes
* U+FFFE-U+FFFF: Non-characters that should never appear in valid text
*/
private static readonly NON_PRINTABLE_CHARS;
/**
* Common homograph/confusable character mappings
* Maps visually similar Unicode characters to their ASCII equivalents
*
* The container type (Map, object literal, ...) is not visible in this
* declaration file.
*/
private static readonly CONFUSABLE_MAPPINGS;
/**
* Script mixing detection patterns
* Detects suspicious mixing of different Unicode scripts
*
* Which scripts are covered is defined in the implementation and is not
* visible in this declaration file.
*/
private static readonly SCRIPT_PATTERNS;
/**
* Normalize Unicode content to prevent bypass attacks
*
* The individual normalization steps are not visible in this declaration
* file; see the implementation for the order in which they are applied.
*
* @param content - The text to validate and normalize.
* @returns A {@link UnicodeValidationResult} carrying the normalized text
* together with the validation verdict and, optionally, the detected issues
* and a severity rating.
*/
static normalize(content: string): UnicodeValidationResult;
/**
* Detect suspicious Unicode patterns that might indicate attacks
*
* Private helper; its parameters and return type are elided here.
*/
private static detectSuspiciousPatterns;
/**
* Replace confusable Unicode characters with ASCII equivalents
*
* Presumably driven by CONFUSABLE_MAPPINGS - TODO confirm against the
* implementation.
*/
private static replaceConfusables;
/**
* Detect suspicious mixing of different Unicode scripts
*
* Presumably driven by SCRIPT_PATTERNS - TODO confirm against the
* implementation.
*/
private static detectMixedScripts;
/**
* Escalate severity level (higher severity takes precedence)
*
* Presumably ordered low < medium < high < critical, matching the
* `severity` union on UnicodeValidationResult - TODO confirm.
*/
private static escalateSeverity;
/**
* Escape special regex characters for safe replacement
*/
private static escapeRegex;
/**
* Check if content contains potentially dangerous Unicode patterns
*
* Unlike {@link UnicodeValidator.normalize}, this returns only a boolean and
* no normalized text.
*
* @param content - The text to inspect.
* @returns A boolean verdict; presumably `true` when a dangerous pattern is
* found - TODO confirm against the implementation.
*/
static containsDangerousUnicode(content: string): boolean;
/**
* Check if content has excessive Unicode escape sequences
*
* Per the original note, match results are null-checked before use, so an
* input with no matches does not raise a TypeError. The threshold for
* "excessive" is defined in the implementation and is not visible here.
*/
private static hasExcessiveUnicodeEscapes;
/**
* Safely check for malformed surrogate pairs without ReDoS vulnerability
* Uses character-by-character validation instead of complex regex
*/
private static hasMalformedSurrogates;
/**
* Get safe preview of Unicode content for logging
*
* @param content - The text to preview.
* @param maxLength - Optional length limit for the preview. The default
* applied when omitted is defined in the implementation and is not visible
* in this declaration file.
* @returns A string intended to be safe to write to logs.
*/
static getSafePreview(content: string, maxLength?: number): string;
}
//# sourceMappingURL=unicodeValidator.d.ts.map