UNPKG

@dollhousemcp/mcp-server

Version:

DollhouseMCP - A Model Context Protocol (MCP) server that enables dynamic AI persona management from markdown files, allowing Claude and other compatible AI assistants to activate and switch between different behavioral personas.

150 lines 6.83 kB
/**
 * Content Validator for DollhouseMCP
 *
 * Protects against prompt injection attacks in collection personas
 * by detecting and sanitizing malicious content patterns.
 *
 * Security: SEC-001 - Critical vulnerability protection
 */
import type { SecurityTelemetry } from './telemetry/SecurityTelemetry.js';

/** Severity label that can be attached to a validation result. */
export type SecuritySeverity = 'low' | 'medium' | 'high' | 'critical';

/** Result object returned by the validation entry points of ContentValidator. */
export interface ContentValidationResult {
    /** Whether the validated input was accepted. */
    isValid: boolean;
    /** Sanitized form of the input, when the validator produced one. */
    sanitizedContent?: string;
    /** Descriptions of the patterns that were detected, when any were. */
    detectedPatterns?: string[];
    /** Severity reported for the result, when one was assigned. */
    severity?: SecuritySeverity;
}

/** Optional settings accepted by ContentValidator.validateAndSanitize(). */
export interface ContentValidatorOptions {
    /**
     * Skip size limit checks - useful for memory content that can be large
     * @default false
     */
    skipSizeCheck?: boolean;
    /**
     * Custom max length override
     * @default SECURITY_LIMITS.MAX_CONTENT_LENGTH
     */
    maxLength?: number;
    /**
     * Element type context for context-aware pattern matching.
     * Skills may legitimately contain code patterns (eval, exec, require)
     * that would be blocked in other contexts.
     * @since Issue #456
     */
    contentContext?: 'persona' | 'skill' | 'template' | 'agent' | 'memory';
}

/**
 * Static-only validator that screens element content, YAML frontmatter and
 * metadata for prompt-injection and related threats.
 */
export declare class ContentValidator {
    /**
     * SHA-256 hashes of bundled data/ elements verified against HASHES.json at seed time.
     * Content whose hash is in this set bypasses injection-pattern checks — it was
     * vetted by the DollhouseMCP team before being included in the npm package.
     * Unicode and YAML-bomb checks still run regardless.
     *
     * Populated by DefaultElementProvider.registerBundledHashes() on startup.
     * Only content that matches the published HASHES.json is registered — any
     * modification to a bundled file after install breaks the hash and revokes trust.
     */
    private static readonly bundledContentHashes;

    /**
     * Register a SHA-256 hex hash as trusted bundled content.
     *
     * @param sha256hex - SHA-256 digest, hex encoded, of the bundled content.
     */
    static registerBundledHash(sha256hex: string): void;

    /**
     * True if the given content hash belongs to a verified bundled element.
     *
     * @param content - Content to look up.
     *   NOTE(review): presumably hashed with SHA-256 before the lookup — confirm in the implementation.
     * @returns Whether the content is recognised as verified bundled content.
     */
    static isBundledContent(content: string): boolean;

    /** Resolver installed through configureTelemetryResolver(), if any. */
    private static telemetryResolver?;

    /**
     * Installs the callback used to obtain a SecurityTelemetry instance.
     *
     * @param resolver - Callback returning the telemetry instance, or
     *   `undefined` when none is available.
     */
    static configureTelemetryResolver(resolver: () => SecurityTelemetry | undefined): void;

    /** NOTE(review): presumably resolves telemetry via telemetryResolver — confirm in the implementation. */
    private static getTelemetry;

    /**
     * Pattern-based detection system for prompt injection attacks.
     *
     * This approach was chosen over AI-based detection because:
     * 1. Pattern matching cannot be socially engineered or confused
     * 2. Deterministic results ensure consistent security
     * 3. No additional API calls or latency
     * 4. Can't be bypassed by clever prompt engineering
     *
     * The patterns below represent known attack vectors from security research
     * and real-world exploit attempts against AI systems.
     */
    private static readonly INJECTION_PATTERNS;
    private static readonly YAML_BOMB_PATTERNS;
    private static readonly MALICIOUS_YAML_PATTERNS;

    /**
     * Content contexts where code execution patterns are legitimate and should
     * not trigger security blocks. Skills contain exemplar code; templates contain
     * code snippets that are rendered, never executed; agent definitions describe
     * technical workflows that may reference code. Prompt injection, actual token
     * exposure (ghp_/gho_), data exfiltration, and HTML/XSS patterns remain
     * active for ALL contexts.
     * @since Issue #456
     */
    private static readonly CODE_EXEMPT_CONTEXTS;

    /**
     * Pattern descriptions that are exempt for CODE_EXEMPT_CONTEXTS.
     * These patterns match legitimate code documentation, not threats.
     * @since Issue #456
     */
    private static readonly CODE_EXECUTION_PATTERNS;

    /**
     * Security documentation patterns exempt for CODE_EXEMPT_CONTEXTS.
     * Skills/agents that teach penetration testing, threat modeling, etc.
     * legitimately reference shell commands, file paths, and credential names.
     *
     * DISTINCTION FROM ACTIVE THREAT PATTERNS:
     * These patterns describe attacks (educational) — they appear in element
     * definitions that an author wrote, not in runtime user input. Patterns
     * that remain active even in exempt contexts are actual threats:
     * - Prompt injection (system/admin override, instruction manipulation)
     * - Real token formats (ghp_*, gho_* — not just the word "GITHUB_TOKEN")
     * - Data exfiltration commands (export/send all credentials)
     * - HTML/XSS injection (renders in the web console)
     *
     * @since Issue #1725
     */
    private static readonly SECURITY_DOC_PATTERNS;

    /**
     * HTML/XSS pattern descriptions exempt for template context.
     * Templates use <template>, <style>, <script> as section delimiters.
     * @since Issue #803
     */
    private static readonly HTML_SECTION_PATTERNS;

    /**
     * Handles Unicode validation and threat detection
     * REFACTOR: Extracted from validateAndSanitize() to reduce cognitive complexity
     * Returns normalized content and Unicode severity without aborting early
     */
    private static handleUnicodeValidation;

    /**
     * Checks content for injection patterns and logs/sanitizes threats
     * REFACTOR: Extracted from validateAndSanitize() to reduce cognitive complexity
     *
     * @param originalContent - Original content to check patterns against
     * @param normalizedContent - Normalized content to apply replacements to
     * @param detectedPatterns - Array to accumulate detected pattern descriptions
     * @param currentSeverity - Current highest severity level
     * @param maxLength - Maximum allowed content length for regex validation
     */
    private static checkInjectionPatterns;

    /**
     * Validates and sanitizes persona content for security threats
     * FIX #1269: Added options to support large memory content
     * REFACTOR: Reduced cognitive complexity by extracting helper methods
     *
     * SECURITY FIX (DMCP-SEC-004): Length checks now performed on NORMALIZED content
     * to prevent bypass attacks using Unicode combining characters or zero-width chars.
     * A pre-check with generous multiplier prevents DoS from huge payloads.
     *
     * @param content - Raw content to validate.
     * @param options - Optional size-check and context settings.
     * @returns The validation outcome for the content.
     */
    static validateAndSanitize(content: string, options?: ContentValidatorOptions): ContentValidationResult;

    /**
     * Validates YAML frontmatter for malicious content
     * SECURITY FIX #364: Added YAML bomb detection to prevent denial of service
     *
     * @param yamlContent - YAML frontmatter text to inspect.
     * @returns NOTE(review): presumably `true` when the YAML is considered safe — confirm in the implementation.
     */
    static validateYamlContent(yamlContent: string): boolean;

    /**
     * Validates persona metadata fields
     *
     * @param metadata - Untrusted metadata value. Declared as `unknown` rather
     *   than `any` so the untyped value does not silently disable type checking
     *   at this security boundary; every existing call site remains valid.
     * @returns The validation outcome for the metadata.
     */
    static validateMetadata(metadata: unknown): ContentValidationResult;

    /**
     * Sanitizes a complete persona file (frontmatter + content)
     *
     * @param content - Full persona file text.
     * @returns The sanitized file text.
     */
    static sanitizePersonaContent(content: string): string;
}
//# sourceMappingURL=contentValidator.d.ts.map