UNPKG

perplexity-mcp-server

Version:

A Perplexity API Model Context Protocol (MCP) server that unlocks Perplexity's search-augmented AI capabilities for LLM agents. Features robust error handling, secure input validation, and transparent reasoning with the showThinking parameter. Built with

371 lines (370 loc) 16.1 kB
import path from 'path';
import sanitizeHtml from 'sanitize-html';
import validator from 'validator';
import { BaseErrorCode, McpError } from '../types-global/errors.js';
import { logger } from './logger.js';

/**
 * Report whether `target` is `base` itself or is located underneath it.
 *
 * The decision is made on the relative path from `base` to `target` rather
 * than on a string prefix, so a sibling such as `/srv/app-evil` is not
 * mistaken for a child of `/srv/app`, and a `base` that is the filesystem
 * root is handled like any other directory.
 *
 * @param base Resolved (absolute) directory that must contain the target
 * @param target Resolved (absolute) path to test
 * @returns true when target equals base or is nested inside it
 */
function isPathInside(base, target) {
    const relative = path.relative(base, target);
    if (relative === '') {
        return true;
    }
    // A leading ".." segment means we had to climb out of base; an absolute
    // result means no relative route exists between the two paths at all.
    const climbsOut = relative === '..' || relative.startsWith(`..${path.sep}`);
    return !climbsOut && !path.isAbsolute(relative);
}

/**
 * Sanitization class for handling various input sanitization tasks
 */
export class Sanitization {
    /**
     * Build a sanitizer with the default sensitive-field list and the default
     * sanitize-html configuration.
     *
     * Nothing in the emitted JavaScript prevents direct construction; use
     * `Sanitization.getInstance()` to obtain the shared instance.
     */
    constructor() {
        /** Default list of sensitive fields for sanitizing logs */
        this.sensitiveFields = [
            'password',
            'token',
            'secret',
            'key',
            'apiKey',
            'auth',
            'credential',
            'jwt',
            'ssn',
            'credit',
            'card',
            'cvv',
            'authorization'
        ];
        /** Default sanitize-html configuration */
        this.defaultHtmlSanitizeConfig = {
            allowedTags: [
                'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'a', 'ul', 'ol',
                'li', 'b', 'i', 'strong', 'em', 'strike', 'code', 'hr', 'br',
                'div', 'table', 'thead', 'tbody', 'tr', 'th', 'td', 'pre'
            ],
            allowedAttributes: {
                'a': ['href', 'name', 'target'],
                'img': ['src', 'alt', 'title', 'width', 'height'],
                '*': ['class', 'id', 'style']
            },
            preserveComments: false
        };
        logger.debug('Sanitization service initialized with modern libraries');
    }

    /**
     * Get the singleton Sanitization instance, creating it on first use.
     * @returns Sanitization instance
     */
    static getInstance() {
        if (!Sanitization.instance) {
            Sanitization.instance = new Sanitization();
        }
        return Sanitization.instance;
    }

    /**
     * Add field names to the sensitive-field list used for log sanitization.
     * The given names are merged with the current list (duplicates removed);
     * existing entries are never dropped.
     * @param fields Array of field names to consider sensitive
     */
    setSensitiveFields(fields) {
        this.sensitiveFields = [...new Set([...this.sensitiveFields, ...fields])]; // Ensure uniqueness
        logger.debug('Updated sensitive fields list', { count: this.sensitiveFields.length });
    }

    /**
     * Get the current list of sensitive fields
     * @returns A copy of the array of sensitive field names
     */
    getSensitiveFields() {
        return [...this.sensitiveFields];
    }

    /**
     * Sanitize HTML content using sanitize-html library
     * @param input HTML string to sanitize
     * @param config Optional custom sanitization config; missing entries fall
     *   back to the defaults set in the constructor
     * @returns Sanitized HTML, or '' when input is falsy
     */
    sanitizeHtml(input, config) {
        if (!input) {
            return '';
        }
        // Create sanitize-html options from our config
        const options = {
            allowedTags: config?.allowedTags || this.defaultHtmlSanitizeConfig.allowedTags,
            allowedAttributes: config?.allowedAttributes || this.defaultHtmlSanitizeConfig.allowedAttributes,
            transformTags: config?.transformTags
        };
        // Handle comments - if preserveComments is true, add '!--' to allowedTags
        if (config?.preserveComments || this.defaultHtmlSanitizeConfig.preserveComments) {
            options.allowedTags = [...(options.allowedTags || []), '!--'];
        }
        return sanitizeHtml(input, options);
    }

    /**
     * Sanitize string input based on context.
     *
     * **Important:** Using `context: 'javascript'` is explicitly disallowed and will throw an `McpError`.
     * This is a security measure to prevent accidental execution or ineffective sanitization of JavaScript code.
     *
     * @param input String to sanitize
     * @param options Sanitization options (`context`, plus `allowedTags` /
     *   `allowedAttributes` for the 'html' context)
     * @returns Sanitized string; '' for falsy input or, in the 'url' context,
     *   for a value that fails URL validation
     * @throws {McpError} If `context: 'javascript'` is used.
     */
    sanitizeString(input, options = {}) {
        if (!input) {
            return '';
        }
        // Handle based on context
        switch (options.context) {
            case 'html':
                // Use sanitize-html with custom options
                return this.sanitizeHtml(input, {
                    allowedTags: options.allowedTags,
                    allowedAttributes: options.allowedAttributes
                        ? this.convertAttributesFormat(options.allowedAttributes)
                        : undefined
                });
            case 'attribute':
                // Strip HTML tags for attribute context
                return sanitizeHtml(input, { allowedTags: [], allowedAttributes: {} });
            case 'url':
                // Validate and sanitize URL
                if (!validator.isURL(input, { protocols: ['http', 'https'], require_protocol: true })) {
                    // Return empty string for invalid URLs in this context
                    logger.warn('Invalid URL detected during string sanitization', { input });
                    return '';
                }
                return validator.trim(input);
            case 'javascript':
                // Reject any attempt to sanitize JavaScript
                logger.error('Attempted JavaScript sanitization via sanitizeString', { input: input.substring(0, 50) });
                throw new McpError(BaseErrorCode.VALIDATION_ERROR, 'JavaScript sanitization not supported through string sanitizer');
            case 'text':
            default:
                // Strip HTML tags for basic text context
                return sanitizeHtml(input, { allowedTags: [], allowedAttributes: {} });
        }
    }

    /**
     * Sanitize URL with robust validation and sanitization
     * @param input URL to sanitize
     * @param allowedProtocols Allowed URL protocols
     * @returns Sanitized (trimmed) URL
     * @throws {McpError} If URL is invalid
     */
    sanitizeUrl(input, allowedProtocols = ['http', 'https']) {
        try {
            // First validate the URL format
            if (!validator.isURL(input, { protocols: allowedProtocols, require_protocol: true })) {
                throw new Error('Invalid URL format or protocol');
            }
            // Double-check no javascript: protocol sneaked in, even if the
            // caller listed it in allowedProtocols
            const lowerInput = input.toLowerCase().trim();
            if (lowerInput.startsWith('javascript:')) {
                throw new Error('JavaScript protocol not allowed');
            }
            // Return the trimmed, validated URL
            return validator.trim(input);
        }
        catch (error) {
            throw new McpError(BaseErrorCode.VALIDATION_ERROR, error instanceof Error ? error.message : 'Invalid URL format', { input });
        }
    }

    /**
     * Sanitize file paths to prevent path traversal attacks
     * @param input Path to sanitize
     * @param options Options for path sanitization (`toPosix`,
     *   `allowAbsolute`, `rootDir`)
     * @returns Sanitized and normalized path; when `rootDir` is given, the
     *   path relative to that root
     * @throws {McpError} If path is invalid or unsafe
     */
    sanitizePath(input, options = {}) {
        try {
            if (!input || typeof input !== 'string') {
                throw new Error('Invalid path input: must be a non-empty string');
            }
            // Apply path normalization using built-in path module
            let normalized = path.normalize(input);
            // Prevent null byte injection
            if (normalized.includes('\0')) {
                throw new Error('Path contains null byte');
            }
            // Convert backslashes to forward slashes if toPosix is true
            if (options.toPosix) {
                normalized = normalized.replace(/\\/g, '/');
            }
            // Handle absolute paths based on allowAbsolute option
            if (!options.allowAbsolute && path.isAbsolute(normalized)) {
                // Remove leading slash or drive letter to make it relative
                normalized = normalized.replace(/^(?:[A-Za-z]:)?[/\\]/, '');
            }
            // If rootDir is specified, ensure the path doesn't escape it
            if (options.rootDir) {
                const rootDir = path.resolve(options.rootDir);
                // Resolve the normalized path against the root dir
                const fullPath = path.resolve(rootDir, normalized);
                // Containment is decided on the relative path so that a
                // filesystem-root rootDir and look-alike sibling directories
                // are both handled correctly
                if (!isPathInside(rootDir, fullPath)) {
                    throw new Error('Path traversal detected');
                }
                // Return the path relative to the root
                return path.relative(rootDir, fullPath);
            }
            // Final validation - check for relative path traversal attempts if not rooted
            if (normalized.includes('..')) {
                // Resolve the path to see if it escapes the current working directory
                const resolvedPath = path.resolve(normalized);
                const currentWorkingDir = path.resolve('.');
                // A bare prefix test would accept "<cwd>-something"; compare
                // on path segments instead
                if (!isPathInside(currentWorkingDir, resolvedPath)) {
                    throw new Error('Relative path traversal detected');
                }
            }
            return normalized;
        }
        catch (error) {
            logger.warn('Path sanitization error', {
                input,
                error: error instanceof Error ? error.message : String(error)
            });
            throw new McpError(BaseErrorCode.VALIDATION_ERROR, error instanceof Error ? error.message : 'Invalid or unsafe path', { input });
        }
    }

    /**
     * Sanitize a JSON string
     * @param input JSON string to sanitize
     * @param maxSize Maximum allowed size in bytes
     * @returns Parsed object
     * @throws {McpError} If input is not a string, is invalid JSON, or is too large
     */
    sanitizeJson(input, maxSize) {
        try {
            if (typeof input !== 'string') {
                throw new Error('Invalid input: expected a JSON string');
            }
            // Check size limit if specified
            if (maxSize !== undefined) {
                const size = Buffer.byteLength(input, 'utf8');
                if (size > maxSize) {
                    throw new McpError(BaseErrorCode.VALIDATION_ERROR, `JSON exceeds maximum allowed size of ${maxSize} bytes`, { size, maxSize });
                }
            }
            // Validate JSON format using JSON.parse for stricter validation than validator.isJSON
            return JSON.parse(input);
        }
        catch (error) {
            if (error instanceof McpError) {
                throw error;
            }
            // Only strings are truncated; guarding on the type keeps a
            // null/undefined input from raising a TypeError inside this
            // handler and masking the validation error
            const preview = typeof input === 'string' && input.length > 100
                ? `${input.substring(0, 100)}...`
                : input;
            throw new McpError(BaseErrorCode.VALIDATION_ERROR, error instanceof Error ? error.message : 'Invalid JSON format', { input: preview });
        }
    }

    /**
     * Ensure input is within a numeric range
     * @param input Number or string to validate
     * @param min Minimum allowed value (inclusive)
     * @param max Maximum allowed value (inclusive)
     * @returns Sanitized number, clamped into [min, max] when bounds are given
     * @throws {McpError} If input is not a valid number
     */
    sanitizeNumber(input, min, max) {
        let value;
        if (typeof input === 'string') {
            // Use validator for initial check, but rely on parseFloat for conversion
            const trimmed = input.trim();
            if (!validator.isNumeric(trimmed)) {
                throw new McpError(BaseErrorCode.VALIDATION_ERROR, 'Invalid number format', { input });
            }
            value = parseFloat(trimmed);
        }
        else if (typeof input === 'number') {
            value = input;
        }
        else {
            throw new McpError(BaseErrorCode.VALIDATION_ERROR, 'Invalid input type: expected number or string', { input: String(input) });
        }
        // Number.isFinite is false for NaN as well as +/-Infinity
        if (!Number.isFinite(value)) {
            throw new McpError(BaseErrorCode.VALIDATION_ERROR, 'Invalid number value (NaN or Infinity)', { input });
        }
        // Clamp the value to the specified range
        if (min !== undefined && value < min) {
            value = min;
            logger.debug('Number clamped to minimum value', { input, min, value });
        }
        if (max !== undefined && value > max) {
            value = max;
            logger.debug('Number clamped to maximum value', { input, max, value });
        }
        return value;
    }

    /**
     * Sanitize input for logging to protect sensitive information
     * @param input Input to sanitize
     * @returns A redacted deep copy of object input; non-object input
     *   unchanged; '[Log Sanitization Failed]' if cloning or redaction throws
     */
    sanitizeForLogging(input) {
        try {
            // Handle non-objects and null directly
            if (!input || typeof input !== 'object') {
                return input;
            }
            // Use structuredClone for deep copy if available (Node.js >= 17)
            // Fallback to JSON stringify/parse for older versions
            const clonedInput = typeof structuredClone === 'function'
                ? structuredClone(input)
                : JSON.parse(JSON.stringify(input));
            // Redact in place on the clone so the caller's object is untouched
            this.redactSensitiveFields(clonedInput);
            return clonedInput;
        }
        catch (error) {
            logger.error('Error during log sanitization', {
                error: error instanceof Error ? error.message : String(error)
            });
            // Return a placeholder if sanitization fails
            return '[Log Sanitization Failed]';
        }
    }

    /**
     * Helper to convert attribute format from record to sanitize-html format.
     * Currently an identity function.
     * @param attrs Map of tag name to allowed attribute names
     * @returns The same value, unchanged
     */
    convertAttributesFormat(attrs) {
        // sanitize-html directly supports Record<string, string[]> for allowedAttributes per tag
        return attrs;
    }

    /**
     * Recursively redact sensitive fields in an object or array, in place.
     *
     * A key is sensitive when its lower-cased name contains any lower-cased
     * entry of `this.sensitiveFields`; its value is replaced by '[REDACTED]'.
     *
     * @param obj Object or array to redact (mutated)
     * @param seen Objects already visited during this traversal; lets cyclic
     *   structures be processed without unbounded recursion
     */
    redactSensitiveFields(obj, seen = new WeakSet()) {
        if (!obj || typeof obj !== 'object' || seen.has(obj)) {
            return;
        }
        seen.add(obj);
        // Handle arrays: recurse into elements; primitives return immediately
        if (Array.isArray(obj)) {
            for (const item of obj) {
                this.redactSensitiveFields(item, seen);
            }
            return;
        }
        // Object.keys yields own enumerable keys only, so prototype
        // properties are never touched
        for (const key of Object.keys(obj)) {
            const lowerKey = key.toLowerCase();
            const isSensitive = this.sensitiveFields.some((field) => lowerKey.includes(field.toLowerCase()));
            if (isSensitive) {
                // Mask sensitive value
                obj[key] = '[REDACTED]';
            }
            else {
                // Nested objects/arrays are processed; primitives are left as is
                this.redactSensitiveFields(obj[key], seen);
            }
        }
    }
}

// Create and export singleton instance
export const sanitization = Sanitization.getInstance();

// Removed the `sanitizeInput` object export for simplicity.
// Users should import `sanitization` and call methods directly.
// e.g., import { sanitization } from './sanitization.js';
// sanitization.sanitizeHtml(input);
// sanitization.sanitizePath(input);

/**
 * Sanitize input for logging to protect sensitive information.
 * Kept as a separate export for convenience.
 * @param input Input to sanitize
 * @returns Sanitized input safe for logging
 */
export const sanitizeInputForLogging = (input) => sanitization.sanitizeForLogging(input);
// Removed default export