UNPKG

obsidian-mcp-server

Version:

Obsidian Knowledge-Management MCP (Model Context Protocol) server that enables AI agents and development tools to interact with an Obsidian vault. It provides a comprehensive suite of tools for reading, writing, searching, and managing notes, tags, and frontmatter.

600 lines (599 loc) 28.7 kB
/**
 * @fileoverview Provides a comprehensive sanitization utility class for various input types,
 * including HTML, strings, URLs, file paths, JSON, and numbers. It also includes
 * functionality for redacting sensitive information from objects for safe logging.
 * @module src/utils/security/sanitization
 */
import path from "path";
import sanitizeHtml from "sanitize-html";
import validator from "validator";
import { BaseErrorCode, McpError } from "../../types-global/errors.js";
import { logger, requestContextService, } from "../internal/index.js"; // Use internal index

/**
 * Reports whether `targetPath` is `rootDir` itself or located beneath it.
 *
 * The decision is made on `path.relative` output rather than on a string prefix,
 * so a sibling directory that merely shares a prefix (`/srv/app` vs `/srv/app-old`)
 * is not mistaken for a child, and a `rootDir` that is the filesystem root works.
 *
 * @param {string} rootDir - Absolute directory that must contain the target.
 * @param {string} targetPath - Absolute path to test.
 * @returns {boolean} `true` when `targetPath` does not escape `rootDir`.
 */
function isPathInside(rootDir, targetPath) {
  const relativePath = path.relative(rootDir, targetPath);
  if (relativePath === "") {
    return true; // Same directory.
  }
  return (
    relativePath !== ".." &&
    !relativePath.startsWith(`..${path.sep}`) &&
    // On Windows, a target on another drive yields an absolute "relative" path.
    !path.isAbsolute(relativePath)
  );
}

/**
 * A singleton utility class for performing various input sanitization tasks.
 * It provides methods to clean and validate strings, HTML, URLs, file paths, JSON,
 * and numbers, and to redact sensitive data for logging.
 */
export class Sanitization {
  constructor() {
    // All entries are lowercase: redaction compares them against the lowercased key name,
    // so a mixed-case entry could never match.
    this.sensitiveFields = [
      "password",
      "token",
      "secret",
      "key",
      "apikey",
      "auth",
      "credential",
      "jwt",
      "ssn",
      "credit",
      "card",
      "cvv",
      "authorization",
      "passphrase",
      "privatekey", // Added more common sensitive field names
      "obsidianapikey", // Specific to this project potentially
    ];
    this.defaultHtmlSanitizeConfig = {
      allowedTags: [
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "p",
        "a",
        "ul",
        "ol",
        "li",
        "b",
        "i",
        "strong",
        "em",
        "strike",
        "code",
        "hr",
        "br",
        "div",
        "table",
        "thead",
        "tbody",
        "tr",
        "th",
        "td",
        "pre",
        "blockquote", // Added blockquote
      ],
      allowedAttributes: {
        a: ["href", "name", "target", "title"], // Added title for links
        img: ["src", "alt", "title", "width", "height"],
        "*": ["class", "id", "style", "data-*"], // Allow data-* attributes
      },
      preserveComments: false,
    };
    // Singleton constructor
  }

  /**
   * Gets the singleton instance of the `Sanitization` class.
   * @returns {Sanitization} The singleton instance.
   */
  static getInstance() {
    if (!Sanitization.instance) {
      Sanitization.instance = new Sanitization();
    }
    return Sanitization.instance;
  }

  /**
   * Sets or extends the list of field names considered sensitive for log redaction.
   * Field names are matched case-insensitively. Empty strings are ignored, because an
   * empty term is a substring of every key and would redact everything.
   * @param {string[]} fields - An array of field names to add to the sensitive list.
   * @param {RequestContext} [context] - Optional context for logging this configuration change.
   */
  setSensitiveFields(fields, context) {
    const opContext =
      context ||
      requestContextService.createRequestContext({
        operation: "Sanitization.setSensitiveFields",
      });
    const normalizedFields = fields
      .map((field) => field.toLowerCase())
      .filter((field) => field !== "");
    this.sensitiveFields = [
      ...new Set([...this.sensitiveFields, ...normalizedFields]),
    ];
    logger.debug("Updated sensitive fields list for log redaction.", {
      ...opContext,
      newCount: this.sensitiveFields.length,
    });
  }

  /**
   * Retrieves a copy of the current list of sensitive field names used for log redaction.
   * @returns {string[]} An array of sensitive field names (all lowercase).
   */
  getSensitiveFields() {
    return [...this.sensitiveFields];
  }

  /**
   * Sanitizes an HTML string by removing potentially malicious tags and attributes,
   * based on a configurable allow-list.
   * @param {string} input - The HTML string to sanitize.
   * @param {HtmlSanitizeConfig} [config] - Optional custom configuration for HTML sanitization.
   *   Overrides defaults for `allowedTags`, `allowedAttributes`, etc.
   * @returns {string} The sanitized HTML string. Returns an empty string if input is falsy.
   */
  sanitizeHtml(input, config) {
    if (!input) return "";
    const effectiveConfig = { ...this.defaultHtmlSanitizeConfig, ...config };
    const options = {
      allowedTags: effectiveConfig.allowedTags,
      allowedAttributes: effectiveConfig.allowedAttributes,
      transformTags: effectiveConfig.transformTags,
    };
    if (effectiveConfig.preserveComments) {
      // Ensure '!--' is not duplicated if already present
      options.allowedTags = [
        ...new Set([...(options.allowedTags || []), "!--"]),
      ];
    }
    return sanitizeHtml(input, options);
  }

  /**
   * Sanitizes a tag name by removing the leading '#' and replacing invalid characters.
   * @param {string} input - The tag string to sanitize.
   * @returns {string} The sanitized tag name.
   */
  sanitizeTagName(input) {
    if (!input) return "";
    // Remove leading '#' and replace spaces/invalid characters with nothing
    return input.replace(/^#/, "").replace(/[\s#,\\?%*:|"<>]/g, "");
  }

  /**
   * Sanitizes a string based on its intended usage context (e.g., HTML, URL, plain text).
   *
   * **Security Note:** Using `context: 'javascript'` is explicitly disallowed and will throw an `McpError`.
   * This is to prevent accidental introduction of XSS vulnerabilities through ineffective sanitization
   * of JavaScript code. Proper contextual encoding or safer methods should be used for JavaScript.
   *
   * @param {string} input - The string to sanitize.
   * @param {SanitizeStringOptions} [options={}] - Options specifying the sanitization context
   *   and any context-specific parameters (like `allowedTags` for HTML).
   * @param {RequestContext} [contextForLogging] - Optional context for logging warnings or errors.
   * @returns {string} The sanitized string. Returns an empty string if input is falsy, or if
   *   `options.context` is `'url'` and the input is not a valid http/https URL.
   * @throws {McpError} If `options.context` is `'javascript'`.
   */
  sanitizeString(input, options = {}, contextForLogging) {
    const opContext =
      contextForLogging ||
      requestContextService.createRequestContext({
        operation: "sanitizeString",
        inputContext: options.context,
      });
    if (!input) return "";
    switch (options.context) {
      case "html":
        return this.sanitizeHtml(input, {
          allowedTags: options.allowedTags,
          allowedAttributes: options.allowedAttributes
            ? this.convertAttributesFormat(options.allowedAttributes)
            : undefined,
        });
      case "attribute":
        // For HTML attributes, strip all tags. Values should be further encoded by the templating engine.
        return sanitizeHtml(input, { allowedTags: [], allowedAttributes: {} });
      case "url":
        // Validate and trim. sanitizeUrl throws McpError on failure, which is downgraded here.
        try {
          return this.sanitizeUrl(input, ["http", "https"], opContext); // Use sanitizeUrl for consistent validation
        } catch (urlError) {
          logger.warning("Invalid URL detected during string sanitization (context: url).", {
            ...opContext,
            input,
            error: urlError instanceof Error ? urlError.message : String(urlError),
          });
          return ""; // Return empty or rethrow, depending on desired strictness. Empty for now.
        }
      case "javascript":
        logger.error("Attempted JavaScript sanitization via sanitizeString, which is disallowed.", {
          ...opContext,
          inputPreview: input.substring(0, 100),
        });
        throw new McpError(BaseErrorCode.VALIDATION_ERROR, "JavaScript sanitization is not supported via sanitizeString due to security risks. Use appropriate contextual encoding or safer alternatives.", opContext);
      case "text":
      default:
        // Default to stripping all HTML for plain text contexts.
        return sanitizeHtml(input, { allowedTags: [], allowedAttributes: {} });
    }
  }

  /**
   * Sanitizes a URL string by validating its format and protocol.
   * @param {string} input - The URL string to sanitize.
   * @param {string[]} [allowedProtocols=['http', 'https']] - An array of allowed URL protocols (e.g., 'http', 'https', 'ftp').
   * @param {RequestContext} [contextForLogging] - Optional context for logging errors.
   * @returns {string} The sanitized and trimmed URL string.
   * @throws {McpError} If the URL is invalid, uses a disallowed protocol, or contains 'javascript:'.
   */
  sanitizeUrl(input, allowedProtocols = ["http", "https"], contextForLogging) {
    const opContext =
      contextForLogging ||
      requestContextService.createRequestContext({ operation: "sanitizeUrl" });
    try {
      if (!input || typeof input !== "string") {
        throw new Error("Invalid URL input: must be a non-empty string.");
      }
      const trimmedInput = input.trim();
      // Stricter check for 'javascript:' regardless of validator's protocol check
      if (trimmedInput.toLowerCase().startsWith("javascript:")) {
        throw new Error("JavaScript pseudo-protocol is explicitly disallowed.");
      }
      const isValidUrl = validator.isURL(trimmedInput, {
        protocols: allowedProtocols,
        require_protocol: true,
      });
      if (!isValidUrl) {
        throw new Error(`Invalid URL format or protocol not in allowed list: [${allowedProtocols.join(", ")}].`);
      }
      return trimmedInput;
    } catch (error) {
      const message = error instanceof Error ? error.message : "Invalid or disallowed URL.";
      logger.warning(`URL sanitization failed: ${message}`, {
        ...opContext,
        input,
      });
      throw new McpError(BaseErrorCode.VALIDATION_ERROR, message, {
        ...opContext,
        input,
      });
    }
  }

  /**
   * Sanitizes a file path to prevent path traversal attacks and normalize its format.
   *
   * With `rootDir`, the returned path is relative to `rootDir` (`"."` for the root itself).
   * Without `rootDir`, absolute paths are either kept (`allowAbsolute`) or stripped of their
   * leading slash/drive, and relative paths must stay inside the current working directory.
   *
   * @param {string} input - The file path string to sanitize.
   * @param {PathSanitizeOptions} [options={}] - Options to control sanitization behavior (e.g., `rootDir`, `toPosix`).
   * @param {RequestContext} [contextForLogging] - Optional context for logging warnings or errors.
   * @returns {SanitizedPathInfo} An object containing the sanitized path and metadata about the sanitization.
   * @throws {McpError} If the path is invalid (e.g., empty, contains null bytes) or determined to be unsafe
   *   (e.g., attempts to traverse outside `rootDir` or current working directory if no `rootDir`).
   */
  sanitizePath(input, options = {}, contextForLogging) {
    const opContext =
      contextForLogging ||
      requestContextService.createRequestContext({ operation: "sanitizePath" });
    const originalInput = input;
    const effectiveOptions = {
      toPosix: options.toPosix ?? false,
      allowAbsolute: options.allowAbsolute ?? false,
      rootDir: options.rootDir ? path.resolve(options.rootDir) : undefined, // Resolve rootDir upfront
    };
    // path.relative() emits platform separators, so root-relative results need the same
    // conversion that `toPosix` applies to the normalized input.
    const applySeparatorStyle = (value) =>
      effectiveOptions.toPosix ? value.replace(/\\/g, "/") : value;
    let wasAbsoluteInitially = false;
    let convertedToRelative = false;
    try {
      if (!input || typeof input !== "string") {
        throw new Error("Invalid path input: must be a non-empty string.");
      }
      if (input.includes("\0")) {
        throw new Error("Path contains null byte, which is disallowed.");
      }
      let normalized = path.normalize(input); // Normalize first (e.g., 'a/b/../c' -> 'a/c')
      wasAbsoluteInitially = path.isAbsolute(normalized);
      if (effectiveOptions.toPosix) {
        normalized = normalized.replace(/\\/g, "/");
      }
      // Re-evaluated after the separator conversion, which can change the answer.
      const isAbsoluteNow = path.isAbsolute(normalized);
      let finalSanitizedPath;
      if (effectiveOptions.rootDir) {
        let tempPathForResolve = normalized;
        if (isAbsoluteNow && !effectiveOptions.allowAbsolute) {
          // Absolute paths are not allowed: strip the leading slash/drive so the remainder
          // is resolved beneath rootDir instead of replacing it.
          tempPathForResolve = normalized.replace(/^(?:[A-Za-z]:)?[/\\]+/, "");
          convertedToRelative = true;
        } else if (isAbsoluteNow && effectiveOptions.allowAbsolute) {
          // Absolute path is allowed, but only when it lies within rootDir.
          if (!isPathInside(effectiveOptions.rootDir, normalized)) {
            throw new Error("Absolute path is outside the specified root directory.");
          }
          const relativeToRoot = path.relative(effectiveOptions.rootDir, normalized);
          // An empty result means the path is rootDir itself.
          finalSanitizedPath = relativeToRoot === "" ? "." : applySeparatorStyle(relativeToRoot);
          return {
            sanitizedPath: finalSanitizedPath,
            originalInput,
            wasAbsolute: wasAbsoluteInitially,
            convertedToRelative,
            optionsUsed: effectiveOptions,
          };
        }
        // Path was relative (or was made relative): resolve it beneath rootDir.
        const fullPath = path.resolve(effectiveOptions.rootDir, tempPathForResolve);
        if (!isPathInside(effectiveOptions.rootDir, fullPath)) {
          throw new Error("Path traversal detected: sanitized path escapes root directory.");
        }
        const relativeToRoot = path.relative(effectiveOptions.rootDir, fullPath);
        finalSanitizedPath = relativeToRoot === "" ? "." : applySeparatorStyle(relativeToRoot);
      } else if (isAbsoluteNow) {
        // No rootDir specified, absolute input.
        if (effectiveOptions.allowAbsolute) {
          finalSanitizedPath = normalized; // Absolute path allowed
        } else {
          // Convert to relative (strip leading slash/drive)
          finalSanitizedPath = normalized.replace(/^(?:[A-Za-z]:)?[/\\]+/, "");
          convertedToRelative = true;
        }
      } else {
        // No rootDir specified, relative input: it must not climb above the current working
        // directory. A plain string-prefix comparison is not sufficient here, because a sibling
        // such as "<cwd>-backup" shares the prefix while being outside the directory.
        const workingDir = path.resolve(".");
        if (!isPathInside(workingDir, path.resolve(normalized))) {
          throw new Error("Relative path traversal detected (escapes current working directory context).");
        }
        finalSanitizedPath = normalized;
      }
      return {
        sanitizedPath: finalSanitizedPath,
        originalInput,
        wasAbsolute: wasAbsoluteInitially,
        convertedToRelative,
        optionsUsed: effectiveOptions,
      };
    } catch (error) {
      const message = error instanceof Error ? error.message : "Invalid or unsafe path.";
      logger.warning(`Path sanitization error: ${message}`, {
        ...opContext,
        input: originalInput,
        options: effectiveOptions,
        errorDetails: String(error),
      });
      throw new McpError(BaseErrorCode.VALIDATION_ERROR, message, {
        ...opContext,
        input: originalInput,
      });
    }
  }

  /**
   * Sanitizes a JSON string by parsing it to validate its format.
   * Optionally checks if the JSON string's byte size exceeds a maximum limit.
   *
   * @template T The expected type of the parsed JSON object. Defaults to `unknown`.
   * @param {string} input - The JSON string to sanitize/validate.
   * @param {number} [maxSizeBytes] - Optional maximum allowed size of the JSON string in bytes.
   * @param {RequestContext} [contextForLogging] - Optional context for logging errors.
   * @returns {T} The parsed JavaScript object.
   * @throws {McpError} If the input is not a string, is not valid JSON, or exceeds `maxSizeBytes`.
   */
  sanitizeJson(input, maxSizeBytes, contextForLogging) {
    const opContext =
      contextForLogging ||
      requestContextService.createRequestContext({ operation: "sanitizeJson" });
    try {
      if (typeof input !== "string") {
        throw new Error("Invalid input: expected a JSON string.");
      }
      if (maxSizeBytes !== undefined) {
        const actualSize = Buffer.byteLength(input, "utf8");
        if (actualSize > maxSizeBytes) {
          // Thrown as McpError directly so the catch block re-throws it untouched.
          throw new McpError(BaseErrorCode.VALIDATION_ERROR, `JSON content exceeds maximum allowed size of ${maxSizeBytes} bytes. Actual size: ${actualSize} bytes.`, {
            ...opContext,
            size: actualSize,
            maxSize: maxSizeBytes,
          });
        }
      }
      // Note: This function only validates JSON structure. It does not sanitize content within the JSON.
      // For deep sanitization of object values, additional logic would be needed.
      return JSON.parse(input);
    } catch (error) {
      if (error instanceof McpError) throw error; // Re-throw if already McpError (e.g., size limit)
      const message = error instanceof Error ? error.message : "Invalid JSON format.";
      // The input may be a non-string (that is one of the failures reported here), so the
      // preview must not assume string methods exist on it.
      const previewSource =
        typeof input === "string" ? input : `[non-string input: ${typeof input}]`;
      logger.warning(`JSON sanitization failed: ${message}`, {
        ...opContext,
        inputPreview: previewSource.substring(0, 100),
        errorDetails: String(error),
      });
      throw new McpError(BaseErrorCode.VALIDATION_ERROR, message, {
        ...opContext,
        inputPreview:
          previewSource.length > 100
            ? `${previewSource.substring(0, 100)}...`
            : previewSource,
      });
    }
  }

  /**
   * Sanitizes a numeric input (number or string) by converting it to a number
   * and optionally clamping it within a specified min/max range.
   *
   * @param {number | string} input - The numeric value or string representation of a number.
   * @param {number} [min] - Optional minimum allowed value (inclusive).
   * @param {number} [max] - Optional maximum allowed value (inclusive).
   * @param {RequestContext} [contextForLogging] - Optional context for logging clamping or errors.
   * @returns {number} The sanitized (and potentially clamped) number.
   * @throws {McpError} If the input cannot be parsed into a valid, finite number.
   */
  sanitizeNumber(input, min, max, contextForLogging) {
    const opContext =
      contextForLogging ||
      requestContextService.createRequestContext({
        operation: "sanitizeNumber",
      });
    let value;
    if (typeof input === "string") {
      const trimmedInput = input.trim();
      // Validator's isNumeric allows empty strings, so check explicitly.
      if (trimmedInput === "" || !validator.isNumeric(trimmedInput)) {
        throw new McpError(BaseErrorCode.VALIDATION_ERROR, "Invalid number format: string is not numeric or is empty.", { ...opContext, input });
      }
      value = parseFloat(trimmedInput);
    } else if (typeof input === "number") {
      value = input;
    } else {
      throw new McpError(BaseErrorCode.VALIDATION_ERROR, "Invalid input type: expected number or string.", { ...opContext, input: String(input) });
    }
    // Number.isFinite rejects NaN as well as +/-Infinity.
    if (!Number.isFinite(value)) {
      throw new McpError(BaseErrorCode.VALIDATION_ERROR, "Invalid number value (NaN or Infinity).", { ...opContext, input });
    }
    const originalValueForLog = value; // Value before clamping, kept for the debug log
    let clamped = false;
    if (min !== undefined && value < min) {
      value = min;
      clamped = true;
    }
    if (max !== undefined && value > max) {
      value = max;
      clamped = true;
    }
    if (clamped) {
      logger.debug("Number clamped to range.", {
        ...opContext,
        originalValue: originalValueForLog,
        min,
        max,
        finalValue: value,
      });
    }
    return value;
  }

  /**
   * Sanitizes an object or array for logging by deep cloning it and redacting fields
   * whose names (case-insensitively) match any of the configured sensitive field names.
   * Redacted fields are replaced with the string `'[REDACTED]'`.
   *
   * @param {unknown} input - The object, array, or other value to sanitize for logging.
   *   If input is not an object or array, it's returned as is.
   * @param {RequestContext} [contextForLogging] - Optional context for logging errors during sanitization.
   * @returns {unknown} A sanitized copy of the input, safe for logging.
   *   Returns `'[Log Sanitization Failed]'` if an unexpected error occurs during sanitization.
   */
  sanitizeForLogging(input, contextForLogging) {
    const opContext =
      contextForLogging ||
      requestContextService.createRequestContext({
        operation: "sanitizeForLogging",
      });
    try {
      // Primitives and null are returned as is.
      if (input === null || typeof input !== "object") {
        return input;
      }
      // Prefer structuredClone when the runtime provides it; otherwise use a JSON round-trip.
      // The JSON round-trip loses functions and undefined and turns Dates into strings,
      // which is acceptable for log output.
      let clonedInput;
      if (typeof structuredClone === "function") {
        try {
          clonedInput = structuredClone(input);
        } catch {
          // structuredClone rejects values such as functions; the JSON round-trip simply
          // drops them, which is preferable to discarding the whole log payload.
          clonedInput = JSON.parse(JSON.stringify(input));
        }
      } else {
        clonedInput = JSON.parse(JSON.stringify(input));
      }
      this.redactSensitiveFields(clonedInput);
      return clonedInput;
    } catch (error) {
      logger.error("Error during log sanitization process.", error instanceof Error ? error : undefined, {
        ...opContext,
        errorDetails: error instanceof Error ? error.message : String(error),
      });
      return "[Log Sanitization Failed]"; // Fallback string indicating sanitization failure
    }
  }

  /**
   * Helper to convert attribute format for sanitize-html.
   * Currently an identity function: the input is returned unchanged, on the assumption
   * that `attrs` (from `SanitizeStringOptions`) is already in a format `sanitize-html` accepts.
   * @param {Record<string, string[]>} attrs - Attributes configuration.
   * @returns {sanitizeHtml.IOptions['allowedAttributes']} Attributes in `sanitize-html` format.
   * @private
   */
  convertAttributesFormat(attrs) {
    return attrs;
  }

  /**
   * Recursively redacts sensitive fields within an object or array.
   * This method modifies the input object/array in place.
   * @param {unknown} obj - The object or array to redact sensitive fields from.
   * @param {WeakSet<object>} [visited] - Objects already processed. Used internally to stop
   *   on circular references; callers normally omit it.
   * @private
   */
  redactSensitiveFields(obj, visited = new WeakSet()) {
    if (!obj || typeof obj !== "object") {
      return; // Not an object or array, or null
    }
    if (visited.has(obj)) {
      return; // Already handled; also breaks reference cycles.
    }
    visited.add(obj);
    if (Array.isArray(obj)) {
      for (const item of obj) {
        this.redactSensitiveFields(item, visited);
      }
      return;
    }
    // Plain object: only own enumerable keys are considered.
    for (const key of Object.keys(obj)) {
      // Special handling for non-serializable but non-sensitive objects
      if (key === "httpsAgent") {
        obj[key] = "[HttpAgent Instance]";
        continue;
      }
      const lowerKey = key.toLowerCase();
      // A key is sensitive when it contains any configured term as a substring.
      const isSensitive = this.sensitiveFields.some((field) => lowerKey.includes(field));
      if (isSensitive) {
        obj[key] = "[REDACTED]";
      } else {
        this.redactSensitiveFields(obj[key], visited);
      }
    }
  }
}

/**
 * A default, shared instance of the `Sanitization` class.
 * Use this instance for all sanitization tasks.
 *
 * Example:
 * ```typescript
 * import { sanitization, sanitizeInputForLogging } from './sanitization';
 *
 * const unsafeHtml = "<script>alert('xss')</script><p>Safe</p>";
 * const safeHtml = sanitization.sanitizeHtml(unsafeHtml);
 *
 * const sensitiveData = { password: '123', username: 'user' };
 * const safeLogData = sanitizeInputForLogging(sensitiveData);
 * // safeLogData will be { password: '[REDACTED]', username: 'user' }
 * ```
 */
export const sanitization = Sanitization.getInstance();

/**
 * A convenience function that wraps `sanitization.sanitizeForLogging`.
 * Sanitizes an object or array for logging by redacting sensitive fields.
 *
 * @param {unknown} input - The data to sanitize for logging.
 * @param {RequestContext} [contextForLogging] - Optional context for logging errors during sanitization.
 * @returns {unknown} A sanitized copy of the input, safe for logging.
 */
export const sanitizeInputForLogging = (input, contextForLogging) =>
  sanitization.sanitizeForLogging(input, contextForLogging);