perplexity-mcp-server
Version:
A Perplexity API Model Context Protocol (MCP) server that unlocks Perplexity's search-augmented AI capabilities for LLM agents. Features robust error handling, secure input validation, and transparent reasoning with the showThinking parameter. Built with
452 lines (451 loc) • 19.2 kB
JavaScript
/**
* @fileoverview Provides a comprehensive `Sanitization` class for various input cleaning and validation tasks.
* This module includes utilities for sanitizing HTML, strings, URLs, file paths, JSON, numbers,
* and for redacting sensitive information from data intended for logging.
* @module src/utils/security/sanitization
*/
import path from "path";
import sanitizeHtml from "sanitize-html";
import validator from "validator";
import { BaseErrorCode, McpError } from "../../types-global/errors.js";
import { logger, requestContextService } from "../index.js";
/**
 * A singleton class providing various methods for input sanitization.
 * Aims to protect against common vulnerabilities like XSS and path traversal,
 * and to keep sensitive values out of log output.
 *
 * Obtain the shared instance through `Sanitization.getInstance()`.
 */
export class Sanitization {
  /** @private */
  constructor() {
    /**
     * Default list of field names considered sensitive for log redaction.
     * Entries are stored lower-cased because keys are lower-cased before the
     * substring comparison in `isSensitiveKey`.
     * @private
     */
    this.sensitiveFields = [
      "password",
      "token",
      "secret",
      "key",
      "apiKey",
      "auth",
      "credential",
      "jwt",
      "ssn",
      "credit",
      "card",
      "cvv",
      "authorization",
    ].map((field) => field.toLowerCase());
    /**
     * Default configuration for HTML sanitization.
     * @private
     */
    this.defaultHtmlSanitizeConfig = {
      allowedTags: [
        "h1",
        "h2",
        "h3",
        "h4",
        "h5",
        "h6",
        "p",
        "a",
        "ul",
        "ol",
        "li",
        "b",
        "i",
        "strong",
        "em",
        "strike",
        "code",
        "hr",
        "br",
        "div",
        "table",
        "thead",
        "tbody",
        "tr",
        "th",
        "td",
        "pre",
      ],
      allowedAttributes: {
        a: ["href", "name", "target"],
        img: ["src", "alt", "title", "width", "height"],
        "*": ["class", "id", "style"],
      },
      preserveComments: false,
    };
  }

  /**
   * Retrieves the singleton instance of the `Sanitization` class,
   * creating it on first use.
   * @returns The singleton `Sanitization` instance.
   */
  static getInstance() {
    if (!Sanitization.instance) {
      Sanitization.instance = new Sanitization();
    }
    return Sanitization.instance;
  }

  /**
   * Reports whether `candidatePath` is `parentDir` itself or lies beneath it.
   * Both arguments are expected to be absolute, already-resolved paths.
   * @param parentDir - The directory that must contain the candidate.
   * @param candidatePath - The path to test.
   * @returns `true` when the candidate does not escape `parentDir`.
   * @private
   */
  static isPathInside(parentDir, candidatePath) {
    // Comparing via path.relative avoids the "root + separator" prefix trap:
    // when parentDir is the filesystem root, appending a separator yields a
    // prefix ("//") that no resolved path starts with.
    const relativePath = path.relative(parentDir, candidatePath);
    return (
      relativePath !== ".." &&
      !relativePath.startsWith(`..${path.sep}`) &&
      !path.isAbsolute(relativePath)
    );
  }

  /**
   * Extends the list of sensitive field names for log sanitization.
   * New names are lower-cased and de-duplicated against the existing list.
   * @param fields - An array of field names to add to the sensitive list.
   */
  setSensitiveFields(fields) {
    this.sensitiveFields = [
      ...new Set([
        ...this.sensitiveFields,
        ...fields.map((f) => f.toLowerCase()),
      ]),
    ];
    const logContext = requestContextService.createRequestContext({
      operation: "Sanitization.setSensitiveFields",
      newSensitiveFieldCount: this.sensitiveFields.length,
    });
    logger.debug("Updated sensitive fields list for log sanitization", logContext);
  }

  /**
   * Gets a copy of the current list of sensitive field names.
   * @returns An array of sensitive field names.
   */
  getSensitiveFields() {
    return [...this.sensitiveFields];
  }

  /**
   * Sanitizes an HTML string by removing potentially malicious tags and attributes.
   * Any option that is missing or `undefined`/`null` in `config` falls back to
   * the class default for that option.
   * @param input - The HTML string to sanitize.
   * @param config - Optional custom configuration for `sanitize-html`.
   * @returns The sanitized HTML string. Returns an empty string if input is falsy.
   */
  sanitizeHtml(input, config) {
    if (!input) {
      return "";
    }
    const defaults = this.defaultHtmlSanitizeConfig;
    // Merge option-by-option with `??` rather than an object spread: a spread
    // lets an explicit `undefined` (which sanitizeString passes when the caller
    // omits an option) overwrite the defaults, discarding the allow-lists.
    const options = {
      allowedTags: config?.allowedTags ?? defaults.allowedTags,
      allowedAttributes: config?.allowedAttributes ?? defaults.allowedAttributes,
      transformTags: config?.transformTags ?? defaults.transformTags,
    };
    const preserveComments =
      config?.preserveComments ?? defaults.preserveComments;
    if (preserveComments) {
      options.allowedTags = [...(options.allowedTags || []), "!--"];
    }
    return sanitizeHtml(input, options);
  }

  /**
   * Sanitizes a string based on its intended context (e.g., HTML, URL, text).
   * **Important:** `context: 'javascript'` is disallowed due to security risks.
   *
   * Behavior per context:
   * - `html`: delegates to `sanitizeHtml` with the caller's tag/attribute lists.
   * - `url`: returns the URL if it is a valid http/https URL; otherwise logs a
   *   warning and returns an empty string (it does not throw).
   * - `javascript`: logs an error and throws.
   * - `attribute`, `text`, or anything else: strips all tags and attributes.
   *
   * @param input - The string to sanitize.
   * @param options - Options specifying the sanitization context.
   * @returns The sanitized string. Returns an empty string if input is falsy
   *   or, for `context: 'url'`, if the URL fails validation.
   * @throws {McpError} If `options.context` is 'javascript'.
   */
  sanitizeString(input, options = {}) {
    if (!input) {
      return "";
    }
    switch (options.context) {
      case "html":
        return this.sanitizeHtml(input, {
          allowedTags: options.allowedTags,
          allowedAttributes: options.allowedAttributes
            ? this.convertAttributesFormat(options.allowedAttributes)
            : undefined,
        });
      case "url":
        if (
          !validator.isURL(input, {
            protocols: ["http", "https"],
            require_protocol: true,
            require_host: true,
          })
        ) {
          logger.warning(
            "Potentially invalid URL detected during string sanitization (context: url)",
            requestContextService.createRequestContext({
              operation: "Sanitization.sanitizeString.urlWarning",
              invalidUrlAttempt: input,
            }),
          );
          return "";
        }
        return validator.trim(input);
      case "javascript":
        logger.error(
          "Attempted JavaScript sanitization via sanitizeString, which is disallowed.",
          requestContextService.createRequestContext({
            operation: "Sanitization.sanitizeString.jsAttempt",
            inputSnippet: input.substring(0, 50),
          }),
        );
        throw new McpError(
          BaseErrorCode.VALIDATION_ERROR,
          "JavaScript sanitization is not supported through sanitizeString due to security risks.",
        );
      case "attribute":
      case "text":
      default:
        // Plain-text contexts: no tags and no attributes survive.
        return sanitizeHtml(input, { allowedTags: [], allowedAttributes: {} });
    }
  }

  /**
   * Converts attribute format for `sanitizeHtml`.
   * Currently an identity function: the input is returned unchanged.
   * @param attrs - Attributes in `{ tagName: ['attr1'] }` format.
   * @returns Attributes in `sanitize-html` expected format.
   * @private
   */
  convertAttributesFormat(attrs) {
    return attrs;
  }

  /**
   * Sanitizes a URL string by validating its format and protocol.
   * @param input - The URL string to sanitize.
   * @param allowedProtocols - Array of allowed URL protocols. Default: `['http', 'https']`.
   * @returns The sanitized and trimmed URL string.
   * @throws {McpError} If the input is not a string, the URL is invalid, or it
   *   uses a disallowed protocol.
   */
  sanitizeUrl(input, allowedProtocols = ["http", "https"]) {
    try {
      if (typeof input !== "string") {
        throw new Error("Invalid URL input: must be a string.");
      }
      const trimmedInput = input.trim();
      if (
        !validator.isURL(trimmedInput, {
          protocols: allowedProtocols,
          require_protocol: true,
          require_host: true,
        })
      ) {
        throw new Error("Invalid URL format or protocol not in allowed list.");
      }
      // Rejected even if a caller lists them in allowedProtocols.
      const lowercasedInput = trimmedInput.toLowerCase();
      const blockedSchemes = ["javascript:", "data:", "vbscript:"];
      if (blockedSchemes.some((scheme) => lowercasedInput.startsWith(scheme))) {
        throw new Error(
          "Disallowed pseudo-protocol (javascript:, data:, or vbscript:) in URL.",
        );
      }
      return trimmedInput;
    } catch (error) {
      throw new McpError(
        BaseErrorCode.VALIDATION_ERROR,
        error instanceof Error
          ? error.message
          : "Invalid or unsafe URL provided.",
        { input },
      );
    }
  }

  /**
   * Sanitizes a file path to prevent path traversal and normalize format.
   *
   * - With `options.rootDir`, the path is resolved against that root, must stay
   *   inside it, and is returned relative to it (`"."` for the root itself).
   * - Without `rootDir`, an absolute path is either kept (`allowAbsolute`) or
   *   has its leading drive/slashes stripped; a relative path must not escape
   *   the current working directory.
   *
   * @param input - The file path string to sanitize.
   * @param options - Options to control sanitization behavior
   *   (`toPosix`, `allowAbsolute`, `rootDir`).
   * @returns An object with the sanitized path and sanitization metadata.
   * @throws {McpError} If the path is invalid or unsafe.
   */
  sanitizePath(input, options = {}) {
    const originalInput = input;
    const effectiveOptions = {
      toPosix: options.toPosix ?? false,
      allowAbsolute: options.allowAbsolute ?? false,
      rootDir: options.rootDir ? path.resolve(options.rootDir) : undefined,
    };
    const toPosixSeparators = (value) => value.replace(/\\/g, "/");
    let wasAbsoluteInitially = false;
    try {
      if (!input || typeof input !== "string") {
        throw new Error("Invalid path input: must be a non-empty string.");
      }
      if (input.includes("\0")) {
        throw new Error("Path contains null byte, which is disallowed.");
      }
      let normalized = path.normalize(input);
      wasAbsoluteInitially = path.isAbsolute(normalized);
      if (effectiveOptions.toPosix) {
        normalized = toPosixSeparators(normalized);
      }
      let finalSanitizedPath;
      if (effectiveOptions.rootDir) {
        const fullPath = path.resolve(effectiveOptions.rootDir, normalized);
        if (!Sanitization.isPathInside(effectiveOptions.rootDir, fullPath)) {
          throw new Error(
            "Path traversal detected: attempts to escape the defined root directory.",
          );
        }
        finalSanitizedPath =
          path.relative(effectiveOptions.rootDir, fullPath) || ".";
        // path.relative emits platform separators, so re-apply the POSIX
        // conversion to the final result when it was requested.
        if (effectiveOptions.toPosix) {
          finalSanitizedPath = toPosixSeparators(finalSanitizedPath);
        }
      } else if (path.isAbsolute(normalized)) {
        // Without a root, a disallowed absolute path is made relative by
        // dropping its drive letter and leading separators.
        finalSanitizedPath = effectiveOptions.allowAbsolute
          ? normalized
          : normalized.replace(/^(?:[A-Za-z]:)?[/\\]+/, "");
      } else {
        const resolvedAgainstCwd = path.resolve(normalized);
        const currentWorkingDir = path.resolve(".");
        if (!Sanitization.isPathInside(currentWorkingDir, resolvedAgainstCwd)) {
          throw new Error(
            "Relative path traversal detected (escapes current working directory context).",
          );
        }
        finalSanitizedPath = normalized;
      }
      return {
        sanitizedPath: finalSanitizedPath,
        originalInput,
        wasAbsolute: wasAbsoluteInitially,
        convertedToRelative:
          wasAbsoluteInitially &&
          !path.isAbsolute(finalSanitizedPath) &&
          !effectiveOptions.allowAbsolute,
        optionsUsed: effectiveOptions,
      };
    } catch (error) {
      logger.warning(
        "Path sanitization error",
        requestContextService.createRequestContext({
          operation: "Sanitization.sanitizePath.error",
          originalPathInput: originalInput,
          pathOptionsUsed: effectiveOptions,
          errorMessage: error instanceof Error ? error.message : String(error),
        }),
      );
      throw new McpError(
        BaseErrorCode.VALIDATION_ERROR,
        error instanceof Error
          ? error.message
          : "Invalid or unsafe path provided.",
        { input: originalInput },
      );
    }
  }

  /**
   * Sanitizes a JSON string by parsing it to validate its format.
   * Optionally checks if the JSON string exceeds a maximum allowed size.
   * @template T The expected type of the parsed JSON object. Defaults to `unknown`.
   * @param input - The JSON string to sanitize/validate.
   * @param maxSize - Optional maximum allowed size of the JSON string in bytes.
   * @returns The parsed JavaScript object.
   * @throws {McpError} If input is not a string, too large, or invalid JSON.
   */
  sanitizeJson(input, maxSize) {
    // Validate the type up front so the error path never touches string
    // methods on a non-string value.
    if (typeof input !== "string") {
      throw new McpError(
        BaseErrorCode.VALIDATION_ERROR,
        "Invalid input: expected a JSON string.",
        { inputType: input === null ? "null" : typeof input },
      );
    }
    if (maxSize !== undefined) {
      const actualSize = Buffer.byteLength(input, "utf8");
      if (actualSize > maxSize) {
        throw new McpError(
          BaseErrorCode.VALIDATION_ERROR,
          `JSON string exceeds maximum allowed size of ${maxSize} bytes.`,
          { actualSize, maxSize },
        );
      }
    }
    try {
      return JSON.parse(input);
    } catch (error) {
      throw new McpError(
        BaseErrorCode.VALIDATION_ERROR,
        error instanceof Error ? error.message : "Invalid JSON format.",
        {
          inputPreview:
            input.length > 100 ? `${input.substring(0, 100)}...` : input,
        },
      );
    }
  }

  /**
   * Validates and sanitizes a numeric input, converting strings to numbers.
   * Clamps the number to `min`/`max` if provided; a clamp is logged at debug level.
   * @param input - The number or string to validate and sanitize.
   * @param min - Minimum allowed value (inclusive).
   * @param max - Maximum allowed value (inclusive).
   * @returns The sanitized (and potentially clamped) number.
   * @throws {McpError} If input is not a valid number, NaN, or Infinity.
   */
  sanitizeNumber(input, min, max) {
    let value;
    if (typeof input === "string") {
      const trimmedInput = input.trim();
      if (trimmedInput === "" || !validator.isNumeric(trimmedInput)) {
        throw new McpError(
          BaseErrorCode.VALIDATION_ERROR,
          "Invalid number format: input is empty or not numeric.",
          { input },
        );
      }
      value = parseFloat(trimmedInput);
    } else if (typeof input === "number") {
      value = input;
    } else {
      throw new McpError(
        BaseErrorCode.VALIDATION_ERROR,
        "Invalid input type: expected number or string.",
        { input: String(input) },
      );
    }
    // Number.isFinite is false for NaN and for +/-Infinity.
    if (!Number.isFinite(value)) {
      throw new McpError(
        BaseErrorCode.VALIDATION_ERROR,
        "Invalid number value (NaN or Infinity).",
        { input },
      );
    }
    const originalValueForLog = value;
    if (min !== undefined && value < min) {
      value = min;
    }
    if (max !== undefined && value > max) {
      value = max;
    }
    if (value !== originalValueForLog) {
      logger.debug(
        "Number clamped to range.",
        requestContextService.createRequestContext({
          operation: "Sanitization.sanitizeNumber.clamped",
          originalInput: String(input),
          parsedValue: originalValueForLog,
          minValue: min,
          maxValue: max,
          clampedValue: value,
        }),
      );
    }
    return value;
  }

  /**
   * Sanitizes input for logging by redacting sensitive fields.
   * Creates a deep clone and replaces values of fields matching `this.sensitiveFields`
   * (case-insensitive substring match) with "[REDACTED]". The caller's object is
   * never modified.
   *
   * It uses `structuredClone` if available for a high-fidelity deep clone.
   * If `structuredClone` is not available (e.g., in older Node.js environments),
   * it falls back to `JSON.parse(JSON.stringify(input))`. This fallback has limitations:
   * - `Date` objects are converted to ISO date strings.
   * - `undefined` values within objects are removed.
   * - `Map`, `Set`, `RegExp` objects are converted to empty objects (`{}`).
   * - Functions are removed.
   * - `BigInt` values will throw an error during `JSON.stringify` unless a `toJSON` method is provided.
   * - Circular references will cause `JSON.stringify` to throw an error.
   *
   * @param input - The input data to sanitize for logging.
   * @returns A sanitized (deep cloned) version of the input, safe for logging.
   *   Returns original input if not object/array, or "[Log Sanitization Failed]" on error.
   */
  sanitizeForLogging(input) {
    try {
      if (!input || typeof input !== "object") {
        return input;
      }
      const clonedInput =
        typeof globalThis.structuredClone === "function"
          ? globalThis.structuredClone(input)
          : JSON.parse(JSON.stringify(input));
      this.redactSensitiveFields(clonedInput);
      return clonedInput;
    } catch (error) {
      logger.error(
        "Error during log sanitization, returning placeholder.",
        requestContextService.createRequestContext({
          operation: "Sanitization.sanitizeForLogging.error",
          errorMessage: error instanceof Error ? error.message : String(error),
        }),
      );
      return "[Log Sanitization Failed]";
    }
  }

  /**
   * Reports whether a key name contains any configured sensitive field name
   * (case-insensitive substring match).
   * @param key - The property or map key to test.
   * @returns `true` if the key should be redacted.
   * @private
   */
  isSensitiveKey(key) {
    const lowerKey = key.toLowerCase();
    return this.sensitiveFields.some((field) => lowerKey.includes(field));
  }

  /**
   * Recursively redacts sensitive fields in an object or array in place.
   * Also walks `Map` values (redacting entries whose string key is sensitive)
   * and `Set` members.
   * @param obj - The object or array to redact.
   * @param seen - Containers already visited; guards against circular
   *   references. Callers normally omit it.
   * @private
   */
  redactSensitiveFields(obj, seen = new WeakSet()) {
    if (!obj || typeof obj !== "object" || seen.has(obj)) {
      return;
    }
    // A structuredClone keeps cycles intact, so remember each container
    // to avoid unbounded recursion.
    seen.add(obj);
    if (Array.isArray(obj) || obj instanceof Set) {
      for (const item of obj) {
        this.redactSensitiveFields(item, seen);
      }
      return;
    }
    if (obj instanceof Map) {
      for (const [key, value] of obj) {
        if (typeof key === "string" && this.isSensitiveKey(key)) {
          obj.set(key, "[REDACTED]");
        } else {
          this.redactSensitiveFields(value, seen);
        }
      }
      return;
    }
    for (const key of Object.keys(obj)) {
      if (this.isSensitiveKey(key)) {
        obj[key] = "[REDACTED]";
      } else {
        this.redactSensitiveFields(obj[key], seen);
      }
    }
  }
}
/**
 * Singleton instance of the `Sanitization` class, obtained via
 * `Sanitization.getInstance()` when this module is evaluated.
 * Use this for all input sanitization tasks.
 */
export const sanitization = Sanitization.getInstance();
/**
 * Shorthand for `sanitization.sanitizeForLogging`, using the shared
 * singleton instance.
 * @param input - The data that is about to be logged.
 * @returns Whatever `sanitization.sanitizeForLogging(input)` returns.
 */
export const sanitizeInputForLogging = (input) => {
  return sanitization.sanitizeForLogging(input);
};