UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

288 lines (287 loc) 9.96 kB
/**
 * XML Processing Utility
 *
 * Handles downloading, validating, and processing XML files with security.
 *
 * Security Notes:
 * ---------------
 * XML parsing can be vulnerable to XML External Entity (XXE) attacks:
 *
 * 1. **XXE Attacks**: DOCTYPE and ENTITY declarations can be exploited to:
 *    - Read local files on the server
 *    - Perform Server-Side Request Forgery (SSRF)
 *    - Cause Denial of Service via entity expansion
 *
 * 2. **Mitigation**: We reject XML files containing DOCTYPE or ENTITY declarations
 *    and disable entity processing in the parser.
 *
 * References:
 * - https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing
 * - https://cwe.mitre.org/data/definitions/611.html (XXE)
 *
 * @module processors/data/XmlProcessor
 *
 * @example
 * ```typescript
 * import { xmlProcessor, isXmlFile, processXml } from "./XmlProcessor.js";
 *
 * // Check if file is XML
 * if (isXmlFile("application/xml", "data.xml")) {
 *   // Process the file
 *   const result = await processXml(fileInfo);
 *   if (result.success && result.data) {
 *     console.log("Root element:", result.data.rootElement);
 *     console.log("Parsed:", result.data.parsed);
 *   }
 * }
 * ```
 */
import { createRequire } from "node:module";
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
import { SIZE_LIMITS_MB } from "../config/index.js";
import { createFileError, FileErrorCode } from "../errors/index.js";

// CommonJS-style `require` for this ES module; used to load the XML parser lazily.
const require = createRequire(import.meta.url);

// =============================================================================
// CONSTANTS
// =============================================================================

/** Supported XML MIME types */
const SUPPORTED_XML_TYPES = ["application/xml", "text/xml"];

/** Supported XML file extensions */
const SUPPORTED_XML_EXTENSIONS = [".xml"];

/**
 * Markup that may legally precede the root element and must not be mistaken
 * for it: comments and processing instructions (including the XML declaration).
 */
const SKIPPED_MARKUP = [
    { open: "<!--", close: "-->" },
    { open: "<?", close: "?>" },
];

/**
 * Matches an element name at exactly `lastIndex` (sticky), followed by
 * whitespace, "/" or ">". Names may start with a letter or "_" and continue
 * with letters, digits, "_", ".", ":" or "-".
 *
 * The regex is stateful because of the `y` flag; callers must set
 * `lastIndex` explicitly before every `exec`.
 */
const ELEMENT_NAME_AT = /([\p{L}_][\p{L}\p{N}_.:-]*)(?=[\s/>])/uy;

// =============================================================================
// HELPERS
// =============================================================================

/**
 * Convert a file error object into the structured failure result returned by
 * validation.
 *
 * @param fileError - Error object produced by `createFileError`
 * @returns Failure result carrying the error's code, messages and details
 */
function toErrorResult(fileError) {
    const { code, message, userMessage, details } = fileError;
    return {
        success: false,
        error: { code, message, userMessage, details },
    };
}

// =============================================================================
// XML PROCESSOR CLASS
// =============================================================================

/**
 * XML file processor.
 * Extends BaseFileProcessor with XML-specific parsing and validation.
 *
 * Features:
 * - XXE protection (rejects DOCTYPE and ENTITY declarations)
 * - Parses XML to JavaScript objects
 * - Extracts root element name
 *
 * @example
 * ```typescript
 * const processor = new XmlProcessor();
 *
 * const result = await processor.processFile({
 *   id: "file-123",
 *   name: "data.xml",
 *   mimetype: "application/xml",
 *   size: 1024,
 *   buffer: xmlBuffer,
 * });
 *
 * if (result.success && result.data?.valid) {
 *   console.log("Root element:", result.data.rootElement);
 * }
 * ```
 */
export class XmlProcessor extends BaseFileProcessor {
    /**
     * Configure the base processor for XML: size limit from
     * `SIZE_LIMITS_MB.XML_MAX_MB`, a 30 second timeout, and the supported
     * MIME types / extensions declared above.
     */
    constructor() {
        super({
            maxSizeMB: SIZE_LIMITS_MB.XML_MAX_MB,
            timeoutMs: 30000,
            supportedMimeTypes: SUPPORTED_XML_TYPES,
            supportedExtensions: SUPPORTED_XML_EXTENSIONS,
            fileTypeName: "XML",
            defaultFilename: "data.xml",
        });
    }

    /**
     * Extract the root element name from XML content.
     *
     * Scans forward from each "<", stepping over comments and processing
     * instructions (including the `<?xml ...?>` declaration) so that tag-like
     * text inside them is never reported as the root. Other non-element markup
     * such as `<!DOCTYPE ...>` simply fails the name match and is passed over.
     *
     * @param content - XML content string
     * @returns Root element name, or undefined if no element is found or a
     *          comment / processing instruction is left unterminated
     */
    extractRootElement(content) {
        let cursor = content.indexOf("<");
        while (cursor !== -1) {
            const skipped = SKIPPED_MARKUP.find(({ open }) => content.startsWith(open, cursor));
            if (skipped) {
                const end = content.indexOf(skipped.close, cursor + skipped.open.length);
                if (end === -1) {
                    // Unterminated comment/PI: everything after it is not markup we can trust.
                    return undefined;
                }
                cursor = content.indexOf("<", end + skipped.close.length);
                continue;
            }
            // Sticky match: only accept a name that starts right after this "<".
            ELEMENT_NAME_AT.lastIndex = cursor + 1;
            const name = ELEMENT_NAME_AT.exec(content)?.[1];
            if (name !== undefined) {
                return name;
            }
            cursor = content.indexOf("<", cursor + 1);
        }
        return undefined;
    }

    /**
     * Check if XML content contains XXE attack vectors.
     *
     * This is a case-insensitive substring check, so it is deliberately
     * conservative: a `<!DOCTYPE` or `<!ENTITY` appearing anywhere in the
     * text (even inside a comment) is reported.
     *
     * @param content - XML content string
     * @returns Object with `hasDOCTYPE` and `hasENTITY` boolean flags
     */
    checkXxeVectors(content) {
        const lower = content.toLowerCase();
        return {
            hasDOCTYPE: lower.includes("<!doctype"),
            hasENTITY: lower.includes("<!entity"),
        };
    }

    /**
     * Parse XML content to JavaScript object securely.
     *
     * @param content - XML content string
     * @returns Parsed XML content
     */
    parseXmlSecurely(content) {
        // Loaded lazily via require so fast-xml-parser is only resolved when
        // XML is actually parsed.
        const { XMLParser } = require("fast-xml-parser");
        const parser = new XMLParser({
            ignoreAttributes: false,
            attributeNamePrefix: "@_",
            textNodeName: "#text",
            parseAttributeValue: true,
            parseTagValue: true,
            trimValues: true,
            // XXE Protection - explicitly disable entity processing
            processEntities: false,
            htmlEntities: false,
        });
        return parser.parse(content);
    }

    /**
     * Validate downloaded XML is parseable and safe with structured error result.
     * Includes XXE protection by rejecting XML with DOCTYPE or ENTITY declarations.
     * Returns user-friendly error messages with actionable suggestions.
     *
     * Any exception raised while checking or parsing is reported as a
     * `PARSING_FAILED` error result rather than being thrown.
     *
     * @param buffer - Downloaded file content
     * @param fileInfo - Original file information
     * @returns Success result (`{ success: true, data: undefined }`) or error result
     */
    async validateDownloadedFileWithResult(buffer, fileInfo) {
        try {
            const content = buffer.toString("utf-8");
            // XXE Protection: Check for potentially dangerous DOCTYPE/ENTITY declarations
            const { hasDOCTYPE, hasENTITY } = this.checkXxeVectors(content);
            if (hasDOCTYPE || hasENTITY) {
                return toErrorResult(createFileError(FileErrorCode.XXE_DETECTED, {
                    hasDOCTYPE,
                    hasENTITY,
                    filename: fileInfo.name,
                }));
            }
            // Parse to validate structure; the parsed value itself is discarded here.
            // NOTE(review): fast-xml-parser's parse() appears to be lenient unless a
            // validation option is passed - confirm whether strict well-formedness
            // checking is wanted at this point.
            this.parseXmlSecurely(content);
            return { success: true, data: undefined };
        }
        catch (error) {
            return toErrorResult(createFileError(
                FileErrorCode.PARSING_FAILED,
                { fileType: "XML" },
                error instanceof Error ? error : undefined,
            ));
        }
    }

    /**
     * Build processed XML result with parsed content.
     *
     * A parse failure does not throw: the result is returned with
     * `valid: false`, `parsed: null` and `errorMessage` set.
     *
     * @param buffer - Downloaded file content
     * @param fileInfo - Original file information
     * @returns Processed XML result
     */
    buildProcessedResult(buffer, fileInfo) {
        const content = buffer.toString("utf-8");
        const rootElement = this.extractRootElement(content);
        let parsed = null;
        let valid = true;
        let errorMessage;
        try {
            parsed = this.parseXmlSecurely(content);
        }
        catch (error) {
            // This shouldn't happen since we validate, but handle gracefully
            valid = false;
            errorMessage = error instanceof Error ? error.message : "Invalid XML";
        }
        return {
            content,
            parsed,
            valid,
            errorMessage,
            rootElement,
            buffer,
            // `||` on purpose: an empty-string mimetype also falls back to the default.
            mimetype: fileInfo.mimetype || "application/xml",
            size: fileInfo.size,
            filename: this.getFilename(fileInfo),
        };
    }
}

// =============================================================================
// SINGLETON INSTANCE
// =============================================================================

/** Singleton XML processor instance */
export const xmlProcessor = new XmlProcessor();

// =============================================================================
// UTILITY FUNCTIONS
// =============================================================================

/**
 * Check if a file is an XML file based on MIME type or extension.
 * Delegates to the singleton processor's `isFileSupported`.
 *
 * @param mimetype - MIME type of the file
 * @param filename - Filename (for extension-based detection)
 * @returns true if the file is an XML file
 *
 * @example
 * ```typescript
 * if (isXmlFile("application/xml", "data.xml")) {
 *   // Process as XML
 * }
 * ```
 */
export function isXmlFile(mimetype, filename) {
    return xmlProcessor.isFileSupported(mimetype, filename);
}

/**
 * Validate XML file size against configured limit.
 *
 * @param sizeBytes - File size in bytes
 * @returns true if size is within the limit (`SIZE_LIMITS_MB.XML_MAX_MB` megabytes, inclusive)
 */
export function validateXmlSize(sizeBytes) {
    const maxBytes = SIZE_LIMITS_MB.XML_MAX_MB * 1024 * 1024;
    return sizeBytes <= maxBytes;
}

/**
 * Process a single XML file with XXE protection.
 * Delegates to the singleton processor's `processFile`.
 *
 * @param fileInfo - File information (with URL or buffer)
 * @param options - Optional processing options (auth headers, timeout, retry config)
 * @returns Processing result with parsed XML or error
 *
 * @example
 * ```typescript
 * const result = await processXml({
 *   id: "file-123",
 *   name: "data.xml",
 *   mimetype: "application/xml",
 *   size: 2048,
 *   url: "https://example.com/data.xml",
 * }, {
 *   authHeaders: { "Authorization": "Bearer token" },
 * });
 *
 * if (result.success && result.data) {
 *   console.log("Root:", result.data.rootElement);
 *   console.log("Parsed:", result.data.parsed);
 * }
 * ```
 */
export function processXml(fileInfo, options) {
    return xmlProcessor.processFile(fileInfo, options);
}