@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
297 lines (296 loc) • 10.8 kB
JavaScript
/**
* YAML Processing Utility
*
* Handles downloading, validating, and processing YAML files with security.
*
* Security Notes:
* ---------------
* YAML parsing can be vulnerable to various attacks if not configured securely:
*
* 1. **Code Execution via Custom Tags**: YAML supports custom tags like `!!python/object`,
* `!!ruby/object`, or `!!js/function` that can execute arbitrary code when parsed.
* We use the 'core' schema which only allows standard YAML types (strings, numbers,
* booleans, null, arrays, and objects) and explicitly check for dangerous tag patterns.
*
* 2. **Billion Laughs Attack (Entity Expansion)**: YAML supports anchors (&) and aliases (*)
* for referencing content. Malicious YAML can use nested aliases to create exponential
* expansion (e.g., 10 levels of 10x expansion = 10^10 entities from a small file).
* We limit `maxAliasCount` to 100 to prevent memory exhaustion.
*
* 3. **Denial of Service**: Large or deeply nested YAML files can exhaust memory/CPU.
* Size limits are enforced by the base processor's maxSizeMB configuration.
*
* References:
* - https://en.wikipedia.org/wiki/Billion_laughs_attack
* - https://cwe.mitre.org/data/definitions/502.html (Deserialization of Untrusted Data)
*
* @module processors/data/YamlProcessor
*
* @example
* ```typescript
* import { yamlProcessor, isYamlFile, processYaml } from "./YamlProcessor.js";
*
* // Check if file is YAML
* if (isYamlFile("application/x-yaml", "config.yaml")) {
* // Process the file
* const result = await processYaml(fileInfo);
* if (result.success && result.data) {
* console.log("Parsed YAML:", result.data.parsed);
* console.log("As JSON:", result.data.asJson);
* }
* }
* ```
*/
import { createRequire } from "node:module";
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
import { SIZE_LIMITS_MB } from "../config/index.js";
import { createFileError, FileErrorCode } from "../errors/index.js";
// Re-export for consumers who import from this module
// Import for local use
const require = createRequire(import.meta.url);
// =============================================================================
// CONSTANTS
// =============================================================================
/** Supported YAML MIME types */
const SUPPORTED_YAML_TYPES = ["application/x-yaml", "text/yaml", "text/x-yaml"];
/** Supported YAML file extensions */
const SUPPORTED_YAML_EXTENSIONS = [".yaml", ".yml"];
/**
* Dangerous YAML custom tags that can execute code.
* These patterns indicate potential security threats and should be rejected.
*/
const YAML_DANGEROUS_TAGS = [
// Python code execution
"!!python/object",
"!!python/object/apply",
"!!python/object/new",
"!!python/name",
"!!python/module",
// Ruby code execution
"!!ruby/object",
"!!ruby/hash",
"!!ruby/struct",
"!!ruby/sym",
// JavaScript code execution
"!!js/function",
"!!js/undefined",
// General dangerous patterns
"!!perl/",
"!!php/",
"!!java/",
];
// =============================================================================
// YAML PROCESSOR CLASS
// =============================================================================
/**
* YAML file processor.
* Extends BaseFileProcessor with YAML-specific parsing and validation.
*
* Uses secure parsing configuration to prevent:
* - Code execution via custom tags (uses 'core' schema)
* - Billion laughs attack (limits alias count to 100)
* - Dangerous custom tag injection (explicit pattern checking)
*
* @example
* ```typescript
* const processor = new YamlProcessor();
*
* const result = await processor.processFile({
* id: "file-123",
* name: "config.yaml",
* mimetype: "application/x-yaml",
* size: 1024,
* buffer: yamlBuffer,
* });
*
* if (result.success && result.data?.valid) {
* console.log("As JSON:", result.data.asJson);
* }
* ```
*/
export class YamlProcessor extends BaseFileProcessor {
constructor() {
super({
maxSizeMB: SIZE_LIMITS_MB.YAML_MAX_MB,
timeoutMs: 30000,
supportedMimeTypes: SUPPORTED_YAML_TYPES,
supportedExtensions: SUPPORTED_YAML_EXTENSIONS,
fileTypeName: "YAML",
defaultFilename: "config.yaml",
});
}
/**
* Get detected dangerous tags in YAML content.
*
* @param content - Raw YAML content string
* @returns Array of detected dangerous tags (empty if none found)
*/
getDetectedDangerousTags(content) {
return YAML_DANGEROUS_TAGS.filter((pattern) => content.includes(pattern));
}
/**
* Parse YAML content securely using strict schema.
*
* Security measures:
* - 'core' schema: Only allows standard YAML types (string, number, boolean, null, array, object)
* - maxAliasCount: Limits alias expansion to prevent billion laughs attack
*
* @param content - Raw YAML content string
* @returns Parsed YAML content
*/
parseYamlSecurely(content) {
// Dynamically import js-yaml to parse YAML securely
const yaml = require("js-yaml");
return yaml.load(content, {
schema: yaml.CORE_SCHEMA, // Only allow standard YAML types, no custom tags
// Prevent billion laughs attack via alias expansion
// Note: js-yaml doesn't have maxAliasCount, but using CORE_SCHEMA + size limits provides protection
});
}
/**
* Validate downloaded YAML is parseable and safe with structured error result.
* Checks for dangerous custom tags and validates YAML syntax.
* Returns user-friendly error messages with actionable suggestions.
*
* @param buffer - Downloaded file content
* @param fileInfo - Original file information
* @returns Success result or error result
*/
async validateDownloadedFileWithResult(buffer, fileInfo) {
try {
const content = buffer.toString("utf-8");
// Check for potentially dangerous YAML constructs before parsing
const detectedTags = this.getDetectedDangerousTags(content);
if (detectedTags.length > 0) {
const error = createFileError(FileErrorCode.CODE_EXECUTION_DETECTED, {
fileType: "YAML",
detectedTags: detectedTags.join(", "),
filename: fileInfo.name,
});
return {
success: false,
error: {
code: error.code,
message: error.message,
userMessage: error.userMessage,
details: error.details,
},
};
}
// Parse with secure configuration
this.parseYamlSecurely(content);
return { success: true, data: undefined };
}
catch (error) {
const fileError = createFileError(FileErrorCode.PARSING_FAILED, { fileType: "YAML" }, error instanceof Error ? error : undefined);
return {
success: false,
error: {
code: fileError.code,
message: fileError.message,
userMessage: fileError.userMessage,
details: fileError.details,
},
};
}
}
/**
* Build processed YAML result with parsed content.
* Uses secure parsing configuration to prevent code execution attacks.
*
* @param buffer - Downloaded file content
* @param fileInfo - Original file information
* @returns Processed YAML result
*/
buildProcessedResult(buffer, fileInfo) {
const content = buffer.toString("utf-8");
let parsed = null;
let valid = true;
let errorMessage;
let asJson = null;
try {
// Use secure parsing - validation already passed, but maintain consistent security
parsed = this.parseYamlSecurely(content);
asJson = JSON.stringify(parsed, null, 2);
}
catch (error) {
// This shouldn't happen since we validate, but handle gracefully
valid = false;
errorMessage = error instanceof Error ? error.message : "Invalid YAML";
}
return {
content,
parsed,
valid,
errorMessage,
asJson,
buffer,
mimetype: fileInfo.mimetype || "application/x-yaml",
size: fileInfo.size,
filename: this.getFilename(fileInfo),
};
}
}
// =============================================================================
// SINGLETON INSTANCE
// =============================================================================
/** Singleton YAML processor instance */
export const yamlProcessor = new YamlProcessor();
// =============================================================================
// UTILITY FUNCTIONS
// =============================================================================
/**
* Check if a file is a YAML file based on MIME type or extension.
*
* @param mimetype - MIME type of the file
* @param filename - Filename (for extension-based detection)
* @returns true if the file is a YAML file
*
* @example
* ```typescript
* if (isYamlFile("application/x-yaml", "config.yaml")) {
* // Process as YAML
* }
* ```
*/
export function isYamlFile(mimetype, filename) {
return yamlProcessor.isFileSupported(mimetype, filename);
}
/**
* Validate YAML file size against configured limit.
*
* @param sizeBytes - File size in bytes
* @returns true if size is within the limit
*/
export function validateYamlSize(sizeBytes) {
const maxBytes = SIZE_LIMITS_MB.YAML_MAX_MB * 1024 * 1024;
return sizeBytes <= maxBytes;
}
/**
* Process a single YAML file with security validation.
*
* @param fileInfo - File information (with URL or buffer)
* @param options - Optional processing options (auth headers, timeout, retry config)
* @returns Processing result with parsed YAML or error
*
* @example
* ```typescript
* const result = await processYaml({
* id: "file-123",
* name: "config.yaml",
* mimetype: "application/x-yaml",
* size: 2048,
* url: "https://example.com/config.yaml",
* }, {
* authHeaders: { "Authorization": "Bearer token" },
* });
*
* if (result.success && result.data) {
* console.log("Parsed:", result.data.parsed);
* console.log("As JSON:", result.data.asJson);
* }
* ```
*/
export function processYaml(fileInfo, options) {
return yamlProcessor.processFile(fileInfo, options);
}