@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
392 lines (386 loc) • 13.1 kB
JavaScript
/**
* CLI Helpers for File Processors
*
* Provides utilities for CLI integration of the file processor system.
* These helpers can be used by CLI commands to process files.
*
* @module processors/cli/fileProcessorCli
*
* @example
* ```typescript
* import {
* loadFileFromPath,
* processFileFromPath,
* listSupportedFileTypes,
* getCliUsage,
* } from "./processors/cli/index.js";
*
* // Process a file from the CLI
* const result = await processFileFromPath("./document.docx", {
* verbose: true,
* outputFormat: "json",
* });
*
* if (result.success) {
* console.log(result.output);
* } else {
* console.error(result.error);
* }
* ```
*/
import * as fs from "fs";
import * as fsPromises from "fs/promises";
import * as path from "path";
import { logger } from "../../utils/logger.js";
import { getMimeTypeForExtension } from "../config/index.js";
import { getProcessorRegistry } from "../registry/index.js";
// =============================================================================
// TYPES
// =============================================================================
// =============================================================================
// MIME TYPE MAPPING
// =============================================================================
// Extension-to-MIME mapping is imported from the centralized config
// (../config/mimeTypes.ts) via EXTENSION_MIME_MAP and getMimeTypeForExtension.
// This avoids duplicating the ~90-entry mapping that was previously inline here.
// See EXTENSION_MIME_MAP for the single source of truth.
// =============================================================================
// FILE LOADING
// =============================================================================
/**
* Load a file from the filesystem and create a FileInfo object.
*
* @param filePath - Path to the file (relative or absolute)
* @returns FileInfo object ready for processing
* @throws Error if file doesn't exist or is not a file
*
* @example
* ```typescript
* const fileInfo = await loadFileFromPath("./document.pdf");
* console.log(`Loaded: ${fileInfo.name} (${fileInfo.size} bytes)`);
* ```
*/
export async function loadFileFromPath(filePath) {
const absolutePath = path.resolve(filePath);
let stats;
try {
stats = await fsPromises.stat(absolutePath);
}
catch {
throw new Error(`File not found: ${absolutePath}`);
}
if (!stats.isFile()) {
throw new Error(`Not a file: ${absolutePath}`);
}
const buffer = await fsPromises.readFile(absolutePath);
const filename = path.basename(absolutePath);
const ext = path.extname(filename).toLowerCase();
// Determine MIME type from extension
const mimeType = getMimeTypeForExtension(ext);
return {
id: absolutePath,
name: filename,
mimetype: mimeType,
size: stats.size,
buffer,
};
}
// =============================================================================
// FILE PROCESSING
// =============================================================================
/**
* Process a file from a path using the CLI.
*
* @param filePath - Path to the file to process
* @param options - Processing options (verbose, processor, outputFormat)
* @returns Processing result with success status, output, and error info
*
* @example
* ```typescript
* const result = await processFileFromPath("./data.xlsx", {
* verbose: true,
* outputFormat: "json",
* });
*
* if (result.success) {
* console.log(result.output);
* } else {
* console.error(`Error: ${result.error}`);
* }
* ```
*/
export async function processFileFromPath(filePath, options) {
try {
const fileInfo = await loadFileFromPath(filePath);
if (options?.verbose) {
logger.info(`Processing: ${fileInfo.name}`);
logger.info(` Size: ${fileInfo.size} bytes`);
logger.info(` MIME: ${fileInfo.mimetype}`);
}
const registry = await getProcessorRegistry();
// If a specific processor is requested, use it directly
if (options?.processor) {
const processorReg = registry.getProcessor(options.processor);
if (!processorReg) {
return {
success: false,
processorUsed: null,
output: "",
error: `Processor not found: ${options.processor}. Use 'list-file-types' to see available processors.`,
};
}
const result = await processorReg.processor.processFile(fileInfo);
if (options?.verbose) {
logger.info(` Processor: ${options.processor}`);
}
if (!result.success || !result.data) {
return {
success: false,
processorUsed: options.processor,
output: "",
error: result.error?.message || "Processing failed",
};
}
const output = formatOutput(result.data, options?.outputFormat || "text");
return {
success: true,
processorUsed: options.processor,
output,
};
}
// Auto-detect processor
const match = registry.findProcessor(fileInfo.mimetype, fileInfo.name);
if (!match) {
return {
success: false,
processorUsed: null,
output: "",
error: `No processor found for file type: ${fileInfo.mimetype} (${fileInfo.name})`,
};
}
if (options?.verbose) {
logger.info(` Processor: ${match.name}`);
logger.info(` Confidence: ${match.confidence}%`);
}
const processor = match.processor;
const result = await processor.processFile(fileInfo);
if (!result.success || !result.data) {
return {
success: false,
processorUsed: match.name,
output: "",
error: result.error?.message || "Processing failed",
};
}
// Format output based on options
const output = formatOutput(result.data, options?.outputFormat || "text");
return {
success: true,
processorUsed: match.name,
output,
};
}
catch (error) {
return {
success: false,
processorUsed: null,
output: "",
error: error instanceof Error ? error.message : String(error),
};
}
}
// =============================================================================
// OUTPUT FORMATTING
// =============================================================================
/**
* Format processed file output for display.
*
* @param data - Processed file data
* @param format - Output format (json, text, or raw)
* @returns Formatted output string
*/
function formatOutput(data, format) {
if (format === "json") {
// Create a serializable version (summarize buffer)
const serializable = {
...data,
buffer: `<Buffer ${data.buffer.length} bytes>`,
};
return JSON.stringify(serializable, null, 2);
}
if (format === "raw") {
return data.buffer.toString("utf-8");
}
// Text format - extract text content if available
const dataRecord = data;
const textFields = ["textContent", "content", "text", "parsedContent"];
for (const field of textFields) {
const value = dataRecord[field];
if (typeof value === "string") {
return value;
}
}
// Check for structured data that should be stringified
const structuredFields = ["parsedData", "data", "rows", "sheets"];
for (const field of structuredFields) {
const value = dataRecord[field];
if (value !== undefined && value !== null) {
return JSON.stringify(value, null, 2);
}
}
// Fallback to JSON representation
const serializable = {
...data,
buffer: `<Buffer ${data.buffer.length} bytes>`,
};
return JSON.stringify(serializable, null, 2);
}
// =============================================================================
// FILE TYPE LISTING
// =============================================================================
/**
* Get information about all supported file types.
*
* @returns Array of supported file type information
*
* @example
* ```typescript
* const types = getSupportedFileTypes();
* for (const type of types) {
* console.log(`${type.name}: ${type.extensions.join(", ")}`);
* }
* ```
*/
export async function getSupportedFileTypes() {
const registry = await getProcessorRegistry();
const processors = registry.listProcessors();
return processors.map((proc) => {
// Extract config from processor via public getConfig() method
const config = proc.processor.getConfig();
return {
name: proc.name,
priority: proc.priority,
extensions: config.supportedExtensions,
mimeTypes: config.supportedMimeTypes,
description: proc.description,
};
});
}
/**
* List all supported file types formatted for CLI display.
*
* @returns Formatted string listing all supported file types
*
* @example
* ```typescript
* console.log(listSupportedFileTypes());
* ```
*/
export async function listSupportedFileTypes() {
const types = await getSupportedFileTypes();
if (types.length === 0) {
return "No processors registered. Initialize the processor registry first.";
}
let output = "Supported file types:\n\n";
// Sort by priority (lower = higher priority)
const sortedTypes = [...types].sort((a, b) => a.priority - b.priority);
for (const type of sortedTypes) {
output += ` ${type.name} (priority: ${type.priority})\n`;
if (type.description) {
output += ` ${type.description}\n`;
}
if (type.extensions.length > 0) {
output += ` Extensions: ${type.extensions.join(", ")}\n`;
}
if (type.mimeTypes.length > 0) {
// Show first 3 MIME types to avoid overwhelming output
const displayMimes = type.mimeTypes.slice(0, 3);
const suffix = type.mimeTypes.length > 3
? ` (+${type.mimeTypes.length - 3} more)`
: "";
output += ` MIME types: ${displayMimes.join(", ")}${suffix}\n`;
}
output += "\n";
}
return output;
}
// =============================================================================
// CLI USAGE HELP
// =============================================================================
/**
* Get CLI usage information for file processing commands.
*
* @returns Usage help string
*
* @example
* ```typescript
* console.log(getCliUsage());
* ```
*/
export function getCliUsage() {
return `
File Processor CLI Usage:
Process a file:
neurolink process-file <path> [options]
Options:
--processor <name> Use specific processor (e.g., excel, word, json)
--format <type> Output format: json, text, or raw
--verbose Show processing details
List supported types:
neurolink list-file-types
Examples:
neurolink process-file document.docx
neurolink process-file data.xlsx --format json
neurolink process-file config.yaml --processor yaml
neurolink process-file report.pdf --verbose
neurolink process-file data.csv --format raw
Output Formats:
text - Extract text content (default)
json - Full structured output as JSON
raw - Raw file content as UTF-8 string
Notes:
- Processor is auto-detected based on file extension and MIME type
- Use --processor to override auto-detection
- Use --verbose to see which processor was selected
`;
}
// =============================================================================
// UTILITY FUNCTIONS
// =============================================================================
/**
* Check if a file exists and is readable.
*
* @param filePath - Path to check
* @returns true if file exists and is readable
*/
export function fileExists(filePath) {
try {
const absolutePath = path.resolve(filePath);
fs.accessSync(absolutePath, fs.constants.R_OK);
return fs.statSync(absolutePath).isFile();
}
catch {
return false;
}
}
/**
* Get file extension from a path.
*
* @param filePath - File path
* @returns Lowercase extension with leading dot, or empty string
*/
export function getFileExtension(filePath) {
const ext = path.extname(filePath);
return ext.toLowerCase();
}
/**
* Detect MIME type for a file path.
*
* @param filePath - File path
* @returns Detected MIME type
*/
export function detectMimeType(filePath) {
const ext = getFileExtension(filePath);
return getMimeTypeForExtension(ext);
}
//# sourceMappingURL=fileProcessorCli.js.map