@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
388 lines (387 loc) • 12.7 kB
TypeScript
/**
* File detection and processing types for unified file handling
*/
/**
 * File categories recognized for multimodal input.
 *
 * The three Office members (`docx`, `pptx`, `xlsx`) are also exposed on their
 * own as `OfficeDocumentType`.
 */
export type FileType =
  | "csv"
  | "image"
  | "pdf"
  | "audio"
  | "video"
  | "archive"
  | "text"
  | "svg"
  | "docx"
  | "pptx"
  | "xlsx"
  | "unknown";
/**
 * Office document formats: Word (`docx`), PowerPoint (`pptx`) and Excel (`xlsx`).
 */
export type OfficeDocumentType =
  | "docx"
  | "pptx"
  | "xlsx";
/**
 * A Buffer paired with its original filename (and, optionally, a MIME type).
 *
 * Recommended for applications (e.g. Slack bots) that download files as
 * Buffers but still know what the file was originally called.
 *
 * @example
 * ```typescript
 * files: [
 *   { buffer: pdfBuffer, filename: "quarterly-report.pdf" },
 *   { buffer: videoBuffer, filename: "meeting-recording.mov", mimetype: "video/quicktime" }
 * ]
 * ```
 */
export type FileWithMetadata = {
  /** Raw file bytes. */
  buffer: Buffer;
  /** Original filename, e.g. "quarterly-report.pdf". */
  filename: string;
  /**
   * Optional MIME type, e.g. "video/quicktime".
   * Note the all-lowercase spelling; `FileDetectionResult` uses `mimeType`.
   */
  mimetype?: string;
};
/**
 * Anything accepted as a file input:
 * - a raw `Buffer`,
 * - a string (file path, URL, or data URI), or
 * - a `FileWithMetadata` object carrying a Buffer plus its filename.
 */
export type FileInput =
  | Buffer
  | string
  | FileWithMetadata;
/**
 * Origin of a file input, recorded so the input's source can be tracked.
 */
export type FileSource =
  | "url"
  | "path"
  | "buffer"
  | "datauri";
/**
 * Outcome of file-type detection, including a confidence score.
 */
export type FileDetectionResult = {
  /** Detected file category. */
  type: FileType;
  /** Detected MIME type. */
  mimeType: string;
  /** File extension, or `null` when there is none. */
  extension: string | null;
  /** Where the input came from. */
  source: FileSource;
  metadata: {
    /** File size, when known (unit not specified here — presumably bytes; confirm). */
    size?: number;
    /** Filename, when known. */
    filename?: string;
    /** Detection confidence (scale is not specified in this declaration — confirm against the detector). */
    confidence: number;
  };
};
/**
 * Result produced once a file has been detected and converted.
 *
 * `metadata` is a single flat bag of optional fields shared by every file
 * kind; the group comments inside it are inferred from field names and the
 * referenced types and should be confirmed against the individual processors.
 */
export type FileProcessingResult = {
  /** File category of the processed input. */
  type: FileType;
  /** Processed content, either text or raw bytes. */
  content: string | Buffer;
  /** MIME type of the file. */
  mimeType: string;
  /** Additional images extracted from the file (e.g., video keyframes, audio cover art) */
  images?: (Buffer | string)[];
  metadata: {
    // General
    confidence: number;
    size?: number;
    filename?: string;
    extension?: string | null;

    // CSV-related (presumably; see the CSV* types)
    rowCount?: number;
    totalLines?: number;
    columnCount?: number;
    columnNames?: string[];
    sampleData?: string | unknown[];
    hasEmptyColumns?: boolean;
    /** Enhanced column metadata with type detection and statistics */
    columnMetadata?: CSVColumnMetadata[];
    /** Data quality warnings */
    dataQualityWarnings?: CSVDataQualityWarning[];
    /** Overall data quality score (0-100) */
    dataQualityScore?: number;
    /** Whether headers were detected */
    hasHeaders?: boolean;
    /** Detected delimiter */
    detectedDelimiter?: string;

    // PDF-related (presumably; `apiType` is a PDFAPIType)
    version?: string;
    estimatedPages?: number | null;
    provider?: string;
    apiType?: PDFAPIType;

    // Office-document-related (presumably; `officeFormat` is an OfficeDocumentType)
    officeFormat?: OfficeDocumentType;
    pageCount?: number;
    slideCount?: number;
    sheetCount?: number;
    sheetNames?: string[];
    author?: string;
    createdDate?: string;
    modifiedDate?: string;
    hasFormulas?: boolean;
    hasImages?: boolean;

    // Frame extraction (presumably video — confirm)
    frameCount?: number;
    hasKeyframes?: boolean;
  };
};
/**
 * Output format for the sample data included in CSV metadata.
 *
 * - `"object"`: structured array of row objects (best for programmatic use)
 * - `"json"`: JSON string representation (default, backward compatible)
 * - `"csv"`: CSV formatted string preview
 * - `"markdown"`: Markdown table format
 */
export type SampleDataFormat =
  | "object"
  | "json"
  | "csv"
  | "markdown";
/**
 * Data type detected for a CSV column.
 */
export type CSVColumnDataType =
  | "string"
  | "number"
  | "integer"
  | "float"
  | "boolean"
  | "date"
  | "datetime"
  | "email"
  | "url"
  | "empty"
  | "mixed";
/**
 * A single data-quality warning raised for a CSV column.
 */
export type CSVDataQualityWarning = {
  /** Column the warning refers to. */
  column: string;
  /** Category of the problem. */
  type:
    | "empty_values"
    | "invalid_name"
    | "mixed_types"
    | "high_null_rate"
    | "duplicates"
    | "inconsistent_format";
  /** Human-readable description of the problem. */
  message: string;
  /** How serious the problem is. */
  severity:
    | "info"
    | "warning"
    | "error";
  /** Number of rows affected, when reported. */
  affectedRows?: number;
};
/**
 * Detailed metadata describing one column of a CSV file.
 */
export type CSVColumnMetadata = {
  /** Column name. */
  name: string;
  /** Column index (base not specified here — presumably zero-based; confirm). */
  index: number;
  /** Data type detected for the column. */
  detectedType: CSVColumnDataType;
  /** Confidence of type detection (0-100) */
  typeConfidence: number;
  /** Count of null/empty values */
  nullCount: number;
  /** Count of unique values */
  uniqueCount: number;
  /** Sample values from this column (up to 5) */
  sampleValues: string[];
  /** For numeric columns: min value */
  minValue?: number;
  /** For numeric columns: max value */
  maxValue?: number;
  /** For numeric columns: average value */
  avgValue?: number;
  /** For date columns: detected format (e.g., 'YYYY-MM-DD', 'MM/DD/YYYY') */
  dateFormat?: string;
  /** Column name validation issues */
  nameIssues?: string[];
};
/**
 * Options accepted by the CSV processor. Every field is optional; defaults
 * are not visible in this declaration.
 */
export type CSVProcessorOptions = {
  /** Upper bound on the number of rows (exact semantics defined by the processor). */
  maxRows?: number;
  /** Output formatting style. */
  formatStyle?:
    | "raw"
    | "markdown"
    | "json";
  /** Whether headers are included. */
  includeHeaders?: boolean;
  /** Format used for the sample data placed in metadata. */
  sampleDataFormat?: SampleDataFormat;
  /** File extension of the input, or `null` when there is none. */
  extension?: string | null;
};
/**
 * Kind of PDF API a provider uses; `"unsupported"` is the member for
 * providers without one.
 */
export type PDFAPIType =
  | "document"
  | "files-api"
  | "unsupported";
/**
 * Per-provider PDF configuration: limits and capabilities.
 */
export type PDFProviderConfig = {
  /** Maximum PDF size in megabytes. */
  maxSizeMB: number;
  /** Maximum number of pages. */
  maxPages: number;
  /** Whether the provider supports PDFs natively. */
  supportsNative: boolean;
  /** Whether citations are required; `"auto"` is a third accepted value (semantics defined elsewhere). */
  requiresCitations: boolean | "auto";
  /** Which PDF API the provider uses. */
  apiType: PDFAPIType;
};
/**
 * Options accepted by the PDF processor. Every field is optional.
 */
export type PDFProcessorOptions = {
  /** Provider name. */
  provider?: string;
  /** Model name. */
  model?: string;
  /** Maximum PDF size in megabytes. */
  maxSizeMB?: number;
  /** Bedrock API mode. */
  bedrockApiMode?:
    | "converse"
    | "invokeModel";
  /**
   * Whether to enforce page limits by throwing an error (default: true).
   * Set to false to bypass limit enforcement (a warning is logged instead).
   */
  enforceLimits?: boolean;
};
/**
 * Capabilities and limits of an audio transcription provider
 * (e.g., OpenAI Whisper, Google Speech-to-Text, Azure Speech Services).
 *
 * Cost can be expressed per 60 seconds or per 15 seconds of audio; both
 * fields are optional.
 *
 * @example OpenAI Whisper configuration
 * ```typescript
 * const openaiConfig: AudioProviderConfig = {
 *   maxSizeMB: 25,
 *   maxDurationSeconds: 600,
 *   supportedFormats: ['mp3', 'mp4', 'm4a', 'wav', 'webm'],
 *   supportsLanguageDetection: true,
 *   requiresApiKey: true,
 *   costPer60s: 0.006 // $0.006 per minute
 * };
 * ```
 *
 * @example Google Speech-to-Text configuration
 * ```typescript
 * const googleConfig: AudioProviderConfig = {
 *   maxSizeMB: 10,
 *   maxDurationSeconds: 480,
 *   supportedFormats: ['flac', 'wav', 'mp3', 'ogg'],
 *   supportsLanguageDetection: true,
 *   requiresApiKey: true,
 *   costPer15s: 0.004 // $0.016 per minute ($0.004 per 15 seconds)
 * };
 * ```
 */
export type AudioProviderConfig = {
  /** Maximum audio file size in megabytes */
  maxSizeMB: number;
  /** Maximum audio duration in seconds */
  maxDurationSeconds: number;
  /** Supported audio formats (e.g., 'mp3', 'wav', 'm4a', 'flac', 'ogg') */
  supportedFormats: string[];
  /** Whether the provider supports automatic language detection */
  supportsLanguageDetection: boolean;
  /** Whether the provider requires an API key for authentication */
  requiresApiKey: boolean;
  /** Optional: Cost per 60 seconds of audio in USD */
  costPer60s?: number;
  /** Optional: Cost per 15 seconds of audio in USD */
  costPer15s?: number;
};
/**
 * Options accepted by the audio processor. Every field is optional.
 */
export type AudioProcessorOptions = {
  /** AI provider to use for transcription (e.g., 'openai', 'google', 'azure') */
  provider?: string;
  /** Transcription model to use (e.g., 'whisper-1', 'chirp-3') */
  transcriptionModel?: string;
  /** Language code for transcription (e.g., 'en', 'es', 'fr') */
  language?: string;
  /** Context or prompt to guide transcription accuracy */
  prompt?: string;
  /** Maximum audio duration in seconds (default: 600) */
  maxDurationSeconds?: number;
  /** Maximum file size in megabytes */
  maxSizeMB?: number;
};
/**
 * Options for processing Word, PowerPoint and Excel documents.
 *
 * Some options apply to every format, others to a single format only; each
 * field's comment says which.
 *
 * @example Word document processing (docx)
 * ```typescript
 * const options: OfficeProcessorOptions = {
 *   format: "docx",
 *   extractTextOnly: false,
 *   includeMetadata: true
 * };
 * ```
 *
 * @example PowerPoint processing (pptx)
 * ```typescript
 * const options: OfficeProcessorOptions = {
 *   format: "pptx",
 *   includeSlideNotes: true, // pptx-specific
 *   includeMetadata: true
 * };
 * ```
 *
 * @example Excel processing (xlsx)
 * ```typescript
 * const options: OfficeProcessorOptions = {
 *   format: "xlsx",
 *   processAllSheets: true, // xlsx-specific
 *   includeMetadata: true
 * };
 * ```
 */
export type OfficeProcessorOptions = {
  /** Office document format type */
  format?: OfficeDocumentType;
  /** Whether to extract text only (true) or preserve formatting (false). Applies to: docx, pptx, xlsx */
  extractTextOnly?: boolean;
  /** Maximum file size in megabytes. Applies to: docx, pptx, xlsx */
  maxSizeMB?: number;
  /** Whether to include metadata (author, created date, etc.). Applies to: docx, pptx, xlsx */
  includeMetadata?: boolean;
  /** For spreadsheets (xlsx only): whether to process all sheets or just the first */
  processAllSheets?: boolean;
  /** For presentations (pptx only): whether to include slide notes */
  includeSlideNotes?: boolean;
};
/**
 * Options accepted by the file detector. Every field is optional.
 */
export type FileDetectorOptions = {
  /** Maximum file size (unit not specified here — presumably bytes; confirm). */
  maxSize?: number;
  /** Timeout (unit not specified here — presumably milliseconds; confirm). */
  timeout?: number;
  /** File types the caller allows. */
  allowedTypes?: FileType[];
  /** Options forwarded for audio files. */
  audioOptions?: AudioProcessorOptions;
  /** Options forwarded for CSV files. */
  csvOptions?: CSVProcessorOptions;
  /** Options forwarded for Office documents. */
  officeOptions?: OfficeProcessorOptions;
  /** Confidence threshold for detection (scale not specified here — confirm). */
  confidenceThreshold?: number;
  /** Provider name. */
  provider?: string;
  /** Maximum number of retry attempts for network requests (default: 3) */
  maxRetries?: number;
  /** Initial retry delay in milliseconds with exponential backoff (default: 1000) */
  retryDelay?: number;
  /**
   * Caller-provided MIME type hint (e.g. "text/plain", "application/json").
   *
   * Intended for the case where the filename has no extension and magic-byte
   * detection cannot identify the content (the common Slack/Curator
   * extension-less-buffer case). A trustworthy mimetype, meaning anything
   * other than "application/octet-stream", short-circuits the detection
   * strategy loop with a high-confidence result, so small files on the eager
   * file-processing path still honor the hint. The lazy FileReferenceRegistry
   * path has its own hint handling.
   */
  mimetypeHint?: string;
};
/**
 * Upload result shape for the Google AI Studio Files API.
 *
 * Every field of `file` is typed as a string, including the size and the
 * timestamps.
 */
export type GoogleFilesAPIUploadResult = {
  file: {
    name: string;
    displayName: string;
    mimeType: string;
    /** Size in bytes, carried as a string rather than a number. */
    sizeBytes: string;
    createTime: string;
    updateTime: string;
    expirationTime: string;
    sha256Hash: string;
    uri: string;
  };
};
/**
 * Options controlling how PDF pages are rendered to images.
 */
export type PDFImageConversionOptions = {
  /** Scale factor for image quality (1-4, default: 2) */
  scale?: number;
  /** Maximum number of pages to convert (default: 20 from PDF_LIMITS.DEFAULT_MAX_PAGES) */
  maxPages?: number;
  /** Output format (default: png). Only PNG is currently implemented by PDFProcessor. */
  format?: "png";
};
/**
 * What a PDF-to-image conversion returns.
 */
export type PDFImageConversionResult = {
  /** Array of base64-encoded PNG images (one per page) */
  images: string[];
  /** Number of pages converted */
  pageCount: number;
  /** Total conversion time in milliseconds */
  conversionTimeMs: number;
  /** Any warnings during conversion */
  warnings?: string[];
};
/**
 * Options controlling filename sanitization. Every field is optional.
 */
export type SanitizeFileNameOptions = {
  /** Maximum length for the filename (default: 255) */
  maxLength?: number;
  /** Replacement character for invalid chars (default: '_') */
  replacement?: string;
  /** Whether to block dangerous extensions (default: true) */
  blockDangerousExtensions?: boolean;
  /** Whether to allow hidden files starting with dot (default: false) */
  allowHiddenFiles?: boolean;
};
/**
 * Options controlling display-name sanitization. Every field is optional.
 */
export type SanitizeDisplayNameOptions = {
  /** Maximum length for the name (default: 100) */
  maxLength?: number;
  /** Whether to allow unicode characters (default: true) */
  allowUnicode?: boolean;
};
/**
 * What SVG sanitization returns.
 */
export type SvgSanitizationResult = {
  /** Sanitized SVG content */
  content: string;
  /** Items that were removed during sanitization */
  removedItems: string[];
  /** Whether any content was modified */
  wasModified: boolean;
};
/**
 * Contract that every file-detection strategy implements.
 */
export type DetectionStrategy = {
  /**
   * Inspect a file input and report what was detected.
   *
   * @param input - The file to examine (Buffer, string, or Buffer with metadata).
   * @returns A promise resolving to the detection result.
   */
  detect(input: FileInput): Promise<FileDetectionResult>;
};