UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications…

268 lines (267 loc) 9.9 kB
/**
 * Audio File Processor
 *
 * Handles downloading, validating, and processing audio files to extract metadata
 * and build text content suitable for LLM consumption. Audio files cannot be sent
 * raw to most LLMs, so this processor extracts structured metadata (duration, codec,
 * bitrate, tags) and formats it as text.
 *
 * Uses the `music-metadata` library (pure JavaScript, no native dependencies) for
 * metadata extraction. Supports all major audio formats: MP3, WAV, OGG, FLAC, M4A,
 * AAC, WMA, WebM, AIFF, AMR, APE, WavPack, and more.
 *
 * Key features:
 * - Metadata extraction: duration, codec, bitrate, sample rate, channels
 * - Tag extraction: title, artist, album, year, genre, track number, composer
 * - Embedded cover art extraction
 * - Graceful degradation for corrupt or partially readable files
 * - LLM-friendly text content generation
 *
 * @module processors/media/AudioProcessor
 *
 * @example
 * ```typescript
 * import { audioProcessor, processAudio, isAudioFile } from "./AudioProcessor.js";
 *
 * // Check if a file is an audio file
 * if (isAudioFile(fileInfo.mimetype, fileInfo.name)) {
 *   const result = await processAudio(fileInfo);
 *
 *   if (result.success) {
 *     console.log(`Duration: ${result.data.metadata.durationFormatted}`);
 *     console.log(`Codec: ${result.data.metadata.codec}`);
 *     console.log(`Artist: ${result.data.tags.artist}`);
 *     console.log(`Text for LLM: ${result.data.textContent}`);
 *   }
 * }
 * ```
 */
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
import type { FileInfo, ProcessedAudio, ProcessorFileProcessingResult, ProcessOptions } from "../../types/index.js";
/**
 * Audio Processor - extracts metadata and tags from audio files for LLM consumption.
 *
 * Audio files cannot be directly sent to most language models. This processor
 * parses audio file headers to extract structured metadata (duration, codec,
 * bitrate, sample rate, channels) and embedded tags (title, artist, album, etc.),
 * then builds a human-readable text summary for the AI to reason about.
 *
 * Uses the `music-metadata` library which is a pure JavaScript implementation
 * with no native dependencies, making it safe for all deployment environments.
 *
 * This is an ambient (`declare`) class: only the public and protected signatures
 * are typed here. Private members are declared without signatures, so the
 * parameter/return documentation on them describes an implementation that is
 * not visible in this declaration.
 *
 * @example
 * ```typescript
 * const processor = new AudioProcessor();
 *
 * const result = await processor.processFile({
 *   id: 'audio-123',
 *   name: 'song.mp3',
 *   mimetype: 'audio/mpeg',
 *   size: 5242880,
 *   buffer: audioBuffer,
 * });
 *
 * if (result.success) {
 *   console.log(result.data.textContent);
 *   // "[Audio File: song.mp3]
 *   //  Duration: 3:45 | Codec: MPEG 1 Layer 3 | Bitrate: 320 kbps | ..."
 * }
 * ```
 */
export declare class AudioProcessor extends BaseFileProcessor<ProcessedAudio> {
    /**
     * Create a new audio processor. Takes no arguments; whatever configuration
     * is handed to the base class is not visible in this declaration.
     */
    constructor();
    /**
     * Override processFile for async audio metadata parsing with music-metadata.
     *
     * Processing pipeline:
     * 1. Validate file type and size (base class)
     * 2. Get file buffer (from direct buffer or download)
     * 3. Parse audio metadata using music-metadata's parseBuffer()
     * 4. Extract tags (title, artist, album, etc.)
     * 5. Extract embedded cover art if present
     * 6. Build LLM-friendly text content
     *
     * NOTE(review): this pipeline does not list the transcription step that is
     * documented on `attemptTranscription` — confirm where (or whether) it runs
     * inside `processFile`.
     *
     * @param fileInfo - File information (can include URL or buffer)
     * @param options - Optional processing options (auth headers, timeout, etc.)
     * @returns Processing result with audio metadata or error
     */
    processFile(fileInfo: FileInfo, options?: ProcessOptions): Promise<ProcessorFileProcessingResult<ProcessedAudio>>;
    /**
     * Attempt to transcribe audio using the Vercel AI SDK's `transcribe()` function
     * with the OpenAI Whisper model.
     *
     * Transcription is attempted when:
     * 1. `OPENAI_API_KEY` environment variable is set
     * 2. File size is within Whisper's 25MB limit
     * 3. File format is supported by Whisper
     *
     * Gracefully degrades: if transcription fails for any reason, metadata-only
     * output is returned (transcription is additive, never blocks processing).
     *
     * @param buffer - Audio file content
     * @param filename - Original filename (used for format detection)
     * @param mimetype - MIME type of the audio file
     * @returns Transcription result with transcript text, or empty result
     */
    private attemptTranscription;
    /**
     * Stub implementation required by BaseFileProcessor.
     * Not used because processFile is fully overridden.
     *
     * @param buffer - File buffer
     * @param fileInfo - File information
     * @returns Empty ProcessedAudio structure
     */
    protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedAudio;
    /**
     * Parse audio metadata from a buffer using music-metadata.
     *
     * @param buffer - Audio file content
     * @param fileInfo - File information (used for MIME type hint)
     * @returns Parsed audio metadata from music-metadata
     * @throws Error if the buffer cannot be parsed (corrupt file, unsupported format)
     */
    private parseAudioMetadata;
    /**
     * Extract structured metadata from the parsed audio format information.
     *
     * @param audioMetadata - Parsed audio metadata from music-metadata
     * @param fileSize - File size in bytes
     * @returns Structured metadata object
     */
    private extractMetadata;
    /**
     * Extract common tags from the parsed audio metadata.
     *
     * Maps music-metadata's common tag format to our simplified tag structure.
     * Handles array-to-scalar conversions (e.g., comment[] -> first comment string).
     *
     * @param audioMetadata - Parsed audio metadata from music-metadata
     * @returns Simplified tag object
     */
    private extractTags;
    /**
     * Extract embedded cover art from the audio file.
     *
     * Uses music-metadata's selectCover() to pick the most appropriate
     * cover image when multiple are embedded (e.g., front cover vs. back cover).
     *
     * @param audioMetadata - Parsed audio metadata from music-metadata
     * @returns Cover art as Buffer, or null if no cover art is embedded
     */
    private extractCoverArt;
    /**
     * Build an LLM-friendly text representation of the audio file.
     *
     * Produces a structured text block that gives the AI context about the
     * audio file without requiring the actual audio stream. The format is
     * designed to be scannable and information-dense.
     *
     * @param filename - Original filename
     * @param metadata - Extracted audio metadata
     * @param tags - Extracted audio tags
     * @param transcript - Optional transcribed text from Whisper
     * @returns Formatted text content string
     *
     * @example Output:
     * ```
     * [Audio File: song.mp3]
     * Duration: 3:45 | Codec: MPEG 1 Layer 3 | Bitrate: 320 kbps | Sample Rate: 44100 Hz | Channels: 2 (Stereo) | Lossless: No
     * File Size: 5.00 MB
     * Title: Yesterday | Artist: The Beatles | Album: Help! | Year: 1965 | Genre: Rock, Pop
     * Track: 1/14 | Composer: Lennon-McCartney
     *
     * --- Transcript ---
     * [full transcribed text here]
     * ```
     */
    private buildTextContent;
    /**
     * Format a duration in seconds to a human-readable string.
     *
     * @param seconds - Duration in seconds
     * @returns Formatted string: "M:SS" for < 1 hour, "H:MM:SS" for >= 1 hour
     *
     * @example
     * formatDuration(225)   // "3:45"
     * formatDuration(3750)  // "1:02:30"
     * formatDuration(0)     // "0:00"
     */
    private formatDuration;
    /**
     * Format bitrate to a human-readable string.
     *
     * @param bitrate - Bitrate in bits per second
     * @returns Formatted string (e.g., "320 kbps", "1411 kbps")
     */
    private formatBitrate;
    /**
     * Get a human-readable label for the number of audio channels.
     *
     * @param channels - Number of audio channels
     * @returns Channel label (e.g., "Mono", "Stereo", "5.1 Surround")
     */
    private getChannelLabel;
}
/**
 * Singleton Audio processor instance.
 * Use this for standard audio processing operations.
 *
 * @example
 * ```typescript
 * import { audioProcessor } from "./AudioProcessor.js";
 *
 * const result = await audioProcessor.processFile(fileInfo);
 * ```
 */
export declare const audioProcessor: AudioProcessor;
/**
 * Check if a file is an audio file.
 * Matches by MIME type or file extension.
 *
 * @param mimetype - MIME type of the file
 * @param filename - Filename (for extension-based detection)
 * @returns true if the file is an audio file
 *
 * @example
 * ```typescript
 * if (isAudioFile('audio/mpeg', 'song.mp3')) {
 *   // Process as audio
 * }
 *
 * if (isAudioFile('', 'recording.flac')) {
 *   // Also matches by extension
 * }
 * ```
 */
export declare function isAudioFile(mimetype: string, filename: string): boolean;
/**
 * Process a single audio file.
 * Convenience function that uses the singleton processor.
 *
 * @param fileInfo - File information (can include URL or buffer)
 * @param options - Optional processing options (auth headers, timeout, etc.)
 * @returns Processing result with audio metadata or error
 *
 * @example
 * ```typescript
 * import { processAudio } from "./AudioProcessor.js";
 *
 * const result = await processAudio({
 *   id: 'audio-1',
 *   name: 'podcast.mp3',
 *   mimetype: 'audio/mpeg',
 *   size: 15728640,
 *   buffer: mp3Buffer,
 * });
 *
 * if (result.success) {
 *   const { metadata, tags, textContent } = result.data;
 *   console.log(`${tags.title} by ${tags.artist} (${metadata.durationFormatted})`);
 *   // Send textContent to LLM for analysis
 * } else {
 *   console.error(`Processing failed: ${result.error?.userMessage}`);
 * }
 * ```
 */
export declare function processAudio(fileInfo: FileInfo, options?: ProcessOptions): Promise<ProcessorFileProcessingResult<ProcessedAudio>>;