@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio
268 lines (267 loc) • 9.9 kB
TypeScript
/**
* Audio File Processor
*
* Handles downloading, validating, and processing audio files to extract metadata
* and build text content suitable for LLM consumption. Audio files cannot be sent
* raw to most LLMs, so this processor extracts structured metadata (duration, codec,
* bitrate, tags) and formats it as text.
*
* Uses the `music-metadata` library (pure JavaScript, no native dependencies) for
* metadata extraction. Supports all major audio formats: MP3, WAV, OGG, FLAC, M4A,
* AAC, WMA, WebM, AIFF, AMR, APE, WavPack, and more.
*
* Key features:
* - Metadata extraction: duration, codec, bitrate, sample rate, channels
* - Tag extraction: title, artist, album, year, genre, track number, composer
* - Embedded cover art extraction
* - Optional, best-effort transcription via OpenAI Whisper (skipped when unavailable)
* - Graceful degradation for corrupt or partially readable files
* - LLM-friendly text content generation
*
* @module processors/media/AudioProcessor
*
* @example
* ```typescript
* import { audioProcessor, processAudio, isAudioFile } from "./AudioProcessor.js";
*
* // Check if a file is an audio file
* if (isAudioFile(fileInfo.mimetype, fileInfo.name)) {
* const result = await processAudio(fileInfo);
*
* if (result.success) {
* console.log(`Duration: ${result.data.metadata.durationFormatted}`);
* console.log(`Codec: ${result.data.metadata.codec}`);
* console.log(`Artist: ${result.data.tags.artist}`);
* console.log(`Text for LLM: ${result.data.textContent}`);
* }
* }
* ```
*/
import { BaseFileProcessor } from "../base/BaseFileProcessor.js";
import type { FileInfo, ProcessedAudio, ProcessorFileProcessingResult, ProcessOptions } from "../../types/index.js";
/**
* Audio Processor - extracts metadata and tags from audio files for LLM consumption.
*
* Audio files cannot be directly sent to most language models. This processor
* parses audio file headers to extract structured metadata (duration, codec,
* bitrate, sample rate, channels) and embedded tags (title, artist, album, etc.),
* then builds a human-readable text summary for the AI to reason about. When a
* transcript is available (see `attemptTranscription`), it can be included in
* that summary as well.
*
* Uses the `music-metadata` library which is a pure JavaScript implementation
* with no native dependencies, making it safe for all deployment environments.
*
* This is an ambient declaration (`declare class`): it describes the type surface
* only. Private members are listed by name without signatures, so the `@param` /
* `@returns` notes on them describe the implementation's contract and are not
* checked by the compiler against this file.
*
* @example
* ```typescript
* const processor = new AudioProcessor();
*
* const result = await processor.processFile({
*   id: 'audio-123',
*   name: 'song.mp3',
*   mimetype: 'audio/mpeg',
*   size: 5242880,
*   buffer: audioBuffer,
* });
*
* if (result.success) {
*   console.log(result.data.textContent);
*   // "[Audio File: song.mp3]
*   // Duration: 3:45 | Codec: MPEG 1 Layer 3 | Bitrate: 320 kbps | ..."
* }
* ```
*/
export declare class AudioProcessor extends BaseFileProcessor<ProcessedAudio> {
/**
* Create an audio processor.
*
* Takes no arguments. Whatever configuration is handed to `BaseFileProcessor`
* (supported MIME types, size limits, ...) is not visible in this declaration.
*/
constructor();
/**
* Override processFile for async audio metadata parsing with music-metadata.
*
* Processing pipeline:
* 1. Validate file type and size (base class)
* 2. Get file buffer (from direct buffer or download)
* 3. Parse audio metadata using music-metadata's parseBuffer()
* 4. Extract tags (title, artist, album, etc.)
* 5. Extract embedded cover art if present
* 6. Build LLM-friendly text content
*
* NOTE(review): `attemptTranscription` and the `transcript` parameter of
* `buildTextContent` suggest a best-effort transcription step also runs
* somewhere before step 6 - its exact position is not visible from this
* declaration; confirm against the implementation and update this list.
*
* Failures are reported through the returned result object (callers check
* `result.success`), as shown in the class-level example.
*
* @param fileInfo - File information (can include URL or buffer)
* @param options - Optional processing options (auth headers, timeout, etc.)
* @returns Promise resolving to a processing result with audio metadata or error
*/
processFile(fileInfo: FileInfo, options?: ProcessOptions): Promise<ProcessorFileProcessingResult<ProcessedAudio>>;
/**
* Attempt to transcribe audio using the Vercel AI SDK's `transcribe()` function
* with the OpenAI Whisper model.
*
* Transcription is attempted when:
* 1. `OPENAI_API_KEY` environment variable is set
* 2. File size is within Whisper's 25MB limit
* 3. File format is supported by Whisper
*
* Gracefully degrades: if transcription fails for any reason, metadata-only
* output is returned (transcription is additive, never blocks processing).
*
* The signature is omitted here because this member is private; the
* parameters below describe the implementation.
*
* @param buffer - Audio file content
* @param filename - Original filename (used for format detection)
* @param mimetype - MIME type of the audio file
* @returns Transcription result with transcript text, or empty result
*/
private attemptTranscription;
/**
* Stub implementation required by BaseFileProcessor.
* Not used because processFile is fully overridden.
*
* Synchronous, unlike `processFile`, which returns a Promise.
*
* @param buffer - File buffer
* @param fileInfo - File information
* @returns Empty ProcessedAudio structure
*/
protected buildProcessedResult(buffer: Buffer, fileInfo: FileInfo): ProcessedAudio;
/**
* Parse audio metadata from a buffer using music-metadata.
*
* @param buffer - Audio file content
* @param fileInfo - File information (used for MIME type hint)
* @returns Parsed audio metadata from music-metadata
* @throws Error if the buffer cannot be parsed (corrupt file, unsupported format)
*/
private parseAudioMetadata;
/**
* Extract structured metadata from the parsed audio format information.
*
* @param audioMetadata - Parsed audio metadata from music-metadata
* @param fileSize - File size in bytes
* @returns Structured metadata object
*/
private extractMetadata;
/**
* Extract common tags from the parsed audio metadata.
*
* Maps music-metadata's common tag format to our simplified tag structure.
* Handles array-to-scalar conversions (e.g., comment[] -> first comment string).
*
* @param audioMetadata - Parsed audio metadata from music-metadata
* @returns Simplified tag object
*/
private extractTags;
/**
* Extract embedded cover art from the audio file.
*
* Uses music-metadata's selectCover() to pick the most appropriate
* cover image when multiple are embedded (e.g., front cover vs. back cover).
*
* @param audioMetadata - Parsed audio metadata from music-metadata
* @returns Cover art as Buffer, or null if no cover art is embedded
*/
private extractCoverArt;
/**
* Build an LLM-friendly text representation of the audio file.
*
* Produces a structured text block that gives the AI context about the
* audio file without requiring the actual audio stream. The format is
* designed to be scannable and information-dense.
*
* @param filename - Original filename
* @param metadata - Extracted audio metadata
* @param tags - Extracted audio tags
* @param transcript - Optional transcribed text from Whisper; shown under a
*   "--- Transcript ---" heading in the example below
* @returns Formatted text content string
*
* @example Output:
* ```
* [Audio File: song.mp3]
* Duration: 3:45 | Codec: MPEG 1 Layer 3 | Bitrate: 320 kbps | Sample Rate: 44100 Hz | Channels: 2 (Stereo) | Lossless: No
* File Size: 5.00 MB
* Title: Yesterday | Artist: The Beatles | Album: Help! | Year: 1965 | Genre: Rock, Pop
* Track: 1/14 | Composer: Lennon-McCartney
*
* --- Transcript ---
* [full transcribed text here]
* ```
*/
private buildTextContent;
/**
* Format a duration in seconds to a human-readable string.
*
* @param seconds - Duration in seconds
* @returns Formatted string: "M:SS" for < 1 hour, "H:MM:SS" for >= 1 hour
*
* @example
* formatDuration(225) // "3:45"
* formatDuration(3750) // "1:02:30"
* formatDuration(0) // "0:00"
*/
private formatDuration;
/**
* Format bitrate to a human-readable string.
*
* @param bitrate - Bitrate in bits per second
* @returns Formatted string (e.g., "320 kbps", "1411 kbps")
*/
private formatBitrate;
/**
* Get a human-readable label for the number of audio channels.
*
* @param channels - Number of audio channels
* @returns Channel label (e.g., "Mono", "Stereo", "5.1 Surround")
*/
private getChannelLabel;
}
/**
* Singleton Audio processor instance.
* Use this for standard audio processing operations instead of constructing
* a new `AudioProcessor` at each call site.
*
* @example
* ```typescript
* import { audioProcessor } from "./AudioProcessor.js";
*
* const result = await audioProcessor.processFile(fileInfo);
* ```
*/
export declare const audioProcessor: AudioProcessor;
/**
* Check if a file is an audio file.
* Matches by MIME type or file extension, so either one is enough to identify
* the file as audio.
*
* Both arguments are required strings; pass an empty string for a MIME type
* that is unknown (see the second example).
*
* @param mimetype - MIME type of the file
* @param filename - Filename (for extension-based detection)
* @returns true if the file is an audio file
*
* @example
* ```typescript
* if (isAudioFile('audio/mpeg', 'song.mp3')) {
*   // Process as audio
* }
*
* if (isAudioFile('', 'recording.flac')) {
*   // Also matches by extension
* }
* ```
*/
export declare function isAudioFile(mimetype: string, filename: string): boolean;
/**
* Process a single audio file.
* Convenience function that uses the singleton processor.
*
* The signature mirrors `AudioProcessor.processFile`: same parameters, same
* Promise-wrapped result type. Callers branch on `result.success` rather than
* catching, as in the example below.
*
* @param fileInfo - File information (can include URL or buffer)
* @param options - Optional processing options (auth headers, timeout, etc.)
* @returns Promise resolving to a processing result with audio metadata or error
*
* @example
* ```typescript
* import { processAudio } from "./AudioProcessor.js";
*
* const result = await processAudio({
*   id: 'audio-1',
*   name: 'podcast.mp3',
*   mimetype: 'audio/mpeg',
*   size: 15728640,
*   buffer: mp3Buffer,
* });
*
* if (result.success) {
*   const { metadata, tags, textContent } = result.data;
*   console.log(`${tags.title} by ${tags.artist} (${metadata.durationFormatted})`);
*   // Send textContent to LLM for analysis
* } else {
*   console.error(`Processing failed: ${result.error?.userMessage}`);
* }
* ```
*/
export declare function processAudio(fileInfo: FileInfo, options?: ProcessOptions): Promise<ProcessorFileProcessingResult<ProcessedAudio>>;