UNPKG

@juspay/neurolink

Version:

Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applicatio

github.com/juspay/neurolink

juspay/neurolink

435 lines (434 loc) • 15.1 kB

JavaScript

/** * Audio Utilities for Voice Module * * Provides audio format conversion, duration calculation, and buffer utilities. * * @module voice/audio-utils */ import { AUDIO_FORMAT_DETAILS } from "../types/index.js"; import { logger } from "../utils/logger.js"; /** * Detect audio format from buffer * * @param buffer - Audio data buffer * @returns Detected audio format or null */ export function detectAudioFormat(buffer) { if (buffer.length < 12) { return null; } // Check for WAV (RIFF header) if (buffer[0] === 0x52 && // R buffer[1] === 0x49 && // I buffer[2] === 0x46 && // F buffer[3] === 0x46 && // F buffer[8] === 0x57 && // W buffer[9] === 0x41 && // A buffer[10] === 0x56 && // V buffer[11] === 0x45 // E ) { return "wav"; } // Check for MP3 (ID3 tag or frame sync) if ((buffer[0] === 0x49 && buffer[1] === 0x44 && buffer[2] === 0x33) || // ID3 (buffer[0] === 0xff && (buffer[1] & 0xe0) === 0xe0) // Frame sync ) { return "mp3"; } // Check for OGG (OggS header) if (buffer[0] === 0x4f && // O buffer[1] === 0x67 && // g buffer[2] === 0x67 && // g buffer[3] === 0x53 // S ) { // Could be Opus or Vorbis, check for Opus header // Opus has "OpusHead" in the first page const opusOffset = buffer.indexOf("OpusHead"); if (opusOffset !== -1 && opusOffset < 200) { return "opus"; } return "ogg"; } return null; } /** * Get MIME type for audio format * * @param format - Audio format * @returns MIME type string */ export function getMimeType(format) { return AUDIO_FORMAT_DETAILS[format]?.mimeType ?? "application/octet-stream"; } /** * Get file extension for audio format * * @param format - Audio format * @returns File extension with dot */ export function getFileExtension(format) { return AUDIO_FORMAT_DETAILS[format]?.extension ?? ".bin"; } /** * Calculate audio duration from buffer * * @param buffer - Audio data buffer * @param format - Audio format (optional, will be detected if not provided) * @param sampleRate - Sample rate in Hz (optional, will be extracted if possible) * @returns Duration in seconds, or undefined if cannot be calculated */ export function calculateDuration(buffer, format, sampleRate) { const detectedFormat = format ?? detectAudioFormat(buffer); if (!detectedFormat) { return undefined; } try { switch (detectedFormat) { case "wav": return calculateWavDuration(buffer); case "mp3": return estimateMp3Duration(buffer); case "ogg": case "opus": return estimateOpusDuration(buffer); default: // Estimate based on size and assumed bitrate if (sampleRate) { // Assume 16-bit mono return buffer.length / (sampleRate * 2); } return undefined; } } catch (err) { logger.debug(`[audio-utils] Failed to calculate duration: ${err instanceof Error ? err.message : String(err)}`); return undefined; } } /** * Calculate WAV duration from header */ function calculateWavDuration(buffer) { if (buffer.length < 44) { return undefined; } // Find data chunk let offset = 12; while (offset < buffer.length - 8) { const chunkId = buffer.toString("ascii", offset, offset + 4); const chunkSize = buffer.readUInt32LE(offset + 4); if (chunkId === "fmt ") { const channels = buffer.readUInt16LE(offset + 10); const sampleRate = buffer.readUInt32LE(offset + 12); const bitsPerSample = buffer.readUInt16LE(offset + 22); // RIFF chunks are word-aligned: odd-sized chunks carry a trailing pad // byte that must be skipped, otherwise we land on the wrong header. let dataOffset = offset + 8 + chunkSize + (chunkSize % 2); while (dataOffset < buffer.length - 8) { const dataChunkId = buffer.toString("ascii", dataOffset, dataOffset + 4); const dataChunkSize = buffer.readUInt32LE(dataOffset + 4); if (dataChunkId === "data") { const bytesPerSample = (bitsPerSample / 8) * channels; const numSamples = dataChunkSize / bytesPerSample; return numSamples / sampleRate; } dataOffset += 8 + dataChunkSize + (dataChunkSize % 2); } } offset += 8 + chunkSize + (chunkSize % 2); } return undefined; } /** * Estimate MP3 duration (approximate) */ function estimateMp3Duration(buffer) { // This is a rough estimate based on file size and assumed bitrate // For accurate duration, we would need to parse all frames // Check for ID3v2 tag and skip it let offset = 0; if (buffer[0] === 0x49 && buffer[1] === 0x44 && buffer[2] === 0x33) { // ID3v2 tag present const tagSize = ((buffer[6] & 0x7f) << 21) | ((buffer[7] & 0x7f) << 14) | ((buffer[8] & 0x7f) << 7) | (buffer[9] & 0x7f); offset = 10 + tagSize; } // Find first MP3 frame header while (offset < buffer.length - 4) { if (buffer[offset] === 0xff && (buffer[offset + 1] & 0xe0) === 0xe0) { // Found frame sync const version = (buffer[offset + 1] >> 3) & 0x03; const _layer = (buffer[offset + 1] >> 1) & 0x03; const bitrateIndex = (buffer[offset + 2] >> 4) & 0x0f; const sampleRateIndex = (buffer[offset + 2] >> 2) & 0x03; // Get sample rate const sampleRates = { 3: [44100, 48000, 32000], // MPEG1 2: [22050, 24000, 16000], // MPEG2 0: [11025, 12000, 8000], // MPEG2.5 }; const sampleRate = sampleRates[version]?.[sampleRateIndex]; // Get bitrate (MPEG1 Layer III) const bitrates = [ 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0, ]; const bitrate = bitrates[bitrateIndex]; if (sampleRate && bitrate) { // Estimate duration: (file_size_bits) / bitrate const audioBytes = buffer.length - offset; return (audioBytes * 8) / (bitrate * 1000); } break; } offset++; } // Fallback: assume 128kbps return (buffer.length * 8) / 128000; } /** * Estimate Opus/OGG duration (approximate) */ function estimateOpusDuration(buffer) { // Opus typically uses 48kHz, estimate based on typical bitrate // For accurate duration, we would need to parse all pages // Assume average bitrate of 64kbps for voice return (buffer.length * 8) / 64000; } /** * Convert audio format (basic conversion) * * Note: For full format conversion, external tools like ffmpeg would be needed. * This provides basic PCM resampling only. * * @param buffer - Input audio buffer * @param fromFormat - Source format * @param toFormat - Target format * @param options - Conversion options * @returns Converted audio buffer */ export async function convertAudioFormat(buffer, fromFormat, toFormat, _options = {}) { // If formats are the same, just return the buffer if (fromFormat === toFormat) { return buffer; } // Genuine format conversion needs ffmpeg or similar. Until that's wired up, // fail loudly — silently returning the original bytes labeled as the new // format pushes a much harder-to-debug failure into the next provider call // (Copilot/CodeRabbit review). logger.warn(`[audio-utils] Audio format conversion from ${fromFormat} to ${toFormat} is not implemented.`); throw new Error(`Audio format conversion from ${fromFormat} to ${toFormat} is not implemented. Convert with ffmpeg before passing to NeuroLink.`); } /** * Create PCM audio buffer from raw samples * * @param samples - Array of sample values (-1 to 1) * @param sampleRate - Sample rate in Hz * @param bitDepth - Bit depth (8, 16, 24, or 32) * @returns PCM audio buffer */ export function createPcmBuffer(samples, _sampleRate = 16000, bitDepth = 16) { const bytesPerSample = bitDepth / 8; const buffer = Buffer.alloc(samples.length * bytesPerSample); for (let i = 0; i < samples.length; i++) { const sample = Math.max(-1, Math.min(1, samples[i])); const offset = i * bytesPerSample; switch (bitDepth) { case 8: buffer.writeUInt8(Math.round((sample + 1) * 127.5), offset); break; case 16: buffer.writeInt16LE(Math.round(sample * 32767), offset); break; case 24: { const val24 = Math.round(sample * 8388607); buffer.writeUInt8(val24 & 0xff, offset); buffer.writeUInt8((val24 >> 8) & 0xff, offset + 1); buffer.writeUInt8((val24 >> 16) & 0xff, offset + 2); break; } case 32: buffer.writeInt32LE(Math.round(sample * 2147483647), offset); break; } } return buffer; } /** * Extract PCM samples from buffer * * @param buffer - PCM audio buffer * @param bitDepth - Bit depth (8, 16, 24, or 32) * @returns Array of sample values (-1 to 1) */ export function extractPcmSamples(buffer, bitDepth = 16) { const bytesPerSample = bitDepth / 8; const numSamples = Math.floor(buffer.length / bytesPerSample); const samples = []; for (let i = 0; i < numSamples; i++) { const offset = i * bytesPerSample; switch (bitDepth) { case 8: samples.push(buffer.readUInt8(offset) / 127.5 - 1); break; case 16: samples.push(buffer.readInt16LE(offset) / 32767); break; case 24: { const val24 = buffer.readUInt8(offset) | (buffer.readUInt8(offset + 1) << 8) | (buffer.readUInt8(offset + 2) << 16); samples.push((val24 > 8388607 ? val24 - 16777216 : val24) / 8388607); break; } case 32: samples.push(buffer.readInt32LE(offset) / 2147483647); break; } } return samples; } /** * Resample PCM audio * * @param samples - Input samples * @param fromSampleRate - Source sample rate * @param toSampleRate - Target sample rate * @returns Resampled samples */ export function resamplePcm(samples, fromSampleRate, toSampleRate) { if (fromSampleRate <= 0 || toSampleRate <= 0) { return samples; } if (fromSampleRate === toSampleRate) { return samples; } const ratio = fromSampleRate / toSampleRate; const newLength = Math.round(samples.length / ratio); const resampled = []; for (let i = 0; i < newLength; i++) { const srcIndex = i * ratio; const srcIndexFloor = Math.floor(srcIndex); const srcIndexCeil = Math.min(srcIndexFloor + 1, samples.length - 1); const fraction = srcIndex - srcIndexFloor; // Linear interpolation const value = samples[srcIndexFloor] * (1 - fraction) + samples[srcIndexCeil] * fraction; resampled.push(value); } return resampled; } /** * Normalize audio levels * * @param samples - Input samples * @param targetPeak - Target peak level (0 to 1) * @returns Normalized samples */ export function normalizeAudio(samples, targetPeak = 0.95) { if (samples.length === 0) { return samples; } // Find current peak let peak = 0; for (const sample of samples) { peak = Math.max(peak, Math.abs(sample)); } if (peak === 0) { return samples; } // Calculate gain const gain = targetPeak / peak; // Apply gain return samples.map((s) => s * gain); } /** * Create a WAV header * * @param dataSize - Size of audio data in bytes * @param sampleRate - Sample rate in Hz * @param channels - Number of channels * @param bitDepth - Bit depth * @returns WAV header buffer */ export function createWavHeader(dataSize, sampleRate = 16000, channels = 1, bitDepth = 16) { const header = Buffer.alloc(44); const byteRate = sampleRate * channels * (bitDepth / 8); const blockAlign = channels * (bitDepth / 8); // RIFF header header.write("RIFF", 0); header.writeUInt32LE(36 + dataSize, 4); header.write("WAVE", 8); // fmt chunk header.write("fmt ", 12); header.writeUInt32LE(16, 16); // Subchunk1Size (PCM) header.writeUInt16LE(1, 20); // TTSAudioFormat (PCM) header.writeUInt16LE(channels, 22); header.writeUInt32LE(sampleRate, 24); header.writeUInt32LE(byteRate, 28); header.writeUInt16LE(blockAlign, 32); header.writeUInt16LE(bitDepth, 34); // data chunk header.write("data", 36); header.writeUInt32LE(dataSize, 40); return header; } /** * Create a complete WAV file from PCM data * * @param pcmData - PCM audio data * @param sampleRate - Sample rate in Hz * @param channels - Number of channels * @param bitDepth - Bit depth * @returns Complete WAV file buffer */ export function createWavFile(pcmData, sampleRate = 16000, channels = 1, bitDepth = 16) { const header = createWavHeader(pcmData.length, sampleRate, channels, bitDepth); return Buffer.concat([header, pcmData]); } /** * Split audio buffer into chunks * * @param buffer - Audio buffer to split * @param chunkDurationMs - Duration of each chunk in milliseconds * @param sampleRate - Sample rate in Hz * @param bytesPerSample - Bytes per sample (channels * bitDepth / 8) * @returns Array of audio chunks */ export function splitIntoChunks(buffer, chunkDurationMs, sampleRate = 16000, bytesPerSample = 2) { if (chunkDurationMs <= 0 || sampleRate <= 0 || bytesPerSample <= 0) { return [buffer]; } const bytesPerMs = (sampleRate * bytesPerSample) / 1000; const chunkSize = Math.round(chunkDurationMs * bytesPerMs); if (chunkSize <= 0) { return [buffer]; } const chunks = []; for (let offset = 0; offset < buffer.length; offset += chunkSize) { const end = Math.min(offset + chunkSize, buffer.length); chunks.push(buffer.subarray(offset, end)); } return chunks; } /** * Audio format signatures for detection */ export const AUDIO_SIGNATURES = { wav: Buffer.from([0x52, 0x49, 0x46, 0x46]), // RIFF mp3: { id3: Buffer.from([0x49, 0x44, 0x33]), // ID3 frameSync: Buffer.from([0xff, 0xe0]), // Frame sync mask }, ogg: Buffer.from([0x4f, 0x67, 0x67, 0x53]), // OggS }; /** * MIME types for audio formats */ export const MIME_TYPES = { wav: "audio/wav", mp3: "audio/mpeg", ogg: "audio/ogg", opus: "audio/opus", };