@steelbrain/media-buffer-speech
Version:
Speech buffering that accumulates audio chunks and releases them after natural pause periods
113 lines • 4.25 kB
JavaScript
;
// Flag this CommonJS module with `__esModule: true` — the conventional marker
// emitted by transpilers for modules that were authored as ES modules.
Object.defineProperty(exports, "__esModule", { value: true });
// Publish `bufferSpeech` as a named export. The function is declared further
// down the file; function declarations are hoisted, so the reference is valid here.
exports.bufferSpeech = bufferSpeech;
/**
 * Creates a TransformStream that buffers incoming chunks and releases them
 * after a configurable pause period with no new data.
 *
 * Perfect for detecting natural break points in continuous streams like speech.
 *
 * Each release enqueues ONE array containing every chunk buffered since the
 * previous release (unless `noEmit` is set). Any chunks still buffered when
 * the stream closes are released from `flush`. If the stream is cancelled or
 * aborted, the pending pause timer is disarmed and the buffer is discarded
 * without calling `onBuffered`.
 *
 * @param {object} [options]
 * @param {number} [options.durationSeconds=2.0] Quiet period (no incoming
 *   chunks) after which the buffer is released.
 * @param {number} [options.maxBufferSeconds=60.0] Age of the current buffer
 *   beyond which every newly arriving chunk reports an overflow through
 *   `onError`. Overflow is only reported; buffering continues.
 * @param {boolean} [options.noEmit] When truthy, nothing is enqueued
 *   downstream; only `onBuffered` fires on release.
 * @param {() => void} [options.onBuffered] Called on every non-empty release,
 *   before the chunks are enqueued.
 * @param {(error: Error) => void} [options.onError] Receives buffer-overflow errors.
 * @param {(message: string) => void} [options.onDebugLog] Receives diagnostic
 *   messages, each prefixed with `SpeechBuffer: `.
 * @returns {TransformStream} Stream whose readable side yields arrays of the
 *   chunks written to its writable side.
 *
 * @example
 * ```typescript
 * // Wait for 2-second pause before processing
 * const pauseDetector = bufferSpeech({
 *   durationSeconds: 2.0,
 *   maxBufferSeconds: 60.0,
 *   onError: (err) => console.error('Buffer overflow:', err)
 * });
 *
 * await audioStream
 *   .pipeThrough(speechFilter())       // Filter for speech
 *   .pipeThrough(pauseDetector)        // Wait for natural pauses
 *   .pipeTo(transcriptionProcessor);   // Process complete segments
 *
 * // .tee() pattern for live transcription with turn detection
 * const [liveStream, turnStream] = audioStream.tee();
 *
 * // Branch 1: Live transcription
 * liveStream.pipeThrough(speechFilter()).pipeTo(liveTranscriber);
 *
 * // Branch 2: Turn detection signaling
 * const turnDetector = bufferSpeech({
 *   durationSeconds: 3.0,
 *   noEmit: true,                      // Don't emit chunks
 *   onBuffered: () => {                // Signal when turn is complete
 *     console.log('Turn complete - process accumulated transcript');
 *     processAccumulatedTranscript();
 *   }
 * });
 * turnStream.pipeThrough(speechFilter()).pipeThrough(turnDetector);
 * ```
 */
function bufferSpeech(options = {}) {
    const durationMs = (options.durationSeconds ?? 2.0) * 1000;
    const maxBufferMs = (options.maxBufferSeconds ?? 60.0) * 1000;
    let buffer = [];
    let pauseTimer = null;
    let bufferStartTime = null;

    const debugLog = (message) => {
        options.onDebugLog?.(`SpeechBuffer: ${message}`);
    };

    const handleError = (message) => {
        const error = new Error(message);
        options.onError?.(error);
        debugLog(`Error: ${message}`);
    };

    // Disarm the pending pause timer, if any.
    const clearPauseTimer = () => {
        if (pauseTimer !== null) {
            clearTimeout(pauseTimer);
            pauseTimer = null;
        }
    };

    // Hand the buffered chunks downstream (or just signal, with noEmit).
    const releaseBuffer = (controller) => {
        clearPauseTimer();
        if (buffer.length === 0) {
            return;
        }
        const released = buffer;
        const bufferDuration = bufferStartTime !== null ? Date.now() - bufferStartTime : 0;
        // Reset state BEFORE invoking user callbacks / enqueue so that a throw
        // from either cannot leave stale chunks behind.
        buffer = [];
        bufferStartTime = null;
        debugLog(`Releasing buffer: ${released.length} chunks after ${bufferDuration}ms`);
        // Trigger callback for .tee() patterns
        options.onBuffered?.();
        // Only emit downstream if noEmit is false (default behavior)
        if (!options.noEmit) {
            // `released` is no longer referenced by this closure, so no copy is needed.
            controller.enqueue(released);
        }
    };

    // Report (but do not act on) a buffer that has been open longer than allowed.
    const checkBufferOverflow = () => {
        if (bufferStartTime !== null && Date.now() - bufferStartTime > maxBufferMs) {
            handleError(`Buffer overflow: ${buffer.length} chunks accumulated over ${maxBufferMs / 1000}s. ` +
                `Consider increasing maxBufferSeconds or checking for continuous input without pauses.`);
        }
    };

    return new TransformStream({
        start: () => {
            debugLog(`Initialized with ${durationMs}ms speech buffering, ${maxBufferMs}ms max buffer`);
        },
        transform: (chunk, controller) => {
            // Initialize buffer timing on first chunk
            if (bufferStartTime === null) {
                bufferStartTime = Date.now();
                debugLog('Started new buffer');
            }
            buffer.push(chunk);
            debugLog(`Buffered chunk ${buffer.length}, total: ${buffer.length} chunks`);
            checkBufferOverflow();
            // Every chunk restarts the pause countdown.
            clearPauseTimer();
            pauseTimer = setTimeout(() => {
                pauseTimer = null;
                try {
                    releaseBuffer(controller);
                }
                catch (error) {
                    // We are inside a timer callback: an escaping exception would be
                    // uncaught. This happens when the stream was closed/errored behind
                    // our back (enqueue throws) or when onBuffered throws. Surface it
                    // through the stream instead; error() is a no-op if the stream has
                    // already finished.
                    debugLog(`Timed release failed: ${error instanceof Error ? error.message : String(error)}`);
                    controller.error(error);
                }
            }, durationMs);
        },
        flush: (controller) => {
            debugLog('Stream ending, releasing final buffer');
            // Release any buffered content
            releaseBuffer(controller);
        },
        // Invoked (on runtimes implementing the transformer `cancel` hook) when the
        // readable side is cancelled or the writable side is aborted. Without this
        // the pause timer would outlive the stream and fire against a dead controller.
        cancel: () => {
            debugLog('Stream cancelled, discarding buffer');
            clearPauseTimer();
            buffer = [];
            bufferStartTime = null;
        },
    });
}
//# sourceMappingURL=index.js.map