austack
TypeScript/JavaScript client SDK for Austack conversational AI
JavaScript
export class AudioInterface {
constructor(inputCallback, amplitudeCallback, onInterrupt) {
this.inputCallback = inputCallback;
this.amplitudeCallback = amplitudeCallback;
this.onInterrupt = onInterrupt;
this.inputAudioContext = null;
this.outputAudioContext = null;
this.inputStream = null;
this.isRunning = false;
// Simplified output playback queue/state (sequential chunk playback)
this.outputQueue = [];
this.isOutputPlaying = false;
this.currentOutputSource = null;
// Configuration matching Python client
this.inputSampleRate = 16000;
this.outputSampleRate = 16000;
this.inputChannels = 1;
this.chunkSize = 1024;
this.silenceThreshold = 0.01;
this.silenceTimeout = 2000; // 2 seconds in ms
this.sendInterval = 500; // 0.5 seconds in ms
// Interrupt detection configuration
this.interruptThreshold = 0.05; // Higher threshold for interrupt detection
this.isAudioPlaying = false;
// State tracking
this.lastSpeechTime = null;
this.audioBufferParts = [];
this.lastSendTime = Date.now();
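// Per-chunk mic processing: report the level for the UI, detect speech and
// barge-in, then batch raw PCM and flush it to inputCallback every sendInterval ms.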
this.processAudioData = (audioData) => {
const currentTime = Date.now();
// Calculate amplitude for pulsing effect
const rms = this.calculateRMS(audioData);
if (this.amplitudeCallback) {
this.amplitudeCallback(rms);
}
// Check for speech
const isSpeaking = this.isSpeech(audioData);
if (isSpeaking) {
this.lastSpeechTime = currentTime;
// Check if we should interrupt due to speaking during playback
if (this.isAudioPlaying && rms > this.interruptThreshold && this.onInterrupt) {
console.log('User speaking during playback, sending interrupt');
this.onInterrupt();
this.isAudioPlaying = false; // Stop interrupt detection until next playback
}
}
// Buffer audio if we should send it
if (this.shouldSendAudio()) {
const int16Data = this.float32ArrayToInt16Array(audioData);
this.audioBufferParts.push(new Uint8Array(int16Data.buffer));
}
// Send batched audio periodically
if (currentTime - this.lastSendTime >= this.sendInterval &&
this.audioBufferParts.length > 0 &&
this.shouldSendAudio()) {
// Combine buffered audio
const totalLength = this.audioBufferParts.reduce((sum, part) => sum + part.byteLength, 0);
const combinedArray = new Uint8Array(totalLength);
let offset = 0;
for (const part of this.audioBufferParts) {
combinedArray.set(part, offset);
offset += part.byteLength;
}
console.log(`Sending ${combinedArray.byteLength} bytes of audio`);
this.inputCallback(combinedArray);
// Reset buffer and timer
this.audioBufferParts = [];
this.lastSendTime = currentTime;
}
};
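// Sequential playback: convert one queued linear16 chunk to an AudioBuffer,
// play it, and chain the next chunk from the source's onended callback.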
this.playNextOutputChunk = () => {
if (!this.outputAudioContext) {
this.isOutputPlaying = false;
return;
}
if (this.outputQueue.length === 0) {
this.isOutputPlaying = false;
// Playback completed
this.isAudioPlaying = false;
this.currentOutputSource = null;
return;
}
this.isOutputPlaying = true;
const nextChunk = this.outputQueue.shift();
// Convert linear16 PCM to Float32 and play via AudioBufferSourceNode
const float32 = this.convertPCMToFloat32(nextChunk);
const audioBuffer = this.outputAudioContext.createBuffer(1, float32.length, this.outputSampleRate);
audioBuffer.copyToChannel(float32, 0);
const source = this.outputAudioContext.createBufferSource();
source.buffer = audioBuffer;
source.connect(this.outputAudioContext.destination);
source.onended = this.playNextOutputChunk;
this.currentOutputSource = source;
try {
source.start();
}
catch (error) {
console.error('Error starting audio playback', error);
this.isOutputPlaying = false;
this.currentOutputSource = null;
}
};
}
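// Root-mean-square level of a Float32 PCM chunk, used for both the
// amplitude UI and speech detection.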
calculateRMS(audioData) {
let sum = 0;
for (let i = 0; i < audioData.length; i++) {
sum += audioData[i] * audioData[i];
}
const rms = Math.sqrt(sum / audioData.length);
// Gate very low levels to zero so background noise does not drive the UI.
// Note this 0.02 floor is above silenceThreshold (0.01), so in practice it
// is the effective speech gate as well.
if (rms < 0.02) {
return 0;
}
return rms;
}
isSpeech(audioData) {
const rms = this.calculateRMS(audioData);
return rms > this.silenceThreshold;
}
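// Keep sending audio for silenceTimeout ms after the last detected speech,
// so trailing words and short pauses are not clipped.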
shouldSendAudio() {
if (this.lastSpeechTime === null) {
return false;
}
const timeSinceSpeech = Date.now() - this.lastSpeechTime;
return timeSinceSpeech < this.silenceTimeout;
}
float32ArrayToInt16Array(float32Array) {
const int16Array = new Int16Array(float32Array.length);
for (let i = 0; i < float32Array.length; i++) {
// Clamp and convert to 16-bit signed integer
const val = Math.max(-1, Math.min(1, float32Array[i]));
int16Array[i] = val * 32767;
}
return int16Array;
}
async start() {
try {
// Request microphone access with WebRTC constraints including echo cancellation
this.inputStream = await navigator.mediaDevices.getUserMedia({
audio: {
sampleRate: this.inputSampleRate,
channelCount: this.inputChannels,
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
},
});
// Create audio context
this.inputAudioContext = new AudioContext({
sampleRate: this.inputSampleRate,
});
this.outputAudioContext = new AudioContext({
sampleRate: this.outputSampleRate,
});
// Create media stream source
const source = this.inputAudioContext.createMediaStreamSource(this.inputStream);
// Create script processor for audio analysis
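// Note: ScriptProcessorNode is deprecated in the Web Audio API; an
// AudioWorklet would be the modern replacement, but this keeps setup simple.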
const scriptProcessor = this.inputAudioContext.createScriptProcessor(this.chunkSize, 1, 1);
scriptProcessor.onaudioprocess = (event) => {
if (!this.isRunning)
return;
const inputBuffer = event.inputBuffer;
const audioData = inputBuffer.getChannelData(0);
this.processAudioData(audioData);
};
// Connect audio graph
source.connect(scriptProcessor);
scriptProcessor.connect(this.inputAudioContext.destination);
this.isRunning = true;
console.log('Audio interface started');
}
catch (error) {
console.error('Error starting audio interface:', error);
throw error;
}
}
play(audioData) {
if (!this.outputAudioContext) {
console.warn('Audio interface not initialized for playback');
return;
}
// Mark that audio is now playing for interrupt detection
this.isAudioPlaying = true;
// Queue raw PCM chunk; playback will convert to AudioBuffer per chunk
this.outputQueue.push(new Uint8Array(audioData));
if (!this.isOutputPlaying) {
this.playNextOutputChunk();
}
}
setAudioPlaybackState(isPlaying) {
this.isAudioPlaying = isPlaying;
}
isAudioPlaybackActive() {
return this.isAudioPlaying;
}
interruptPlayback() {
// Clear any queued audio and reset playback state without stopping input capture
this.outputQueue = [];
this.isOutputPlaying = false;
this.isAudioPlaying = false;
if (this.currentOutputSource) {
// Detach onended first so stopping the source does not re-enter playNextOutputChunk
this.currentOutputSource.onended = null;
try {
this.currentOutputSource.stop();
}
catch { }
this.currentOutputSource.disconnect();
this.currentOutputSource = null;
}
console.log('Audio playback interrupted');
}
convertPCMToFloat32(pcmData) {
// Copy into a fresh ArrayBuffer so the view below is not backed by a SharedArrayBuffer
const buffer = new ArrayBuffer(pcmData.byteLength);
new Uint8Array(buffer).set(pcmData);
// Interpret as linear16 PCM (host endianness, little-endian on mainstream
// platforms); byteLength must be even or this constructor throws
const int16Data = new Int16Array(buffer);
const float32Data = new Float32Array(int16Data.length);
// Convert from int16 to float32 [-1, 1]
for (let i = 0; i < int16Data.length; i++) {
float32Data[i] = int16Data[i] / 32768.0;
}
return float32Data;
}
stop() {
this.isRunning = false;
this.outputQueue = [];
this.isOutputPlaying = false;
if (this.currentOutputSource) {
// Detach onended so stopping does not re-enter playNextOutputChunk
this.currentOutputSource.onended = null;
try {
this.currentOutputSource.stop();
}
catch { }
this.currentOutputSource.disconnect();
this.currentOutputSource = null;
}
if (this.inputStream) {
this.inputStream.getTracks().forEach(track => track.stop());
this.inputStream = null;
}
if (this.inputAudioContext) {
this.inputAudioContext.close();
this.inputAudioContext = null;
}
if (this.outputAudioContext) {
this.outputAudioContext.close();
this.outputAudioContext = null;
}
console.log('Audio interface stopped');
}
cleanup() {
this.stop();
}
}
//# sourceMappingURL=AudioInterface.js.map
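
Example usage

A minimal wiring sketch, not part of this file: the WebSocket transport, endpoint URL, DOM element, and server message shape below are illustrative assumptions, not Austack's actual API. It shows the three constructor callbacks (batched mic PCM out, mic level for UI, barge-in handling) plus queueing server audio with play().

JavaScript
import { AudioInterface } from './AudioInterface.js';

// Hypothetical transport; replace with the real Austack endpoint and protocol.
const ws = new WebSocket('wss://example.invalid/agent');
ws.binaryType = 'arraybuffer';

const audio = new AudioInterface(
  (pcmBytes) => {
    // Batched 16 kHz linear16 mic audio; forward as a binary frame
    if (ws.readyState === WebSocket.OPEN) ws.send(pcmBytes);
  },
  (rms) => {
    // Mic RMS level; drive a level meter or pulsing indicator
    console.log('mic level', rms.toFixed(3));
  },
  () => {
    // User spoke over playback: stop local audio and notify the server
    // (the interrupt message shape here is assumed)
    audio.interruptPlayback();
    ws.send(JSON.stringify({ type: 'interrupt' }));
  },
);

ws.onmessage = (event) => {
  if (event.data instanceof ArrayBuffer) {
    audio.play(event.data); // queue one linear16 chunk for sequential playback
  }
};

// start() should run from a user gesture so the browser allows the AudioContext.
document.querySelector('#start').addEventListener('click', () => {
  audio.start().catch((err) => console.error(err));
});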