@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
276 lines (241 loc) • 8.09 kB
JavaScript
// Snapshot the browser-provided globals under explicit Browser*/browser*
// names; the rest of the file refers to these bindings only.
const BrowserWebSocket = globalThis.WebSocket;
const BrowserHTMLElement = globalThis.HTMLElement;
const BrowserHTMLButtonElement = globalThis.HTMLButtonElement;
const BrowserAudioContext = globalThis.AudioContext;
const BrowserBlob = globalThis.Blob;
const browserAlert = globalThis.alert;
// The WebSocket endpoint lives on the same host as the page; use the secure
// scheme exactly when the page itself was loaded over https.
const pageLocation = globalThis.location;
const socketProtocol = pageLocation.protocol !== "https:" ? "ws:" : "wss:";
const socketUrl = `${socketProtocol}//${pageLocation.host}`;
/**
 * Current connection, or null before the first connectSocket() call.
 * @type {WebSocket | null}
 */
let socket = null;
// Resolve every DOM node the script depends on and fail fast, with a message
// naming the missing id, when the page markup does not provide it.
// orbEl / statusNode / toggleBtnEl are the checked references used below.
const orb = document.getElementById("orb");
if (!(orb instanceof BrowserHTMLElement)) {
  throw new Error("Missing #orb element");
}
const orbEl = orb;

const statusEl = document.getElementById("status");
if (!(statusEl instanceof BrowserHTMLElement)) {
  throw new Error("Missing #status element");
}
const statusNode = statusEl;

const toggleBtn = document.getElementById("toggleBtn");
if (!(toggleBtn instanceof BrowserHTMLButtonElement)) {
  throw new Error("Missing #toggleBtn element");
}
const toggleBtnEl = toggleBtn;
// Cobra VAD on the server requires 16kHz raw PCM
const CAPTURE_SAMPLE_RATE = 16000;
// True while a conversation is running: set by startConversation() once the
// capture graph is wired up, cleared by stopConversation() and on start failure.
let isActive = false;
/* ---- PLAYBACK (TTS arrives as 24kHz raw PCM from server) ---- */
// Output context created at 24kHz, the same rate used for the buffers built
// from incoming TTS chunks.
const playbackCtx = new BrowserAudioContext({ sampleRate: 24000 });
// playbackCtx time at which the next TTS chunk is scheduled to start; every
// scheduled chunk advances it by that chunk's duration so chunks play back-to-back.
let playbackTime = 0;
/** @type {AudioBufferSourceNode[]} */
let activeSources = []; // track all scheduled sources so we can stop them on interrupt
// Set when playback is stopped on purpose (server "interrupt" or
// stopConversation()) so late onended callbacks do not report playback_done;
// cleared again when a new audio chunk arrives.
let playbackCanceled = false;
/* ---- WEBSOCKET ---- */
/**
 * Create (or recreate) the WebSocket connection.
 * Called at page load and on every startConversation so the page
 * is recoverable after a disconnect.
 *
 * A socket that is still CONNECTING or OPEN is reused. Otherwise a new socket
 * replaces the old one, and the handlers installed here act only while their
 * socket is still the current one: a superseded socket that finishes closing
 * later must not stop the new conversation or overwrite the status text.
 */
function connectSocket() {
  if (socket && socket.readyState <= BrowserWebSocket.OPEN) {
    return; // already connected or connecting
  }
  const ws = new BrowserWebSocket(socketUrl);
  socket = ws;
  ws.binaryType = "blob";
  ws.onopen = () => {
    if (socket !== ws) {
      return; // superseded before it finished connecting
    }
    console.log("Connected");
    statusNode.textContent = "Connected";
  };
  /** @param {Event} e */
  ws.onerror = (e) => console.error("WS error", e);
  ws.onclose = () => {
    if (socket !== ws) {
      return; // stale close from a replaced socket — the live one is unaffected
    }
    stopConversation();
    statusNode.textContent = "Disconnected";
  };
  ws.onmessage = onSocketMessage;
}
/* ---- INCOMING: TTS audio + control messages ---- */
/**
 * Incremented on every server "interrupt". An audio chunk records the value
 * before it starts decoding; a different value afterwards means the chunk
 * belongs to speech that was interrupted in the meantime and must be dropped.
 */
let playbackEpoch = 0;

/**
 * Handle one WebSocket message.
 *
 * - String payloads are JSON control messages. Only `{ type: "interrupt" }`
 *   is acted on: every scheduled source is stopped and the UI is reset.
 *   Anything that is not valid JSON, or not an object, is ignored.
 * - Blob payloads are raw 16-bit PCM, 24kHz, mono. They are converted to
 *   float samples and scheduled back-to-back on `playbackCtx`. When the last
 *   scheduled chunk ends, `{ type: "playback_done" }` is sent to the server.
 *
 * Payloads shorter than one sample are ignored and a trailing odd byte is
 * discarded, so a malformed frame cannot throw out of the handler.
 *
 * @param {MessageEvent<string | Blob>} event
 * @returns {Promise<void>}
 */
async function onSocketMessage(event) {
  const { data } = event;

  // JSON control message
  if (typeof data === "string") {
    let msg;
    try {
      msg = JSON.parse(data);
    } catch {
      return; // not JSON — nothing to act on
    }
    // JSON.parse may legitimately yield null or a primitive; only objects carry a type.
    if (msg !== null && typeof msg === "object" && msg.type === "interrupt") {
      console.log("Interrupt — stopping playback");
      playbackEpoch += 1; // invalidates chunks that are still being decoded
      playbackCanceled = true;
      activeSources.forEach((s) => {
        try {
          s.stop(0);
        } catch {
          /* already stopped */
        }
      });
      activeSources = [];
      playbackTime = playbackCtx.currentTime;
      orbEl.className = isActive ? "listening" : "idle";
      statusNode.textContent = isActive ? "Listening..." : "Stopped.";
    }
    return;
  }

  // Binary: raw 16-bit PCM, 24kHz, mono
  if (!(data instanceof BrowserBlob)) {
    return;
  }
  // Less than one full sample: createBuffer() rejects a zero-length buffer,
  // so bail out before touching the UI.
  if (data.size < 2) {
    return;
  }

  orbEl.className = "speaking";
  statusNode.textContent = "Assistant speaking...";
  playbackCanceled = false;

  const epochAtArrival = playbackEpoch;
  const arrayBuffer = await data.arrayBuffer();
  // An interrupt or stopConversation() may have run while the blob was being
  // read; scheduling this chunk now would play stale audio after the barge-in.
  if (epochAtArrival !== playbackEpoch || playbackCanceled) {
    return;
  }

  // An explicit length lets an odd-sized payload drop its dangling byte
  // instead of making the Int16Array constructor throw.
  const sampleCount = Math.floor(arrayBuffer.byteLength / 2);
  const pcm16 = new Int16Array(arrayBuffer, 0, sampleCount);
  const audioBuffer = playbackCtx.createBuffer(1, sampleCount, 24000);
  const channel = audioBuffer.getChannelData(0);
  for (let i = 0; i < sampleCount; i++) {
    channel[i] = pcm16[i] / 32768;
  }

  const source = playbackCtx.createBufferSource();
  source.buffer = audioBuffer;
  source.connect(playbackCtx.destination);

  // Queue directly after the previously scheduled chunk, or start now if the
  // queue has already drained.
  const nowT = playbackCtx.currentTime;
  if (playbackTime < nowT) {
    playbackTime = nowT;
  }
  source.start(playbackTime);
  playbackTime += audioBuffer.duration;
  activeSources.push(source);

  source.onended = () => {
    activeSources = activeSources.filter((s) => s !== source);
    // When the last chunk finishes, notify server and reset UI.
    // Skip if playback was intentionally canceled (interrupt/stop) —
    // stale onended callbacks must not send playback_done mid-barge-in.
    if (
      !playbackCanceled &&
      activeSources.length === 0 &&
      playbackTime <= playbackCtx.currentTime + 0.05
    ) {
      if (socket && socket.readyState === BrowserWebSocket.OPEN) {
        socket.send(JSON.stringify({ type: "playback_done" }));
      }
      orbEl.className = isActive ? "listening" : "idle";
      statusNode.textContent = isActive ? "Listening..." : "Stopped.";
    }
  };
}
// Connect immediately on page load; startConversation() calls connectSocket()
// again, which is a no-op while this socket is still connecting or open.
connectSocket();
/* ---- CAPTURE ---- */
// Microphone pipeline state. All three are created by startConversation() and
// released (and reset to null) by stopConversation() or a failed start.
/** @type {AudioContext | null} */
let captureCtx = null; // context created at CAPTURE_SAMPLE_RATE for mic capture
/** @type {ScriptProcessorNode | null} */
let scriptProcessor = null; // node whose onaudioprocess sends PCM over the socket
/** @type {MediaStream | null} */
let micStream = null; // stream returned by getUserMedia; its tracks are stopped on teardown
/**
 * True while a startConversation() call is between its first await and its
 * completion. Overlapping calls (e.g. a second click while the microphone
 * permission prompt is open) are ignored so they cannot overwrite — and
 * thereby leak — the stream, context and processor of the first call.
 */
let startPending = false;

/**
 * Start a conversation: make sure the WebSocket is up, unlock playback,
 * open the microphone, and stream 16-bit PCM at CAPTURE_SAMPLE_RATE to the
 * server while `isActive` is true.
 *
 * The call is a no-op when a conversation is already active or another start
 * is still in progress. Failures never reject: partially created resources
 * are released, the error is logged, and the user is shown an alert.
 *
 * @returns {Promise<void>}
 */
async function startConversation() {
  if (startPending || isActive) {
    return;
  }
  startPending = true;
  try {
    // Ensure we have a live WebSocket (reconnects after previous disconnect)
    connectSocket();
    await playbackCtx.resume();
    micStream = await navigator.mediaDevices.getUserMedia({
      audio: {
        echoCancellation: true,
        noiseSuppression: true,
        autoGainControl: true,
        channelCount: 1,
      },
    });
    // Separate AudioContext at 16kHz — keeps capture and playback sample rates independent
    captureCtx = new BrowserAudioContext({ sampleRate: CAPTURE_SAMPLE_RATE });
    const micSource = captureCtx.createMediaStreamSource(micStream);
    // 1024 samples = 64ms per callback; server splits into 512-sample Cobra frames
    scriptProcessor = captureCtx.createScriptProcessor(1024, 1, 1);
    scriptProcessor.onaudioprocess = (e) => {
      if (!isActive || !socket || socket.readyState !== BrowserWebSocket.OPEN) {
        return;
      }
      const input = e.inputBuffer.getChannelData(0);
      const int16 = new Int16Array(input.length);
      for (let i = 0; i < input.length; i++) {
        // Clamp to [-1, 1], then scale asymmetrically so both ends of the
        // signed 16-bit range are reachable without overflow.
        const s = Math.max(-1, Math.min(1, input[i]));
        int16[i] = s < 0 ? s * 0x8000 : s * 0x7fff;
      }
      socket.send(int16.buffer);
    };
    micSource.connect(scriptProcessor);
    // ScriptProcessor requires a destination in the graph to fire onaudioprocess,
    // but we must NOT route mic audio to speakers — that feeds back into the mic,
    // defeats browser AEC, and causes Soniox to transcribe TTS audio as user speech.
    // A zero-gain node keeps the graph alive while staying completely silent.
    const silentGain = captureCtx.createGain();
    silentGain.gain.value = 0;
    scriptProcessor.connect(silentGain);
    silentGain.connect(captureCtx.destination);
    isActive = true;
    orbEl.className = "listening";
    statusNode.textContent = "Listening...";
    toggleBtnEl.textContent = "Stop Conversation";
    toggleBtnEl.classList.add("active");
  } catch (err) {
    // Release any partially-initialized resources
    if (scriptProcessor) {
      scriptProcessor.disconnect();
      scriptProcessor = null;
    }
    if (micStream) {
      micStream.getTracks().forEach((t) => t.stop());
      micStream = null;
    }
    if (captureCtx) {
      captureCtx.close();
      captureCtx = null;
    }
    isActive = false;
    console.error("Failed to start:", err);
    const message = err instanceof Error ? err.message : String(err);
    browserAlert(`Error: ${message}`);
  } finally {
    // Always reopen the gate, whether the start succeeded or failed.
    startPending = false;
  }
}
/**
 * End the conversation: silence any scheduled TTS audio, release the
 * microphone capture pipeline, and put the UI back into its idle state.
 * Safe to call when nothing is running.
 */
function stopConversation() {
  // Mark the stop as intentional before touching the sources, so their
  // onended callbacks do not report playback_done.
  playbackCanceled = true;
  isActive = false;

  // --- playback ---
  for (const node of activeSources) {
    try {
      node.stop(0);
    } catch {
      /* already stopped */
    }
  }
  activeSources = [];
  playbackTime = playbackCtx.currentTime;

  // --- capture ---
  if (scriptProcessor) {
    scriptProcessor.disconnect();
    scriptProcessor = null;
  }
  if (micStream) {
    for (const track of micStream.getTracks()) {
      track.stop();
    }
    micStream = null;
  }
  if (captureCtx) {
    captureCtx.close();
    captureCtx = null;
  }

  // --- UI ---
  orbEl.className = "idle";
  statusNode.textContent = "Stopped.";
  toggleBtnEl.textContent = "Start Conversation";
  toggleBtnEl.classList.remove("active");
}
// One button drives both directions: a click while a conversation is running
// stops it, any other click starts one.
toggleBtnEl.onclick = async () => {
  if (isActive) {
    stopConversation();
    return;
  }
  await startConversation();
};