susurro-audio
Version:
🎙️ Real-time conversational audio with AI transcription. Build ChatGPT-style voice interfaces in minutes with <300ms latency
1,449 lines (1,438 loc) • 55.6 kB
JavaScript
"use strict";
// ---------------------------------------------------------------------------
// Bundler-generated CommonJS/ESM interop runtime. Not hand-written code.
// ---------------------------------------------------------------------------
// Short aliases for the Object reflection primitives used by the helpers below.
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
// Wraps a lazily-initialised module. `fn` is an object whose first own property
// is the module body. The returned `__init` runs that body only while `fn` is
// still truthy; `fn` is overwritten with 0 while the call's argument is being
// evaluated, so later calls skip the body and just return the stored `res`.
var __esm = (fn, res) => function __init() {
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
};
// Defines one enumerable getter on `target` per key of `all`; each getter calls
// the corresponding thunk, so the export reflects the binding's current value.
var __export = (target, all) => {
for (var name in all)
__defProp(target, name, { get: all[name], enumerable: true });
};
// Copies the own properties of `from` onto `to` as live getters. Keys that `to`
// already owns, and the key equal to `except`, are skipped. A copied key is
// enumerable when the source descriptor is missing or is itself enumerable.
var __copyProps = (to, from, except, desc) => {
if (from && typeof from === "object" || typeof from === "function") {
for (let key of __getOwnPropNames(from))
if (!__hasOwnProp.call(to, key) && key !== except)
__defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
}
return to;
};
// Builds an ESM-shaped namespace for `mod`: a new object that shares `mod`'s
// prototype (or a plain object when `mod` is null/undefined) with `mod`'s own
// properties copied on top.
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
// If the importer is in node compatibility mode or this is not an ESM
// file that has been converted to a CommonJS file using a Babel-
// compatible transform (i.e. "__esModule" has not been set), then set
// "default" to the CommonJS "module.exports" for node compatibility.
isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
mod
));
// Produces the CommonJS export object: a fresh object flagged with
// `__esModule: true` that exposes `mod`'s own properties through getters.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
// src/lib/dynamic-loaders.ts
// Namespace object for src/lib/dynamic-loaders.ts. The getters registered here
// read the `var` bindings declared below, which stay undefined until
// `init_dynamic_loaders()` has run once.
var dynamic_loaders_exports = {};
__export(dynamic_loaders_exports, {
loadMurmubaraProcessing: () => loadMurmubaraProcessing,
loadTransformers: () => loadTransformers,
preloadCriticalDependencies: () => preloadCriticalDependencies
});
var MODULE_CACHE, loadTransformers, loadMurmubaraProcessing, preloadCriticalDependencies;
// Lazy initialiser for the module body; callers invoke `init_dynamic_loaders()`
// before reading `dynamic_loaders_exports`.
var init_dynamic_loaders = __esm({
"src/lib/dynamic-loaders.ts"() {
"use strict";
// Holds the already-loaded modules so each dynamic import is awaited only once
// per successful load.
MODULE_CACHE = {
transformers: null,
murmubaraProcessing: null
};
// Resolves to the "@huggingface/transformers" module namespace, importing it on
// first use and returning the cached namespace afterwards.
loadTransformers = async () => {
if (MODULE_CACHE.transformers) {
console.log("[loadTransformers] Using cached module");
return MODULE_CACHE.transformers;
}
const transformers = await import(
/* webpackChunkName: "transformers-core" */
/* webpackPreload: true */
"@huggingface/transformers"
);
MODULE_CACHE.transformers = transformers;
return transformers;
};
// Resolves to a small facade over the "murmuraba" module that exposes only the
// functions listed in `processedModule`. The facade (not the raw module) is what
// gets cached and returned.
loadMurmubaraProcessing = async () => {
if (MODULE_CACHE.murmubaraProcessing) {
console.log("[loadMurmubaraProcessing] Using cached module");
return MODULE_CACHE.murmubaraProcessing;
}
const module2 = await import(
/* webpackChunkName: "murmuraba-processing" */
/* webpackPreload: true */
"murmuraba"
);
console.log("[loadMurmubaraProcessing] Module loaded for first time, keys:", Object.keys(module2));
console.log("[loadMurmubaraProcessing] murmubaraVAD type:", typeof module2.murmubaraVAD);
const processedModule = {
processFileWithMetrics: module2.processFileWithMetrics || module2.processFile,
// Use processFileWithMetrics first, fallback to processFile
murmubaraVAD: module2.murmubaraVAD,
// No fallback - murmubaraVAD is a required export
// (it is passed through unchecked here, so it is undefined if the export is missing)
extractAudioMetadata: module2.extractAudioMetadata || (() => ({ duration: 1, sampleRate: 44100, channels: 2 })),
// Fallback metadata: a fixed placeholder, not derived from the audio
// Add engine status check to ensure initialization
getEngineStatus: module2.getEngineStatus,
initializeAudioEngine: module2.initializeAudioEngine
};
MODULE_CACHE.murmubaraProcessing = processedModule;
return processedModule;
};
// Schedules background imports of both heavy dependencies (after 2000 ms and
// 3000 ms). Failures are deliberately ignored and the results are not stored in
// MODULE_CACHE.
// NOTE(review): the chunk name used here for "murmuraba" ("murmuraba-engine")
// differs from the one in loadMurmubaraProcessing ("murmuraba-processing") -
// confirm whether that is intentional.
preloadCriticalDependencies = () => {
setTimeout(() => {
import(
/* webpackChunkName: "transformers-core" */
/* webpackPrefetch: true */
"@huggingface/transformers"
).catch(() => {
});
}, 2e3);
setTimeout(() => {
import(
/* webpackChunkName: "murmuraba-engine" */
/* webpackPrefetch: true */
"murmuraba"
).catch(() => {
});
}, 3e3);
};
}
});
// src/index.ts
// Public API of the package entry point (src/index.ts). Every entry is a getter
// thunk, so each export resolves to the binding's value at access time.
var index_exports = {};
__export(index_exports, {
AUDIO_CONFIG: () => AUDIO_CONFIG,
AudioProcessingError: () => AudioProcessingError,
ChunkMiddlewarePipeline: () => ChunkMiddlewarePipeline,
ERROR_MESSAGES: () => ERROR_MESSAGES,
LatencyMonitor: () => LatencyMonitor,
RecordingError: () => RecordingError,
TranscriptionError: () => TranscriptionError,
VADError: () => VADError,
WHISPER_CONFIG: () => WHISPER_CONFIG,
defaultAlertService: () => defaultAlertService,
defaultToastService: () => defaultToastService,
getErrorMessage: () => getErrorMessage,
handleAudioError: () => handleAudioError,
intentMiddleware: () => intentMiddleware,
logAudioError: () => logAudioError,
qualityMiddleware: () => qualityMiddleware,
retryWithBackoff: () => retryWithBackoff,
sentimentMiddleware: () => sentimentMiddleware,
translationMiddleware: () => translationMiddleware,
useErrorHandler: () => useErrorHandler,
useLatencyMonitor: () => useLatencyMonitor,
useModelCache: () => useModelCache,
useSusurro: () => useSusurro
});
// Publish the table above as the CommonJS export object (flagged `__esModule`).
module.exports = __toCommonJS(index_exports);
// src/hooks/use-susurro.ts
var import_react2 = require("react");
var import_murmuraba = require("murmuraba");
// src/lib/chunk-middleware.ts
/**
 * Placeholder translation middleware (disabled by default, priority 1).
 *
 * No translation is performed: the chunk's own transcript is echoed back as
 * `translatedText`, tagged with a fixed source language and confidence.
 */
var translationMiddleware = {
  name: "translation",
  enabled: false,
  priority: 1,
  /**
   * @param {object} chunk - Chunk carrying `transcript` and optional `metadata`.
   * @returns {Promise<object>} Copy of the chunk with translation fields merged into `metadata`.
   */
  async process(chunk) {
    const metadata = {
      ...chunk.metadata,
      originalLanguage: "en",
      translatedText: chunk.transcript,
      translationConfidence: 0.95
    };
    return { ...chunk, metadata };
  }
};
/**
 * Sentiment middleware (disabled by default, priority 2).
 *
 * Runs `analyzeSentiment` on the transcript and stores the label, score and
 * emotion it returns in the chunk's metadata.
 */
var sentimentMiddleware = {
  name: "sentiment",
  enabled: false,
  priority: 2,
  /**
   * @param {object} chunk - Chunk carrying `transcript` and optional `metadata`.
   * @returns {Promise<object>} Copy of the chunk with sentiment fields merged into `metadata`.
   */
  async process(chunk) {
    const { label, score, emotion } = analyzeSentiment(chunk.transcript);
    const metadata = {
      ...chunk.metadata,
      sentiment: label,
      sentimentScore: score,
      emotion
    };
    return { ...chunk, metadata };
  }
};
/**
 * Intent middleware (disabled by default, priority 3).
 *
 * Runs `detectIntent` on the transcript and stores the intent name, its
 * confidence and the entity list it returns in the chunk's metadata.
 */
var intentMiddleware = {
  name: "intent",
  enabled: false,
  priority: 3,
  /**
   * @param {object} chunk - Chunk carrying `transcript` and optional `metadata`.
   * @returns {Promise<object>} Copy of the chunk with intent fields merged into `metadata`.
   */
  async process(chunk) {
    const { name, confidence, entities } = detectIntent(chunk.transcript);
    const metadata = {
      ...chunk.metadata,
      intent: name,
      intentConfidence: confidence,
      entities
    };
    return { ...chunk, metadata };
  }
};
/**
 * Audio-quality middleware (enabled by default).
 *
 * Priority 0 sorts it ahead of the other built-in middlewares. It copies the
 * values reported by `analyzeAudioQuality` into the chunk's metadata.
 */
var qualityMiddleware = {
  name: "quality",
  enabled: true,
  // Lowest number = runs first.
  priority: 0,
  /**
   * @param {object} chunk - Chunk to annotate; passed as-is to `analyzeAudioQuality`.
   * @returns {Promise<object>} Copy of the chunk with quality fields merged into `metadata`.
   */
  async process(chunk) {
    const { score, noiseLevel, clarity, applied } = analyzeAudioQuality(chunk);
    const metadata = {
      ...chunk.metadata,
      audioQuality: score,
      noiseLevel,
      clarity,
      enhancement: applied
    };
    return { ...chunk, metadata };
  }
};
/**
 * Ordered chain of chunk middlewares.
 *
 * Middlewares are kept sorted by ascending `priority` and applied in that
 * order; disabled ones are skipped. A middleware that throws or yields no
 * chunk is reported with `console.warn` and skipped for that chunk, so one
 * faulty middleware never blocks the rest of the chain.
 */
var ChunkMiddlewarePipeline = class {
  // private context: MiddlewareContext; // Context stored for future middleware extensions
  /** Creates a pipeline pre-loaded with the four built-in middlewares. */
  constructor() {
    this.middlewares = [];
    // NOTE: the built-in middleware objects are shared module-level singletons,
    // so enable()/disable() on one pipeline is visible to every other pipeline.
    this.register(qualityMiddleware);
    this.register(translationMiddleware);
    this.register(sentimentMiddleware);
    this.register(intentMiddleware);
  }
  /**
   * Adds a middleware and re-sorts the chain by ascending priority.
   * @param {{name: string, enabled: boolean, priority: number, process: Function}} middleware
   */
  register(middleware) {
    this.middlewares.push(middleware);
    this.middlewares.sort((a, b) => a.priority - b.priority);
  }
  /**
   * Removes every middleware registered under `name`.
   * @param {string} name
   */
  unregister(name) {
    this.middlewares = this.middlewares.filter((m) => m.name !== name);
  }
  /**
   * Enables the first middleware called `name`; unknown names are ignored.
   * @param {string} name
   */
  enable(name) {
    const middleware = this.middlewares.find((m) => m.name === name);
    if (middleware) {
      middleware.enabled = true;
    }
  }
  /**
   * Disables the first middleware called `name`; unknown names are ignored.
   * @param {string} name
   */
  disable(name) {
    const middleware = this.middlewares.find((m) => m.name === name);
    if (middleware) {
      middleware.enabled = false;
    }
  }
  /**
   * Runs the chunk through every enabled middleware.
   *
   * @param {object} chunk - Input chunk; it is shallow-copied, never mutated here.
   * @returns {Promise<object>} The processed chunk whose `metadata` additionally
   *   holds `middlewareLatencies` (ms per middleware that succeeded) and
   *   `totalMiddlewareTime` (their sum).
   */
  async process(chunk) {
    let processedChunk = { ...chunk };
    const processingLatencies = {};
    for (const middleware of this.middlewares) {
      if (!middleware.enabled) continue;
      const startTime = performance.now();
      try {
        const result = await middleware.process(processedChunk);
        if (result == null) {
          // Adopting a missing result would make the metadata merge below throw.
          throw new TypeError(`Middleware "${middleware.name}" returned ${result} instead of a chunk`);
        }
        processedChunk = result;
        processingLatencies[middleware.name] = performance.now() - startTime;
      } catch (error) {
        // Best effort: keep the last good chunk, but make the failure visible.
        console.warn(
          `[ChunkMiddlewarePipeline] Middleware "${middleware.name}" failed and was skipped:`,
          error
        );
      }
    }
    return {
      ...processedChunk,
      metadata: {
        ...processedChunk.metadata,
        middlewareLatencies: processingLatencies,
        totalMiddlewareTime: Object.values(processingLatencies).reduce((a, b) => a + b, 0)
      }
    };
  }
  /**
   * @returns {Array<{name: string, enabled: boolean, priority: number}>}
   *   Snapshot of the chain in execution order.
   */
  getStatus() {
    return this.middlewares.map((m) => ({
      name: m.name,
      enabled: m.enabled,
      priority: m.priority
    }));
  }
};
/**
 * Keyword-based sentiment heuristic.
 *
 * The text is lower-cased and tokenised on anything that is not a letter,
 * digit or apostrophe, so punctuation and repeated whitespace do not hide a
 * keyword ("great!" counts as "great"). Whichever keyword list has more hits
 * wins; ties and empty input are neutral.
 *
 * @param {string} text - Transcript to classify. Non-string input is treated as empty.
 * @returns {{label: string, score: number, emotion: string}} Fixed-score classification.
 */
function analyzeSentiment(text) {
  const positiveWords = new Set(["good", "great", "awesome", "excellent", "amazing"]);
  const negativeWords = new Set(["bad", "terrible", "awful", "horrible", "worst"]);
  const words = typeof text === "string" ? text.toLowerCase().match(/[\p{L}\p{N}']+/gu) ?? [] : [];
  const positiveCount = words.filter((w) => positiveWords.has(w)).length;
  const negativeCount = words.filter((w) => negativeWords.has(w)).length;
  if (positiveCount > negativeCount) {
    return { label: "positive", score: 0.7, emotion: "happy" };
  }
  if (negativeCount > positiveCount) {
    return { label: "negative", score: 0.7, emotion: "sad" };
  }
  return { label: "neutral", score: 0.5, emotion: "neutral" };
}
/**
 * Keyword-based intent heuristic.
 *
 * The text is lower-cased and tokenised on anything that is not a letter,
 * digit or apostrophe, so "why?" and "Stop!" are recognised. Question words
 * take precedence over command words; anything else is a statement.
 *
 * @param {string} text - Transcript to classify. Non-string input is treated as empty.
 * @returns {{name: string, confidence: number, entities: Array}} Detected intent;
 *   `entities` is always an empty array in this implementation.
 */
function detectIntent(text) {
  const questionWords = new Set(["what", "how", "when", "where", "why", "who"]);
  const commandWords = new Set(["play", "stop", "start", "open", "close", "send"]);
  const words = typeof text === "string" ? text.toLowerCase().match(/[\p{L}\p{N}']+/gu) ?? [] : [];
  if (words.some((w) => questionWords.has(w))) {
    return { name: "question", confidence: 0.8, entities: [] };
  }
  if (words.some((w) => commandWords.has(w))) {
    return { name: "command", confidence: 0.8, entities: [] };
  }
  return { name: "statement", confidence: 0.6, entities: [] };
}
/**
 * Reports quality figures for a chunk.
 *
 * Only `score` depends on the input: it is the chunk's `vadScore`, or 0.8 when
 * that value is falsy (missing or 0). The remaining fields are constants.
 *
 * @param {{vadScore?: number}} chunk
 * @returns {{score: number, noiseLevel: number, clarity: number, applied: string[]}}
 */
function analyzeAudioQuality(chunk) {
  const DEFAULT_SCORE = 0.8;
  const score = chunk.vadScore ? chunk.vadScore : DEFAULT_SCORE;
  // Low noise thanks to neural processing
  const noiseLevel = 0.1;
  const clarity = 0.9;
  const applied = ["neural_denoising", "voice_enhancement"];
  return { score, noiseLevel, clarity, applied };
}
// src/hooks/use-latency-monitor.ts
var import_react = require("react");
// src/lib/latency-monitor.ts
/**
 * Collects per-chunk latency measurements, summarises them and raises
 * "optimization-trigger" events when a chunk misses the latency target.
 */
var LatencyMonitor = class {
  /**
   * @param {number} [targetLatency=300] - Target audio-to-emit latency in ms.
   */
  constructor(targetLatency = 300) {
    this.metrics = [];
    // Keep last 1000 measurements
    this.maxMetrics = 1e3;
    // Target latency in ms
    this.target = targetLatency;
    this.optimizations = [];
    // Simple event emitter for optimization triggers
    this.listeners = {};
    this.setupOptimizations();
  }
  /**
   * Stores one measurement (stamped with `performance.now()`), trims the
   * history to `maxMetrics` entries and checks whether an optimization applies.
   * @param {object} metrics - Measurement with `audioToEmitLatency` and the
   *   optional breakdown fields read elsewhere in this class.
   */
  recordMetrics(metrics) {
    const fullMetrics = {
      ...metrics,
      timestamp: performance.now()
    };
    this.metrics.push(fullMetrics);
    if (this.metrics.length > this.maxMetrics) {
      this.metrics = this.metrics.slice(-this.maxMetrics);
    }
    this.checkForOptimizations(fullMetrics);
  }
  /**
   * Summarises the measurements recorded during the last `lastNMinutes`.
   * @param {number} [lastNMinutes=5]
   * @returns {object} Latency statistics; an all-zero report when the window is empty.
   */
  generateReport(lastNMinutes = 5) {
    const cutoffTime = performance.now() - lastNMinutes * 60 * 1e3;
    const recentMetrics = this.metrics.filter((m) => m.timestamp > cutoffTime);
    if (recentMetrics.length === 0) {
      return this.getEmptyReport();
    }
    // Sorted ascending so percentiles, min and max can be read by index.
    const latencies = recentMetrics.map((m) => m.audioToEmitLatency).sort((a, b) => a - b);
    const avgLatency = latencies.reduce((a, b) => a + b, 0) / latencies.length;
    return {
      averageLatency: avgLatency,
      medianLatency: this.getPercentile(latencies, 50),
      p95Latency: this.getPercentile(latencies, 95),
      p99Latency: this.getPercentile(latencies, 99),
      minLatency: latencies[0],
      maxLatency: latencies[latencies.length - 1],
      targetMet: avgLatency < this.target,
      sampleCount: recentMetrics.length,
      timeRange: {
        start: recentMetrics[0].timestamp,
        end: recentMetrics[recentMetrics.length - 1].timestamp
      },
      breakdown: {
        audioProcessing: this.calculateAverageBreakdown(recentMetrics, "audioProcessingLatency"),
        transcription: this.calculateAverageBreakdown(recentMetrics, "transcriptionLatency"),
        middleware: this.calculateAverageBreakdown(recentMetrics, "middlewareLatency")
      }
    };
  }
  /**
   * Compares the newest measurement with the one four samples earlier.
   * @returns {{isHealthy: boolean, currentLatency: number, trend: string}}
   *   A healthy/stable placeholder until at least five samples exist.
   */
  getRealtimeStatus() {
    if (this.metrics.length < 5) {
      return {
        isHealthy: true,
        currentLatency: 0,
        trend: "stable"
      };
    }
    const recent = this.metrics.slice(-5);
    const currentLatency = recent[recent.length - 1].audioToEmitLatency;
    const previousLatency = recent[0].audioToEmitLatency;
    let trend = "stable";
    // Differences of 50 ms or less are treated as noise.
    const trendThreshold = 50;
    if (currentLatency < previousLatency - trendThreshold) {
      trend = "improving";
    } else if (currentLatency > previousLatency + trendThreshold) {
      trend = "degrading";
    }
    return {
      isHealthy: currentLatency < this.target,
      currentLatency,
      trend
    };
  }
  /**
   * Registers the built-in optimizations. Each `condition` looks at a single
   * measurement; each `apply` only emits an "optimization-trigger" event.
   */
  setupOptimizations() {
    this.optimizations.push({
      name: "middleware-reduction",
      // NOTE(review): the text says 400ms, but the condition fires at
      // middlewareLatency > 100 once the chunk misses the overall target.
      description: "Disable non-essential middleware when latency > 400ms",
      expectedLatencyReduction: 100,
      condition: (metrics) => metrics.middlewareLatency > 100,
      apply: async () => {
        this.emit("optimization-trigger", {
          type: "disable-middleware",
          target: ["sentiment", "intent", "translation"]
        });
      }
    });
    this.optimizations.push({
      name: "chunk-size-reduction",
      description: "Reduce chunk size when transcription latency > 200ms",
      expectedLatencyReduction: 80,
      condition: (metrics) => metrics.transcriptionLatency > 200,
      apply: async () => {
        this.emit("optimization-trigger", {
          type: "reduce-chunk-size",
          // Reduce from 8s to 6s
          newSize: 6e3
        });
      }
    });
    this.optimizations.push({
      name: "parallel-processing",
      description: "Enable parallel processing when audio processing > 150ms",
      expectedLatencyReduction: 60,
      condition: (metrics) => metrics.audioProcessingLatency > 150,
      apply: async () => {
        this.emit("optimization-trigger", {
          type: "enable-parallel-processing"
        });
      }
    });
  }
  /**
   * Applies the first optimization whose condition matches, but only for
   * measurements that exceed the target. Failures are logged, never thrown.
   * @param {object} metrics - The measurement that was just recorded.
   */
  checkForOptimizations(metrics) {
    if (metrics.audioToEmitLatency <= this.target) return;
    for (const optimization of this.optimizations) {
      if (optimization.condition(metrics)) {
        optimization.apply().catch((error) => {
          // A throwing listener must not break metric recording, but it
          // should not disappear silently either.
          console.warn(`[LatencyMonitor] Optimization "${optimization.name}" failed:`, error);
        });
        break;
      }
    }
  }
  /**
   * Nearest-rank percentile of an ascending array.
   * @param {number[]} sortedArray
   * @param {number} percentile - 0..100
   * @returns {number|undefined} `undefined` for an empty array.
   */
  getPercentile(sortedArray, percentile) {
    const index = Math.ceil(percentile / 100 * sortedArray.length) - 1;
    return sortedArray[Math.max(0, index)];
  }
  /**
   * Mean of `field` over the measurements where it is a number (0 if none).
   * @param {object[]} metrics
   * @param {string} field
   * @returns {number}
   */
  calculateAverageBreakdown(metrics, field) {
    const values = metrics.map((m) => m[field]).filter((v) => typeof v === "number");
    return values.length > 0 ? values.reduce((a, b) => a + b, 0) / values.length : 0;
  }
  /** @returns {object} Report used when no measurement falls inside the window. */
  getEmptyReport() {
    return {
      averageLatency: 0,
      medianLatency: 0,
      p95Latency: 0,
      p99Latency: 0,
      minLatency: 0,
      maxLatency: 0,
      targetMet: true,
      sampleCount: 0,
      timeRange: { start: 0, end: 0 },
      breakdown: {
        audioProcessing: 0,
        transcription: 0,
        middleware: 0
      }
    };
  }
  /**
   * Synchronously calls every listener registered for `event`.
   * @param {string} event
   * @param {*} data
   */
  emit(event, data) {
    if (this.listeners[event]) {
      this.listeners[event].forEach((listener) => listener(data));
    }
  }
  /**
   * @param {string} event
   * @param {Function} listener
   */
  on(event, listener) {
    if (!this.listeners[event]) {
      this.listeners[event] = [];
    }
    this.listeners[event].push(listener);
  }
  /**
   * @param {string} event
   * @param {Function} listener
   */
  off(event, listener) {
    if (this.listeners[event]) {
      this.listeners[event] = this.listeners[event].filter((l) => l !== listener);
    }
  }
  /**
   * Export metrics for analysis.
   * @param {string} [format="json"] - "csv" for comma-separated rows; anything else yields JSON.
   * @returns {string}
   */
  exportMetrics(format = "json") {
    if (format === "csv") {
      const headers = [
        "chunkId",
        "audioToEmitLatency",
        "audioProcessingLatency",
        "transcriptionLatency",
        "middlewareLatency",
        "timestamp",
        "vadScore",
        "audioSize"
      ].join(",");
      const rows = this.metrics.map(
        (m) => [
          m.chunkId,
          m.audioToEmitLatency,
          m.audioProcessingLatency,
          m.transcriptionLatency,
          m.middlewareLatency,
          m.timestamp,
          // `??` keeps a legitimate 0; only missing values become empty cells.
          m.vadScore ?? "",
          m.audioSize ?? ""
        ].join(",")
      );
      return [headers, ...rows].join("\n");
    }
    return JSON.stringify(this.metrics, null, 2);
  }
  /** Clear metrics (useful for testing). */
  clear() {
    this.metrics = [];
  }
  /** @returns {number} Current metrics count. */
  getMetricsCount() {
    return this.metrics.length;
  }
};
// src/hooks/use-latency-monitor.ts
/**
 * React hook that owns one `LatencyMonitor` for the lifetime of the component.
 *
 * `latencyStatus` is refreshed after every `recordMetrics` call; `latencyReport`
 * is refreshed every 10 seconds and on `clear()`. Changing `targetLatency`
 * re-targets the existing monitor (recorded metrics are kept) and refreshes
 * both values.
 *
 * @param {number} [targetLatency=300] - Target audio-to-emit latency in ms.
 * @returns {{
 *   latencyReport: object,
 *   latencyStatus: object,
 *   recordMetrics: Function,
 *   exportMetrics: Function,
 *   clear: Function,
 *   getMetricsCount: Function,
 *   onOptimization: Function,
 *   offOptimization: Function
 * }}
 */
function useLatencyMonitor(targetLatency = 300) {
  const monitorRef = (0, import_react.useRef)(null);
  // Created lazily so both state initialisers below share a single instance.
  const ensureMonitor = () => {
    if (!monitorRef.current) {
      monitorRef.current = new LatencyMonitor(targetLatency);
    }
    return monitorRef.current;
  };
  const [latencyReport, setLatencyReport] = (0, import_react.useState)(
    () => ensureMonitor().generateReport()
  );
  const [latencyStatus, setLatencyStatus] = (0, import_react.useState)(
    () => ensureMonitor().getRealtimeStatus()
  );
  (0, import_react.useEffect)(() => {
    const monitor = ensureMonitor();
    // The monitor already exists by the time effects run, so a changed target
    // has to be pushed into it explicitly; otherwise the new value is ignored.
    if (monitor.target === targetLatency) return;
    monitor.target = targetLatency;
    setLatencyReport(monitor.generateReport());
    setLatencyStatus(monitor.getRealtimeStatus());
  }, [targetLatency]);
  const recordMetrics = (0, import_react.useCallback)((metrics) => {
    if (monitorRef.current) {
      monitorRef.current.recordMetrics(metrics);
      setLatencyStatus(monitorRef.current.getRealtimeStatus());
    }
  }, []);
  const exportMetrics = (0, import_react.useCallback)((format = "json") => {
    if (monitorRef.current) {
      return monitorRef.current.exportMetrics(format);
    }
    return format === "json" ? "[]" : "";
  }, []);
  const clear = (0, import_react.useCallback)(() => {
    if (monitorRef.current) {
      monitorRef.current.clear();
      setLatencyReport(monitorRef.current.generateReport());
      setLatencyStatus(monitorRef.current.getRealtimeStatus());
    }
  }, []);
  const getMetricsCount = (0, import_react.useCallback)(() => {
    if (monitorRef.current) {
      return monitorRef.current.getMetricsCount();
    }
    return 0;
  }, []);
  const onOptimization = (0, import_react.useCallback)((listener) => {
    if (monitorRef.current) {
      monitorRef.current.on("optimization-trigger", listener);
    }
  }, []);
  const offOptimization = (0, import_react.useCallback)((listener) => {
    if (monitorRef.current) {
      monitorRef.current.off("optimization-trigger", listener);
    }
  }, []);
  (0, import_react.useEffect)(() => {
    // Periodic refresh: the full report is too costly to rebuild on every chunk.
    const updateLatencyReport = () => {
      if (monitorRef.current) {
        setLatencyReport(monitorRef.current.generateReport());
        setLatencyStatus(monitorRef.current.getRealtimeStatus());
      }
    };
    const interval = setInterval(updateLatencyReport, 1e4);
    return () => clearInterval(interval);
  }, []);
  return {
    latencyReport,
    latencyStatus,
    recordMetrics,
    exportMetrics,
    clear,
    getMetricsCount,
    onOptimization,
    offOptimization
  };
}
// src/hooks/use-susurro.ts
// Settings applied to the transformers.js `env` object before a model loads.
var WHISPER_ENV = {
  useBrowserCache: true,
  logLevel: "error"
};
// Ready-to-use ASR pipelines, keyed by `${model}_${dtype}`.
var ASR_PIPELINE_CACHE = /* @__PURE__ */ new Map();
// Loads currently in progress, keyed like the cache. Each entry holds the shared
// promise and the progress listeners of every caller waiting on it.
var ASR_PIPELINE_PENDING = /* @__PURE__ */ new Map();
/**
 * Loads one ASR pipeline, reports progress and stores the result in the cache.
 * Internal helper of `ensureASR`.
 *
 * @param {string} model - Model id without the "Xenova/" prefix.
 * @param {boolean} quantized - `true` selects dtype "q8", otherwise "fp32".
 * @param {string} cacheKey - Key under which the pipeline is cached.
 * @param {(percent: number) => void} notifyProgress - Receives 0..100.
 * @returns {Promise<Function>} The loaded pipeline.
 * @throws Rethrows any import or model-loading error after logging it.
 */
async function loadASRPipeline(model, quantized, cacheKey, notifyProgress) {
  try {
    console.log(`[ensureASR] Creating new pipeline for ${cacheKey}`);
    const { pipeline, env } = await import("@huggingface/transformers");
    if (env) {
      env.useBrowserCache = WHISPER_ENV.useBrowserCache;
      env.allowRemoteModels = true;
    }
    const modelName = `Xenova/${model.replace(".en", "")}`;
    console.log(`[ensureASR] Loading model: ${modelName}`);
    const asr = await pipeline("automatic-speech-recognition", modelName, {
      // v3 uses dtype instead of quantized
      dtype: quantized ? "q8" : "fp32",
      // Optional: use WebGPU if available (requires COEP/COOP headers)
      // device: 'webgpu',
      progress_callback: (p) => {
        if (p?.progress !== void 0) {
          // Values <= 1 are treated as fractions, larger ones as percentages.
          const percent = p.progress <= 1 ? Math.round(p.progress * 100) : Math.round(p.progress);
          notifyProgress(Math.min(100, Math.max(0, percent)));
        } else if (p?.status) {
          console.log(`[ensureASR] Status: ${p.status}`);
        }
      }
    });
    ASR_PIPELINE_CACHE.set(cacheKey, asr);
    console.log(`[ensureASR] Pipeline cached for ${cacheKey}`);
    return asr;
  } catch (error) {
    console.error(`[ensureASR] Failed to load model:`, error);
    throw error;
  }
}
/**
 * Returns the ASR pipeline for `model`, loading it at most once.
 *
 * A cached pipeline is returned immediately after reporting 100% progress.
 * Calls that arrive while the same pipeline is still loading join that load
 * instead of starting a second one, and receive its remaining progress events.
 * A failed load is not cached, so a later call retries.
 *
 * @param {string} model - Model id without the "Xenova/" prefix, e.g. "whisper-tiny".
 * @param {boolean} quantized - `true` selects dtype "q8", otherwise "fp32".
 * @param {(percent: number) => void} [onProgress] - Receives 0..100.
 * @returns {Promise<Function>} The pipeline.
 * @throws Rethrows the import or model-loading error.
 */
async function ensureASR(model, quantized, onProgress) {
  const cacheKey = `${model}_${quantized ? "q8" : "fp32"}`;
  const cachedPipeline = ASR_PIPELINE_CACHE.get(cacheKey);
  if (cachedPipeline) {
    console.log(`[ensureASR] Using cached pipeline for ${cacheKey}`);
    onProgress?.(100);
    return cachedPipeline;
  }
  const inFlight = ASR_PIPELINE_PENDING.get(cacheKey);
  if (inFlight) {
    console.log(`[ensureASR] Joining in-flight load for ${cacheKey}`);
    if (typeof onProgress === "function") {
      inFlight.listeners.add(onProgress);
    }
    return inFlight.promise;
  }
  const listeners = /* @__PURE__ */ new Set();
  if (typeof onProgress === "function") {
    listeners.add(onProgress);
  }
  const notifyProgress = (percent) => {
    for (const listener of listeners) {
      listener(percent);
    }
  };
  // Everything up to here runs synchronously, so a second call made in the same
  // tick already finds the pending entry registered below.
  const promise = loadASRPipeline(model, quantized, cacheKey, notifyProgress).finally(() => {
    ASR_PIPELINE_PENDING.delete(cacheKey);
  });
  ASR_PIPELINE_PENDING.set(cacheKey, { promise, listeners });
  return promise;
}
/**
 * Returns the first channel of `buffer` as 16 kHz samples.
 *
 * A buffer that is already at 16 kHz is copied as-is. Otherwise channel 0 is
 * rendered through a one-channel `OfflineAudioContext` running at 16 kHz.
 * Only channel 0 is read; further channels are ignored.
 *
 * @param {AudioBuffer} buffer - Decoded audio.
 * @returns {Promise<Float32Array>} A fresh copy of the (resampled) samples.
 */
async function resampleTo16k(buffer) {
  const TARGET_RATE = 16e3;
  if (buffer.sampleRate === TARGET_RATE) {
    return buffer.getChannelData(0).slice();
  }
  const frameCount = Math.ceil(buffer.duration * TARGET_RATE);
  const offlineCtx = new OfflineAudioContext(1, frameCount, TARGET_RATE);
  // Stage channel 0 in a buffer at the source rate, then let the context
  // render it at the target rate.
  const staging = offlineCtx.createBuffer(1, buffer.length, buffer.sampleRate);
  staging.copyToChannel(buffer.getChannelData(0), 0);
  const sourceNode = offlineCtx.createBufferSource();
  sourceNode.buffer = staging;
  sourceNode.connect(offlineCtx.destination);
  sourceNode.start(0);
  const renderedBuffer = await offlineCtx.startRendering();
  return renderedBuffer.getChannelData(0).slice();
}
/**
 * Decodes an audio blob, resamples it to 16 kHz and runs it through `asr`.
 *
 * The first attempt passes an explicit language and `task: "transcribe"`. If
 * the pipeline rejects with a message containing "English-only" or
 * "Cannot specify", the call is retried once without those two options.
 *
 * @param {Function} asr - ASR pipeline, called as `asr(samples, options)`.
 * @param {Blob} blob - Encoded audio.
 * @param {string} [language] - Language code; falls back to "es" when falsy.
 * @returns {Promise<object>} Result of `processTranscriptionResult`.
 * @throws Rethrows decode errors and any pipeline error other than the
 *   English-only case described above.
 */
async function transcribeBlobWith(asr, blob, language) {
  const encoded = await blob.arrayBuffer();
  const ctx = new AudioContext();
  let audioData;
  try {
    const decoded = await ctx.decodeAudioData(encoded);
    audioData = await resampleTo16k(decoded);
  } finally {
    // Always release the context, including when decoding fails: browsers cap
    // the number of live AudioContexts. Closing is fire-and-forget so it never
    // delays or masks the transcription outcome.
    Promise.resolve()
      .then(() => ctx.close())
      .catch((closeError) => {
        console.warn("[transcribeBlobWith] Failed to close AudioContext:", closeError);
      });
  }
  const audioArray = audioData instanceof Float32Array ? audioData : new Float32Array(audioData);
  console.log(
    "[transcribeBlobWith] Audio array type:",
    audioArray.constructor.name,
    "Length:",
    audioArray.length
  );
  const options = {
    return_timestamps: true,
    chunk_length_s: 30,
    stride_length_s: 5
  };
  try {
    const out = await asr(audioArray, {
      ...options,
      // Default to Spanish
      language: language || "es",
      task: "transcribe"
    });
    console.log("[transcribeBlobWith] Transcription successful with language:", language || "es");
    return processTranscriptionResult(out);
  } catch (error) {
    console.warn("[transcribeBlobWith] First attempt failed:", error?.message);
    if (error?.message?.includes("English-only") || error?.message?.includes("Cannot specify")) {
      console.log("[transcribeBlobWith] Retrying for English-only model...");
      const out = await asr(audioArray, options);
      return processTranscriptionResult(out);
    }
    throw error;
  }
}
/**
 * Normalises raw pipeline output into the transcription-result shape.
 *
 * Each entry of `out.chunks` becomes one segment whose `seek`/`start` come from
 * `timestamp[0]` and whose `end` comes from `timestamp[1]`; missing values
 * default to 0 or "". The remaining segment fields are fixed placeholders.
 *
 * @param {{text?: string, chunks?: Array<{timestamp?: Array, text?: string}>}} [out]
 * @returns {{text: string, chunkIndex: number, timestamp: number, segments: object[]}}
 *   `timestamp` is the wall-clock time (`Date.now()`) of the call.
 */
function processTranscriptionResult(out) {
  const toSegment = (piece, position) => {
    const begin = piece.timestamp?.[0] ?? 0;
    const finish = piece.timestamp?.[1] ?? 0;
    return {
      id: position,
      seek: begin,
      start: begin,
      end: finish,
      text: piece.text ?? "",
      tokens: [],
      temperature: 0,
      avg_logprob: 0,
      compression_ratio: 0,
      no_speech_prob: 0
    };
  };
  const text = out?.text ?? "";
  const timestamp = Date.now();
  const segments = out?.chunks?.map(toSegment) ?? [];
  return { text, chunkIndex: 0, timestamp, segments };
}
/**
 * Downloads `url` and returns its body as a Blob.
 *
 * @param {string} [url] - URL to fetch; a falsy value yields an empty Blob
 *   without any network access.
 * @returns {Promise<Blob>}
 * @throws {Error} When the request fails or the response status is not OK, so
 *   an error page is never mistaken for audio data.
 */
async function urlToBlob(url) {
  if (!url) return new Blob();
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch audio from ${url}: ${response.status} ${response.statusText}`);
  }
  return response.blob();
}
function useSusurro(options = {}) {
const {
chunkDurationMs = 8e3,
whisperConfig = {},
conversational,
onWhisperProgressLog
} = options;
const murmubaraConfig = {
bufferSize: options.engineConfig?.bufferSize ?? 1024,
denoiseStrength: options.engineConfig?.denoiseStrength ?? 0.5,
enableMetrics: options.engineConfig?.enableMetrics ?? true,
noiseReductionLevel: options.engineConfig?.noiseReductionLevel ?? "medium",
algorithm: options.engineConfig?.algorithm ?? "rnnoise",
chunkDurationMs,
autoCleanup: true,
useAudioWorklet: true,
logLevel: "error",
// Changed from 'info' to 'error' to reduce logs
enableDebugLogs: false
// Explicitly disable debug logs
};
const {
isInitialized: engineReady,
error: engineError,
recordingState,
currentStream: engineStream,
startRecording: murmubaraStartRecording,
stopRecording: murmubaraStopRecording,
pauseRecording: murmurbaraPauseRecording,
resumeRecording: murmubaraResumeRecording,
exportChunkAsWav: murmubaraExportChunkAsWav,
initialize: murmubaraInitializeEngine,
destroy: murmubaraDestroyEngine
} = (0, import_murmuraba.useMurmubaraEngine)(murmubaraConfig);
const [engineInitializing, setEngineInitializing] = (0, import_react2.useState)(false);
const [whisperReady, setWhisperReady] = (0, import_react2.useState)(false);
const [whisperProgress, setWhisperProgress] = (0, import_react2.useState)(0);
const [whisperError, setWhisperError] = (0, import_react2.useState)(null);
const asrRef = (0, import_react2.useRef)(null);
const modelMap = {
tiny: "whisper-tiny",
base: "whisper-base",
medium: "whisper-medium",
small: "whisper-small",
large: "whisper-large-v3"
};
const whisperModel = modelMap[options.initialModel || "tiny"] || "whisper-tiny";
const whisperLanguage = whisperConfig?.language || "es";
const whisperQuantized = true;
(0, import_react2.useEffect)(() => {
let cancelled = false;
(async () => {
try {
const asr = await ensureASR(whisperModel, whisperQuantized, (p) => {
setWhisperProgress(p);
if (onWhisperProgressLog) {
if (p === 100) {
onWhisperProgressLog(
`\u2705 Modelo Whisper ${whisperModel} cargado correctamente`,
"success"
);
onWhisperProgressLog("\u{1F399}\uFE0F Sistema de transcripci\xF3n listo para usar", "success");
} else if (p === 0) {
onWhisperProgressLog(`\u{1F4E5} Iniciando descarga del modelo ${whisperModel}...`, "info");
} else if (p > 0 && p < 25) {
onWhisperProgressLog(`\u{1F4E5} Descargando modelo Whisper... ${p}%`, "info");
} else if (p >= 25 && p < 50) {
onWhisperProgressLog(`\u2699\uFE0F Procesando modelo de IA... ${p}%`, "info");
} else if (p >= 50 && p < 75) {
onWhisperProgressLog(`\u{1F527} Configurando neural network... ${p}%`, "info");
} else if (p >= 75 && p < 100) {
onWhisperProgressLog(`\u{1F680} Finalizando inicializaci\xF3n... ${p}%`, "info");
}
}
});
if (!cancelled) {
asrRef.current = asr;
setWhisperReady(true);
}
} catch (e) {
if (!cancelled) {
const errorMessage = e?.message ?? "Failed to load Whisper";
setWhisperError(errorMessage);
onWhisperProgressLog?.(`\u274C Error al cargar Whisper: ${errorMessage}`, "error");
}
}
})();
return () => {
cancelled = true;
asrRef.current = null;
setWhisperReady(false);
setWhisperProgress(0);
};
}, [whisperModel]);
const [audioChunks, setAudioChunks] = (0, import_react2.useState)([]);
const [transcriptions, setTranscriptions] = (0, import_react2.useState)([]);
const [processingStatus, setProcessingStatus] = (0, import_react2.useState)({
isProcessing: false,
currentChunk: 0,
totalChunks: 0,
stage: "idle"
});
const [averageVad, setAverageVad] = (0, import_react2.useState)(0);
const [conversationalChunks, setConversationalChunks] = (0, import_react2.useState)([]);
const processedAudioUrls = (0, import_react2.useRef)(/* @__PURE__ */ new Map());
const chunkTranscriptions = (0, import_react2.useRef)(/* @__PURE__ */ new Map());
const chunkProcessingTimes = (0, import_react2.useRef)(/* @__PURE__ */ new Map());
const [isStreamingRecording, setIsStreamingRecording] = (0, import_react2.useState)(false);
const streamingCallbackRef = (0, import_react2.useRef)(null);
const streamingSessionRef = (0, import_react2.useRef)(null);
const lastProcessedChunkIndexRef = (0, import_react2.useRef)(0);
const { latencyReport, latencyStatus, recordMetrics } = useLatencyMonitor(300);
const [middlewarePipeline] = (0, import_react2.useState)(() => new ChunkMiddlewarePipeline());
const initializeAudioEngine = (0, import_react2.useCallback)(async () => {
if (engineReady || engineInitializing) return;
setEngineInitializing(true);
try {
await murmubaraInitializeEngine();
} finally {
setEngineInitializing(false);
}
}, [engineReady, engineInitializing, murmubaraInitializeEngine]);
const clearConversationalChunks = (0, import_react2.useCallback)(() => {
setConversationalChunks([]);
processedAudioUrls.current.clear();
chunkTranscriptions.current.clear();
chunkProcessingTimes.current.clear();
}, []);
const resetAudioEngine = (0, import_react2.useCallback)(async () => {
if (recordingState?.isRecording) {
murmubaraStopRecording();
}
if (streamingSessionRef.current) {
await streamingSessionRef.current.stop();
streamingSessionRef.current = null;
}
setIsStreamingRecording(false);
streamingCallbackRef.current = null;
setAudioChunks([]);
setTranscriptions([]);
clearConversationalChunks();
await murmubaraDestroyEngine();
setEngineInitializing(true);
try {
await murmubaraInitializeEngine();
} finally {
setEngineInitializing(false);
}
}, [
recordingState,
murmubaraStopRecording,
murmubaraDestroyEngine,
murmubaraInitializeEngine,
clearConversationalChunks
]);
const startRecording = (0, import_react2.useCallback)(
async (config) => {
if (!engineReady) {
await initializeAudioEngine();
}
const seconds = (config?.chunkDuration ?? chunkDurationMs / 1e3) | 0;
await murmubaraStartRecording(seconds);
},
[engineReady, initializeAudioEngine, murmubaraStartRecording, chunkDurationMs]
);
const stopRecording = (0, import_react2.useCallback)(() => {
murmubaraStopRecording();
}, [murmubaraStopRecording]);
const pauseRecording = (0, import_react2.useCallback)(() => {
murmurbaraPauseRecording();
}, [murmurbaraPauseRecording]);
const resumeRecording = (0, import_react2.useCallback)(() => {
murmubaraResumeRecording();
}, [murmubaraResumeRecording]);
const clearTranscriptions = (0, import_react2.useCallback)(() => {
setTranscriptions([]);
setAudioChunks([]);
chunkTranscriptions.current.clear();
processedAudioUrls.current.clear();
chunkProcessingTimes.current.clear();
setConversationalChunks([]);
}, []);
const transcribeWithWhisper = (0, import_react2.useCallback)(
async (blob) => {
if (!asrRef.current || !whisperReady) return null;
const t0 = performance.now();
const out = await transcribeBlobWith(asrRef.current, blob, whisperLanguage);
recordMetrics({
chunkId: "file",
audioToEmitLatency: performance.now() - t0,
audioProcessingLatency: 0,
transcriptionLatency: performance.now() - t0,
middlewareLatency: 0,
vadScore: 0,
audioSize: blob.size
});
return out;
},
[whisperReady, whisperLanguage, recordMetrics]
);
const analyzeVAD = (0, import_react2.useCallback)(async (buffer) => {
try {
const { loadMurmubaraProcessing: loadMurmubaraProcessing2 } = await Promise.resolve().then(() => (init_dynamic_loaders(), dynamic_loaders_exports));
const { murmubaraVAD } = await loadMurmubaraProcessing2();
if (!murmubaraVAD) {
console.warn("murmubaraVAD function not available in murmuraba module");
return { averageVad: 0, vadScores: [], metrics: [], voiceSegments: [] };
}
console.log("[analyzeVAD] Buffer type:", buffer.constructor.name, "Size:", buffer.byteLength);
console.log("[analyzeVAD] murmubaraVAD type:", typeof murmubaraVAD);
const r = await murmubaraVAD(buffer);
console.log("[analyzeVAD] Result type:", typeof r, "Keys:", r ? Object.keys(r) : "null");
return {
averageVad: r.average || 0,
vadScores: r.scores || [],
metrics: r.metrics || [],
voiceSegments: (r.voiceSegments || []).map(
(s) => ({
startTime: s.startTime || 0,
endTime: s.endTime || 0,
vadScore: s.vadScore || 0,
confidence: s.confidence || 0
})
)
};
} catch (error) {
console.error("VAD analysis failed:", error);
console.error("Error stack:", error instanceof Error ? error.stack : "No stack");
return { averageVad: 0, vadScores: [], metrics: [], voiceSegments: [] };
}
}, []);
const convertBlobToBuffer = (0, import_react2.useCallback)((blob) => blob.arrayBuffer(), []);
const calculateDuration = (0, import_react2.useCallback)(async (buffer) => {
try {
const { loadMurmubaraProcessing: loadMurmubaraProcessing2 } = await Promise.resolve().then(() => (init_dynamic_loaders(), dynamic_loaders_exports));
const { extractAudioMetadata } = await loadMurmubaraProcessing2();
const metadata = extractAudioMetadata(buffer);
return metadata.duration;
} catch {
const bytes = buffer.byteLength;
return Math.max(0.1, bytes / (44100 * 2 * 2));
}
}, []);
const startStreamingRecording = (0, import_react2.useCallback)(
async (onChunk, config) => {
if (isStreamingRecording) throw new Error("Already recording. Stop first.");
if (!engineReady) {
await initializeAudioEngine();
}
setIsStreamingRecording(true);
streamingCallbackRef.current = onChunk;
lastProcessedChunkIndexRef.current = recordingState?.chunks?.length ?? 0;
streamingChunksRef.current = [];
const seconds = (config?.chunkDuration ?? chunkDurationMs / 1e3) | 0;
await murmubaraStartRecording(seconds);
streamingSessionRef.current = {
stop: async () => {
murmubaraStopRecording();
setIsStreamingRecording(false);
streamingCallbackRef.current = null;
}
};
},
[
isStreamingRecording,
engineReady,
initializeAudioEngine,
murmubaraStartRecording,
murmubaraStopRecording,
chunkDurationMs,
recordingState?.chunks?.length
]
);
const streamingChunksRef = (0, import_react2.useRef)([]);
const stopStreamingRecording = (0, import_react2.useCallback)(async () => {
console.log("[stopStreamingRecording] Stopping recording...");
if (streamingSessionRef.current) {
await streamingSessionRef.current.stop();
streamingSessionRef.current = null;
}
setIsStreamingRecording(false);
streamingCallbackRef.current = null;
const chunks = [...streamingChunksRef.current];
console.log("[stopStreamingRecording] Returning", chunks.length, "chunks");
streamingChunksRef.current = [];
lastProcessedChunkIndexRef.current = 0;
return chunks;
}, []);
(0, import_react2.useEffect)(() => {
if (!engineReady || !recordingState?.chunks) {
return;
}
const chunks = recordingState.chunks;
const newOnes = [];
for (let i = audioChunks.length; i < chunks.length; i++) {
const src = chunks[i];
const id = src.id || `chunk-${Date.now()}-${i}`;
const startTime = src.startTime ?? i * chunkDurationMs;
const endTime = src.endTime ?? (i + 1) * chunkDurationMs;
const vadScore = src.averageVad ?? 0;
newOnes.push({
id,
blob: void 0,
// lo traemos on-demand al transcribir
startTime,
endTime,
vadScore,
duration: src.duration ?? chunkDurationMs
});
if (src.processedAudioUrl) {
processedAudioUrls.current.set(id, src.processedAudioUrl);
}
}
if (newOnes.length) {
setAudioChunks((prev) => [...prev, ...newOnes]);
}
const last = chunks[chunks.length - 1];
if (last?.averageVad != null) setAverageVad(last.averageVad);
}, [engineReady, recordingState?.chunks, audioChunks.length, chunkDurationMs]);
// Turns chunks that appeared since the last run into streaming chunks
// (audio blob + optional transcription) and hands them to the streaming
// callback.
//
// Chunks of one batch are prepared in parallel but delivered in recording
// order, and results that only become available after the session's chunk
// list was replaced are discarded instead of being appended to the list of a
// later session.
(0, import_react2.useEffect)(() => {
  const engineChunks = recordingState?.chunks;
  if (!isStreamingRecording || !streamingCallbackRef.current || !engineChunks) {
    return;
  }
  const firstNewIndex = lastProcessedChunkIndexRef.current;
  const newChunks = engineChunks.slice(firstNewIndex);
  if (newChunks.length === 0) return;
  // Claim the range synchronously so a re-run of this effect never picks the
  // same chunks up twice.
  lastProcessedChunkIndexRef.current = engineChunks.length;
  // stopStreamingRecording replaces this array with a fresh one; comparing
  // identities later tells us whether this batch still belongs to the
  // current session.
  const sessionChunks = streamingChunksRef.current;
  // Builds the streaming chunk for one engine chunk; resolves to null when
  // the chunk could not be processed (the error is logged).
  const toStreamingChunk = async (chunk, relativeIndex) => {
    const absoluteIndex = firstNewIndex + relativeIndex;
    try {
      const audioBlob = await urlToBlob(chunk.processedAudioUrl);
      const vadScore = chunk.averageVad ?? 0;
      // Only chunks whose average VAD exceeds 0.3 are sent to transcription.
      const isVoiceActive = vadScore > 0.3;
      let transcriptionText = "";
      if (whisperReady && isVoiceActive && audioBlob.size > 0) {
        try {
          const r = await transcribeWithWhisper(audioBlob);
          transcriptionText = r?.text ?? "";
        } catch (error) {
          // A failed transcription still yields a chunk, just without text.
          console.error("[STREAMING] Transcription error:", error);
        }
      }
      return {
        id: chunk.id || `chunk-${Date.now()}-${absoluteIndex}`,
        audioBlob,
        vadScore,
        timestamp: Date.now(),
        transcriptionText,
        duration: chunk.duration ?? chunkDurationMs,
        isVoiceActive
      };
    } catch (error) {
      console.error("[STREAMING] Error processing chunk:", error);
      return null;
    }
  };
  // Start all work up front, then deliver strictly in order.
  const pending = newChunks.map(toStreamingChunk);
  void (async () => {
    for (const task of pending) {
      const streamingChunk = await task;
      if (!streamingChunk) continue;
      // The chunk list was swapped out while we were working: drop the rest.
      if (streamingChunksRef.current !== sessionChunks) return;
      sessionChunks.push(streamingChunk);
      try {
        streamingCallbackRef.current?.(streamingChunk);
      } catch (error) {
        console.error("[STREAMING] Error processing chunk:", error);
      }
    }
  })();
}, [
  recordingState?.chunks,
  isStreamingRecording,
  whisperReady,
  transcribeWithWhisper,
  chunkDurationMs
]);
/**
 * Emits a conversational chunk to `conversational.onChunk` once its processed
 * audio URL is known and either a transcript exists or `forceEmit` is set.
 *
 * The chunk is passed through the middleware pipeline first. A pipeline
 * failure is logged and the unprocessed chunk is emitted instead, so a broken
 * middleware cannot block delivery.
 *
 * @param {object} chunk - Audio chunk; `id`, `startTime`, `endTime` and `vadScore` are read.
 * @param {boolean} [forceEmit=false] - Emit even when no transcript is available yet.
 * @returns {Promise<void>}
 */
const tryEmitChunk = (0, import_react2.useCallback)(
  async (chunk, forceEmit = false) => {
    if (!conversational?.onChunk) return;
    const audioUrl = processedAudioUrls.current.get(chunk.id);
    const transcript = chunkTranscriptions.current.get(chunk.id);
    if (!audioUrl || !(transcript || forceEmit)) return;
    const t0 = chunkProcessingTimes.current.get(chunk.id);
    let emitted = {
      id: chunk.id,
      audioUrl,
      transcript: transcript ?? "",
      startTime: chunk.startTime,
      endTime: chunk.endTime,
      vadScore: chunk.vadScore ?? 0,
      isComplete: Boolean(transcript),
      // Only measurable when a start time was recorded for this chunk.
      processingLatency: t0 ? Date.now() - t0 : void 0
    };
    const t1 = performance.now();
    try {
      emitted = await middlewarePipeline.process(emitted);
    } catch (error) {
      console.warn(
        "[tryEmitChunk] Middleware pipeline failed; emitting unprocessed chunk:",
        error
      );
    }
    const middlewareLatency = performance.now() - t1;
    if (emitted.processingLatency != null) {
      recordMetrics({
        chunkId: chunk.id,
        audioToEmitLatency: emitted.processingLatency,
        // Clamp at zero: the two clocks (Date.now / performance.now) differ.
        audioProcessingLatency: Math.max(0, emitted.processingLatency - middlewareLatency),
        transcriptionLatency: 0,
        middlewareLatency,
        vadScore: chunk.vadScore,
        audioSize: 0
      });
    }
    setConversationalChunks((prev) => [...prev, emitted]);
    conversational.onChunk(emitted);
    chunkProcessingTimes.current.delete(chunk.id);
  },
  [conversational, middlewarePipeline, recordMetrics]
);
/**
 * Transcribes the given chunks one after another, records each result and
 * tries to emit the corresponding conversational chunk.
 *
 * Chunks without a known processed-audio URL are skipped. A failure on one
 * chunk is logged and does not stop the remaining chunks from being handled.
 * `processingStatus` is updated before, during and after the run.
 *
 * @param {Array<object>} chunks - Chunks to transcribe; only `id` is read here.
 * @returns {Promise<void>}
 */
const processChunks = (0, import_react2.useCallback)(
  async (chunks) => {
    if (!chunks.length) return;
    setProcessingStatus({
      isProcessing: true,
      currentChunk: 0,
      totalChunks: chunks.length,
      stage: "processing"
    });
    for (let i = 0; i < chunks.length; i++) {
      setProcessingStatus((p) => ({ ...p, currentChunk: i + 1 }));
      const id = chunks[i].id;
      const processedUrl = processedAudioUrls.current.get(id);
      if (!processedUrl) continue;
      try {
        const blob = await urlToBlob(processedUrl);
        const r = await transcribeWithWhisper(blob);
        if (r) {
          setTranscriptions((prev) => [
            ...prev,
            { ...r, chunkIndex: i, timestamp: Date.now() }
          ]);
          chunkTranscriptions.current.set(id, r.text);
          await tryEmitChunk(chunks[i]);
        }
      } catch (error) {
        // Best effort: keep going with the next chunk, but leave a trace.
        console.error("[processChunks] Failed to process chunk", id, error);
      }
    }
    setProcessingStatus({
      isProcessing: false,
      currentChunk: 0,
      totalChunks: 0,
      stage: "complete"
    });
  },
  [transcribeWithWhisper, tryEmitChunk]
);
// Once recording is not active and both engine and Whisper are ready,
// schedules transcription of the collected chunks after a 50 ms delay.
// Processing is skipped in conversational mode unless instant transcription
// is enabled.
//
// The pending timer is cancelled when the dependencies change or the
// component unmounts, so a stale run never fires against outdated state.
(0, import_react2.useEffect)(() => {
  if (audioChunks.length === 0 || !engineReady || !whisperReady) {
    return;
  }
  const isRecording = recordingState?.isRecording ?? false;
  if (isRecording) {
    return;
  }
  const timer = setTimeout(() => {
    if (!conversational?.onChunk || conversational.enableInstantTranscription) {
      // processChunks handles its own per-chunk errors; fire and forget.
      void processChunks(audioChunks);
    }
  }, 50);
  return () => clearTimeout(timer);
}, [
  audioChunks,
  engineReady,
  whisperReady,
  recordingState?.isRecording,
  conversational,
  processChunks
]);
// Registers a teardown that clears the conversational chunks and stops any
// streaming session that is still referenced; rejections from stop() are
// deliberately ignored.
(0, import_react2.useEffect)(() => {
  const teardown = () => {
    clearConversationalChunks();
    const activeSession = streamingSessionRef.current;
    if (!activeSession) {
      return;
    }
    activeSession.stop().catch(() => {
    });
  };
  return teardown;
}, [clearConversationalChunks]);
/**
 * Exports the processed audio of a chunk as WAV by delegating to
 * `murmubaraExportChunkAsWav`.
 *
 * @param {string} chunkId - Identifier of the chunk to export.
 * @returns {Promise<Blob>} The exported audio, or an empty Blob (after a
 *   console warning) when the export function is not available.
 */
const exportChunkAsWav = (0, import_react2.useCallback)(
  async (chunkId) => {
    if (murmubaraExportChunkAsWav) {
      return murmubaraExportChunkAsWav(chunkId, "processed");
    }
    console.warn("Export chunk feature not available");
    return new Blob();
  },
  [murmubaraExportChunkAsWav]
);
/**
 * Runs a complete file through the pipeline: audio processing, VAD analysis
 * and Whisper transcription.
 *
 * Object URLs created along the way are revoked again when a later step
 * fails, so an aborted run does not leak them. On success the caller owns
 * both returned URLs.
 *
 * @param {File|Blob} file - Audio file; `arrayBuffer()` and `size` are read.
 * @returns {Promise<object>} Original/processed audio URLs, transcription
 *   text and segments, VAD analysis, metadata and total processing time (ms).
 * @throws {Error} When Whisper is not ready, when transcription yields no
 *   result, or when any processing step rejects.
 */
const processAndTranscribeFile = (0, import_react2.useCallback)(
  async (file) => {
    const t0 = performance.now();
    if (!whisperReady) throw new Error("Whisper model not ready");
    await initializeAudioEngine();
    const originalBuffer = await file.arrayBuffer();
    const { loadMurmubaraProcessing: loadMurmubaraProcessing2 } = await Promise.resolve().then(() => (init_dynamic_loaders(), dynamic_loaders_exports));
    const {
      processFileWithMetrics,
      getEngineStatus,
      initializeAudioEngine: initProc
    } = await loadMurmubaraProcessing2();
    try {
      const status = getEngineStatus?.() ?? "uninitialized";
      if (status === "uninitialized" && initProc) {
        await initProc({
          noiseReductionLevel: "medium",
          bufferSize: 1024,
          algorithm: "rnnoise",
          logLevel: "info",
          autoCleanup: true,
          useAudioWorklet: true
        });
      }
    } catch (error) {
      // Best effort: processing below is still attempted, but leave a trace.
      console.warn(
        "[processAndTranscribeFile] Engine status check/initialization failed:",
        error
      );
    }
    const objectUrls = [];
    const createTrackedUrl = (source) => {
      const url = URL.createObjectURL(source);
      objectUrls.push(url);
      return url;
    };
    try {
      const originalAudioUrl = createTrackedUrl(file);
      const processed = await processFileWithMetrics(originalBuffer, () => {
      });
      const processedBlob = new Blob([processed.processedBuffer], { type: "audio/wav" });
      const processedAudioUrl = createTrackedUrl(processedBlob);
      const vadAnalysis = await analyzeVAD(originalBuffer);
      const tr = await transcribeWithWhisper(processedBlob);
      if (!tr) throw new Error("Transcription failed");
      const metadata = {
        duration: await calculateDuration(originalBuffer),
        // NOTE(review): sampleRate and channels are hard-coded here, not
        // read from the file - confirm these are meant as fixed values.
        sampleRate: 44100,
        channels: 2,
        fileSize: file.size,
        processedSize: processed.processedBuffer.byteLength
      };
      return {
        originalAudioUrl,
        processedAudioUrl,
        transcriptionText: tr.text,
        transcriptionSegments: tr.segments,
        vadAnalysis,
        metadata,
        processingTime: performance.now() - t0
      };
    } catch (error) {
      // Nobody will ever receive these URLs, so release them.
      for (const url of objectUrls) {
        URL.revokeObjectURL(url);
      }
      throw error;
    }
  },
  [whisperReady, initializeAudioEngine, analyzeVAD, transcribeWithWhisper, calculateDuration]
);
// Everything below is the value returned to the caller of this function.
return {
// Recording (managed by Murmuraba directly)
// Falls back to false while no recording state is available.
isRecording: recordingState?.isRecording ?? false,
isProcessing: processingStatus.isProcessing,
transcriptions,
audioChunks,
processingStatus,
averageVad,
startRecording,
stopRecording,
pauseRecording,
resumeRecording,
clearTranscriptions,
exportChunkAsWav,
// Whisper model state and direct transcription access
whisperReady,
whisperProgress,
whisperError,
transcribeWithWhisper,
// Conversational chunks, middleware and latency data
conversationalChunks,
clearConversationalChunks,
middlewarePipeline,
latencyReport,
latencyStatus,
// Audio engine lifecycle
initializeAudioEngine,
resetAudioEngine,
isEngineInitialized: engineReady,
// Exposed as a string (or null when there is no error).
engineError: engineError ? String(engineError) : null,
isInitializingEngine: engineInitializing,
// File processing and streaming entry points
processAndTranscribeFile,
startStreamingRecording,
stopStreamingRecording,
analyzeVAD,
convertBlobToBuffer,
currentStream: engineStream
};
}
// src/lib/ui-interfaces.ts
/**
 * Alert service that does nothing: `show` accepts anything and returns a
 * fresh handle whose `update` and `close` are no-ops.
 */
var defaultAlertService = {
  show() {
    const handle = {
      update() {
      },
      close() {
      }
    };
    return handle;
  }
};
/**
 * Toast service that does nothing: every level (`success`, `error`,
 * `warning`, `info`) is a no-op returning undefined.
 */
var defaultToastService = {
  success() {
  },
  error() {
  },
  warning() {
  },
  info() {
  }
};
// src/hooks/use-model-cache.ts
var import_react3 = require("react");
function useModelCache() {
// Cache summary state; starts as "no cache" until a status update says otherwise.
const [cacheStatus, setCacheStatus] = (0, import_react3.useState)({ hasCache: false });
// Name of the IndexedDB database opened by initDB.
const dbName = "whisper-models-cache";
// Object store inside that database; initDB creates it on upgrade with keyPath "id".
const storeName = "models";
// Version number passed to indexedDB.open.
const cacheVersion = 1;
/**
 * Opens the cache database, creating the object store during an upgrade when
 * it does not exist yet.
 *
 * @returns {Promise<IDBDatabase>} Resolves with the opened database; rejects
 *   with the open request's error.
 */
const initDB = (0, import_react3.useCallback)(async () => {
  const openDatabase = (resolve, reject) => {
    const openRequest = indexedDB.open(dbName, cacheVersion);
    openRequest.onerror = () => reject(openRequest.error);
    openRequest.onsuccess = () => resolve(openRequest.result);
    openRequest.onupgradeneeded = (event) => {
      const upgradingDb = event.target.result;
      const storeExists = upgradingDb.objectStoreNames.contains(storeName);
      if (storeExists) {
        return;
      }
      upgradingDb.createObjectStore(storeName, { keyPath: "id" });
    };
  };
  return new Promise(openDatabase);
}, [dbName, storeName, cacheVersion]);
/**
 * Stores a model under `modelId`, together with a timestamp and its byte
 * size, then refreshes the cache status.
 *
 * The promise settles only once the write transaction has completed (or
 * aborted), so a resolved promise means the record was committed. The
 * database connection is closed afterwards instead of being left open.
 *
 * @param {string} modelId - Key the record is stored under (`id`).
 * @param {ArrayBuffer} data - Model bytes; `byteLength` is recorded as `size`.
 * @returns {Promise<void>} Rejects with the request or transaction error.
 */
const storeModel = (0, import_react3.useCallback)(
  async (modelId, data) => {
    const db = await initDB();
    try {
      await new Promise((resolve, reject) => {
        const transaction = db.transaction([storeName], "readwrite");
        transaction.oncomplete = () => resolve();
        // An abort (e.g. a failure while committing) must not look like success.
        transaction.onabort = () => reject(transaction.error);
        const request = transaction.objectStore(storeName).put({
          id: modelId,
          data,
          timestamp: Date.now(),
          size: data.byteLength
        });
        request.onerror = () => reject(request.error);
      });
    } finally {
      // close() waits for running transactions, so this is safe on both paths.
      db.close();
    }
    // Fire and forget, as before: the status refresh handles its own errors.
    void refreshCacheStatus();
  },
  // eslint-disable-next-line react-hooks/exhaustive-deps
  [initDB, storeName]
);
/**
 * Reads the cached data stored under `modelId`.
 *
 * Never rejects: any failure (opening the database, starting the
 * transaction, the read itself) yields `null`. The database connection is
 * closed once the lookup has finished.
 *
 * @param {string} modelId - Key of the record to read.
 * @returns {Promise<*|null>} The record's `data`, or `null` when the record
 *   is missing, has no data, or the lookup failed.
 */
const getModel = (0, import_react3.useCallback)(
  async (modelId) => {
    let db;
    try {
      db = await initDB();
      const store = db.transaction([storeName], "readonly").objectStore(storeName);
      return await new Promise((resolve) => {
        const request = store.get(modelId);
        request.onsuccess = () => resolve(request.result?.data || null);
        request.onerror = () => resolve(null);
      });
    } catch (error) {
      return null;
    } finally {
      db?.close();
    }
  },
  [initDB, storeName]
);
/**
 * Tells whether `getModel` finds data for the given model id.
 *
 * @param {string} modelId - Key of the record to look up.
 * @returns {Promise<boolean>} True when `getModel` resolves to a non-null value.
 */
const hasModel = (0, import_react3.useCallback)(
  async (modelId) => (await getModel(modelId)) !== null,
  [getModel]
);
/**
 * Recomputes `cacheStatus` from the records in the object store: whether any
 * exist, their summed `size`, and the most recent `timestamp`.
 *
 * Never rejects: any failure results in `{ hasCache: false }`. The database
 * connection is closed once the status has been computed.
 *
 * @returns {Promise<void>}
 */
const refreshCacheStatus = (0, import_react3.useCallback)(async () => {
  let db;
  try {
    db = await initDB();
    const store = db.transaction([storeName], "readonly").objectStore(storeName);
    const models = await new Promise((resolve, reject) => {
      const request = store.getAll();
      request.onsuccess = () => resolve(request.result);
      request.onerror = () => reject(request.error);
    });
    if (models.length === 0) {
      setCacheStatus({ hasCache: false });
      return;
    }
    const totalSize = models.reduce((sum, model) => sum + (model.size || 0), 0);
    const newestTimestamp = models.reduce(
      (newest, model) => Math.max(newest, model.timestamp || 0),
      -Infinity
    );
    setCacheStatus({
      hasCache: true,
      cacheSize: totalSize,
      lastUpdated: new Date(newestTimestamp)
    });
  } catch (error) {
    setCacheStatus({ hasCache: false });
  } finally {
    db?.close();
  }
}, [initDB, storeName]);
/**
 * Removes every record from the object store and marks the cache as empty.
 *
 * When the database or the transaction cannot be opened, the status is reset
 * to "no cache" and the call resolves. A failure of the clear request itself
 * rejects with the request's error. The database connection is closed in
 * every case.
 *
 * @returns {Promise<void>}
 */
const clearCache = (0, import_react3.useCallback)(async () => {
  let db;
  let cleared;
  try {
    db = await initDB();
    const store = db.transaction([storeName], "readwrite").objectStore(storeName);
    cleared = new Promise((resolve, reject) => {
      const request = store.clear();
      request.onsuccess = () => {
        setCacheStatus({ hasCache: false });
        resolve();
      };
      request.onerror = () => reject(request.error);
    });
  } catch (error) {
    // Could not even start clearing: nothing usable is cached from our view.
    db?.close();
    setCacheStatus({ hasCache: false });
    return;
  }
  try {
    await cleared;
  } finally {
    db.close();
  }
}, [initDB, storeName]);
/**
 * Reports storage usage and quota via `navigator.storage.estimate()`.
 *
 * @returns {Promise<{usage: number, quota: number}|null>} The estimate, with
 *   missing values reported as 0, or `null` when the API is not present or
 *   the estimate call fails.
 */
const getStorageInfo = (0, import_react3.useCallback)(async () => {
  const canEstimate = "storage" in navigator && "estimate" in navigator.storage;
  if (!canEstimate) {
    return null;
  }
  try {
    const { usage, quota } = await navigator.storage.estimate();
    return { usage: usage || 0, quota: quota || 0 };
  } catch (error) {
    return null;
  }
}, []);
const requestPersistentStorage = (0, import_react3.useCallback)(async () => {
if ("storage" in navigator && "persist" in navigator.storage) {
try {
const currentlyPersisted = await navigator.storage.persisted();
if (currentlyPersisted) {
return true;
}
const isPersisted = await navigator.storage.persist();
if ("estimate" in navigator.storage) {