@moonshine-ai/moonshine-js
Version:
On-device speech-to-text and voice control for web applications with Moonshine.
266 lines (222 loc) • 7.49 kB
text/typescript
import Log from "./log";
import MicrophoneTranscriber from "./microphoneTranscriber";
/**
 * Shape of a single transcription candidate used by the polyfill classes in
 * this file. Declared locally under the same name as the Web Speech API type;
 * only the two fields below are part of the contract here.
 */
interface SpeechRecognitionAlternative {
/** The recognised text for this candidate. */
readonly transcript: string;
/**
 * Numeric score attached to the transcript. The valid range is not
 * established in this file (presumably 0..1 as in the Web Speech API; confirm).
 */
readonly confidence: number;
}
/**
 * Options accepted by the `MoonshineSpeechRecognitionEvent` constructor: the
 * optional flags inherited from `EventInit` plus the recognition payload.
 */
interface SpeechRecognitionEventInit extends EventInit {
/** The result list carried by the event. */
results: SpeechRecognitionResultList;
/** Index into `results` that the event refers to. */
resultIndex: number;
}
/**
 * Lightweight stand-in for a speech recognition "result" event.
 *
 * WARN: this is not technically an event; it only has the same structure as
 * one. Using the native event class caused nastiness once the TypeScript was
 * transpiled to native JavaScript, so a plain class is used instead.
 */
class MoonshineSpeechRecognitionEvent {
    readonly type: string;
    readonly results: SpeechRecognitionResultList;
    readonly resultIndex: number;
    readonly bubbles: boolean;
    readonly cancelable: boolean;
    readonly composed: boolean;

    /**
     * @param type - Event name exposed through `type`.
     * @param init - Recognition payload plus optional event flags; a flag
     *   that is `null` or `undefined` is stored as `false`.
     */
    constructor(type: string, init: SpeechRecognitionEventInit) {
        const { results, resultIndex, bubbles, cancelable, composed } = init;

        this.type = type;
        this.results = results;
        this.resultIndex = resultIndex;
        // The three flags are optional on the init object, so fall back to false.
        this.bubbles = bubbles == null ? false : bubbles;
        this.cancelable = cancelable == null ? false : cancelable;
        this.composed = composed == null ? false : composed;
    }
}
/**
 * Array-like container of transcription alternatives for one recognition
 * result. Alternatives are reachable by numeric index, through `item()`, and
 * counted by `length`.
 */
class MoonshineSpeechRecognitionResult implements SpeechRecognitionResult {
    private readonly _alternatives: SpeechRecognitionAlternative[];
    readonly isFinal: boolean;

    /**
     * @param alternatives - Candidates to expose; the array is kept by
     *   reference, not copied.
     * @param isFinal - Stored unchanged as the result's `isFinal` flag.
     */
    constructor(
        alternatives: SpeechRecognitionAlternative[],
        isFinal: boolean
    ) {
        this._alternatives = alternatives;
        this.isFinal = isFinal;
        // Mirror every entry onto a numeric own property so `result[0]` works.
        for (const [position, alternative] of alternatives.entries()) {
            (this as any)[position] = alternative;
        }
    }

    /** Number of alternatives currently held by the backing array. */
    get length(): number {
        return this._alternatives.length;
    }

    /**
     * Looks up an alternative by position.
     *
     * @returns The alternative at `index`, or `null` when nothing (truthy) is
     *   stored there.
     */
    item(index: number): SpeechRecognitionAlternative {
        const found = this._alternatives[index];
        return found ? found : null!;
    }

    [index: number]: SpeechRecognitionAlternative;
}
/**
 * Array-like list of recognition results. Construction returns a Proxy
 * around the instance so that any all-digit property key is answered from
 * the backing array; every other key is read from the instance itself.
 */
class MoonshineSpeechRecognitionResultList
    implements SpeechRecognitionResultList
{
    private readonly _results: SpeechRecognitionResult[];

    /**
     * @param results - Results to expose; the array is kept by reference.
     * @returns A Proxy wrapping the new instance (not the raw instance).
     */
    constructor(results: SpeechRecognitionResult[]) {
        this._results = results;
        // Also copy entries onto numeric own properties of the instance.
        for (const [position, result] of results.entries()) {
            (this as any)[position] = result;
        }

        // True for string keys made only of decimal digits, e.g. "0" or "12".
        const isDigitKey = (key: string | symbol): key is string =>
            typeof key === "string" && /^\d+$/.test(key);

        return new Proxy(this, {
            get: (target, key) =>
                isDigitKey(key)
                    ? target._results[Number(key)]
                    : (target as any)[key],
        });
    }

    /** Number of results currently held by the backing array. */
    get length(): number {
        return this._results.length;
    }

    /**
     * Looks up a result by position.
     *
     * @returns The result at `index`, or `null` when nothing (truthy) is
     *   stored there.
     */
    item(index: number): SpeechRecognitionResult {
        const found = this._results[index];
        return found ? found : null!;
    }

    [index: number]: SpeechRecognitionResult;
}
/**
 * Handler accepted by the `on*` setters and by `addEventListener`.
 *
 * The event parameter is deliberately `any`: callers commonly annotate their
 * callbacks with the browser's own event types, while this polyfill delivers
 * plain `Event` objects or `MoonshineSpeechRecognitionEvent` instances.
 * `null` clears a previously registered handler.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type MoonshineRecognitionHandler = ((event: any) => void) | null;

/**
 * Stand-in for the browser `SpeechRecognition` interface that forwards to a
 * `MicrophoneTranscriber`.
 *
 * Implemented events: `start`, `end`, `speechstart`, `speechend`, `result`.
 * The remaining events (`audiostart`, `audioend`, `soundstart`, `soundend`,
 * `error`, `nomatch`) only log a warning when a handler is assigned.
 *
 * At most one handler is kept per event type: assigning an `on*` property or
 * calling `addEventListener` replaces the previous handler for that type.
 */
// @ts-expect-error
class MoonshineSpeechRecognition implements SpeechRecognition {
    private transcriber: MicrophoneTranscriber;

    /** Currently registered handler for each implemented event type. */
    private readonly handlers = new Map<
        string,
        NonNullable<MoonshineRecognitionHandler>
    >();

    /**
     * @param modelURL - Model location handed to the `MicrophoneTranscriber`.
     */
    public constructor(modelURL: string = "model/tiny") {
        this.transcriber = new MicrophoneTranscriber(modelURL, {}, true);
    }

    // Configuration properties start at the native defaults. Only
    // `interimResults` is read by this class.
    continuous: boolean = false;
    // @ts-expect-error
    grammars: SpeechGrammarList;
    interimResults: boolean = false;
    lang: string = "";
    maxAlternatives: number = 1;

    /**
     * Detaches `listener` if it is the handler currently registered for
     * `type`; otherwise does nothing. `options` is ignored.
     */
    removeEventListener(
        type: unknown,
        listener: unknown,
        options?: unknown
    ): void {
        if (typeof type === "string" && this.handlers.get(type) === listener) {
            this.handlers.delete(type);
        }
    }

    /**
     * Not supported by this polyfill.
     *
     * @throws Error always.
     */
    dispatchEvent(event: Event): boolean {
        throw new Error("Method not implemented.");
    }

    /** Starts the underlying transcriber. */
    start() {
        this.transcriber.start();
    }

    /** Stops the underlying transcriber. */
    stop() {
        this.transcriber.stop();
    }

    /** Stops the underlying transcriber (same action as `stop`). */
    abort() {
        this.transcriber.stop();
    }

    /** Records `handler` for `type`; anything that is not a function clears it. */
    private setHandler(type: string, handler: MoonshineRecognitionHandler): void {
        if (typeof handler === "function") {
            this.handlers.set(type, handler);
        } else {
            this.handlers.delete(type);
        }
    }

    /** Delivers a plain `Event` named `type` to its handler, if one is registered. */
    private emitPlainEvent(type: string): void {
        const handler = this.handlers.get(type);
        if (handler !== undefined) {
            handler(new Event(type));
        }
    }

    public set onaudiostart(handler: MoonshineRecognitionHandler) {
        Log.warn(
            "onaudiostart is not implemented for SpeechRecognition polyfill"
        );
    }

    public set onaudioend(handler: MoonshineRecognitionHandler) {
        Log.warn(
            "onaudioend is not implemented for SpeechRecognition polyfill"
        );
    }

    /** Fires with an `end` event when the transcriber reports it stopped. */
    public set onend(handler: MoonshineRecognitionHandler) {
        this.setHandler("end", handler);
        this.transcriber.callbacks.onTranscribeStopped = () => {
            this.emitPlainEvent("end");
        };
    }

    public set onerror(handler: MoonshineRecognitionHandler) {
        Log.warn("onerror is not implemented for SpeechRecognition polyfill");
    }

    public set onnomatch(handler: MoonshineRecognitionHandler) {
        Log.warn("nomatch is not implemented for SpeechRecognition polyfill");
    }

    // Kept as a public field for compatibility; not read by this class.
    resultIndex = 0;

    /**
     * Fires with a `MoonshineSpeechRecognitionEvent` of type `result` on each
     * transcription update. Every event carries a single result with a single
     * alternative, marked final.
     */
    public set onresult(handler: MoonshineRecognitionHandler) {
        this.setHandler("result", handler);
        this.transcriber.callbacks.onTranscriptionUpdated = (text: string) => {
            const current = this.handlers.get("result");
            if (current === undefined) {
                return;
            }
            // Empty updates are forwarded only when interimResults is set.
            if (!text && !this.interimResults) {
                return;
            }
            const alternative: SpeechRecognitionAlternative = {
                transcript: text,
                // The transcriber callback supplies only text, so no score
                // is available; left as undefined, as before.
                confidence: undefined,
            };
            const results = new MoonshineSpeechRecognitionResultList([
                new MoonshineSpeechRecognitionResult([alternative], true),
            ]);
            current(
                new MoonshineSpeechRecognitionEvent("result", {
                    bubbles: false,
                    cancelable: false,
                    composed: false,
                    results: results,
                    resultIndex: 0,
                })
            );
        };
    }

    public set onsoundstart(handler: MoonshineRecognitionHandler) {
        Log.warn(
            "onsoundstart is not implemented for SpeechRecognition polyfill"
        );
    }

    public set onsoundend(handler: MoonshineRecognitionHandler) {
        Log.warn(
            "onsoundend is not implemented for SpeechRecognition polyfill"
        );
    }

    /** Fires with a `speechstart` event from the transcriber's onSpeechStart callback. */
    public set onspeechstart(handler: MoonshineRecognitionHandler) {
        this.setHandler("speechstart", handler);
        this.transcriber.callbacks.onSpeechStart = () => {
            this.emitPlainEvent("speechstart");
        };
    }

    /** Fires with a `speechend` event from the transcriber's onSpeechEnd callback. */
    public set onspeechend(handler: MoonshineRecognitionHandler) {
        this.setHandler("speechend", handler);
        this.transcriber.callbacks.onSpeechEnd = () => {
            this.emitPlainEvent("speechend");
        };
    }

    /** Fires with a `start` event when the transcriber reports it started. */
    public set onstart(handler: MoonshineRecognitionHandler) {
        this.setHandler("start", handler);
        this.transcriber.callbacks.onTranscribeStarted = () => {
            this.emitPlainEvent("start");
        };
    }

    /**
     * Registers `listener` by assigning the matching `on*` property, so it
     * replaces any handler already set for `type`. Unknown event names are
     * ignored, as is `options`.
     */
    addEventListener(
        type: string,
        listener: MoonshineRecognitionHandler,
        options?: unknown
    ): void {
        switch (type) {
            case "audiostart":
                this.onaudiostart = listener;
                break;
            case "audioend":
                this.onaudioend = listener;
                break;
            case "end":
                this.onend = listener;
                break;
            case "error":
                this.onerror = listener;
                break;
            case "nomatch":
                this.onnomatch = listener;
                break;
            case "result":
                this.onresult = listener;
                break;
            case "soundstart":
                this.onsoundstart = listener;
                break;
            case "soundend":
                this.onsoundend = listener;
                break;
            case "speechstart":
                this.onspeechstart = listener;
                break;
            case "speechend":
                this.onspeechend = listener;
                break;
            case "start":
                this.onstart = listener;
                break;
        }
    }
}
export default MoonshineSpeechRecognition;