// expo-speech-recognition: Speech Recognition for React Native Expo projects
// Web implementation of the ExpoSpeechRecognitionModule.
import type { EventSubscription, PermissionResponse } from "expo-modules-core";
import { registerWebModule, NativeModule } from "expo";
import type {
ExpoSpeechRecognitionNativeEventMap,
ExpoSpeechRecognitionNativeEvents,
ExpoSpeechRecognitionOptions,
ExpoSpeechRecognitionResultSegment,
} from "./ExpoSpeechRecognitionModule.types";
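// The active browser SpeechRecognition instance; a new instance is created on each start() call.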
let _speechRecognitionRef: SpeechRecognition | null = null;
type NativeEventListener = (
event: SpeechRecognitionEventMap[keyof SpeechRecognitionEventMap],
) => void;
// Web implementation of the speech recognition module, backed by the browser's
// Web Speech API (SpeechRecognition / webkitSpeechRecognition).
class ExpoSpeechRecognitionModuleWeb extends NativeModule<ExpoSpeechRecognitionNativeEvents> {
_clientListeners: Map<
// Original listener
ExpoSpeechRecognitionNativeEvents[keyof ExpoSpeechRecognitionNativeEvents],
// Native listener
NativeEventListener
> = new Map();
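  // Native wrapper listeners keyed by event name, kept so they can be
  // re-attached when start() creates a new recognizer instance.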
_nativeListeners: Map<string, Set<NativeEventListener>> = new Map();
// Convert the web event to the native event
bindEventListener = <T extends keyof SpeechRecognitionEventMap>(
eventName: T,
ev: SpeechRecognitionEventMap[T],
) => {
const eventPayload = webToNativeEventMap[eventName]?.(ev);
this.emit(
eventName,
// @ts-expect-error payload typings are incorrect
eventPayload,
);
};
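  /**
   * Subscribes to a native-style event. The client listener is wrapped in a
   * native listener that converts the web SpeechRecognitionEvent payload to
   * the native event shape before invoking it.
   */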
addListener<EventName extends keyof ExpoSpeechRecognitionNativeEventMap>(
eventName: EventName,
listener: ExpoSpeechRecognitionNativeEvents[EventName],
): EventSubscription {
// Convert the web event to the native event
// @ts-expect-error Not all events are covered here
const nativeListener = (ev: SpeechRecognitionEventMap[EventName]) => {
const handler =
eventName in webToNativeEventMap
? webToNativeEventMap[eventName as keyof SpeechRecognitionEventMap]
: null;
// @ts-expect-error payload typings are incorrect
const eventPayload = handler?.(ev);
// @ts-expect-error
listener(eventPayload);
};
// @ts-expect-error
_speechRecognitionRef?.addEventListener(eventName, nativeListener);
if (!this._nativeListeners.has(eventName)) {
this._nativeListeners.set(eventName, new Set());
}
    // Track the native listener so it can be re-attached when start() creates a new recognizer
    // @ts-expect-error
    this._nativeListeners.get(eventName)?.add(nativeListener);
    // Map the original listener to its native wrapper so it can be removed later
    // @ts-expect-error
    this._clientListeners.set(listener, nativeListener);
const handle = super.addListener(eventName, listener);
return {
remove: () => {
// @ts-expect-error
this._nativeListeners.get(eventName)?.delete(nativeListener);
this._clientListeners.delete(listener);
handle.remove();
},
};
}
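  /**
   * Removes every listener registered for the given event, both from the
   * emitter and from the underlying recognizer instance.
   */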
removeAllListeners(
eventName: keyof ExpoSpeechRecognitionNativeEventMap,
): void {
    // Go through the tracked listeners and remove everything registered for this event
    if (this._nativeListeners.has(eventName)) {
      const nativeListeners = this._nativeListeners.get(eventName);
      if (!nativeListeners) {
        return;
      }
      // Remove the client listeners from the emitter, along with their mappings
      for (const [listener, nativeListener] of this._clientListeners) {
        if (nativeListeners.has(nativeListener)) {
          this.removeListener(eventName, listener);
          this._clientListeners.delete(listener);
        }
      }
      // ...and detach the native wrapper listeners from the active recognizer
      for (const listener of nativeListeners) {
        _speechRecognitionRef?.removeEventListener(eventName, listener);
      }
      // Clean up
      this._nativeListeners.delete(eventName);
    }
}
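  /**
   * Creates a fresh SpeechRecognition instance (preferring the prefixed
   * webkitSpeechRecognition), applies the recognition options, re-attaches
   * any previously registered listeners, and starts recognition.
   */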
start(options: ExpoSpeechRecognitionOptions) {
const SpeechRecognitionClass =
typeof webkitSpeechRecognition !== "undefined"
? webkitSpeechRecognition
: SpeechRecognition;
_speechRecognitionRef = new SpeechRecognitionClass();
_speechRecognitionRef.lang = options.lang ?? "en-US";
_speechRecognitionRef.interimResults = options.interimResults ?? false;
_speechRecognitionRef.maxAlternatives = options.maxAlternatives ?? 1;
_speechRecognitionRef.continuous = options.continuous ?? false;
// Re-subscribe to events so that we don't lose them
// This covers the case where the user has already subscribed to an event prior to calling `start()`
this._nativeListeners.forEach((listeners, eventName) => {
for (const listener of listeners) {
// May already be subscribed
_speechRecognitionRef?.removeEventListener(eventName, listener);
_speechRecognitionRef?.addEventListener(eventName, listener);
}
});
// Start the speech recognition!
_speechRecognitionRef.start();
}
getStateAsync() {
console.warn(
"getStateAsync is not supported on web. Returning 'inactive'.",
);
return Promise.resolve("inactive");
}
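  /** Stops the active recognition session; pending results may still be emitted. */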
stop() {
_speechRecognitionRef?.stop();
}
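  /** Aborts the active recognition session immediately, discarding pending results. */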
abort() {
_speechRecognitionRef?.abort();
}
  /**
   * The Web Speech API has no permissions API of its own; the browser prompts
   * for microphone access when recognition starts. These methods therefore
   * warn and resolve to a stub "granted" response.
   */
  _stubPermissionResponse(methodName: string): Promise<PermissionResponse> {
    console.warn(
      `${methodName} is not supported on web. Returning a granted permission response.`,
    );
    return Promise.resolve({
      granted: true,
      canAskAgain: false,
      expires: "never",
      status: "granted",
    } as PermissionResponse);
  }
  requestPermissionsAsync() {
    return this._stubPermissionResponse("requestPermissionsAsync");
  }
  getPermissionsAsync() {
    return this._stubPermissionResponse("getPermissionsAsync");
  }
  getMicrophonePermissionsAsync() {
    return this._stubPermissionResponse("getMicrophonePermissionsAsync");
  }
  requestMicrophonePermissionsAsync() {
    return this._stubPermissionResponse("requestMicrophonePermissionsAsync");
  }
  getSpeechRecognizerPermissionsAsync() {
    return this._stubPermissionResponse("getSpeechRecognizerPermissionsAsync");
  }
  requestSpeechRecognizerPermissionsAsync() {
    return this._stubPermissionResponse("requestSpeechRecognizerPermissionsAsync");
  }
  async getSupportedLocales() {
    console.warn(
      "getSupportedLocales is not supported on web. Returning empty locale lists.",
    );
    return {
      locales: [] as string[],
      installedLocales: [] as string[],
    };
  }
getSpeechRecognitionServices() {
return [] as string[];
}
getDefaultRecognitionService() {
return {
packageName: "",
};
}
getAssistantService() {
return {
packageName: "",
};
}
supportsOnDeviceRecognition() {
return false;
}
supportsRecording() {
return false;
}
  androidTriggerOfflineModelDownload() {
    console.warn("androidTriggerOfflineModelDownload is not supported on web.");
    return Promise.resolve({
      status: "opened_dialog",
      message: "Offline model download is not supported on web.",
    });
  }
setCategoryIOS() {
console.warn("setCategoryIOS is not supported on web.");
}
getAudioSessionCategoryAndOptionsIOS() {
console.warn(
"getAudioSessionCategoryAndOptionsIOS is not supported on web.",
);
return {
category: "playAndRecord",
categoryOptions: ["defaultToSpeaker", "allowBluetooth"],
mode: "measurement",
};
}
setAudioSessionActiveIOS() {
console.warn("setAudioSessionActiveIOS is not supported on web.");
}
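  /** Returns true when the browser exposes the Web Speech API (prefixed or unprefixed). */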
isRecognitionAvailable() {
const hasSpeechRecognitionAPI =
typeof webkitSpeechRecognition !== "undefined" ||
typeof SpeechRecognition !== "undefined";
return hasSpeechRecognitionAPI;
}
}
/**
* Convert the web SpeechRecognitionEventMap to the native event map for compatibility
*/
const webToNativeEventMap: {
[K in keyof SpeechRecognitionEventMap]: (
ev: SpeechRecognitionEventMap[K],
) => ExpoSpeechRecognitionNativeEventMap[K];
} = {
audioend: (ev) => ({ uri: null }),
audiostart: (ev) => ({ uri: null }),
end: (ev) => null,
error: (ev) => ({ error: ev.error, message: ev.message }),
nomatch: (ev) => null,
result: (ev): ExpoSpeechRecognitionNativeEventMap["result"] => {
const isFinal = Boolean(ev.results[ev.resultIndex]?.isFinal);
if (isFinal) {
const results: ExpoSpeechRecognitionNativeEventMap["result"]["results"] =
[];
for (let i = 0; i < ev.results[ev.resultIndex].length; i++) {
const result = ev.results[ev.resultIndex][i];
results.push({
transcript: result.transcript,
confidence: result.confidence,
segments: [],
});
}
return {
isFinal: true,
results,
};
}
    // Interim results: concatenate the recognized alternatives into a single
    // running transcript, collecting each one as a segment
    let transcript = "";
    const segments: ExpoSpeechRecognitionResultSegment[] = [];
    for (let i = ev.resultIndex; i < ev.results.length; i++) {
      const resultList = ev.results[i];
      for (let j = 0; j < resultList.length; j++) {
        const result = resultList[j];
        if (!result) {
          continue;
        }
        // The web API doesn't expose word timings, so segment times are zeroed
        segments.push({
          confidence: result.confidence,
          segment: result.transcript,
          startTimeMillis: 0,
          endTimeMillis: 0,
        });
        transcript += result.transcript;
      }
    }
    return {
      isFinal: false,
      results: [
        {
          transcript,
          // Average the per-segment confidences, guarding against division by zero
          confidence:
            segments.length > 0
              ? segments.reduce((acc, curr) => acc + curr.confidence, 0) /
                segments.length
              : 0,
          segments,
        },
      ],
    };
  },
soundstart: (ev) => null,
speechend: (ev) => null,
speechstart: (ev) => null,
start: (ev) => null,
soundend: (ev) => null,
};
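// Register the web implementation under the same name as the native module,
// so imports resolve to this class when running in a browser.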
export const ExpoSpeechRecognitionModule = registerWebModule(
ExpoSpeechRecognitionModuleWeb,
"ExpoSpeechRecognitionModule",
);
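
/*
 * Usage sketch (illustrative only; assumes the package's public entry point
 * re-exports this module as `ExpoSpeechRecognitionModule`):
 *
 *   const sub = ExpoSpeechRecognitionModule.addListener("result", (ev) => {
 *     console.log(ev.results[0]?.transcript, ev.isFinal);
 *   });
 *   ExpoSpeechRecognitionModule.start({ lang: "en-US", interimResults: true });
 *   // Later: ExpoSpeechRecognitionModule.stop() and sub.remove()
 */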