web-speech-cognitive-services
Version:
Polyfill Web Speech API with Cognitive Services Speech-to-Text service
345 lines (325 loc) • 15.9 kB
text/typescript
import { AudioConfig, SpeechRecognizer } from 'microsoft-cognitiveservices-speech-sdk';
import * as valibot from 'valibot';
import { InferOutput } from 'valibot';
/**
 * Closed set of error codes that a `SpeechRecognitionErrorEvent` can report
 * through its `error` accessor.
 */
type SpeechRecognitionErrorType =
  | 'aborted'
  | 'audio-capture'
  | 'bad-grammar'
  | 'language-not-supported'
  | 'network'
  | 'no-speech'
  | 'not-allowed'
  | 'service-not-allowed'
  | 'unknown';
/** Init object accepted as the second argument of the `SpeechRecognitionErrorEvent` constructor. */
type SpeechRecognitionErrorEventInit = {
/** Required error code. */
error: SpeechRecognitionErrorType;
/** Optional message text; may be omitted or explicitly `undefined`. */
message?: string | undefined;
};
/**
 * `Event` subclass for the `'error'` event type. Exposes the error code and
 * an optional message through read-only accessors.
 */
declare class SpeechRecognitionErrorEvent extends Event {
#private;
/**
 * The first argument must be the literal `'error'`; the second is an init
 * object supplying `error` and, optionally, `message`.
 */
constructor(type: 'error', { error, message }: SpeechRecognitionErrorEventInit);
/** Error code, one of `SpeechRecognitionErrorType`. */
get error(): SpeechRecognitionErrorType;
/** Message text, or `undefined`. */
get message(): string | undefined;
/** Event type, narrowed to the literal `'error'`. */
get type(): 'error';
}
/**
 * Minimal array-like contract: a read-only `length` plus numeric indexing.
 * Indexed reads are typed `T | undefined`, so callers must handle a missing item.
 */
interface FakeArrayInterface<T> {
  readonly length: number;
  [index: number]: T | undefined;
}
/**
 * Array-like class implementing `FakeArrayInterface<T>`. It is constructed
 * from a read-only array and declares numeric indexing, iteration and `length`.
 */
declare class FakeArray<T> implements FakeArrayInterface<T> {
#private;
/** Takes the read-only source array of items. */
constructor(array: readonly T[]);
/** Item at `index`; typed as possibly `undefined`. */
[index: number]: T | undefined;
/** Iterator yielding values of type `T`. */
[Symbol.iterator](): IterableIterator<T>;
/** Read-only `length` (no setter is declared). */
get length(): number;
}
/** Init object accepted by the `SpeechRecognitionResult` constructor. */
type SpeechRecognitionResultInit = {
/** Value surfaced by the `isFinal` accessor of `SpeechRecognitionResult`. */
isFinal: boolean;
/** Read-only list of alternatives (the `SpeechRecognitionAlternative` type is not declared in this file). */
results: readonly SpeechRecognitionAlternative[];
};
/**
 * Array-like collection of `SpeechRecognitionAlternative` items (via `FakeArray`)
 * with an additional read-only `isFinal` flag.
 */
declare class SpeechRecognitionResult extends FakeArray<SpeechRecognitionAlternative> {
#private;
/** Takes an init object carrying `isFinal` and the `results` alternatives. */
constructor(init: SpeechRecognitionResultInit);
/** Read-only boolean flag (no setter is declared). */
get isFinal(): boolean;
}
/** Array-like collection of `SpeechRecognitionResult` items (via `FakeArray`). */
declare class SpeechRecognitionResultList extends FakeArray<SpeechRecognitionResult> {
/** Takes the read-only array of results to expose. */
constructor(result: readonly SpeechRecognitionResult[]);
}
/**
 * Optional init object for `SpeechRecognitionEvent`.
 * Every field may be omitted or explicitly set to `undefined`.
 */
type SpeechRecognitionEventInit = {
  /**
   * Arbitrary payload. Typed `unknown`, which already admits `undefined`,
   * so no separate `| undefined` constituent is needed.
   */
  data?: unknown;
  /** Optional numeric result index. */
  resultIndex?: number | undefined;
  /** Optional list of results. */
  results?: SpeechRecognitionResultList | undefined;
};
/**
 * `Event` subclass used for every non-error recognition event.
 * The type parameter `T` is the event-type literal returned by the `type` accessor.
 *
 * Constructor overloads, as declared:
 * - `'cognitiveservices'` requires an init object whose `data` has a string `type`.
 * - `'result'` requires an init object (`SpeechRecognitionEventInit`).
 * - every other event type takes only the type string.
 */
declare class SpeechRecognitionEvent<T extends 'audioend' | 'audiostart' | 'cognitiveservices' | 'end' | 'result' | 'soundend' | 'soundstart' | 'speechend' | 'speechstart' | 'start'> extends Event {
#private;
constructor(type: 'cognitiveservices', init: SpeechRecognitionEventInit & {
data: {
type: string;
};
});
constructor(type: 'audioend');
constructor(type: 'audiostart');
constructor(type: 'end');
constructor(type: 'result', init: SpeechRecognitionEventInit);
constructor(type: 'soundend');
constructor(type: 'soundstart');
constructor(type: 'speechend');
constructor(type: 'speechstart');
constructor(type: 'start');
/** Payload, typed `unknown`; callers must narrow before use. */
get data(): unknown;
/** Result index, or `undefined`. */
get resultIndex(): number | undefined;
/**
 * Result list. Typed as always present even though `results` is optional in the init object.
 * NOTE(review): presumably the implementation substitutes a default list when none is given — confirm.
 */
get results(): SpeechRecognitionResultList;
/** Event type, narrowed to `T`. */
get type(): T;
}
/**
 * Typed accessor for per-event listener callbacks, constructed with an `EventTarget`.
 * `T` is the union of event names; `EventMap` maps each name to its event object type.
 * NOTE(review): presumably backs the `on*` handler properties of `SpeechRecognition` — confirm.
 */
declare class EventListenerMap<T extends string, EventMap extends {
[Name in T]: unknown;
}> {
#private;
constructor(eventTarget: EventTarget);
/** Returns the listener stored under `name`, or `undefined`. */
getProperty<U extends T>(name: U): ((event: EventMap[U]) => void) | undefined;
/** Stores `value` under `name`; `undefined` is an accepted value. */
setProperty<U extends T>(name: U, value: ((event: EventMap[U]) => void) | undefined): void;
}
/**
 * `EventListenerMap` specialised for the recognition events.
 * `'error'` maps to `SpeechRecognitionErrorEvent`; every other name maps to
 * `SpeechRecognitionEvent` parameterised with that same name.
 */
type SpeechRecognitionEventListenerMap = EventListenerMap<'audioend' | 'audiostart' | 'cognitiveservices' | 'end' | 'error' | 'result' | 'soundend' | 'soundstart' | 'speechend' | 'speechstart' | 'start', {
audioend: SpeechRecognitionEvent<'audioend'>;
audiostart: SpeechRecognitionEvent<'audiostart'>;
cognitiveservices: SpeechRecognitionEvent<'cognitiveservices'>;
end: SpeechRecognitionEvent<'end'>;
error: SpeechRecognitionErrorEvent;
result: SpeechRecognitionEvent<'result'>;
soundend: SpeechRecognitionEvent<'soundend'>;
soundstart: SpeechRecognitionEvent<'soundstart'>;
speechend: SpeechRecognitionEvent<'speechend'>;
speechstart: SpeechRecognitionEvent<'speechstart'>;
start: SpeechRecognitionEvent<'start'>;
}>;
/** Shape of a single grammar entry: a string `src` and a numeric `weight`. */
interface W3CSpeechGrammar {
src: string;
weight: number;
}
/**
 * Contract for a grammar list: read-only `length`, numeric indexing, and
 * methods to add a grammar from a string or a URI.
 */
interface W3CSpeechGrammarList {
readonly length: number;
/** Adds a grammar from `string`; `weight` is optional. */
addFromString(string: string, weight?: number): void;
/** Adds a grammar from the URI `src`; `weight` is optional. */
addFromURI(src: string, weight?: number): void;
/** Returns the grammar at `index`. */
item(index: number): W3CSpeechGrammar;
[index: number]: W3CSpeechGrammar;
}
/**
 * Grammar list class implementing `W3CSpeechGrammarList`, extended with a
 * readable and writable `phrases` property.
 * NOTE(review): `addFromString`, `addFromURI` and `item` are declared without
 * parameters here; whether they are functional is not visible in this file — confirm.
 */
declare class SpeechGrammarList implements W3CSpeechGrammarList {
#private;
constructor();
addFromString(): void;
addFromURI(): void;
item(): W3CSpeechGrammar;
get length(): number;
/** Indexed access; the element shape is structurally the same as `W3CSpeechGrammar`. */
[index: number]: {
src: string;
weight: number;
};
/** Current phrases as a read-only string array. */
get phrases(): readonly string[];
/** Replaces the phrases with the given read-only string array. */
set phrases(value: readonly string[]);
}
/**
 * Valibot schema type for service credentials. As declared, a value must satisfy both:
 * - exactly one of `authorizationToken` or `subscriptionKey` is a string and the other is unset;
 * - either `region` is a string and all `*Hostname` fields are unset, or
 *   `speechRecognitionHostname` and `speechSynthesisHostname` are strings
 *   (`customVoiceHostname` optional) and `region` is unset.
 * The output type is wrapped in `ReadonlyAction`.
 *
 * NOTE(review): several message literals contain typos ("unest", "either have either").
 * They presumably mirror the runtime strings, so any fix belongs in the schema
 * source rather than in this declaration — confirm.
 */
declare const credentialsSchema: valibot.SchemaWithPipe<[valibot.IntersectSchema<[valibot.UnionSchema<[valibot.ObjectSchema<{
readonly authorizationToken: valibot.StringSchema<undefined>;
readonly subscriptionKey: valibot.OptionalSchema<valibot.UndefinedSchema<"\"subscriptionKey\" must be unset when \"authorizationToken\" is set.">, never>;
}, undefined>, valibot.ObjectSchema<{
readonly authorizationToken: valibot.OptionalSchema<valibot.UndefinedSchema<"\"authorizationToken\" must be unset when \"subscriptionKey\" is set.">, never>;
readonly subscriptionKey: valibot.StringSchema<undefined>;
}, undefined>], "The object must either have either \"authorizationToken\" or \"subscriptionKey\" set, but not both.">, valibot.UnionSchema<[valibot.ObjectSchema<{
readonly customVoiceHostname: valibot.OptionalSchema<valibot.UndefinedSchema<"\"customVoiceHostname\" must be unest when \"region\" is set.">, never>;
readonly region: valibot.StringSchema<undefined>;
readonly speechRecognitionHostname: valibot.OptionalSchema<valibot.UndefinedSchema<"\"speechRecognitionHostname\" must be unest when \"region\" is set.">, never>;
readonly speechSynthesisHostname: valibot.OptionalSchema<valibot.UndefinedSchema<"\"speechSynthesisHostname\" must be unest when \"region\" is set.">, never>;
}, undefined>, valibot.ObjectSchema<{
readonly customVoiceHostname: valibot.OptionalSchema<valibot.UnionSchema<[valibot.StringSchema<undefined>, valibot.UndefinedSchema<undefined>], undefined>, never>;
readonly region: valibot.OptionalSchema<valibot.UndefinedSchema<"\"region\" must be unset when \"*Hostname\" is set.">, never>;
readonly speechRecognitionHostname: valibot.StringSchema<undefined>;
readonly speechSynthesisHostname: valibot.StringSchema<undefined>;
}, undefined>], "The object must either have either \"region\" or \"*Hostname\" set, but not both.">], undefined>, valibot.ReadonlyAction<({
authorizationToken: string;
subscriptionKey?: undefined;
} | {
subscriptionKey: string;
authorizationToken?: undefined;
}) & ({
region: string;
customVoiceHostname?: undefined;
speechRecognitionHostname?: undefined;
speechSynthesisHostname?: undefined;
} | {
speechRecognitionHostname: string;
speechSynthesisHostname: string;
customVoiceHostname?: string | undefined;
region?: undefined;
})>]>;
/** Validated credentials object: the output type inferred from `credentialsSchema`. */
type Credentials = InferOutput<typeof credentialsSchema>;
/**
 * Options accepted by `createSpeechRecognitionPonyfill`.
 * Only `credentials` is required; every other field may be omitted or `undefined`.
 */
type PatchOptionsInit = {
/** Optional `AudioConfig` from the Speech SDK. */
audioConfig?: AudioConfig | undefined;
/** Credentials given directly, as a promise, or as a function returning either form. */
credentials: (() => Credentials | Promise<Credentials>) | Credentials | Promise<Credentials>;
enableTelemetry?: boolean | undefined;
/** NOTE(review): the unit of this number is not visible in this file — confirm. */
initialSilenceTimeout?: number | undefined;
/** NOTE(review): both `looseEvent` and `looseEvents` are accepted; presumably one is a legacy alias of the other — confirm against the implementation. */
looseEvent?: boolean | undefined;
looseEvents?: boolean | undefined;
/** Optional read-only list of grammar strings. */
referenceGrammars?: readonly string[] | undefined;
speechRecognitionEndpointId?: string | undefined;
/** Optional text normalization mode; one of the four listed literals. */
textNormalization?: 'display' | 'itn' | 'lexical' | 'maskeditn' | undefined;
};
/**
 * Creates the speech-recognition ponyfill from `PatchOptionsInit`.
 *
 * @param options Configuration; `credentials` is required.
 * @returns An object exposing `SpeechGrammarList`, a `SpeechRecognition`
 * constructor, and `SpeechRecognitionEvent`.
 *
 * NOTE(review): the `"__#8@#…"` members in the instance type look like
 * private-field names leaked by declaration emit of an anonymous class; they
 * are presumably not intended as public API — confirm before relying on them.
 */
declare function createSpeechRecognitionPonyfill(options: PatchOptionsInit): {
SpeechGrammarList: typeof SpeechGrammarList;
SpeechRecognition: {
new (): {
"__#8@#continuous": boolean;
"__#8@#eventListenerMap": SpeechRecognitionEventListenerMap;
"__#8@#grammars": SpeechGrammarList;
"__#8@#interimResults": boolean;
"__#8@#lang": string;
"__#8@#maxAlternatives": number;
emitCognitiveServices<T extends {
type: string;
}>(type: string, event: T): void;
continuous: boolean;
grammars: SpeechGrammarList;
interimResults: boolean;
maxAlternatives: number;
lang: string;
// Event handler properties: each holds a listener for the matching event, or `undefined`.
get onaudioend(): ((event: SpeechRecognitionEvent<"audioend">) => void) | undefined;
set onaudioend(value: ((event: SpeechRecognitionEvent<"audioend">) => void) | undefined);
onaudiostart: ((event: SpeechRecognitionEvent<"audiostart">) => void) | undefined;
oncognitiveservices: ((event: SpeechRecognitionEvent<"cognitiveservices">) => void) | undefined;
onend: ((event: SpeechRecognitionEvent<"end">) => void) | undefined;
onerror: ((event: SpeechRecognitionErrorEvent) => void) | undefined;
onresult: ((event: SpeechRecognitionEvent<"result">) => void) | undefined;
onsoundend: ((event: SpeechRecognitionEvent<"soundend">) => void) | undefined;
onsoundstart: ((event: SpeechRecognitionEvent<"soundstart">) => void) | undefined;
onspeechend: ((event: SpeechRecognitionEvent<"speechend">) => void) | undefined;
onspeechstart: ((event: SpeechRecognitionEvent<"speechstart">) => void) | undefined;
onstart: ((event: SpeechRecognitionEvent<"start">) => void) | undefined;
// `abort` and `stop` are typed as possibly `undefined`; callers must check before invoking them.
abort: (() => void) | undefined;
stop: (() => void) | undefined;
start(): void;
_startOnce(): Promise<void>;
addEventListener(type: string, callback: EventListenerOrEventListenerObject | null, options?: boolean | AddEventListenerOptions | undefined): void;
dispatchEvent(event: Event): boolean;
removeEventListener(type: string, callback: EventListenerOrEventListenerObject | null, options?: boolean | EventListenerOptions | undefined): void;
};
};
SpeechRecognitionEvent: typeof SpeechRecognitionEvent;
};
/**
 * Options accepted by `createSpeechRecognitionPonyfillFromRecognizer`.
 * Unlike `PatchOptionsInit`, the caller supplies the recognizer factory, and
 * `looseEvents` and `textNormalization` are required.
 */
type CreateSpeechRecognitionPonyfillFromRecognizerInit = {
/** Async factory returning a `SpeechRecognizer` for the given language string. */
createRecognizer: (lang: string) => Promise<SpeechRecognizer>;
/** The key is required, but the value may be `undefined`. */
enableTelemetry: boolean | undefined;
looseEvents: boolean;
/** Optional read-only list of grammar strings. */
referenceGrammars?: readonly string[] | undefined;
/** Text normalization mode; one of the four listed literals. */
textNormalization: 'display' | 'itn' | 'lexical' | 'maskeditn';
};
/**
 * Creates the speech-recognition ponyfill from a caller-supplied recognizer factory.
 *
 * The single argument is a `CreateSpeechRecognitionPonyfillFromRecognizerInit` object.
 *
 * @returns An object exposing `SpeechGrammarList`, a `SpeechRecognition`
 * constructor, and `SpeechRecognitionEvent`.
 *
 * NOTE(review): the `"__#8@#…"` members in the instance type look like
 * private-field names leaked by declaration emit of an anonymous class; they
 * are presumably not intended as public API — confirm before relying on them.
 */
declare function createSpeechRecognitionPonyfillFromRecognizer({ createRecognizer, enableTelemetry, looseEvents, referenceGrammars, textNormalization }: CreateSpeechRecognitionPonyfillFromRecognizerInit): {
SpeechGrammarList: typeof SpeechGrammarList;
SpeechRecognition: {
new (): {
"__#8@#continuous": boolean;
"__#8@#eventListenerMap": SpeechRecognitionEventListenerMap;
"__#8@#grammars": SpeechGrammarList;
"__#8@#interimResults": boolean;
"__#8@#lang": string;
"__#8@#maxAlternatives": number;
emitCognitiveServices<T extends {
type: string;
}>(type: string, event: T): void;
continuous: boolean;
grammars: SpeechGrammarList;
interimResults: boolean;
maxAlternatives: number;
lang: string;
/** Listener for the `'audioend'` event, or `undefined`. */
get onaudioend(): ((event: SpeechRecognitionEvent<'audioend'>) => void) | undefined;
set onaudioend(value: ((event: SpeechRecognitionEvent<'audioend'>) => void) | undefined);
/** Listener for the `'audiostart'` event, or `undefined`. */
onaudiostart: ((event: SpeechRecognitionEvent<"audiostart">) => void) | undefined;
/** Listener for the `'cognitiveservices'` event, or `undefined`. */
oncognitiveservices: ((event: SpeechRecognitionEvent<"cognitiveservices">) => void) | undefined;
/** Listener for the `'end'` event, or `undefined`. */
onend: ((event: SpeechRecognitionEvent<"end">) => void) | undefined;
/** Listener for the `'error'` event, or `undefined`. */
onerror: ((event: SpeechRecognitionErrorEvent) => void) | undefined;
/** Listener for the `'result'` event, or `undefined`. */
onresult: ((event: SpeechRecognitionEvent<"result">) => void) | undefined;
/** Listener for the `'soundend'` event, or `undefined`. */
onsoundend: ((event: SpeechRecognitionEvent<"soundend">) => void) | undefined;
/** Listener for the `'soundstart'` event, or `undefined`. */
onsoundstart: ((event: SpeechRecognitionEvent<"soundstart">) => void) | undefined;
/** Listener for the `'speechend'` event, or `undefined`. */
onspeechend: ((event: SpeechRecognitionEvent<"speechend">) => void) | undefined;
/** Listener for the `'speechstart'` event, or `undefined`. */
onspeechstart: ((event: SpeechRecognitionEvent<"speechstart">) => void) | undefined;
/** Listener for the `'start'` event, or `undefined`. */
onstart: ((event: SpeechRecognitionEvent<"start">) => void) | undefined;
// `abort` and `stop` are typed as possibly `undefined`; callers must check before invoking them.
abort: (() => void) | undefined;
stop: (() => void) | undefined;
start(): void;
_startOnce(): Promise<void>;
addEventListener(type: string, callback: EventListenerOrEventListenerObject | null, options?: boolean | AddEventListenerOptions | undefined): void;
dispatchEvent(event: Event): boolean;
removeEventListener(type: string, callback: EventListenerOrEventListenerObject | null, options?: boolean | EventListenerOptions | undefined): void;
};
};
SpeechRecognitionEvent: typeof SpeechRecognitionEvent;
};
/**
 * Event class for speech synthesis.
 * NOTE(review): declared without a base class and with an untyped (`any`)
 * constructor argument; presumably `type` is the event-type string — confirm.
 */
declare class SpeechSynthesisEvent {
constructor(type: any);
}
/**
 * Utterance class for speech synthesis.
 *
 * Most members are typed `any`, so this declaration gives callers little
 * compile-time checking. Only `pitch`, `rate` and `volume` are typed as numbers.
 */
declare class SpeechSynthesisUtterance {
constructor(text: any);
// Underscore-prefixed members are declared alongside same-named accessors below.
// NOTE(review): presumably their backing storage — confirm before using them directly.
_lang: any;
_pitch: number;
_rate: number;
_voice: any;
_volume: number;
text: any;
// Event handler accessors (all untyped).
set onboundary(value: any);
get onboundary(): any;
set onend(value: any);
get onend(): any;
set onerror(value: any);
get onerror(): any;
set onmark(value: any);
get onmark(): any;
set onpause(value: any);
get onpause(): any;
set onresume(value: any);
get onresume(): any;
set onstart(value: any);
get onstart(): any;
// Utterance settings.
set lang(value: any);
get lang(): any;
set pitch(value: number);
get pitch(): number;
set rate(value: number);
get rate(): number;
set voice(value: any);
get voice(): any;
set volume(value: number);
get volume(): number;
/**
 * Takes `deploymentId`, `fetchCredentials` and `outputFormat` (all untyped) and returns nothing.
 * NOTE(review): presumably starts fetching audio and populates `arrayBufferPromise` — confirm.
 */
preload({ deploymentId, fetchCredentials, outputFormat }: {
deploymentId: any;
fetchCredentials: any;
outputFormat: any;
}): void;
/** Promise of an `ArrayBuffer`, or `undefined`. */
arrayBufferPromise: Promise<ArrayBuffer> | undefined;
/** Returns a promise resolving to `void`; `audioContext` is untyped. */
play(audioContext: any): Promise<void>;
_playingSource: any;
stop(): void;
}
/**
 * Speech-synthesis ponyfill factory, exported as `createSpeechSynthesisPonyfill`.
 *
 * @param options Untyped (`any`) configuration.
 * @returns One of two shapes: an object in which `speechSynthesis`,
 * `SpeechSynthesisEvent` and `SpeechSynthesisUtterance` are all absent, or an
 * object providing all three. Callers must check for presence before use.
 *
 * NOTE(review): `AudioContextQueue` is neither declared nor imported in the
 * visible part of this file — confirm it resolves, otherwise `queue` fails type-checking.
 */
declare function _default(options: any): {
speechSynthesis?: never;
SpeechSynthesisEvent?: never;
SpeechSynthesisUtterance?: never;
} | {
speechSynthesis: {
queue: AudioContextQueue;
cancel(): void;
getVoices(): any[];
onvoiceschanged: any;
pause(): void;
resume(): void;
speak(utterance: any): Promise<any>;
readonly speaking: boolean;
updateVoices(): Promise<void>;
};
SpeechSynthesisEvent: typeof SpeechSynthesisEvent;
SpeechSynthesisUtterance: typeof SpeechSynthesisUtterance;
};
/** Arguments for `fetchAuthorizationToken`; both fields are required strings. */
type FetchAuthorizationTokenInit = {
region: string;
subscriptionKey: string;
};
/**
 * Takes a `region` and a `subscriptionKey` and returns a promise of a string.
 * NOTE(review): presumably the resolved string is an authorization token usable
 * as `Credentials.authorizationToken` — confirm.
 */
declare function fetchAuthorizationToken({ region, subscriptionKey }: FetchAuthorizationTokenInit): Promise<string>;
/**
 * Untyped entry point: accepts optional `any` options and returns `any`, so
 * callers get no compile-time checking from this signature.
 * NOTE(review): presumably combines the recognition and synthesis ponyfills — confirm.
 */
declare function createSpeechServicesPonyfill(options?: any): any;
export { createSpeechRecognitionPonyfill, createSpeechRecognitionPonyfillFromRecognizer, createSpeechServicesPonyfill, _default as createSpeechSynthesisPonyfill, fetchAuthorizationToken };