UNPKG

@aristech-org/stt-client

Version:

A Node.js client library for the Aristech Speech-to-Text API

577 lines (576 loc) 25 kB
/**
 * Type declarations for the `ari.stt.v1` speech-to-text package: message
 * shapes, enums, per-message codec objects, and the `SttService` gRPC service
 * definition together with its client and server typings.
 */
import { BinaryReader, BinaryWriter } from "@bufbuild/protobuf/wire";
import { type CallOptions, ChannelCredentials, Client, type ClientDuplexStream, type ClientOptions, type ClientUnaryCall, type handleBidiStreamingCall, type handleUnaryCall, Metadata, type ServiceError, type UntypedServiceImplementation } from "@grpc/grpc-js";
import { Duration } from "./google/protobuf/duration.js";
/** Name of the protobuf package these declarations belong to. */
export declare const protobufPackage = "ari.stt.v1";
/** The endpointing modes a model can support (see `Model.endpointing`). */
export declare enum EndpointingType {
    /** LM - Endpointing that considers the language model. */
    LM = 0,
    /** VAD - VAD based endpointing. */
    VAD = 1,
    UNRECOGNIZED = -1
}
/**
 * Converts an untyped JSON value into an `EndpointingType` member.
 * NOTE(review): the accepted input forms and the handling of unknown values
 * live in the implementation, which is not visible here - confirm there.
 */
export declare function endpointingTypeFromJSON(object: any): EndpointingType;
/** Converts an `EndpointingType` member into its JSON string form. */
export declare function endpointingTypeToJSON(object: EndpointingType): string;
/** The kinds of models the server can offer (see `Model.type`). */
export declare enum ModelType {
    /** CORE_STT - STT-Core models */
    CORE_STT = 0,
    /** GRAMMAR_STT - Grammar only models */
    GRAMMAR_STT = 1,
    /** MULTITASK_STT - Multitask models */
    MULTITASK_STT = 2,
    /** DIARIZATION - Speaker diarization model type. */
    DIARIZATION = 3,
    UNRECOGNIZED = -1
}
/**
 * Converts an untyped JSON value into a `ModelType` member.
 * NOTE(review): input forms / unknown-value handling are not visible here.
 */
export declare function modelTypeFromJSON(object: any): ModelType;
/** Converts a `ModelType` member into its JSON string form. */
export declare function modelTypeToJSON(object: ModelType): string;
/**
 * The grammar types a model can support (see `Model.grammarType`).
 * Note that the numeric values are not in declaration order (SRGS is 3).
 */
export declare enum GrammarType {
    /**
     * JSGF - JSGF grammar type.
     * Example grammar: `jsgf:<yes_no> = yes | no;`
     */
    JSGF = 0,
    /**
     * SRGS - SRGS grammar type.
     * Example grammar: `srgs:$yes_no = yes | no;`
     */
    SRGS = 3,
    /**
     * KWS - Keyword / Keyphrase spotting grammar type.
     * Example grammar: `kws:oh mighty computer|hey computer`
     */
    KWS = 1,
    /**
     * PHRASE_LIST - A simple json phrase list grammar type.
     * Example grammar: `["yes", "yeah", "yep", "why not", "no", "nope"]`
     */
    PHRASE_LIST = 2,
    UNRECOGNIZED = -1
}
/**
 * Converts an untyped JSON value into a `GrammarType` member.
 * NOTE(review): input forms / unknown-value handling are not visible here.
 */
export declare function grammarTypeFromJSON(object: any): GrammarType;
/** Converts a `GrammarType` member into its JSON string form. */
export declare function grammarTypeToJSON(object: GrammarType): string;
/** The top-level message sent by the client for the `StreamingRecognize` method. */
export interface StreamingRecognitionRequest {
    /**
     * The configuration for the stream.
     * This is the first message that must be sent.
     */
    config?: RecognitionConfig | undefined;
    /** The audio data to be recognized. */
    audioContent?: Uint8Array | undefined;
}
/** The top-level message returned from the `StreamingRecognize` method. */
export interface StreamingRecognitionResponse {
    /** List of results that are currently available. */
    chunks: SpeechRecognitionChunk[];
    /**
     * A short id that is used in the stt-server logs to differentiate between
     * different client requests. Be aware that this id should only be used for
     * debugging purposes because it is not collision safe.
     */
    clientId: string;
    /** The language identified by the server, e.g. en, de, etc. */
    language: string;
}
/**
 * The `RecognitionConfig` message provides information to the recognizer that
 * specifies how to process the request.
 */
export interface RecognitionConfig {
    /**
     * Specifies what kind of audio is being sent and how the recognizer should
     * process it.
     */
    specification: RecognitionSpec | undefined;
}
/**
 * The `RecognitionSpec` message provides information to the recognizer that
 * specifies how to process the request.
 */
export interface RecognitionSpec {
    /** At the moment only LINEAR16 is supported. */
    audioEncoding: RecognitionSpec_AudioEncoding;
    /** 8000, 16000, 48000 only for pcm. */
    sampleRateHertz: number;
    /** [language[_territory]] e.g. en, en-IN, de. */
    locale: string;
    /** Load a specific graph for the locale specific model (e.g. yes_no). */
    graph: string;
    /**
     * Allows specifying a grammar to be used for the recognition.
     * - To specify a JSGF grammar, set grammar to e.g.
     *   `jsgf:public <yes_no> = yes | no;`
     * - To spot a keyword / phrase, set grammar to e.g.
     *   `kws:oh mighty computer`
     * - You can also specify a json string to narrow the possible words to
     *   appear, e.g.
     *   `["oh one two three four five six seven eight nine zero", "[unk]"]`
     */
    grammar: string;
    /**
     * If set to true, tentative hypotheses may be returned as they become
     * available (final=false flag). If false or omitted, only final=true
     * result(s) are returned. Makes sense only for StreamingRecognize requests.
     */
    partialResults: boolean;
    /** Decode as single utterance. */
    singleUtterance: boolean;
    /** Specifies how text should be normalized. */
    normalization: NormalizationSpec | undefined;
    /**
     * When set, the recognizer opts out of MBR decoding and produces phoneme
     * infos.
     */
    phones: boolean;
    /**
     * Instead of picking a model based on the locale, this field can be used to
     * specify a specific model directly.
     * To specify a graph model directly use model:graph e.g.
     * generic-model-de-0.21:ja_nein
     */
    model: string;
    /**
     * For models that use endpointing (e.g. STT-Core models) this field can be
     * used to specify the endpointing configuration.
     */
    endpointing: EndpointSpec | undefined;
    /**
     * For models that use voice activity detection (VAD) this field can be used
     * to specify the VAD configuration.
     */
    vad: VadSpec | undefined;
    /**
     * Some models allow specifying a prompt that can be used to give the model
     * some context on what was said before or to steer the model to use
     * particular spellings or styles.
     */
    prompt: string;
}
/** The audio encodings that can be set in `RecognitionSpec.audioEncoding`. */
export declare enum RecognitionSpec_AudioEncoding {
    /** AUDIO_ENCODING_UNSPECIFIED - If not specified, defaults to LINEAR16_PCM. */
    AUDIO_ENCODING_UNSPECIFIED = 0,
    /** LINEAR16_PCM - 16-bit signed little-endian (Linear PCM) */
    LINEAR16_PCM = 1,
    UNRECOGNIZED = -1
}
/**
 * Converts an untyped JSON value into a `RecognitionSpec_AudioEncoding` member.
 * NOTE(review): input forms / unknown-value handling are not visible here.
 */
export declare function recognitionSpec_AudioEncodingFromJSON(object: any): RecognitionSpec_AudioEncoding;
/** Converts a `RecognitionSpec_AudioEncoding` member into its JSON string form. */
export declare function recognitionSpec_AudioEncodingToJSON(object: RecognitionSpec_AudioEncoding): string;
/**
 * The `NormalizationSpec` message provides information to the recognizer that
 * specifies which normalizer to use.
 */
export interface NormalizationSpec {
    /**
     * Whether to strip the unknown label from the resulting text.
     * Note: The unknown label can still be accessed via the `words` field.
     */
    stripUnk: boolean;
    /**
     * Allows specifying which nlp functions should be applied to the text
     * before it is returned. By specifying this field, the default nlp
     * configuration is overwritten.
     */
    nlp: NLPSpec | undefined;
}
/**
 * The `NLPSpec` message selects an nlp server configuration and the nlp
 * functions that should be applied to the text before it is returned.
 */
export interface NLPSpec {
    /** The server config name of the server that provides the nlp functions. */
    serverConfig: string;
    /** Which nlp functions should be applied to the text before it is returned. */
    functions: NLPFunctionSpec[];
    /** Whether to apply the nlp functions to the partial results. */
    partialResults: boolean;
    /** Optional global argument. */
    args: string;
    /** Specifies which field should be used as input for the nlp functions. */
    inputField: NLPSpec_NlpInputField;
}
/** The result fields that can serve as input for nlp processing (`NLPSpec.inputField`). */
export declare enum NLPSpec_NlpInputField {
    /** UNSPECIFIED - If not specified, defaults to TEXT. */
    UNSPECIFIED = 0,
    /** TEXT - The text field is used as input for the nlp processing. */
    TEXT = 1,
    /** TAGGED_TEXT - Use the tagged_text field as input for the nlp processing. */
    TAGGED_TEXT = 2,
    /** SLOTTED_TEXT - Use the slotted_text field as input for the nlp processing. */
    SLOTTED_TEXT = 3,
    UNRECOGNIZED = -1
}
/**
 * Converts an untyped JSON value into an `NLPSpec_NlpInputField` member.
 * NOTE(review): input forms / unknown-value handling are not visible here.
 */
export declare function nLPSpec_NlpInputFieldFromJSON(object: any): NLPSpec_NlpInputField;
/** Converts an `NLPSpec_NlpInputField` member into its JSON string form. */
export declare function nLPSpec_NlpInputFieldToJSON(object: NLPSpec_NlpInputField): string;
/**
 * The `NLPFunctionSpec` message provides information to the recognizer that
 * specifies which nlp function to use.
 */
export interface NLPFunctionSpec {
    /** The id of the nlp function (e.g. `ner-de`). */
    id: string;
    /** Optional additional parameters for the nlp function (e.g. `ANONYMIZE`). */
    args: string[];
}
/** Endpointing configuration for LM based endpointing. */
export interface EndpointSpec {
    /**
     * How many seconds of non-speech before the endpointer triggers to
     * clean up the buffer.
     */
    silenceTimeout: number;
    /**
     * How many seconds of non-speech after some speech with high
     * probability for an endpoint before the endpointer triggers.
     */
    trailingSilenceHighProbability: number;
    /**
     * How many seconds of non-speech after some speech with low probability
     * for an endpoint before the endpointer triggers.
     */
    trailingSilenceOkProbability: number;
    /**
     * How many seconds of non-speech without the endpointer reaching a
     * final state before the endpointer triggers.
     */
    trailingSilenceNoEndpoint: number;
    /**
     * After how many seconds of audio to trigger an endpoint regardless
     * of anything else.
     */
    utteranceTimeout: number;
}
/**
 * Endpointing configuration for Voice activity detection (VAD) based
 * endpointing.
 */
export interface VadSpec {
    /**
     * The threshold between 0 and 1.0 to determine if a frame is speech or
     * non-speech. A higher threshold will result in fewer false positives but
     * some speech might also be cut off.
     */
    threshold: number;
    /**
     * Amount of trailing silence after a speech to non-speech transition
     * before an utterance is considered finished.
     * NOTE(review): the original sentence had no predicate ("considered
     * after ..."); "finished" and the unit (presumably seconds, like
     * `minSpeech`) are assumptions - confirm against the server docs.
     */
    trailingSilence: number;
    /**
     * The minimum duration of speech in seconds before trying to perform a
     * partial recognition.
     */
    minSpeech: number;
}
/**
 * The `SpeechRecognitionChunk` message contains the result of a single
 * utterance.
 */
export interface SpeechRecognitionChunk {
    /** The transcription alternatives. */
    alternatives: SpeechRecognitionAlternative[];
    /** This flag indicates if the transcription is final or not. */
    final: boolean;
    /** This flag shows that the received chunk is the end of an utterance. */
    endOfUtterance: boolean;
}
/**
 * The `SpeechRecognitionAlternative` message contains one alternative of a
 * transcription.
 */
export interface SpeechRecognitionAlternative {
    /** The raw recognized text. */
    text: string;
    /**
     * When the model is composed of multiple nested language models, this field
     * contains the recognized text including xml tags that indicate which
     * language model produced which part of the text, e.g.
     * "i live in <address> <number> 21 </number> <street> jumpstreet </street>
     * <city> heidelberg </city> </address>"
     */
    slottedText: string;
    /** The tagged recognized text. */
    taggedText: string;
    /** The nlp result. */
    nlpText: string;
    /** The overall confidence of the recognition result. */
    confidence: number;
    /**
     * Word level infos such as start and end time offsets, word level
     * confidences, or phoneme infos.
     */
    words: WordInfo[];
}
/** The `WordInfo` message contains the word level information. */
export interface WordInfo {
    /** The word's start time, in seconds. */
    startTime: Duration | undefined;
    /** The word's end time, in seconds. */
    endTime: Duration | undefined;
    /** The word. */
    word: string;
    /** The confidence of the word in the range [0.0, 1.0]. */
    confidence: number;
    /** Phoneme infos. */
    phones: PhoneInfo[];
    /**
     * Speech recognition slot the word belongs to.
     * For nested slots, the slots are joined with a dot and ordered from outer
     * to inner, e.g.:
     * "i live in <address> <number> 21 </number> <street> jumpstreet </street>
     * heidelberg </address>"
     * will have the following slots:
     *   i          -> ''
     *   live       -> ''
     *   in         -> ''
     *   21         -> 'address.number'
     *   jumpstreet -> 'address.street'
     *   heidelberg -> 'address'
     */
    slot: string;
}
/** The `PhoneInfo` message contains the phoneme level information. */
export interface PhoneInfo {
    /** The phone's start time, in seconds. */
    startTime: Duration | undefined;
    /** The phone's end time, in seconds. */
    endTime: Duration | undefined;
    /** The phone. */
    phone: string;
}
/** The `ModelsRequest` message currently contains no information. */
export interface ModelsRequest {
}
/** The `ModelsResponse` message contains the list of supported models. */
export interface ModelsResponse {
    /** List of supported models. */
    model: Model[];
}
/** The `Model` message contains the information about a single model. */
export interface Model {
    /**
     * The model id.
     * e.g. generic-model-de-0.21
     */
    id: string;
    /**
     * Alias that can also be used to refer to the model instead of the id.
     * e.g. generic-de or german-large
     */
    alias: string[];
    /**
     * The human readable model name (for display purposes).
     * e.g. German Generic Model (Large)
     */
    name: string;
    /** The model description. */
    description: string;
    /** The model version. */
    version: string;
    /** The model type. */
    type: ModelType;
    /** The locale(s) supported by the model. */
    locale: string[];
    /** Which grammar types are supported by the model. */
    grammarType: GrammarType[];
    /** The NLP preconfiguration for this model (if any). */
    nlp: NLPSpec | undefined;
    /** The slots the model potentially outputs. */
    slots: string[];
    /** Examples of what the model can recognize. */
    examples: string[];
    /** The supported endpointing modes. */
    endpointing: EndpointingType[];
}
/** The `NLPFunctionsRequest` message currently contains no information. */
export interface NLPFunctionsRequest {
}
/**
 * The `NLPFunctionsResponse` message contains the list of supported nlp
 * servers and the corresponding functions.
 */
export interface NLPFunctionsResponse {
    /** List of supported nlp servers. */
    server: NLPFunctionServer[];
}
/**
 * The `NLPFunctionServer` message describes one nlp server configuration and
 * the nlp functions it supports.
 */
export interface NLPFunctionServer {
    /** The nlp server configuration name (to be used in `NLPSpec`). */
    serverConfig: string;
    /** The nlp functions supported by the nlp server. */
    functions: NLPFunction[];
}
/**
 * The `NLPFunction` message contains the information about a single nlp
 * function.
 */
export interface NLPFunction {
    /** The nlp function id. */
    id: string;
    /** The nlp function name. */
    name: string;
    /** The nlp function description. */
    description: string;
}
/** The `Graph` message contains the information about a single graph. */
export interface Graph {
    /** The name of the graph. */
    name: string;
}
/** The `AccountInfoRequest` message currently contains no information. */
export interface AccountInfoRequest {
}
/** The `AccountInfoResponse` message contains the account information. */
export interface AccountInfoResponse {
    /** The account token. */
    token: string;
    /** The account display name. */
    displayName: string;
    /** How many requests were made with this account. */
    totalRequests: number;
    /** How many seconds of audio this account has booked. */
    bookedSeconds: number;
    /** How many seconds of audio this account has used. */
    usedSeconds: number;
    /** Expiration date of the account as unix timestamp (-1 for unlimited). */
    expirationDate: number;
    /** Whether the account is blocked. */
    blocked: boolean;
}
/** The `NLPProcessRequest` message contains the text to be processed. */
export interface NLPProcessRequest {
    /** The text to be processed. */
    text: string;
    /** The nlp specification. */
    nlp: NLPSpec | undefined;
}
/** The `NLPProcessResponse` message contains the processed text. */
export interface NLPProcessResponse {
    /** The processed text. */
    text: string;
}
/*
 * Codec objects, one per message type. Each constant shares its name with the
 * interface above (value and type live in separate namespaces) and exposes the
 * `MessageFns` helpers declared at the end of this file.
 */
export declare const StreamingRecognitionRequest: MessageFns<StreamingRecognitionRequest>;
export declare const StreamingRecognitionResponse: MessageFns<StreamingRecognitionResponse>;
export declare const RecognitionConfig: MessageFns<RecognitionConfig>;
export declare const RecognitionSpec: MessageFns<RecognitionSpec>;
export declare const NormalizationSpec: MessageFns<NormalizationSpec>;
export declare const NLPSpec: MessageFns<NLPSpec>;
export declare const NLPFunctionSpec: MessageFns<NLPFunctionSpec>;
export declare const EndpointSpec: MessageFns<EndpointSpec>;
export declare const VadSpec: MessageFns<VadSpec>;
export declare const SpeechRecognitionChunk: MessageFns<SpeechRecognitionChunk>;
export declare const SpeechRecognitionAlternative: MessageFns<SpeechRecognitionAlternative>;
export declare const WordInfo: MessageFns<WordInfo>;
export declare const PhoneInfo: MessageFns<PhoneInfo>;
export declare const ModelsRequest: MessageFns<ModelsRequest>;
export declare const ModelsResponse: MessageFns<ModelsResponse>;
export declare const Model: MessageFns<Model>;
export declare const NLPFunctionsRequest: MessageFns<NLPFunctionsRequest>;
export declare const NLPFunctionsResponse: MessageFns<NLPFunctionsResponse>;
export declare const NLPFunctionServer: MessageFns<NLPFunctionServer>;
export declare const NLPFunction: MessageFns<NLPFunction>;
export declare const Graph: MessageFns<Graph>;
export declare const AccountInfoRequest: MessageFns<AccountInfoRequest>;
export declare const AccountInfoResponse: MessageFns<AccountInfoResponse>;
export declare const NLPProcessRequest: MessageFns<NLPProcessRequest>;
export declare const NLPProcessResponse: MessageFns<NLPProcessResponse>;
/** Type alias for the shape of the `SttServiceService` definition object. */
export type SttServiceService = typeof SttServiceService;
/**
 * Definition of the `ari.stt.v1.SttService` gRPC service: for each method its
 * path, whether request/response are streamed, and the functions that
 * (de)serialize the request and response messages to/from `Buffer`.
 */
export declare const SttServiceService: {
    /** Transcribe a stream of audio. */
    readonly streamingRecognize: {
        readonly path: "/ari.stt.v1.SttService/StreamingRecognize";
        readonly requestStream: true;
        readonly responseStream: true;
        readonly requestSerialize: (value: StreamingRecognitionRequest) => Buffer<ArrayBuffer>;
        readonly requestDeserialize: (value: Buffer) => StreamingRecognitionRequest;
        readonly responseSerialize: (value: StreamingRecognitionResponse) => Buffer<ArrayBuffer>;
        readonly responseDeserialize: (value: Buffer) => StreamingRecognitionResponse;
    };
    /** List all supported models. */
    readonly models: {
        readonly path: "/ari.stt.v1.SttService/Models";
        readonly requestStream: false;
        readonly responseStream: false;
        readonly requestSerialize: (value: ModelsRequest) => Buffer<ArrayBuffer>;
        readonly requestDeserialize: (value: Buffer) => ModelsRequest;
        readonly responseSerialize: (value: ModelsResponse) => Buffer<ArrayBuffer>;
        readonly responseDeserialize: (value: Buffer) => ModelsResponse;
    };
    /** List all available nlp server configs and corresponding functions. */
    readonly nlpFunctions: {
        readonly path: "/ari.stt.v1.SttService/NLPFunctions";
        readonly requestStream: false;
        readonly responseStream: false;
        readonly requestSerialize: (value: NLPFunctionsRequest) => Buffer<ArrayBuffer>;
        readonly requestDeserialize: (value: Buffer) => NLPFunctionsRequest;
        readonly responseSerialize: (value: NLPFunctionsResponse) => Buffer<ArrayBuffer>;
        readonly responseDeserialize: (value: Buffer) => NLPFunctionsResponse;
    };
    /** Retrieve the account information (responds with `AccountInfoResponse`). */
    readonly accountInfo: {
        readonly path: "/ari.stt.v1.SttService/AccountInfo";
        readonly requestStream: false;
        readonly responseStream: false;
        readonly requestSerialize: (value: AccountInfoRequest) => Buffer<ArrayBuffer>;
        readonly requestDeserialize: (value: Buffer) => AccountInfoRequest;
        readonly responseSerialize: (value: AccountInfoResponse) => Buffer<ArrayBuffer>;
        readonly responseDeserialize: (value: Buffer) => AccountInfoResponse;
    };
    /** Processes the given text with the given nlp pipeline. */
    readonly nlpProcess: {
        readonly path: "/ari.stt.v1.SttService/NLPProcess";
        readonly requestStream: false;
        readonly responseStream: false;
        readonly requestSerialize: (value: NLPProcessRequest) => Buffer<ArrayBuffer>;
        readonly requestDeserialize: (value: Buffer) => NLPProcessRequest;
        readonly responseSerialize: (value: NLPProcessResponse) => Buffer<ArrayBuffer>;
        readonly responseDeserialize: (value: Buffer) => NLPProcessResponse;
    };
};
/**
 * Handler contract for a server implementing `SttService`: one bidirectional
 * streaming handler and four unary handlers, keyed by method name.
 */
export interface SttServiceServer extends UntypedServiceImplementation {
    /** Transcribe a stream of audio. */
    streamingRecognize: handleBidiStreamingCall<StreamingRecognitionRequest, StreamingRecognitionResponse>;
    /** List all supported models. */
    models: handleUnaryCall<ModelsRequest, ModelsResponse>;
    /** List all available nlp server configs and corresponding functions. */
    nlpFunctions: handleUnaryCall<NLPFunctionsRequest, NLPFunctionsResponse>;
    /** Retrieve the account information (responds with `AccountInfoResponse`). */
    accountInfo: handleUnaryCall<AccountInfoRequest, AccountInfoResponse>;
    /** Processes the given text with the given nlp pipeline. */
    nlpProcess: handleUnaryCall<NLPProcessRequest, NLPProcessResponse>;
}
/**
 * Call signatures of the `SttService` client. `streamingRecognize` returns a
 * duplex stream; every unary method comes in three overloads (request only,
 * request + metadata, request + metadata + call options), each taking a
 * callback and returning a `ClientUnaryCall`.
 */
export interface SttServiceClient extends Client {
    /** Transcribe a stream of audio. */
    streamingRecognize(): ClientDuplexStream<StreamingRecognitionRequest, StreamingRecognitionResponse>;
    streamingRecognize(options: Partial<CallOptions>): ClientDuplexStream<StreamingRecognitionRequest, StreamingRecognitionResponse>;
    streamingRecognize(metadata: Metadata, options?: Partial<CallOptions>): ClientDuplexStream<StreamingRecognitionRequest, StreamingRecognitionResponse>;
    /** List all supported models. */
    models(request: ModelsRequest, callback: (error: ServiceError | null, response: ModelsResponse) => void): ClientUnaryCall;
    models(request: ModelsRequest, metadata: Metadata, callback: (error: ServiceError | null, response: ModelsResponse) => void): ClientUnaryCall;
    models(request: ModelsRequest, metadata: Metadata, options: Partial<CallOptions>, callback: (error: ServiceError | null, response: ModelsResponse) => void): ClientUnaryCall;
    /** List all available nlp server configs and corresponding functions. */
    nlpFunctions(request: NLPFunctionsRequest, callback: (error: ServiceError | null, response: NLPFunctionsResponse) => void): ClientUnaryCall;
    nlpFunctions(request: NLPFunctionsRequest, metadata: Metadata, callback: (error: ServiceError | null, response: NLPFunctionsResponse) => void): ClientUnaryCall;
    nlpFunctions(request: NLPFunctionsRequest, metadata: Metadata, options: Partial<CallOptions>, callback: (error: ServiceError | null, response: NLPFunctionsResponse) => void): ClientUnaryCall;
    /** Retrieve the account information (responds with `AccountInfoResponse`). */
    accountInfo(request: AccountInfoRequest, callback: (error: ServiceError | null, response: AccountInfoResponse) => void): ClientUnaryCall;
    accountInfo(request: AccountInfoRequest, metadata: Metadata, callback: (error: ServiceError | null, response: AccountInfoResponse) => void): ClientUnaryCall;
    accountInfo(request: AccountInfoRequest, metadata: Metadata, options: Partial<CallOptions>, callback: (error: ServiceError | null, response: AccountInfoResponse) => void): ClientUnaryCall;
    /** Processes the given text with the given nlp pipeline. */
    nlpProcess(request: NLPProcessRequest, callback: (error: ServiceError | null, response: NLPProcessResponse) => void): ClientUnaryCall;
    nlpProcess(request: NLPProcessRequest, metadata: Metadata, callback: (error: ServiceError | null, response: NLPProcessResponse) => void): ClientUnaryCall;
    nlpProcess(request: NLPProcessRequest, metadata: Metadata, options: Partial<CallOptions>, callback: (error: ServiceError | null, response: NLPProcessResponse) => void): ClientUnaryCall;
}
/**
 * Constructor for `SttServiceClient` instances. Takes the server address, the
 * channel credentials and optional client options; also exposes the service
 * definition and the service name as static members.
 */
export declare const SttServiceClient: {
    new (address: string, credentials: ChannelCredentials, options?: Partial<ClientOptions>): SttServiceClient;
    service: typeof SttServiceService;
    serviceName: string;
};
/** Types that `DeepPartial` and `Exact` treat as leaves (not recursed into). */
type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined;
/**
 * Recursively optional version of `T`: `Builtin` values are kept as-is, array
 * element types are made deep-partial, and every property of an object type
 * becomes optional and deep-partial.
 */
export type DeepPartial<T> = T extends Builtin ? T : T extends globalThis.Array<infer U> ? globalThis.Array<DeepPartial<U>> : T extends ReadonlyArray<infer U> ? ReadonlyArray<DeepPartial<U>> : T extends {} ? {
    [K in keyof T]?: DeepPartial<T[K]>;
} : Partial<T>;
/** Distributes over a union so that the keys of every member are collected. */
type KeysOfUnion<T> = T extends T ? keyof T : never;
/**
 * Constrains `I` to the shape `P` without extra keys: any key of `I` that is
 * not a key of `P` is mapped to `never`, which makes excess properties a
 * compile-time error (applied recursively to nested properties).
 */
export type Exact<P, I extends P> = P extends Builtin ? P : P & {
    [K in keyof P]: Exact<P[K], I[K]>;
} & {
    [K in Exclude<keyof I, KeysOfUnion<P>>]: never;
};
/**
 * Helper functions available for every message type `T` (see the codec
 * constants above).
 * NOTE(review): only the signatures are visible in this declaration file;
 * details such as defaults for omitted fields must be checked in the
 * implementation.
 */
export interface MessageFns<T> {
    /** Writes `message` to the optional `writer` and returns a `BinaryWriter`. */
    encode(message: T, writer?: BinaryWriter): BinaryWriter;
    /** Reads a `T` from a `BinaryReader` or raw bytes; `length` is optional. */
    decode(input: BinaryReader | Uint8Array, length?: number): T;
    /** Builds a `T` from an untyped JSON value. */
    fromJSON(object: any): T;
    /** Returns a JSON representation of `message` (typed as `unknown`). */
    toJSON(message: T): unknown;
    /** Builds a `T` from an optional deep-partial object without excess keys. */
    create<I extends Exact<DeepPartial<T>, I>>(base?: I): T;
    /** Builds a `T` from a deep-partial object without excess keys. */
    fromPartial<I extends Exact<DeepPartial<T>, I>>(object: I): T;
}
export {};