UNPKG

@aristech-org/tts-client

Version:

A Node.js client library for the Aristech Text-to-Speech API

430 lines (429 loc) 23.7 kB
/**
 * Type declarations for the messages, enums and gRPC service definitions of
 * the `aristech.tts` protobuf package (Aristech Text-to-Speech API).
 *
 * This file only declares types and ambient values; the implementations are
 * not visible here.
 */
import { BinaryReader, BinaryWriter } from "@bufbuild/protobuf/wire";
import {
  type CallOptions,
  type ChannelCredentials,
  Client,
  type ClientDuplexStream,
  type ClientOptions,
  type ClientReadableStream,
  type ClientUnaryCall,
  type handleBidiStreamingCall,
  type handleServerStreamingCall,
  type handleUnaryCall,
  type Metadata,
  type ServiceError,
  type UntypedServiceImplementation
} from "@grpc/grpc-js";
import { SpeechLocale, SpeechRequestOption, SsmlDocSection, Voice } from "./TTSTypes.js";

/** Name of the protobuf package; it is also the prefix of every service path declared below. */
export declare const protobufPackage = "aristech.tts";

/** Commands that can be used in ServerCommand */
export declare enum CommandType {
  START_REQUEST = 0,
  STOP_REQUEST = 1,
  STATUS_REQUEST = 2,
  UNRECOGNIZED = -1
}

/**
 * Converts a JSON-decoded value into a `CommandType`.
 * NOTE(review): accepted input forms and the handling of unknown values are
 * defined by the implementation, which this declaration does not show.
 */
export declare function commandTypeFromJSON(object: any): CommandType;

/** Converts a `CommandType` into its JSON string representation. */
export declare function commandTypeToJSON(object: CommandType): string;

/** Kinds of response carried by a `ServerCommandResponse`. */
export declare enum CommandResponseType {
  /** STARTRESPONSE - Synthesis started */
  STARTRESPONSE = 0,
  /** STOPRESPONSE - Synthesis stopped */
  STOPRESPONSE = 1,
  /** STATUSRESPONSE - Response contains status information */
  STATUSRESPONSE = 2,
  /** SYNTHESISRESPONSE - Response contains SpeechResponses */
  SYNTHESISRESPONSE = 3,
  UNRECOGNIZED = -1
}

/**
 * Converts a JSON-decoded value into a `CommandResponseType`.
 * NOTE(review): accepted input forms are defined by the implementation (not visible here).
 */
export declare function commandResponseTypeFromJSON(object: any): CommandResponseType;

/** Converts a `CommandResponseType` into its JSON string representation. */
export declare function commandResponseTypeToJSON(object: CommandResponseType): string;

/** Status codes for adding a new voice. */
export declare enum VoiceAdditionStatus {
  /** SUCCESS - Voice was added successfully. */
  SUCCESS = 0,
  /** INVALID_VOICE_DATA - The provided voice data was invalid. */
  INVALID_VOICE_DATA = 1,
  /** BASE_VOICE_NOT_FOUND - The requested base voice was not found. */
  BASE_VOICE_NOT_FOUND = 2,
  /** INTERNAL_ERROR - The voice could not be added due to an internal server error. */
  INTERNAL_ERROR = 3,
  UNRECOGNIZED = -1
}

/**
 * Converts a JSON-decoded value into a `VoiceAdditionStatus`.
 * NOTE(review): accepted input forms are defined by the implementation (not visible here).
 */
export declare function voiceAdditionStatusFromJSON(object: any): VoiceAdditionStatus;

/** Converts a `VoiceAdditionStatus` into its JSON string representation. */
export declare function voiceAdditionStatusToJSON(object: VoiceAdditionStatus): string;

/**
 * Status codes for clearing the cache.
 * Note that the members are not declared in numeric order:
 * `VOICE_ID_NOT_FOUND` is 3 and `CLEAR_CACHE_ERROR` is 2.
 */
export declare enum ClearCacheStatus {
  /** CLEARED_SUCCESSFULLY - Cache was cleared successfully. */
  CLEARED_SUCCESSFULLY = 0,
  /** CACHE_DISABLED - The request was received, but caching is not enabled on the server. */
  CACHE_DISABLED = 1,
  /** VOICE_ID_NOT_FOUND - Voice ID provided does not exist */
  VOICE_ID_NOT_FOUND = 3,
  /** CLEAR_CACHE_ERROR - An error occurred during the operation. */
  CLEAR_CACHE_ERROR = 2,
  UNRECOGNIZED = -1
}

/**
 * Converts a JSON-decoded value into a `ClearCacheStatus`.
 * NOTE(review): accepted input forms are defined by the implementation (not visible here).
 */
export declare function clearCacheStatusFromJSON(object: any): ClearCacheStatus;

/** Converts a `ClearCacheStatus` into its JSON string representation. */
export declare function clearCacheStatusToJSON(object: ClearCacheStatus): string;

/** Status codes for checking the cache backend. */
export declare enum TestCacheConnectionStatus {
  /** TEST_CACHE_CONNECTION_OK - Cache backend is enabled and usable. */
  TEST_CACHE_CONNECTION_OK = 0,
  /** TEST_CACHE_CONNECTION_CACHE_DISABLED - Cache backend is not enabled/configured. */
  TEST_CACHE_CONNECTION_CACHE_DISABLED = 1,
  /** TEST_CACHE_CONNECTION_ERROR - Cache backend is enabled but not usable (e.g. connection failure/timeouts). */
  TEST_CACHE_CONNECTION_ERROR = 2,
  UNRECOGNIZED = -1
}

/**
 * Converts a JSON-decoded value into a `TestCacheConnectionStatus`.
 * NOTE(review): accepted input forms are defined by the implementation (not visible here).
 */
export declare function testCacheConnectionStatusFromJSON(object: any): TestCacheConnectionStatus;

/** Converts a `TestCacheConnectionStatus` into its JSON string representation. */
export declare function testCacheConnectionStatusToJSON(object: TestCacheConnectionStatus): string;

/**
 * `SpeechRequest` is the top-level message sent by the client for
 * the `getSpeech` method.
 */
export interface SpeechRequest {
  /** Required: The text that will be transformed into audio. */
  text: string;
  /** Required: Further options for the request. */
  options: SpeechRequestOption | undefined;
  /** Currently not used */
  parameters: string;
  /** Use "SSML" */
  inputType: string;
  /** Use "AUDIO" */
  outputType: string;
}

/** Request message of the `getPhoneset` method. */
export interface PhonesetRequest {
  /** Required: The voice of which the phoneset is requested. Only `Voice.name` has to be filled */
  voice: Voice | undefined;
}

/** Response message of the `getPhoneset` method. */
export interface PhonesetResponse {
  status: number;
  message: string;
  /** A json array that contains a representation of the phoneset */
  phoneset: string;
}

/** Request message of the `getTranscription` method. */
export interface TranscriptionRequest {
  /** Required: The voice for which the transcription is requested. Only `Voice.name` has to be filled */
  voice: Voice | undefined;
  /** Required: The word to be transcribed */
  word: string;
}

/** Response message of the `getTranscription` method. */
export interface TranscriptionResponse {
  status: number;
  message: string;
  /** The transcription of the submitted word */
  transcription: string;
}

/** A command that is sent to the speech engine. START_REQUEST has to encapsulate a SpeechRequest. */
export interface ServerCommand {
  commandType: CommandType;
  commandData: Uint8Array;
  speechRequest: SpeechRequest | undefined;
}

/** The server's response to a ServerCommand. */
export interface ServerCommandResponse {
  responseType: CommandResponseType;
  status: number;
  message: string;
  responseData: Uint8Array;
  speechResponse: SpeechResponse[];
}

/**
 * `SpeechResponse` is the top-level message sent by the server for
 * the `getSpeech` and `ProcessData` methods. Possibly multiple `SpeechResponse`
 * messages are sent while the audio is being generated.
 * When the requested output_type is a text type, there will be only one
 * SpeechResponse that contains the whole block.
 * However, output_type AUDIO will return multiple blocks of audio signal
 * that have to be concatenated or streamed to an audio device
 */
export interface SpeechResponse {
  status: number;
  /** The audio data bytes encoded as specified in `SpeechRequestOptionAudioFormat`. */
  data: Uint8Array;
  inputType: string;
  outputType: string;
}

/**
 * Requests a VoiceListResponse that contains all available voices (with the
 * specified locale)
 */
export interface VoiceListRequest {
  locale: SpeechLocale | undefined;
}

/** Adds a voice clone to the server using a wav file provided via this request */
export interface AddVoiceRequest {
  /** The locale of the voice to be added. */
  locale: SpeechLocale | undefined;
  /** The wav file for cloning the voice, provided as bytes. */
  data: Uint8Array;
  /** The name of the new voice. */
  voiceName: string;
  /** The name of the base voice to use for cloning. This voice must be running on the server and be one of the default voices. */
  baseVoiceName: string;
  /** Whether the voice is temporary (not available after a server restart) or non-temporary. */
  temporaryVoice: boolean;
}

/** The response to an AddVoiceRequest, indicating the status of the operation. */
export interface AddVoiceResponse {
  /** The status indicating whether the voice has been added successfully. */
  status: VoiceAdditionStatus;
}

/** Request message of the `clearCache` method. */
export interface ClearCacheRequest {
  /**
   * If provided, only entries for this voice_id will be cleared.
   * If empty, the entire cache will be cleared.
   */
  voiceId: string;
}

/** Response message of the `clearCache` method. */
export interface ClearCacheResponse {
  /** The status of the clear cache operation. */
  status: ClearCacheStatus;
  /** The number of entries that were deleted. */
  deletedCount: number;
}

/** Request message of the `testCacheConnection` method; it declares no fields. */
export interface TestCacheConnectionRequest {
}

/** Response message of the `testCacheConnection` method. */
export interface TestCacheConnectionResponse {
  status: TestCacheConnectionStatus;
  /** Configured backend name (e.g. diskcache/redis/postgres). */
  backend: string;
  /** Human-readable detail (empty when OK). */
  message: string;
}

/** Request for SSML documentation for a specific voice. */
export interface SsmlDocumentationRequest {
  /** The voice ID to get SSML documentation for. */
  voiceId: string;
  /** BCP-47 locale code for the documentation language, e.g. "en", "de". */
  locale: string;
}

/** Response containing SSML documentation sections for a voice. */
export interface SsmlDocumentationResponse {
  /** The voice ID this documentation belongs to. */
  voiceId: string;
  /** Human-readable documentation sections, one per supported non-internal tag per locale. */
  documentation: SsmlDocSection[];
}

/*
 * Each message interface above has a same-named value that provides the
 * `MessageFns` helpers (encode/decode/JSON conversion/construction) for it.
 */
export declare const SpeechRequest: MessageFns<SpeechRequest>;
export declare const PhonesetRequest: MessageFns<PhonesetRequest>;
export declare const PhonesetResponse: MessageFns<PhonesetResponse>;
export declare const TranscriptionRequest: MessageFns<TranscriptionRequest>;
export declare const TranscriptionResponse: MessageFns<TranscriptionResponse>;
export declare const ServerCommand: MessageFns<ServerCommand>;
export declare const ServerCommandResponse: MessageFns<ServerCommandResponse>;
export declare const SpeechResponse: MessageFns<SpeechResponse>;
export declare const VoiceListRequest: MessageFns<VoiceListRequest>;
export declare const AddVoiceRequest: MessageFns<AddVoiceRequest>;
export declare const AddVoiceResponse: MessageFns<AddVoiceResponse>;
export declare const ClearCacheRequest: MessageFns<ClearCacheRequest>;
export declare const ClearCacheResponse: MessageFns<ClearCacheResponse>;
export declare const TestCacheConnectionRequest: MessageFns<TestCacheConnectionRequest>;
export declare const TestCacheConnectionResponse: MessageFns<TestCacheConnectionResponse>;
export declare const SsmlDocumentationRequest: MessageFns<SsmlDocumentationRequest>;
export declare const SsmlDocumentationResponse: MessageFns<SsmlDocumentationResponse>;

/** Service that implements Aristech Speech-API (TTS-API, ariTTS) */
export type SpeechServiceService = typeof SpeechServiceService;

/**
 * Method descriptors of `aristech.tts.SpeechService`. Each entry gives the
 * method path, whether request/response are streamed, and the functions that
 * convert the request and response messages to and from `Buffer`.
 */
export declare const SpeechServiceService: {
  /**
   * Performs Text-to-Speech with the given SpeechRequest and streams back the audio as
   * packets of type SpeechResponse.
   */
  readonly getSpeech: {
    readonly path: "/aristech.tts.SpeechService/GetSpeech";
    readonly requestStream: false;
    readonly responseStream: true;
    readonly requestSerialize: (value: SpeechRequest) => Buffer;
    readonly requestDeserialize: (value: Buffer) => SpeechRequest;
    readonly responseSerialize: (value: SpeechResponse) => Buffer;
    readonly responseDeserialize: (value: Buffer) => SpeechResponse;
  };
  /**
   * Performs Text-to-Speech and streams back the audio. Adds the capability to stop
   * the speech synthesis and free a port during synthesis.
   */
  readonly controlServer: {
    readonly path: "/aristech.tts.SpeechService/ControlServer";
    readonly requestStream: true;
    readonly responseStream: true;
    readonly requestSerialize: (value: ServerCommand) => Buffer;
    readonly requestDeserialize: (value: Buffer) => ServerCommand;
    readonly responseSerialize: (value: ServerCommandResponse) => Buffer;
    readonly responseDeserialize: (value: Buffer) => ServerCommandResponse;
  };
  /** Returns available voices as stream. */
  readonly getVoiceList: {
    readonly path: "/aristech.tts.SpeechService/GetVoiceList";
    readonly requestStream: false;
    readonly responseStream: true;
    readonly requestSerialize: (value: VoiceListRequest) => Buffer;
    readonly requestDeserialize: (value: Buffer) => VoiceListRequest;
    readonly responseSerialize: (value: Voice) => Buffer;
    readonly responseDeserialize: (value: Buffer) => Voice;
  };
  /** Returns the phoneset for a given voice */
  readonly getPhoneset: {
    readonly path: "/aristech.tts.SpeechService/GetPhoneset";
    readonly requestStream: false;
    readonly responseStream: false;
    readonly requestSerialize: (value: PhonesetRequest) => Buffer;
    readonly requestDeserialize: (value: Buffer) => PhonesetRequest;
    readonly responseSerialize: (value: PhonesetResponse) => Buffer;
    readonly responseDeserialize: (value: Buffer) => PhonesetResponse;
  };
  /** Returns the transcription for a word for a given voice */
  readonly getTranscription: {
    readonly path: "/aristech.tts.SpeechService/GetTranscription";
    readonly requestStream: false;
    readonly responseStream: false;
    readonly requestSerialize: (value: TranscriptionRequest) => Buffer;
    readonly requestDeserialize: (value: Buffer) => TranscriptionRequest;
    readonly responseSerialize: (value: TranscriptionResponse) => Buffer;
    readonly responseDeserialize: (value: Buffer) => TranscriptionResponse;
  };
  /** Adds a clone of a voice to the server, created from a wav file provided by the request. */
  readonly addVoice: {
    readonly path: "/aristech.tts.SpeechService/AddVoice";
    readonly requestStream: false;
    readonly responseStream: false;
    readonly requestSerialize: (value: AddVoiceRequest) => Buffer;
    readonly requestDeserialize: (value: Buffer) => AddVoiceRequest;
    readonly responseSerialize: (value: AddVoiceResponse) => Buffer;
    readonly responseDeserialize: (value: Buffer) => AddVoiceResponse;
  };
  /** Returns SSML documentation for a specific voice, filtered to that voice's supported tags. */
  readonly getSsmlDocumentation: {
    readonly path: "/aristech.tts.SpeechService/GetSsmlDocumentation";
    readonly requestStream: false;
    readonly responseStream: false;
    readonly requestSerialize: (value: SsmlDocumentationRequest) => Buffer;
    readonly requestDeserialize: (value: Buffer) => SsmlDocumentationRequest;
    readonly responseSerialize: (value: SsmlDocumentationResponse) => Buffer;
    readonly responseDeserialize: (value: Buffer) => SsmlDocumentationResponse;
  };
  /** Clears the cache of the server, removing all cached audio data. */
  readonly clearCache: {
    readonly path: "/aristech.tts.SpeechService/ClearCache";
    readonly requestStream: false;
    readonly responseStream: false;
    readonly requestSerialize: (value: ClearCacheRequest) => Buffer;
    readonly requestDeserialize: (value: Buffer) => ClearCacheRequest;
    readonly responseSerialize: (value: ClearCacheResponse) => Buffer;
    readonly responseDeserialize: (value: Buffer) => ClearCacheResponse;
  };
  /** Tests whether the configured cache backend is reachable/usable. */
  readonly testCacheConnection: {
    readonly path: "/aristech.tts.SpeechService/TestCacheConnection";
    readonly requestStream: false;
    readonly responseStream: false;
    readonly requestSerialize: (value: TestCacheConnectionRequest) => Buffer;
    readonly requestDeserialize: (value: Buffer) => TestCacheConnectionRequest;
    readonly responseSerialize: (value: TestCacheConnectionResponse) => Buffer;
    readonly responseDeserialize: (value: Buffer) => TestCacheConnectionResponse;
  };
};

/**
 * Server-side handler contract for `SpeechService`: one handler per method,
 * typed by the method's streaming shape (unary, server-streaming or bidirectional).
 */
export interface SpeechServiceServer extends UntypedServiceImplementation {
  /**
   * Performs Text-to-Speech with the given SpeechRequest and streams back the audio as
   * packets of type SpeechResponse.
   */
  getSpeech: handleServerStreamingCall<SpeechRequest, SpeechResponse>;
  /**
   * Performs Text-to-Speech and streams back the audio. Adds the capability to stop
   * the speech synthesis and free a port during synthesis.
   */
  controlServer: handleBidiStreamingCall<ServerCommand, ServerCommandResponse>;
  /** Returns available voices as stream. */
  getVoiceList: handleServerStreamingCall<VoiceListRequest, Voice>;
  /** Returns the phoneset for a given voice */
  getPhoneset: handleUnaryCall<PhonesetRequest, PhonesetResponse>;
  /** Returns the transcription for a word for a given voice */
  getTranscription: handleUnaryCall<TranscriptionRequest, TranscriptionResponse>;
  /** Adds a clone of a voice to the server, created from a wav file provided by the request. */
  addVoice: handleUnaryCall<AddVoiceRequest, AddVoiceResponse>;
  /** Returns SSML documentation for a specific voice, filtered to that voice's supported tags. */
  getSsmlDocumentation: handleUnaryCall<SsmlDocumentationRequest, SsmlDocumentationResponse>;
  /** Clears the cache of the server, removing all cached audio data. */
  clearCache: handleUnaryCall<ClearCacheRequest, ClearCacheResponse>;
  /** Tests whether the configured cache backend is reachable/usable. */
  testCacheConnection: handleUnaryCall<TestCacheConnectionRequest, TestCacheConnectionResponse>;
}

/**
 * Client-side API of `SpeechService`. Streaming methods return a stream
 * object; unary methods take a callback and return a `ClientUnaryCall`.
 * Overloads differ only in whether `metadata` and call `options` are passed.
 */
export interface SpeechServiceClient extends Client {
  /**
   * Performs Text-to-Speech with the given SpeechRequest and streams back the audio as
   * packets of type SpeechResponse.
   */
  getSpeech(request: SpeechRequest, options?: Partial<CallOptions>): ClientReadableStream<SpeechResponse>;
  getSpeech(request: SpeechRequest, metadata?: Metadata, options?: Partial<CallOptions>): ClientReadableStream<SpeechResponse>;
  /**
   * Performs Text-to-Speech and streams back the audio. Adds the capability to stop
   * the speech synthesis and free a port during synthesis.
   */
  controlServer(): ClientDuplexStream<ServerCommand, ServerCommandResponse>;
  controlServer(options: Partial<CallOptions>): ClientDuplexStream<ServerCommand, ServerCommandResponse>;
  controlServer(metadata: Metadata, options?: Partial<CallOptions>): ClientDuplexStream<ServerCommand, ServerCommandResponse>;
  /** Returns available voices as stream. */
  getVoiceList(request: VoiceListRequest, options?: Partial<CallOptions>): ClientReadableStream<Voice>;
  getVoiceList(request: VoiceListRequest, metadata?: Metadata, options?: Partial<CallOptions>): ClientReadableStream<Voice>;
  /** Returns the phoneset for a given voice */
  getPhoneset(request: PhonesetRequest, callback: (error: ServiceError | null, response: PhonesetResponse) => void): ClientUnaryCall;
  getPhoneset(request: PhonesetRequest, metadata: Metadata, callback: (error: ServiceError | null, response: PhonesetResponse) => void): ClientUnaryCall;
  getPhoneset(request: PhonesetRequest, metadata: Metadata, options: Partial<CallOptions>, callback: (error: ServiceError | null, response: PhonesetResponse) => void): ClientUnaryCall;
  /** Returns the transcription for a word for a given voice */
  getTranscription(request: TranscriptionRequest, callback: (error: ServiceError | null, response: TranscriptionResponse) => void): ClientUnaryCall;
  getTranscription(request: TranscriptionRequest, metadata: Metadata, callback: (error: ServiceError | null, response: TranscriptionResponse) => void): ClientUnaryCall;
  getTranscription(request: TranscriptionRequest, metadata: Metadata, options: Partial<CallOptions>, callback: (error: ServiceError | null, response: TranscriptionResponse) => void): ClientUnaryCall;
  /** Adds a clone of a voice to the server, created from a wav file provided by the request. */
  addVoice(request: AddVoiceRequest, callback: (error: ServiceError | null, response: AddVoiceResponse) => void): ClientUnaryCall;
  addVoice(request: AddVoiceRequest, metadata: Metadata, callback: (error: ServiceError | null, response: AddVoiceResponse) => void): ClientUnaryCall;
  addVoice(request: AddVoiceRequest, metadata: Metadata, options: Partial<CallOptions>, callback: (error: ServiceError | null, response: AddVoiceResponse) => void): ClientUnaryCall;
  /** Returns SSML documentation for a specific voice, filtered to that voice's supported tags. */
  getSsmlDocumentation(request: SsmlDocumentationRequest, callback: (error: ServiceError | null, response: SsmlDocumentationResponse) => void): ClientUnaryCall;
  getSsmlDocumentation(request: SsmlDocumentationRequest, metadata: Metadata, callback: (error: ServiceError | null, response: SsmlDocumentationResponse) => void): ClientUnaryCall;
  getSsmlDocumentation(request: SsmlDocumentationRequest, metadata: Metadata, options: Partial<CallOptions>, callback: (error: ServiceError | null, response: SsmlDocumentationResponse) => void): ClientUnaryCall;
  /** Clears the cache of the server, removing all cached audio data. */
  clearCache(request: ClearCacheRequest, callback: (error: ServiceError | null, response: ClearCacheResponse) => void): ClientUnaryCall;
  clearCache(request: ClearCacheRequest, metadata: Metadata, callback: (error: ServiceError | null, response: ClearCacheResponse) => void): ClientUnaryCall;
  clearCache(request: ClearCacheRequest, metadata: Metadata, options: Partial<CallOptions>, callback: (error: ServiceError | null, response: ClearCacheResponse) => void): ClientUnaryCall;
  /** Tests whether the configured cache backend is reachable/usable. */
  testCacheConnection(request: TestCacheConnectionRequest, callback: (error: ServiceError | null, response: TestCacheConnectionResponse) => void): ClientUnaryCall;
  testCacheConnection(request: TestCacheConnectionRequest, metadata: Metadata, callback: (error: ServiceError | null, response: TestCacheConnectionResponse) => void): ClientUnaryCall;
  testCacheConnection(request: TestCacheConnectionRequest, metadata: Metadata, options: Partial<CallOptions>, callback: (error: ServiceError | null, response: TestCacheConnectionResponse) => void): ClientUnaryCall;
}

/**
 * Constructor for `SpeechServiceClient` instances. Takes the server address,
 * channel credentials and optional client options; also exposes the service
 * definition and the service name.
 */
export declare const SpeechServiceClient: {
  new (address: string, credentials: ChannelCredentials, options?: Partial<ClientOptions>): SpeechServiceClient;
  service: typeof SpeechServiceService;
  serviceName: string;
};

/** Internal Use Only: debug access to engine */
export type DebugServiceService = typeof DebugServiceService;

/** Method descriptors of `aristech.tts.DebugService` (internal use only, per the service description). */
export declare const DebugServiceService: {
  /** Server-streaming method that takes a `SpeechRequest` and yields `SpeechResponse` messages. */
  readonly processData: {
    readonly path: "/aristech.tts.DebugService/ProcessData";
    readonly requestStream: false;
    readonly responseStream: true;
    readonly requestSerialize: (value: SpeechRequest) => Buffer;
    readonly requestDeserialize: (value: Buffer) => SpeechRequest;
    readonly responseSerialize: (value: SpeechResponse) => Buffer;
    readonly responseDeserialize: (value: Buffer) => SpeechResponse;
  };
};

/** Server-side handler contract for `DebugService`. */
export interface DebugServiceServer extends UntypedServiceImplementation {
  processData: handleServerStreamingCall<SpeechRequest, SpeechResponse>;
}

/** Client-side API of `DebugService`; overloads differ only in whether `metadata` is passed. */
export interface DebugServiceClient extends Client {
  processData(request: SpeechRequest, options?: Partial<CallOptions>): ClientReadableStream<SpeechResponse>;
  processData(request: SpeechRequest, metadata?: Metadata, options?: Partial<CallOptions>): ClientReadableStream<SpeechResponse>;
}

/** Constructor for `DebugServiceClient` instances, with the same shape as `SpeechServiceClient`'s constructor. */
export declare const DebugServiceClient: {
  new (address: string, credentials: ChannelCredentials, options?: Partial<ClientOptions>): DebugServiceClient;
  service: typeof DebugServiceService;
  serviceName: string;
};

/** Types that `DeepPartial` and `Exact` treat as leaves and do not recurse into. */
type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined;

/**
 * Recursively makes every property of `T` optional. `Builtin` types are left
 * unchanged and arrays are mapped element-wise.
 */
export type DeepPartial<T> = T extends Builtin
  ? T
  : T extends globalThis.Array<infer U>
    ? globalThis.Array<DeepPartial<U>>
    : T extends ReadonlyArray<infer U>
      ? ReadonlyArray<DeepPartial<U>>
      : T extends {}
        ? { [K in keyof T]?: DeepPartial<T[K]>; }
        : Partial<T>;

/** Distributes over a union so that the keys of every member are collected. */
type KeysOfUnion<T> = T extends T ? keyof T : never;

/**
 * Constrains `I` to the shape of `P`: any key of `I` that is not a key of `P`
 * is typed `never`, so excess properties are rejected at compile time.
 */
export type Exact<P, I extends P> = P extends Builtin
  ? P
  : P & { [K in keyof P]: Exact<P[K], I[K]>; } & { [K in Exclude<keyof I, KeysOfUnion<P>>]: never; };

/** Helper functions available for every message type `T` declared in this file. */
export interface MessageFns<T> {
  /**
   * Encodes `message` and returns a `BinaryWriter`. An existing `writer` may
   * be supplied (presumably to append to it — confirm against the implementation).
   */
  encode(message: T, writer?: BinaryWriter): BinaryWriter;
  /**
   * Decodes a `T` from a `BinaryReader` or from raw bytes. `length` is
   * optional (presumably the number of bytes to read — confirm against the implementation).
   */
  decode(input: BinaryReader | Uint8Array, length?: number): T;
  /** Builds a `T` from a JSON-decoded value. */
  fromJSON(object: any): T;
  /** Converts `message` into a JSON-compatible value. */
  toJSON(message: T): unknown;
  /** Builds a `T` from an optional deep-partial `base`; keys outside `T` are rejected by `Exact`. */
  create<I extends Exact<DeepPartial<T>, I>>(base?: I): T;
  /** Builds a `T` from a deep-partial `object`; keys outside `T` are rejected by `Exact`. */
  fromPartial<I extends Exact<DeepPartial<T>, I>>(object: I): T;
}

export {};