@aristech-org/tts-client
Version:
A Node.js client library for the Aristech Text-to-Speech API
221 lines (220 loc) • 8.66 kB
TypeScript
import { BinaryReader, BinaryWriter } from "@bufbuild/protobuf/wire";
/** Name of the protobuf package these declarations belong to (`aristech.tts`). */
export declare const protobufPackage = "aristech.tts";
/**
* Selects which sqrs are applied to a request. Used by
* `SpeechRequestOption.sqrType` (deprecated) and
* `SpeechRequestOption_NormalizationOptions.sqrType`.
*/
export declare enum SqrType {
/** UNSET - default value to identify clients that didn't set a value */
UNSET = 0,
/** NO_SQRS - don't apply synced sqrs */
NO_SQRS = 1,
/** PRODUCTIVE - apply only productive sqrs */
PRODUCTIVE = 2,
/** NON_PRODUCTIVE - apply only non-productive sqrs */
NON_PRODUCTIVE = 3,
/** ALL - apply all (latest) sqrs */
ALL = 4,
/**
* NOTE(review): presumably the fallback for values that match none of the
* members above - confirm against the generated implementation.
*/
UNRECOGNIZED = -1
}
/**
* Converts a JSON value into a `SqrType`.
*
* NOTE(review): the accepted input forms (numeric value vs. member name) and
* the result for unknown input are not visible in this declaration - confirm
* against the implementation.
*
* @param object - The JSON value to convert (untyped).
* @returns The corresponding `SqrType` member.
*/
export declare function sqrTypeFromJSON(object: any): SqrType;
/**
* Converts a `SqrType` into its string JSON representation.
*
* NOTE(review): presumably the member name (e.g. "ALL") - confirm against the
* implementation.
*
* @param object - The enum value to convert.
* @returns The string form of the value.
*/
export declare function sqrTypeToJSON(object: SqrType): string;
/** Further options for a `SpeechRequest`. */
export interface SpeechRequestOption {
/**
* The `voiceId` from `Voice` to use as wrapping voice to synthesize text.
* Note: The `voiceId` can also be swapped within the text by using SSML's
* `<voice/>` tag with another `voiceId`.
*/
voiceId: string;
/** Further options that specify the generated audio; may be `undefined`. */
audio: SpeechAudioFormat | undefined;
/**
* Whether the text should be normalized before being transformed into
* audio.
*/
normalize: boolean;
/**
* Which sqrs from the designer should be applied.
*
* @deprecated Use `sqrType` in `normalizationOptions` instead.
*/
sqrType: SqrType;
/** Options to customize the normalization process; may be `undefined`. */
normalizationOptions: SpeechRequestOption_NormalizationOptions | undefined;
/** If true, disable cache usage for this request (don't read/write cache). */
disableCache: boolean;
}
/** Options to customize the normalization process. */
export interface SpeechRequestOption_NormalizationOptions {
/**
* Which sqrs from the tts-designer should be applied. Replaces the deprecated
* `sqrType` in `SpeechRequestOption` and takes precedence over it.
*/
sqrType: SqrType;
/** Whether to keep markdown syntax in the text or remove it during normalization. */
keepMarkdown: boolean;
/**
* If true, the server will attempt to synthesize the full input in a single pass
* instead of generating each sentence on its own. Optional: may be omitted or
* `undefined`.
* Notes:
* - Only supported by backends that support streaming/single-pass style synthesis.
* - For long inputs, internal limits may still split text into multiple chunks.
*/
attemptSinglePassSynthesis?: boolean | undefined;
}
/**
* `SpeechAudioFormat` specifies the returned audio format for a
* `getSpeech` request. It is also the type of `Voice.audio`.
*/
export interface SpeechAudioFormat {
/** The container to use. */
container: SpeechAudioFormat_Container;
/** The encoding of the audio. */
codec: SpeechAudioFormat_Codec;
/** The sample rate of the audio. */
samplerate: number;
/** Bit depth of the audio (e.g. 16). */
bitDepth: number;
/** The channel count of the audio. */
channels: number;
/** The bitrate of the audio (only for compressed formats like mp3, ogg). */
bitrate: number;
}
/** Indicates the container of the audio (see `SpeechAudioFormat.container`). */
export declare enum SpeechAudioFormat_Container {
/** RAW - raw */
RAW = 0,
/** WAV - wav */
WAV = 1,
/** FLAC - flac */
FLAC = 2,
/** OGG - ogg */
OGG = 3,
/** MP3 - mp3 */
MP3 = 4,
/** SPEEX - speex */
SPEEX = 5,
/**
* NOTE(review): presumably the fallback for values that match none of the
* members above - confirm against the generated implementation.
*/
UNRECOGNIZED = -1
}
/**
* Converts a JSON value into a `SpeechAudioFormat_Container`.
*
* NOTE(review): the accepted input forms and the result for unknown input are
* not visible in this declaration - confirm against the implementation.
*
* @param object - The JSON value to convert (untyped).
* @returns The corresponding `SpeechAudioFormat_Container` member.
*/
export declare function speechAudioFormat_ContainerFromJSON(object: any): SpeechAudioFormat_Container;
/**
* Converts a `SpeechAudioFormat_Container` into its string JSON representation.
*
* NOTE(review): presumably the member name (e.g. "WAV") - confirm against the
* implementation.
*
* @param object - The enum value to convert.
* @returns The string form of the value.
*/
export declare function speechAudioFormat_ContainerToJSON(object: SpeechAudioFormat_Container): string;
/** Indicates the codec of the audio (see `SpeechAudioFormat.codec`). */
export declare enum SpeechAudioFormat_Codec {
/** PCM - pcm a.k.a. signed-integer */
PCM = 0,
/** MU_LAW - mu-law */
MU_LAW = 1,
/** A_LAW - a-law */
A_LAW = 2,
/**
* NOTE(review): presumably the fallback for values that match none of the
* members above - confirm against the generated implementation.
*/
UNRECOGNIZED = -1
}
/**
* Converts a JSON value into a `SpeechAudioFormat_Codec`.
*
* NOTE(review): the accepted input forms and the result for unknown input are
* not visible in this declaration - confirm against the implementation.
*
* @param object - The JSON value to convert (untyped).
* @returns The corresponding `SpeechAudioFormat_Codec` member.
*/
export declare function speechAudioFormat_CodecFromJSON(object: any): SpeechAudioFormat_Codec;
/**
* Converts a `SpeechAudioFormat_Codec` into its string JSON representation.
*
* NOTE(review): presumably the member name (e.g. "PCM") - confirm against the
* implementation.
*
* @param object - The enum value to convert.
* @returns The string form of the value.
*/
export declare function speechAudioFormat_CodecToJSON(object: SpeechAudioFormat_Codec): string;
/** Represents a language's locale (see `Voice.locale`). */
export interface SpeechLocale {
/** The locale identifier, e.g. de_DE */
language: string;
/** The accent code, e.g. DE */
accent: string;
/** The language's display name, e.g. Deutsch */
languageName: string;
/** The accent's display name, e.g. Deutschland */
accentName: string;
}
/** Holds some default config values of a voice (see `Voice.defaults`). */
export interface VoiceDefaults {
/** Default rate, as a string. NOTE(review): value format is not visible here - confirm. */
rate: string;
/** Default pitch, as a string. NOTE(review): value format is not visible here - confirm. */
pitch: string;
/** NOTE(review): presumably marks this voice as the default voice - confirm. */
defaultVoice: boolean;
/** Default volume, as a string. NOTE(review): value format is not visible here - confirm. */
volume: string;
/** Whether single-pass synthesis is enabled by default for this voice when the request does not override it. */
attemptSinglePassSynthesisDefault: boolean;
}
/** Contains information about a voice. */
export interface Voice {
/** [Output-only] The voice name. */
name: string;
/** [Output-only] The audio type of the voice; may be `undefined`. */
audio: SpeechAudioFormat | undefined;
/** [Output-only] The locale of the voice; may be `undefined`. */
locale: SpeechLocale | undefined;
/** [Output-only] The gender of the voice. */
gender: Voice_Gender;
/**
* [Output-only] The voice id.
* This is the value to use as `SpeechRequestOption.voiceId`.
*/
voiceId: string;
/** [Output-only] The uuid of the voice. */
uuid: string;
/** The version number of the voice. */
version: string;
/** Available ports for the voice, as configured in the license. */
portsAvailable: number;
/** Whether debug is available for the voice (only with developer license). */
debugAvailable: boolean;
/** Default config values; may be `undefined`. */
defaults: VoiceDefaults | undefined;
/** Whether this voice supports request-level single-pass synthesis mode. */
supportsSinglePassSynthesis: boolean;
/** SSML capabilities and documentation for this voice; may be `undefined`. */
ssmlProfile: VoiceSsmlProfile | undefined;
}
/** Gender of a voice (see `Voice.gender`). */
export declare enum Voice_Gender {
FEMALE = 0,
MALE = 1,
OTHER = 2,
/**
* NOTE(review): presumably the fallback for values that match none of the
* members above - confirm against the generated implementation.
*/
UNRECOGNIZED = -1
}
/**
* Converts a JSON value into a `Voice_Gender`.
*
* NOTE(review): the accepted input forms and the result for unknown input are
* not visible in this declaration - confirm against the implementation.
*
* @param object - The JSON value to convert (untyped).
* @returns The corresponding `Voice_Gender` member.
*/
export declare function voice_GenderFromJSON(object: any): Voice_Gender;
/**
* Converts a `Voice_Gender` into its string JSON representation.
*
* NOTE(review): presumably the member name (e.g. "FEMALE") - confirm against
* the implementation.
*
* @param object - The enum value to convert.
* @returns The string form of the value.
*/
export declare function voice_GenderToJSON(object: Voice_Gender): string;
/**
* Describes a single supported SSML tag and its allowed attributes.
* Element type of `VoiceSsmlProfile.supportedTags`.
*/
export interface SsmlTagInfo {
/** The tag name, e.g. "break", "prosody", "phoneme". */
tag: string;
/** The list of allowed attribute names for this tag. */
allowedAttributes: string[];
/** Whether this is a self-closing tag (e.g. `<break/>`, `<audio/>`). */
selfClosing: boolean;
}
/**
* Per-voice SSML capabilities (lightweight, included in GetVoiceList).
* Exposed on a voice as `Voice.ssmlProfile`.
*/
export interface VoiceSsmlProfile {
/** The SSML tags this voice supports, with attribute constraints. */
supportedTags: SsmlTagInfo[];
}
/** A human-readable documentation section for a single SSML tag. */
export interface SsmlDocSection {
/** The tag this documentation refers to, e.g. "break". */
tag: string;
/** Display title, e.g. "Pauses with `<break>`". */
title: string;
/** Markdown-formatted documentation body. */
markdown: string;
/** Example SSML snippets demonstrating usage. */
examples: string[];
}
/**
* Value-side companions of the message interfaces declared in this file.
* Each constant shares its name with the interface it serves and exposes the
* `MessageFns` operations (`encode`, `decode`, `fromJSON`, `toJSON`, `create`,
* `fromPartial`) for that message type.
*/
export declare const SpeechRequestOption: MessageFns<SpeechRequestOption>;
export declare const SpeechRequestOption_NormalizationOptions: MessageFns<SpeechRequestOption_NormalizationOptions>;
export declare const SpeechAudioFormat: MessageFns<SpeechAudioFormat>;
export declare const SpeechLocale: MessageFns<SpeechLocale>;
export declare const VoiceDefaults: MessageFns<VoiceDefaults>;
export declare const Voice: MessageFns<Voice>;
export declare const SsmlTagInfo: MessageFns<SsmlTagInfo>;
export declare const VoiceSsmlProfile: MessageFns<VoiceSsmlProfile>;
export declare const SsmlDocSection: MessageFns<SsmlDocSection>;
/**
* Leaf types for the recursive helpers in this file: `DeepPartial` and `Exact`
* return a type unchanged, without recursing into it, when it extends `Builtin`.
*/
type Builtin = Date | Function | Uint8Array | string | number | boolean | undefined;
/**
* Recursively optional version of `T`. Resolved in this order:
* - `Builtin` types are returned unchanged;
* - arrays and readonly arrays keep their container and map their element type;
* - any other type assignable to `{}` has every property made optional, with
*   the property type mapped recursively;
* - anything else falls back to `Partial<T>`.
*/
export type DeepPartial<T> = T extends Builtin ? T : T extends globalThis.Array<infer U> ? globalThis.Array<DeepPartial<U>> : T extends ReadonlyArray<infer U> ? ReadonlyArray<DeepPartial<U>> : T extends {} ? {
[K in keyof T]?: DeepPartial<T[K]>;
} : Partial<T>;
/**
* Keys of every member of a union `T`. The `T extends T` conditional
* distributes over the union, so the result is the union of each member's
* keys rather than only the keys shared by all members.
*/
type KeysOfUnion<T> = T extends T ? keyof T : never;
/**
* Rejects surplus keys on `I` relative to `P`.
* `Builtin` types are returned unchanged. Otherwise the result is `P`
* intersected with (a) a recursive `Exact` mapping over each key of `P` and
* (b) a mapping that types every key of `I` not found in `P` (or, for a union
* `P`, in any of its members) as `never`.
*/
export type Exact<P, I extends P> = P extends Builtin ? P : P & {
[K in keyof P]: Exact<P[K], I[K]>;
} & {
[K in Exclude<keyof I, KeysOfUnion<P>>]: never;
};
/**
* Operations available for a message type `T`. Each exported message constant
* in this file is declared as `MessageFns` of its matching interface.
*/
export interface MessageFns<T> {
/**
* Encodes `message` and returns a `BinaryWriter`.
* NOTE(review): presumably writes into `writer` when one is supplied and into
* a new writer otherwise - confirm against the implementation.
*/
encode(message: T, writer?: BinaryWriter): BinaryWriter;
/**
* Decodes a `T` from a `BinaryReader` or raw bytes.
* NOTE(review): `length` presumably limits how many bytes are read - confirm.
*/
decode(input: BinaryReader | Uint8Array, length?: number): T;
/** Builds a `T` from an untyped JSON value. */
fromJSON(object: any): T;
/** Converts `message` into a JSON-compatible value (typed as `unknown`). */
toJSON(message: T): unknown;
/**
* Creates a `T` from an optional deep-partial `base`. The `Exact` constraint
* types keys of `base` that do not exist on `DeepPartial<T>` as `never`.
* NOTE(review): values used for omitted fields are not visible here - confirm.
*/
create<I extends Exact<DeepPartial<T>, I>>(base?: I): T;
/**
* Creates a `T` from a deep-partial `object`, with the same `Exact`
* constraint on surplus keys as `create`; here the argument is required.
*/
fromPartial<I extends Exact<DeepPartial<T>, I>>(object: I): T;
}
export {};