echogarden
Version:
An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.
2,100 lines • 53.4 kB
JSON
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$ref": "#/definitions/APIOptions",
"definitions": {
"APIOptions": {
"type": "object",
"properties": {
"SynthesisOptions": {
"$ref": "#/definitions/SynthesisOptions"
},
"VoiceListRequestOptions": {
"$ref": "#/definitions/VoiceListRequestOptions"
},
"RecognitionOptions": {
"$ref": "#/definitions/RecognitionOptions"
},
"AlignmentOptions": {
"$ref": "#/definitions/AlignmentOptions"
},
"TranslationAlignmentOptions": {
"$ref": "#/definitions/TranslationAlignmentOptions"
},
"TranscriptAndTranslationAlignmentOptions": {
"$ref": "#/definitions/TranscriptAndTranslationAlignmentOptions"
},
"TimelineTranslationAlignmentOptions": {
"$ref": "#/definitions/TimelineTranslationAlignmentOptions"
},
"SpeechTranslationOptions": {
"$ref": "#/definitions/SpeechTranslationOptions"
},
"TextTranslationOptions": {
"$ref": "#/definitions/TextTranslationOptions"
},
"SpeechLanguageDetectionOptions": {
"$ref": "#/definitions/SpeechLanguageDetectionOptions"
},
"TextLanguageDetectionOptions": {
"$ref": "#/definitions/TextLanguageDetectionOptions"
},
"VADOptions": {
"$ref": "#/definitions/VADOptions"
},
"DenoisingOptions": {
"$ref": "#/definitions/DenoisingOptions"
},
"SourceSeparationOptions": {
"$ref": "#/definitions/SourceSeparationOptions"
},
"ServerOptions": {
"$ref": "#/definitions/ServerOptions"
},
"GlobalOptions": {
"$ref": "#/definitions/GlobalOptions"
},
"CLIOptions": {
"$ref": "#/definitions/CLIOptions"
}
},
"required": [
"SynthesisOptions",
"VoiceListRequestOptions",
"RecognitionOptions",
"AlignmentOptions",
"TranslationAlignmentOptions",
"TranscriptAndTranslationAlignmentOptions",
"TimelineTranslationAlignmentOptions",
"SpeechTranslationOptions",
"TextTranslationOptions",
"SpeechLanguageDetectionOptions",
"TextLanguageDetectionOptions",
"VADOptions",
"DenoisingOptions",
"SourceSeparationOptions",
"ServerOptions",
"GlobalOptions",
"CLIOptions"
],
"additionalProperties": false
},
"SynthesisOptions": {
"type": "object",
"properties": {
"engine": {
"$ref": "#/definitions/SynthesisEngine"
},
"language": {
"type": "string"
},
"voice": {
"type": "string"
},
"voiceGender": {
"$ref": "#/definitions/VoiceGender"
},
"speed": {
"type": "number"
},
"pitch": {
"type": "number"
},
"pitchVariation": {
"type": "number"
},
"splitToSentences": {
"type": "boolean"
},
"ssml": {
"type": "boolean"
},
"segmentEndPause": {
"type": "number"
},
"sentenceEndPause": {
"type": "number"
},
"customLexiconPaths": {
"type": "array",
"items": {
"type": "string"
}
},
"plainText": {
"$ref": "#/definitions/PlainTextOptions"
},
"alignment": {
"$ref": "#/definitions/AlignmentOptions"
},
"postProcessing": {
"type": "object",
"properties": {
"normalizeAudio": {
"type": "boolean"
},
"targetPeak": {
"type": "number"
},
"maxGainIncrease": {
"type": "number"
},
"speed": {
"type": "number"
},
"pitch": {
"type": "number"
},
"timePitchShiftingMethod": {
"$ref": "#/definitions/TimePitchShiftingMethod"
},
"rubberband": {
"$ref": "#/definitions/RubberbandOptions"
}
},
"additionalProperties": false
},
"outputAudioFormat": {
"type": "object",
"properties": {
"codec": {
"type": "string",
"enum": [
"wav",
"mp3",
"opus",
"m4a",
"ogg",
"flac"
]
},
"bitrate": {
"type": "number"
}
},
"additionalProperties": false
},
"languageDetection": {
"$ref": "#/definitions/TextLanguageDetectionOptions"
},
"subtitles": {
"$ref": "#/definitions/SubtitlesConfig"
},
"vits": {
"type": "object",
"properties": {
"speakerId": {
"type": "number"
},
"provider": {
"$ref": "#/definitions/OnnxExecutionProvider"
}
},
"additionalProperties": false
},
"kokoro": {
"type": "object",
"properties": {
"provider": {
"$ref": "#/definitions/OnnxExecutionProvider"
},
"model": {
"type": "string",
"enum": [
"82m-v1.0-fp32",
"82m-v1.0-quantized"
]
}
},
"additionalProperties": false
},
"pico": {
"type": "object",
"additionalProperties": false
},
"flite": {
"type": "object",
"additionalProperties": false
},
"gnuspeech": {
"type": "object",
"properties": {
"tempo": {
"type": "number"
},
"controlRate": {
"type": "number"
},
"debug": {
"type": "boolean"
}
},
"additionalProperties": false
},
"espeak": {
"type": "object",
"properties": {
"rate": {
"type": "number"
},
"pitch": {
"type": "number"
},
"pitchRange": {
"type": "number"
},
"useKlatt": {
"type": "boolean"
},
"insertSeparators": {
"type": "boolean"
}
},
"additionalProperties": false
},
"sam": {
"type": "object",
"properties": {
"pitch": {
"type": "number"
},
"speed": {
"type": "number"
},
"mouth": {
"type": "number"
},
"throat": {
"type": "number"
}
},
"additionalProperties": false
},
"sapi": {
"type": "object",
"properties": {
"rate": {
"type": "number"
}
},
"additionalProperties": false
},
"msspeech": {
"type": "object",
"properties": {
"rate": {
"type": "number"
}
},
"additionalProperties": false
},
"coquiServer": {
"type": "object",
"properties": {
"serverUrl": {
"type": "string"
},
"speakerId": {
"type": [
"string",
"null"
]
}
},
"additionalProperties": false
},
"googleCloud": {
"type": "object",
"properties": {
"apiKey": {
"type": "string"
},
"pitchDeltaSemitones": {
"type": "number"
},
"customVoice": {
"type": "object",
"properties": {
"model": {
"type": "string"
},
"reportedUsage": {
"type": "string"
}
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"microsoftAzure": {
"type": "object",
"properties": {
"subscriptionKey": {
"type": "string"
},
"serviceRegion": {
"type": "string"
},
"pitchDeltaHz": {
"type": "number"
}
},
"additionalProperties": false
},
"amazonPolly": {
"type": "object",
"properties": {
"region": {
"type": "string"
},
"accessKeyId": {
"type": "string"
},
"secretAccessKey": {
"type": "string"
},
"pollyEngine": {
"type": "string",
"enum": [
"standard",
"neural"
]
},
"lexiconNames": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false
},
"openAICloud": {
"$ref": "#/definitions/OpenAICloudTTSOptions"
},
"elevenLabs": {
"$ref": "#/definitions/ElevenLabsTTSOptions"
},
"deepgram": {
"$ref": "#/definitions/DeepgramTTSOptions"
},
"googleTranslate": {
"type": "object",
"properties": {
"tld": {
"type": "string"
}
},
"additionalProperties": false
},
"microsoftEdge": {
"type": "object",
"properties": {
"trustedClientToken": {
"type": "string"
},
"pitchDeltaHz": {
"type": "number"
}
},
"additionalProperties": false
},
"streamlabsPolly": {
"type": "object",
"additionalProperties": false
}
},
"additionalProperties": false
},
"SynthesisEngine": {
"type": "string",
"enum": [
"vits",
"kokoro",
"pico",
"flite",
"gnuspeech",
"espeak",
"sam",
"sapi",
"msspeech",
"coqui-server",
"google-cloud",
"microsoft-azure",
"amazon-polly",
"openai-cloud",
"elevenlabs",
"deepgram",
"google-translate",
"microsoft-edge",
"streamlabs-polly"
]
},
"VoiceGender": {
"type": "string",
"enum": [
"male",
"female",
"unknown"
]
},
"PlainTextOptions": {
"type": "object",
"properties": {
"paragraphBreaks": {
"$ref": "#/definitions/ParagraphBreakType"
},
"whitespace": {
"$ref": "#/definitions/WhitespaceProcessing"
}
},
"additionalProperties": false
},
"ParagraphBreakType": {
"type": "string",
"enum": [
"single",
"double"
]
},
"WhitespaceProcessing": {
"type": "string",
"enum": [
"preserve",
"removeLineBreaks",
"collapse"
]
},
"AlignmentOptions": {
"type": "object",
"properties": {
"engine": {
"$ref": "#/definitions/AlignmentEngine"
},
"language": {
"type": "string"
},
"isolate": {
"type": "boolean"
},
"crop": {
"type": "boolean"
},
"customLexiconPaths": {
"type": "array",
"items": {
"type": "string"
}
},
"languageDetection": {
"$ref": "#/definitions/TextLanguageDetectionOptions"
},
"vad": {
"$ref": "#/definitions/VADOptions"
},
"plainText": {
"$ref": "#/definitions/PlainTextOptions"
},
"subtitles": {
"$ref": "#/definitions/SubtitlesConfig"
},
"dtw": {
"type": "object",
"properties": {
"granularity": {
"anyOf": [
{
"$ref": "#/definitions/DtwGranularity"
},
{
"type": "array",
"items": {
"$ref": "#/definitions/DtwGranularity"
}
}
]
},
"windowDuration": {
"anyOf": [
{
"type": "number"
},
{
"type": "string"
},
{
"type": "array",
"items": {
"type": [
"string",
"number"
]
}
}
]
},
"phoneAlignmentMethod": {
"$ref": "#/definitions/PhoneAlignmentMethod"
}
},
"additionalProperties": false
},
"recognition": {
"$ref": "#/definitions/RecognitionOptions"
},
"sourceSeparation": {
"$ref": "#/definitions/SourceSeparationOptions"
},
"whisper": {
"$ref": "#/definitions/WhisperAlignmentOptions"
}
},
"additionalProperties": false
},
"AlignmentEngine": {
"type": "string",
"enum": [
"dtw",
"dtw-ra",
"dtw-ea",
"whisper"
]
},
"TextLanguageDetectionOptions": {
"type": "object",
"properties": {
"engine": {
"$ref": "#/definitions/TextLanguageDetectionEngine"
},
"defaultLanguage": {
"type": "string"
},
"fallbackThresholdProbability": {
"type": "number"
}
},
"additionalProperties": false
},
"TextLanguageDetectionEngine": {
"type": "string",
"enum": [
"tinyld",
"fasttext"
]
},
"VADOptions": {
"type": "object",
"properties": {
"engine": {
"$ref": "#/definitions/VADEngine"
},
"activityThreshold": {
"type": "number"
},
"webrtc": {
"type": "object",
"properties": {
"frameDuration": {
"type": "number",
"enum": [
10,
20,
30
]
},
"mode": {
"type": "number",
"enum": [
0,
1,
2,
3
]
}
},
"additionalProperties": false
},
"silero": {
"type": "object",
"properties": {
"frameDuration": {
"type": "number",
"enum": [
30,
60,
90
]
},
"provider": {
"$ref": "#/definitions/OnnxExecutionProvider"
}
},
"additionalProperties": false
},
"rnnoise": {
"type": "object",
"additionalProperties": false
},
"whisper": {
"$ref": "#/definitions/WhisperVADOptions"
},
"adaptiveGate": {
"$ref": "#/definitions/AdaptiveGateVADOptions"
}
},
"additionalProperties": false
},
"VADEngine": {
"type": "string",
"enum": [
"webrtc",
"silero",
"rnnoise",
"whisper",
"adaptive-gate"
]
},
"OnnxExecutionProvider": {
"type": "string",
"enum": [
"cpu",
"dml",
"cuda",
"coreml",
"webgpu"
]
},
"WhisperVADOptions": {
"type": "object",
"properties": {
"model": {
"$ref": "#/definitions/WhisperModelName"
},
"temperature": {
"type": "number"
},
"encoderProvider": {
"$ref": "#/definitions/OnnxExecutionProvider"
},
"decoderProvider": {
"$ref": "#/definitions/OnnxExecutionProvider"
}
},
"additionalProperties": false
},
"WhisperModelName": {
"type": "string",
"enum": [
"tiny",
"tiny.en",
"base",
"base.en",
"small",
"small.en",
"medium",
"medium.en",
"large-v1",
"large-v2",
"large-v3",
"large-v3-turbo"
]
},
"AdaptiveGateVADOptions": {
"type": "object",
"properties": {
"lowCutoff": {
"type": "number"
},
"highCutoff": {
"type": "number"
},
"positiveAdaptationRate": {
"type": "number"
},
"negativeAdaptationRate": {
"type": "number"
},
"peakLoudnessDecay": {
"type": "number"
},
"backwardExtensionDuration": {
"type": "number"
},
"relativeThreshold": {
"type": "number"
}
},
"additionalProperties": false
},
"SubtitlesConfig": {
"type": "object",
"properties": {
"format": {
"type": "string",
"enum": [
"srt",
"webvtt"
]
},
"language": {
"type": "string"
},
"mode": {
"$ref": "#/definitions/SubtitlesMode"
},
"maxLineCount": {
"type": "number"
},
"maxLineWidth": {
"type": "number"
},
"minWordsInLine": {
"type": "number"
},
"separatePhrases": {
"type": "boolean"
},
"maxAddedDuration": {
"type": "number"
},
"decimalSeparator": {
"type": "string",
"enum": [
",",
"."
]
},
"includeCueIndexes": {
"type": "boolean"
},
"includeHours": {
"type": "boolean"
},
"lineBreakString": {
"type": "string",
"enum": [
"\n",
"\r\n"
]
},
"originalText": {
"type": "string"
},
"totalDuration": {
"type": "number"
}
},
"additionalProperties": false
},
"SubtitlesMode": {
"type": "string",
"enum": [
"line",
"segment",
"sentence",
"word",
"phone",
"word+phone"
]
},
"DtwGranularity": {
"type": "string",
"enum": [
"xx-low",
"x-low",
"low",
"medium",
"high",
"x-high"
]
},
"PhoneAlignmentMethod": {
"type": "string",
"enum": [
"interpolation",
"dtw"
]
},
"RecognitionOptions": {
"type": "object",
"properties": {
"engine": {
"$ref": "#/definitions/RecognitionEngine"
},
"language": {
"type": "string"
},
"maxAlternatives": {
"type": "number"
},
"isolate": {
"type": "boolean"
},
"crop": {
"type": "boolean"
},
"alignment": {
"$ref": "#/definitions/AlignmentOptions"
},
"languageDetection": {
"$ref": "#/definitions/SpeechLanguageDetectionOptions"
},
"subtitles": {
"$ref": "#/definitions/SubtitlesConfig"
},
"vad": {
"$ref": "#/definitions/VADOptions"
},
"sourceSeparation": {
"$ref": "#/definitions/SourceSeparationOptions"
},
"whisper": {
"$ref": "#/definitions/WhisperOptions"
},
"whisperCpp": {
"$ref": "#/definitions/WhisperCppOptions"
},
"vosk": {
"type": "object",
"properties": {
"modelPath": {
"type": "string"
}
},
"additionalProperties": false
},
"silero": {
"$ref": "#/definitions/SileroRecognitionOptions"
},
"googleCloud": {
"type": "object",
"properties": {
"apiKey": {
"type": "string"
},
"alternativeLanguageCodes": {
"type": "array",
"items": {
"type": "string"
}
},
"profanityFilter": {
"type": "boolean"
},
"autoPunctuation": {
"type": "boolean"
},
"useEnhancedModel": {
"type": "boolean"
}
},
"additionalProperties": false
},
"microsoftAzure": {
"type": "object",
"properties": {
"subscriptionKey": {
"type": "string"
},
"serviceRegion": {
"type": "string"
}
},
"additionalProperties": false
},
"amazonTranscribe": {
"type": "object",
"properties": {
"region": {
"type": "string"
},
"accessKeyId": {
"type": "string"
},
"secretAccessKey": {
"type": "string"
}
},
"additionalProperties": false
},
"openAICloud": {
"$ref": "#/definitions/OpenAICloudSTTOptions"
},
"deepgram": {
"$ref": "#/definitions/DeepgramSTTOptions"
}
},
"additionalProperties": false
},
"RecognitionEngine": {
"type": "string",
"enum": [
"whisper",
"whisper.cpp",
"vosk",
"silero",
"google-cloud",
"microsoft-azure",
"amazon-transcribe",
"openai-cloud",
"deepgram"
]
},
"SpeechLanguageDetectionOptions": {
"type": "object",
"properties": {
"engine": {
"$ref": "#/definitions/SpeechLanguageDetectionEngine"
},
"defaultLanguage": {
"type": "string"
},
"fallbackThresholdProbability": {
"type": "number"
},
"crop": {
"type": "boolean"
},
"silero": {
"$ref": "#/definitions/SileroLanguageDetectionOptions"
},
"whisper": {
"$ref": "#/definitions/WhisperLanguageDetectionOptions"
},
"whisperCpp": {
"$ref": "#/definitions/WhisperCppOptions"
},
"vad": {
"$ref": "#/definitions/VADOptions"
}
},
"additionalProperties": false
},
"SpeechLanguageDetectionEngine": {
"type": "string",
"enum": [
"silero",
"whisper",
"whisper.cpp"
]
},
"SileroLanguageDetectionOptions": {
"type": "object",
"properties": {
"provider": {
"$ref": "#/definitions/OnnxExecutionProvider"
}
},
"additionalProperties": false
},
"WhisperLanguageDetectionOptions": {
"type": "object",
"properties": {
"model": {
"$ref": "#/definitions/WhisperModelName"
},
"temperature": {
"type": "number"
},
"encoderProvider": {
"$ref": "#/definitions/OnnxExecutionProvider"
},
"decoderProvider": {
"$ref": "#/definitions/OnnxExecutionProvider"
}
},
"additionalProperties": false
},
"WhisperCppOptions": {
"type": "object",
"properties": {
"build": {
"$ref": "#/definitions/WhisperCppBuild"
},
"executablePath": {
"type": "string"
},
"enableGPU": {
"type": "boolean"
},
"model": {
"$ref": "#/definitions/WhisperCppModelId"
},
"threadCount": {
"type": "number"
},
"splitCount": {
"type": "number"
},
"topCandidateCount": {
"type": "number"
},
"beamCount": {
"type": "number"
},
"repetitionThreshold": {
"type": "number"
},
"temperature": {
"type": "number"
},
"temperatureIncrement": {
"type": "number"
},
"prompt": {
"type": "string"
},
"enableDTW": {
"type": "boolean"
},
"enableFlashAttention": {
"type": "boolean"
},
"verbose": {
"type": "boolean"
}
},
"additionalProperties": false
},
"WhisperCppBuild": {
"type": "string",
"enum": [
"cpu",
"cublas-12.4.0",
"custom"
]
},
"WhisperCppModelId": {
"type": "string",
"enum": [
"tiny",
"tiny-q5_1",
"tiny.en",
"tiny.en-q5_1",
"tiny.en-q8_0",
"base",
"base-q5_1",
"base.en",
"base.en-q5_1",
"small",
"small-q5_1",
"small.en",
"small.en-q5_1",
"medium",
"medium-q5_0",
"medium.en",
"medium.en-q5_0",
"large",
"large-v1",
"large-v2",
"large-v2-q5_0",
"large-v3",
"large-v3-q5_0",
"large-v3-turbo",
"large-v3-turbo-q5_0"
]
},
"SourceSeparationOptions": {
"type": "object",
"properties": {
"engine": {
"$ref": "#/definitions/SourceSeparationEngine"
},
"mdxNet": {
"$ref": "#/definitions/MDXNetOptions"
}
},
"additionalProperties": false
},
"SourceSeparationEngine": {
"type": "string",
"const": "mdx-net"
},
"MDXNetOptions": {
"type": "object",
"properties": {
"model": {
"$ref": "#/definitions/MDXNetModelName"
},
"provider": {
"$ref": "#/definitions/OnnxExecutionProvider"
}
},
"additionalProperties": false
},
"MDXNetModelName": {
"type": "string",
"enum": [
"UVR_MDXNET_1_9703",
"UVR_MDXNET_2_9682",
"UVR_MDXNET_3_9662",
"UVR_MDXNET_KARA",
"UVR_MDXNET_Main",
"Kim_Vocal_1",
"Kim_Vocal_2"
]
},
"WhisperOptions": {
"type": "object",
"properties": {
"model": {
"$ref": "#/definitions/WhisperModelName"
},
"temperature": {
"type": "number"
},
"prompt": {
"type": "string"
},
"topCandidateCount": {
"type": "number"
},
"punctuationThreshold": {
"type": "number"
},
"autoPromptParts": {
"type": "boolean"
},
"maxTokensPerPart": {
"type": "number"
},
"suppressRepetition": {
"type": "boolean"
},
"repetitionThreshold": {
"type": "number"
},
"decodeTimestampTokens": {
"type": "boolean"
},
"endTokenThreshold": {
"type": "number"
},
"includeEndTokenInCandidates": {
"type": "boolean"
},
"timestampAccuracy": {
"type": "string",
"enum": [
"medium",
"high"
]
},
"encoderProvider": {
"$ref": "#/definitions/OnnxExecutionProvider"
},
"decoderProvider": {
"$ref": "#/definitions/OnnxExecutionProvider"
},
"seed": {
"type": "number"
}
},
"additionalProperties": false
},
"SileroRecognitionOptions": {
"type": "object",
"properties": {
"modelPath": {
"type": "string"
},
"provider": {
"$ref": "#/definitions/OnnxExecutionProvider"
}
},
"additionalProperties": false
},
"OpenAICloudSTTOptions": {
"type": "object",
"properties": {
"model": {
"type": "string"
},
"apiKey": {
"type": "string"
},
"organization": {
"type": "string"
},
"baseURL": {
"type": "string"
},
"temperature": {
"type": "number"
},
"prompt": {
"type": "string"
},
"timeout": {
"type": "number"
},
"maxRetries": {
"type": "number"
},
"requestWordTimestamps": {
"type": "boolean"
}
},
"additionalProperties": false
},
"DeepgramSTTOptions": {
"type": "object",
"properties": {
"apiKey": {
"type": "string"
},
"model": {
"type": "string"
},
"punctuate": {
"type": "boolean"
}
},
"additionalProperties": false
},
"WhisperAlignmentOptions": {
"type": "object",
"properties": {
"model": {
"$ref": "#/definitions/WhisperModelName"
},
"endTokenThreshold": {
"type": "number"
},
"maxTokensPerPart": {
"type": "number"
},
"timestampAccuracy": {
"type": "string",
"enum": [
"medium",
"high"
]
},
"encoderProvider": {
"$ref": "#/definitions/OnnxExecutionProvider"
},
"decoderProvider": {
"$ref": "#/definitions/OnnxExecutionProvider"
}
},
"additionalProperties": false
},
"TimePitchShiftingMethod": {
"type": "string",
"enum": [
"sonic",
"rubberband"
]
},
"RubberbandOptions": {
"type": "object",
"properties": {
"stretch": {
"type": "string",
"enum": [
"elastic",
"precise"
]
},
"transients": {
"type": "string",
"enum": [
"crisp",
"mixed",
"smooth"
]
},
"detector": {
"type": "string",
"enum": [
"compound",
"percussive",
"soft"
]
},
"phase": {
"type": "string",
"enum": [
"laminar",
"independent"
]
},
"window": {
"type": "string",
"enum": [
"standard",
"long",
"short"
]
},
"smoothing": {
"type": "string",
"enum": [
"off",
"on"
]
},
"formant": {
"description": "Accepts \"shifted\"; the misspelled \"shited\" is kept so existing configurations still validate.",
"type": "string",
"enum": [
"shifted",
"shited",
"preserved"
]
},
"pitch": {
"type": "string",
"enum": [
"high-speed",
"high-quality",
"high-consistency"
]
},
"channels": {
"type": "string",
"enum": [
"apart",
"together"
]
},
"engine": {
"type": "string",
"enum": [
"faster",
"finer"
]
}
},
"additionalProperties": false
},
"OpenAICloudTTSOptions": {
"type": "object",
"properties": {
"apiKey": {
"type": "string"
},
"organization": {
"type": "string"
},
"baseURL": {
"type": "string"
},
"model": {
"type": "string",
"enum": [
"tts-1",
"tts-1-hd",
"gpt-4o-mini-tts"
]
},
"instructions": {
"type": "string"
},
"timeout": {
"type": "number"
},
"maxRetries": {
"type": "number"
}
},
"additionalProperties": false
},
"ElevenLabsTTSOptions": {
"type": "object",
"properties": {
"apiKey": {
"type": "string"
},
"modelId": {
"type": "string"
},
"stability": {
"type": "number"
},
"similarityBoost": {
"type": "number"
},
"style": {
"type": "number"
},
"useSpeakerBoost": {
"type": "boolean"
},
"seed": {
"type": "number"
}
},
"additionalProperties": false
},
"DeepgramTTSOptions": {
"type": "object",
"properties": {
"apiKey": {
"type": "string"
}
},
"additionalProperties": false
},
"VoiceListRequestOptions": {
"type": "object",
"properties": {
"engine": {
"$ref": "#/definitions/SynthesisEngine"
},
"language": {
"type": "string"
},
"voice": {
"type": "string"
},
"voiceGender": {
"$ref": "#/definitions/VoiceGender"
},
"speed": {
"type": "number"
},
"pitch": {
"type": "number"
},
"pitchVariation": {
"type": "number"
},
"splitToSentences": {
"type": "boolean"
},
"ssml": {
"type": "boolean"
},
"segmentEndPause": {
"type": "number"
},
"sentenceEndPause": {
"type": "number"
},
"customLexiconPaths": {
"type": "array",
"items": {
"type": "string"
}
},
"plainText": {
"$ref": "#/definitions/PlainTextOptions"
},
"alignment": {
"$ref": "#/definitions/AlignmentOptions"
},
"postProcessing": {
"type": "object",
"properties": {
"normalizeAudio": {
"type": "boolean"
},
"targetPeak": {
"type": "number"
},
"maxGainIncrease": {
"type": "number"
},
"speed": {
"type": "number"
},
"pitch": {
"type": "number"
},
"timePitchShiftingMethod": {
"$ref": "#/definitions/TimePitchShiftingMethod"
},
"rubberband": {
"$ref": "#/definitions/RubberbandOptions"
}
},
"additionalProperties": false
},
"outputAudioFormat": {
"type": "object",
"properties": {
"codec": {
"type": "string",
"enum": [
"wav",
"mp3",
"opus",
"m4a",
"ogg",
"flac"
]
},
"bitrate": {
"type": "number"
}
},
"additionalProperties": false
},
"languageDetection": {
"$ref": "#/definitions/TextLanguageDetectionOptions"
},
"subtitles": {
"$ref": "#/definitions/SubtitlesConfig"
},
"vits": {
"type": "object",
"properties": {
"speakerId": {
"type": "number"
},
"provider": {
"$ref": "#/definitions/OnnxExecutionProvider"
}
},
"additionalProperties": false
},
"kokoro": {
"type": "object",
"properties": {
"provider": {
"$ref": "#/definitions/OnnxExecutionProvider"
},
"model": {
"type": "string",
"enum": [
"82m-v1.0-fp32",
"82m-v1.0-quantized"
]
}
},
"additionalProperties": false
},
"pico": {
"type": "object",
"additionalProperties": false
},
"flite": {
"type": "object",
"additionalProperties": false
},
"gnuspeech": {
"type": "object",
"properties": {
"tempo": {
"type": "number"
},
"controlRate": {
"type": "number"
},
"debug": {
"type": "boolean"
}
},
"additionalProperties": false
},
"espeak": {
"type": "object",
"properties": {
"rate": {
"type": "number"
},
"pitch": {
"type": "number"
},
"pitchRange": {
"type": "number"
},
"useKlatt": {
"type": "boolean"
},
"insertSeparators": {
"type": "boolean"
}
},
"additionalProperties": false
},
"sam": {
"type": "object",
"properties": {
"pitch": {
"type": "number"
},
"speed": {
"type": "number"
},
"mouth": {
"type": "number"
},
"throat": {
"type": "number"
}
},
"additionalProperties": false
},
"sapi": {
"type": "object",
"properties": {
"rate": {
"type": "number"
}
},
"additionalProperties": false
},
"msspeech": {
"type": "object",
"properties": {
"rate": {
"type": "number"
}
},
"additionalProperties": false
},
"coquiServer": {
"type": "object",
"properties": {
"serverUrl": {
"type": "string"
},
"speakerId": {
"type": [
"string",
"null"
]
}
},
"additionalProperties": false
},
"googleCloud": {
"type": "object",
"properties": {
"apiKey": {
"type": "string"
},
"pitchDeltaSemitones": {
"type": "number"
},
"customVoice": {
"type": "object",
"properties": {
"model": {
"type": "string"
},
"reportedUsage": {
"type": "string"
}
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"microsoftAzure": {
"type": "object",
"properties": {
"subscriptionKey": {
"type": "string"
},
"serviceRegion": {
"type": "string"
},
"pitchDeltaHz": {
"type": "number"
}
},
"additionalProperties": false
},
"amazonPolly": {
"type": "object",
"properties": {
"region": {
"type": "string"
},
"accessKeyId": {
"type": "string"
},
"secretAccessKey": {
"type": "string"
},
"pollyEngine": {
"type": "string",
"enum": [
"standard",
"neural"
]
},
"lexiconNames": {
"type": "array",
"items": {
"type": "string"
}
}
},
"additionalProperties": false
},
"openAICloud": {
"$ref": "#/definitions/OpenAICloudTTSOptions"
},
"elevenLabs": {
"$ref": "#/definitions/ElevenLabsTTSOptions"
},
"deepgram": {
"$ref": "#/definitions/DeepgramTTSOptions"
},
"googleTranslate": {
"type": "object",
"properties": {
"tld": {
"type": "string"
}
},
"additionalProperties": false
},
"microsoftEdge": {
"type": "object",
"properties": {
"trustedClientToken": {
"type": "string"
},
"pitchDeltaHz": {
"type": "number"
}
},
"additionalProperties": false
},
"streamlabsPolly": {
"type": "object",
"additionalProperties": false
},
"cache": {
"type": "object",
"properties": {
"path": {
"type": "string"
},
"duration": {
"type": "number"
}
},
"additionalProperties": false
}
},
"additionalProperties": false
},
"TranslationAlignmentOptions": {
"type": "object",
"properties": {
"engine": {
"$ref": "#/definitions/TranslationAlignmentEngine"
},
"sourceLanguage": {
"type": "string"
},
"targetLanguage": {
"type": "string"
},
"isolate": {
"type": "boolean"
},
"crop": {
"type": "boolean"
},
"languageDetection": {
"$ref": "#/definitions/SpeechLanguageDetectionOptions"
},
"vad": {
"$ref": "#/definitions/VADOptions"
},
"plainText": {
"$ref": "#/definitions/PlainTextOptions"
},
"subtitles": {
"$ref": "#/definitions/SubtitlesConfig"
},
"sourceSeparation": {
"$ref": "#/definitions/SourceSeparationOptions"
},
"whisper": {
"$ref": "#/definitions/WhisperAlignmentOptions"
}
},
"additionalProperties": false
},
"TranslationAlignmentEngine": {
"type": "string",
"const": "whisper"
},
"TranscriptAndTranslationAlignmentOptions": {
"type": "object",
"properties": {
"engine": {
"$ref": "#/definitions/TranscriptAndTranslationAlignmentEngine"
},
"sourceLanguage": {
"type": "string"
},
"targetLanguage": {
"type": "string"
},
"isolate": {
"type": "boolean"
},
"crop": {
"type": "boolean"
},
"alignment": {
"$ref": "#/definitions/AlignmentOptions"
},
"timelineAlignment": {
"$ref": "#/definitions/TimelineTranslationAlignmentOptions"
},
"languageDetection": {
"$ref": "#/definitions/TextLanguageDetectionOptions"
},
"vad": {
"$ref": "#/definitions/VADOptions"
},
"plainText": {
"$ref": "#/definitions/PlainTextOptions"
},
"subtitles": {
"$ref": "#/definitions/SubtitlesConfig"
},
"sourceSeparation": {
"$ref": "#/definitions/SourceSeparationOptions"
}
},
"additionalProperties": false
},
"TranscriptAndTranslationAlignmentEngine": {
"type": "string",
"const": "two-stage"
},
"TimelineTranslationAlignmentOptions": {
"type": "object",
"properties": {
"engine": {
"type": "string",
"const": "e5"
},
"sourceLanguage": {
"type": "string"
},
"targetLanguage": {
"type": "string"
},
"audio": {
"$ref": "#/definitions/AudioSourceParam"
},
"languageDetection": {
"$ref": "#/definitions/TextLanguageDetectionOptions"
},
"subtitles": {
"$ref": "#/definitions/SubtitlesConfig"
},
"e5": {
"type": "object",
"properties": {
"model": {
"type": "string",
"const": "small-fp16"
}
},
"required": [
"model"
],
"additionalProperties": false
}
},
"additionalProperties": false
},
"AudioSourceParam": {
"anyOf": [
{
"type": "string"
},
{
"type": "object",
"properties": {
"BYTES_PER_ELEMENT": {
"type": "number"
},
"buffer": {
"type": "object",
"properties": {
"byteLength": {
"type": "number"
}
},
"required": [
"byteLength"
],
"additionalProperties": false
},
"byteLength": {
"type": "number"
},
"byteOffset": {
"type": "number"
},
"length": {
"type": "number"
}
},
"required": [
"BYTES_PER_ELEMENT",
"buffer",
"byteLength",
"byteOffset",
"length"
],
"additionalProperties": {
"type": "number"
}
},
{
"$ref": "#/definitions/RawAudio"
}
]
},
"RawAudio": {
"type": "object",
"properties": {
"audioChannels": {
"type": "array",
"items": {
"type": "object",
"properties": {
"BYTES_PER_ELEMENT": {
"type": "number"
},
"buffer": {
"type": "object",
"properties": {
"byteLength": {
"type": "number"
}
},
"required": [
"byteLength"
],
"additionalProperties": false
},
"byteLength": {
"type": "number"
},
"byteOffset": {
"type": "number"
},
"length": {
"type": "number"
}
},
"required": [
"BYTES_PER_ELEMENT",
"buffer",
"byteLength",
"byteOffset",
"length"
],
"additionalProperties": {
"type": "number"
}
}
},
"sampleRate": {
"type": "number"
}
},
"required": [
"audioChannels",
"sampleRate"
],
"additionalProperties": false
},
"SpeechTranslationOptions": {
"type": "object",
"properties": {
"engine": {
"$ref": "#/definitions/SpeechTranslationEngine"
},
"sourceLanguage": {
"type": "string"
},
"targetLanguage": {
"type": "string"
},
"crop": {
"type": "boolean"
},
"isolate": {
"type": "boolean"
},
"languageDetection": {
"$ref": "#/definitions/SpeechLanguageDetectionOptions"
},
"subtitles": {
"$ref": "#/definitions/SubtitlesConfig"
},
"vad": {
"$ref": "#/definitions/VADOptions"
},
"sourceSeparation": {
"$ref": "#/definitions/SourceSeparationOptions"
},
"whisper": {
"$ref": "#/definitions/WhisperOptions"
},
"whisperCpp": {
"$ref": "#/definitions/WhisperCppOptions"
},
"openAICloud": {
"$ref": "#/definitions/OpenAICloudSTTOptions"
}
},
"additionalProperties": false
},
"SpeechTranslationEngine": {
"type": "string",
"enum": [
"whisper",
"whisper.cpp",
"openai-cloud"
]
},
"TextTranslationOptions": {
"type": "object",
"properties": {
"engine": {
"$ref": "#/definitions/TextTranslationEngine"
},
"sourceLanguage": {
"type": "string"
},
"targetLanguage": {
"type": "string"
},
"languageDetection": {
"$ref": "#/definitions/TextLanguageDetectionOptions"
},
"plainText": {
"$ref": "#/definitions/PlainTextOptions"
},
"nllb": {
"type": "object",
"additionalProperties": false
},
"googleTranslate": {
"$ref": "#/definitions/GoogleTranslateTextTranslationOptions"
},
"deepl": {
"type": "object",
"additionalProperties": false
}
},
"additionalProperties": false
},
"TextTranslationEngine": {
"type": "string",
"enum": [
"nllb",
"google-translate",
"deepl"
]
},
"GoogleTranslateTextTranslationOptions": {
"type":