UNPKG

echogarden

Version:

An easy-to-use speech toolset. Includes tools for synthesis, recognition, alignment, speech translation, language detection, source separation and more.

2,100 lines 53.4 kB
{ "$schema": "http://json-schema.org/draft-07/schema#", "$ref": "#/definitions/APIOptions", "definitions": { "APIOptions": { "type": "object", "properties": { "SynthesisOptions": { "$ref": "#/definitions/SynthesisOptions" }, "VoiceListRequestOptions": { "$ref": "#/definitions/VoiceListRequestOptions" }, "RecognitionOptions": { "$ref": "#/definitions/RecognitionOptions" }, "AlignmentOptions": { "$ref": "#/definitions/AlignmentOptions" }, "TranslationAlignmentOptions": { "$ref": "#/definitions/TranslationAlignmentOptions" }, "TranscriptAndTranslationAlignmentOptions": { "$ref": "#/definitions/TranscriptAndTranslationAlignmentOptions" }, "TimelineTranslationAlignmentOptions": { "$ref": "#/definitions/TimelineTranslationAlignmentOptions" }, "SpeechTranslationOptions": { "$ref": "#/definitions/SpeechTranslationOptions" }, "TextTranslationOptions": { "$ref": "#/definitions/TextTranslationOptions" }, "SpeechLanguageDetectionOptions": { "$ref": "#/definitions/SpeechLanguageDetectionOptions" }, "TextLanguageDetectionOptions": { "$ref": "#/definitions/TextLanguageDetectionOptions" }, "VADOptions": { "$ref": "#/definitions/VADOptions" }, "DenoisingOptions": { "$ref": "#/definitions/DenoisingOptions" }, "SourceSeparationOptions": { "$ref": "#/definitions/SourceSeparationOptions" }, "ServerOptions": { "$ref": "#/definitions/ServerOptions" }, "GlobalOptions": { "$ref": "#/definitions/GlobalOptions" }, "CLIOptions": { "$ref": "#/definitions/CLIOptions" } }, "required": [ "SynthesisOptions", "VoiceListRequestOptions", "RecognitionOptions", "AlignmentOptions", "TranslationAlignmentOptions", "TranscriptAndTranslationAlignmentOptions", "TimelineTranslationAlignmentOptions", "SpeechTranslationOptions", "TextTranslationOptions", "SpeechLanguageDetectionOptions", "TextLanguageDetectionOptions", "VADOptions", "DenoisingOptions", "SourceSeparationOptions", "ServerOptions", "GlobalOptions", "CLIOptions" ], "additionalProperties": false }, "SynthesisOptions": { "type": "object", 
"properties": { "engine": { "$ref": "#/definitions/SynthesisEngine" }, "language": { "type": "string" }, "voice": { "type": "string" }, "voiceGender": { "$ref": "#/definitions/VoiceGender" }, "speed": { "type": "number" }, "pitch": { "type": "number" }, "pitchVariation": { "type": "number" }, "splitToSentences": { "type": "boolean" }, "ssml": { "type": "boolean" }, "segmentEndPause": { "type": "number" }, "sentenceEndPause": { "type": "number" }, "customLexiconPaths": { "type": "array", "items": { "type": "string" } }, "plainText": { "$ref": "#/definitions/PlainTextOptions" }, "alignment": { "$ref": "#/definitions/AlignmentOptions" }, "postProcessing": { "type": "object", "properties": { "normalizeAudio": { "type": "boolean" }, "targetPeak": { "type": "number" }, "maxGainIncrease": { "type": "number" }, "speed": { "type": "number" }, "pitch": { "type": "number" }, "timePitchShiftingMethod": { "$ref": "#/definitions/TimePitchShiftingMethod" }, "rubberband": { "$ref": "#/definitions/RubberbandOptions" } }, "additionalProperties": false }, "outputAudioFormat": { "type": "object", "properties": { "codec": { "type": "string", "enum": [ "wav", "mp3", "opus", "m4a", "ogg", "flac" ] }, "bitrate": { "type": "number" } }, "additionalProperties": false }, "languageDetection": { "$ref": "#/definitions/TextLanguageDetectionOptions" }, "subtitles": { "$ref": "#/definitions/SubtitlesConfig" }, "vits": { "type": "object", "properties": { "speakerId": { "type": "number" }, "provider": { "$ref": "#/definitions/OnnxExecutionProvider" } }, "additionalProperties": false }, "kokoro": { "type": "object", "properties": { "provider": { "$ref": "#/definitions/OnnxExecutionProvider" }, "model": { "type": "string", "enum": [ "82m-v1.0-fp32", "82m-v1.0-quantized" ] } }, "additionalProperties": false }, "pico": { "type": "object", "additionalProperties": false }, "flite": { "type": "object", "additionalProperties": false }, "gnuspeech": { "type": "object", "properties": { "tempo": { "type": 
"number" }, "controlRate": { "type": "number" }, "debug": { "type": "boolean" } }, "additionalProperties": false }, "espeak": { "type": "object", "properties": { "rate": { "type": "number" }, "pitch": { "type": "number" }, "pitchRange": { "type": "number" }, "useKlatt": { "type": "boolean" }, "insertSeparators": { "type": "boolean" } }, "additionalProperties": false }, "sam": { "type": "object", "properties": { "pitch": { "type": "number" }, "speed": { "type": "number" }, "mouth": { "type": "number" }, "throat": { "type": "number" } }, "additionalProperties": false }, "sapi": { "type": "object", "properties": { "rate": { "type": "number" } }, "additionalProperties": false }, "msspeech": { "type": "object", "properties": { "rate": { "type": "number" } }, "additionalProperties": false }, "coquiServer": { "type": "object", "properties": { "serverUrl": { "type": "string" }, "speakerId": { "type": [ "string", "null" ] } }, "additionalProperties": false }, "googleCloud": { "type": "object", "properties": { "apiKey": { "type": "string" }, "pitchDeltaSemitones": { "type": "number" }, "customVoice": { "type": "object", "properties": { "model": { "type": "string" }, "reportedUsage": { "type": "string" } }, "additionalProperties": false } }, "additionalProperties": false }, "microsoftAzure": { "type": "object", "properties": { "subscriptionKey": { "type": "string" }, "serviceRegion": { "type": "string" }, "pitchDeltaHz": { "type": "number" } }, "additionalProperties": false }, "amazonPolly": { "type": "object", "properties": { "region": { "type": "string" }, "accessKeyId": { "type": "string" }, "secretAccessKey": { "type": "string" }, "pollyEngine": { "type": "string", "enum": [ "standard", "neural" ] }, "lexiconNames": { "type": "array", "items": { "type": "string" } } }, "additionalProperties": false }, "openAICloud": { "$ref": "#/definitions/OpenAICloudTTSOptions" }, "elevenLabs": { "$ref": "#/definitions/ElevenLabsTTSOptions" }, "deepgram": { "$ref": 
"#/definitions/DeepgramTTSOptions" }, "googleTranslate": { "type": "object", "properties": { "tld": { "type": "string" } }, "additionalProperties": false }, "microsoftEdge": { "type": "object", "properties": { "trustedClientToken": { "type": "string" }, "pitchDeltaHz": { "type": "number" } }, "additionalProperties": false }, "streamlabsPolly": { "type": "object", "additionalProperties": false } }, "additionalProperties": false }, "SynthesisEngine": { "type": "string", "enum": [ "vits", "kokoro", "pico", "flite", "gnuspeech", "espeak", "sam", "sapi", "msspeech", "coqui-server", "google-cloud", "microsoft-azure", "amazon-polly", "openai-cloud", "elevenlabs", "deepgram", "google-translate", "microsoft-edge", "streamlabs-polly" ] }, "VoiceGender": { "type": "string", "enum": [ "male", "female", "unknown" ] }, "PlainTextOptions": { "type": "object", "properties": { "paragraphBreaks": { "$ref": "#/definitions/ParagraphBreakType" }, "whitespace": { "$ref": "#/definitions/WhitespaceProcessing" } }, "additionalProperties": false }, "ParagraphBreakType": { "type": "string", "enum": [ "single", "double" ] }, "WhitespaceProcessing": { "type": "string", "enum": [ "preserve", "removeLineBreaks", "collapse" ] }, "AlignmentOptions": { "type": "object", "properties": { "engine": { "$ref": "#/definitions/AlignmentEngine" }, "language": { "type": "string" }, "isolate": { "type": "boolean" }, "crop": { "type": "boolean" }, "customLexiconPaths": { "type": "array", "items": { "type": "string" } }, "languageDetection": { "$ref": "#/definitions/TextLanguageDetectionOptions" }, "vad": { "$ref": "#/definitions/VADOptions" }, "plainText": { "$ref": "#/definitions/PlainTextOptions" }, "subtitles": { "$ref": "#/definitions/SubtitlesConfig" }, "dtw": { "type": "object", "properties": { "granularity": { "anyOf": [ { "$ref": "#/definitions/DtwGranularity" }, { "type": "array", "items": { "$ref": "#/definitions/DtwGranularity" } } ] }, "windowDuration": { "anyOf": [ { "type": "number" }, { "type": 
"string" }, { "type": "array", "items": { "type": [ "string", "number" ] } } ] }, "phoneAlignmentMethod": { "$ref": "#/definitions/PhoneAlignmentMethod" } }, "additionalProperties": false }, "recognition": { "$ref": "#/definitions/RecognitionOptions" }, "sourceSeparation": { "$ref": "#/definitions/SourceSeparationOptions" }, "whisper": { "$ref": "#/definitions/WhisperAlignmentOptions" } }, "additionalProperties": false }, "AlignmentEngine": { "type": "string", "enum": [ "dtw", "dtw-ra", "dtw-ea", "whisper" ] }, "TextLanguageDetectionOptions": { "type": "object", "properties": { "engine": { "$ref": "#/definitions/TextLanguageDetectionEngine" }, "defaultLanguage": { "type": "string" }, "fallbackThresholdProbability": { "type": "number" } }, "additionalProperties": false }, "TextLanguageDetectionEngine": { "type": "string", "enum": [ "tinyld", "fasttext" ] }, "VADOptions": { "type": "object", "properties": { "engine": { "$ref": "#/definitions/VADEngine" }, "activityThreshold": { "type": "number" }, "webrtc": { "type": "object", "properties": { "frameDuration": { "type": "number", "enum": [ 10, 20, 30 ] }, "mode": { "type": "number", "enum": [ 0, 1, 2, 3 ] } }, "additionalProperties": false }, "silero": { "type": "object", "properties": { "frameDuration": { "type": "number", "enum": [ 30, 60, 90 ] }, "provider": { "$ref": "#/definitions/OnnxExecutionProvider" } }, "additionalProperties": false }, "rnnoise": { "type": "object", "additionalProperties": false }, "whisper": { "$ref": "#/definitions/WhisperVADOptions" }, "adaptiveGate": { "$ref": "#/definitions/AdaptiveGateVADOptions" } }, "additionalProperties": false }, "VADEngine": { "type": "string", "enum": [ "webrtc", "silero", "rnnoise", "whisper", "adaptive-gate" ] }, "OnnxExecutionProvider": { "type": "string", "enum": [ "cpu", "dml", "cuda", "coreml", "webgpu" ] }, "WhisperVADOptions": { "type": "object", "properties": { "model": { "$ref": "#/definitions/WhisperModelName" }, "temperature": { "type": "number" }, 
"encoderProvider": { "$ref": "#/definitions/OnnxExecutionProvider" }, "decoderProvider": { "$ref": "#/definitions/OnnxExecutionProvider" } }, "additionalProperties": false }, "WhisperModelName": { "type": "string", "enum": [ "tiny", "tiny.en", "base", "base.en", "small", "small.en", "medium", "medium.en", "large-v1", "large-v2", "large-v3", "large-v3-turbo" ] }, "AdaptiveGateVADOptions": { "type": "object", "properties": { "lowCutoff": { "type": "number" }, "highCutoff": { "type": "number" }, "positiveAdaptationRate": { "type": "number" }, "negativeAdaptationRate": { "type": "number" }, "peakLoudnessDecay": { "type": "number" }, "backwardExtensionDuration": { "type": "number" }, "relativeThreshold": { "type": "number" } }, "additionalProperties": false }, "SubtitlesConfig": { "type": "object", "properties": { "format": { "type": "string", "enum": [ "srt", "webvtt" ] }, "language": { "type": "string" }, "mode": { "$ref": "#/definitions/SubtitlesMode" }, "maxLineCount": { "type": "number" }, "maxLineWidth": { "type": "number" }, "minWordsInLine": { "type": "number" }, "separatePhrases": { "type": "boolean" }, "maxAddedDuration": { "type": "number" }, "decimalSeparator": { "type": "string", "enum": [ ",", "." 
] }, "includeCueIndexes": { "type": "boolean" }, "includeHours": { "type": "boolean" }, "lineBreakString": { "type": "string", "enum": [ "\n", "\r\n" ] }, "originalText": { "type": "string" }, "totalDuration": { "type": "number" } }, "additionalProperties": false }, "SubtitlesMode": { "type": "string", "enum": [ "line", "segment", "sentence", "word", "phone", "word+phone" ] }, "DtwGranularity": { "type": "string", "enum": [ "xx-low", "x-low", "low", "medium", "high", "x-high" ] }, "PhoneAlignmentMethod": { "type": "string", "enum": [ "interpolation", "dtw" ] }, "RecognitionOptions": { "type": "object", "properties": { "engine": { "$ref": "#/definitions/RecognitionEngine" }, "language": { "type": "string" }, "maxAlternatives": { "type": "number" }, "isolate": { "type": "boolean" }, "crop": { "type": "boolean" }, "alignment": { "$ref": "#/definitions/AlignmentOptions" }, "languageDetection": { "$ref": "#/definitions/SpeechLanguageDetectionOptions" }, "subtitles": { "$ref": "#/definitions/SubtitlesConfig" }, "vad": { "$ref": "#/definitions/VADOptions" }, "sourceSeparation": { "$ref": "#/definitions/SourceSeparationOptions" }, "whisper": { "$ref": "#/definitions/WhisperOptions" }, "whisperCpp": { "$ref": "#/definitions/WhisperCppOptions" }, "vosk": { "type": "object", "properties": { "modelPath": { "type": "string" } }, "additionalProperties": false }, "silero": { "$ref": "#/definitions/SileroRecognitionOptions" }, "googleCloud": { "type": "object", "properties": { "apiKey": { "type": "string" }, "alternativeLanguageCodes": { "type": "array", "items": { "type": "string" } }, "profanityFilter": { "type": "boolean" }, "autoPunctuation": { "type": "boolean" }, "useEnhancedModel": { "type": "boolean" } }, "additionalProperties": false }, "microsoftAzure": { "type": "object", "properties": { "subscriptionKey": { "type": "string" }, "serviceRegion": { "type": "string" } }, "additionalProperties": false }, "amazonTranscribe": { "type": "object", "properties": { "region": { 
"type": "string" }, "accessKeyId": { "type": "string" }, "secretAccessKey": { "type": "string" } }, "additionalProperties": false }, "openAICloud": { "$ref": "#/definitions/OpenAICloudSTTOptions" }, "deepgram": { "$ref": "#/definitions/DeepgramSTTOptions" } }, "additionalProperties": false }, "RecognitionEngine": { "type": "string", "enum": [ "whisper", "whisper.cpp", "vosk", "silero", "google-cloud", "microsoft-azure", "amazon-transcribe", "openai-cloud", "deepgram" ] }, "SpeechLanguageDetectionOptions": { "type": "object", "properties": { "engine": { "$ref": "#/definitions/SpeechLanguageDetectionEngine" }, "defaultLanguage": { "type": "string" }, "fallbackThresholdProbability": { "type": "number" }, "crop": { "type": "boolean" }, "silero": { "$ref": "#/definitions/SileroLanguageDetectionOptions" }, "whisper": { "$ref": "#/definitions/WhisperLanguageDetectionOptions" }, "whisperCpp": { "$ref": "#/definitions/WhisperCppOptions" }, "vad": { "$ref": "#/definitions/VADOptions" } }, "additionalProperties": false }, "SpeechLanguageDetectionEngine": { "type": "string", "enum": [ "silero", "whisper", "whisper.cpp" ] }, "SileroLanguageDetectionOptions": { "type": "object", "properties": { "provider": { "$ref": "#/definitions/OnnxExecutionProvider" } }, "additionalProperties": false }, "WhisperLanguageDetectionOptions": { "type": "object", "properties": { "model": { "$ref": "#/definitions/WhisperModelName" }, "temperature": { "type": "number" }, "encoderProvider": { "$ref": "#/definitions/OnnxExecutionProvider" }, "decoderProvider": { "$ref": "#/definitions/OnnxExecutionProvider" } }, "additionalProperties": false }, "WhisperCppOptions": { "type": "object", "properties": { "build": { "$ref": "#/definitions/WhisperCppBuild" }, "executablePath": { "type": "string" }, "enableGPU": { "type": "boolean" }, "model": { "$ref": "#/definitions/WhisperCppModelId" }, "threadCount": { "type": "number" }, "splitCount": { "type": "number" }, "topCandidateCount": { "type": "number" }, 
"beamCount": { "type": "number" }, "repetitionThreshold": { "type": "number" }, "temperature": { "type": "number" }, "temperatureIncrement": { "type": "number" }, "prompt": { "type": "string" }, "enableDTW": { "type": "boolean" }, "enableFlashAttention": { "type": "boolean" }, "verbose": { "type": "boolean" } }, "additionalProperties": false }, "WhisperCppBuild": { "type": "string", "enum": [ "cpu", "cublas-12.4.0", "custom" ] }, "WhisperCppModelId": { "type": "string", "enum": [ "tiny", "tiny-q5_1", "tiny.en", "tiny.en-q5_1", "tiny.en-q8_0", "base", "base-q5_1", "base.en", "base.en-q5_1", "small", "small-q5_1", "small.en", "small.en-q5_1", "medium", "medium-q5_0", "medium.en", "medium.en-q5_0", "large", "large-v1", "large-v2", "large-v2-q5_0", "large-v3", "large-v3-q5_0", "large-v3-turbo", "large-v3-turbo-q5_0" ] }, "SourceSeparationOptions": { "type": "object", "properties": { "engine": { "$ref": "#/definitions/SourceSeparationEngine" }, "mdxNet": { "$ref": "#/definitions/MDXNetOptions" } }, "additionalProperties": false }, "SourceSeparationEngine": { "type": "string", "const": "mdx-net" }, "MDXNetOptions": { "type": "object", "properties": { "model": { "$ref": "#/definitions/MDXNetModelName" }, "provider": { "$ref": "#/definitions/OnnxExecutionProvider" } }, "additionalProperties": false }, "MDXNetModelName": { "type": "string", "enum": [ "UVR_MDXNET_1_9703", "UVR_MDXNET_2_9682", "UVR_MDXNET_3_9662", "UVR_MDXNET_KARA", "UVR_MDXNET_Main", "Kim_Vocal_1", "Kim_Vocal_2" ] }, "WhisperOptions": { "type": "object", "properties": { "model": { "$ref": "#/definitions/WhisperModelName" }, "temperature": { "type": "number" }, "prompt": { "type": "string" }, "topCandidateCount": { "type": "number" }, "punctuationThreshold": { "type": "number" }, "autoPromptParts": { "type": "boolean" }, "maxTokensPerPart": { "type": "number" }, "suppressRepetition": { "type": "boolean" }, "repetitionThreshold": { "type": "number" }, "decodeTimestampTokens": { "type": "boolean" }, 
"endTokenThreshold": { "type": "number" }, "includeEndTokenInCandidates": { "type": "boolean" }, "timestampAccuracy": { "type": "string", "enum": [ "medium", "high" ] }, "encoderProvider": { "$ref": "#/definitions/OnnxExecutionProvider" }, "decoderProvider": { "$ref": "#/definitions/OnnxExecutionProvider" }, "seed": { "type": "number" } }, "additionalProperties": false }, "SileroRecognitionOptions": { "type": "object", "properties": { "modelPath": { "type": "string" }, "provider": { "$ref": "#/definitions/OnnxExecutionProvider" } }, "additionalProperties": false }, "OpenAICloudSTTOptions": { "type": "object", "properties": { "model": { "type": "string" }, "apiKey": { "type": "string" }, "organization": { "type": "string" }, "baseURL": { "type": "string" }, "temperature": { "type": "number" }, "prompt": { "type": "string" }, "timeout": { "type": "number" }, "maxRetries": { "type": "number" }, "requestWordTimestamps": { "type": "boolean" } }, "additionalProperties": false }, "DeepgramSTTOptions": { "type": "object", "properties": { "apiKey": { "type": "string" }, "model": { "type": "string" }, "punctuate": { "type": "boolean" } }, "additionalProperties": false }, "WhisperAlignmentOptions": { "type": "object", "properties": { "model": { "$ref": "#/definitions/WhisperModelName" }, "endTokenThreshold": { "type": "number" }, "maxTokensPerPart": { "type": "number" }, "timestampAccuracy": { "type": "string", "enum": [ "medium", "high" ] }, "encoderProvider": { "$ref": "#/definitions/OnnxExecutionProvider" }, "decoderProvider": { "$ref": "#/definitions/OnnxExecutionProvider" } }, "additionalProperties": false }, "TimePitchShiftingMethod": { "type": "string", "enum": [ "sonic", "rubberband" ] }, "RubberbandOptions": { "type": "object", "properties": { "stretch": { "type": "string", "enum": [ "elastic", "precise" ] }, "transients": { "type": "string", "enum": [ "crisp", "mixed", "smooth" ] }, "detector": { "type": "string", "enum": [ "compound", "percussive", "soft" ] }, 
"phase": { "type": "string", "enum": [ "laminar", "independent" ] }, "window": { "type": "string", "enum": [ "standard", "long", "short" ] }, "smoothing": { "type": "string", "enum": [ "off", "on" ] }, "formant": { "type": "string", "enum": [ "shifted", "shited", "preserved" ] }, "pitch": { "type": "string", "enum": [ "high-speed", "high-quality", "high-consistency" ] }, "channels": { "type": "string", "enum": [ "apart", "together" ] }, "engine": { "type": "string", "enum": [ "faster", "finer" ] } }, "additionalProperties": false }, "OpenAICloudTTSOptions": { "type": "object", "properties": { "apiKey": { "type": "string" }, "organization": { "type": "string" }, "baseURL": { "type": "string" }, "model": { "type": "string", "enum": [ "tts-1", "tts-1-hd", "gpt-4o-mini-tts" ] }, "instructions": { "type": "string" }, "timeout": { "type": "number" }, "maxRetries": { "type": "number" } }, "additionalProperties": false }, "ElevenLabsTTSOptions": { "type": "object", "properties": { "apiKey": { "type": "string" }, "modelId": { "type": "string" }, "stability": { "type": "number" }, "similarityBoost": { "type": "number" }, "style": { "type": "number" }, "useSpeakerBoost": { "type": "boolean" }, "seed": { "type": "number" } }, "additionalProperties": false }, "DeepgramTTSOptions": { "type": "object", "properties": { "apiKey": { "type": "string" } }, "additionalProperties": false }, "VoiceListRequestOptions": { "type": "object", "properties": { "engine": { "$ref": "#/definitions/SynthesisEngine" }, "language": { "type": "string" }, "voice": { "type": "string" }, "voiceGender": { "$ref": "#/definitions/VoiceGender" }, "speed": { "type": "number" }, "pitch": { "type": "number" }, "pitchVariation": { "type": "number" }, "splitToSentences": { "type": "boolean" }, "ssml": { "type": "boolean" }, "segmentEndPause": { "type": "number" }, "sentenceEndPause": { "type": "number" }, "customLexiconPaths": { "type": "array", "items": { "type": "string" } }, "plainText": { "$ref": 
"#/definitions/PlainTextOptions" }, "alignment": { "$ref": "#/definitions/AlignmentOptions" }, "postProcessing": { "type": "object", "properties": { "normalizeAudio": { "type": "boolean" }, "targetPeak": { "type": "number" }, "maxGainIncrease": { "type": "number" }, "speed": { "type": "number" }, "pitch": { "type": "number" }, "timePitchShiftingMethod": { "$ref": "#/definitions/TimePitchShiftingMethod" }, "rubberband": { "$ref": "#/definitions/RubberbandOptions" } }, "additionalProperties": false }, "outputAudioFormat": { "type": "object", "properties": { "codec": { "type": "string", "enum": [ "wav", "mp3", "opus", "m4a", "ogg", "flac" ] }, "bitrate": { "type": "number" } }, "additionalProperties": false }, "languageDetection": { "$ref": "#/definitions/TextLanguageDetectionOptions" }, "subtitles": { "$ref": "#/definitions/SubtitlesConfig" }, "vits": { "type": "object", "properties": { "speakerId": { "type": "number" }, "provider": { "$ref": "#/definitions/OnnxExecutionProvider" } }, "additionalProperties": false }, "kokoro": { "type": "object", "properties": { "provider": { "$ref": "#/definitions/OnnxExecutionProvider" }, "model": { "type": "string", "enum": [ "82m-v1.0-fp32", "82m-v1.0-quantized" ] } }, "additionalProperties": false }, "pico": { "type": "object", "additionalProperties": false }, "flite": { "type": "object", "additionalProperties": false }, "gnuspeech": { "type": "object", "properties": { "tempo": { "type": "number" }, "controlRate": { "type": "number" }, "debug": { "type": "boolean" } }, "additionalProperties": false }, "espeak": { "type": "object", "properties": { "rate": { "type": "number" }, "pitch": { "type": "number" }, "pitchRange": { "type": "number" }, "useKlatt": { "type": "boolean" }, "insertSeparators": { "type": "boolean" } }, "additionalProperties": false }, "sam": { "type": "object", "properties": { "pitch": { "type": "number" }, "speed": { "type": "number" }, "mouth": { "type": "number" }, "throat": { "type": "number" } }, 
"additionalProperties": false }, "sapi": { "type": "object", "properties": { "rate": { "type": "number" } }, "additionalProperties": false }, "msspeech": { "type": "object", "properties": { "rate": { "type": "number" } }, "additionalProperties": false }, "coquiServer": { "type": "object", "properties": { "serverUrl": { "type": "string" }, "speakerId": { "type": [ "string", "null" ] } }, "additionalProperties": false }, "googleCloud": { "type": "object", "properties": { "apiKey": { "type": "string" }, "pitchDeltaSemitones": { "type": "number" }, "customVoice": { "type": "object", "properties": { "model": { "type": "string" }, "reportedUsage": { "type": "string" } }, "additionalProperties": false } }, "additionalProperties": false }, "microsoftAzure": { "type": "object", "properties": { "subscriptionKey": { "type": "string" }, "serviceRegion": { "type": "string" }, "pitchDeltaHz": { "type": "number" } }, "additionalProperties": false }, "amazonPolly": { "type": "object", "properties": { "region": { "type": "string" }, "accessKeyId": { "type": "string" }, "secretAccessKey": { "type": "string" }, "pollyEngine": { "type": "string", "enum": [ "standard", "neural" ] }, "lexiconNames": { "type": "array", "items": { "type": "string" } } }, "additionalProperties": false }, "openAICloud": { "$ref": "#/definitions/OpenAICloudTTSOptions" }, "elevenLabs": { "$ref": "#/definitions/ElevenLabsTTSOptions" }, "deepgram": { "$ref": "#/definitions/DeepgramTTSOptions" }, "googleTranslate": { "type": "object", "properties": { "tld": { "type": "string" } }, "additionalProperties": false }, "microsoftEdge": { "type": "object", "properties": { "trustedClientToken": { "type": "string" }, "pitchDeltaHz": { "type": "number" } }, "additionalProperties": false }, "streamlabsPolly": { "type": "object", "additionalProperties": false }, "cache": { "type": "object", "properties": { "path": { "type": "string" }, "duration": { "type": "number" } }, "additionalProperties": false } }, 
"additionalProperties": false }, "TranslationAlignmentOptions": { "type": "object", "properties": { "engine": { "$ref": "#/definitions/TranslationAlignmentEngine" }, "sourceLanguage": { "type": "string" }, "targetLanguage": { "type": "string" }, "isolate": { "type": "boolean" }, "crop": { "type": "boolean" }, "languageDetection": { "$ref": "#/definitions/SpeechLanguageDetectionOptions" }, "vad": { "$ref": "#/definitions/VADOptions" }, "plainText": { "$ref": "#/definitions/PlainTextOptions" }, "subtitles": { "$ref": "#/definitions/SubtitlesConfig" }, "sourceSeparation": { "$ref": "#/definitions/SourceSeparationOptions" }, "whisper": { "$ref": "#/definitions/WhisperAlignmentOptions" } }, "additionalProperties": false }, "TranslationAlignmentEngine": { "type": "string", "const": "whisper" }, "TranscriptAndTranslationAlignmentOptions": { "type": "object", "properties": { "engine": { "$ref": "#/definitions/TranscriptAndTranslationAlignmentEngine" }, "sourceLanguage": { "type": "string" }, "targetLanguage": { "type": "string" }, "isolate": { "type": "boolean" }, "crop": { "type": "boolean" }, "alignment": { "$ref": "#/definitions/AlignmentOptions" }, "timelineAlignment": { "$ref": "#/definitions/TimelineTranslationAlignmentOptions" }, "languageDetection": { "$ref": "#/definitions/TextLanguageDetectionOptions" }, "vad": { "$ref": "#/definitions/VADOptions" }, "plainText": { "$ref": "#/definitions/PlainTextOptions" }, "subtitles": { "$ref": "#/definitions/SubtitlesConfig" }, "sourceSeparation": { "$ref": "#/definitions/SourceSeparationOptions" } }, "additionalProperties": false }, "TranscriptAndTranslationAlignmentEngine": { "type": "string", "const": "two-stage" }, "TimelineTranslationAlignmentOptions": { "type": "object", "properties": { "engine": { "type": "string", "const": "e5" }, "sourceLanguage": { "type": "string" }, "targetLanguage": { "type": "string" }, "audio": { "$ref": "#/definitions/AudioSourceParam" }, "languageDetection": { "$ref": 
"#/definitions/TextLanguageDetectionOptions" }, "subtitles": { "$ref": "#/definitions/SubtitlesConfig" }, "e5": { "type": "object", "properties": { "model": { "type": "string", "const": "small-fp16" } }, "required": [ "model" ], "additionalProperties": false } }, "additionalProperties": false }, "AudioSourceParam": { "anyOf": [ { "type": "string" }, { "type": "object", "properties": { "BYTES_PER_ELEMENT": { "type": "number" }, "buffer": { "type": "object", "properties": { "byteLength": { "type": "number" } }, "required": [ "byteLength" ], "additionalProperties": false }, "byteLength": { "type": "number" }, "byteOffset": { "type": "number" }, "length": { "type": "number" } }, "required": [ "BYTES_PER_ELEMENT", "buffer", "byteLength", "byteOffset", "length" ], "additionalProperties": { "type": "number" } }, { "$ref": "#/definitions/RawAudio" } ] }, "RawAudio": { "type": "object", "properties": { "audioChannels": { "type": "array", "items": { "type": "object", "properties": { "BYTES_PER_ELEMENT": { "type": "number" }, "buffer": { "type": "object", "properties": { "byteLength": { "type": "number" } }, "required": [ "byteLength" ], "additionalProperties": false }, "byteLength": { "type": "number" }, "byteOffset": { "type": "number" }, "length": { "type": "number" } }, "required": [ "BYTES_PER_ELEMENT", "buffer", "byteLength", "byteOffset", "length" ], "additionalProperties": { "type": "number" } } }, "sampleRate": { "type": "number" } }, "required": [ "audioChannels", "sampleRate" ], "additionalProperties": false }, "SpeechTranslationOptions": { "type": "object", "properties": { "engine": { "$ref": "#/definitions/SpeechTranslationEngine" }, "sourceLanguage": { "type": "string" }, "targetLanguage": { "type": "string" }, "crop": { "type": "boolean" }, "isolate": { "type": "boolean" }, "languageDetection": { "$ref": "#/definitions/SpeechLanguageDetectionOptions" }, "subtitles": { "$ref": "#/definitions/SubtitlesConfig" }, "vad": { "$ref": "#/definitions/VADOptions" }, 
"sourceSeparation": { "$ref": "#/definitions/SourceSeparationOptions" }, "whisper": { "$ref": "#/definitions/WhisperOptions" }, "whisperCpp": { "$ref": "#/definitions/WhisperCppOptions" }, "openAICloud": { "$ref": "#/definitions/OpenAICloudSTTOptions" } }, "additionalProperties": false }, "SpeechTranslationEngine": { "type": "string", "enum": [ "whisper", "whisper.cpp", "openai-cloud" ] }, "TextTranslationOptions": { "type": "object", "properties": { "engine": { "$ref": "#/definitions/TextTranslationEngine" }, "sourceLanguage": { "type": "string" }, "targetLanguage": { "type": "string" }, "languageDetection": { "$ref": "#/definitions/TextLanguageDetectionOptions" }, "plainText": { "$ref": "#/definitions/PlainTextOptions" }, "nllb": { "type": "object", "additionalProperties": false }, "googleTranslate": { "$ref": "#/definitions/GoogleTranslateTextTranslationOptions" }, "deepl": { "type": "object", "additionalProperties": false } }, "additionalProperties": false }, "TextTranslationEngine": { "type": "string", "enum": [ "nllb", "google-translate", "deepl" ] }, "GoogleTranslateTextTranslationOptions": { "type":