UNPKG

@huggingface/tasks

Version:
87 lines (84 loc) 2.06 kB
import type { TaskDataCustom } from "../index.js";

/**
 * Static task description for Text-to-Speech.
 *
 * Bundles the example datasets, a text -> audio demo, evaluation metrics,
 * recommended models and Spaces, a prose summary, the widget model list and a
 * YouTube video id into a single `TaskDataCustom` record.
 *
 * NOTE(review): `canonicalId` is "text-to-audio" while the summary describes
 * Text-to-Speech; presumably this task is folded into the broader
 * text-to-audio task - confirm against the `TaskDataCustom` definition.
 */
const taskData: TaskDataCustom = {
  canonicalId: "text-to-audio",
  // Example datasets listed for this task.
  datasets: [
    {
      description: "10K hours of multi-speaker English dataset.",
      id: "parler-tts/mls_eng_10k",
    },
    {
      description: "Multi-speaker English dataset.",
      id: "mythicinfinity/libritts_r",
    },
    {
      description: "Multi-lingual dataset.",
      id: "facebook/multilingual_librispeech",
    },
  ],
  // Demo shape: one text input, one audio file output.
  demo: {
    inputs: [
      {
        label: "Input",
        content: "I love audio models on the Hub!",
        type: "text",
      },
    ],
    outputs: [
      {
        filename: "audio.wav",
        type: "audio",
      },
    ],
  },
  metrics: [
    {
      description:
        "The Mel Cepstral Distortion (MCD) metric is used to calculate the quality of generated speech.",
      id: "mel cepstral distortion",
    },
  ],
  // Recommended models for this task.
  models: [
    {
      description: "Small yet powerful TTS model.",
      id: "KittenML/kitten-tts-nano-0.1",
    },
    {
      description: "Bleeding edge TTS model.",
      id: "ResembleAI/chatterbox",
    },
    {
      description: "A massively multi-lingual TTS model.",
      id: "fishaudio/fish-speech-1.5",
    },
    {
      description: "A text-to-dialogue model.",
      id: "nari-labs/Dia-1.6B-0626",
    },
  ],
  // Recommended Spaces (hosted applications) for this task.
  spaces: [
    {
      // Wording fixed: the original read "for generate".
      description: "An application for generating high quality speech in different languages.",
      id: "hexgrad/Kokoro-TTS",
    },
    {
      description: "A multilingual text-to-speech application.",
      id: "fishaudio/fish-speech-1",
    },
    {
      description: "Performant TTS application.",
      id: "ResembleAI/Chatterbox",
    },
    {
      description: "An application to compare different TTS models.",
      id: "TTS-AGI/TTS-Arena-V2",
    },
    {
      description: "An application that generates podcast episodes.",
      id: "ngxson/kokoro-podcast-generator",
    },
  ],
  summary:
    "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",
  widgetModels: ["suno/bark"],
  youtubeId: "NW62DpzJ274",
};

export default taskData;