@huggingface/tasks
Version:
List of ML tasks for huggingface.co/tasks
87 lines (84 loc) • 2.06 kB
text/typescript
import type { TaskDataCustom } from "../index.js";
/**
 * Static task-page content for Text-to-Speech (TTS): curated datasets, models,
 * Spaces, an evaluation metric, a demo input/output pair, and a summary.
 *
 * Every `id` below is a `namespace/name` identifier, except the metric `id`,
 * which is a free-form metric name.
 */
const taskData: TaskDataCustom = {
  // NOTE(review): this TTS entry points at "text-to-audio" as its canonical id —
  // presumably TTS is treated as a sub-task of text-to-audio; confirm against TaskDataCustom.
  canonicalId: "text-to-audio",

  // Datasets suggested for this task.
  datasets: [
    {
      description: "10K hours of multi-speaker English dataset.",
      id: "parler-tts/mls_eng_10k",
    },
    {
      description: "Multi-speaker English dataset.",
      id: "mythicinfinity/libritts_r",
    },
    {
      description: "Multi-lingual dataset.",
      id: "facebook/multilingual_librispeech",
    },
  ],

  // Example shown for the task: one text input producing one audio output.
  demo: {
    inputs: [
      {
        label: "Input",
        content: "I love audio models on the Hub!",
        type: "text",
      },
    ],
    outputs: [
      {
        filename: "audio.wav",
        type: "audio",
      },
    ],
  },

  // Metrics used to evaluate generated speech.
  metrics: [
    {
      description: "The Mel Cepstral Distortion (MCD) metric is used to calculate the quality of generated speech.",
      id: "mel cepstral distortion",
    },
  ],

  // Models recommended for this task.
  models: [
    {
      description: "Small yet powerful TTS model.",
      id: "KittenML/kitten-tts-nano-0.1",
    },
    {
      description: "Bleeding edge TTS model.",
      id: "ResembleAI/chatterbox",
    },
    {
      description: "A massively multi-lingual TTS model.",
      id: "fishaudio/fish-speech-1.5",
    },
    {
      description: "A text-to-dialogue model.",
      id: "nari-labs/Dia-1.6B-0626",
    },
  ],

  // Demo applications (Spaces) showcasing this task.
  spaces: [
    {
      // Grammar fix: previously read "An application for generate ...".
      description: "An application that generates high quality speech in different languages.",
      id: "hexgrad/Kokoro-TTS",
    },
    {
      description: "A multilingual text-to-speech application.",
      id: "fishaudio/fish-speech-1",
    },
    {
      description: "Performant TTS application.",
      id: "ResembleAI/Chatterbox",
    },
    {
      description: "An application to compare different TTS models.",
      id: "TTS-AGI/TTS-Arena-V2",
    },
    {
      description: "An application that generates podcast episodes.",
      id: "ngxson/kokoro-podcast-generator",
    },
  ],

  // Short human-readable description of the task.
  summary:
    "Text-to-Speech (TTS) is the task of generating natural sounding speech given text input. TTS models can be extended to have a single model that generates speech for multiple speakers and multiple languages.",

  // Model id(s) used by the task widget.
  widgetModels: ["suno/bark"],

  // Identifier of the YouTube video associated with this task.
  youtubeId: "NW62DpzJ274",
};
export default taskData;