@huggingface/tasks
Version:
List of ML tasks for huggingface.co/tasks
95 lines (92 loc) • 2.41 kB
text/typescript
import type { TaskDataCustom } from "../index.js";
const taskData: TaskDataCustom = {
datasets: [
{
description: "31,175 hours of multilingual audio-text dataset in 108 languages.",
id: "mozilla-foundation/common_voice_17_0",
},
{
description: "Multilingual and diverse audio dataset with 101k hours of audio.",
id: "amphion/Emilia-Dataset",
},
{
description: "A dataset with 44.6k hours of English speaker data and 6k hours of other language speakers.",
id: "parler-tts/mls_eng",
},
{
description: "A multilingual audio dataset with 370K hours of audio.",
id: "espnet/yodas",
},
],
demo: {
inputs: [
{
filename: "input.flac",
type: "audio",
},
],
outputs: [
{
/// GOING ALONG SLUSHY COUNTRY ROADS AND SPEAKING TO DAMP AUDIENCES I
label: "Transcript",
content: "Going along slushy country roads and speaking to damp audiences in...",
type: "text",
},
],
},
metrics: [
{
description: "",
id: "wer",
},
{
description: "",
id: "cer",
},
],
models: [
{
description: "A powerful ASR model by OpenAI.",
id: "openai/whisper-large-v3",
},
{
description: "A good generic speech model by MetaAI for fine-tuning.",
id: "facebook/w2v-bert-2.0",
},
{
description: "An end-to-end model that performs ASR and Speech Translation by MetaAI.",
id: "facebook/seamless-m4t-v2-large",
},
{
description: "A powerful multilingual ASR and Speech Translation model by Nvidia.",
id: "nvidia/canary-1b",
},
{
description: "Powerful speaker diarization model.",
id: "pyannote/speaker-diarization-3.1",
},
],
spaces: [
{
description: "A powerful general-purpose speech recognition application.",
id: "hf-audio/whisper-large-v3",
},
{
description: "Latest ASR model from Useful Sensors.",
id: "mrfakename/Moonshinex",
},
{
description: "A high quality speech and text translation model by Meta.",
id: "facebook/seamless_m4t",
},
{
description: "A powerful multilingual ASR and Speech Translation model by Nvidia",
id: "nvidia/canary-1b",
},
],
summary:
"Automatic Speech Recognition (ASR), also known as Speech to Text (STT), is the task of transcribing a given audio to text. It has many applications, such as voice user interfaces.",
widgetModels: ["openai/whisper-large-v3"],
youtubeId: "TksaY_FDgnk",
};
export default taskData;