@huggingface/tasks
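What follows is the CommonJS build of the package's snippet-generator module: each exported function takes a model descriptor and returns an array of ready-to-copy code snippets for the corresponding library. As orientation, here is a minimal consumer sketch; the file name in the `require()` call and the shape of the `model` object are illustrative assumptions, not something this file declares:

// Sketch only — assumed file name and a hand-rolled model descriptor.
const snippets = require("./model-libraries-snippets.js");
const model = { id: "timm/resnet50.a1_in1k", tags: [], config: {} };
// `timm` only reads model.id; other generators also branch on tags, config, and pipeline_tag.
for (const s of snippets.timm(model)) {
    console.log(s);
}
// Prints (roughly):
// import timm
// model = timm.create_model("hf_hub:timm/resnet50.a1_in1k", pretrained=True)
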
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.renderformer = exports.relik = exports.pyannote_audio = exports.pyannote_audio_pipeline = exports.phantom_wan = exports.perception_encoder = exports.paddleocr = exports.paddlenlp = exports.open_clip = exports.mesh_anything = exports.matanyone = exports.mars5_tts = exports.mamba_ssm = exports.tf_keras = exports.lerobot = exports.llama_cpp_python = exports.lightning_ir = exports.kittentts = exports.kimi_audio = exports.keras_hub = exports.keras = exports.htrflow = exports.indextts = exports.gliner = exports.flair = exports.fairseq = exports.espnet = exports.espnetASR = exports.espnetTTS = exports.edsnlp = exports.cartesia_mlx = exports.cartesia_pytorch = exports.diffusionkit = exports.diffusers = exports.describe_anything = exports.dia = exports.derm_foundation = exports.depth_pro = exports.depth_anything_v2 = exports.cxr_foundation = exports.contexttab = exports.chatterbox = exports.bm25s = exports.bertopic = exports.ben2 = exports.audioseal = exports.asteroid = exports.araclip = exports.allennlp = exports.adapters = void 0; exports.zonos = exports.hezar = exports.threedtopia_xl = exports.whisperkit = exports.audiocraft = exports.anemoi = exports.pythae = exports.pxia = exports.outetts = exports.nemo = exports.pruna = exports.model2vec = exports.mlx = exports.mlxim = exports.univa = exports.swarmformer = exports.birefnet = exports.ultralytics = exports.chattts = exports.vui = exports.voicecraft = exports.lvface = exports.vfimamba = exports.videoprism = exports.sana = exports.sentis = exports.mlAgents = exports.stableBaselines3 = exports.fasttext = exports.peft = exports.transformersJS = exports.transformers = exports.terratorch = exports.speechbrain = exports.stanza = exports.span_marker = exports.spacy = exports.setfit = exports.sentenceTransformers = exports.sampleFactory = exports.sam2 = exports.fastai = exports.stable_audio_tools = exports.sklearn = exports.seed_story = exports.saelens = exports.timm = exports.tensorflowtts = void 0; const library_to_tasks_js_1 = require("./library-to-tasks.js"); const inputs_js_1 = require("./snippets/inputs.js"); const common_js_1 = require("./snippets/common.js"); const TAG_CUSTOM_CODE = "custom_code"; function nameWithoutNamespace(modelId) { const splitted = modelId.split("/"); return splitted.length === 1 ? 
splitted[0] : splitted[1]; } const escapeStringForJson = (str) => JSON.stringify(str).slice(1, -1); // slice is needed to remove surrounding quotes added by JSON.stringify //#region snippets const adapters = (model) => [ `from adapters import AutoAdapterModel model = AutoAdapterModel.from_pretrained("${model.config?.adapter_transformers?.model_name}") model.load_adapter("${model.id}", set_active=True)`, ]; exports.adapters = adapters; const allennlpUnknown = (model) => [ `import allennlp_models from allennlp.predictors.predictor import Predictor predictor = Predictor.from_path("hf://${model.id}")`, ]; const allennlpQuestionAnswering = (model) => [ `import allennlp_models from allennlp.predictors.predictor import Predictor predictor = Predictor.from_path("hf://${model.id}") predictor_input = {"passage": "My name is Wolfgang and I live in Berlin", "question": "Where do I live?"} predictions = predictor.predict_json(predictor_input)`, ]; const allennlp = (model) => { if (model.tags.includes("question-answering")) { return allennlpQuestionAnswering(model); } return allennlpUnknown(model); }; exports.allennlp = allennlp; const araclip = (model) => [ `from araclip import AraClip model = AraClip.from_pretrained("${model.id}")`, ]; exports.araclip = araclip; const asteroid = (model) => [ `from asteroid.models import BaseModel model = BaseModel.from_pretrained("${model.id}")`, ]; exports.asteroid = asteroid; const audioseal = (model) => { const watermarkSnippet = `# Watermark Generator from audioseal import AudioSeal model = AudioSeal.load_generator("${model.id}") # pass a tensor (tensor_wav) of shape (batch, channels, samples) and a sample rate wav, sr = tensor_wav, 16000 watermark = model.get_watermark(wav, sr) watermarked_audio = wav + watermark`; const detectorSnippet = `# Watermark Detector from audioseal import AudioSeal detector = AudioSeal.load_detector("${model.id}") result, message = detector.detect_watermark(watermarked_audio, sr)`; return [watermarkSnippet, detectorSnippet]; }; exports.audioseal = audioseal; function get_base_diffusers_model(model) { return model.cardData?.base_model?.toString() ?? "fill-in-base-model"; } function get_prompt_from_diffusers_model(model) { const prompt = model.widgetData?.[0]?.text ?? model.cardData?.instance_prompt; if (prompt) { return escapeStringForJson(prompt); } } const ben2 = (model) => [ `import requests from PIL import Image from ben2 import AutoModel url = "https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg" image = Image.open(requests.get(url, stream=True).raw) model = AutoModel.from_pretrained("${model.id}") model.to("cuda").eval() foreground = model.inference(image) `, ]; exports.ben2 = ben2; const bertopic = (model) => [ `from bertopic import BERTopic model = BERTopic.load("${model.id}")`, ]; exports.bertopic = bertopic; const bm25s = (model) => [ `from bm25s.hf import BM25HF retriever = BM25HF.load_from_hub("${model.id}")`, ]; exports.bm25s = bm25s; const chatterbox = () => [ `# pip install chatterbox-tts import torchaudio as ta from chatterbox.tts import ChatterboxTTS model = ChatterboxTTS.from_pretrained(device="cuda") text = "Ezreal and Jinx teamed up with Ahri, Yasuo, and Teemo to take down the enemy's Nexus in an epic late-game pentakill." 
wav = model.generate(text) ta.save("test-1.wav", wav, model.sr) # If you want to synthesize with a different voice, specify the audio prompt AUDIO_PROMPT_PATH="YOUR_FILE.wav" wav = model.generate(text, audio_prompt_path=AUDIO_PROMPT_PATH) ta.save("test-2.wav", wav, model.sr)`, ]; exports.chatterbox = chatterbox; const contexttab = () => { const installSnippet = `pip install git+https://github.com/SAP-samples/contexttab`; const classificationSnippet = `# Run a classification task from sklearn.datasets import load_breast_cancer from sklearn.metrics import accuracy_score from sklearn.model_selection import train_test_split from contexttab import ConTextTabClassifier # Load sample data X, y = load_breast_cancer(return_X_y=True) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42) # Initialize a classifier # You can omit checkpoint and checkpoint_revision to use the default model clf = ConTextTabClassifier(checkpoint="l2/base.pt", checkpoint_revision="v1.0.0", bagging=1, max_context_size=2048) clf.fit(X_train, y_train) # Predict probabilities prediction_probabilities = clf.predict_proba(X_test) # Predict labels predictions = clf.predict(X_test) print("Accuracy", accuracy_score(y_test, predictions))`; const regressionsSnippet = `# Run a regression task from sklearn.datasets import fetch_openml from sklearn.metrics import r2_score from sklearn.model_selection import train_test_split from contexttab import ConTextTabRegressor # Load sample data df = fetch_openml(data_id=531, as_frame=True) X = df.data y = df.target.astype(float) # Train-test split X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42) # Initialize the regressor # You can omit checkpoint and checkpoint_revision to use the default model regressor = ConTextTabRegressor(checkpoint="l2/base.pt", checkpoint_revision="v1.0.0", bagging=1, max_context_size=2048) regressor.fit(X_train, y_train) # Predict on the test set predictions = regressor.predict(X_test) r2 = r2_score(y_test, predictions) print("R² Score:", r2)`; return [installSnippet, classificationSnippet, regressionsSnippet]; }; exports.contexttab = contexttab; const cxr_foundation = () => [ `# pip install git+https://github.com/Google-Health/cxr-foundation.git#subdirectory=python # Load image as grayscale (Stillwaterising, CC0, via Wikimedia Commons) import requests from PIL import Image from io import BytesIO image_url = "https://upload.wikimedia.org/wikipedia/commons/c/c8/Chest_Xray_PA_3-8-2010.png" img = Image.open(requests.get(image_url, headers={'User-Agent': 'Demo'}, stream=True).raw).convert('L') # Run inference from clientside.clients import make_hugging_face_client cxr_client = make_hugging_face_client('cxr_model') print(cxr_client.get_image_embeddings_from_images([img]))`, ]; exports.cxr_foundation = cxr_foundation; const depth_anything_v2 = (model) => { let encoder; let features; let out_channels; encoder = "<ENCODER>"; features = "<NUMBER_OF_FEATURES>"; out_channels = "<OUT_CHANNELS>"; if (model.id === "depth-anything/Depth-Anything-V2-Small") { encoder = "vits"; features = "64"; out_channels = "[48, 96, 192, 384]"; } else if (model.id === "depth-anything/Depth-Anything-V2-Base") { encoder = "vitb"; features = "128"; out_channels = "[96, 192, 384, 768]"; } else if (model.id === "depth-anything/Depth-Anything-V2-Large") { encoder = "vitl"; features = "256"; out_channels = "[256, 512, 1024, 1024"; } return [ ` # Install from https://github.com/DepthAnything/Depth-Anything-V2 # Load the model and 
infer depth from an image import cv2 import torch from depth_anything_v2.dpt import DepthAnythingV2 # instantiate the model model = DepthAnythingV2(encoder="${encoder}", features=${features}, out_channels=${out_channels}) # load the weights filepath = hf_hub_download(repo_id="${model.id}", filename="depth_anything_v2_${encoder}.pth", repo_type="model") state_dict = torch.load(filepath, map_location="cpu") model.load_state_dict(state_dict).eval() raw_img = cv2.imread("your/image/path") depth = model.infer_image(raw_img) # HxW raw depth map in numpy `, ]; }; exports.depth_anything_v2 = depth_anything_v2; const depth_pro = (model) => { const installSnippet = `# Download checkpoint pip install huggingface-hub huggingface-cli download --local-dir checkpoints ${model.id}`; const inferenceSnippet = `import depth_pro # Load model and preprocessing transform model, transform = depth_pro.create_model_and_transforms() model.eval() # Load and preprocess an image. image, _, f_px = depth_pro.load_rgb("example.png") image = transform(image) # Run inference. prediction = model.infer(image, f_px=f_px) # Results: 1. Depth in meters depth = prediction["depth"] # Results: 2. Focal length in pixels focallength_px = prediction["focallength_px"]`; return [installSnippet, inferenceSnippet]; }; exports.depth_pro = depth_pro; const derm_foundation = () => [ `from huggingface_hub import from_pretrained_keras import tensorflow as tf, requests # Load and format input IMAGE_URL = "https://storage.googleapis.com/dx-scin-public-data/dataset/images/3445096909671059178.png" input_tensor = tf.train.Example( features=tf.train.Features( feature={ "image/encoded": tf.train.Feature( bytes_list=tf.train.BytesList(value=[requests.get(IMAGE_URL, stream=True).content]) ) } ) ).SerializeToString() # Load model and run inference loaded_model = from_pretrained_keras("google/derm-foundation") infer = loaded_model.signatures["serving_default"] print(infer(inputs=tf.constant([input_tensor])))`, ]; exports.derm_foundation = derm_foundation; const dia = (model) => [ `import soundfile as sf from dia.model import Dia model = Dia.from_pretrained("${model.id}") text = "[S1] Dia is an open weights text to dialogue model. [S2] You get full control over scripts and voices. [S1] Wow. Amazing. (laughs) [S2] Try it now on Git hub or Hugging Face." output = model.generate(text) sf.write("simple.mp3", output, 44100)`, ]; exports.dia = dia; const describe_anything = (model) => [ `# pip install git+https://github.com/NVlabs/describe-anything from huggingface_hub import snapshot_download from dam import DescribeAnythingModel snapshot_download(${model.id}, local_dir="checkpoints") dam = DescribeAnythingModel( model_path="checkpoints", conv_mode="v1", prompt_mode="focal_prompt", )`, ]; exports.describe_anything = describe_anything; const diffusersDefaultPrompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"; const diffusersImg2ImgDefaultPrompt = "Turn this cat into a dog"; const diffusersVideoDefaultPrompt = "A man with short gray hair plays a red electric guitar."; const diffusers_default = (model) => [ `from diffusers import DiffusionPipeline pipe = DiffusionPipeline.from_pretrained("${model.id}") prompt = "${get_prompt_from_diffusers_model(model) ?? 
diffusersDefaultPrompt}" image = pipe(prompt).images[0]`, ]; const diffusers_image_to_image = (model) => [ `from diffusers import DiffusionPipeline from diffusers.utils import load_image pipe = DiffusionPipeline.from_pretrained("${model.id}") prompt = "${get_prompt_from_diffusers_model(model) ?? diffusersImg2ImgDefaultPrompt}" input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png") image = pipe(image=input_image, prompt=prompt).images[0]`, ]; const diffusers_image_to_video = (model) => [ `import torch from diffusers import DiffusionPipeline from diffusers.utils import load_image, export_to_video pipe = DiffusionPipeline.from_pretrained("${model.id}", torch_dtype=torch.float16) pipe.to("cuda") prompt = "${get_prompt_from_diffusers_model(model) ?? diffusersVideoDefaultPrompt}" image = load_image( "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/guitar-man.png" ) output = pipe(image=image, prompt=prompt).frames[0] export_to_video(output, "output.mp4")`, ]; const diffusers_controlnet = (model) => [ `from diffusers import ControlNetModel, StableDiffusionControlNetPipeline controlnet = ControlNetModel.from_pretrained("${model.id}") pipe = StableDiffusionControlNetPipeline.from_pretrained( "${get_base_diffusers_model(model)}", controlnet=controlnet )`, ]; const diffusers_lora = (model) => [ `from diffusers import DiffusionPipeline pipe = DiffusionPipeline.from_pretrained("${get_base_diffusers_model(model)}") pipe.load_lora_weights("${model.id}") prompt = "${get_prompt_from_diffusers_model(model) ?? diffusersDefaultPrompt}" image = pipe(prompt).images[0]`, ]; const diffusers_lora_image_to_image = (model) => [ `from diffusers import DiffusionPipeline from diffusers.utils import load_image pipe = DiffusionPipeline.from_pretrained("${get_base_diffusers_model(model)}") pipe.load_lora_weights("${model.id}") prompt = "${get_prompt_from_diffusers_model(model) ?? diffusersImg2ImgDefaultPrompt}" input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/cat.png") image = pipe(image=input_image, prompt=prompt).images[0]`, ]; const diffusers_lora_text_to_video = (model) => [ `from diffusers import DiffusionPipeline from diffusers.utils import export_to_video pipe = DiffusionPipeline.from_pretrained("${get_base_diffusers_model(model)}") pipe.load_lora_weights("${model.id}") prompt = "${get_prompt_from_diffusers_model(model) ?? diffusersVideoDefaultPrompt}" output = pipe(prompt=prompt).frames[0] export_to_video(output, "output.mp4")`, ]; const diffusers_lora_image_to_video = (model) => [ `from diffusers import DiffusionPipeline from diffusers.utils import load_image, export_to_video pipe = DiffusionPipeline.from_pretrained("${get_base_diffusers_model(model)}") pipe.load_lora_weights("${model.id}") prompt = "${get_prompt_from_diffusers_model(model) ?? 
diffusersVideoDefaultPrompt}" input_image = load_image("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/guitar-man.png") image = pipe(image=input_image, prompt=prompt).frames[0] export_to_video(output, "output.mp4")`, ]; const diffusers_textual_inversion = (model) => [ `from diffusers import DiffusionPipeline pipe = DiffusionPipeline.from_pretrained("${get_base_diffusers_model(model)}") pipe.load_textual_inversion("${model.id}")`, ]; const diffusers_flux_fill = (model) => [ `import torch from diffusers import FluxFillPipeline from diffusers.utils import load_image image = load_image("https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/cup.png") mask = load_image("https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/cup_mask.png") pipe = FluxFillPipeline.from_pretrained("${model.id}", torch_dtype=torch.bfloat16).to("cuda") image = pipe( prompt="a white paper cup", image=image, mask_image=mask, height=1632, width=1232, guidance_scale=30, num_inference_steps=50, max_sequence_length=512, generator=torch.Generator("cpu").manual_seed(0) ).images[0] image.save(f"flux-fill-dev.png")`, ]; const diffusers_inpainting = (model) => [ `import torch from diffusers import AutoPipelineForInpainting from diffusers.utils import load_image pipe = AutoPipelineForInpainting.from_pretrained("${model.id}", torch_dtype=torch.float16, variant="fp16").to("cuda") img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png" mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png" image = load_image(img_url).resize((1024, 1024)) mask_image = load_image(mask_url).resize((1024, 1024)) prompt = "a tiger sitting on a park bench" generator = torch.Generator(device="cuda").manual_seed(0) image = pipe( prompt=prompt, image=image, mask_image=mask_image, guidance_scale=8.0, num_inference_steps=20, # steps between 15 and 30 work well for us strength=0.99, # make sure to use \`strength\` below 1.0 generator=generator, ).images[0]`, ]; const diffusers = (model) => { if (model.tags.includes("StableDiffusionInpaintPipeline") || model.tags.includes("StableDiffusionXLInpaintPipeline")) { return diffusers_inpainting(model); } else if (model.tags.includes("controlnet")) { return diffusers_controlnet(model); } else if (model.tags.includes("lora")) { if (model.pipeline_tag === "image-to-image") { return diffusers_lora_image_to_image(model); } else if (model.pipeline_tag === "image-to-video") { return diffusers_lora_image_to_video(model); } else if (model.pipeline_tag === "text-to-video") { return diffusers_lora_text_to_video(model); } else { return diffusers_lora(model); } } else if (model.tags.includes("textual_inversion")) { return diffusers_textual_inversion(model); } else if (model.tags.includes("FluxFillPipeline")) { return diffusers_flux_fill(model); } else if (model.pipeline_tag === "image-to-video") { return diffusers_image_to_video(model); } else if (model.pipeline_tag === "image-to-image") { return diffusers_image_to_image(model); } else { return diffusers_default(model); } }; exports.diffusers = diffusers; const diffusionkit = (model) => { const sd3Snippet = `# Pipeline for Stable Diffusion 3 from diffusionkit.mlx import DiffusionPipeline pipeline = DiffusionPipeline( shift=3.0, use_t5=False, model_version=${model.id}, low_memory_mode=True, a16=True, w16=True, )`; const fluxSnippet = 
`# Pipeline for Flux
from diffusionkit.mlx import FluxPipeline
pipeline = FluxPipeline(
    shift=1.0,
    model_version="${model.id}",
    low_memory_mode=True,
    a16=True,
    w16=True,
)`; const generateSnippet = `# Image Generation
HEIGHT = 512
WIDTH = 512
NUM_STEPS = ${model.tags.includes("flux") ? 4 : 50}
CFG_WEIGHT = ${model.tags.includes("flux") ? 0 : 5}
image, _ = pipeline.generate_image(
    "a photo of a cat",
    cfg_weight=CFG_WEIGHT,
    num_steps=NUM_STEPS,
    latent_size=(HEIGHT // 8, WIDTH // 8),
)`; const pipelineSnippet = model.tags.includes("flux") ? fluxSnippet : sd3Snippet; return [pipelineSnippet, generateSnippet]; }; exports.diffusionkit = diffusionkit;
const cartesia_pytorch = (model) => [
    `# pip install --no-binary :all: cartesia-pytorch
from cartesia_pytorch import ReneLMHeadModel
from transformers import AutoTokenizer
model = ReneLMHeadModel.from_pretrained("${model.id}")
tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-1B-hf")
in_message = ["Rene Descartes was"]
inputs = tokenizer(in_message, return_tensors="pt")
outputs = model.generate(inputs.input_ids, max_length=50, top_k=100, top_p=0.99)
out_message = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(out_message)`,
];
exports.cartesia_pytorch = cartesia_pytorch;
const cartesia_mlx = (model) => [
    `import mlx.core as mx
import cartesia_mlx as cmx
model = cmx.from_pretrained("${model.id}")
model.set_dtype(mx.float32)
prompt = "Rene Descartes was"
for text in model.generate(
    prompt,
    max_tokens=500,
    eval_every_n=5,
    verbose=True,
    top_p=0.99,
    temperature=0.85,
):
    print(text, end="", flush=True)
`,
];
exports.cartesia_mlx = cartesia_mlx;
const edsnlp = (model) => {
    const packageName = nameWithoutNamespace(model.id).replaceAll("-", "_");
    return [
        `# Load it from the Hub directly
import edsnlp
nlp = edsnlp.load("${model.id}")
`,
        `# Or install it as a package
!pip install git+https://huggingface.co/${model.id}
# and import it as a module
import ${packageName}
nlp = ${packageName}.load()  # or edsnlp.load("${packageName}")
`,
    ];
};
exports.edsnlp = edsnlp;
const espnetTTS = (model) => [
    `from espnet2.bin.tts_inference import Text2Speech
model = Text2Speech.from_pretrained("${model.id}")
speech, *_ = model("text to generate speech from")`,
];
exports.espnetTTS = espnetTTS;
const espnetASR = (model) => [
    `import soundfile
from espnet2.bin.asr_inference import Speech2Text
model = Speech2Text.from_pretrained(
    "${model.id}"
)
speech, rate = soundfile.read("speech.wav")
text, *_ = model(speech)[0]`,
];
exports.espnetASR = espnetASR;
const espnetUnknown = () => [`unknown model type (must be text-to-speech or automatic-speech-recognition)`];
const espnet = (model) => {
    if (model.tags.includes("text-to-speech")) {
        return (0, exports.espnetTTS)(model);
    } else if (model.tags.includes("automatic-speech-recognition")) {
        return (0, exports.espnetASR)(model);
    }
    return espnetUnknown();
};
exports.espnet = espnet;
const fairseq = (model) => [
    `from fairseq.checkpoint_utils import load_model_ensemble_and_task_from_hf_hub
models, cfg, task = load_model_ensemble_and_task_from_hf_hub(
    "${model.id}"
)`,
];
exports.fairseq = fairseq;
const flair = (model) => [
    `from flair.models import SequenceTagger
tagger = SequenceTagger.load("${model.id}")`,
];
exports.flair = flair;
const gliner = (model) => [
    `from gliner import GLiNER
model = GLiNER.from_pretrained("${model.id}")`,
];
exports.gliner = gliner;
const indextts = (model) => [
    `# Download model
from huggingface_hub import snapshot_download
snapshot_download("${model.id}", local_dir="checkpoints")
from indextts.infer
import IndexTTS # Ensure config.yaml is present in the checkpoints directory tts = IndexTTS(model_dir="checkpoints", cfg_path="checkpoints/config.yaml") voice = "path/to/your/reference_voice.wav" # Path to the voice reference audio file text = "Hello, how are you?" output_path = "output_index.wav" tts.infer(voice, text, output_path)`, ]; exports.indextts = indextts; const htrflow = (model) => [ `# CLI usage # see docs: https://ai-riksarkivet.github.io/htrflow/latest/getting_started/quick_start.html htrflow pipeline <path/to/pipeline.yaml> <path/to/image>`, `# Python usage from htrflow.pipeline.pipeline import Pipeline from htrflow.pipeline.steps import Task from htrflow.models.framework.model import ModelClass pipeline = Pipeline( [ Task( ModelClass, {"model": "${model.id}"}, {} ), ])`, ]; exports.htrflow = htrflow; const keras = (model) => [ `# Available backend options are: "jax", "torch", "tensorflow". import os os.environ["KERAS_BACKEND"] = "jax" import keras model = keras.saving.load_model("hf://${model.id}") `, ]; exports.keras = keras; const _keras_hub_causal_lm = (modelId) => ` import keras_hub # Load CausalLM model (optional: use half precision for inference) causal_lm = keras_hub.models.CausalLM.from_preset("hf://${modelId}", dtype="bfloat16") causal_lm.compile(sampler="greedy") # (optional) specify a sampler # Generate text causal_lm.generate("Keras: deep learning for", max_length=64) `; const _keras_hub_text_to_image = (modelId) => ` import keras_hub # Load TextToImage model (optional: use half precision for inference) text_to_image = keras_hub.models.TextToImage.from_preset("hf://${modelId}", dtype="bfloat16") # Generate images with a TextToImage model. text_to_image.generate("Astronaut in a jungle") `; const _keras_hub_text_classifier = (modelId) => ` import keras_hub # Load TextClassifier model text_classifier = keras_hub.models.TextClassifier.from_preset( "hf://${modelId}", num_classes=2, ) # Fine-tune text_classifier.fit(x=["Thilling adventure!", "Total snoozefest."], y=[1, 0]) # Classify text text_classifier.predict(["Not my cup of tea."]) `; const _keras_hub_image_classifier = (modelId) => ` import keras_hub import keras # Load ImageClassifier model image_classifier = keras_hub.models.ImageClassifier.from_preset( "hf://${modelId}", num_classes=2, ) # Fine-tune image_classifier.fit( x=keras.random.randint((32, 64, 64, 3), 0, 256), y=keras.random.randint((32, 1), 0, 2), ) # Classify image image_classifier.predict(keras.random.randint((1, 64, 64, 3), 0, 256)) `; const _keras_hub_tasks_with_example = { CausalLM: _keras_hub_causal_lm, TextToImage: _keras_hub_text_to_image, TextClassifier: _keras_hub_text_classifier, ImageClassifier: _keras_hub_image_classifier, }; const _keras_hub_task_without_example = (task, modelId) => ` import keras_hub # Create a ${task} model task = keras_hub.models.${task}.from_preset("hf://${modelId}") `; const _keras_hub_generic_backbone = (modelId) => ` import keras_hub # Create a Backbone model unspecialized for any task backbone = keras_hub.models.Backbone.from_preset("hf://${modelId}") `; const keras_hub = (model) => { const modelId = model.id; const tasks = model.config?.keras_hub?.tasks ?? 
[]; const snippets = []; // First, generate tasks with examples for (const [task, snippet] of Object.entries(_keras_hub_tasks_with_example)) { if (tasks.includes(task)) { snippets.push(snippet(modelId)); } } // Then, add remaining tasks for (const task of tasks) { if (!Object.keys(_keras_hub_tasks_with_example).includes(task)) { snippets.push(_keras_hub_task_without_example(task, modelId)); } } // Finally, add generic backbone snippet snippets.push(_keras_hub_generic_backbone(modelId)); return snippets; }; exports.keras_hub = keras_hub; const kimi_audio = (model) => [ `# Example usage for KimiAudio # pip install git+https://github.com/MoonshotAI/Kimi-Audio.git from kimia_infer.api.kimia import KimiAudio model = KimiAudio(model_path="${model.id}", load_detokenizer=True) sampling_params = { "audio_temperature": 0.8, "audio_top_k": 10, "text_temperature": 0.0, "text_top_k": 5, } # For ASR asr_audio = "asr_example.wav" messages_asr = [ {"role": "user", "message_type": "text", "content": "Please transcribe the following audio:"}, {"role": "user", "message_type": "audio", "content": asr_audio} ] _, text = model.generate(messages_asr, **sampling_params, output_type="text") print(text) # For Q&A qa_audio = "qa_example.wav" messages_conv = [{"role": "user", "message_type": "audio", "content": qa_audio}] wav, text = model.generate(messages_conv, **sampling_params, output_type="both") sf.write("output_audio.wav", wav.cpu().view(-1).numpy(), 24000) print(text) `, ]; exports.kimi_audio = kimi_audio; const kittentts = (model) => [ `from kittentts import KittenTTS m = KittenTTS("${model.id}") audio = m.generate("This high quality TTS model works without a GPU") # Save the audio import soundfile as sf sf.write('output.wav', audio, 24000)`, ]; exports.kittentts = kittentts; const lightning_ir = (model) => { if (model.tags.includes("bi-encoder")) { return [ `#install from https://github.com/webis-de/lightning-ir from lightning_ir import BiEncoderModule model = BiEncoderModule("${model.id}") model.score("query", ["doc1", "doc2", "doc3"])`, ]; } else if (model.tags.includes("cross-encoder")) { return [ `#install from https://github.com/webis-de/lightning-ir from lightning_ir import CrossEncoderModule model = CrossEncoderModule("${model.id}") model.score("query", ["doc1", "doc2", "doc3"])`, ]; } return [ `#install from https://github.com/webis-de/lightning-ir from lightning_ir import BiEncoderModule, CrossEncoderModule # depending on the model type, use either BiEncoderModule or CrossEncoderModule model = BiEncoderModule("${model.id}") # model = CrossEncoderModule("${model.id}") model.score("query", ["doc1", "doc2", "doc3"])`, ]; }; exports.lightning_ir = lightning_ir; const llama_cpp_python = (model) => { const snippets = [ `# !pip install llama-cpp-python from llama_cpp import Llama llm = Llama.from_pretrained( repo_id="${model.id}", filename="{{GGUF_FILE}}", ) `, ]; if (model.tags.includes("conversational")) { const messages = (0, inputs_js_1.getModelInputSnippet)(model); snippets.push(`llm.create_chat_completion( messages = ${(0, common_js_1.stringifyMessages)(messages, { attributeKeyQuotes: true, indent: "\t" })} )`); } else { snippets.push(`output = llm( "Once upon a time,", max_tokens=512, echo=True ) print(output)`); } return snippets; }; exports.llama_cpp_python = llama_cpp_python; const lerobot = (model) => { if (model.tags.includes("smolvla")) { const smolvlaSnippets = [ // Installation snippet `# See https://github.com/huggingface/lerobot?tab=readme-ov-file#installation for more details git clone 
https://github.com/huggingface/lerobot.git cd lerobot pip install -e .[smolvla]`, // Finetune snippet `# Launch finetuning on your dataset python lerobot/scripts/train.py \\ --policy.path=${model.id} \\ --dataset.repo_id=lerobot/svla_so101_pickplace \\ --batch_size=64 \\ --steps=20000 \\ --output_dir=outputs/train/my_smolvla \\ --job_name=my_smolvla_training \\ --policy.device=cuda \\ --wandb.enable=true`, ]; if (model.id !== "lerobot/smolvla_base") { // Inference snippet (only if not base model) smolvlaSnippets.push(`# Run the policy using the record function python -m lerobot.record \\ --robot.type=so101_follower \\ --robot.port=/dev/ttyACM0 \\ # <- Use your port --robot.id=my_blue_follower_arm \\ # <- Use your robot id --robot.cameras="{ front: {type: opencv, index_or_path: 8, width: 640, height: 480, fps: 30}}" \\ # <- Use your cameras --dataset.single_task="Grasp a lego block and put it in the bin." \\ # <- Use the same task description you used in your dataset recording --dataset.repo_id=HF_USER/dataset_name \\ # <- This will be the dataset name on HF Hub --dataset.episode_time_s=50 \\ --dataset.num_episodes=10 \\ --policy.path=${model.id}`); } return smolvlaSnippets; } return []; }; exports.lerobot = lerobot; const tf_keras = (model) => [ `# Note: 'keras<3.x' or 'tf_keras' must be installed (legacy) # See https://github.com/keras-team/tf-keras for more details. from huggingface_hub import from_pretrained_keras model = from_pretrained_keras("${model.id}") `, ]; exports.tf_keras = tf_keras; const mamba_ssm = (model) => [ `from mamba_ssm import MambaLMHeadModel model = MambaLMHeadModel.from_pretrained("${model.id}")`, ]; exports.mamba_ssm = mamba_ssm; const mars5_tts = (model) => [ `# Install from https://github.com/Camb-ai/MARS5-TTS from inference import Mars5TTS mars5 = Mars5TTS.from_pretrained("${model.id}")`, ]; exports.mars5_tts = mars5_tts; const matanyone = (model) => [ `# Install from https://github.com/pq-yang/MatAnyone.git from matanyone.model.matanyone import MatAnyone model = MatAnyone.from_pretrained("${model.id}")`, ` from matanyone import InferenceCore processor = InferenceCore("${model.id}")`, ]; exports.matanyone = matanyone; const mesh_anything = () => [ `# Install from https://github.com/buaacyw/MeshAnything.git from MeshAnything.models.meshanything import MeshAnything # refer to https://github.com/buaacyw/MeshAnything/blob/main/main.py#L91 on how to define args # and https://github.com/buaacyw/MeshAnything/blob/main/app.py regarding usage model = MeshAnything(args)`, ]; exports.mesh_anything = mesh_anything; const open_clip = (model) => [ `import open_clip model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms('hf-hub:${model.id}') tokenizer = open_clip.get_tokenizer('hf-hub:${model.id}')`, ]; exports.open_clip = open_clip; const paddlenlp = (model) => { if (model.config?.architectures?.[0]) { const architecture = model.config.architectures[0]; return [ [ `from paddlenlp.transformers import AutoTokenizer, ${architecture}`, "", `tokenizer = AutoTokenizer.from_pretrained("${model.id}", from_hf_hub=True)`, `model = ${architecture}.from_pretrained("${model.id}", from_hf_hub=True)`, ].join("\n"), ]; } else { return [ [ `# ⚠️ Type of model unknown`, `from paddlenlp.transformers import AutoTokenizer, AutoModel`, "", `tokenizer = AutoTokenizer.from_pretrained("${model.id}", from_hf_hub=True)`, `model = AutoModel.from_pretrained("${model.id}", from_hf_hub=True)`, ].join("\n"), ]; } }; exports.paddlenlp = paddlenlp; const paddleocr = (model) => { 
const mapping = { textline_detection: { className: "TextDetection" }, textline_recognition: { className: "TextRecognition" }, seal_text_detection: { className: "SealTextDetection" }, doc_img_unwarping: { className: "TextImageUnwarping" }, doc_img_orientation_classification: { className: "DocImgOrientationClassification" }, textline_orientation_classification: { className: "TextLineOrientationClassification" }, chart_parsing: { className: "ChartParsing" }, formula_recognition: { className: "FormulaRecognition" }, layout_detection: { className: "LayoutDetection" }, table_cells_detection: { className: "TableCellsDetection" }, wired_table_classification: { className: "TableClassification" }, table_structure_recognition: { className: "TableStructureRecognition" }, }; if (model.tags.includes("doc_vlm")) { return [ `# pip install paddleocr from paddleocr import DocVLM model = DocVLM(model_name="${nameWithoutNamespace(model.id)}") output = model.predict( input={"image": "path/to/image.png", "query": "Parsing this image and output the content in Markdown format."}, batch_size=1 ) for res in output: res.print() res.save_to_json(save_path="./output/res.json")`, ]; } for (const tag of model.tags) { if (tag in mapping) { const { className } = mapping[tag]; return [ `# pip install paddleocr from paddleocr import ${className} model = ${className}(model_name="${nameWithoutNamespace(model.id)}") output = model.predict(input="path/to/image.png", batch_size=1) for res in output: res.print() res.save_to_img(save_path="./output/") res.save_to_json(save_path="./output/res.json")`, ]; } } return [ `# Please refer to the document for information on how to use the model. # https://paddlepaddle.github.io/PaddleOCR/latest/en/version3.x/module_usage/module_overview.html`, ]; }; exports.paddleocr = paddleocr; const perception_encoder = (model) => { const clip_model = `# Use PE-Core models as CLIP models import core.vision_encoder.pe as pe model = pe.CLIP.from_config("${model.id}", pretrained=True)`; const vision_encoder = `# Use any PE model as a vision encoder import core.vision_encoder.pe as pe model = pe.VisionTransformer.from_config("${model.id}", pretrained=True)`; if (model.id.includes("Core")) { return [clip_model, vision_encoder]; } else { return [vision_encoder]; } }; exports.perception_encoder = perception_encoder; const phantom_wan = (model) => [ `from huggingface_hub import snapshot_download from phantom_wan import WANI2V, configs checkpoint_dir = snapshot_download("${model.id}") wan_i2v = WanI2V( config=configs.WAN_CONFIGS['i2v-14B'], checkpoint_dir=checkpoint_dir, ) video = wan_i2v.generate(text_prompt, image_prompt)`, ]; exports.phantom_wan = phantom_wan; const pyannote_audio_pipeline = (model) => [ `from pyannote.audio import Pipeline pipeline = Pipeline.from_pretrained("${model.id}") # inference on the whole file pipeline("file.wav") # inference on an excerpt from pyannote.core import Segment excerpt = Segment(start=2.0, end=5.0) from pyannote.audio import Audio waveform, sample_rate = Audio().crop("file.wav", excerpt) pipeline({"waveform": waveform, "sample_rate": sample_rate})`, ]; exports.pyannote_audio_pipeline = pyannote_audio_pipeline; const pyannote_audio_model = (model) => [ `from pyannote.audio import Model, Inference model = Model.from_pretrained("${model.id}") inference = Inference(model) # inference on the whole file inference("file.wav") # inference on an excerpt from pyannote.core import Segment excerpt = Segment(start=2.0, end=5.0) inference.crop("file.wav", excerpt)`, ]; const 
pyannote_audio = (model) => { if (model.tags.includes("pyannote-audio-pipeline")) { return (0, exports.pyannote_audio_pipeline)(model); } return pyannote_audio_model(model); }; exports.pyannote_audio = pyannote_audio; const relik = (model) => [ `from relik import Relik relik = Relik.from_pretrained("${model.id}")`, ]; exports.relik = relik; const renderformer = (model) => [ `# Install from https://github.com/microsoft/renderformer from renderformer import RenderFormerRenderingPipeline pipeline = RenderFormerRenderingPipeline.from_pretrained("${model.id}")`, ]; exports.renderformer = renderformer; const tensorflowttsTextToMel = (model) => [ `from tensorflow_tts.inference import AutoProcessor, TFAutoModel processor = AutoProcessor.from_pretrained("${model.id}") model = TFAutoModel.from_pretrained("${model.id}") `, ]; const tensorflowttsMelToWav = (model) => [ `from tensorflow_tts.inference import TFAutoModel model = TFAutoModel.from_pretrained("${model.id}") audios = model.inference(mels) `, ]; const tensorflowttsUnknown = (model) => [ `from tensorflow_tts.inference import TFAutoModel model = TFAutoModel.from_pretrained("${model.id}") `, ]; const tensorflowtts = (model) => { if (model.tags.includes("text-to-mel")) { return tensorflowttsTextToMel(model); } else if (model.tags.includes("mel-to-wav")) { return tensorflowttsMelToWav(model); } return tensorflowttsUnknown(model); }; exports.tensorflowtts = tensorflowtts; const timm = (model) => [ `import timm model = timm.create_model("hf_hub:${model.id}", pretrained=True)`, ]; exports.timm = timm; const saelens = ( /* model: ModelData */) => [ `# pip install sae-lens from sae_lens import SAE sae, cfg_dict, sparsity = SAE.from_pretrained( release = "RELEASE_ID", # e.g., "gpt2-small-res-jb". See other options in https://github.com/jbloomAus/SAELens/blob/main/sae_lens/pretrained_saes.yaml sae_id = "SAE_ID", # e.g., "blocks.8.hook_resid_pre". 
Won't always be a hook point )`, ]; exports.saelens = saelens; const seed_story = () => [ `# seed_story_cfg_path refers to 'https://github.com/TencentARC/SEED-Story/blob/master/configs/clm_models/agent_7b_sft.yaml' # llm_cfg_path refers to 'https://github.com/TencentARC/SEED-Story/blob/master/configs/clm_models/llama2chat7b_lora.yaml' from omegaconf import OmegaConf import hydra # load Llama2 llm_cfg = OmegaConf.load(llm_cfg_path) llm = hydra.utils.instantiate(llm_cfg, torch_dtype="fp16") # initialize seed_story seed_story_cfg = OmegaConf.load(seed_story_cfg_path) seed_story = hydra.utils.instantiate(seed_story_cfg, llm=llm) `, ]; exports.seed_story = seed_story; const skopsPickle = (model, modelFile) => { return [ `import joblib from skops.hub_utils import download download("${model.id}", "path_to_folder") model = joblib.load( "${modelFile}" ) # only load pickle files from sources you trust # read more about it here https://skops.readthedocs.io/en/stable/persistence.html`, ]; }; const skopsFormat = (model, modelFile) => { return [ `from skops.hub_utils import download from skops.io import load download("${model.id}", "path_to_folder") # make sure model file is in skops format # if model is a pickle file, make sure it's from a source you trust model = load("path_to_folder/${modelFile}")`, ]; }; const skopsJobLib = (model) => { return [ `from huggingface_hub import hf_hub_download import joblib model = joblib.load( hf_hub_download("${model.id}", "sklearn_model.joblib") ) # only load pickle files from sources you trust # read more about it here https://skops.readthedocs.io/en/stable/persistence.html`, ]; }; const sklearn = (model) => { if (model.tags.includes("skops")) { const skopsmodelFile = model.config?.sklearn?.model?.file; const skopssaveFormat = model.config?.sklearn?.model_format; if (!skopsmodelFile) { return [`# ⚠️ Model filename not specified in config.json`]; } if (skopssaveFormat === "pickle") { return skopsPickle(model, skopsmodelFile); } else { return skopsFormat(model, skopsmodelFile); } } else { return skopsJobLib(model); } }; exports.sklearn = sklearn; const stable_audio_tools = (model) => [ `import torch import torchaudio from einops import rearrange from stable_audio_tools import get_pretrained_model from stable_audio_tools.inference.generation import generate_diffusion_cond device = "cuda" if torch.cuda.is_available() else "cpu" # Download model model, model_config = get_pretrained_model("${model.id}") sample_rate = model_config["sample_rate"] sample_size = model_config["sample_size"] model = model.to(device) # Set up text and timing conditioning conditioning = [{ "prompt": "128 BPM tech house drum loop", }] # Generate stereo audio output = generate_diffusion_cond( model, conditioning=conditioning, sample_size=sample_size, device=device ) # Rearrange audio batch to a single sequence output = rearrange(output, "b d n -> d (b n)") # Peak normalize, clip, convert to int16, and save to file output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1).mul(32767).to(torch.int16).cpu() torchaudio.save("output.wav", output, sample_rate)`, ]; exports.stable_audio_tools = stable_audio_tools; const fastai = (model) => [ `from huggingface_hub import from_pretrained_fastai learn = from_pretrained_fastai("${model.id}")`, ]; exports.fastai = fastai; const sam2 = (model) => { const image_predictor = `# Use SAM2 with images import torch from sam2.sam2_image_predictor import SAM2ImagePredictor predictor = SAM2ImagePredictor.from_pretrained(${model.id}) with 
torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16): predictor.set_image(<your_image>) masks, _, _ = predictor.predict(<input_prompts>)`; const video_predictor = `# Use SAM2 with videos import torch from sam2.sam2_video_predictor import SAM2VideoPredictor predictor = SAM2VideoPredictor.from_pretrained(${model.id}) with torch.inference_mode(), torch.autocast("cuda", dtype=torch.bfloat16): state = predictor.init_state(<your_video>) # add new prompts and instantly get the output on the same frame frame_idx, object_ids, masks = predictor.add_new_points(state, <your_prompts>): # propagate the prompts to get masklets throughout the video for frame_idx, object_ids, masks in predictor.propagate_in_video(state): ...`; return [image_predictor, video_predictor]; }; exports.sam2 = sam2; const sampleFactory = (model) => [ `python -m sample_factory.huggingface.load_from_hub -r ${model.id} -d ./train_dir`, ]; exports.sampleFactory = sampleFactory; function get_widget_examples_from_st_model(model) { const widgetExample = model.widgetData?.[0]; if (widgetExample?.source_sentence && widgetExample?.sentences?.length) { return [widgetExample.source_sentence, ...widgetExample.sentences]; } } const sentenceTransformers = (model) => { const remote_code_snippet = model.tags.includes(TAG_CUSTOM_CODE) ? ", trust_remote_code=True" : ""; if (model.tags.includes("cross-encoder") || model.pipeline_tag == "text-ranking") { return [ `from sentence_transformers import CrossEncoder model = CrossEncoder("${model.id}"${remote_code_snippet}) query = "Which planet is known as the Red Planet?" passages = [ "Venus is often called Earth's twin because of its similar size and proximity.", "Mars, known for its reddish appearance, is often referred to as the Red Planet.", "Jupiter, the largest planet in our solar system, has a prominent red spot.", "Saturn, famous for its rings, is sometimes mistaken for the Red Planet." ] scores = model.predict([(query, passage) for passage in passages]) print(scores)`, ]; } const exampleSentences = get_widget_examples_from_st_model(model) ?? [ "The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium.", ]; return [ `from sentence_transformers import SentenceTransformer model = SentenceTransformer("${model.id}"${remote_code_snippet}) sentences = ${JSON.stringify(exampleSentences, null, 4)} embeddings = model.encode(sentences) similarities = model.similarity(embeddings, embeddings) print(similarities.shape) # [${exampleSentences.length}, ${exampleSentences.length}]`, ]; }; exports.sentenceTransformers = sentenceTransformers; const setfit = (model) => [ `from setfit import SetFitModel model = SetFitModel.from_pretrained("${model.id}")`, ]; exports.setfit = setfit; const spacy = (model) => [ `!pip install https://huggingface.co/${model.id}/resolve/main/${nameWithoutNamespace(model.id)}-any-py3-none-any.whl # Using spacy.load(). import spacy nlp = spacy.load("${nameWithoutNamespace(model.id)}") # Importing as module. 
import ${nameWithoutNamespace(model.id)} nlp = ${nameWithoutNamespace(model.id)}.load()`, ]; exports.spacy = spacy; const span_marker = (model) => [ `from span_marker import SpanMarkerModel model = SpanMarkerModel.from_pretrained("${model.id}")`, ]; exports.span_marker = span_marker; const stanza = (model) => [ `import stanza stanza.download("${nameWithoutNamespace(model.id).replace("stanza-", "")}") nlp = stanza.Pipeline("${nameWithoutNamespace(model.id).replace("stanza-", "")}")`, ]; exports.stanza = stanza; const speechBrainMethod = (speechbrainInterface) => { switch (speechbrainInterface) { case "EncoderClassifier": return "classify_file"; case "EncoderDecoderASR": case "EncoderASR": return "transcribe_file"; case "SpectralMaskEnhancement": return "enhance_file"; case "SepformerSeparation": return "separate_file"; default: return undefined; } }; const speechbrain = (model) => { const speechbrainInterface = model.config?.speechbrain?.speechbrain_interface; if (speechbrainInterface === undefined) { return [`# interface not specified in config.json`]; } const speechbrainMethod = speechBrainMethod(speechbrainInterface); if (speechbrainMethod === undefined) { return [`# interface in config.json invalid`]; } return [ `from speechbrain.pretrained import ${speechbrainInterface} model = ${speechbrainInterface}.from_hparams( "${model.id}" ) model.${speechbrainMethod}("file.wav")`, ]; }; exports.speechbrain = speechbrain; const terratorch = (model) => [ `from terratorch.registry import BACKBONE_REGISTRY model = BACKBONE_REGISTRY.build("${model.id}")`, ]; exports.terratorch = terratorch; const hasChatTemplate = (model) => model.config?.tokenizer_config?.chat_template !== undefined || model.config?.processor_config?.chat_template !== undefined || model.config?.chat_template_jinja !== undefined; const transformers = (model) => { const info = model.transformersInfo; if (!info) { return [`# ⚠️ Type of model unknown`]; } const remote_code_snippet = model.tags.includes(TAG_CUSTOM_CODE) ? ", trust_remote_code=True" : ""; const autoSnippet = []; if (info.processor) { const processorVarName = info.processor === "AutoTokenizer" ? "tokenizer" : info.processor === "AutoFeatureExtractor" ? "extractor" : "proc