@huggingface/tasks

import { parseGGUFQuantLabel } from "./gguf.js";
import type { ModelData } from "./model-data.js";
import type { PipelineType } from "./pipelines.js";
import { stringifyMessages } from "./snippets/common.js";
import { getModelInputSnippet } from "./snippets/inputs.js";
import type { ChatCompletionInputMessage } from "./tasks/index.js";

export interface LocalAppSnippet {
	/**
	 * Title of the snippet
	 */
	title: string;
	/**
	 * Optional setup guide
	 */
	setup?: string;
	/**
	 * Content (or command) to be run
	 */
	content: string | string[];
}

/**
 * Elements configurable by a local app.
 */
export type LocalApp = {
	/**
	 * Name that appears in buttons
	 */
	prettyLabel: string;
	/**
	 * Link to get more info about a local app (website, etc.)
	 */
	docsUrl: string;
	/**
	 * Main category of the app
	 */
	mainTask: PipelineType;
	/**
	 * Whether to display a "macOS-only" pill
	 */
	macOSOnly?: boolean;
	comingSoon?: boolean;
	/**
	 * IMPORTANT: function that decides whether to display the button on a model page's main "Use this model" dropdown.
	 */
	displayOnModelPage: (model: ModelData) => boolean;
} & (
	| {
			/**
			 * If the app supports deeplinks, the URL to open.
			 */
			deeplink: (model: ModelData, filepath?: string) => URL;
	  }
	| {
			/**
			 * If it does not (mostly llama.cpp-based apps), a snippet to copy/paste in your terminal.
			 * Supports the placeholder {{GGUF_FILE}}, which will be replaced by the GGUF file path or the list of available files.
			 * Supports the placeholder {{QUANT_TAG}}, which will be replaced by the list of available quant tags, or removed if the repo does not contain multiple quant files.
			 */
			snippet: (model: ModelData, filepath?: string) => string | string[] | LocalAppSnippet | LocalAppSnippet[];
	  }
);

function isAwqModel(model: ModelData): boolean {
	return model.config?.quantization_config?.quant_method === "awq";
}

function isGptqModel(model: ModelData): boolean {
	return model.config?.quantization_config?.quant_method === "gptq";
}

function isAqlmModel(model: ModelData): boolean {
	return model.config?.quantization_config?.quant_method === "aqlm";
}

function isMarlinModel(model: ModelData): boolean {
	return model.config?.quantization_config?.quant_method === "marlin";
}

function isTransformersModel(model: ModelData): boolean {
	return model.tags.includes("transformers");
}

function isTgiModel(model: ModelData): boolean {
	return model.tags.includes("text-generation-inference");
}

function isLlamaCppGgufModel(model: ModelData) {
	return !!model.gguf?.context_length;
}

function isAmdRyzenModel(model: ModelData) {
	return model.tags.includes("ryzenai-hybrid") || model.tags.includes("ryzenai-npu");
}

function isMlxModel(model: ModelData) {
	return model.tags.includes("mlx");
}

function getQuantTag(filepath?: string): string {
	const defaultTag = ":{{QUANT_TAG}}";

	if (!filepath) {
		return defaultTag;
	}

	const quantLabel = parseGGUFQuantLabel(filepath);
	return quantLabel ? `:${quantLabel}` : defaultTag;
}
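// Illustration of getQuantTag with a hypothetical filename (assuming parseGGUFQuantLabel
// extracts "Q4_K_M" from it):
//   getQuantTag("somemodel.Q4_K_M.gguf") -> ":Q4_K_M"
//   getQuantTag(undefined)               -> ":{{QUANT_TAG}}"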
const snippetLlamacpp = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
	const command = (binary: string) => {
		const snippet = ["# Load and run the model:", `${binary} -hf ${model.id}${getQuantTag(filepath)}`];
		return snippet.join("\n");
	};
	return [
		{
			title: "Install from brew",
			setup: "brew install llama.cpp",
			content: command("llama-server"),
		},
		{
			title: "Install from WinGet (Windows)",
			setup: "winget install llama.cpp",
			content: command("llama-server"),
		},
		{
			title: "Use pre-built binary",
			setup: [
				// prettier-ignore
				"# Download pre-built binary from:",
				"# https://github.com/ggerganov/llama.cpp/releases",
			].join("\n"),
			content: command("./llama-server"),
		},
		{
			title: "Build from source code",
			setup: [
				"git clone https://github.com/ggerganov/llama.cpp.git",
				"cd llama.cpp",
				"cmake -B build",
				"cmake --build build -j --target llama-server",
			].join("\n"),
			content: command("./build/bin/llama-server"),
		},
	];
};

const snippetNodeLlamaCppCli = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
	const tagName = getQuantTag(filepath);
	return [
		{
			title: "Chat with the model",
			content: `npx -y node-llama-cpp chat hf:${model.id}${tagName}`,
		},
		{
			title: "Estimate the model compatibility with your hardware",
			content: `npx -y node-llama-cpp inspect estimate hf:${model.id}${tagName}`,
		},
	];
};

const snippetOllama = (model: ModelData, filepath?: string): string => {
	return `ollama run hf.co/${model.id}${getQuantTag(filepath)}`;
};

const snippetLocalAI = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
	const command = (binary: string) =>
		["# Load and run the model:", `${binary} huggingface://${model.id}/${filepath ?? "{{GGUF_FILE}}"}`].join("\n");
	return [
		{
			title: "Install from binary",
			setup: "curl https://localai.io/install.sh | sh",
			content: command("local-ai run"),
		},
		{
			title: "Use Docker images",
			setup: [
				// prettier-ignore
				"# Pull the image:",
				"docker pull localai/localai:latest-cpu",
			].join("\n"),
			content: command(
				"docker run -p 8080:8080 --name localai -v $PWD/models:/build/models localai/localai:latest-cpu"
			),
		},
	];
};

const snippetVllm = (model: ModelData): LocalAppSnippet[] => {
	const messages = getModelInputSnippet(model) as ChatCompletionInputMessage[];
	const runCommandInstruct = `# Call the server using curl:
curl -X POST "http://localhost:8000/v1/chat/completions" \\
	-H "Content-Type: application/json" \\
	--data '{
		"model": "${model.id}",
		"messages": ${stringifyMessages(messages, {
			indent: "\t\t",
			attributeKeyQuotes: true,
			customContentEscaper: (str) => str.replace(/'/g, "'\\''"),
		})}
	}'`;
	const runCommandNonInstruct = `# Call the server using curl:
curl -X POST "http://localhost:8000/v1/completions" \\
	-H "Content-Type: application/json" \\
	--data '{
		"model": "${model.id}",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'`;
	const runCommand = model.tags.includes("conversational") ? runCommandInstruct : runCommandNonInstruct;
	let setup;
	let dockerCommand;
	if (model.tags.includes("mistral-common")) {
		setup = [
			"# Install vLLM from pip:",
			"pip install vllm",
			"# Make sure you have the latest version of mistral-common installed:",
			"pip install --upgrade mistral-common",
		].join("\n");
		dockerCommand = `# Load and run the model:\ndocker exec -it my_vllm_container bash -c "vllm serve ${model.id} --tokenizer_mode mistral --config_format mistral --load_format mistral --tool-call-parser mistral --enable-auto-tool-choice"`;
	} else {
		setup = ["# Install vLLM from pip:", "pip install vllm"].join("\n");
		dockerCommand = `# Load and run the model:\ndocker exec -it my_vllm_container bash -c "vllm serve ${model.id}"`;
	}
	return [
		{
			title: "Install from pip",
			setup: setup,
			content: [`# Load and run the model:\nvllm serve "${model.id}"`, runCommand],
		},
		{
			title: "Use Docker images",
			setup: [
				"# Deploy with docker on Linux:",
				`docker run --runtime nvidia --gpus all \\`,
				`	--name my_vllm_container \\`,
				`	-v ~/.cache/huggingface:/root/.cache/huggingface \\`,
				`	--env "HUGGING_FACE_HUB_TOKEN=<secret>" \\`,
				`	-p 8000:8000 \\`,
				`	--ipc=host \\`,
				`	vllm/vllm-openai:latest \\`,
				`	--model ${model.id}`,
			].join("\n"),
			content: [dockerCommand, runCommand],
		},
	];
};
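// For a hypothetical GGUF repo "someuser/SomeModel-GGUF" with filepath "somemodel.Q4_K_M.gguf"
// (illustrative values, assuming parseGGUFQuantLabel yields "Q4_K_M"), the generators above
// produce commands such as:
//   snippetLlamacpp -> llama-server -hf someuser/SomeModel-GGUF:Q4_K_M
//   snippetOllama   -> ollama run hf.co/someuser/SomeModel-GGUF:Q4_K_M
//   snippetLocalAI  -> local-ai run huggingface://someuser/SomeModel-GGUF/somemodel.Q4_K_M.gguf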
const snippetTgi = (model: ModelData): LocalAppSnippet[] => {
	const runCommand = [
		"# Call the server using curl:",
		`curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
		`	-H "Content-Type: application/json" \\`,
		`	--data '{`,
		`		"model": "${model.id}",`,
		`		"messages": [`,
		`			{"role": "user", "content": "What is the capital of France?"}`,
		`		]`,
		`	}'`,
	];
	return [
		{
			title: "Use Docker images",
			setup: [
				"# Deploy with docker on Linux:",
				`docker run --gpus all \\`,
				`	-v ~/.cache/huggingface:/root/.cache/huggingface \\`,
				`	-e HF_TOKEN="<secret>" \\`,
				`	-p 8000:80 \\`,
				`	ghcr.io/huggingface/text-generation-inference:latest \\`,
				`	--model-id ${model.id}`,
			].join("\n"),
			content: [runCommand.join("\n")],
		},
	];
};

const snippetMlxLm = (model: ModelData): LocalAppSnippet[] => {
	const openaiCurl = [
		"# Calling the OpenAI-compatible server with curl",
		`curl -X POST "http://localhost:8000/v1/chat/completions" \\`,
		`	-H "Content-Type: application/json" \\`,
		`	--data '{`,
		`		"model": "${model.id}",`,
		`		"messages": [`,
		`			{"role": "user", "content": "Hello"}`,
		`		]`,
		`	}'`,
	];

	return [
		{
			title: "Generate or start a chat session",
			setup: ["# Install MLX LM", "uv tool install mlx-lm"].join("\n"),
			content: [
				...(model.tags.includes("conversational")
					? ["# Interactive chat REPL", `mlx_lm.chat --model "${model.id}"`]
					: ["# Generate some text", `mlx_lm.generate --model "${model.id}" --prompt "Once upon a time"`]),
			].join("\n"),
		},
		...(model.tags.includes("conversational")
			? [
					{
						title: "Run an OpenAI-compatible server",
						setup: ["# Install MLX LM", "uv tool install mlx-lm"].join("\n"),
						content: ["# Start the server", `mlx_lm.server --model "${model.id}"`, ...openaiCurl].join("\n"),
					},
			  ]
			: []),
	];
};

const snippetDockerModelRunner = (model: ModelData, filepath?: string): string => {
	return `docker model run hf.co/${model.id}${getQuantTag(filepath)}`;
};

const snippetLemonade = (model: ModelData, filepath?: string): LocalAppSnippet[] => {
	const tagName = getQuantTag(filepath);
	const modelName = model.id.includes("/") ? model.id.split("/")[1] : model.id;

	// Get recipe according to model type
	let simplifiedModelName: string;
	let recipe: string;
	let checkpoint: string;
	let requirements: string;

	if (model.tags.some((tag) => ["ryzenai-npu", "ryzenai-hybrid"].includes(tag))) {
		recipe = model.tags.includes("ryzenai-npu") ? "oga-npu" : "oga-hybrid";
		checkpoint = model.id;
		requirements = " (requires RyzenAI 300 series)";
		simplifiedModelName = modelName.split("-awq-")[0];
		simplifiedModelName += recipe === "oga-npu" ? "-NPU" : "-Hybrid";
	} else {
		recipe = "llamacpp";
		checkpoint = `${model.id}${tagName}`;
		requirements = "";
		simplifiedModelName = modelName;
	}

	return [
		{
			title: "Pull the model",
			setup: "# Download Lemonade from https://lemonade-server.ai/",
			content: [
				`lemonade-server pull user.${simplifiedModelName} --checkpoint ${checkpoint} --recipe ${recipe}`,
				"# Note: If you installed from source, use the lemonade-server-dev command instead.",
			].join("\n"),
		},
		{
			title: `Run and chat with the model${requirements}`,
			content: `lemonade-server run user.${simplifiedModelName}`,
		},
		{
			title: "List all available models",
			content: "lemonade-server list",
		},
	];
};
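// Worked example of the Lemonade naming logic for a hypothetical Ryzen AI hybrid repo
// "amd/Some-Model-awq-g128-int4-onnx-hybrid" tagged "ryzenai-hybrid" (illustrative id only):
//   modelName           -> "Some-Model-awq-g128-int4-onnx-hybrid"
//   recipe              -> "oga-hybrid"
//   simplifiedModelName -> "Some-Model" + "-Hybrid" = "Some-Model-Hybrid"
//   pull command        -> lemonade-server pull user.Some-Model-Hybrid --checkpoint amd/Some-Model-awq-g128-int4-onnx-hybrid --recipe oga-hybrid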
/**
 * Add your new local app here.
 *
 * This is open to new suggestions and awesome upcoming apps.
 *
 * /!\ IMPORTANT
 *
 * If possible, support deeplinks and be as cross-platform as possible.
 *
 * Ping the HF team if we can help with anything!
 */
export const LOCAL_APPS = {
	"llama.cpp": {
		prettyLabel: "llama.cpp",
		docsUrl: "https://github.com/ggerganov/llama.cpp",
		mainTask: "text-generation",
		displayOnModelPage: isLlamaCppGgufModel,
		snippet: snippetLlamacpp,
	},
	"node-llama-cpp": {
		prettyLabel: "node-llama-cpp",
		docsUrl: "https://node-llama-cpp.withcat.ai",
		mainTask: "text-generation",
		displayOnModelPage: isLlamaCppGgufModel,
		snippet: snippetNodeLlamaCppCli,
	},
	vllm: {
		prettyLabel: "vLLM",
		docsUrl: "https://docs.vllm.ai",
		mainTask: "text-generation",
		displayOnModelPage: (model: ModelData) =>
			(isAwqModel(model) ||
				isGptqModel(model) ||
				isAqlmModel(model) ||
				isMarlinModel(model) ||
				isLlamaCppGgufModel(model) ||
				isTransformersModel(model)) &&
			(model.pipeline_tag === "text-generation" || model.pipeline_tag === "image-text-to-text"),
		snippet: snippetVllm,
	},
	"mlx-lm": {
		prettyLabel: "MLX LM",
		docsUrl: "https://github.com/ml-explore/mlx-lm",
		mainTask: "text-generation",
		displayOnModelPage: (model) => model.pipeline_tag === "text-generation" && isMlxModel(model),
		snippet: snippetMlxLm,
	},
	tgi: {
		prettyLabel: "TGI",
		docsUrl: "https://huggingface.co/docs/text-generation-inference/",
		mainTask: "text-generation",
		displayOnModelPage: isTgiModel,
		snippet: snippetTgi,
	},
	lmstudio: {
		prettyLabel: "LM Studio",
		docsUrl: "https://lmstudio.ai",
		mainTask: "text-generation",
		displayOnModelPage: (model) => isLlamaCppGgufModel(model) || isMlxModel(model),
		deeplink: (model, filepath) =>
			new URL(`lmstudio://open_from_hf?model=${model.id}${filepath ? `&file=${filepath}` : ""}`),
	},
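	// For a hypothetical model "someuser/SomeModel-GGUF" with file "somemodel.Q4_K_M.gguf",
	// the LM Studio deeplink above resolves to:
	//   lmstudio://open_from_hf?model=someuser/SomeModel-GGUF&file=somemodel.Q4_K_M.gguf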
	localai: {
		prettyLabel: "LocalAI",
		docsUrl: "https://github.com/mudler/LocalAI",
		mainTask: "text-generation",
		displayOnModelPage: isLlamaCppGgufModel,
		snippet: snippetLocalAI,
	},
	jan: {
		prettyLabel: "Jan",
		docsUrl: "https://jan.ai",
		mainTask: "text-generation",
		displayOnModelPage: isLlamaCppGgufModel,
		deeplink: (model) => new URL(`jan://models/huggingface/${model.id}`),
	},
	backyard: {
		prettyLabel: "Backyard AI",
		docsUrl: "https://backyard.ai",
		mainTask: "text-generation",
		displayOnModelPage: isLlamaCppGgufModel,
		deeplink: (model) => new URL(`https://backyard.ai/hf/model/${model.id}`),
	},
	sanctum: {
		prettyLabel: "Sanctum",
		docsUrl: "https://sanctum.ai",
		mainTask: "text-generation",
		displayOnModelPage: isLlamaCppGgufModel,
		deeplink: (model) => new URL(`sanctum://open_from_hf?model=${model.id}`),
	},
	jellybox: {
		prettyLabel: "Jellybox",
		docsUrl: "https://jellybox.com",
		mainTask: "text-generation",
		displayOnModelPage: (model) =>
			isLlamaCppGgufModel(model) ||
			(model.library_name === "diffusers" &&
				model.tags.includes("safetensors") &&
				(model.pipeline_tag === "text-to-image" || model.tags.includes("lora"))),
		deeplink: (model) => {
			if (isLlamaCppGgufModel(model)) {
				return new URL(`jellybox://llm/models/huggingface/LLM/${model.id}`);
			} else if (model.tags.includes("lora")) {
				return new URL(`jellybox://image/models/huggingface/ImageLora/${model.id}`);
			} else {
				return new URL(`jellybox://image/models/huggingface/Image/${model.id}`);
			}
		},
	},
	msty: {
		prettyLabel: "Msty",
		docsUrl: "https://msty.app",
		mainTask: "text-generation",
		displayOnModelPage: isLlamaCppGgufModel,
		deeplink: (model) => new URL(`msty://models/search/hf/${model.id}`),
	},
	recursechat: {
		prettyLabel: "RecurseChat",
		docsUrl: "https://recurse.chat",
		mainTask: "text-generation",
		macOSOnly: true,
		displayOnModelPage: isLlamaCppGgufModel,
		deeplink: (model) => new URL(`recursechat://new-hf-gguf-model?hf-model-id=${model.id}`),
	},
	drawthings: {
		prettyLabel: "Draw Things",
		docsUrl: "https://drawthings.ai",
		mainTask: "text-to-image",
		macOSOnly: true,
		displayOnModelPage: (model) =>
			model.library_name === "diffusers" && (model.pipeline_tag === "text-to-image" || model.tags.includes("lora")),
		deeplink: (model) => {
			if (model.tags.includes("lora")) {
				return new URL(`https://drawthings.ai/import/diffusers/pipeline.load_lora_weights?repo_id=${model.id}`);
			} else {
				return new URL(`https://drawthings.ai/import/diffusers/pipeline.from_pretrained?repo_id=${model.id}`);
			}
		},
	},
	diffusionbee: {
		prettyLabel: "DiffusionBee",
		docsUrl: "https://diffusionbee.com",
		mainTask: "text-to-image",
		macOSOnly: true,
		displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image",
		deeplink: (model) => new URL(`https://diffusionbee.com/huggingface_import?model_id=${model.id}`),
	},
	joyfusion: {
		prettyLabel: "JoyFusion",
		docsUrl: "https://joyfusion.app",
		mainTask: "text-to-image",
		macOSOnly: true,
		displayOnModelPage: (model) =>
			model.tags.includes("coreml") && model.tags.includes("joyfusion") && model.pipeline_tag === "text-to-image",
		deeplink: (model) => new URL(`https://joyfusion.app/import_from_hf?repo_id=${model.id}`),
	},
	invoke: {
		prettyLabel: "Invoke",
		docsUrl: "https://github.com/invoke-ai/InvokeAI",
		mainTask: "text-to-image",
		displayOnModelPage: (model) => model.library_name === "diffusers" && model.pipeline_tag === "text-to-image",
		deeplink: (model) => new URL(`https://models.invoke.ai/huggingface/${model.id}`),
	},
	ollama: {
		prettyLabel: "Ollama",
		docsUrl: "https://ollama.com",
		mainTask: "text-generation",
		displayOnModelPage: isLlamaCppGgufModel,
		snippet: snippetOllama,
	},
	"docker-model-runner": {
		prettyLabel: "Docker Model Runner",
		docsUrl: "https://docs.docker.com/ai/model-runner/",
		mainTask: "text-generation",
		displayOnModelPage: isLlamaCppGgufModel,
		snippet: snippetDockerModelRunner,
	},
	lemonade: {
		prettyLabel: "Lemonade",
		docsUrl: "https://lemonade-server.ai",
		mainTask: "text-generation",
		displayOnModelPage: (model) => isLlamaCppGgufModel(model) || isAmdRyzenModel(model),
		snippet: snippetLemonade,
	},
} satisfies Record<string, LocalApp>;

export type LocalAppKey = keyof typeof LOCAL_APPS;