UNPKG

inference-server

Version:

Libraries and server to build AI applications. Adapters to various native bindings allowing local inference. Integrate it with your application, or use as a microservice.

342 lines 13.9 kB
import { format as formatURL } from 'node:url';
import { createExpressServer } from './http.js';
import { InferenceServer } from './server.js';
import { WhisperForConditionalGeneration, } from '@huggingface/transformers';

// Currently only used for debugging. Do not use.

/**
 * Bind addresses that mean "all interfaces". They are not useful as a URL
 * host, so they are displayed as `localhost` instead.
 */
const WILDCARD_ADDRESSES = new Set(['', '::', '0.0.0.0']);

/**
 * Options handed to `InferenceServer`. Only `whisper-base` is enabled; every
 * other entry below is a disabled preset kept for reference.
 *
 * NOTE(review): several disabled presets reference names that this file does
 * not import (ChatWithVisionEngine, VoiceFunctionCallEngine,
 * CLIPTextModelWithProjection, CLIPVisionModelWithProjection,
 * Florence2ForConditionalGeneration). Add the imports before re-enabling them.
 * 'florence2-large' and 'mxbai-embed-large-v1' each appear twice.
 */
const serverOptions = {
    log: 'debug',
    concurrency: 2,
    engines: {
        // 'chat-with-vision': new ChatWithVisionEngine({
        //     imageToTextModel: 'florence2',
        //     chatModel: 'llama3-8b',
        // }),
        // 'voice-function-calling': new VoiceFunctionCallEngine({
        //     speechToTextModel: 'whisper-base',
        //     chatModel: 'functionary',
        // }),
    },
    models: {
        // 'sciphi-triplex': {
        //     url: 'https://huggingface.co/SciPhi/Triplex/blob/main/quantized_model-Q4_K_M.gguf',
        //     sha256: '6f8f6f1fca005640a1282dd0bd12512dedf22957d0c2135ba5e71583d33754fc',
        //     engine: 'node-llama-cpp',
        //     task: 'text-completion',
        // },
        // 'lite-mistral': {
        //     url: 'https://huggingface.co/bartowski/Lite-Mistral-150M-v2-Instruct-GGUF/resolve/main/Lite-Mistral-150M-v2-Instruct-Q8_0.gguf',
        //     sha256: 'b369c9b1ac20b66b2f94117d5cdc71d029a47a33948cefef9fe104615dcddfbd',
        //     engine: 'node-llama-cpp',
        //     task: 'text-completion',
        //     prepare: 'async',
        // },
        // 'gemma-9b': {
        //     url: 'https://huggingface.co/bartowski/gemma-2-9b-it-GGUF/blob/main/gemma-2-9b-it-Q4_K_M.gguf',
        //     sha256:
        //         '13b2a7b4115bbd0900162edcebe476da1ba1fc24e718e8b40d32f6e300f56dfe',
        //     engine: 'node-llama-cpp',
        //     task: 'text-completion',
        //     prepare: 'async',
        // },
        // 'llama3.1-8b': {
        //     url: 'https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/blob/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf',
        //     sha256:
        //         '2a4ca64e02e7126436cfdb066dd7311f2486eb487191910d3d000fde13826a4d',
        //     engine: 'node-llama-cpp',
        //     task: 'text-completion',
        //     prepare: 'async',
        // },
        // 'dolphin-nemo-12b': {
        //     url: 'https://huggingface.co/cognitivecomputations/dolphin-2.9.3-mistral-nemo-12b-gguf/blob/main/dolphin-2.9.3-mistral-nemo-Q4_K_M.gguf',
        //     sha256:
        //         '09f9114e06d88b791e322586cf28a844d2d0a3876d04d6deffe2dfb26616dd83',
        //     engine: 'node-llama-cpp',
        //     task: 'text-completion',
        //     prepare: 'async',
        // },
        // 'phi3-mini-4k': {
        //     task: 'text-completion',
        //     url: 'https://gpt4all.io/models/gguf/Phi-3-mini-4k-instruct.Q4_0.gguf',
        //     engine: 'gpt4all',
        //     maxInstances: 2,
        //     prepare: 'async',
        // },
        // 'mxbai-embed-large-v1': {
        //     url: 'https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1',
        //     engine: 'transformers-js',
        //     task: 'embedding',
        //     prepare: 'blocking',
        //     device: {
        //         gpu: false,
        //     },
        // },
        // 'jina-clip-v1': {
        //     url: 'https://huggingface.co/jinaai/jina-clip-v1',
        //     engine: 'transformers-js',
        //     task: 'embedding',
        //     textModel: {
        //         modelClass: CLIPTextModelWithProjection,
        //     },
        //     visionModel: {
        //         processor: {
        //             url: 'https://huggingface.co/Xenova/clip-vit-base-patch32',
        //             // url: 'https://huggingface.co/Xenova/vit-base-patch16-224-in21k',
        //         },
        //         modelClass: CLIPVisionModelWithProjection,
        //     },
        //     prepare: 'blocking',
        //     device: {
        //         gpu: false,
        //     },
        // },
        // 'florence2-large': {
        //     url: 'https://huggingface.co/onnx-community/Florence-2-large-ft',
        //     engine: 'transformers-js',
        //     task: 'image-to-text',
        //     prepare: 'blocking',
        //     visionModel: {
        //         modelClass: Florence2ForConditionalGeneration,
        //         dtype: {
        //             embed_tokens: 'fp16',
        //             vision_encoder: 'fp32',
        //             encoder_model: 'fp16',
        //             decoder_model_merged: 'q4',
        //         },
        //     },
        //     device: {
        //         gpu: false,
        //     },
        // },
        // 'whisper-base': {
        //     url: 'https://huggingface.co/onnx-community/whisper-base',
        //     engine: 'transformers-js',
        //     task: 'speech-to-text',
        //     prepare: 'async',
        //     minInstances: 1,
        //     speechModel: {
        //         modelClass: WhisperForConditionalGeneration,
        //         dtype: {
        //             encoder_model: 'fp32', // 'fp16' works too
        //             decoder_model_merged: 'q4', // or 'fp32' ('fp16' is broken)
        //         },
        //     },
        //     device: {
        //         gpu: false,
        //     },
        // },
        // 'mistral-nemo-12b': {
        //     'url': 'https://huggingface.co/mradermacher/Mistral-Nemo-Instruct-2407-GGUF/blob/main/Mistral-Nemo-Instruct-2407.Q4_K_M.gguf',
        //     'sha256': '1ac4b6cdf0eeb1e2145f0097c6fd0a75df541e143f226a8ff25c8ae0e8dfff6f',
        //     'engine': 'node-llama-cpp',
        //     'task': 'text-completion',
        //     'prepare': 'async',
        // },
        // 'phi-3.5-mini': {
        //     url: 'https://huggingface.co/bartowski/Phi-3.5-mini-instruct-GGUF/blob/main/Phi-3.5-mini-instruct-Q4_K_M.gguf',
        //     sha256:
        //         'e4165e3a71af97f1b4820da61079826d8752a2088e313af0c7d346796c38eff5',
        //     engine: 'node-llama-cpp',
        //     task: 'text-completion',
        //     prepare: 'async',
        //     minInstances: 1,
        //     device: {
        //         gpu: 'vulkan',
        //     },
        // },
        // 'falcon-mamba-7b': {
        //     url: 'https://huggingface.co/mradermacher/falcon-mamba-7b-instruct-GGUF/blob/main/falcon-mamba-7b-instruct.Q4_K_M.gguf',
        //     sha256: 'f3357486034d89dd91fcefdb91bb1dfadfe0fd2969349a8a404e59d2bd3ad1b8',
        //     engine: 'node-llama-cpp',
        //     task: 'text-completion',
        //     prepare: 'async',
        // },
        // 'florence2-large': {
        //     url: 'https://huggingface.co/onnx-community/Florence-2-large-ft',
        //     engine: 'transformers-js',
        //     task: 'image-to-text',
        //     minInstances: 1,
        //     visionModel: {
        //         modelClass: Florence2ForConditionalGeneration,
        //         dtype: {
        //             embed_tokens: 'fp16',
        //             vision_encoder: 'fp32',
        //             encoder_model: 'fp16',
        //             decoder_model_merged: 'q4',
        //         },
        //     },
        //     device: {
        //         gpu: false,
        //     },
        // },
        // 'mxbai-embed-large-v1': {
        //     url: 'https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1',
        //     engine: 'transformers-js',
        //     task: 'embedding',
        //     prepare: 'blocking',
        //     device: {
        //         gpu: false,
        //     },
        // },
        // 'functionary-3.2-small': {
        //     url: 'https://huggingface.co/meetkai/functionary-small-v3.2-GGUF/blob/main/functionary-small-v3.2.Q4_0.gguf',
        //     sha256: 'c0afdbbffa498a8490dea3401e34034ac0f2c6e337646513a7dbc04fcef1c3a4',
        //     engine: 'node-llama-cpp',
        //     task: 'text-completion',
        //     prepare: 'blocking',
        // },
        // 'flux-schnell': {
        //     url: 'https://huggingface.co/leejet/FLUX.1-schnell-gguf/resolve/main/flux1-schnell-q4_0.gguf',
        //     task: 'text-to-image',
        //     sha256: '4f30741d2bfc786c92934ce925fcb0a43df3441e76504b797c3d5d5f0878fa6f',
        //     engine: 'stable-diffusion-cpp',
        //     prepare: 'blocking',
        //     diffusionModel: true,
        //     samplingMethod: 'euler_a',
        //     vae: {
        //         url: 'https://huggingface.co/second-state/FLUX.1-schnell-GGUF/blob/main/ae.safetensors',
        //     },
        //     clipL: {
        //         url: 'https://huggingface.co/second-state/FLUX.1-schnell-GGUF/blob/main/clip_l.safetensors',
        //     },
        //     t5xxl: {
        //         // url: 'https://huggingface.co/second-state/FLUX.1-schnell-GGUF/blob/main/t5xxl_fp16.safetensors',
        //         url: 'https://huggingface.co/second-state/FLUX.1-schnell-GGUF/blob/main/t5xxl-Q8_0.gguf',
        //     },
        // },
        // 'sd-3.5-turbo': {
        //     url: 'https://huggingface.co/stduhpf/SD3.5-Large-Turbo-GGUF-mixed-sdcpp/blob/main/legacy/sd3.5_large_turbo-q4_0.gguf',
        //     sha256: '52495d9c4356065a1378a93c9556a9eb465e10014ba9ce364512674267405bb2',
        //     engine: 'stable-diffusion-cpp',
        //     task: 'text-to-image',
        //     prepare: 'blocking',
        //     samplingMethod: 'euler',
        //     clipG: {
        //         url: 'https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_g.safetensors',
        //         sha256: 'ec310df2af79c318e24d20511b601a591ca8cd4f1fce1d8dff822a356bcdb1f4',
        //     },
        //     clipL: {
        //         url: 'https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/clip_l.safetensors',
        //         sha256: '660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd',
        //     },
        //     t5xxl: {
        //         url: 'https://huggingface.co/Comfy-Org/stable-diffusion-3.5-fp8/blob/main/text_encoders/t5xxl_fp8_e4m3fn.safetensors',
        //         sha256: '7d330da4816157540d6bb7838bf63a0f02f573fc48ca4d8de34bb0cbfd514f09',
        //     },
        // },
        // 'trocr-printed': {
        //     url: 'https://huggingface.co/Xenova/trocr-small-printed',
        //     engine: 'transformers-js',
        //     task: 'image-to-text',
        //     prepare: 'blocking',
        //     minInstances: 1,
        //     // textModel: {
        //     //     modelClass: TrOCRPreTrainedModel,
        //     //     processorClass: DeiTFeatureExtractor,
        //     // },
        //     device: {
        //         gpu: false,
        //     },
        // },
        // 'sdxl-turbo': {
        //     url: 'https://huggingface.co/stabilityai/sdxl-turbo/blob/main/sd_xl_turbo_1.0_fp16.safetensors',
        //     sha256:
        //         'e869ac7d6942cb327d68d5ed83a40447aadf20e0c3358d98b2cc9e270db0da26',
        //     engine: 'stable-diffusion-cpp',
        //     task: 'image-to-image',
        //     prepare: 'blocking',
        //     samplingMethod: 'euler',
        //     vae: {
        //         url: 'https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/blob/main/sdxl.vae.safetensors',
        //         sha256:
        //             '235745af8d86bf4a4c1b5b4f529868b37019a10f7c0b2e79ad0abca3a22bc6e1',
        //     },
        // },
        // 'flux-light': {
        //     url: 'https://huggingface.co/city96/flux.1-lite-8B-alpha-gguf/blob/main/flux.1-lite-8B-alpha-Q8_0.gguf',
        //     sha256:
        //         'efc598d62123f2fdfd682948f533fee081f7fb1295b14d002ac1e66cae5f01a5',
        //     engine: 'stable-diffusion-cpp',
        //     task: 'image-to-image',
        //     prepare: 'blocking',
        // },
        // 'sd-3-medium': {
        //     url: 'https://huggingface.co/second-state/stable-diffusion-3-medium-GGUF/blob/main/sd3-medium-Q8_0.gguf',
        //     sha256: '7e34dfeb71f8cdbc8338677b63a444897cf4c5692ab4c1d98f04cbba6751885a',
        //     engine: 'stable-diffusion-cpp',
        //     task: 'text-to-image',
        //     prepare: 'async',
        // },
        // 'sd-1.5': {
        //     url: 'https://huggingface.co/second-state/stable-diffusion-v1-5-GGUF/blob/main/stable-diffusion-v1-5-pruned-emaonly-f32.gguf',
        //     sha256: '52c7ca39d8d48d6f44fa4ff2c44569f3c924d92311108cb38492958350d48ff8',
        //     engine: 'stable-diffusion-cpp',
        //     task: 'text-to-image',
        //     prepare: 'async',
        // },
        // 'llama-3.2-3b': {
        //     url: 'https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/blob/main/Llama-3.2-3B-Instruct-Q6_K_L.gguf',
        //     sha256:
        //         'c542b14ec07b8b3cb8d777e1a68ee5aabb964167719466d4c685c29fcfd04900',
        //     engine: 'node-llama-cpp',
        //     task: 'text-completion',
        //     prepare: 'blocking',
        // },
        // speecht5: {
        //     url: 'https://huggingface.co/Xenova/speecht5_tts',
        //     engine: 'transformers-js',
        //     task: 'text-to-speech',
        //     prepare: 'async',
        //     minInstances: 1,
        //     speechModel: {
        //         speakerEmbeddings: {
        //             voice: {
        //                 url: 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin',
        //             },
        //         },
        //     },
        // },
        // 'owlv2-base': {
        //     url: 'https://huggingface.co/Xenova/owlv2-base-patch16-finetuned',
        //     engine: 'transformers-js',
        //     task: 'object-detection',
        //     prepare: 'async',
        //     dtype: 'fp16',
        // },
        'whisper-base': {
            url: 'https://huggingface.co/onnx-community/whisper-base',
            engine: 'transformers-js',
            task: 'speech-to-text',
            prepare: 'async',
            minInstances: 1,
            modelClass: WhisperForConditionalGeneration,
            dtype: {
                encoder_model: 'fp16',
                decoder_model_merged: 'q4',
            },
            device: {
                gpu: false,
            },
        },
    },
};

/**
 * Turns the value returned by `server.address()` into a printable location.
 *
 * @param {{ address: string, port: number } | string | null | undefined} addressInfo
 *   Result of `server.address()`: an address object for TCP listeners, a
 *   string for pipe/socket listeners, or null when the server is not bound.
 * @returns {string} An `http://host:port/` URL for TCP listeners, or the pipe
 *   path unchanged.
 * @throws {Error} When no address is available.
 */
function describeListenAddress(addressInfo) {
    if (addressInfo == null) {
        // Fail with a readable message instead of a destructuring TypeError.
        throw new Error('HTTP server has no bound address; it does not appear to be listening.');
    }
    if (typeof addressInfo === 'string') {
        // Pipe / UNIX socket listeners have no host or port to format.
        return addressInfo;
    }
    const { address, port } = addressInfo;
    // A wildcard bind ('::' or '0.0.0.0') is reachable via localhost.
    const hostname = WILDCARD_ADDRESSES.has(address) ? 'localhost' : address;
    return formatURL({
        protocol: 'http',
        hostname,
        port,
        pathname: '/',
    });
}

/**
 * Starts the inference server with `serverOptions`, wraps it in the HTTP
 * server from `createExpressServer`, and logs where it can be reached.
 *
 * NOTE(review): assumes `createExpressServer` returns a server that is already
 * listening; that is defined in ./http.js and not visible here.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const inferenceServer = new InferenceServer(serverOptions);
    await inferenceServer.start();
    const httpServer = createExpressServer(inferenceServer);
    const url = describeListenAddress(httpServer.address());
    console.log(`Server listening at ${url}`);
}

// Log stray rejections rather than letting them pass silently.
process.on('unhandledRejection', (err) => {
    console.error('Unhandled rejection:', err);
});

// Any startup failure is fatal for this debugging entry point.
main().catch((err) => {
    console.error(err);
    process.exit(1);
});
//# sourceMappingURL=standalone.js.map