// node-llama-cpp
// Run AI models locally on your machine with Node.js bindings for llama.cpp,
// enforcing a JSON schema on the model output at the generation level.
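//
// Below is the list of models recommended by this package. Example of loading
// one and chatting with it (a minimal sketch based on node-llama-cpp v3's
// documented API; exact option names may differ between versions):
//
//     import {getLlama, resolveModelFile, LlamaChatSession} from "node-llama-cpp";
//
//     // Resolve an "hf:" URI from `fileOptions` below into a local file path,
//     // downloading the model into "./models" if it isn't there yet.
//     const modelPath = await resolveModelFile(
//         "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M",
//         "./models"
//     );
//
//     const llama = await getLlama();
//     const model = await llama.loadModel({modelPath});
//     const context = await model.createContext();
//     const session = new LlamaChatSession({contextSequence: context.getSequence()});
//     console.log(await session.prompt("Hi there, how are you?"));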
export const recommendedModels = [{
name: "DeepSeek R1 Distill Qwen 7B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "DeepSeek R1 model was created by DeepSeek and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This model is censored, but its responses quality on many topics is extremely high.\n" +
"This is the 7 billion parameters version of the model - a fine tuned Qwen 2.5 7B base model with distillation from the 671B DeepSeek R1 version.",
fileOptions: [
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q8_0",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q5_K_M",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q5_K_S",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M"
]
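// Each `fileOptions` entry is a model URI: the "hf:<user>/<repo>:<quant>"
// shorthand resolves to the GGUF file with that quantization level inside the
// given Hugging Face repo (see the usage sketch at the top of this file).
// The options are listed from the highest-quality (largest) quantization
// down to smaller, lighter ones.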
}, {
name: "DeepSeek R1 Distill Qwen 14B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "DeepSeek R1 model was created by DeepSeek and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This model is censored, but its responses quality on many topics is extremely high.\n" +
"This is the 14 billion parameters version of the model - a fine tuned Qwen 2.5 14B base model with distillation from the 671B DeepSeek R1 version.",
fileOptions: [
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q8_0",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q6_K",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q5_K_M",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q5_K_S",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q4_K_M"
]
}, {
name: "DeepSeek R1 Distill Qwen 32B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "DeepSeek R1 model was created by DeepSeek and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This model is censored, but its responses quality on many topics is extremely high.\n" +
"This is the 32 billion parameters version of the model - a fine tuned Qwen 2.5 32B base model with distillation from the 671B DeepSeek R1 version.",
fileOptions: [
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q8_0",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q6_K",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q5_K_M",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q5_K_S",
"hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M"
]
}, {
name: "DeepSeek R1 Distill Llama 8B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "DeepSeek R1 model was created by DeepSeek and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This model is censored, even though it's based on Llama 3.1.\n" +
"This is the 8 billion parameters version of the model - a fine tuned Llama 3.1 8B base model with distillation from the 671B DeepSeek R1 version.",
fileOptions: [
"hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q8_0",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q6_K",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q5_K_M",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q5_K_S",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q4_K_M"
]
}, {
name: "DeepSeek R1 Distill Llama 70B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "DeepSeek R1 model was created by DeepSeek and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This model is censored, even though it's based on Llama 3.3.\n" +
"This is the 70 billion parameters version of the model - a fine tuned Llama 3.3 70B base model with distillation from the 671B DeepSeek R1 version.",
fileOptions: [
"hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B.Q8_0.gguf.part1of2",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B.Q6_K.gguf.part1of2",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF:Q5_K_M",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF:Q5_K_S",
"hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF:Q4_K_M"
]
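// Note: ".gguf.part1of2" URIs reference the first part of a GGUF file that is
// split into multiple parts because of its size; per node-llama-cpp's model URI
// handling, passing the first part's URI should be enough for the remaining
// parts to be fetched alongside it.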
}, {
name: "QwQ 32B",
abilities: ["chat", "complete", "functionCalling", "reasoning"],
description: "QwQ model was created by Alibaba and is using chain of though (CoT) to reason across a wide variety of topics.\n" +
"It's optimized for an assistant-like chat use cases, with native support for function calling.\n" +
"This model is censored, but its responses quality on many topics is extremely high.\n" +
"Its performance is comparable to DeepSeek R1 671B.",
fileOptions: [
"hf:Qwen/QwQ-32B-GGUF:Q8_0",
"hf:Qwen/QwQ-32B-GGUF:Q6_K",
"hf:Qwen/QwQ-32B-GGUF:Q5_K_M",
"hf:Qwen/QwQ-32B-GGUF:Q4_K_M"
]
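// Reasoning models like QwQ and the DeepSeek R1 distillations above emit their
// chain of thought as a separate "thought" segment. A sketch of streaming it,
// assuming the response-segment API of recent node-llama-cpp versions (names
// may differ in your version):
//
//     const response = await session.prompt("Solve: 17 * 24", {
//         onResponseChunk(chunk) {
//             if (chunk.type === "segment" && chunk.segmentType === "thought")
//                 process.stdout.write(chunk.text); // the model's reasoning
//         }
//     });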
}, {
name: "Llama 3.1 8B",
abilities: ["chat", "complete", "functionCalling"],
description: "Llama 3.1 model was created by Meta and is optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This is the 8 billion parameters version of the model.",
fileOptions: [
"hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q8_0",
"hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q6_K",
"hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M"
]
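// Models with the "functionCalling" ability can invoke functions you define.
// A minimal sketch using node-llama-cpp's documented function-calling API
// (the session is created as in the sketch at the top of this file):
//
//     import {defineChatSessionFunction} from "node-llama-cpp";
//
//     const functions = {
//         getCurrentTime: defineChatSessionFunction({
//             description: "Get the current time",
//             handler() {
//                 return new Date().toLocaleTimeString();
//             }
//         })
//     };
//     const answer = await session.prompt("What time is it right now?", {functions});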
}, {
name: "Llama 3.1 70B",
abilities: ["chat", "complete", "functionCalling"],
description: "Llama 3.1 model was created by Meta and is optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This is the 70 billion parameters version of the model. " +
"You need a GPU with a lot of VRAM to use this version.",
fileOptions: [
"hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2",
"hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q6_K.gguf.part1of2",
"hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF:Q4_K_M",
"hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF:Q4_K_S"
]
}, {
name: "Llama 3.1 405B",
abilities: ["chat", "complete", "functionCalling"],
description: "Llama 3.1 model was created by Meta and is optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This is the 405 billion parameters version of the model, and its capabilities are comparable and sometimes even surpass GPT-4o and Claude 3.5 Sonnet.\n" +
"You need a GPU with a lot of VRAM to use this version of Llama 3.1.",
fileOptions: [
"hf:mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF/Meta-Llama-3.1-405B-Instruct.Q3_K_L.gguf.part1of5",
"hf:mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF/Meta-Llama-3.1-405B-Instruct.Q3_K_M.gguf.part1of4"
]
}, {
name: "Phi 4 14B",
abilities: ["chat", "complete", "functionCalling"],
description: "Phi 4 model was created by Microsoft and is optimized for complex reasoning in areas such as math.",
fileOptions: [
"hf:mradermacher/phi-4-GGUF:Q8_0",
"hf:mradermacher/phi-4-GGUF:Q6_K",
"hf:mradermacher/phi-4-GGUF:Q4_K_M",
"hf:mradermacher/phi-4-GGUF:Q4_K_S"
]
}, {
name: "Mistral Nemo 12B",
abilities: ["chat", "complete", "functionCalling"],
description: "Mistral Nemo model was created by Mistral AI and was trained on large proportion of multilingual and code data, with support for function calling.\n" +
"It was trained jointly by Mistral AI and NVIDIA.\n" +
"This is a 12 billion parameters model.",
fileOptions: [
"hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF:Q8_0",
"hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF:Q6_K",
"hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M",
"hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_S"
]
}, {
name: "Llama 3.2 3B",
abilities: ["chat", "complete", "functionCalling"],
description: "Llama 3.2 3B model was created by Meta and is optimized for an assistant-like chat use cases, with support for function calling.\n" +
"This model is smarter than the 1B model, but is still relatively small and can run on less capable machines.",
fileOptions: [
"hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q8_0",
"hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q6_K",
"hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q4_K_M",
"hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q4_K_S"
]
}, {
name: "Phi 3 3.8B",
abilities: ["chat", "complete", "functionCalling"],
description: "Phi 3 model was created by Microsoft and is optimized for strong reasoning (especially math and logic).\n" +
"This is the small version of the model.",
fileOptions: [
"hf:bartowski/Phi-3.1-mini-4k-instruct-GGUF:Q8_0",
"hf:bartowski/Phi-3.1-mini-4k-instruct-GGUF:Q4_K_M"
]
}, {
name: "OLMoE 1B 7B MoE",
abilities: ["chat"],
description: "OLMoE models were created by AllenAI, and are fully open source models that utilize a Mixture of Experts architecture.\n" +
"Mixtures of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to the improve the overall performance on complex tasks.\n" +
"This model includes 64 expert models, with a total of 7 billion parameters.\n" +
"This model generates output extremely fast.",
fileOptions: [
"hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf",
"hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q6_k.gguf",
"hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q5_k_m.gguf",
"hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q4_k_s.gguf",
"hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q4_k_m.gguf"
]
}, {
name: "Mixtral 8x7B MoE",
abilities: ["chat", "complete"],
description: "Mixtral models were created by Mistal AI and are general purpose models that utilize a Mixture of Experts architecture.\n" +
"Mixtures of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to the improve the overall performance on complex tasks.\n" +
"This model includes 8 expert models, each with 7 billion parameters.",
fileOptions: [
"hf:TheBloke/Mixtral-8x7B-v0.1-GGUF:Q5_K_M",
"hf:TheBloke/Mixtral-8x7B-v0.1-GGUF:Q4_K_M"
]
}, {
name: "Mistral 7B Instruct v0.2",
abilities: ["chat", "complete"],
description: "Mistral models were created by Mistal AI and are general purpose models.\n" +
"This is the 7 billion parameters version of the model.",
fileOptions: [
"hf:TheBloke/Mistral-7B-Instruct-v0.2-GGUF:Q5_K_M",
"hf:TheBloke/Mistral-7B-Instruct-v0.2-GGUF:Q4_K_M"
]
}, {
name: "Dolphin 2.5 Mixtral 8x7B MoE",
abilities: ["chat", "complete"],
description: "This Dolphin Mixtral model was created by Eric Hartford and is an uncensored model based on Mixtral, with really good coding skills.\n" +
"See the Mixtral model above for more information about Mixtral models.\n" +
"This model includes 8 expert models, each with 7 billion parameters.",
fileOptions: [
"hf:TheBloke/dolphin-2.5-mixtral-8x7b-GGUF:Q5_K_M",
"hf:TheBloke/dolphin-2.5-mixtral-8x7b-GGUF:Q4_K_M"
]
}, {
name: "Gemma 2 9B",
abilities: ["chat", "complete"],
description: "Gemma models were created by Google and are optimized suited for variety of text generation tasks, " +
"including question answering, summarization, and reasoning, with a focus on responsible responses.\n" +
"This is the 9 billion parameters version of the model.",
fileOptions: [
"hf:bartowski/gemma-2-9b-it-GGUF:Q6_K_L",
"hf:bartowski/gemma-2-9b-it-GGUF:Q6_K",
"hf:bartowski/gemma-2-9b-it-GGUF:Q5_K_L",
"hf:bartowski/gemma-2-9b-it-GGUF:Q5_K_M",
"hf:bartowski/gemma-2-9b-it-GGUF:Q5_K_S",
"hf:bartowski/gemma-2-9b-it-GGUF:Q4_K_L",
"hf:bartowski/gemma-2-9b-it-GGUF:Q4_K_M"
]
}, {
name: "Gemma 2 2B",
abilities: ["chat", "complete"],
description: "Gemma models were created by Google and are optimized suited for variety of text generation tasks, " +
"including question answering, summarization, and reasoning, with a focus on responsible responses.\n" +
"This is the 2 billion parameters version of the model and is significantly less powerful than the 9B version.",
fileOptions: [
"hf:bartowski/gemma-2-2b-it-GGUF:Q6_K_L",
"hf:bartowski/gemma-2-2b-it-GGUF:Q6_K",
"hf:bartowski/gemma-2-2b-it-GGUF:Q5_K_M",
"hf:bartowski/gemma-2-2b-it-GGUF:Q5_K_S",
"hf:bartowski/gemma-2-2b-it-GGUF:Q4_K_M"
]
}, {
name: "Gemma 2 27B",
abilities: ["chat", "complete"],
description: "Gemma models were created by Google and are optimized suited for varoety of text generation tasks, " +
"including question answering, summarization, and reasoning, with a focus on responsible responses.\n" +
"This is the 27 billion parameters version of the model.\n" +
"Since the model is relatively big, it may not run well on your machine",
fileOptions: [
"hf:bartowski/gemma-2-27b-it-GGUF:Q6_K_L",
"hf:bartowski/gemma-2-27b-it-GGUF:Q6_K",
"hf:bartowski/gemma-2-27b-it-GGUF:Q5_K_L",
"hf:bartowski/gemma-2-27b-it-GGUF:Q5_K_M",
"hf:bartowski/gemma-2-27b-it-GGUF:Q5_K_S",
"hf:bartowski/gemma-2-27b-it-GGUF:Q4_K_L",
"hf:bartowski/gemma-2-27b-it-GGUF:Q4_K_M"
]
}, {
name: "Orca 2 13B",
abilities: ["chat", "complete"],
description: "Orca 2 model was created by Microsoft and is optimized for reasoning over given data, reading comprehensions, math problem solving and text summarization.\n" +
"This is the 13 billion parameters version of the model.",
fileOptions: [
"hf:TheBloke/Orca-2-13B-GGUF:Q5_K_M",
"hf:TheBloke/Orca-2-13B-GGUF:Q4_K_M"
]
}, {
name: "Code Llama 7B",
abilities: ["chat", "complete", "infill"],
description: "Code Llama model was created by Meta based on Llama 2 and is optimized for coding tasks.\n" +
"This is the 7 billion parameters version of the model.",
fileOptions: [
"hf:TheBloke/CodeLlama-7B-GGUF:Q5_K_M",
"hf:TheBloke/CodeLlama-7B-GGUF:Q4_K_M"
]
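// Models with the "infill" ability can fill the gap between a prefix and a
// suffix, which is what powers editor-style code completion. A sketch assuming
// node-llama-cpp's LlamaCompletion API (check your version's docs for exact
// signatures):
//
//     import {LlamaCompletion} from "node-llama-cpp";
//
//     const completion = new LlamaCompletion({contextSequence: context.getSequence()});
//     const infill = await completion.generateInfillCompletion(
//         "function quicksort(arr) {\n    ",  // text before the gap
//         "\n}"                               // text after the gap
//     );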
}, {
name: "Code Llama 13B",
abilities: ["chat", "complete", "infill"],
description: "Code Llama model was created by Meta based on Llama 2 and is optimized for coding tasks.\n" +
"This is the 13 billion parameters version of the model.",
fileOptions: [
"hf:TheBloke/CodeLlama-13B-GGUF:Q5_K_M",
"hf:TheBloke/CodeLlama-13B-GGUF:Q4_K_M"
]
}, {
name: "Code Llama 34B",
abilities: ["chat", "complete", "infill"],
description: "Code Llama model was created by Meta based on Llama 2 and is optimized for coding tasks.\n" +
"This is the 34 billion parameters version of the model.\n" +
"You need a GPU with handful of VRAM to use this version.",
fileOptions: [
"hf:TheBloke/CodeLlama-34B-GGUF:Q5_K_M",
"hf:TheBloke/CodeLlama-34B-GGUF:Q4_K_M"
]
}, {
name: "CodeGemma 2B",
abilities: ["code", "complete", "infill"],
description: "CodeGemma models were created by Google and are optimized for code completion, code generation, " +
"natual language understanding, mathematical reasoning, and instruction following.\n" +
"This model is not suited for chat.\n" +
"This is the 2 billion parameters version of the model.\n",
fileOptions: [
"hf:bartowski/codegemma-2b-GGUF:Q8_0",
"hf:bartowski/codegemma-2b-GGUF:Q6_K",
"hf:bartowski/codegemma-2b-GGUF:Q5_K_M",
"hf:bartowski/codegemma-2b-GGUF:Q5_K_S",
"hf:bartowski/codegemma-2b-GGUF:Q4_K_M"
]
}, {
name: "CodeGemma 7B",
abilities: ["code", "complete", "infill"],
description: "CodeGemma models were created by Google and are optimized for code completion, code generation, " +
"natual language understanding, mathematical reasoning, and instruction following.\n" +
"This model is not suited for chat.\n" +
"This is the 7 billion parameters version of the model.\n",
fileOptions: [
"hf:bartowski/codegemma-1.1-7b-it-GGUF:Q6_K",
"hf:bartowski/codegemma-1.1-7b-it-GGUF:Q5_K_M",
"hf:bartowski/codegemma-1.1-7b-it-GGUF:Q5_K_S",
"hf:bartowski/codegemma-1.1-7b-it-GGUF:Q4_K_M"
]
}, {
name: "Stable Code Instruct 3B",
abilities: ["chat", "complete", "infill"],
description: "Stable Code models were created by Stability AI and are optimized for code completion.",
fileOptions: [
"hf:stabilityai/stable-code-instruct-3b/stable-code-3b-q5_k_m.gguf",
"hf:stabilityai/stable-code-instruct-3b/stable-code-3b-q4_k_m.gguf"
]
}];
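// The JSON schema enforcement mentioned in this package's description constrains
// token generation itself, so the output is guaranteed to match the schema.
// A minimal sketch based on node-llama-cpp's documented grammar API:
//
//     const grammar = await llama.createGrammarForJsonSchema({
//         type: "object",
//         properties: {
//             answer: {type: "string"},
//             confidence: {type: "number"}
//         }
//     });
//     const res = await session.prompt("Is the sky blue?", {grammar});
//     const parsed = grammar.parse(res); // typed object matching the schema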
//# sourceMappingURL=recommendedModels.js.map