@lobehub/chat
Version:
Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal capabilities, and an extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.
501 lines • 227 kB
JSON
{
"01-ai/yi-1.5-34b-chat": {
"description": "Zero One Everything, the latest open-source fine-tuned model with 34 billion parameters, supports various dialogue scenarios with high-quality training data aligned with human preferences."
},
"01-ai/yi-1.5-9b-chat": {
"description": "Zero One Everything, the latest open-source fine-tuned model with 9 billion parameters, supports various dialogue scenarios with high-quality training data aligned with human preferences."
},
"360/deepseek-r1": {
"description": "[360 Deployment Version] DeepSeek-R1 extensively utilizes reinforcement learning techniques in the post-training phase, significantly enhancing model inference capabilities with minimal labeled data. It performs comparably to OpenAI's o1 official version in tasks such as mathematics, coding, and natural language reasoning."
},
"360gpt-pro": {
"description": "360GPT Pro, as an important member of the 360 AI model series, meets diverse natural language application scenarios with efficient text processing capabilities, supporting long text understanding and multi-turn dialogue."
},
"360gpt-pro-trans": {
"description": "A translation-specific model, finely tuned for optimal translation results."
},
"360gpt-turbo": {
"description": "360GPT Turbo offers powerful computation and dialogue capabilities, with excellent semantic understanding and generation efficiency, making it an ideal intelligent assistant solution for enterprises and developers."
},
"360gpt-turbo-responsibility-8k": {
"description": "360GPT Turbo Responsibility 8K emphasizes semantic safety and responsibility, designed specifically for applications with high content safety requirements, ensuring accuracy and robustness in user experience."
},
"360gpt2-o1": {
"description": "360gpt2-o1 builds a chain of thought using tree search and incorporates a reflection mechanism, trained with reinforcement learning, enabling the model to self-reflect and correct errors."
},
"360gpt2-pro": {
"description": "360GPT2 Pro is an advanced natural language processing model launched by 360, featuring exceptional text generation and understanding capabilities, particularly excelling in generation and creative tasks, capable of handling complex language transformations and role-playing tasks."
},
"360zhinao2-o1": {
"description": "360zhinao2-o1 uses tree search to build a chain of thought and introduces a reflection mechanism, utilizing reinforcement learning for training, enabling the model to possess self-reflection and error-correction capabilities."
},
"4.0Ultra": {
"description": "Spark4.0 Ultra is the most powerful version in the Spark large model series, enhancing text content understanding and summarization capabilities while upgrading online search links. It is a comprehensive solution for improving office productivity and accurately responding to demands, leading the industry as an intelligent product."
},
"Baichuan2-Turbo": {
"description": "Utilizes search enhancement technology to achieve comprehensive links between large models and domain knowledge, as well as knowledge from the entire web. Supports uploads of various documents such as PDF and Word, and URL input, providing timely and comprehensive information retrieval with accurate and professional output."
},
"Baichuan3-Turbo": {
"description": "Optimized for high-frequency enterprise scenarios, significantly improving performance and cost-effectiveness. Compared to the Baichuan2 model, content creation improves by 20%, knowledge Q&A by 17%, and role-playing ability by 40%. Overall performance is superior to GPT-3.5."
},
"Baichuan3-Turbo-128k": {
"description": "Features a 128K ultra-long context window, optimized for high-frequency enterprise scenarios, significantly improving performance and cost-effectiveness. Compared to the Baichuan2 model, content creation improves by 20%, knowledge Q&A by 17%, and role-playing ability by 40%. Overall performance is superior to GPT-3.5."
},
"Baichuan4": {
"description": "The model is the best in the country, surpassing mainstream foreign models in Chinese tasks such as knowledge encyclopedias, long texts, and creative generation. It also boasts industry-leading multimodal capabilities, excelling in multiple authoritative evaluation benchmarks."
},
"Baichuan4-Air": {
"description": "The leading model in the country, surpassing mainstream foreign models in Chinese tasks such as knowledge encyclopedias, long texts, and creative generation. It also possesses industry-leading multimodal capabilities, excelling in multiple authoritative evaluation benchmarks."
},
"Baichuan4-Turbo": {
"description": "The leading model in the country, surpassing mainstream foreign models in Chinese tasks such as knowledge encyclopedias, long texts, and creative generation. It also possesses industry-leading multimodal capabilities, excelling in multiple authoritative evaluation benchmarks."
},
"DeepSeek-R1": {
"description": "A state-of-the-art efficient LLM, skilled in reasoning, mathematics, and programming."
},
"DeepSeek-R1-Distill-Llama-70B": {
"description": "DeepSeek R1— the larger and smarter model in the DeepSeek suite— distilled into the Llama 70B architecture. Based on benchmark testing and human evaluation, this model is smarter than the original Llama 70B, particularly excelling in tasks requiring mathematical and factual accuracy."
},
"DeepSeek-R1-Distill-Qwen-1.5B": {
"description": "The DeepSeek-R1 distillation model based on Qwen2.5-Math-1.5B optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
},
"DeepSeek-R1-Distill-Qwen-14B": {
"description": "The DeepSeek-R1 distillation model based on Qwen2.5-14B optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
},
"DeepSeek-R1-Distill-Qwen-32B": {
"description": "The DeepSeek-R1 series optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks, surpassing the level of OpenAI-o1-mini."
},
"DeepSeek-R1-Distill-Qwen-7B": {
"description": "The DeepSeek-R1 distillation model based on Qwen2.5-Math-7B optimizes inference performance through reinforcement learning and cold-start data, refreshing the benchmark for open-source models across multiple tasks."
},
"DeepSeek-V3": {
"description": "DeepSeek-V3 is a MoE model developed in-house by Deep Seek Company. Its performance surpasses that of other open-source models such as Qwen2.5-72B and Llama-3.1-405B in multiple assessments, and it stands on par with the world's top proprietary models like GPT-4o and Claude-3.5-Sonnet."
},
"Doubao-lite-128k": {
"description": "Doubao-lite offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 128k context window."
},
"Doubao-lite-32k": {
"description": "Doubao-lite offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 32k context window."
},
"Doubao-lite-4k": {
"description": "Doubao-lite offers ultra-fast response times and better cost-effectiveness, providing customers with more flexible options for different scenarios. Supports inference and fine-tuning with a 4k context window."
},
"Doubao-pro-128k": {
"description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 128k context window."
},
"Doubao-pro-32k": {
"description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 32k context window."
},
"Doubao-pro-4k": {
"description": "The best-performing flagship model, suitable for handling complex tasks. It excels in scenarios such as reference Q&A, summarization, creative writing, text classification, and role-playing. Supports inference and fine-tuning with a 4k context window."
},
"ERNIE-3.5-128K": {
"description": "Baidu's self-developed flagship large-scale language model, covering a vast amount of Chinese and English corpus. It possesses strong general capabilities, meeting the requirements for most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu's search plugin to ensure the timeliness of Q&A information."
},
"ERNIE-3.5-8K": {
"description": "Baidu's self-developed flagship large-scale language model, covering a vast amount of Chinese and English corpus. It possesses strong general capabilities, meeting the requirements for most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu's search plugin to ensure the timeliness of Q&A information."
},
"ERNIE-3.5-8K-Preview": {
"description": "Baidu's self-developed flagship large-scale language model, covering a vast amount of Chinese and English corpus. It possesses strong general capabilities, meeting the requirements for most dialogue Q&A, creative generation, and plugin application scenarios; it supports automatic integration with Baidu's search plugin to ensure the timeliness of Q&A information."
},
"ERNIE-4.0-8K-Latest": {
"description": "Baidu's self-developed flagship ultra-large-scale language model, which has achieved a comprehensive upgrade in model capabilities compared to ERNIE 3.5, widely applicable to complex task scenarios across various fields; supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
},
"ERNIE-4.0-8K-Preview": {
"description": "Baidu's self-developed flagship ultra-large-scale language model, which has achieved a comprehensive upgrade in model capabilities compared to ERNIE 3.5, widely applicable to complex task scenarios across various fields; supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information."
},
"ERNIE-4.0-Turbo-8K-Latest": {
"description": "Baidu's self-developed flagship ultra-large-scale language model, demonstrating excellent overall performance, suitable for complex task scenarios across various fields; supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information. It offers better performance compared to ERNIE 4.0."
},
"ERNIE-4.0-Turbo-8K-Preview": {
"description": "Baidu's self-developed flagship ultra-large-scale language model, demonstrating excellent overall performance, widely applicable to complex task scenarios across various fields; supports automatic integration with Baidu search plugins to ensure the timeliness of Q&A information. It outperforms ERNIE 4.0 in performance."
},
"ERNIE-Character-8K": {
"description": "Baidu's self-developed vertical scene large language model, suitable for applications such as game NPCs, customer service dialogues, and role-playing conversations, featuring more distinct and consistent character styles, stronger adherence to instructions, and superior inference performance."
},
"ERNIE-Lite-Pro-128K": {
"description": "Baidu's self-developed lightweight large language model, balancing excellent model performance with inference efficiency, offering better results than ERNIE Lite, suitable for inference on low-power AI acceleration cards."
},
"ERNIE-Speed-128K": {
"description": "Baidu's latest self-developed high-performance large language model released in 2024, with outstanding general capabilities, suitable as a base model for fine-tuning, effectively addressing specific scenario issues while also exhibiting excellent inference performance."
},
"ERNIE-Speed-Pro-128K": {
"description": "Baidu's latest self-developed high-performance large language model released in 2024, with outstanding general capabilities, providing better results than ERNIE Speed, suitable as a base model for fine-tuning, effectively addressing specific scenario issues while also exhibiting excellent inference performance."
},
"Gryphe/MythoMax-L2-13b": {
"description": "MythoMax-L2 (13B) is an innovative model suitable for multi-domain applications and complex tasks."
},
"InternVL2-8B": {
"description": "InternVL2-8B is a powerful visual language model that supports multimodal processing of images and text, capable of accurately recognizing image content and generating relevant descriptions or answers."
},
"InternVL2.5-26B": {
"description": "InternVL2.5-26B is a powerful visual language model that supports multimodal processing of images and text, capable of accurately recognizing image content and generating relevant descriptions or answers."
},
"Llama-3.2-11B-Vision-Instruct": {
"description": "Exhibits outstanding image reasoning capabilities on high-resolution images, suitable for visual understanding applications."
},
"Llama-3.2-90B-Vision-Instruct\t": {
"description": "Advanced image reasoning capabilities suitable for visual understanding agent applications."
},
"Meta-Llama-3.1-405B-Instruct": {
"description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
},
"Meta-Llama-3.1-70B-Instruct": {
"description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
},
"Meta-Llama-3.1-8B-Instruct": {
"description": "Llama 3.1 instruction-tuned text model optimized for multilingual dialogue use cases, performing excellently on common industry benchmarks among many available open-source and closed chat models."
},
"Meta-Llama-3.2-1B-Instruct": {
"description": "An advanced cutting-edge small language model with language understanding, excellent reasoning capabilities, and text generation abilities."
},
"Meta-Llama-3.2-3B-Instruct": {
"description": "An advanced cutting-edge small language model with language understanding, excellent reasoning capabilities, and text generation abilities."
},
"Meta-Llama-3.3-70B-Instruct": {
"description": "Llama 3.3 is the most advanced multilingual open-source large language model in the Llama series, offering performance comparable to a 405B model at a very low cost. Based on the Transformer architecture, it enhances usability and safety through supervised fine-tuning (SFT) and reinforcement learning from human feedback (RLHF). Its instruction-tuned version is optimized for multilingual dialogue and outperforms many open-source and closed chat models on various industry benchmarks. Knowledge cutoff date is December 2023."
},
"MiniMax-M1": {
"description": "A brand-new self-developed inference model. Globally leading: 80K reasoning chains x 1M input, performance comparable to top overseas models."
},
"MiniMax-Text-01": {
"description": "In the MiniMax-01 series of models, we have made bold innovations: for the first time, we have implemented a linear attention mechanism on a large scale, making the traditional Transformer architecture no longer the only option. This model has a parameter count of up to 456 billion, with a single activation of 45.9 billion. Its overall performance rivals that of top overseas models while efficiently handling the world's longest context of 4 million tokens, which is 32 times that of GPT-4o and 20 times that of Claude-3.5-Sonnet."
},
"MiniMaxAI/MiniMax-M1-80k": {
"description": "MiniMax-M1 is a large-scale hybrid attention inference model with open-source weights, featuring 456 billion parameters, with approximately 45.9 billion parameters activated per token. The model natively supports ultra-long contexts of up to 1 million tokens and, through lightning attention mechanisms, reduces floating-point operations by 75% compared to DeepSeek R1 in tasks generating 100,000 tokens. Additionally, MiniMax-M1 employs a Mixture of Experts (MoE) architecture, combining the CISPO algorithm with an efficient reinforcement learning training design based on hybrid attention, achieving industry-leading performance in long-input inference and real-world software engineering scenarios."
},
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": {
"description": "Nous Hermes 2 - Mixtral 8x7B-DPO (46.7B) is a high-precision instruction model suitable for complex computations."
},
"Phi-3-medium-128k-instruct": {
"description": "The same Phi-3-medium model, but with a larger context size for RAG or few-shot prompting."
},
"Phi-3-medium-4k-instruct": {
"description": "A 14B parameter model that provides better quality than Phi-3-mini, focusing on high-quality, reasoning-dense data."
},
"Phi-3-mini-128k-instruct": {
"description": "The same Phi-3-mini model, but with a larger context size for RAG or few-shot prompting."
},
"Phi-3-mini-4k-instruct": {
"description": "The smallest member of the Phi-3 family, optimized for both quality and low latency."
},
"Phi-3-small-128k-instruct": {
"description": "The same Phi-3-small model, but with a larger context size for RAG or few-shot prompting."
},
"Phi-3-small-8k-instruct": {
"description": "A 7B parameter model that provides better quality than Phi-3-mini, focusing on high-quality, reasoning-dense data."
},
"Phi-3.5-mini-instruct": {
"description": "An updated version of the Phi-3-mini model."
},
"Phi-3.5-vision-instrust": {
"description": "An updated version of the Phi-3-vision model."
},
"Pro/Qwen/Qwen2-7B-Instruct": {
"description": "Qwen2-7B-Instruct is an instruction-tuned large language model in the Qwen2 series, with a parameter size of 7B. This model is based on the Transformer architecture and employs techniques such as the SwiGLU activation function, attention QKV bias, and group query attention. It can handle large-scale inputs. The model excels in language understanding, generation, multilingual capabilities, coding, mathematics, and reasoning across multiple benchmark tests, surpassing most open-source models and demonstrating competitive performance comparable to proprietary models in certain tasks. Qwen2-7B-Instruct outperforms Qwen1.5-7B-Chat in multiple evaluations, showing significant performance improvements."
},
"Pro/Qwen/Qwen2.5-7B-Instruct": {
"description": "Qwen2.5-7B-Instruct is one of the latest large language models released by Alibaba Cloud. This 7B model shows significant improvements in coding and mathematics. It also provides multilingual support, covering over 29 languages, including Chinese and English. The model has made notable advancements in instruction following, understanding structured data, and generating structured outputs, especially JSON."
},
"Pro/Qwen/Qwen2.5-Coder-7B-Instruct": {
"description": "Qwen2.5-Coder-7B-Instruct is the latest version in Alibaba Cloud's series of code-specific large language models. This model significantly enhances code generation, reasoning, and repair capabilities based on Qwen2.5, trained on 55 trillion tokens. It not only improves coding abilities but also maintains advantages in mathematics and general capabilities, providing a more comprehensive foundation for practical applications such as code agents."
},
"Pro/Qwen/Qwen2.5-VL-7B-Instruct": {
"description": "Qwen2.5-VL is the newest addition to the Qwen series, featuring enhanced visual comprehension capabilities. It can analyze text, charts, and layouts within images, comprehend long videos while capturing events. The model supports reasoning, tool manipulation, multi-format object localization, and structured output generation. It incorporates optimized dynamic resolution and frame rate training for video understanding, along with improved efficiency in its visual encoder."
},
"Pro/THUDM/GLM-4.1V-9B-Thinking": {
"description": "GLM-4.1V-9B-Thinking is an open-source vision-language model (VLM) jointly released by Zhipu AI and Tsinghua University's KEG Lab, designed specifically for handling complex multimodal cognitive tasks. Based on the GLM-4-9B-0414 foundation model, it significantly enhances cross-modal reasoning ability and stability by introducing the Chain-of-Thought reasoning mechanism and employing reinforcement learning strategies."
},
"Pro/THUDM/glm-4-9b-chat": {
"description": "GLM-4-9B-Chat is the open-source version of the GLM-4 series pre-trained models launched by Zhipu AI. This model excels in semantics, mathematics, reasoning, code, and knowledge. In addition to supporting multi-turn dialogues, GLM-4-9B-Chat also features advanced capabilities such as web browsing, code execution, custom tool invocation (Function Call), and long-text reasoning. The model supports 26 languages, including Chinese, English, Japanese, Korean, and German. In multiple benchmark tests, GLM-4-9B-Chat has demonstrated excellent performance, such as in AlignBench-v2, MT-Bench, MMLU, and C-Eval. The model supports a maximum context length of 128K, making it suitable for academic research and commercial applications."
},
"Pro/deepseek-ai/DeepSeek-R1": {
"description": "DeepSeek-R1 is a reinforcement learning (RL) driven inference model that addresses issues of repetitiveness and readability in models. Prior to RL, DeepSeek-R1 introduced cold start data to further optimize inference performance. It performs comparably to OpenAI-o1 in mathematical, coding, and reasoning tasks, and enhances overall effectiveness through carefully designed training methods."
},
"Pro/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": {
"description": "DeepSeek-R1-Distill-Qwen-7B is a model derived from Qwen2.5-Math-7B through knowledge distillation. It was fine-tuned using 800,000 carefully selected samples generated by DeepSeek-R1, demonstrating exceptional reasoning capabilities. The model achieves outstanding performance across multiple benchmarks, including 92.8% accuracy on MATH-500, a 55.5% pass rate on AIME 2024, and a score of 1189 on CodeForces, showcasing strong mathematical and programming abilities for a 7B-scale model."
},
"Pro/deepseek-ai/DeepSeek-V3": {
"description": "DeepSeek-V3 is a mixed expert (MoE) language model with 671 billion parameters, utilizing multi-head latent attention (MLA) and the DeepSeekMoE architecture, combined with a load balancing strategy without auxiliary loss to optimize inference and training efficiency. Pre-trained on 14.8 trillion high-quality tokens and fine-tuned with supervision and reinforcement learning, DeepSeek-V3 outperforms other open-source models and approaches leading closed-source models."
},
"QwQ-32B-Preview": {
"description": "QwQ-32B-Preview is an innovative natural language processing model capable of efficiently handling complex dialogue generation and context understanding tasks."
},
"Qwen/QVQ-72B-Preview": {
"description": "QVQ-72B-Preview is a research-oriented model developed by the Qwen team, focusing on visual reasoning capabilities, with unique advantages in understanding complex scenes and solving visually related mathematical problems."
},
"Qwen/QwQ-32B": {
"description": "QwQ is the inference model of the Qwen series. Compared to traditional instruction-tuned models, QwQ possesses reasoning and cognitive abilities, achieving significantly enhanced performance in downstream tasks, especially in solving difficult problems. QwQ-32B is a medium-sized inference model that competes effectively against state-of-the-art inference models (such as DeepSeek-R1 and o1-mini). This model employs technologies such as RoPE, SwiGLU, RMSNorm, and Attention QKV bias, featuring a 64-layer network structure and 40 Q attention heads (with 8 KV heads in the GQA architecture)."
},
"Qwen/QwQ-32B-Preview": {
"description": "QwQ-32B-Preview is Qwen's latest experimental research model, focusing on enhancing AI reasoning capabilities. By exploring complex mechanisms such as language mixing and recursive reasoning, its main advantages include strong analytical reasoning, mathematical, and programming abilities. However, it also faces challenges such as language switching issues, reasoning loops, safety considerations, and differences in other capabilities."
},
"Qwen/Qwen2-72B-Instruct": {
"description": "Qwen2 is an advanced general-purpose language model that supports various types of instructions."
},
"Qwen/Qwen2-7B-Instruct": {
"description": "Qwen2-72B-Instruct is an instruction-tuned large language model in the Qwen2 series, with a parameter size of 72B. This model is based on the Transformer architecture and employs techniques such as the SwiGLU activation function, attention QKV bias, and group query attention. It can handle large-scale inputs. The model excels in language understanding, generation, multilingual capabilities, coding, mathematics, and reasoning across multiple benchmark tests, surpassing most open-source models and demonstrating competitive performance comparable to proprietary models in certain tasks."
},
"Qwen/Qwen2-VL-72B-Instruct": {
"description": "Qwen2-VL is the latest iteration of the Qwen-VL model, achieving state-of-the-art performance in visual understanding benchmarks."
},
"Qwen/Qwen2.5-14B-Instruct": {
"description": "Qwen2.5 is a brand new series of large language models designed to optimize the handling of instruction-based tasks."
},
"Qwen/Qwen2.5-32B-Instruct": {
"description": "Qwen2.5 is a brand new series of large language models designed to optimize the handling of instruction-based tasks."
},
"Qwen/Qwen2.5-72B-Instruct": {
"description": "A large language model developed by the Alibaba Cloud Tongyi Qianwen team"
},
"Qwen/Qwen2.5-72B-Instruct-128K": {
"description": "Qwen2.5 is a new large language model series with enhanced understanding and generation capabilities."
},
"Qwen/Qwen2.5-72B-Instruct-Turbo": {
"description": "Qwen2.5 is a new large language model series designed to optimize instruction-based task processing."
},
"Qwen/Qwen2.5-7B-Instruct": {
"description": "Qwen2.5 is a brand new series of large language models designed to optimize the handling of instruction-based tasks."
},
"Qwen/Qwen2.5-7B-Instruct-Turbo": {
"description": "Qwen2.5 is a new large language model series designed to optimize instruction-based task processing."
},
"Qwen/Qwen2.5-Coder-32B-Instruct": {
"description": "Qwen2.5-Coder focuses on code writing."
},
"Qwen/Qwen2.5-Coder-7B-Instruct": {
"description": "Qwen2.5-Coder-7B-Instruct is the latest version in Alibaba Cloud's series of code-specific large language models. This model significantly enhances code generation, reasoning, and repair capabilities based on Qwen2.5, trained on 55 trillion tokens. It not only improves coding abilities but also maintains advantages in mathematics and general capabilities, providing a more comprehensive foundation for practical applications such as code agents."
},
"Qwen/Qwen2.5-VL-32B-Instruct": {
"description": "Qwen2.5-VL-32B-Instruct is a multimodal large language model developed by the Tongyi Qianwen team, representing part of the Qwen2.5-VL series. This model excels not only in recognizing common objects but also in analyzing text, charts, icons, graphics, and layouts within images. It functions as a visual agent capable of reasoning and dynamically manipulating tools, with the ability to operate computers and mobile devices. Additionally, the model can precisely locate objects in images and generate structured outputs for documents like invoices and tables. Compared to its predecessor Qwen2-VL, this version demonstrates enhanced mathematical and problem-solving capabilities through reinforcement learning, while also exhibiting more human-preferred response styles."
},
"Qwen/Qwen2.5-VL-72B-Instruct": {
"description": "Qwen2.5-VL is the vision-language model in the Qwen2.5 series. This model demonstrates significant improvements across multiple dimensions: enhanced visual comprehension capable of recognizing common objects, analyzing text, charts, and layouts; serving as a visual agent that can reason and dynamically guide tool usage; supporting understanding of long videos exceeding 1 hour while capturing key events; able to precisely locate objects in images by generating bounding boxes or points; and capable of producing structured outputs particularly suitable for scanned data like invoices and forms."
},
"Qwen/Qwen3-14B": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
},
"Qwen/Qwen3-235B-A22B": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
},
"Qwen/Qwen3-30B-A3B": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
},
"Qwen/Qwen3-32B": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
},
"Qwen/Qwen3-8B": {
"description": "Qwen3 is a next-generation model with significantly enhanced capabilities, achieving industry-leading levels in reasoning, general tasks, agent functions, and multilingual support, with a switchable thinking mode."
},
"Qwen2-72B-Instruct": {
"description": "Qwen2 is the latest series of the Qwen model, supporting 128k context. Compared to the current best open-source models, Qwen2-72B significantly surpasses leading models in natural language understanding, knowledge, coding, mathematics, and multilingual capabilities."
},
"Qwen2-7B-Instruct": {
"description": "Qwen2 is the latest series of the Qwen model, capable of outperforming optimal open-source models of similar size and even larger models. Qwen2 7B has achieved significant advantages in multiple evaluations, especially in coding and Chinese comprehension."
},
"Qwen2-VL-72B": {
"description": "Qwen2-VL-72B is a powerful visual language model that supports multimodal processing of images and text, capable of accurately recognizing image content and generating relevant descriptions or answers."
},
"Qwen2.5-14B-Instruct": {
"description": "Qwen2.5-14B-Instruct is a large language model with 14 billion parameters, delivering excellent performance, optimized for Chinese and multilingual scenarios, and supporting applications such as intelligent Q&A and content generation."
},
"Qwen2.5-32B-Instruct": {
"description": "Qwen2.5-32B-Instruct is a large language model with 32 billion parameters, offering balanced performance, optimized for Chinese and multilingual scenarios, and supporting applications such as intelligent Q&A and content generation."
},
"Qwen2.5-72B-Instruct": {
"description": "Qwen2.5-72B-Instruct supports 16k context and generates long texts exceeding 8K. It enables seamless interaction with external systems through function calls, greatly enhancing flexibility and scalability. The model's knowledge has significantly increased, and its coding and mathematical abilities have been greatly improved, with multilingual support for over 29 languages."
},
"Qwen2.5-7B-Instruct": {
"description": "Qwen2.5-7B-Instruct is a large language model with 7 billion parameters, supporting function calls and seamless interaction with external systems, greatly enhancing flexibility and scalability. It is optimized for Chinese and multilingual scenarios, supporting applications such as intelligent Q&A and content generation."
},
"Qwen2.5-Coder-14B-Instruct": {
"description": "Qwen2.5-Coder-14B-Instruct is a large-scale pre-trained programming instruction model with strong code understanding and generation capabilities, efficiently handling various programming tasks, particularly suited for intelligent code writing, automated script generation, and programming problem-solving."
},
"Qwen2.5-Coder-32B-Instruct": {
"description": "Qwen2.5-Coder-32B-Instruct is a large language model specifically designed for code generation, code understanding, and efficient development scenarios, featuring an industry-leading 32 billion parameters to meet diverse programming needs."
},
"SenseChat": {
"description": "Basic version model (V4) with a context length of 4K, featuring strong general capabilities."
},
"SenseChat-128K": {
"description": "Basic version model (V4) with a context length of 128K, excelling in long text comprehension and generation tasks."
},
"SenseChat-32K": {
"description": "Basic version model (V4) with a context length of 32K, flexibly applicable to various scenarios."
},
"SenseChat-5": {
"description": "The latest version model (V5.5) with a context length of 128K shows significant improvements in mathematical reasoning, English conversation, instruction following, and long text comprehension, comparable to GPT-4o."
},
"SenseChat-5-1202": {
"description": "Based on version V5.5, this latest release shows significant improvements over the previous version in foundational Chinese and English capabilities, chat, science knowledge, humanities knowledge, writing, mathematical logic, and word count control."
},
"SenseChat-5-Cantonese": {
"description": "With a context length of 32K, it surpasses GPT-4 in Cantonese conversation comprehension and is competitive with GPT-4 Turbo in knowledge, reasoning, mathematics, and code writing across multiple domains."
},
"SenseChat-5-beta": {
"description": "Partially outperforms SenseCat-5-1202"
},
"SenseChat-Character": {
"description": "Standard version model with an 8K context length and high response speed."
},
"SenseChat-Character-Pro": {
"description": "Advanced version model with a context length of 32K, offering comprehensive capability enhancements and supporting both Chinese and English conversations."
},
"SenseChat-Turbo": {
"description": "Suitable for fast question answering and model fine-tuning scenarios."
},
"SenseChat-Turbo-1202": {
"description": "This is the latest lightweight version model, achieving over 90% of the full model's capabilities while significantly reducing inference costs."
},
"SenseChat-Vision": {
"description": "The latest version model (V5.5) supports multi-image input and fully optimizes the model's basic capabilities, achieving significant improvements in object attribute recognition, spatial relationships, action event recognition, scene understanding, emotion recognition, logical reasoning, and text understanding and generation."
},
"SenseNova-V6-Pro": {
"description": "Achieves a native unification of image, text, and video capabilities, breaking through the limitations of traditional discrete multimodality, winning dual championships in the OpenCompass and SuperCLUE evaluations."
},
"SenseNova-V6-Reasoner": {
"description": "Balances visual and linguistic deep reasoning, enabling slow thinking and profound inference, presenting a complete chain of thought process."
},
"SenseNova-V6-Turbo": {
"description": "Achieves a native unification of image, text, and video capabilities, breaking through the limitations of traditional discrete multimodality, leading comprehensively in core dimensions such as multimodal foundational abilities and linguistic foundational abilities, excelling in both literature and science, and consistently ranking among the top tier in various assessments both domestically and internationally."
},
"Skylark2-lite-8k": {
"description": "Skylark 2nd generation model, Skylark2-lite model is characterized by high response speed, suitable for high real-time requirements, cost-sensitive scenarios, and situations where model accuracy is less critical, with a context window length of 8k."
},
"Skylark2-pro-32k": {
"description": "Skylark 2nd generation model, Skylark2-pro version has high model accuracy, suitable for more complex text generation scenarios such as professional field copy generation, novel writing, and high-quality translation, with a context window length of 32k."
},
"Skylark2-pro-4k": {
"description": "Skylark 2nd generation model, Skylark2-pro model has high model accuracy, suitable for more complex text generation scenarios such as professional field copy generation, novel writing, and high-quality translation, with a context window length of 4k."
},
"Skylark2-pro-character-4k": {
"description": "Skylark 2nd generation model, Skylark2-pro-character has excellent role-playing and chat capabilities, adept at engaging in conversations with users based on their prompt requests, showcasing distinct character styles and flowing dialogue, making it well-suited for building chatbots, virtual assistants, and online customer service, with high response speed."
},
"Skylark2-pro-turbo-8k": {
"description": "Skylark 2nd generation model, Skylark2-pro-turbo-8k provides faster inference at a lower cost, with a context window length of 8k."
},
"THUDM/GLM-4-32B-0414": {
"description": "GLM-4-32B-0414 is the next-generation open-source model in the GLM series, boasting 32 billion parameters. Its performance is comparable to OpenAI's GPT series and DeepSeek's V3/R1 series."
},
"THUDM/GLM-4-9B-0414": {
"description": "GLM-4-9B-0414 is a small model in the GLM series, with 9 billion parameters. This model inherits the technical characteristics of the GLM-4-32B series while providing a more lightweight deployment option. Despite its smaller size, GLM-4-9B-0414 still demonstrates excellent capabilities in tasks such as code generation, web design, SVG graphics generation, and search-based writing."
},
"THUDM/GLM-4.1V-9B-Thinking": {
"description": "GLM-4.1V-9B-Thinking is an open-source vision-language model (VLM) jointly released by Zhipu AI and Tsinghua University's KEG Lab, designed specifically for handling complex multimodal cognitive tasks. Based on the GLM-4-9B-0414 foundation model, it significantly enhances cross-modal reasoning ability and stability by introducing the Chain-of-Thought reasoning mechanism and employing reinforcement learning strategies."
},
"THUDM/GLM-Z1-32B-0414": {
"description": "GLM-Z1-32B-0414 is a reasoning model with deep thinking capabilities. This model is developed based on GLM-4-32B-0414 through cold start and extended reinforcement learning, with further training in mathematics, coding, and logic tasks. Compared to the base model, GLM-Z1-32B-0414 significantly enhances mathematical abilities and the capacity to solve complex tasks."
},
"THUDM/GLM-Z1-9B-0414": {
"description": "GLM-Z1-9B-0414 is a small model in the GLM series, with only 9 billion parameters, yet it demonstrates remarkable capabilities while maintaining the open-source tradition. Despite its smaller size, this model excels in mathematical reasoning and general tasks, leading the performance among similarly sized open-source models."
},
"THUDM/GLM-Z1-Rumination-32B-0414": {
"description": "GLM-Z1-Rumination-32B-0414 is a deep reasoning model with reflective capabilities (comparable to OpenAI's Deep Research). Unlike typical deep thinking models, reflective models engage in longer periods of deep thought to tackle more open and complex problems."
},
"THUDM/glm-4-9b-chat": {
"description": "GLM-4 9B is an open-source version that provides an optimized conversational experience for chat applications."
},
"Tongyi-Zhiwen/QwenLong-L1-32B": {
"description": "QwenLong-L1-32B is the first large reasoning model (LRM) trained with reinforcement learning for long-context tasks, optimized specifically for long-text reasoning. It achieves stable transfer from short to long contexts through a progressive context expansion reinforcement learning framework. In seven long-context document QA benchmarks, QwenLong-L1-32B outperforms flagship models like OpenAI-o3-mini and Qwen3-235B-A22B, with performance comparable to Claude-3.7-Sonnet-Thinking. The model excels in complex tasks such as mathematical reasoning, logical reasoning, and multi-hop reasoning."
},
"Yi-34B-Chat": {
"description": "Yi-1.5-34B significantly enhances mathematical logic and coding abilities by incrementally training on 500 billion high-quality tokens while maintaining the excellent general language capabilities of the original series."
},
"abab5.5-chat": {
"description": "Targeted at productivity scenarios, supporting complex task processing and efficient text generation, suitable for professional applications."
},
"abab5.5s-chat": {
"description": "Designed for Chinese persona dialogue scenarios, providing high-quality Chinese dialogue generation capabilities, suitable for various application contexts."
},
"abab6.5g-chat": {
"description": "Designed for multilingual persona dialogue, supporting high-quality dialogue generation in English and other languages."
},
"abab6.5s-chat": {
"description": "Suitable for a wide range of natural language processing tasks, including text generation and dialogue systems."
},
"abab6.5t-chat": {
"description": "Optimized for Chinese persona dialogue scenarios, providing smooth dialogue generation that aligns with Chinese expression habits."
},
"accounts/fireworks/models/deepseek-r1": {
"description": "DeepSeek-R1 is a state-of-the-art large language model optimized through reinforcement learning and cold-start data, excelling in reasoning, mathematics, and programming performance."
},
"accounts/fireworks/models/deepseek-v3": {
"description": "A powerful Mixture-of-Experts (MoE) language model provided by Deepseek, with a total parameter count of 671B, activating 37B parameters per token."
},
"accounts/fireworks/models/llama-v3-70b-instruct": {
"description": "Llama 3 70B instruction model, optimized for multilingual dialogues and natural language understanding, outperforming most competitive models."
},
"accounts/fireworks/models/llama-v3-8b-instruct": {
"description": "Llama 3 8B instruction model, optimized for dialogues and multilingual tasks, delivering outstanding and efficient performance."
},
"accounts/fireworks/models/llama-v3-8b-instruct-hf": {
"description": "Llama 3 8B instruction model (HF version), consistent with official implementation results, featuring high consistency and cross-platform compatibility."
},
"accounts/fireworks/models/llama-v3p1-405b-instruct": {
"description": "Llama 3.1 405B instruction model, equipped with massive parameters, suitable for complex tasks and instruction following in high-load scenarios."
},
"accounts/fireworks/models/llama-v3p1-70b-instruct": {
"description": "Llama 3.1 70B instruction model provides exceptional natural language understanding and generation capabilities, making it an ideal choice for dialogue and analysis tasks."
},
"accounts/fireworks/models/llama-v3p1-8b-instruct": {
"description": "Llama 3.1 8B instruction model, optimized for multilingual dialogues, capable of surpassing most open-source and closed-source models on common industry benchmarks."
},
"accounts/fireworks/models/llama-v3p2-11b-vision-instruct": {
"description": "Meta's 11B parameter instruction-tuned image reasoning model. This model is optimized for visual recognition, image reasoning, image description, and answering general questions about images. It understands visual data like charts and graphs, generating text descriptions of image details to bridge the gap between vision and language."
},
"accounts/fireworks/models/llama-v3p2-3b-instruct": {
"description": "The Llama 3.2 3B instruction model is a lightweight multilingual model introduced by Meta. This model aims to enhance efficiency, providing significant improvements in latency and cost compared to larger models. Sample use cases include querying, prompt rewriting, and writing assistance."
},
"accounts/fireworks/models/llama-v3p2-90b-vision-instruct": {
"description": "Meta's 90B parameter instruction-tuned image reasoning model. This model is optimized for visual recognition, image reasoning, image description, and answering general questions about images. It understands visual data like charts and graphs, generating text descriptions of image details to bridge the gap between vision and language."
},
"accounts/fireworks/models/llama-v3p3-70b-instruct": {
"description": "Llama 3.3 70B Instruct is the December update of Llama 3.1 70B. This model builds upon Llama 3.1 70B (released in July 2024) with enhancements in tool invocation, multilingual text support, mathematics, and programming capabilities. It achieves industry-leading performance in reasoning, mathematics, and instruction following, providing similar performance to 3.1 405B while offering significant advantages in speed and cost."
},
"accounts/fireworks/models/mistral-small-24b-instruct-2501": {
"description": "A 24B parameter model that possesses state-of-the-art capabilities comparable to larger models."
},
"accounts/fireworks/models/mixtral-8x22b-instruct": {
"description": "Mixtral MoE 8x22B instruction model, featuring large-scale parameters and a multi-expert architecture, fully supporting efficient processing of complex tasks."
},
"accounts/fireworks/models/mixtral-8x7b-instruct": {
"description": "Mixtral MoE 8x7B instruction model, with a multi-expert architecture providing efficient instruction following and execution."
},
"accounts/fireworks/models/mythomax-l2-13b": {
"description": "MythoMax L2 13B model, combining novel merging techniques, excels in narrative and role-playing."
},
"accounts/fireworks/models/phi-3-vision-128k-instruct": {
"description": "Phi 3 Vision instruction model, a lightweight multimodal model capable of handling complex visual and textual information, with strong reasoning abilities."
},
"accounts/fireworks/models/qwen-qwq-32b-preview": {
"description": "The QwQ model is an experimental research model developed by the Qwen team, focusing on enhancing AI reasoning capabilities."
},
"accounts/fireworks/models/qwen2-vl-72b-instruct": {
"description": "The 72B version of the Qwen-VL model is the latest iteration from Alibaba, representing nearly a year of innovation."
},
"accounts/fireworks/models/qwen2p5-72b-instruct": {
"description": "Qwen2.5 is a series of decoder-only language models developed by the Alibaba Cloud Qwen team. These models come in different sizes including 0.5B, 1.5B, 3B, 7B, 14B, 32B, and 72B, available in both base and instruct variants."
},
"accounts/fireworks/models/qwen2p5-coder-32b-instruct": {
"description": "Qwen2.5 Coder 32B Instruct is the latest version in Alibaba Cloud's series of code-specific large language models. This model significantly enhances code generation, reasoning, and repair capabilities based on Qwen2.5, trained on 55 trillion tokens. It not only improves coding abilities but also maintains advantages in mathematics and general capabilities, providing a more comprehensive foundation for practical applications such as code agents."
},
"accounts/yi-01-ai/models/yi-large": {
"description": "Yi-Large model, featuring exceptional multilingual processing capabilities, suitable for various language generation and understanding tasks."
},
"ai21-jamba-1.5-large": {
"description": "A 398B parameter (94B active) multilingual model, offering a 256K long context window, function calling, structured output, and grounded generation."
},
"ai21-jamba-1.5-mini": {
"description": "A 52B parameter (12B active) multilingual model, offering a 256K long context window, function calling, structured output, and grounded generation."
},
"ai21-labs/AI21-Jamba-1.5-Large": {
"description": "A 398B parameter (94B active) multilingual model providing a 256K long context window, function calling, structured output, and fact-based generation."
},
"ai21-labs/AI21-Jamba-1.5-Mini": {
"description": "A 52B parameter (12B active) multilingual model offering a 256K long context window, function calling, structured output, and fact-based generation."
},
"anthropic.claude-3-5-sonnet-20240620-v1:0": {
"description": "Claude 3.5 Sonnet raises the industry standard, outperforming competitor models and Claude 3 Opus, excelling in a wide range of evaluations while maintaining the speed and cost of our mid-tier models."
},
"anthropic.claude-3-5-sonnet-20241022-v2:0": {
"description": "Claude 3.5 Sonnet raises the industry standard, outperforming