UNPKG

@mlc-ai/web-llm

Version:

Hardware accelerated language model chats on browsers

github.com/mlc-ai/web-llm

1,275 lines (1,214 loc) • 5.06 MB

JavaScript

const require$$3 = "MLC_DUMMY_REQUIRE_VAR" const require$$4 = "MLC_DUMMY_REQUIRE_VAR" var commonjsGlobal = typeof globalThis !== 'undefined' ? globalThis : typeof window !== 'undefined' ? window : typeof global !== 'undefined' ? global : typeof self !== 'undefined' ? self : {}; function getAugmentedNamespace(n) { if (n.__esModule) return n; var a = Object.defineProperty({}, '__esModule', {value: true}); Object.keys(n).forEach(function (k) { var d = Object.getOwnPropertyDescriptor(n, k); Object.defineProperty(a, k, d.get ? d : { enumerable: true, get: function () { return n[k]; } }); }); return a; } var loglevel = {exports: {}}; /* * loglevel - https://github.com/pimterry/loglevel * * Copyright (c) 2013 Tim Perry * Licensed under the MIT license. */ (function (module) { (function (root, definition) { if (module.exports) { module.exports = definition(); } else { root.log = definition(); } }(commonjsGlobal, function () { // Slightly dubious tricks to cut down minimized file size var noop = function() {}; var undefinedType = "undefined"; var isIE = (typeof window !== undefinedType) && (typeof window.navigator !== undefinedType) && ( /Trident\/|MSIE /.test(window.navigator.userAgent) ); var logMethods = [ "trace", "debug", "info", "warn", "error" ]; var _loggersByName = {}; var defaultLogger = null; // Cross-browser bind equivalent that works at least back to IE6 function bindMethod(obj, methodName) { var method = obj[methodName]; if (typeof method.bind === 'function') { return method.bind(obj); } else { try { return Function.prototype.bind.call(method, obj); } catch (e) { // Missing bind shim or IE8 + Modernizr, fallback to wrapping return function() { return Function.prototype.apply.apply(method, [obj, arguments]); }; } } } // Trace() doesn't print the message in IE, so for that case we need to wrap it function traceForIE() { if (console.log) { if (console.log.apply) { console.log.apply(console, arguments); } else { // In old IE, native console methods themselves don't have apply(). Function.prototype.apply.apply(console.log, [console, arguments]); } } if (console.trace) console.trace(); } // Build the best logging method possible for this env // Wherever possible we want to bind, not wrap, to preserve stack traces function realMethod(methodName) { if (methodName === 'debug') { methodName = 'log'; } if (typeof console === undefinedType) { return false; // No method possible, for now - fixed later by enableLoggingWhenConsoleArrives } else if (methodName === 'trace' && isIE) { return traceForIE; } else if (console[methodName] !== undefined) { return bindMethod(console, methodName); } else if (console.log !== undefined) { return bindMethod(console, 'log'); } else { return noop; } } // These private functions always need `this` to be set properly function replaceLoggingMethods() { /*jshint validthis:true */ var level = this.getLevel(); // Replace the actual methods. for (var i = 0; i < logMethods.length; i++) { var methodName = logMethods[i]; this[methodName] = (i < level) ? noop : this.methodFactory(methodName, level, this.name); } // Define log.log as an alias for log.debug this.log = this.debug; // Return any important warnings. if (typeof console === undefinedType && level < this.levels.SILENT) { return "No console available for logging"; } } // In old IE versions, the console isn't present until you first open it. // We build realMethod() replacements here that regenerate logging methods function enableLoggingWhenConsoleArrives(methodName) { return function () { if (typeof console !== undefinedType) { replaceLoggingMethods.call(this); this[methodName].apply(this, arguments); } }; } // By default, we use closely bound real methods wherever possible, and // otherwise we wait for a console to appear, and then try again. function defaultMethodFactory(methodName, _level, _loggerName) { /*jshint validthis:true */ return realMethod(methodName) || enableLoggingWhenConsoleArrives.apply(this, arguments); } function Logger(name, factory) { // Private instance variables. var self = this; /** * The level inherited from a parent logger (or a global default). We * cache this here rather than delegating to the parent so that it stays * in sync with the actual logging methods that we have installed (the * parent could change levels but we might not have rebuilt the loggers * in this child yet). * @type {number} */ var inheritedLevel; /** * The default level for this logger, if any. If set, this overrides * `inheritedLevel`. * @type {number|null} */ var defaultLevel; /** * A user-specific level for this logger. If set, this overrides * `defaultLevel`. * @type {number|null} */ var userLevel; var storageKey = "loglevel"; if (typeof name === "string") { storageKey += ":" + name; } else if (typeof name === "symbol") { storageKey = undefined; } function persistLevelIfPossible(levelNum) { var levelName = (logMethods[levelNum] || 'silent').toUpperCase(); if (typeof window === undefinedType || !storageKey) return; // Use localStorage if available try { window.localStorage[storageKey] = levelName; return; } catch (ignore) {} // Use session cookie as fallback try { window.document.cookie = encodeURIComponent(storageKey) + "=" + levelName + ";"; } catch (ignore) {} } function getPersistedLevel() { var storedLevel; if (typeof window === undefinedType || !storageKey) return; try { storedLevel = window.localStorage[storageKey]; } catch (ignore) {} // Fallback to cookies if local storage gives us nothing if (typeof storedLevel === undefinedType) { try { var cookie = window.document.cookie; var cookieName = encodeURIComponent(storageKey); var location = cookie.indexOf(cookieName + "="); if (location !== -1) { storedLevel = /^([^;]+)/.exec( cookie.slice(location + cookieName.length + 1) )[1]; } } catch (ignore) {} } // If the stored level is not valid, treat it as if nothing was stored. if (self.levels[storedLevel] === undefined) { storedLevel = undefined; } return storedLevel; } function clearPersistedLevel() { if (typeof window === undefinedType || !storageKey) return; // Use localStorage if available try { window.localStorage.removeItem(storageKey); } catch (ignore) {} // Use session cookie as fallback try { window.document.cookie = encodeURIComponent(storageKey) + "=; expires=Thu, 01 Jan 1970 00:00:00 UTC"; } catch (ignore) {} } function normalizeLevel(input) { var level = input; if (typeof level === "string" && self.levels[level.toUpperCase()] !== undefined) { level = self.levels[level.toUpperCase()]; } if (typeof level === "number" && level >= 0 && level <= self.levels.SILENT) { return level; } else { throw new TypeError("log.setLevel() called with invalid level: " + input); } } /* * * Public logger API - see https://github.com/pimterry/loglevel for details * */ self.name = name; self.levels = { "TRACE": 0, "DEBUG": 1, "INFO": 2, "WARN": 3, "ERROR": 4, "SILENT": 5}; self.methodFactory = factory || defaultMethodFactory; self.getLevel = function () { if (userLevel != null) { return userLevel; } else if (defaultLevel != null) { return defaultLevel; } else { return inheritedLevel; } }; self.setLevel = function (level, persist) { userLevel = normalizeLevel(level); if (persist !== false) { // defaults to true persistLevelIfPossible(userLevel); } // NOTE: in v2, this should call rebuild(), which updates children. return replaceLoggingMethods.call(self); }; self.setDefaultLevel = function (level) { defaultLevel = normalizeLevel(level); if (!getPersistedLevel()) { self.setLevel(level, false); } }; self.resetLevel = function () { userLevel = null; clearPersistedLevel(); replaceLoggingMethods.call(self); }; self.enableAll = function(persist) { self.setLevel(self.levels.TRACE, persist); }; self.disableAll = function(persist) { self.setLevel(self.levels.SILENT, persist); }; self.rebuild = function () { if (defaultLogger !== self) { inheritedLevel = normalizeLevel(defaultLogger.getLevel()); } replaceLoggingMethods.call(self); if (defaultLogger === self) { for (var childName in _loggersByName) { _loggersByName[childName].rebuild(); } } }; // Initialize all the internal levels. inheritedLevel = normalizeLevel( defaultLogger ? defaultLogger.getLevel() : "WARN" ); var initialLevel = getPersistedLevel(); if (initialLevel != null) { userLevel = normalizeLevel(initialLevel); } replaceLoggingMethods.call(self); } /* * * Top-level API * */ defaultLogger = new Logger(); defaultLogger.getLogger = function getLogger(name) { if ((typeof name !== "symbol" && typeof name !== "string") || name === "") { throw new TypeError("You must supply a name when creating a logger."); } var logger = _loggersByName[name]; if (!logger) { logger = _loggersByName[name] = new Logger( name, defaultLogger.methodFactory ); } return logger; }; // Grab the current global log variable in case of overwrite var _log = (typeof window !== undefinedType) ? window.log : undefined; defaultLogger.noConflict = function() { if (typeof window !== undefinedType && window.log === defaultLogger) { window.log = _log; } return defaultLogger; }; defaultLogger.getLoggers = function getLoggers() { return _loggersByName; }; // ES6 default export, for compatibility defaultLogger['default'] = defaultLogger; return defaultLogger; })); }(loglevel)); var log = loglevel.exports; class ModelNotFoundError extends Error { constructor(modelId) { super(`Cannot find model record in appConfig for ${modelId}. Please check if the model ID is correct and included in the model_list configuration.`); this.name = "ModelNotFoundError"; } } class ConfigValueError extends Error { constructor(message) { super(message); this.name = "ConfigValueError"; } } class MinValueError extends ConfigValueError { constructor(paramName, minValue) { super(`Make sure \`${paramName}\` > ${minValue}.`); this.name = "MinValueError"; } } class RangeError extends ConfigValueError { constructor(paramName, minValue, maxValue, additionalMessage) { super(`Make sure ${minValue} < ${paramName} <= ${maxValue}.${additionalMessage ? " " + additionalMessage : ""}`); this.name = "RangeError"; } } class NonNegativeError extends ConfigValueError { constructor(paramName) { super(`Make sure ${paramName} >= 0.`); this.name = "NonNegativeError"; } } class InvalidNumberStringError extends ConfigValueError { constructor(paramName, actualValue) { super(`Make sure ${paramName} to be number represented in string.${actualValue ? " Got " + actualValue : ""}`); this.name = "InvalidNumberStringError"; } } class DependencyError extends ConfigValueError { constructor(dependentParam, requiredParam, requiredValue) { super(`${dependentParam} requires ${requiredParam} to be ${requiredValue}.`); this.name = "DependencyError"; } } class WebGPUNotAvailableError extends Error { constructor() { super("WebGPU is not supported in your current environment, but it is necessary to run the WebLLM engine. " + "Please make sure that your browser supports WebGPU and that it is enabled in your browser settings. " + "You can also consult your browser's compatibility chart to see if it supports WebGPU. " + "For more information about WebGPU support in your browser, visit https://webgpureport.org/"); this.name = "WebGPUNotAvailableError"; } } class WebGPUNotFoundError extends Error { constructor() { super("Cannot find WebGPU in the environment"); this.name = "WebGPUNotFoundError"; } } class ModelNotLoadedError extends Error { constructor(requestName) { super(`Model not loaded before trying to complete ${requestName}. Please ensure you have called ` + `MLCEngine.reload(model) to load the model before initiating APIs, ` + `or initialize your engine using CreateMLCEngine() with a valid model configuration.`); this.name = "ModelNotLoadedError"; } } class WorkerEngineModelNotLoadedError extends Error { constructor(engineName) { super(`${engineName} is not loaded with a model. Did you call \`engine.reload()\`?`); this.name = "WorkerEngineModelNotLoadedError"; } } class MessageOrderError extends Error { constructor(message) { super(message); this.name = "MessageOrderError"; } } class SystemMessageOrderError extends Error { constructor() { super("System prompt should always be the first message in `messages`."); this.name = "SystemMessageOrderError"; } } class ContentTypeError extends Error { constructor(name) { super(`${name} should have string content.`); this.name = "ContentTypeError"; } } class UnsupportedRoleError extends Error { constructor(role) { super(`Unsupported role of message: ${role}`); this.name = "UnsupportedRoleError"; } } class UserMessageContentErrorForNonVLM extends Error { constructor(modelId, modelType, content) { super(`The model loaded is not of type ModelType.VLM (vision-language model). ` + `Therefore, user message only supports string content, but received: ${content}\n` + `Loaded modelId: ${modelId}, modelType: ${modelType}`); this.name = "UserMessageContentErrorForNonVLM"; } } class PrefillChunkSizeSmallerThanImageError extends Error { constructor(prefillChunkSize, imageEmbedSize) { super(`prefillChunkSize needs to be greater than imageEmbedSize because a single image's ` + `prefill cannot be chunked. Got prefillChunkSize: ` + `${prefillChunkSize}, imageEmbedSize: ${imageEmbedSize}`); this.name = "PrefillChunkSizeSmallerThanImageError"; } } class CannotFindImageEmbedError extends Error { constructor() { super(`Received image input but model does not have kernel image_embed. ` + `Make sure to only pass in image to a vision model.`); this.name = "CannotFindImageEmbedError"; } } class UnsupportedDetailError extends Error { constructor(detail) { super(`Currently do not support field image_url.detail, but received: ${detail}`); this.name = "UnsupportedDetailError"; } } class UnsupportedImageURLError extends Error { constructor(url) { super(`image_url.url should start with "data:image" for base64, or with "http", but got: ${url}`); this.name = "UnsupportedImageURLError"; } } class MultipleTextContentError extends Error { constructor() { super(`Each message can have at most one text contentPart, but received more than 1.`); this.name = "MultipleTextContentError"; } } class ToolCallOutputParseError extends Error { constructor(outputMessage, error) { super(`Internal error: error encountered when parsing outputMessage for function ` + `calling. Got outputMessage: ${outputMessage}\nGot error: ${error}`); this.name = "ToolCallOutputParseError"; } } class ToolCallOutputInvalidTypeError extends Error { constructor(expectedType) { super(`Internal error: expect output of function calling to be an ${expectedType}`); this.name = "ToolCallOutputInvalidTypeError"; } } class ToolCallOutputMissingFieldsError extends Error { constructor(missingFields, object) { super(`Expect generated tool call to have fields ${missingFields.map((field) => `"\`${field}\`"`).join(", ")}, but got object: ${JSON.stringify(object)}`); this.name = "JSONFieldError"; } } class ConfigurationNotInitializedError extends Error { constructor() { super("Configuration not initialized. Ensure you have called `reload()` function first."); this.name = "ConfigurationNotInitializedError"; } } class MissingModelWasmError extends Error { constructor(modelId) { super(`Missing \`model_lib\` for the model with ID "${modelId}". Please ensure that \`model_lib\` is provided in \`model_list\` for each model. This URL is essential for downloading the WASM library necessary to run the model.`); this.name = "MissingModelError"; } } class FeatureSupportError extends Error { constructor(feature) { super(`This model requires feature ${feature}, which is not yet supported by this browser.`); this.name = "FeatureSupportError"; } } class UnsupportedFieldsError extends Error { constructor(unsupportedFields, targetClass) { super(`The following fields in ${targetClass} are not yet supported: \n` + unsupportedFields.join(", ")); this.name = "UnsupportedFieldsError"; } } class ShaderF16SupportError extends FeatureSupportError { constructor() { super("This model requires WebGPU extension shader-f16, which is not enabled in this browser. " + 'You can try to launch Chrome Canary in command line with flag "--enable-dawn-features=allow_unsafe_apis".'); this.name = "ShaderF16SupportError"; } } class DeviceLostError extends Error { constructor() { super("The WebGPU device was lost while loading the model. This issue often occurs due to running out of memory (OOM). To resolve this, try reloading with a model that has fewer parameters or uses a smaller context length."); this.name = "DeviceLostError"; } } class UnsupportedTokenizerFilesError extends Error { constructor(files) { super(`Cannot handle tokenizer files ${files}`); this.name = "UnsupportedTokenizerFilesError"; } } class WindowSizeConfigurationError extends Error { constructor(contextWindowSize, slidingWindowSize) { super(`Only one of context_window_size and sliding_window_size can be positive. Got: ` + `context_window_size: ${contextWindowSize}, sliding_window_size: ${slidingWindowSize}\n` + `Consider modifying ModelRecord.overrides to set one of them to -1.`); this.name = "WindowSizeConfigurationError"; } } class AttentionSinkSizeError extends Error { constructor() { super("Need to specify non-negative attention_sink_size if using sliding window. " + "Consider modifying ModelRecord.overrides. " + "Use `attention_sink_size=0` for default sliding window."); this.name = "AttentionSinkSizeError"; } } class WindowSizeSpecificationError extends Error { constructor() { super("Need to specify either sliding_window_size or max_window_size.\n" + "Consider modifying ModelRecord.overrides to set one of them to positive."); this.name = "WindowSizeSpecificationError"; } } class ContextWindowSizeExceededError extends Error { constructor(numPromptTokens, contextWindowSize) { super(`Prompt tokens exceed context window size: number of prompt tokens: ${numPromptTokens}; ` + `context window size: ${contextWindowSize}\nConsider shortening the prompt, or increase ` + "`context_window_size`, or using sliding window via `sliding_window_size`."); this.name = "ContextWindowSizeExceededError"; } } class NonWorkerEnvironmentError extends Error { constructor(className) { super(`${className} must be created in the service worker script.`); this.name = "NonWorkerEnvironmentError"; } } class NoServiceWorkerAPIError extends Error { constructor() { super("Service worker API is not available in your browser. Please ensure that your browser supports service workers and that you are using a secure context (HTTPS). " + "Check the browser compatibility and ensure that service workers are not disabled in your browser settings."); this.name = "NoServiceWorkerAPIError"; } } class ServiceWorkerInitializationError extends Error { constructor() { super("Service worker failed to initialize. This could be due to a failure in the service worker registration process or because the service worker is not active. " + "Please refresh the page to retry initializing the service worker."); this.name = "ServiceWorkerInitializationError"; } } class StreamingCountError extends Error { constructor() { super("When streaming, `n` cannot be > 1."); this.name = "StreamingCountError"; } } class SeedTypeError extends Error { constructor(seed) { super("`seed` should be an integer, but got " + seed); this.name = "SeedTypeError"; } } class InvalidResponseFormatError extends Error { constructor() { super("JSON schema is only supported with `json_object` response format."); this.name = "InvalidResponseFormatError"; } } class InvalidResponseFormatGrammarError extends Error { constructor() { super("When ResponseFormat.type is `grammar`, ResponseFormat.grammar needs to be specified.\n" + "When ResponseFormat.grammar is specified, ResponseFormat.type needs to be grammar."); this.name = "InvalidResponseFormatGrammarError"; } } class CustomResponseFormatError extends Error { constructor(currentFormat) { super("When using Hermes-2-Pro function calling via ChatCompletionRequest.tools, " + "cannot specify customized response_format. We will set it for you internally. Currently " + "set to: " + JSON.stringify(currentFormat)); this.name = "CustomResponseFormatError"; } } class UnsupportedModelIdError extends Error { constructor(currentModelId, supportedModelIds) { super(`${currentModelId} is not supported for ChatCompletionRequest.tools. Currently, models ` + `that support function calling are: ${supportedModelIds.join(", ")}`); this.name = "UnsupportedModelIdError"; } } class CustomSystemPromptError extends Error { constructor() { super("When using Hermes-2-Pro function calling via ChatCompletionRequest.tools, cannot specify customized system prompt."); this.name = "CustomSystemPromptError"; } } class InvalidStreamOptionsError extends Error { constructor() { super("Only specify stream_options when stream=True."); this.name = "InvalidStreamOptionsError"; } } class UnknownMessageKindError extends Error { constructor(msgKind, msgContent) { super(`Unknown message kind, msg: [${msgKind}] ${msgContent}`); this.name = "UnknownMessageKindError"; } } class TextCompletionExpectsKVEmptyError extends Error { constructor() { super("Non-chat text completion API expects KVCache to be empty."); this.name = "TextCompletionExpectsKVEmptyError"; } } class TextCompletionConversationExpectsPrompt extends Error { constructor() { super("Non-chat text completion API expects isTextCompletion is true, and prompt is defined."); this.name = "TextCompletionConversationExpectsPrompt"; } } class TextCompletionConversationError extends Error { constructor(funcName) { super(`Non-chat text completion API cannot call ${funcName}.`); this.name = "TextCompletionConversationError"; } } class EmbeddingUnsupportedEncodingFormatError extends Error { constructor() { super("Embedding in base64 format is currently not supported."); this.name = "EmbeddingUnsupportedEncodingFormatError"; } } class EmbeddingUnsupportedModelError extends Error { constructor(currentModel) { super(`Trying to run embeddings.create() with ${currentModel}, which does not have ` + `ModelRecord.model_type === ModelType.embedding in the model record. ` + `Either make sure an embedding model is loaded, or specify the model type in ModelRecord.`); this.name = "EmbeddingUnsupportedModelError"; } } class EmbeddingSlidingWindowError extends Error { constructor(sliding_window_size) { super(`Embedding should not use sliding window. However, ` + `sliding_window_size=${sliding_window_size} is specified in the chat config.`); this.name = "EmbeddingSlidingWindowError"; } } class EmbeddingChunkingUnsupportedError extends Error { constructor(contextWindowSize, prefillChunkSize) { super(`Embedding currently does not support chunking. Make sure ` + `contextWindowSize === prefillChunkSize. Got contextWindowSize=${contextWindowSize}, ` + `prefillChunkSize=${prefillChunkSize} instead.`); this.name = "EmbeddingChunkingUnsupportedError"; } } class EmbeddingExceedContextWindowSizeError extends Error { constructor(contextWindowSize, receivedSize) { super(`The embedding model you are using only supports up to ${contextWindowSize} context size.` + `However, an input in the batch has size ${receivedSize}.`); this.name = "EmbeddingExceedContextWindowSizeError"; } } class EmbeddingInputEmptyError extends Error { constructor() { super("Embedding input cannot be empty string or empty token array."); this.name = "EmbeddingInputEmptyError"; } } class ReloadArgumentSizeUnmatchedError extends Error { constructor(numModelId, numChatOpts) { super(`Expect chatOpts, if specified, to match the size of modelId. However, got ` + `${numModelId} modelId, but ${numChatOpts} chatOpts.`); this.name = "ReloadArgumentSizeUnmatchedError"; } } class UnclearModelToUseError extends Error { constructor(loadedModels, requestName) { super(`Multiple models are loaded in engine. Please specify the model in ${requestName}.\n` + `Currently loaded models are:\n${loadedModels}`); this.name = "UnclearModelToUseError"; } } class SpecifiedModelNotFoundError extends Error { constructor(loadedModels, requestedModelId, requestName) { super(`Specified model ${requestedModelId} for ${requestName} is not found in loaded models. ` + `Please check if the correct model is loaded/specified. ` + `Currently loaded models are:\n${loadedModels}`); this.name = "SpecifiedModelNotFoundError"; } } class IncorrectPipelineLoadedError extends Error { constructor(selectedModelId, expectedPipeline, requestName) { super(`${requestName} expects model to be loaded with ${expectedPipeline}. However, ` + `${selectedModelId} is not loaded with this pipeline.`); this.name = "IncorrectPipelineLoadedError"; } } class ReloadModelIdNotUniqueError extends Error { constructor(modelId) { super(`Need to make models in modelId passed to reload() need to be unique. If you want to, ` + `load copies of the same model, consider making copies of the ModelRecord with ` + `different model_id. Received modelId: ${modelId}`); this.name = "ReloadModelIdNotUniqueError"; } } /* eslint-disable @typescript-eslint/no-non-null-assertion */ var Role; (function (Role) { Role["user"] = "user"; Role["assistant"] = "assistant"; Role["tool"] = "tool"; })(Role || (Role = {})); const DefaultLogLevel = "WARN"; /** * Place holders that can be used in role templates. * For example, a role template of * `<<question>> ${MessagePlaceholders.USER} <<function>> ${MessagePlaceholders.FUNCTION}` * will insert the user message to ${MessagePlaceholders.USER} * and insert the function message to ${MessagePlaceholders.FUNCTION} * at run time. */ var MessagePlaceholders; (function (MessagePlaceholders) { MessagePlaceholders["system"] = "{system_message}"; MessagePlaceholders["user"] = "{user_message}"; MessagePlaceholders["assistant"] = "{assistant_message}"; MessagePlaceholders["tool"] = "{tool_message}"; MessagePlaceholders["function"] = "{function_string}"; MessagePlaceholders["hermes_tools"] = "{hermes_tools}"; })(MessagePlaceholders || (MessagePlaceholders = {})); function postInitAndCheckGenerationConfigValues(config) { function _hasValue(value) { // if we use `if value` directly, `value` being 0 evaluates to false, violating semantics return value !== undefined && value !== null; } if (config.frequency_penalty && (config.frequency_penalty < -2.0 || config.frequency_penalty > 2.0)) { throw new RangeError("frequency_penalty", -2.0, 2.0); } if (config.presence_penalty && (config.presence_penalty < -2.0 || config.presence_penalty > 2.0)) { throw new RangeError("presence_penalty", -2.0, 2.0); } if (_hasValue(config.repetition_penalty) && config.repetition_penalty <= 0) { throw new MinValueError("repetition_penalty", 0); } if (_hasValue(config.max_tokens) && config.max_tokens <= 0) { throw new MinValueError("max_tokens", 0); } if ((_hasValue(config.top_p) && config.top_p <= 0) || config.top_p > 1) { throw new RangeError("top_p", 0, 1); } if (_hasValue(config.temperature) && config.temperature < 0) { throw new NonNegativeError("temperature"); } // If only one of frequency or presence penatly is set, make the other one 0.0 if (_hasValue(config.frequency_penalty) && !_hasValue(config.presence_penalty)) { config.presence_penalty = 0.0; log.warn("Only frequency_penalty is set; we default presence_penaty to 0."); } if (_hasValue(config.presence_penalty) && !_hasValue(config.frequency_penalty)) { config.frequency_penalty = 0.0; log.warn("Only presence_penalty is set; we default frequency_penalty to 0."); } // Check logit_bias range if (_hasValue(config.logit_bias)) { for (const tokenID in config.logit_bias) { const bias = config.logit_bias[tokenID]; if (bias > 100 || bias < -100) { throw new RangeError("logit_bias", -100, 100, "Got " + bias + " for tokenID " + tokenID); } if (isNaN(parseInt(tokenID))) { throw new InvalidNumberStringError("logit_bias's keys", tokenID); } } } // logprobs and top_logprobs if (_hasValue(config.top_logprobs)) { // If top_logprobs is non-null, logprobs must be true if (!config.logprobs) { throw new DependencyError("top_logprobs", "logprobs", true); } // top_logprobs should be in range [0,5] if (config.top_logprobs < 0 || config.top_logprobs > 5) { throw new RangeError("top_logprobs", 0, 5, "Got " + config.top_logprobs); } } // If defined logprobs but not top_logprobs, simply make it 0 if (config.logprobs) { if (!_hasValue(config.top_logprobs)) { config.top_logprobs = 0; } } } var ModelType; (function (ModelType) { ModelType[ModelType["LLM"] = 0] = "LLM"; ModelType[ModelType["embedding"] = 1] = "embedding"; ModelType[ModelType["VLM"] = 2] = "VLM"; })(ModelType || (ModelType = {})); /** * modelVersion: the prebuilt model libraries that the current npm is compatible with, affects the * `model_lib`s in `prebuiltAppConfig`. * * @note The model version does not have to match the npm version, since not each npm update * requires an update of the model libraries. */ const modelVersion = "v0_2_48"; const modelLibURLPrefix = "https://raw.githubusercontent.com/mlc-ai/binary-mlc-llm-libs/main/web-llm-models/"; /** * Models that support function calling (i.e. usage of `ChatCompletionRequest.tools`). More to come. */ const functionCallingModelIds = [ "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", "Hermes-3-Llama-3.1-8B-q4f32_1-MLC", "Hermes-3-Llama-3.1-8B-q4f16_1-MLC", ]; /** * Default models and model library mapping to be used if unspecified. * * @note This is the only source of truth of which prebuilt model libraries are compatible with the * current WebLLM npm version. */ const prebuiltAppConfig = { useIndexedDBCache: false, model_list: [ // Llama-3.2 { model: "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q4f32_1-MLC", model_id: "Llama-3.2-1B-Instruct-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3.2-1B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 1128.82, low_resource_required: true, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q4f16_1-MLC", model_id: "Llama-3.2-1B-Instruct-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3.2-1B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 879.04, low_resource_required: true, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q0f32-MLC", model_id: "Llama-3.2-1B-Instruct-q0f32-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3.2-1B-Instruct-q0f32-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 5106.26, low_resource_required: true, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Llama-3.2-1B-Instruct-q0f16-MLC", model_id: "Llama-3.2-1B-Instruct-q0f16-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3.2-1B-Instruct-q0f16-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 2573.13, low_resource_required: true, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Llama-3.2-3B-Instruct-q4f32_1-MLC", model_id: "Llama-3.2-3B-Instruct-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3.2-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 2951.51, low_resource_required: true, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Llama-3.2-3B-Instruct-q4f16_1-MLC", model_id: "Llama-3.2-3B-Instruct-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3.2-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 2263.69, low_resource_required: true, overrides: { context_window_size: 4096, }, }, // Llama-3.1 { model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC", model_id: "Llama-3.1-8B-Instruct-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 5295.7, low_resource_required: true, overrides: { context_window_size: 1024, }, }, { model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f16_1-MLC", model_id: "Llama-3.1-8B-Instruct-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3_1-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 4598.34, low_resource_required: true, overrides: { context_window_size: 1024, }, }, { model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f32_1-MLC", model_id: "Llama-3.1-8B-Instruct-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 6101.01, low_resource_required: false, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Llama-3.1-8B-Instruct-q4f16_1-MLC", model_id: "Llama-3.1-8B-Instruct-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3_1-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 5001.0, low_resource_required: false, overrides: { context_window_size: 4096, }, }, // DeepSeek-R1-Distill-Qwen // TODO(Charlie): Qwen2-1.5B is experiencing correctness issue, hence commented for now. // { // model: "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f16_1-MLC", // model_id: "DeepSeek-R1-Distill-Qwen-1.5B-q4f16_1-MLC", // model_lib: // modelLibURLPrefix + // modelVersion + // "/Qwen2-1.5B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", // low_resource_required: true, // vram_required_MB: 1629.75, // overrides: { // context_window_size: 4096, // }, // }, // { // model: "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-1.5B-q4f32_1-MLC", // model_id: "DeepSeek-R1-Distill-Qwen-1.5B-q4f32_1-MLC", // model_lib: // modelLibURLPrefix + // modelVersion + // "/Qwen2-1.5B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", // low_resource_required: true, // vram_required_MB: 1888.97, // overrides: { // context_window_size: 4096, // }, // }, { model: "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC", model_id: "DeepSeek-R1-Distill-Qwen-7B-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Qwen2-7B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", low_resource_required: false, vram_required_MB: 5106.67, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Qwen-7B-q4f32_1-MLC", model_id: "DeepSeek-R1-Distill-Qwen-7B-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Qwen2-7B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", low_resource_required: false, vram_required_MB: 5900.09, overrides: { context_window_size: 4096, }, }, // DeepSeek-R1-Distill-Llama { model: "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Llama-8B-q4f32_1-MLC", model_id: "DeepSeek-R1-Distill-Llama-8B-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 6101.01, low_resource_required: false, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC", model_id: "DeepSeek-R1-Distill-Llama-8B-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3_1-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 5001.0, low_resource_required: false, overrides: { context_window_size: 4096, }, }, // Hermes-3 and Hermes-2 { model: "https://huggingface.co/mlc-ai/Hermes-2-Theta-Llama-3-8B-q4f16_1-MLC", model_id: "Hermes-2-Theta-Llama-3-8B-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 4976.13, low_resource_required: false, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Hermes-2-Theta-Llama-3-8B-q4f32_1-MLC", model_id: "Hermes-2-Theta-Llama-3-8B-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 6051.27, low_resource_required: false, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", model_id: "Hermes-2-Pro-Llama-3-8B-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 4976.13, low_resource_required: false, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", model_id: "Hermes-2-Pro-Llama-3-8B-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 6051.27, low_resource_required: false, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.2-3B-q4f32_1-MLC", model_id: "Hermes-3-Llama-3.2-3B-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3.2-3B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 2951.51, low_resource_required: true, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.2-3B-q4f16_1-MLC", model_id: "Hermes-3-Llama-3.2-3B-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3.2-3B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 2263.69, low_resource_required: true, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.1-8B-q4f32_1-MLC", model_id: "Hermes-3-Llama-3.1-8B-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3_1-8B-Instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 5779.27, low_resource_required: false, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Hermes-3-Llama-3.1-8B-q4f16_1-MLC", model_id: "Hermes-3-Llama-3.1-8B-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Llama-3_1-8B-Instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 4876.13, low_resource_required: false, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", model_id: "Hermes-2-Pro-Mistral-7B-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Mistral-7B-Instruct-v0.3-q4f16_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 4033.28, low_resource_required: false, required_features: ["shader-f16"], overrides: { context_window_size: 4096, sliding_window_size: -1, }, }, // Phi3.5-mini-instruct { model: "https://huggingface.co/mlc-ai/Phi-3.5-mini-instruct-q4f16_1-MLC", model_id: "Phi-3.5-mini-instruct-q4f16_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Phi-3.5-mini-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 3672.07, low_resource_required: false, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Phi-3.5-mini-instruct-q4f32_1-MLC", model_id: "Phi-3.5-mini-instruct-q4f32_1-MLC", model_lib: modelLibURLPrefix + modelVersion + "/Phi-3.5-mini-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 5483.12, low_resource_required: false, overrides: { context_window_size: 4096, }, }, { model: "https://huggingface.co/mlc-ai/Phi-3.5-mini-instruct-q4f16_1-MLC", model_id: "Phi-3.5-mini-instruct-q4f16_1-MLC-1k", model_lib: modelLibURLPrefix + modelVersion + "/Phi-3.5-mini-instruct-q4f16_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 2520.07, low_resource_required: true, overrides: { context_window_size: 1024, }, }, { model: "https://huggingface.co/mlc-ai/Phi-3.5-mini-instruct-q4f32_1-MLC", model_id: "Phi-3.5-mini-instruct-q4f32_1-MLC-1k", model_lib: modelLibURLPrefix + modelVersion + "/Phi-3.5-mini-instruct-q4f32_1-ctx4k_cs1k-webgpu.wasm", vram_required_MB: 3179.12, low_resource_required: true, overrides: { context_window_size: 1024, }, }, // Phi-3.5-vision-instruct { model: "https://huggingface.co/mlc-ai/Phi-3.5-vision-instruct-q4f16_1