UNPKG

@lenml/tokenizers

Version:

a lightweight no-dependency fork of transformers.js (only tokenizers)

github.com/lenML/tokenizers

lenML/tokenizers

82 lines (77 loc) • 3.99 kB

JavaScript

/**
 * @typedef {Object} PretrainedOptions Options for loading a pretrained model.
 * @property {boolean?} [quantized=true] Whether to load the 8-bit quantized version of the model (only applicable when loading model files).
 * @property {function} [progress_callback=null] If specified, this function will be called during model construction, to provide the user with progress updates.
 * @property {Object} [config=null] Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:
 * - The model is a model provided by the library (loaded with the *model id* string of a pretrained model).
 * - The model is loaded by supplying a local directory as `pretrained_model_name_or_path` and a configuration JSON file named *config.json* is found in the directory.
 * @property {string} [cache_dir=null] Path to a directory in which a downloaded pretrained model configuration should be cached if the standard cache should not be used.
 * @property {boolean} [local_files_only=false] Whether or not to only look at local files (e.g., not try downloading the model).
 * @property {string} [revision='main'] The specific model version to use. It can be a branch name, a tag name, or a commit id,
 * since we use a git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any identifier allowed by git.
 * NOTE: This setting is ignored for local requests.
 * @property {string} [model_file_name=null] If specified, load the model with this name (excluding the .onnx suffix). Currently only valid for encoder- or decoder-only models.
 */

/**
 * Downloads a file over HTTP(S) using the Fetch API.
 *
 * Only remote locations are handled: a `path_or_repo_id` that does not start
 * with "http" is rejected, because filesystem access is not implemented here.
 * The request URL is the plain concatenation `path_or_repo_id + filename`; no
 * separator is inserted, so the caller must supply any trailing "/".
 *
 * @param {string} path_or_repo_id Base URL under which the file lives.
 * @param {string} filename Name of the file, appended verbatim to `path_or_repo_id`.
 * @param {boolean} [fatal=true] Whether a non-OK HTTP response throws (truthy) or resolves to `null` (falsy).
 * @param {PretrainedOptions} [options] Accepted to keep the signature stable; not read by this function.
 *
 * @throws {Error} If `path_or_repo_id` does not start with "http", or if the response is not OK and `fatal` is truthy.
 * @returns {Promise<ArrayBuffer|null>} The response body as a buffer, or `null` for a non-OK response when `fatal` is falsy.
 */
export async function getModelFile(
  path_or_repo_id,
  filename,
  fatal = true,
  options = {}
) {
  // Guard clause: anything that is not a remote URL cannot be served.
  if (!path_or_repo_id.startsWith("http")) {
    throw new Error(
      "Filesystem not supported, please implement your own file reading logic."
    );
  }

  const response = await fetch(path_or_repo_id + filename);
  if (response.ok) {
    return response.arrayBuffer();
  }

  // Non-OK response: either surface it as an error or report "no file".
  if (fatal) {
    throw new Error(`File not found at ${path_or_repo_id}${filename}`);
  }
  return null;
}

/**
 * Fetches a file via `getModelFile` and parses it as UTF-8 encoded JSON.
 *
 * @param {string} modelPath Base URL of the directory containing the file (passed to `getModelFile` as `path_or_repo_id`).
 * @param {string} fileName The name of the file to fetch.
 * @param {boolean} [fatal=true] Whether to throw an error if the file is not found.
 * @param {PretrainedOptions} [options] Forwarded unchanged to `getModelFile`.
 * @returns {Promise<Object>} The parsed JSON value, or an empty object when `getModelFile` resolves to `null`.
 * @throws Will throw an error if the file is not found and `fatal` is true.
 */
export async function getModelJSON(
  modelPath,
  fileName,
  fatal = true,
  options = {}
) {
  const buffer = await getModelFile(modelPath, fileName, fatal, options);
  if (buffer === null) {
    // Non-fatal miss: hand back an empty object instead of null.
    return {};
  }

  return JSON.parse(new TextDecoder("utf-8").decode(buffer));
}