/*
 * @lenml/tokenizers
 * Version:
 * A lightweight, no-dependency fork of transformers.js (tokenizers only).
 * 82 lines (77 loc) • 3.99 kB
 * JavaScript
 */
/**
* @typedef {Object} PretrainedOptions Options for loading a pretrained model.
* @property {boolean?} [quantized=true] Whether to load the 8-bit quantized version of the model (only applicable when loading model files).
* @property {function} [progress_callback=null] If specified, this function will be called during model construction, to provide the user with progress updates.
* @property {Object} [config=null] Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:
* - The model is a model provided by the library (loaded with the *model id* string of a pretrained model).
* - The model is loaded by supplying a local directory as `pretrained_model_name_or_path` and a configuration JSON file named *config.json* is found in the directory.
* @property {string} [cache_dir=null] Path to a directory in which a downloaded pretrained model configuration should be cached if the standard cache should not be used.
* @property {boolean} [local_files_only=false] Whether or not to only look at local files (e.g., not try downloading the model).
* @property {string} [revision='main'] The specific model version to use. It can be a branch name, a tag name, or a commit id,
* since we use a git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any identifier allowed by git.
* NOTE: This setting is ignored for local requests.
* @property {string} [model_file_name=null] If specified, load the model with this name (excluding the .onnx suffix). Currently only valid for encoder- or decoder-only models.
*/
/**
 * Retrieves a file from a remote URL using the Fetch API.
 *
 * This implementation has no filesystem support: any `path_or_repo_id` that
 * does not start with "http" is rejected with an error, regardless of `fatal`.
 *
 * The request URL is formed by joining `path_or_repo_id` and `filename`. A
 * single "/" is inserted between them when neither side already provides one,
 * so both "https://host/models" and "https://host/models/" work as a base.
 * A base that already contains a query string or fragment ("?" or "#") is
 * concatenated verbatim.
 *
 * @param {string} path_or_repo_id Base URL (must start with "http") under which the file is located.
 * @param {string} filename The name of the file to locate under `path_or_repo_id`.
 * @param {boolean} [fatal=true] Whether to throw an error if the response is not OK.
 * @param {PretrainedOptions} [options] Accepted for signature compatibility; not used by this implementation.
 *
 * @throws {Error} If the response is not OK and `fatal` is true.
 * @throws {Error} If `path_or_repo_id` does not start with "http".
 * @returns {Promise<ArrayBuffer|null>} The response body as an ArrayBuffer, or `null` when the
 * response is not OK and `fatal` is false.
 */
export async function getModelFile(
  path_or_repo_id,
  filename,
  fatal = true,
  options = {}
) {
  if (!path_or_repo_id.startsWith("http")) {
    throw new Error(
      "Filesystem not supported, please implement your own file reading logic."
    );
  }

  // Only add a separator when it is actually missing, so inputs that already
  // carried a slash (or that build the URL through a query string / fragment)
  // produce exactly the same URL as plain concatenation.
  const hasQueryOrFragment =
    path_or_repo_id.includes("?") || path_or_repo_id.includes("#");
  const needsSeparator =
    filename.length > 0 &&
    !hasQueryOrFragment &&
    !path_or_repo_id.endsWith("/") &&
    !filename.startsWith("/");
  const url = `${path_or_repo_id}${needsSeparator ? "/" : ""}${filename}`;

  const response = await fetch(url);
  if (!response.ok) {
    if (fatal) {
      throw new Error(`File not found at ${url}`);
    }
    return null;
  }
  return response.arrayBuffer();
}
/**
 * Loads a file via `getModelFile` and parses its contents as JSON.
 *
 * @param {string} modelPath The location passed to `getModelFile` as the base path.
 * @param {string} fileName The name of the file to fetch.
 * @param {boolean} [fatal=true] Forwarded to `getModelFile`; controls whether a missing file throws.
 * @param {PretrainedOptions} [options] Forwarded unchanged to `getModelFile`.
 * @returns {Promise<Object>} The parsed JSON value, or an empty object when `getModelFile` resolves to `null`.
 * @throws Will throw if `getModelFile` throws, or if the file content is not valid JSON.
 */
export async function getModelJSON(
  modelPath,
  fileName,
  fatal = true,
  options = {}
) {
  const raw = await getModelFile(modelPath, fileName, fatal, options);

  // A `null` result means the file was not available; callers receive an
  // empty object instead.
  if (raw === null) {
    return {};
  }

  const text = new TextDecoder("utf-8").decode(raw);
  return JSON.parse(text);
}