chromadb-default-embed
Version:
Chroma's fork of @xenova/transformers serving as our default embedding function
143 lines (119 loc) • 5.43 kB
JavaScript
/**
* @file Module used to configure Transformers.js.
*
* **Example:** Disable remote models.
* ```javascript
* import { env } from '@xenova/transformers';
* env.allowRemoteModels = false;
* ```
*
* **Example:** Set local model path.
* ```javascript
* import { env } from '@xenova/transformers';
* env.localModelPath = '/path/to/local/models/';
* ```
*
* **Example:** Set cache directory.
* ```javascript
* import { env } from '@xenova/transformers';
* env.cacheDir = '/path/to/cache/directory/';
* ```
*
* @module env
*/
import fs from 'fs';
import path from 'path';
import url from 'url';
import { ONNX } from './backends/onnx.js';
// Environment object of the ONNX backend; exposed later through `env.backends.onnx`.
const onnx_env = ONNX.env;

// Version string reported as `env.version` and used to build the CDN URL for the wasm files.
const VERSION = '2.13.2';
/**
 * Detect a browser main-thread environment by looking for the global
 * `window` object and its `document` property.
 * @returns {boolean} `true` when both `window` and `window.document` are defined, `false` otherwise.
 */
function isBrowser() {
    // No global `window` at all: not a browser page.
    if (typeof window === "undefined") {
        return false;
    }
    return typeof window.document !== "undefined";
}
// Feature detection: work out which platform APIs this runtime offers.

// The Cache API is reachable through the global `self` when it exists.
const WEB_CACHE_AVAILABLE = typeof self !== 'undefined' && 'caches' in self;

// `fs` and `path` count as unavailable in a browser, or when the imported
// module object has no keys.
const FS_AVAILABLE = !(isBrowser() || isEmpty(fs));
const PATH_AVAILABLE = !(isBrowser() || isEmpty(path));

// "Running locally" means both file system and path utilities can be used.
const RUNNING_LOCALLY = FS_AVAILABLE && PATH_AVAILABLE;

// Two directory levels above this file when running locally, otherwise a
// relative placeholder.
const __dirname = !RUNNING_LOCALLY
    ? './'
    : path.dirname(path.dirname(url.fileURLToPath(import.meta.url)));

// File-system cache location; only meaningful when a file system is available.
const DEFAULT_CACHE_DIR = !RUNNING_LOCALLY
    ? null
    : path.join(__dirname, '/.cache/');

// Location of local models: resolved against `__dirname` when running
// locally, otherwise used as-is.
const DEFAULT_LOCAL_MODEL_PATH = '/models/';
const localModelPath = !RUNNING_LOCALLY
    ? DEFAULT_LOCAL_MODEL_PATH
    : path.join(__dirname, DEFAULT_LOCAL_MODEL_PATH);

// Tell onnxruntime where to find its .wasm files (required inside a web worker).
// https://onnxruntime.ai/docs/api/js/interfaces/Env.WebAssemblyFlags.html#wasmPaths
// Outside of a local environment the files are fetched from a CDN by default,
// which is easier for newer users. In practice, users should probably
// self-host the necessary .wasm files.
onnx_env.wasm.wasmPaths = !RUNNING_LOCALLY
    ? `https://cdn.jsdelivr.net/npm/@xenova/transformers@${VERSION}/dist/`
    : path.join(__dirname, '/dist/');
/**
 * Shared settings object that controls how Transformers.js executes. Users configure the
 * library by assigning to its properties.
 * @property {Object} backends Environment variables of the different backends, exposed so that
 * users can set them if they want to.
 * @property {string} __dirname Directory name of the module. Useful for resolving local paths.
 * @property {string} version The version of Transformers.js in use.
 * @property {boolean} allowRemoteModels Whether remote files may be loaded. Defaults to `true`.
 * Setting it to `false` has the same effect as passing `local_files_only=true` when loading pipelines, models, tokenizers, processors, etc.
 * @property {string} remoteHost Host URL that models are loaded from. Defaults to the Hugging Face Hub.
 * @property {string} remotePathTemplate Path template that is filled in and appended to `remoteHost` when loading models.
 * @property {boolean} allowLocalModels Whether local files may be loaded. Defaults to `true`.
 * When `false`, the local file check is skipped and the model is requested from the remote host.
 * @property {string} localModelPath Path that local models are loaded from. Defaults to `/models/`
 * (joined onto `__dirname` when running locally).
 * @property {boolean} useFS Whether the file system is used to load files. Defaults to `true` if available.
 * @property {boolean} isBrowser Whether the environment is a browser, determined by checking for the window and document objects.
 * @property {boolean} useBrowserCache Whether the Cache API is used to cache models. Defaults to `true` if available.
 * @property {boolean} useFSCache Whether the file system is used to cache files. Defaults to `true` if available.
 * @property {string} cacheDir Directory used for file-system caching. Defaults to a `.cache` directory
 * under `__dirname` when running locally, and `null` otherwise.
 * @property {boolean} useCustomCache Whether a custom cache system (given by `customCache`) is used. Defaults to `false`.
 * @property {Object} customCache The custom cache to use. Defaults to `null`. Note: this must be an object that
 * implements the `match` and `put` functions of the Web Cache API. For more information, see https://developer.mozilla.org/en-US/docs/Web/API/Cache
 */
export const env = {
    // ----- Backends settings -----
    backends: {
        // onnxruntime-web/onnxruntime-node
        onnx: onnx_env,

        // TensorFlow.js
        tfjs: {},
    },

    __dirname,
    version: VERSION,

    // ----- Model settings -----
    allowRemoteModels: true,
    remoteHost: 'https://huggingface.co/',
    remotePathTemplate: '{model}/resolve/{revision}/',

    allowLocalModels: true,
    localModelPath,

    // ----- Environment detection -----
    useFS: FS_AVAILABLE,
    isBrowser: isBrowser(),

    // ----- Cache settings -----
    useBrowserCache: WEB_CACHE_AVAILABLE,

    useFSCache: FS_AVAILABLE,
    cacheDir: DEFAULT_CACHE_DIR,

    useCustomCache: false,
    customCache: null,
};
/**
 * Check whether a value has no own enumerable string-keyed properties.
 *
 * `null` and `undefined` are treated as empty rather than passed to
 * `Object.keys`, which would throw a TypeError for them. This keeps the
 * module-level availability checks from crashing when an imported module
 * resolves to a nullish value.
 *
 * @param {Object|null|undefined} obj The value to inspect.
 * @returns {boolean} `true` if `obj` is nullish or has no own enumerable keys, `false` otherwise.
 * @private
 */
function isEmpty(obj) {
    // `== null` intentionally matches both `null` and `undefined`.
    if (obj == null) {
        return true;
    }
    return Object.keys(obj).length === 0;
}