// transformers-fork
// Version: (not specified)
// State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!
// 146 lines (129 loc) • 5.41 kB
// JavaScript
/**
* @file Processors are used to prepare inputs (e.g., text, image or audio) for a model.
*
* **Example:** Using a `WhisperProcessor` to prepare an audio input for a model.
* ```javascript
* import { AutoProcessor, read_audio } from '@huggingface/transformers';
*
* const processor = await AutoProcessor.from_pretrained('openai/whisper-tiny.en');
* const audio = await read_audio('https://huggingface.co/datasets/Narsil/asr_dummy/resolve/main/mlk.flac', 16000);
* const { input_features } = await processor(audio);
* // Tensor {
* // data: Float32Array(240000) [0.4752984642982483, 0.5597258806228638, 0.56434166431427, ...],
* // dims: [1, 80, 3000],
* // type: 'float32',
* // size: 240000,
* // }
* ```
*
* @module processors
*/
import { PROCESSOR_NAME } from '../utils/constants.js';
import {
Callable,
} from '../utils/generic.js';
import { getModelJSON } from '../utils/hub.js';
/**
* @typedef {Object} ProcessorProperties Additional processor-specific properties.
* @typedef {import('../utils/hub.js').PretrainedOptions & ProcessorProperties} PretrainedProcessorOptions
*/
/**
* Represents a Processor that extracts features from an input.
*/
export class Processor extends Callable {
    /**
     * Names of the static properties that `from_pretrained` looks for on the class.
     * For every name that is present, `this[name].from_pretrained(...)` is awaited and the
     * result is stored in `components` under the name with its `_class` suffix removed
     * (e.g. `tokenizer_class` -> `components.tokenizer`).
     */
    static classes = [
        'image_processor_class',
        'tokenizer_class',
        'feature_extractor_class',
    ];

    /**
     * When `true`, `from_pretrained` loads the `PROCESSOR_NAME` JSON file and uses it as the
     * processor config; otherwise the config is an empty object.
     */
    static uses_processor_config = false;

    /**
     * Creates a new Processor with the given components.
     * @param {Object} config The processor configuration.
     * @param {Record<string, Object>} components The loaded components, keyed by
     * `image_processor`, `tokenizer` and/or `feature_extractor`.
     */
    constructor(config, components) {
        super();
        this.config = config;
        this.components = components;
    }

    /**
     * @returns {import('./image_processors_utils.js').ImageProcessor|undefined} The image processor of the processor, if it exists.
     */
    get image_processor() {
        return this.components.image_processor;
    }

    /**
     * @returns {import('../tokenizers.js').PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
     */
    get tokenizer() {
        return this.components.tokenizer;
    }

    /**
     * @returns {import('./feature_extraction_utils.js').FeatureExtractor|undefined} The feature extractor of the processor, if it exists.
     */
    get feature_extractor() {
        return this.components.feature_extractor;
    }

    /**
     * Forwards to the tokenizer's `apply_chat_template`, with `tokenize` defaulting to `false`
     * unless the caller overrides it in `options`.
     * @param {Parameters<import('../tokenizers.js').PreTrainedTokenizer['apply_chat_template']>[0]} messages The messages to format.
     * @param {Parameters<import('../tokenizers.js').PreTrainedTokenizer['apply_chat_template']>[1]} [options] Options passed on to the tokenizer.
     * @returns {ReturnType<import('../tokenizers.js').PreTrainedTokenizer['apply_chat_template']>} Whatever the tokenizer returns.
     * @throws {Error} If the processor has no tokenizer.
     */
    apply_chat_template(messages, options = {}) {
        const { tokenizer } = this;
        if (!tokenizer) {
            throw new Error('Unable to apply chat template without a tokenizer.');
        }
        // Spread `options` last so that an explicit `tokenize` wins over the default.
        return tokenizer.apply_chat_template(messages, {
            tokenize: false,
            ...options,
        });
    }

    /**
     * Forwards all arguments to the tokenizer's `batch_decode`.
     * @param {Parameters<import('../tokenizers.js').PreTrainedTokenizer['batch_decode']>} args
     * @returns {ReturnType<import('../tokenizers.js').PreTrainedTokenizer['batch_decode']>}
     * @throws {Error} If the processor has no tokenizer.
     */
    batch_decode(...args) {
        const { tokenizer } = this;
        if (!tokenizer) {
            throw new Error('Unable to decode without a tokenizer.');
        }
        return tokenizer.batch_decode(...args);
    }

    /**
     * Calls the first available component with the given input. Components are tried in the
     * order: image processor, feature extractor, tokenizer.
     * @param {any} input The input to process.
     * @param {...any} args Additional arguments forwarded to the component.
     * @returns {Promise<any>} A Promise that resolves with the component's output.
     * @throws {Error} If the processor has none of the three components.
     */
    async _call(input, ...args) {
        const component = this.image_processor || this.feature_extractor || this.tokenizer;
        if (!component) {
            throw new Error('No image processor, feature extractor, or tokenizer found.');
        }
        return component(input, ...args);
    }

    /**
     * Instantiate this processor class from a pretrained model.
     *
     * Every component class declared on the class (see `classes`) is loaded through its own
     * `from_pretrained`. If `uses_processor_config` is `true`, the `PROCESSOR_NAME` JSON file is
     * loaded as well. All loads are started together and awaited concurrently.
     *
     * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
     * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
     *   Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
     *   user or organization name, like `dbmdz/bert-base-german-cased`.
     * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
     * @param {PretrainedProcessorOptions} [options] Additional options for loading the processor.
     * Defaults to an empty object, so component loaders never receive `undefined`.
     *
     * @returns {Promise<Processor>} A new instance of the Processor class.
     */
    static async from_pretrained(pretrained_model_name_or_path, options = {}) {
        // NOTE(review): the third argument (`true`) presumably marks the config file as
        // required — confirm against `getModelJSON`.
        const config_source = this.uses_processor_config
            ? getModelJSON(pretrained_model_name_or_path, PROCESSOR_NAME, true, options)
            : {};

        // `in` (rather than an own-property check) lets subclasses inherit component classes.
        const component_entries = Promise.all(
            this.classes
                .filter((cls) => cls in this)
                .map(async (cls) => {
                    const component = await this[cls].from_pretrained(pretrained_model_name_or_path, options);
                    return [cls.replace(/_class$/, ''), component];
                })
        );

        const [config, entries] = await Promise.all([config_source, component_entries]);
        return new this(config, Object.fromEntries(entries));
    }
}