UNPKG

@mediapipe/tasks-audio

Version:

MediaPipe Audio Tasks

503 lines (485 loc) 20.6 kB
/** Performs audio classification. */ export declare class AudioClassifier extends AudioTaskRunner<AudioClassifierResult[]> {
  /**
   * Initializes the Wasm runtime and creates a new audio classifier from the
   * provided options.
   * @export
   * @param wasmFileset A configuration object that provides the location of the
   *     Wasm binary and its loader.
   * @param audioClassifierOptions The options for the audio classifier. Note
   *     that either a path to the model asset or a model buffer needs to be
   *     provided (via `baseOptions`).
   */
  static createFromOptions(wasmFileset: WasmFileset, audioClassifierOptions: AudioClassifierOptions): Promise<AudioClassifier>;
  /**
   * Initializes the Wasm runtime and creates a new audio classifier based on
   * the provided model asset buffer.
   * @export
   * @param wasmFileset A configuration object that provides the location of the
   *     Wasm binary and its loader.
   * @param modelAssetBuffer An array or a stream containing a binary
   *     representation of the model.
   */
  static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array | ReadableStreamDefaultReader): Promise<AudioClassifier>;
  /**
   * Initializes the Wasm runtime and creates a new audio classifier based on
   * the path to the model asset.
   * @export
   * @param wasmFileset A configuration object that provides the location of the
   *     Wasm binary and its loader.
   * @param modelAssetPath The path to the model asset.
   */
  static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<AudioClassifier>;
  private constructor();
  /**
   * Sets new options for the audio classifier.
   *
   * Calling `setOptions()` with a subset of options only affects those options.
   * You can reset an option back to its default value by explicitly setting it
   * to `undefined`.
   *
   * @export
   * @param options The options for the audio classifier.
   */
  setOptions(options: AudioClassifierOptions): Promise<void>;
  /**
   * Performs audio classification on the provided audio clip and waits
   * synchronously for the response.
   *
   * @export
   * @param audioData An array of raw audio capture data, like from a call to
   *     `getChannelData()` on an AudioBuffer.
   * @param sampleRate The sample rate in Hz of the provided audio data. If not
   *     set, defaults to the sample rate set via `setDefaultSampleRate()` or
   *     `48000` if no custom default was set.
   * @return The classification result of the audio data
   */
  classify(audioData: Float32Array, sampleRate?: number): AudioClassifierResult[];
}

/** Options to configure the MediaPipe Audio Classifier Task */
export declare interface AudioClassifierOptions extends ClassifierOptions, TaskRunnerOptions {
}

/** Classification results of a model. */
export declare interface AudioClassifierResult {
  /** The classification results for each head of the model. */
  classifications: Classifications[];
  /**
   * The optional timestamp (in milliseconds) of the start of the chunk of data
   * corresponding to these results.
   *
   * This is only used for classification on time series (e.g. audio
   * classification). In these use cases, the amount of data to process might
   * exceed the maximum size that the model can process: to solve this, the
   * input data is split into multiple chunks starting at different timestamps.
   */
  timestampMs?: number;
}

/** Performs embedding extraction on audio. */
export declare class AudioEmbedder extends AudioTaskRunner<AudioEmbedderResult[]> {
  /**
   * Initializes the Wasm runtime and creates a new audio embedder from the
   * provided options.
   * @export
   * @param wasmFileset A configuration object that provides the location of the
   *     Wasm binary and its loader.
   * @param audioEmbedderOptions The options for the audio embedder. Note that
   *     either a path to the TFLite model or the model itself needs to be
   *     provided (via `baseOptions`).
   */
  static createFromOptions(wasmFileset: WasmFileset, audioEmbedderOptions: AudioEmbedderOptions): Promise<AudioEmbedder>;
  /**
   * Initializes the Wasm runtime and creates a new audio embedder based on the
   * provided model asset buffer.
   * @export
   * @param wasmFileset A configuration object that provides the location of the
   *     Wasm binary and its loader.
   * @param modelAssetBuffer An array or a stream containing a binary
   *     representation of the model.
   */
  static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array | ReadableStreamDefaultReader): Promise<AudioEmbedder>;
  /**
   * Initializes the Wasm runtime and creates a new audio embedder based on the
   * path to the model asset.
   * @export
   * @param wasmFileset A configuration object that provides the location of the
   *     Wasm binary and its loader.
   * @param modelAssetPath The path to the TFLite model.
   */
  static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<AudioEmbedder>;
  private constructor();
  /**
   * Sets new options for the audio embedder.
   *
   * Calling `setOptions()` with a subset of options only affects those options.
   * You can reset an option back to its default value by explicitly setting it
   * to `undefined`.
   *
   * @export
   * @param options The options for the audio embedder.
   */
  setOptions(options: AudioEmbedderOptions): Promise<void>;
  /**
   * Performs embedding extraction on the provided audio clip and waits
   * synchronously for the response.
   *
   * @export
   * @param audioData An array of raw audio capture data, like from a call to
   *     `getChannelData()` on an AudioBuffer.
   * @param sampleRate The sample rate in Hz of the provided audio data. If not
   *     set, defaults to the sample rate set via `setDefaultSampleRate()` or
   *     `48000` if no custom default was set.
   * @return The embedding results of the audio
   */
  embed(audioData: Float32Array, sampleRate?: number): AudioEmbedderResult[];
}

/** Options to configure the MediaPipe Audio Embedder Task */
export declare interface AudioEmbedderOptions extends EmbedderOptions, TaskRunnerOptions {
}

/** Embedding results for a given embedder model. */
export declare interface AudioEmbedderResult {
  /**
   * The embedding results for each model head, i.e. one for each output tensor.
   */
  embeddings: Embedding[];
  /**
   * The optional timestamp (in milliseconds) of the start of the chunk of
   * data corresponding to these results.
   *
   * This is only used for embedding extraction on time series (e.g. audio
   * embedding). In these use cases, the amount of data to process might
   * exceed the maximum size that the model can process: to solve this, the
   * input data is split into multiple chunks starting at different timestamps.
   */
  timestampMs?: number;
}

/** Base class for all MediaPipe Audio Tasks. */
declare abstract class AudioTaskRunner<T> extends TaskRunner {
  /**
   * Sets the sample rate for API calls that omit an explicit sample rate.
   * `48000` is used as a default if this method is not called.
   *
   * @export
   * @param sampleRate A sample rate (e.g. `44100`).
   */
  setDefaultSampleRate(sampleRate: number): void;
}

/**
 * Copyright 2022 The MediaPipe Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/** Options to configure MediaPipe model loading and processing. */
declare interface BaseOptions_2 {
  /**
   * The model path to the model asset file. Only one of `modelAssetPath` or
   * `modelAssetBuffer` can be set.
   */
  modelAssetPath?: string | undefined;
  /**
   * A buffer or stream reader containing the model asset. Only one of
   * `modelAssetPath` or `modelAssetBuffer` can be set.
   */
  modelAssetBuffer?: Uint8Array | ReadableStreamDefaultReader | undefined;
  /**
   * Overrides the default backend to use for the provided model.
   */
  delegate?: "CPU" | "GPU" | undefined;
}

/**
 * Copyright 2022 The MediaPipe Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/** A classification category. */
export declare interface Category {
  /** The probability score of this label category. */
  score: number;
  /** The index of the category in the corresponding label file. */
  index: number;
  /**
   * The label of this category object. Defaults to an empty string if there is
   * no category.
   */
  categoryName: string;
  /**
   * The display name of the label, which may be translated for different
   * locales. For example, a label, "apple", may be translated into Spanish for
   * display purpose, so that the `display_name` is "manzana". Defaults to an
   * empty string if there is no display name.
   */
  displayName: string;
}

/** Classification results for a given classifier head. */
export declare interface Classifications {
  /**
   * The array of predicted categories, usually sorted by descending scores,
   * e.g., from high to low probability.
   */
  categories: Category[];
  /**
   * The index of the classifier head these categories refer to. This is
   * useful for multi-head models.
   */
  headIndex: number;
  /**
   * The name of the classifier head, which is the corresponding tensor
   * metadata name. Defaults to an empty string if there is no such metadata.
   */
  headName: string;
}

/**
 * Copyright 2022 The MediaPipe Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/** Options to configure a MediaPipe Classifier Task. */
declare interface ClassifierOptions {
  /**
   * The locale to use for display names specified through the TFLite Model
   * Metadata, if any. Defaults to English.
   */
  displayNamesLocale?: string | undefined;
  /** The maximum number of top-scored detection results to return. */
  maxResults?: number | undefined;
  /**
   * Overrides the value provided in the model metadata. Results below this
   * value are rejected.
   */
  scoreThreshold?: number | undefined;
  /**
   * Allowlist of category names. If non-empty, detection results whose category
   * name is not in this set will be filtered out. Duplicate or unknown category
   * names are ignored. Mutually exclusive with `categoryDenylist`.
   */
  categoryAllowlist?: string[] | undefined;
  /**
   * Denylist of category names. If non-empty, detection results whose category
   * name is in this set will be filtered out. Duplicate or unknown category
   * names are ignored. Mutually exclusive with `categoryAllowlist`.
   */
  categoryDenylist?: string[] | undefined;
}

/**
 * Copyright 2022 The MediaPipe Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/** Options to configure a MediaPipe Embedder Task */
declare interface EmbedderOptions {
  /**
   * Whether to normalize the returned feature vector with L2 norm. Use this
   * option only if the model does not already contain a native L2_NORMALIZATION
   * TF Lite Op. In most cases, this is already the case and L2 norm is thus
   * achieved through TF Lite inference.
   */
  l2Normalize?: boolean | undefined;
  /**
   * Whether the returned embedding should be quantized to bytes via scalar
   * quantization. Embeddings are implicitly assumed to be unit-norm and
   * therefore any dimension is guaranteed to have a value in [-1.0, 1.0]. Use
   * the l2_normalize option if this is not the case.
   */
  quantize?: boolean | undefined;
}

/**
 * Copyright 2022 The MediaPipe Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * List of embeddings with an optional timestamp.
 *
 * One and only one of the two 'floatEmbedding' and 'quantizedEmbedding' will
 * contain data, based on whether or not the embedder was configured to perform
 * scalar quantization.
 */
export declare interface Embedding {
  /**
   * Floating-point embedding. Empty if the embedder was configured to perform
   * scalar-quantization.
   */
  floatEmbedding?: number[];
  /**
   * Scalar-quantized embedding. Empty if the embedder was not configured to
   * perform scalar quantization.
   */
  quantizedEmbedding?: Uint8Array;
  /**
   * The index of the embedder head (output tensor) this embedding refers to.
   * This is useful for multi-head models.
   */
  headIndex: number;
  /**
   * The name of the embedder head, which is the corresponding tensor
   * metadata name.
   */
  headName: string;
}

/**
 * Resolves the files required for the MediaPipe Task APIs.
 *
 * This class verifies whether SIMD is supported in the current environment and
 * loads the SIMD files only if support is detected. The returned filesets
 * require that the Wasm files are published without renaming. If this is not
 * possible, you can invoke the MediaPipe Tasks APIs using a manually created
 * `WasmFileset`.
 */
export declare class FilesetResolver {
  /**
   * Returns whether SIMD is supported in the current environment.
   *
   * If your environment requires custom locations for the MediaPipe Wasm files,
   * you can use `isSimdSupported()` to decide whether to load the SIMD-based
   * assets.
   *
   * @export
   * @return Whether SIMD support was detected in the current environment.
   */
  static isSimdSupported(): Promise<boolean>;
  /**
   * Creates a fileset for the MediaPipe Audio tasks.
   *
   * @export
   * @param basePath An optional base path to specify the directory the Wasm
   *     files should be loaded from. If not specified, the Wasm files are
   *     loaded from the host's root directory.
   * @return A `WasmFileset` that can be used to initialize MediaPipe Audio
   *     tasks.
   */
  static forAudioTasks(basePath?: string): Promise<WasmFileset>;
  /**
   * Creates a fileset for the MediaPipe GenAI tasks.
   *
   * @export
   * @param basePath An optional base path to specify the directory the Wasm
   *     files should be loaded from. If not specified, the Wasm files are
   *     loaded from the host's root directory.
   * @return A `WasmFileset` that can be used to initialize MediaPipe GenAI
   *     tasks.
   */
  static forGenAiTasks(basePath?: string): Promise<WasmFileset>;
  /**
   * Creates a fileset for the MediaPipe GenAI Experimental tasks.
   *
   * @export
   * @param basePath An optional base path to specify the directory the Wasm
   *     files should be loaded from. If not specified, the Wasm files are
   *     loaded from the host's root directory.
   * @return A `WasmFileset` that can be used to initialize MediaPipe GenAI
   *     Experimental tasks.
   */
  static forGenAiExperimentalTasks(basePath?: string): Promise<WasmFileset>;
  /**
   * Creates a fileset for the MediaPipe Text tasks.
   *
   * @export
   * @param basePath An optional base path to specify the directory the Wasm
   *     files should be loaded from. If not specified, the Wasm files are
   *     loaded from the host's root directory.
   * @return A `WasmFileset` that can be used to initialize MediaPipe Text
   *     tasks.
   */
  static forTextTasks(basePath?: string): Promise<WasmFileset>;
  /**
   * Creates a fileset for the MediaPipe Vision tasks.
   *
   * @export
   * @param basePath An optional base path to specify the directory the Wasm
   *     files should be loaded from. If not specified, the Wasm files are
   *     loaded from the host's root directory.
   * @return A `WasmFileset` that can be used to initialize MediaPipe Vision
   *     tasks.
   */
  static forVisionTasks(basePath?: string): Promise<WasmFileset>;
}

/** Base class for all MediaPipe Tasks. */
declare abstract class TaskRunner {
  protected constructor();
  /** Configures the task with custom options. */
  abstract setOptions(options: TaskRunnerOptions): Promise<void>;
  /**
   * Closes and cleans up the resources held by this task.
   * @export
   */
  close(): void;
}

/** Options to configure MediaPipe Tasks in general. */
declare interface TaskRunnerOptions {
  /** Options to configure the loading of the model assets. */
  baseOptions?: BaseOptions_2;
}

/**
 * Copyright 2022 The MediaPipe Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/** An object containing the locations of the Wasm assets */
declare interface WasmFileset {
  /** The path to the Wasm loader script. */
  wasmLoaderPath: string;
  /** The path to the Wasm binary. */
  wasmBinaryPath: string;
  /** The optional path to the asset loader script. */
  assetLoaderPath?: string;
  /** The optional path to the assets binary. */
  assetBinaryPath?: string;
}

export { }