/*
* Package: @mediapipe/tasks-audio (MediaPipe Audio Tasks)
* Version: not recorded in this listing
* Listing size: 338 lines (325 loc), 14 kB
* Language: TypeScript (type declarations)
*/
/**
* Performs audio classification.
*
* The constructor is private, so instances are obtained through one of the
* static factories (`createFromOptions()`, `createFromModelBuffer()` or
* `createFromModelPath()`), each of which returns a promise of an
* `AudioClassifier`.
*/
export declare class AudioClassifier extends AudioTaskRunner<AudioClassifierResult[]> {
/**
* Initializes the Wasm runtime and creates a new audio classifier from the
* provided options.
* @export
* @param wasmFileset A configuration object that provides the location of the
* Wasm binary and its loader.
* @param audioClassifierOptions The options for the audio classifier. Note
* that either a path to the model asset or a model buffer needs to be
* provided (via `baseOptions`).
* @return A promise of the newly created `AudioClassifier`.
*/
static createFromOptions(wasmFileset: WasmFileset, audioClassifierOptions: AudioClassifierOptions): Promise<AudioClassifier>;
/**
* Initializes the Wasm runtime and creates a new audio classifier based on
* the provided model asset buffer.
* @export
* @param wasmFileset A configuration object that provides the location of the
* Wasm binary and its loader.
* @param modelAssetBuffer A `Uint8Array` or a stream reader
* (`ReadableStreamDefaultReader`) containing a binary representation of the
* model.
* @return A promise of the newly created `AudioClassifier`.
*/
static createFromModelBuffer(wasmFileset: WasmFileset, modelAssetBuffer: Uint8Array | ReadableStreamDefaultReader): Promise<AudioClassifier>;
/**
* Initializes the Wasm runtime and creates a new audio classifier based on
* the path to the model asset.
* @export
* @param wasmFileset A configuration object that provides the location of the
* Wasm binary and its loader.
* @param modelAssetPath The path to the model asset.
* @return A promise of the newly created `AudioClassifier`.
*/
static createFromModelPath(wasmFileset: WasmFileset, modelAssetPath: string): Promise<AudioClassifier>;
/** Not callable from outside the class; use the static `createFrom*()` factories. */
private constructor();
/**
* Sets new options for the audio classifier.
*
* Calling `setOptions()` with a subset of options only affects those options.
* You can reset an option back to its default value by explicitly setting it
* to `undefined`.
*
* @export
* @param options The options for the audio classifier.
* @return A `Promise<void>`. NOTE(review): presumably settles once the new
* options have been applied; confirm against the implementation.
*/
setOptions(options: AudioClassifierOptions): Promise<void>;
/**
* Performs audio classification on the provided audio clip and waits
* synchronously for the response (the results are returned directly, not
* wrapped in a promise).
*
* @export
* @param audioData An array of raw audio capture data, like from a call to
* `getChannelData()` on an AudioBuffer.
* @param sampleRate The sample rate in Hz of the provided audio data. If not
* set, defaults to the sample rate set via `setDefaultSampleRate()` or
* `48000` if no custom default was set.
* @return The classification results of the audio data, as an array of
* `AudioClassifierResult`.
*/
classify(audioData: Float32Array, sampleRate?: number): AudioClassifierResult[];
}
/**
* Options to configure the MediaPipe Audio Classifier Task.
*
* Declares no members of its own; it is the combination of the classifier
* options (`ClassifierOptions`) and the general task options
* (`TaskRunnerOptions`).
*/
export declare interface AudioClassifierOptions extends ClassifierOptions, TaskRunnerOptions {
}
/** Classification results of a model. */
export declare interface AudioClassifierResult {
/** The classification results for each head of the model. */
classifications: Classifications[];
/**
* The optional timestamp (in milliseconds) of the start of the chunk of data
* corresponding to these results.
*
* This is only used for classification on time series (e.g. audio
* classification). In these use cases, the amount of data to process might
* exceed the maximum size that the model can process. To solve this, the
* input data is split into multiple chunks starting at different timestamps.
*/
timestampMs?: number;
}
/**
* Base class for all MediaPipe Audio Tasks.
*
* NOTE(review): the type parameter `T` is not referenced by any member
* visible in this declaration; `AudioClassifier` supplies its result array
* type for it. Presumably it is used by non-public members; confirm.
*/
declare abstract class AudioTaskRunner<T> extends TaskRunner {
/**
* Sets the sample rate for API calls that omit an explicit sample rate.
* `48000` is used as a default if this method is not called.
*
* @export
* @param sampleRate A sample rate in Hz (e.g. `44100`).
*/
setDefaultSampleRate(sampleRate: number): void;
}
/**
* Copyright 2022 The MediaPipe Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Options to configure MediaPipe model loading and processing.
*
* Every field is optional and may also be passed explicitly as `undefined`.
*/
declare interface BaseOptions_2 {
/**
* The path to the model asset file. Only one of `modelAssetPath` or
* `modelAssetBuffer` can be set.
*/
modelAssetPath?: string | undefined;
/**
* A buffer (`Uint8Array`) or stream reader (`ReadableStreamDefaultReader`)
* containing the model asset. Only one of `modelAssetPath` or
* `modelAssetBuffer` can be set.
*/
modelAssetBuffer?: Uint8Array | ReadableStreamDefaultReader | undefined;
/**
* Overrides the default backend to use for the provided model. Must be
* either `"CPU"` or `"GPU"`.
*/
delegate?: "CPU" | "GPU" | undefined;
}
/**
* Copyright 2022 The MediaPipe Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** A classification category. */
export declare interface Category {
/** The probability score of this label category. */
score: number;
/** The index of the category in the corresponding label file. */
index: number;
/**
* The label of this category object. Defaults to an empty string if there is
* no category.
*/
categoryName: string;
/**
* The display name of the label, which may be translated for different
* locales. For example, a label, "apple", may be translated into Spanish for
* display purposes, so that the `displayName` is "manzana". Defaults to an
* empty string if there is no display name.
*/
displayName: string;
}
/** Classification results for a given classifier head. */
export declare interface Classifications {
/**
* The array of predicted categories, usually sorted by descending scores,
* i.e. from high to low probability.
*/
categories: Category[];
/**
* The index of the classifier head these categories refer to. This is
* useful for multi-head models.
*/
headIndex: number;
/**
* The name of the classifier head, which is the corresponding tensor
* metadata name. Defaults to an empty string if there is no such metadata.
*/
headName: string;
}
/**
* Copyright 2022 The MediaPipe Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Options to configure a MediaPipe Classifier Task.
*
* Every field is optional and may also be passed explicitly as `undefined`.
*/
declare interface ClassifierOptions {
/**
* The locale to use for display names specified through the TFLite Model
* Metadata, if any. Defaults to English.
*/
displayNamesLocale?: string | undefined;
/** The maximum number of top-scored results to return. */
maxResults?: number | undefined;
/**
* Overrides the value provided in the model metadata. Results below this
* value are rejected.
*/
scoreThreshold?: number | undefined;
/**
* Allowlist of category names. If non-empty, results whose category name is
* not in this set will be filtered out. Duplicate or unknown category names
* are ignored. Mutually exclusive with `categoryDenylist`.
*/
categoryAllowlist?: string[] | undefined;
/**
* Denylist of category names. If non-empty, results whose category name is
* in this set will be filtered out. Duplicate or unknown category names are
* ignored. Mutually exclusive with `categoryAllowlist`.
*/
categoryDenylist?: string[] | undefined;
}
/**
* Resolves the files required for the MediaPipe Task APIs.
*
* This class verifies whether SIMD is supported in the current environment and
* loads the SIMD files only if support is detected. The returned filesets
* require that the Wasm files are published without renaming. If this is not
* possible, you can invoke the MediaPipe Tasks APIs using a manually created
* `WasmFileset`.
*
* All members are static; each `for*Tasks()` method returns a promise of a
* `WasmFileset` for the corresponding task family.
*/
export declare class FilesetResolver {
/**
* Returns whether SIMD is supported in the current environment.
*
* If your environment requires custom locations for the MediaPipe Wasm files,
* you can use `isSimdSupported()` to decide whether to load the SIMD-based
* assets. Note that for ES6 Modules, SIMD is assumed to be always supported.
*
* @export
* @param useModule Whether to use ES6 Modules for the Wasm files.
* @return A promise of a boolean indicating whether SIMD support was detected
* in the current environment.
*/
static isSimdSupported(useModule?: boolean): Promise<boolean>;
/**
* Creates a fileset for the MediaPipe Audio tasks.
*
* @export
* @param basePath An optional base path to specify the directory the Wasm
* files should be loaded from. If not specified, the Wasm files are
* loaded from the host's root directory.
* @param useModule Whether to use ES6 Modules for the Wasm files.
* @return A promise of a `WasmFileset` that can be used to initialize
* MediaPipe Audio tasks.
*/
static forAudioTasks(basePath?: string, useModule?: boolean): Promise<WasmFileset>;
/**
* Creates a fileset for the MediaPipe GenAI tasks.
*
* @export
* @param basePath An optional base path to specify the directory the Wasm
* files should be loaded from. If not specified, the Wasm files are
* loaded from the host's root directory.
* @param useModule Whether to use ES6 Modules for the Wasm files.
* @return A promise of a `WasmFileset` that can be used to initialize
* MediaPipe GenAI tasks.
*/
static forGenAiTasks(basePath?: string, useModule?: boolean): Promise<WasmFileset>;
/**
* Creates a fileset for the MediaPipe Text tasks.
*
* @export
* @param basePath An optional base path to specify the directory the Wasm
* files should be loaded from. If not specified, the Wasm files are
* loaded from the host's root directory.
* @param useModule Whether to use ES6 Modules for the Wasm files.
* @return A promise of a `WasmFileset` that can be used to initialize
* MediaPipe Text tasks.
*/
static forTextTasks(basePath?: string, useModule?: boolean): Promise<WasmFileset>;
/**
* Creates a fileset for the MediaPipe Vision tasks.
*
* @export
* @param basePath An optional base path to specify the directory the Wasm
* files should be loaded from. If not specified, the Wasm files are
* loaded from the host's root directory.
* @param useModule Whether to use ES6 Modules for the Wasm files.
* @return A promise of a `WasmFileset` that can be used to initialize
* MediaPipe Vision tasks.
*/
static forVisionTasks(basePath?: string, useModule?: boolean): Promise<WasmFileset>;
}
/**
* Base class for all MediaPipe Tasks.
*
* Abstract, with a protected constructor: it can only be instantiated through
* a subclass.
*/
declare abstract class TaskRunner {
/** Only accessible to subclasses. */
protected constructor();
/**
* Configures the task with custom options. Must be implemented by each
* concrete task.
*
* @param options The options to apply to the task.
* @return A `Promise<void>`. NOTE(review): presumably settles once the
* options have been applied; confirm against the implementation.
*/
abstract setOptions(options: TaskRunnerOptions): Promise<void>;
/**
* Closes and cleans up the resources held by this task.
* @export
*/
close(): void;
}
/** Options to configure MediaPipe Tasks in general. */
declare interface TaskRunnerOptions {
/**
* Options to configure the loading of the model assets (model path or
* buffer, and the backend delegate).
*/
baseOptions?: BaseOptions_2;
}
/**
* Copyright 2022 The MediaPipe Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* An object containing the locations of the Wasm assets.
*
* The loader and binary paths are required; the asset loader and asset binary
* paths are optional.
*/
declare interface WasmFileset {
/** The path to the Wasm loader script. */
wasmLoaderPath: string;
/** The path to the Wasm binary. */
wasmBinaryPath: string;
/** The optional path to the asset loader script. */
assetLoaderPath?: string;
/** The optional path to the assets binary. */
assetBinaryPath?: string;
}
// Empty export list: makes this declaration file a module, so that only the
// declarations explicitly marked `export` are part of its public surface.
export { }