UNPKG

chromadb-default-embed

Version:

Chroma's fork of @xenova/transformers serving as our default embedding function

github.com/xenova/transformers.js

xenova/transformers.js

665 lines (591 loc) • 28.9 kB

JavaScript

/**
 * @file Helper module for audio processing.
 *
 * These functions and classes are only used internally,
 * meaning an end-user shouldn't need to access anything here.
 *
 * @module utils/audio
 */

import {
    getFile,
} from './hub.js';
import { FFT, max } from './maths.js';
import {
    calculateReflectOffset,
} from './core.js';

/**
 * Helper function to read audio from a path/URL.
 * @param {string|URL} url The path/URL to load the audio from.
 * @param {number} sampling_rate The sampling rate to use when decoding the audio.
 * @returns {Promise<Float32Array>} The decoded audio as a `Float32Array`.
 * @throws {Error} If `AudioContext` is not available in the current environment.
 */
export async function read_audio(url, sampling_rate) {
    if (typeof AudioContext === 'undefined') {
        // Running in node or an environment without AudioContext
        throw new Error(
            "Unable to load audio from path/URL since `AudioContext` is not available in your environment. " +
            "Instead, audio data should be passed directly to the pipeline/processor. " +
            "For more information and some example code, see https://huggingface.co/docs/transformers.js/guides/node-audio-processing."
        );
    }

    const response = await (await getFile(url)).arrayBuffer();
    const audioCTX = new AudioContext({ sampleRate: sampling_rate });
    if (typeof sampling_rate === 'undefined') {
        console.warn(`No sampling rate provided, using default of ${audioCTX.sampleRate}Hz.`);
    }
    const decoded = await audioCTX.decodeAudioData(response);

    /** @type {Float32Array} */
    let audio;

    // We now replicate HuggingFace's `ffmpeg_read` method:
    if (decoded.numberOfChannels === 2) {
        // When downmixing a stereo audio file to mono using the -ac 1 option in FFmpeg,
        // the audio signal is summed across both channels to create a single mono channel.
        // However, if the audio is at full scale (i.e. the highest possible volume level),
        // the summing of the two channels can cause the audio signal to clip or distort.

        // To prevent this clipping, FFmpeg applies a scaling factor of 1/sqrt(2) (~ 0.707)
        // to the audio signal before summing the two channels. This scaling factor ensures
        // that the combined audio signal will not exceed the maximum possible level, even
        // if both channels are at full scale.

        // After applying this scaling factor, the audio signal from both channels is summed
        // to create a single mono channel. It's worth noting that this scaling factor is
        // only applied when downmixing stereo audio to mono using the -ac 1 option in FFmpeg.
        // If you're using a different downmixing method, or if you're not downmixing the
        // audio at all, this scaling factor may not be needed.

        // NOTE: sqrt(2) * (left + right) / 2 === (left + right) / sqrt(2), i.e. the
        // 1/sqrt(2) gain described above applied to the channel sum.
        const SCALING_FACTOR = Math.sqrt(2);

        const left = decoded.getChannelData(0);
        const right = decoded.getChannelData(1);

        audio = new Float32Array(left.length);
        for (let i = 0; i < decoded.length; ++i) {
            audio[i] = SCALING_FACTOR * (left[i] + right[i]) / 2;
        }

    } else {
        // If the audio is not stereo, we can just use the first channel:
        audio = decoded.getChannelData(0);
    }

    return audio;
}

/**
 * Generates a Hanning window of length M.
 *
 * @param {number} M The length of the Hanning window to generate.
 * @returns {Float64Array} The generated Hanning window (empty if `M < 1`).
 */
export function hanning(M) {
    if (M < 1) {
        return new Float64Array();
    }
    if (M === 1) {
        return new Float64Array([1]);
    }
    const denom = M - 1;
    const factor = Math.PI / denom;
    const cos_vals = new Float64Array(M);
    for (let i = 0; i < M; ++i) {
        const n = 2 * i - denom;
        cos_vals[i] = 0.5 + 0.5 * Math.cos(factor * n);
    }
    return cos_vals;
}

const HERTZ_TO_MEL_MAPPING = {
    "htk": (/** @type {number} */ freq) => 2595.0 * Math.log10(1.0 + (freq / 700.0)),
    "kaldi": (/** @type {number} */ freq) => 1127.0 * Math.log(1.0 + (freq / 700.0)),
    "slaney": (/** @type {number} */ freq, min_log_hertz = 1000.0, min_log_mel = 15.0, logstep = 27.0 / Math.log(6.4)) =>
        freq >= min_log_hertz
            ? min_log_mel + Math.log(freq / min_log_hertz) * logstep
            : 3.0 * freq / 200.0,
}

/**
 * Looks up a conversion function by mel scale name.
 * Only own properties of `mapping` are accepted, so inherited keys such as
 * `"toString"` or `"constructor"` are rejected instead of silently resolving
 * to an unrelated function.
 * @param {Object<string, Function>} mapping The table of conversion functions.
 * @param {string} mel_scale The name of the mel scale.
 * @returns {Function} The conversion function.
 * @throws {Error} If `mel_scale` is not a key of `mapping`.
 */
function _get_mel_conversion(mapping, mel_scale) {
    if (!Object.prototype.hasOwnProperty.call(mapping, mel_scale)) {
        throw new Error('mel_scale should be one of "htk", "slaney" or "kaldi".');
    }
    return mapping[mel_scale];
}

/**
 * Converts a frequency (or array of frequencies) from hertz to mels.
 * @template {Float32Array|Float64Array|number} T
 * @param {T} freq The frequency, or frequencies, in hertz.
 * @param {string} [mel_scale] The mel frequency scale to use: `"htk"`, `"kaldi"` or `"slaney"`.
 * @returns {T} The frequencies on the mel scale.
 * @throws {Error} If `mel_scale` is not supported.
 */
function hertz_to_mel(freq, mel_scale = "htk") {
    const fn = _get_mel_conversion(HERTZ_TO_MEL_MAPPING, mel_scale);
    // Wrap in an arrow function so `map`'s extra (index, array) arguments
    // do not override the default parameters of the "slaney" conversion.
    return typeof freq === 'number' ? fn(freq) : freq.map(x => fn(x));
}

const MEL_TO_HERTZ_MAPPING = {
    "htk": (/** @type {number} */ mels) => 700.0 * (10.0 ** (mels / 2595.0) - 1.0),
    "kaldi": (/** @type {number} */ mels) => 700.0 * (Math.exp(mels / 1127.0) - 1.0),
    "slaney": (/** @type {number} */ mels, min_log_hertz = 1000.0, min_log_mel = 15.0, logstep = Math.log(6.4) / 27.0) =>
        mels >= min_log_mel
            ? min_log_hertz * Math.exp(logstep * (mels - min_log_mel))
            : 200.0 * mels / 3.0,
}

/**
 * Converts a frequency (or array of frequencies) from mels to hertz.
 * @template {Float32Array|Float64Array|number} T
 * @param {T} mels The frequency, or frequencies, in mels.
 * @param {string} [mel_scale] The mel frequency scale to use: `"htk"`, `"kaldi"` or `"slaney"`.
 * @returns {T} The frequencies in hertz.
 * @throws {Error} If `mel_scale` is not supported.
 */
function mel_to_hertz(mels, mel_scale = "htk") {
    const fn = _get_mel_conversion(MEL_TO_HERTZ_MAPPING, mel_scale);
    // See `hertz_to_mel` for why the conversion is wrapped in an arrow function.
    return typeof mels === 'number' ? fn(mels) : mels.map(x => fn(x));
}

/**
 * Creates a triangular filter bank.
 *
 * Adapted from torchaudio and librosa.
 *
 * @param {Float64Array} fft_freqs Discrete frequencies of the FFT bins in Hz, of shape `(num_frequency_bins,)`.
 * @param {Float64Array} filter_freqs Edge and center frequencies of the triangular filters to create, in Hz,
 * of shape `(num_mel_filters + 2,)`. Filter `i` rises from `filter_freqs[i]`, peaks at `filter_freqs[i + 1]`
 * and falls to zero at `filter_freqs[i + 2]`.
 * @returns {number[][]} of shape `(num_mel_filters, num_frequency_bins)`.
 */
function _create_triangular_filter_bank(fft_freqs, filter_freqs) {
    // Width of each rising/falling edge.
    const filter_diff = Float64Array.from(
        { length: filter_freqs.length - 1 },
        (_, i) => filter_freqs[i + 1] - filter_freqs[i]
    );

    const num_filters = filter_freqs.length - 2;
    const num_bins = fft_freqs.length;
    const ret = Array.from({ length: num_filters }, () => new Array(num_bins));

    for (let j = 0; j < num_bins; ++j) { // e.g., 201
        const freq = fft_freqs[j];
        for (let i = 0; i < num_filters; ++i) { // e.g., 80
            // Rising edge (from the lower edge up to the center) ...
            const down = -(filter_freqs[i] - freq) / filter_diff[i];
            // ... and falling edge (from the center down to the upper edge).
            const up = (filter_freqs[i + 2] - freq) / filter_diff[i + 1];
            ret[i][j] = Math.max(0, Math.min(down, up));
        }
    }
    return ret;
}

/**
 * Return evenly spaced numbers over a specified interval.
 * @param {number} start The starting value of the sequence.
 * @param {number} end The end value of the sequence.
 * @param {number} num Number of samples to generate.
 * @returns {Float64Array} `num` evenly spaced samples, calculated over the interval `[start, end]`.
 * If `num` is 1, only `start` is returned.
 */
function linspace(start, end, num) {
    // Guard against 0/0 (NaN) when a single sample is requested.
    const step = num > 1 ? (end - start) / (num - 1) : 0;
    return Float64Array.from({ length: num }, (_, i) => start + step * i);
}

/**
 * Creates a frequency bin conversion matrix used to obtain a mel spectrogram. This is called a *mel filter bank*, and
 * various implementation exist, which differ in the number of filters, the shape of the filters, the way the filters
 * are spaced, the bandwidth of the filters, and the manner in which the spectrum is warped. The goal of these
 * features is to approximate the non-linear human perception of the variation in pitch with respect to the frequency.
 * @param {number} num_frequency_bins Number of frequencies used to compute the spectrogram (should be the same as in `stft`).
 * @param {number} num_mel_filters Number of mel filters to generate.
 * @param {number} min_frequency Lowest frequency of interest in Hz.
 * @param {number} max_frequency Highest frequency of interest in Hz. This should not exceed `sampling_rate / 2`.
 * @param {number} sampling_rate Sample rate of the audio waveform.
 * @param {string} [norm] If `"slaney"`, divide the triangular mel weights by the width of the mel band (area normalization).
 * @param {string} [mel_scale] The mel frequency scale to use, `"htk"`, `"kaldi"` or `"slaney"`.
 * @param {boolean} [triangularize_in_mel_space] If this option is enabled, the triangular filter is applied in mel space rather than frequency space.
 * This should be set to `true` in order to get the same results as `torchaudio` when computing mel filters.
 * @returns {number[][]} Triangular filter bank matrix, which is a 2D array of shape (`num_mel_filters`, `num_frequency_bins`).
 * This is a projection matrix to go from a spectrogram to a mel spectrogram.
 * @throws {Error} If `norm` or `mel_scale` is not supported.
 */
export function mel_filter_bank(
    num_frequency_bins,
    num_mel_filters,
    min_frequency,
    max_frequency,
    sampling_rate,
    norm = null,
    mel_scale = "htk",
    triangularize_in_mel_space = false,
) {
    if (norm !== null && norm !== "slaney") {
        throw new Error('norm must be one of null or "slaney"');
    }

    const mel_min = hertz_to_mel(min_frequency, mel_scale);
    const mel_max = hertz_to_mel(max_frequency, mel_scale);
    // Two extra points: the lower edge of the first filter and the upper edge of the last.
    const mel_freqs = linspace(mel_min, mel_max, num_mel_filters + 2);

    let filter_freqs = mel_to_hertz(mel_freqs, mel_scale);
    let fft_freqs; // frequencies of FFT bins in Hz

    if (triangularize_in_mel_space) {
        const fft_bin_width = sampling_rate / (num_frequency_bins * 2);
        fft_freqs = hertz_to_mel(Float64Array.from({ length: num_frequency_bins }, (_, i) => i * fft_bin_width), mel_scale);
        filter_freqs = mel_freqs;
    } else {
        fft_freqs = linspace(0, Math.floor(sampling_rate / 2), num_frequency_bins);
    }

    const mel_filters = _create_triangular_filter_bank(fft_freqs, filter_freqs);

    if (norm === "slaney") {
        // Slaney-style mel is scaled to be approx constant energy per channel
        for (let i = 0; i < num_mel_filters; ++i) {
            const filter = mel_filters[i];
            const enorm = 2.0 / (filter_freqs[i + 2] - filter_freqs[i]);
            for (let j = 0; j < num_frequency_bins; ++j) {
                // Apply this enorm to all frequency bins
                filter[j] *= enorm;
            }
        }
    }

    // TODO warn if there is a zero row

    return mel_filters;
}

/**
 * Pads an array with a reflected version of itself on both ends.
 * @template {Float32Array|Float64Array} T
 * @param {T} array The array to pad.
 * @param {number} left The amount of padding to add to the left.
 * @param {number} right The amount of padding to add to the right.
 * @returns {T} The padded array (a new array of the same type as `array`).
 */
function padReflect(array, left, right) {
    // @ts-ignore
    const padded = new array.constructor(array.length + left + right);
    const w = array.length - 1;

    // Copy the original samples into the middle ...
    for (let i = 0; i < array.length; ++i) {
        padded[left + i] = array[i];
    }

    // ... then mirror around the first sample ...
    for (let i = 1; i <= left; ++i) {
        padded[left - i] = array[calculateReflectOffset(i, w)];
    }

    // ... and around the last sample.
    for (let i = 1; i <= right; ++i) {
        padded[w + left + i] = array[calculateReflectOffset(w - i, w)];
    }

    return padded;
}

/**
 * Helper function to compute `amplitude_to_db` and `power_to_db`.
 * Computes `factor * (log10(max(min_value, x)) - log10(reference))` for every element. NOTE: Operates in-place.
 * @template {Float32Array|Float64Array} T
 * @param {T} spectrogram The spectrogram to convert (modified in-place).
 * @param {number} factor The decibel multiplier (`20` for amplitudes, `10` for powers).
 * @param {number} reference The input value that corresponds to 0 dB. Must be greater than zero.
 * @param {number} min_value Values are clipped to this minimum before taking the logarithm. Must be greater than zero.
 * @param {number} db_range If not `null`, the maximum dynamic range in decibels. Must be greater than zero.
 * @returns {T} The same `spectrogram` instance, now in decibels.
 * @throws {Error} If `reference`, `min_value` or `db_range` is not greater than zero.
 */
function _db_conversion_helper(spectrogram, factor, reference, min_value, db_range) {
    if (reference <= 0) {
        throw new Error('reference must be greater than zero');
    }

    if (min_value <= 0) {
        throw new Error('min_value must be greater than zero');
    }

    reference = Math.max(min_value, reference);

    const logReference = Math.log10(reference);
    for (let i = 0; i < spectrogram.length; ++i) {
        // log10(x / reference) === log10(x) - log10(reference).
        // The subtraction must happen outside of the logarithm.
        spectrogram[i] = factor * (Math.log10(Math.max(min_value, spectrogram[i])) - logReference);
    }

    if (db_range !== null) {
        if (db_range <= 0) {
            throw new Error('db_range must be greater than zero');
        }
        // Clip everything that is more than `db_range` below the peak.
        const maxValue = max(spectrogram)[0] - db_range;
        for (let i = 0; i < spectrogram.length; ++i) {
            spectrogram[i] = Math.max(spectrogram[i], maxValue);
        }
    }

    return spectrogram;
}

/**
 * Converts an amplitude spectrogram to the decibel scale. This computes `20 * log10(spectrogram / reference)`,
 * using basic logarithm properties for numerical stability. NOTE: Operates in-place.
 *
 * The motivation behind applying the log function on the (mel) spectrogram is that humans do not hear loudness on a
 * linear scale. Generally to double the perceived volume of a sound we need to put 8 times as much energy into it.
 * This means that large variations in energy may not sound all that different if the sound is loud to begin with.
 * This compression operation makes the (mel) spectrogram features match more closely what humans actually hear.
 *
 * @template {Float32Array|Float64Array} T
 * @param {T} spectrogram The input amplitude (mel) spectrogram.
 * @param {number} [reference=1.0] Sets the input spectrogram value that corresponds to 0 dB.
 * For example, use `np.max(spectrogram)` to set the loudest part to 0 dB. Must be greater than zero.
 * @param {number} [min_value=1e-5] The spectrogram will be clipped to this minimum value before conversion to decibels,
 * to avoid taking `log(0)`. The default of `1e-5` corresponds to a minimum of -100 dB. Must be greater than zero.
 * @param {number} [db_range=null] Sets the maximum dynamic range in decibels. For example, if `db_range = 80`, the
 * difference between the peak value and the smallest value will never be more than 80 dB. Must be greater than zero.
 * @returns {T} The modified spectrogram in decibels.
 */
function amplitude_to_db(spectrogram, reference = 1.0, min_value = 1e-5, db_range = null) {
    return _db_conversion_helper(spectrogram, 20.0, reference, min_value, db_range);
}

/**
 * Converts a power spectrogram to the decibel scale. This computes `10 * log10(spectrogram / reference)`,
 * using basic logarithm properties for numerical stability. NOTE: Operates in-place.
 *
 * The motivation behind applying the log function on the (mel) spectrogram is that humans do not hear loudness on a
 * linear scale. Generally to double the perceived volume of a sound we need to put 8 times as much energy into it.
 * This means that large variations in energy may not sound all that different if the sound is loud to begin with.
 * This compression operation makes the (mel) spectrogram features match more closely what humans actually hear.
 *
 * Based on the implementation of `librosa.power_to_db`.
 *
 * @template {Float32Array|Float64Array} T
 * @param {T} spectrogram The input power (mel) spectrogram. Note that a power spectrogram has the amplitudes squared!
 * @param {number} [reference=1.0] Sets the input spectrogram value that corresponds to 0 dB.
 * For example, use `np.max(spectrogram)` to set the loudest part to 0 dB. Must be greater than zero.
 * @param {number} [min_value=1e-10] The spectrogram will be clipped to this minimum value before conversion to decibels,
 * to avoid taking `log(0)`. The default of `1e-10` corresponds to a minimum of -100 dB. Must be greater than zero.
 * @param {number} [db_range=null] Sets the maximum dynamic range in decibels. For example, if `db_range = 80`, the
 * difference between the peak value and the smallest value will never be more than 80 dB. Must be greater than zero.
 * @returns {T} The modified spectrogram in decibels.
 */
function power_to_db(spectrogram, reference = 1.0, min_value = 1e-10, db_range = null) {
    return _db_conversion_helper(spectrogram, 10.0, reference, min_value, db_range);
}

/**
 * Calculates a spectrogram over one waveform using the Short-Time Fourier Transform.
 *
 * This function can create the following kinds of spectrograms:
 *   - amplitude spectrogram (`power = 1.0`)
 *   - power spectrogram (`power = 2.0`)
 *   - complex-valued spectrogram (`power = None`)
 *   - log spectrogram (use `log_mel` argument)
 *   - mel spectrogram (provide `mel_filters`)
 *   - log-mel spectrogram (provide `mel_filters` and `log_mel`)
 *
 * NOTE: this implementation currently always applies `mel_filters` (they are required), and when `power` is
 * `null` the squared magnitudes are used as-is rather than returning complex values.
 *
 * In this implementation, the window is assumed to be zero-padded to have the same size as the analysis frame.
 * A padded window can be obtained from `window_function()`. The FFT input buffer may be larger than the analysis frame,
 * typically the next power of two.
 *
 * @param {Float32Array|Float64Array} waveform The input waveform of shape `(length,)`. This must be a single real-valued, mono waveform.
 * @param {Float32Array|Float64Array} window The windowing function to apply of shape `(frame_length,)`, including zero-padding if necessary. The actual window length may be
 * shorter than `frame_length`, but we're assuming the array has already been zero-padded.
 * @param {number} frame_length The length of the analysis frames in samples (a.k.a., `fft_length`).
 * @param {number} hop_length The stride between successive analysis frames in samples.
 * @param {Object} options
 * @param {number} [options.fft_length=null] The size of the FFT buffer in samples. This determines how many frequency bins the spectrogram will have.
 * For optimal speed, this should be a power of two. If `null`, uses `frame_length`.
 * @param {number} [options.power=1.0] If 1.0, returns the amplitude spectrogram. If 2.0, returns the power spectrogram. If `null`, returns complex numbers.
 * @param {boolean} [options.center=true] Whether to pad the waveform so that frame `t` is centered around time `t * hop_length`. If `false`, frame
 * `t` will start at time `t * hop_length`.
 * @param {string} [options.pad_mode="reflect"] Padding mode used when `center` is `true`. Possible values are: `"constant"` (pad with zeros),
 * `"edge"` (pad with edge values), `"reflect"` (pads with mirrored values). Only `"reflect"` is currently implemented.
 * @param {boolean} [options.onesided=true] If `true`, only computes the positive frequencies and returns a spectrogram containing `fft_length // 2 + 1`
 * frequency bins. If `false`, also computes the negative frequencies and returns `fft_length` frequency bins.
 * @param {number} [options.preemphasis=null] Coefficient for a low-pass filter that applies pre-emphasis before the DFT.
 * @param {number[][]} [options.mel_filters=null] The mel filter bank of shape `(num_mel_filters, num_freq_bins)`, as returned by `mel_filter_bank()`.
 * This filter bank is applied to create a mel spectrogram. Currently required.
 * @param {number} [options.mel_floor=1e-10] Minimum value of mel frequency banks.
 * @param {string} [options.log_mel=null] How to convert the spectrogram to log scale. Possible options are:
 * `null` (don't convert), `"log"` (take the natural logarithm) `"log10"` (take the base-10 logarithm), `"dB"` (convert to decibels).
 * Can only be used when `power` is not `null`.
 * @param {number} [options.reference=1.0] Sets the input spectrogram value that corresponds to 0 dB. For example, use `max(spectrogram)[0]` to set
 * the loudest part to 0 dB. Must be greater than zero.
 * @param {number} [options.min_value=1e-10] The spectrogram will be clipped to this minimum value before conversion to decibels, to avoid taking `log(0)`.
 * For a power spectrogram, the default of `1e-10` corresponds to a minimum of -100 dB. For an amplitude spectrogram, the value `1e-5` corresponds to -100 dB.
 * Must be greater than zero.
 * @param {number} [options.db_range=null] Sets the maximum dynamic range in decibels. For example, if `db_range = 80`, the difference between the
 * peak value and the smallest value will never be more than 80 dB. Must be greater than zero.
 * @param {boolean} [options.remove_dc_offset=null] Subtract mean from waveform on each frame, applied before pre-emphasis. This should be set to `true` in
 * order to get the same results as `torchaudio.compliance.kaldi.fbank` when computing mel filters.
 * @param {number} [options.max_num_frames=null] If provided, limits the number of frames to compute to this value.
 * @param {boolean} [options.do_pad=true] If `true`, pads the output spectrogram to have `max_num_frames` frames.
 * @param {boolean} [options.transpose=false] If `true`, the returned spectrogram will have shape `(num_frames, num_frequency_bins/num_mel_filters)`. If `false`, the returned spectrogram will have shape `(num_frequency_bins/num_mel_filters, num_frames)`.
 * @returns {{data: Float32Array, dims: number[]}} Spectrogram of shape `(num_frequency_bins, length)` (regular spectrogram) or shape `(num_mel_filters, length)` (mel spectrogram).
 * `data` is laid out in row-major order according to `dims`; frames added by padding are left as zeros
 * (except for `log_mel = "dB"`, which converts the whole buffer).
 * @throws {Error} If the arguments are inconsistent or request a feature that is not implemented.
 */
export function spectrogram(
    waveform,
    window,
    frame_length,
    hop_length,
    {
        fft_length = null,
        power = 1.0,
        center = true,
        pad_mode = "reflect",
        onesided = true,
        preemphasis = null,
        mel_filters = null,
        mel_floor = 1e-10,
        log_mel = null,
        reference = 1.0,
        min_value = 1e-10,
        db_range = null,
        remove_dc_offset = null,

        // Custom parameters for efficiency reasons
        max_num_frames = null,
        do_pad = true,
        transpose = false,
    } = {}
) {
    const window_length = window.length;
    if (fft_length === null) {
        fft_length = frame_length;
    }
    if (frame_length > fft_length) {
        throw new Error(`frame_length (${frame_length}) may not be larger than fft_length (${fft_length})`);
    }

    if (window_length !== frame_length) {
        throw new Error(`Length of the window (${window_length}) must equal frame_length (${frame_length})`);
    }

    if (hop_length <= 0) {
        throw new Error("hop_length must be greater than zero");
    }

    if (mel_filters === null) {
        // Fail with a clear message instead of a TypeError on `mel_filters.length` below.
        // TODO: support plain (non-mel) spectrograms.
        throw new Error("mel_filters must be provided; spectrograms without a mel filter bank are not implemented yet.");
    }

    if (center) {
        if (pad_mode !== 'reflect') {
            throw new Error(`pad_mode="${pad_mode}" not implemented yet.`);
        }
        const half_window = Math.floor((fft_length - 1) / 2) + 1;
        waveform = padReflect(waveform, half_window, half_window);
    }

    // split waveform into frames of frame_length size
    const num_frames = Math.floor(1 + Math.floor((waveform.length - frame_length) / hop_length));

    const num_frequency_bins = onesided ? Math.floor(fft_length / 2) + 1 : fft_length;

    // `d1` is the number of frames that are actually computed,
    // `d1Max` is the number of frames in the (possibly padded) output.
    let d1 = num_frames;
    let d1Max = num_frames;

    // If maximum number of frames is provided, we must either pad or truncate
    if (max_num_frames !== null) {
        if (max_num_frames > num_frames) { // input is too short, so we pad
            if (do_pad) {
                d1Max = max_num_frames;
            }
        } else { // input is too long, so we truncate
            d1Max = d1 = max_num_frames;
        }
    }

    // Preallocate arrays to store output.
    const fft = new FFT(fft_length);
    // NOTE: entries at index >= frame_length are never written, so they stay zero
    // (implicit zero-padding of each frame up to `fft_length`).
    const inputBuffer = new Float64Array(fft_length);
    const outputBuffer = new Float64Array(fft.outputBufferSize);
    const magnitudes = new Array(d1);

    for (let i = 0; i < d1; ++i) {
        // Populate buffer with waveform data
        const offset = i * hop_length;
        for (let j = 0; j < frame_length; ++j) {
            inputBuffer[j] = waveform[offset + j];
        }

        if (remove_dc_offset) {
            let sum = 0;
            for (let j = 0; j < frame_length; ++j) {
                sum += inputBuffer[j];
            }
            const mean = sum / frame_length;
            for (let j = 0; j < frame_length; ++j) {
                inputBuffer[j] -= mean;
            }
        }

        if (preemphasis !== null) {
            // Done in reverse to avoid copies and destructive modification
            for (let j = frame_length - 1; j >= 1; --j) {
                inputBuffer[j] -= preemphasis * inputBuffer[j - 1];
            }
            inputBuffer[0] *= 1 - preemphasis;
        }

        for (let j = 0; j < window.length; ++j) {
            inputBuffer[j] *= window[j];
        }

        fft.realTransform(outputBuffer, inputBuffer);

        // compute squared magnitudes (real and imaginary parts are interleaved)
        const row = new Array(num_frequency_bins);
        for (let j = 0; j < row.length; ++j) {
            const j2 = j << 1;
            row[j] = outputBuffer[j2] ** 2 + outputBuffer[j2 + 1] ** 2;
        }
        magnitudes[i] = row;
    }

    // TODO what should happen if power is None?
    // https://github.com/huggingface/transformers/issues/27772
    if (power !== null && power !== 2) {
        // slight optimization to not sqrt:
        // the stored values are |X|^2, so (|X|^2)^(power / 2) === |X|^power.
        const pow = power / 2;
        for (let i = 0; i < magnitudes.length; ++i) {
            const magnitude = magnitudes[i];
            for (let j = 0; j < magnitude.length; ++j) {
                magnitude[j] **= pow;
            }
        }
    }

    const num_mel_filters = mel_filters.length;

    // Only here do we create Float32Array
    const mel_spec = new Float32Array(num_mel_filters * d1Max);

    // Perform matrix muliplication:
    // mel_spec = mel_filters @ magnitudes.T
    //  - mel_filters.shape=(80, 201)
    //  - magnitudes.shape=(3000, 201) => - magnitudes.T.shape=(201, 3000)
    //  - mel_spec.shape=(80, 3000)
    const dims = transpose ? [d1Max, num_mel_filters] : [num_mel_filters, d1Max];

    // Flat index of (mel filter `i`, frame `j`) in `mel_spec`. The row stride must match
    // `dims`, i.e. `d1Max` (not `d1`) frames per row when the output is not transposed.
    const flat_index = transpose
        ? (i, j) => j * num_mel_filters + i
        : (i, j) => i * d1Max + j;

    for (let i = 0; i < num_mel_filters; ++i) { // num melfilters (e.g., 80)
        const filter = mel_filters[i];
        for (let j = 0; j < d1; ++j) { // num frames (e.g., 3000)
            const magnitude = magnitudes[j];

            let sum = 0;
            for (let k = 0; k < num_frequency_bins; ++k) { // num frequency bins (e.g., 201)
                sum += filter[k] * magnitude[k];
            }

            mel_spec[flat_index(i, j)] = Math.max(mel_floor, sum);
        }
    }

    if (power !== null && log_mel !== null) {
        // Applies `fn` to every computed entry, leaving padded frames untouched (zero).
        const map_computed = (fn) => {
            for (let i = 0; i < num_mel_filters; ++i) {
                for (let j = 0; j < d1; ++j) {
                    const index = flat_index(i, j);
                    mel_spec[index] = fn(mel_spec[index]);
                }
            }
        };

        switch (log_mel) {
            case 'log':
                map_computed(Math.log);
                break;
            case 'log10':
                map_computed(Math.log10);
                break;
            case 'dB':
                if (power === 1.0) {
                    // NOTE: operates in-place
                    amplitude_to_db(mel_spec, reference, min_value, db_range);
                } else if (power === 2.0) {
                    power_to_db(mel_spec, reference, min_value, db_range);
                } else {
                    throw new Error(`Cannot use log_mel option '${log_mel}' with power ${power}`);
                }
                break;
            default:
                throw new Error(`log_mel must be one of null, 'log', 'log10' or 'dB'. Got '${log_mel}'`);
        }
    }

    return { data: mel_spec, dims };
}

/**
 * Returns an array containing the specified window.
 * @param {number} window_length The length of the window in samples.
 * @param {string} name The name of the window function: `"boxcar"`, `"hann"` or `"hann_window"`.
 * @param {Object} options Additional options.
 * @param {boolean} [options.periodic=true] Whether the window is periodic or symmetric.
 * @param {number} [options.frame_length=null] The length of the analysis frames in samples.
 * Provide a value for `frame_length` if the window is smaller than the frame length, so that it will be zero-padded.
 * @param {boolean} [options.center=true] Whether to center the window inside the FFT buffer. Only used when `frame_length` is provided.
 * @returns {Float64Array} The window of shape `(window_length,)` or `(frame_length,)`.
 * @throws {Error} If the window type is unknown, or if `window_length` is larger than `frame_length`.
 */
export function window_function(window_length, name, {
    periodic = true,
    frame_length = null,
    center = true,
} = {}) {
    // A periodic window is a symmetric window of length + 1 with the last sample dropped.
    const length = periodic ? window_length + 1 : window_length;
    let window;
    switch (name) {
        case 'boxcar':
            window = new Float64Array(length).fill(1.0);
            break;
        case 'hann':
        case 'hann_window':
            window = hanning(length);
            break;
        default:
            throw new Error(`Unknown window type ${name}.`);
    }
    if (periodic) {
        window = window.subarray(0, window_length);
    }
    if (frame_length === null) {
        return window;
    }
    if (window_length > frame_length) {
        throw new Error(`Length of the window (${window_length}) may not be larger than frame_length (${frame_length})`);
    }
    if (window_length === frame_length) {
        // Nothing to pad.
        return window;
    }

    // Zero-pad the window up to `frame_length`, optionally centering it.
    const padded_window = new Float64Array(frame_length);
    const offset = center ? Math.floor((frame_length - window_length) / 2) : 0;
    padded_window.set(window, offset);
    return padded_window;
}