@webav/av-cliper

Version:

WebCodecs-based, combine video, audio, images, text, with animation support 基于 WebCodecs 合成视频、音频、图片、文字，支持动画

1 lines • 241 kB

Source Map (JSON)

{"version":3,"file":"av-cliper.umd.cjs","sources":["../src/chromakey.ts","../src/dom-utils.ts","../src/av-utils.ts","../src/clips/iclip.ts","../src/mp4-utils/mp4box-utils.ts","../src/clips/mp4-clip.ts","../src/clips/img-clip.ts","../src/clips/audio-clip.ts","../src/clips/media-stream-clip.ts","../src/clips/embed-subtitles-clip.ts","../src/mp4-utils/sample-transform.ts","../src/mp4-utils/index.ts","../src/combinator.ts","../src/sprite/rect.ts","../src/sprite/base-sprite.ts","../src/sprite/offscreen-sprite.ts","../src/sprite/visible-sprite.ts"],"sourcesContent":["// 改编自 https://jameshfisher.com/2020/08/11/production-ready-green-screen-in-the-browser/\nconst vertexShader = `#version 300 es\n layout (location = 0) in vec4 a_position;\n layout (location = 1) in vec2 a_texCoord;\n out vec2 v_texCoord;\n void main () {\n gl_Position = a_position;\n v_texCoord = a_texCoord;\n }\n`;\n\nconst fragmentShader = `#version 300 es\nprecision mediump float;\nout vec4 FragColor;\nin vec2 v_texCoord;\n\nuniform sampler2D frameTexture;\nuniform vec3 keyColor;\n\n// 色度的相似度计算\nuniform float similarity;\n// 透明度的平滑度计算\nuniform float smoothness;\n// 降低绿幕饱和度，提高抠图准确度\nuniform float spill;\n\nvec2 RGBtoUV(vec3 rgb) {\n return vec2(\n rgb.r * -0.169 + rgb.g * -0.331 + rgb.b * 0.5 + 0.5,\n rgb.r * 0.5 + rgb.g * -0.419 + rgb.b * -0.081 + 0.5\n );\n}\n\nvoid main() {\n // 获取当前像素的rgba值\n vec4 rgba = texture(frameTexture, v_texCoord);\n // 计算当前像素与绿幕像素的色度差值\n vec2 chromaVec = RGBtoUV(rgba.rgb) - RGBtoUV(keyColor);\n // 计算当前像素与绿幕像素的色度距离（向量长度）, 越相像则色度距离越小\n float chromaDist = sqrt(dot(chromaVec, chromaVec));\n // 设置了一个相似度阈值，baseMask为负，则表明是绿幕，为正则表明不是绿幕\n float baseMask = chromaDist - similarity;\n // 如果baseMask为负数，fullMask等于0；baseMask为正数，越大，则透明度越低\n float fullMask = pow(clamp(baseMask / smoothness, 0., 1.), 1.5);\n rgba.a = fullMask; // 设置透明度\n // 如果baseMask为负数，spillVal等于0；baseMask为整数，越小，饱和度越低\n float spillVal = pow(clamp(baseMask / spill, 0., 1.), 1.5);\n float desat = clamp(rgba.r * 0.2126 + rgba.g * 0.7152 + rgba.b * 0.0722, 0., 1.); // 计算当前像素的灰度值\n rgba.rgb = mix(vec3(desat, desat, desat), rgba.rgb, spillVal);\n FragColor = rgba;\n}\n`;\n\nconst POINT_POS = [-1, 1, -1, -1, 1, -1, 1, -1, 1, 1, -1, 1];\nconst TEX_COORD_POS = [0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1];\n\n// 初始化着色器程序，让 WebGL 知道如何绘制我们的数据\nfunction initShaderProgram(\n gl: WebGLRenderingContext,\n vsSource: string,\n fsSource: string,\n) {\n const vertexShader = loadShader(gl, gl.VERTEX_SHADER, vsSource)!;\n const fragmentShader = loadShader(gl, gl.FRAGMENT_SHADER, fsSource)!;\n\n // 创建着色器程序\n const shaderProgram = gl.createProgram()!;\n gl.attachShader(shaderProgram, vertexShader);\n gl.attachShader(shaderProgram, fragmentShader);\n gl.linkProgram(shaderProgram);\n\n if (!gl.getProgramParameter(shaderProgram, gl.LINK_STATUS)) {\n throw Error(\n gl.getProgramInfoLog(shaderProgram) ??\n 'Unable to initialize the shader program',\n );\n }\n\n return shaderProgram;\n}\n\n// 创建指定类型的着色器，上传 source 源码并编译\nfunction loadShader(gl: WebGLRenderingContext, type: number, source: string) {\n const shader = gl.createShader(type)!;\n\n // Send the source to the shader object\n gl.shaderSource(shader, source);\n\n // Compile the shader program\n gl.compileShader(shader);\n\n // See if it compiled successfully\n if (!gl.getShaderParameter(shader, gl.COMPILE_STATUS)) {\n const errMsg = gl.getShaderInfoLog(shader);\n gl.deleteShader(shader);\n throw Error(errMsg ?? 'An error occurred compiling the shaders');\n }\n\n return shader;\n}\n\nfunction updateTexture(\n gl: WebGLRenderingContext,\n img: TImgSource,\n texture: WebGLTexture,\n) {\n gl.bindTexture(gl.TEXTURE_2D, texture);\n gl.texImage2D(gl.TEXTURE_2D, 0, gl.RGBA, gl.RGBA, gl.UNSIGNED_BYTE, img);\n gl.drawArrays(gl.TRIANGLES, 0, 6);\n}\n\nfunction initTexture(gl: WebGLRenderingContext) {\n const texture = gl.createTexture();\n if (texture == null) throw Error('Create WebGL texture error');\n gl.bindTexture(gl.TEXTURE_2D, texture);\n\n // put a single pixel in the texture so we can use it immediately.\n const level = 0;\n const internalFormat = gl.RGBA;\n const width = 1;\n const height = 1;\n const border = 0;\n const srcFormat = gl.RGBA;\n const srcType = gl.UNSIGNED_BYTE;\n const pixel = new Uint8Array([0, 0, 255, 255]); // opaque blue\n gl.texImage2D(\n gl.TEXTURE_2D,\n level,\n internalFormat,\n width,\n height,\n border,\n srcFormat,\n srcType,\n pixel,\n );\n\n gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MAG_FILTER, gl.LINEAR);\n gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_MIN_FILTER, gl.LINEAR);\n gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_S, gl.CLAMP_TO_EDGE);\n gl.texParameteri(gl.TEXTURE_2D, gl.TEXTURE_WRAP_T, gl.CLAMP_TO_EDGE);\n\n return texture;\n}\n\ninterface IChromakeyOpts {\n keyColor: [number, number, number];\n similarity: number;\n smoothness: number;\n spill: number;\n}\n\nfunction initCvs(\n opts: {\n width: number;\n height: number;\n } & IChromakeyOpts,\n) {\n const cvs =\n 'document' in globalThis\n ? globalThis.document.createElement('canvas')\n : new OffscreenCanvas(opts.width, opts.height);\n cvs.width = opts.width;\n cvs.height = opts.height;\n\n const gl = cvs.getContext('webgl2', {\n premultipliedAlpha: false,\n alpha: true,\n }) as WebGL2RenderingContext | null;\n\n if (gl == null) throw Error('Cant create gl context');\n\n const shaderProgram = initShaderProgram(gl, vertexShader, fragmentShader);\n gl.useProgram(shaderProgram);\n\n gl.uniform3fv(\n gl.getUniformLocation(shaderProgram, 'keyColor'),\n opts.keyColor.map((v) => v / 255),\n );\n gl.uniform1f(\n gl.getUniformLocation(shaderProgram, 'similarity'),\n opts.similarity,\n );\n gl.uniform1f(\n gl.getUniformLocation(shaderProgram, 'smoothness'),\n opts.smoothness,\n );\n gl.uniform1f(gl.getUniformLocation(shaderProgram, 'spill'), opts.spill);\n\n const posBuffer = gl.createBuffer();\n gl.bindBuffer(gl.ARRAY_BUFFER, posBuffer);\n gl.bufferData(gl.ARRAY_BUFFER, new Float32Array(POINT_POS), gl.STATIC_DRAW);\n const a_position = gl.getAttribLocation(shaderProgram, 'a_position');\n gl.vertexAttribPointer(\n a_position,\n 2,\n gl.FLOAT,\n false,\n Float32Array.BYTES_PER_ELEMENT * 2,\n 0,\n );\n gl.enableVertexAttribArray(a_position);\n\n const texCoordBuffer = gl.createBuffer();\n gl.bindBuffer(gl.ARRAY_BUFFER, texCoordBuffer);\n gl.bufferData(\n gl.ARRAY_BUFFER,\n new Float32Array(TEX_COORD_POS),\n gl.STATIC_DRAW,\n );\n const a_texCoord = gl.getAttribLocation(shaderProgram, 'a_texCoord');\n gl.vertexAttribPointer(\n a_texCoord,\n 2,\n gl.FLOAT,\n false,\n Float32Array.BYTES_PER_ELEMENT * 2,\n 0,\n );\n gl.enableVertexAttribArray(a_texCoord);\n\n gl.pixelStorei(gl.UNPACK_FLIP_Y_WEBGL, 1);\n\n return { cvs, gl };\n}\n\ntype TImgSource =\n | HTMLVideoElement\n | HTMLCanvasElement\n | HTMLImageElement\n | ImageBitmap\n | OffscreenCanvas\n | VideoFrame;\n\nfunction getSourceWH(imgSource: TImgSource) {\n return imgSource instanceof VideoFrame\n ? { width: imgSource.codedWidth, height: imgSource.codedHeight }\n : { width: imgSource.width, height: imgSource.height };\n}\n\nfunction getKeyColor(imgSource: TImgSource) {\n const cvs = new OffscreenCanvas(1, 1);\n const ctx = cvs.getContext('2d')!;\n ctx.drawImage(imgSource, 0, 0);\n const {\n data: [r, g, b],\n } = ctx.getImageData(0, 0, 1, 1);\n return [r, g, b] as [number, number, number];\n}\n\n/**\n * 绿幕抠图\n * keyColor 需要扣除的背景色，若不传则取第一个像素点\n * similarity 背景色相似度阈值，过小可能保留背景色，过大可能扣掉更多非背景像素点\n * smoothness 平滑度；过小可能出现锯齿，过大导致整体变透明\n * spill 饱和度；过小可能保留绿色混合，过大导致图片变灰度\n * @param opts: {\n * keyColor?: [r, g, b]\n * similarity: number\n * smoothness: number\n * spill: number\n * }\n */\nexport const createChromakey = (\n opts: Omit<IChromakeyOpts, 'keyColor'> & {\n keyColor?: [number, number, number];\n },\n) => {\n let cvs: HTMLCanvasElement | OffscreenCanvas | null = null;\n let gl: WebGLRenderingContext | null = null;\n let keyC = opts.keyColor;\n let texture: WebGLTexture | null = null;\n\n return async (imgSource: TImgSource) => {\n if (cvs == null || gl == null || texture == null) {\n if (keyC == null) keyC = getKeyColor(imgSource);\n ({ cvs, gl } = initCvs({\n ...getSourceWH(imgSource),\n keyColor: keyC,\n ...opts,\n }));\n texture = initTexture(gl);\n }\n\n updateTexture(gl, imgSource, texture);\n\n if (\n globalThis.VideoFrame != null &&\n imgSource instanceof globalThis.VideoFrame\n ) {\n const rs = new VideoFrame(cvs, {\n alpha: 'keep',\n timestamp: imgSource.timestamp,\n duration: imgSource.duration ?? undefined,\n });\n imgSource.close();\n return rs;\n }\n\n return createImageBitmap(cvs, {\n imageOrientation: imgSource instanceof ImageBitmap ? 'flipY' : 'none',\n });\n };\n};\n","// 在主线程中执行的工具函数\n\n/**\n * 创建一个新的 HTML 元素\n * @param tagName - 要创建的元素的标签名\n * @returns 新创建的 HTML 元素\n */\nexport function createEl(tagName: string): HTMLElement {\n return document.createElement(tagName);\n}\n\nfunction arrayBufferToBase64(buffer: ArrayBuffer) {\n var binary = '';\n var bytes = new Uint8Array(buffer);\n var len = bytes.byteLength;\n for (let i = 0; i < len; i++) {\n binary += String.fromCharCode(bytes[i]);\n }\n return window.btoa(binary);\n}\n\n/**\n * 将文本渲染为图片\n * @param txt - 要渲染的文本\n * @param cssText - 应用于文本的 CSS 样式\n * @returns 渲染后的图片元素\n */\nexport async function renderTxt2Img(\n txt: string,\n cssText: string,\n opts: {\n font?: { name: string; url: string };\n onCreated?: (el: HTMLElement) => void;\n } = {},\n): Promise<HTMLImageElement> {\n const preEl = createEl('pre');\n preEl.style.cssText = `margin: 0; ${cssText}; position: fixed;`;\n preEl.textContent = txt;\n document.body.appendChild(preEl);\n opts.onCreated?.(preEl);\n\n // 避免重复覆盖其他字体他\n const tmpFontName = 'TMP_FONT_NAME_' + crypto.randomUUID();\n let fontFace: FontFace | null = null;\n // 等待字体加载完成后再计算尺寸\n if (opts.font != null) {\n preEl.style.fontFamily = tmpFontName;\n fontFace = new FontFace(tmpFontName, `url(${opts.font.url})`);\n await fontFace.load();\n // @ts-expect-error https://developer.mozilla.org/en-US/docs/Web/API/FontFaceSet/add\n document.fonts.add(fontFace);\n await document.fonts.ready;\n }\n\n const { width, height } = preEl.getBoundingClientRect();\n // 计算出 rect，立即从dom移除\n preEl.remove();\n if (fontFace != null) {\n // @ts-expect-error https://developer.mozilla.org/en-US/docs/Web/API/FontFaceSet/delete\n document.fonts.delete(fontFace);\n }\n\n const img = new Image();\n img.width = width;\n img.height = height;\n const fontFaceStr =\n opts.font == null\n ? ''\n : `\n @font-face {\n font-family: '${tmpFontName}';\n src: url('data:font/woff2;base64,${arrayBufferToBase64(await (await fetch(opts.font.url)).arrayBuffer())}') format('woff2');\n }\n `;\n const svgStr = `\n <svg xmlns=\"http://www.w3.org/2000/svg\" width=\"${width}\" height=\"${height}\">\n <style>\n ${fontFaceStr}\n </style>\n <foreignObject width=\"100%\" height=\"100%\">\n <div xmlns=\"http://www.w3.org/1999/xhtml\">${preEl.outerHTML}</div>\n </foreignObject>\n </svg>\n `\n .replace(/\\t/g, '')\n .replace(/#/g, '%23');\n\n img.src = `data:image/svg+xml;charset=utf-8,${svgStr}`;\n\n await new Promise((resolve) => {\n img.onload = resolve;\n });\n return img;\n}\n\n/**\n * 将文本渲染为 {@link ImageBitmap}，用来创建 {@link ImgClip}\n * @param txt - 要渲染的文本\n * @param cssText - 应用于文本的 CSS 样式\n * @param opts - 选项\n * @param opts.font - 自定义字体\n * @param opts.onCreated - 创建完成后的回调\n *\n * @example\n * new ImgClip(\n * await renderTxt2ImgBitmap(\n * '水印',\n * `font-size:40px; color: white; text-shadow: 2px 2px 6px red; font-family: CustomFont;`,\n * {\n * font: {\n * name: 'CustomFont',\n * url: '/CustomFont.ttf',\n * },\n * },\n * )\n * )\n */\nexport async function renderTxt2ImgBitmap(\n txt: string,\n cssText: string,\n opts: {\n font?: { name: string; url: string };\n onCreated?: (el: HTMLElement) => void;\n } = {},\n): Promise<ImageBitmap> {\n const imgEl = await renderTxt2Img(txt, cssText, opts);\n const cvs = new OffscreenCanvas(imgEl.width, imgEl.height);\n const ctx = cvs.getContext('2d');\n ctx?.drawImage(imgEl, 0, 0, imgEl.width, imgEl.height);\n return await createImageBitmap(cvs);\n}\n","// 能同时在 worker 和主线程中运行的工具函数\n\nimport { workerTimer } from '@webav/internal-utils';\nimport * as waveResampler from 'wave-resampler';\n\nimport { Log } from '@webav/internal-utils';\n\nif (import.meta.env?.DEV) {\n Log.setLogLevel(Log.debug);\n}\n\nif (import.meta.env?.MODE === 'test') {\n Log.setLogLevel(Log.warn);\n}\n\n/**\n * 合并（串联）多个 Float32Array，通常用于合并 PCM 数据\n */\nexport function concatFloat32Array(bufs: Float32Array[]): Float32Array {\n const rs = new Float32Array(\n bufs.map((buf) => buf.length).reduce((a, b) => a + b),\n );\n\n let offset = 0;\n for (const buf of bufs) {\n rs.set(buf, offset);\n offset += buf.length;\n }\n\n return rs;\n}\n\n/**\n * 将小片段的 PCM 合并成一个大片段\n * @param fragments 小片段 PCM，子元素是不同声道的原始 PCM 数据\n */\nexport function concatPCMFragments(\n fragments: Float32Array[][],\n): Float32Array[] {\n // fragments: [[chan0, chan1], [chan0, chan1]...]\n // chanListPCM: [[chan0, chan0...], [chan1, chan1...]]\n const chanListPCM: Float32Array[][] = [];\n for (let i = 0; i < fragments.length; i += 1) {\n for (let j = 0; j < fragments[i].length; j += 1) {\n if (chanListPCM[j] == null) chanListPCM[j] = [];\n chanListPCM[j].push(fragments[i][j]);\n }\n }\n // [bigChan0, bigChan1]\n return chanListPCM.map(concatFloat32Array);\n}\n\n/**\n * 从 AudioData 中提取 PCM 数据的工具函数\n */\nexport function extractPCM4AudioData(ad: AudioData): Float32Array[] {\n if (ad.format === 'f32-planar') {\n const rs = [];\n for (let idx = 0; idx < ad.numberOfChannels; idx += 1) {\n const chanBufSize = ad.allocationSize({ planeIndex: idx });\n const chanBuf = new ArrayBuffer(chanBufSize);\n ad.copyTo(chanBuf, { planeIndex: idx });\n rs.push(new Float32Array(chanBuf));\n }\n return rs;\n } else if (ad.format === 'f32') {\n const buf = new ArrayBuffer(ad.allocationSize({ planeIndex: 0 }));\n ad.copyTo(buf, { planeIndex: 0 });\n return convertF32ToPlanar(new Float32Array(buf), ad.numberOfChannels);\n } else if (ad.format === 's16') {\n const buf = new ArrayBuffer(ad.allocationSize({ planeIndex: 0 }));\n ad.copyTo(buf, { planeIndex: 0 });\n return convertS16ToF32Planar(new Int16Array(buf), ad.numberOfChannels);\n }\n throw Error('Unsupported audio data format');\n}\n\n/**\n * Convert s16 PCM to f32-planar\n * @param pcmS16Data - The s16 PCM data.\n * @param numChannels - Number of audio channels.\n * @returns An array of Float32Array, each containing the audio data for one channel.\n */\nfunction convertS16ToF32Planar(pcmS16Data: Int16Array, numChannels: number) {\n const numSamples = pcmS16Data.length / numChannels;\n const planarData = Array.from(\n { length: numChannels },\n () => new Float32Array(numSamples),\n );\n\n for (let i = 0; i < numSamples; i++) {\n for (let channel = 0; channel < numChannels; channel++) {\n const sample = pcmS16Data[i * numChannels + channel];\n planarData[channel][i] = sample / 32768; // Normalize to range [-1.0, 1.0]\n }\n }\n\n return planarData;\n}\n\nfunction convertF32ToPlanar(pcmF32Data: Float32Array, numChannels: number) {\n const numSamples = pcmF32Data.length / numChannels;\n const planarData = Array.from(\n { length: numChannels },\n () => new Float32Array(numSamples),\n );\n\n for (let i = 0; i < numSamples; i++) {\n for (let channel = 0; channel < numChannels; channel++) {\n planarData[channel][i] = pcmF32Data[i * numChannels + channel];\n }\n }\n\n return planarData;\n}\n\n/**\n * 从 AudioBuffer 中提取 PCM\n */\nexport function extractPCM4AudioBuffer(ab: AudioBuffer): Float32Array[] {\n return Array(ab.numberOfChannels)\n .fill(0)\n .map((_, idx) => {\n return ab.getChannelData(idx);\n });\n}\n\n/**\n * 调整音频数据的音量\n * @param ad - 要调整的音频对象\n * @param volume - 音量调整系数（0.0 - 1.0）\n * @returns 调整音量后的新音频数据\n */\nexport function adjustAudioDataVolume(ad: AudioData, volume: number) {\n const data = new Float32Array(\n concatFloat32Array(extractPCM4AudioData(ad)),\n ).map((v) => v * volume);\n const newAd = new AudioData({\n sampleRate: ad.sampleRate,\n numberOfChannels: ad.numberOfChannels,\n timestamp: ad.timestamp,\n format: ad.format!,\n numberOfFrames: ad.numberOfFrames,\n data,\n });\n ad.close();\n return newAd;\n}\n\n/**\n * 解码图像流，返回一个视频帧数组。\n *\n * @param stream - 包含图像数据的可读流。\n * @param type - 图像的 MIME 类型，例如 'image/jpeg'。\n *\n * @returns 返回一个 Promise，该 Promise 在解码完成后解析为 {@link VideoFrame} 数组。\n *\n * @see [解码动图](https://webav-tech.github.io/WebAV/demo/1_3-decode-image)\n *\n * @example\n *\n * const frames = await decodeImg(\n * (await fetch('<gif url>')).body!,\n * `image/gif`,\n * );\n */\nexport async function decodeImg(\n stream: ReadableStream<Uint8Array>,\n type: string,\n): Promise<VideoFrame[]> {\n const init = {\n type,\n data: stream,\n };\n const imageDecoder = new ImageDecoder(init);\n\n await Promise.all([imageDecoder.completed, imageDecoder.tracks.ready]);\n\n let frameCnt = imageDecoder.tracks.selectedTrack?.frameCount ?? 1;\n\n const rs: VideoFrame[] = [];\n for (let i = 0; i < frameCnt; i += 1) {\n rs.push((await imageDecoder.decode({ frameIndex: i })).image);\n }\n return rs;\n}\n\n/**\n * 混合双通道音轨的 PCM 数据，并将多声道并排成一个 Float32Array 输出\n * @param audios - 一个二维数组，每个元素是一个 Float32Array 数组，代表一个音频流的 PCM 数据。\n * 每个 Float32Array 数组的第一个元素是左声道数据，第二个元素（如果有）是右声道数据。\n * 如果只有左声道数据，则右声道将复用左声道数据。\n *\n * @returns 返回一个 Float32Array，返回结果是将这个一个音轨的左右声道并排成 Float32Array。\n *\n * @example\n *\n * const audios = [\n * [new Float32Array([0.1, 0.2, 0.3]), new Float32Array([0.4, 0.5, 0.6])],\n * [new Float32Array([0.7, 0.8, 0.9])],\n * ];\n * const mixed = mixinPCM(audios);\n */\nexport function mixinPCM(audios: Float32Array[][]): Float32Array {\n const maxLen = Math.max(...audios.map((a) => a[0]?.length ?? 0));\n const data = new Float32Array(maxLen * 2);\n\n for (let bufIdx = 0; bufIdx < maxLen; bufIdx++) {\n let chan0 = 0;\n let chan1 = 0;\n for (let trackIdx = 0; trackIdx < audios.length; trackIdx++) {\n const _c0 = audios[trackIdx][0]?.[bufIdx] ?? 0;\n // 如果是单声道 PCM，第二声道复用第一声道数据\n const _c1 = audios[trackIdx][1]?.[bufIdx] ?? _c0;\n chan0 += _c0;\n chan1 += _c1;\n }\n data[bufIdx] = chan0;\n data[bufIdx + maxLen] = chan1;\n }\n\n return data;\n}\n\n/**\n * 对 PCM 音频数据进行重采样。\n *\n * @param pcmData - 一个 Float32Array 数组，每个元素代表一个声道的 PCM 数据。\n * @param curRate - 当前的采样率。\n * @param target - 目标参数对象。\n * @param target.rate - 目标采样率。\n * @param target.chanCount - 目标声道数。\n *\n * @returns 返回一个 Promise，该 Promise 在重采样完成后解析为一个 Float32Array 数组，每个元素代表一个声道的 PCM 数据。\n *\n * @example\n *\n * const pcmData = [new Float32Array([0.1, 0.2, 0.3]), new Float32Array([0.4, 0.5, 0.6])];\n * const curRate = 44100;\n * const target = { rate: 48000, chanCount: 2 };\n * const resampled = await audioResample(pcmData, curRate, target);\n */\nexport async function audioResample(\n pcmData: Float32Array[],\n curRate: number,\n target: {\n rate: number;\n chanCount: number;\n },\n): Promise<Float32Array[]> {\n const chanCnt = pcmData.length;\n const emptyPCM = Array(target.chanCount)\n .fill(0)\n .map(() => new Float32Array(0));\n if (chanCnt === 0) return emptyPCM;\n\n const len = Math.max(...pcmData.map((c) => c.length));\n if (len === 0) return emptyPCM;\n\n // The Worker scope does not have access to OfflineAudioContext\n if (globalThis.OfflineAudioContext == null) {\n return pcmData.map(\n (p) =>\n new Float32Array(\n waveResampler.resample(p, curRate, target.rate, {\n method: 'sinc',\n LPF: false,\n }),\n ),\n );\n }\n\n const ctx = new globalThis.OfflineAudioContext(\n target.chanCount,\n (len * target.rate) / curRate,\n target.rate,\n );\n const abSource = ctx.createBufferSource();\n const ab = ctx.createBuffer(chanCnt, len, curRate);\n pcmData.forEach((d, idx) => ab.copyToChannel(d, idx));\n\n abSource.buffer = ab;\n abSource.connect(ctx.destination);\n abSource.start();\n\n return extractPCM4AudioBuffer(await ctx.startRendering());\n}\n\n/**\n * 使当前执行环境暂停一段时间。\n * @param time - 暂停的时间，单位为毫秒。\n * @example\n * await sleep(1000); // 暂停 1 秒\n */\nexport function sleep(time: number): Promise<void> {\n return new Promise((resolve) => {\n const stop = workerTimer(() => {\n stop();\n resolve();\n }, time);\n });\n}\n\n/**\n * 从给定的 Float32Array 中提取一个环形切片，超出边界从 0 开始循环\n *\n * 主要用于截取 PCM 实现循环播放\n *\n * @param data - 输入的 Float32Array。\n * @param start - 切片的开始索引。\n * @param end - 切片的结束索引。\n * @returns - 返回一个新的 Float32Array，包含从 start 到 end 的数据。\n *\n * @example\n * const data = new Float32Array([0, 1, 2, 3, 4, 5]);\n * ringSliceFloat32Array(data, 4, 6); // => Float32Array [4, 5, 0]\n */\nexport function ringSliceFloat32Array(\n data: Float32Array,\n start: number,\n end: number,\n): Float32Array {\n const cnt = end - start;\n const rs = new Float32Array(cnt);\n let i = 0;\n while (i < cnt) {\n rs[i] = data[(start + i) % data.length];\n i += 1;\n }\n return rs;\n}\n\n/**\n * 改变 PCM 数据的播放速率，1 表示正常播放，0.5 表示播放速率减半，2 表示播放速率加倍\n */\nexport function changePCMPlaybackRate(\n pcmData: Float32Array,\n playbackRate: number,\n) {\n // 计算新的采样率\n const newLength = Math.floor(pcmData.length / playbackRate);\n const newPcmData = new Float32Array(newLength);\n\n // 线性插值\n for (let i = 0; i < newLength; i++) {\n // 原始数据中的位置\n const originalIndex = i * playbackRate;\n const intIndex = Math.floor(originalIndex);\n const frac = originalIndex - intIndex;\n\n // 边界检查\n if (intIndex + 1 < pcmData.length) {\n newPcmData[i] =\n pcmData[intIndex] * (1 - frac) + pcmData[intIndex + 1] * frac;\n } else {\n newPcmData[i] = pcmData[intIndex]; // 最后一个样本\n }\n }\n\n return newPcmData;\n}\n","interface IClipMeta {\n width: number;\n height: number;\n duration: number;\n}\n\n/**\n * 所有素材需要实现的接口\n *\n * 素材（Clip）是不同数据类型的抽象，给其他模块提供数据\n *\n * WebAV 内置了 {@link MP4Clip}, {@link AudioClip}, {@link ImgClip}, {@link MediaStreamClip} 等常用素材，用于给 {@link Combinator} {@link AVCanvas} 提供数据\n *\n * 你只需实现该接口即可自定义素材，拥有最大的灵活度来生成视频内容，比如动画、转场效果等\n * @see [自定义素材](https://webav-tech.github.io/WebAV/demo/2_6-custom-clip)\n *\n */\nexport interface IClip {\n /**\n * 从素材中提取指定时间数据\n * @param time 时间，单位微秒\n */\n tick: (time: number) => Promise<{\n video?: VideoFrame | ImageBitmap | null;\n audio?: Float32Array[];\n state: 'done' | 'success';\n }>;\n\n /**\n * 当素材准备完成，ready 会切换到 resolved 状态\n */\n readonly ready: Promise<IClipMeta>;\n\n /**\n * 数据元数据\n */\n readonly meta: IClipMeta;\n\n /**\n * clone，返回一个新素材\n */\n clone: () => Promise<this>;\n\n /**\n * 按指定时间切割，返回该时刻前后两个新素材，常用于剪辑场景按时间分割素材\n *\n * 该方法不会破坏原素材的数据\n *\n * @param time 时间，微秒\n * @returns\n */\n split?: (time: number) => Promise<[this, this]>;\n\n /**\n * 销毁实例，释放资源\n */\n destroy: () => void;\n}\n\n/**\n * 默认的音频设置，⚠️ 不要变更它的值 ⚠️\n */\nexport const DEFAULT_AUDIO_CONF = {\n sampleRate: 48000,\n channelCount: 2,\n codec: 'mp4a.40.2',\n} as const;\n","import mp4box, {\n AudioTrackOpts,\n ESDSBoxParser,\n MP4ABoxParser,\n MP4ArrayBuffer,\n MP4File,\n MP4Info,\n MP4Sample,\n TrakBoxParser,\n VideoTrackOpts,\n} from '@webav/mp4box.js';\nimport { file } from 'opfs-tools';\nimport { DEFAULT_AUDIO_CONF } from '../clips';\n\nexport function extractFileConfig(file: MP4File, info: MP4Info) {\n const vTrack = info.videoTracks[0];\n const rs: {\n videoTrackConf?: VideoTrackOpts;\n videoDecoderConf?: Parameters<VideoDecoder['configure']>[0];\n audioTrackConf?: AudioTrackOpts;\n audioDecoderConf?: Parameters<AudioDecoder['configure']>[0];\n } = {};\n if (vTrack != null) {\n const videoDesc = parseVideoCodecDesc(file.getTrackById(vTrack.id))?.buffer;\n const { descKey, type } = vTrack.codec.startsWith('avc1')\n ? { descKey: 'avcDecoderConfigRecord', type: 'avc1' }\n : vTrack.codec.startsWith('hvc1')\n ? { descKey: 'hevcDecoderConfigRecord', type: 'hvc1' }\n : { descKey: '', type: '' };\n if (descKey !== '') {\n rs.videoTrackConf = {\n timescale: vTrack.timescale,\n duration: vTrack.duration,\n width: vTrack.video.width,\n height: vTrack.video.height,\n brands: info.brands,\n type,\n [descKey]: videoDesc,\n };\n }\n\n rs.videoDecoderConf = {\n codec: vTrack.codec,\n codedHeight: vTrack.video.height,\n codedWidth: vTrack.video.width,\n description: videoDesc as ArrayBuffer,\n };\n }\n\n const aTrack = info.audioTracks[0];\n if (aTrack != null) {\n const esdsBox = getESDSBoxFromMP4File(file);\n const audioInfo = esdsBox == null ? {} : parseAudioInfoFromESDSBox(esdsBox);\n\n rs.audioTrackConf = {\n timescale: aTrack.timescale,\n samplerate: audioInfo.sampleRate ?? aTrack.audio.sample_rate,\n channel_count: audioInfo.numberOfChannels ?? aTrack.audio.channel_count,\n hdlr: 'soun',\n type: aTrack.codec.startsWith('mp4a') ? 'mp4a' : aTrack.codec,\n description: esdsBox,\n };\n\n rs.audioDecoderConf = {\n codec: audioInfo.codec ?? DEFAULT_AUDIO_CONF.codec,\n numberOfChannels:\n audioInfo.numberOfChannels ?? aTrack.audio.channel_count,\n sampleRate: audioInfo.sampleRate ?? aTrack.audio.sample_rate,\n };\n }\n return rs;\n}\n\n// track is H.264, H.265 or VPX.\nfunction parseVideoCodecDesc(track: TrakBoxParser): Uint8Array | undefined {\n for (const entry of track.mdia.minf.stbl.stsd.entries) {\n // @ts-expect-error\n const box = entry.avcC ?? entry.hvcC ?? entry.av1C ?? entry.vpcC;\n if (box != null) {\n const stream = new mp4box.DataStream(\n undefined,\n 0,\n mp4box.DataStream.BIG_ENDIAN,\n );\n box.write(stream);\n return new Uint8Array(stream.buffer.slice(8)); // Remove the box header.\n }\n }\n return undefined;\n}\n\nfunction getESDSBoxFromMP4File(file: MP4File, codec = 'mp4a') {\n const mp4aBox = file.moov?.traks\n .map((t) => t.mdia.minf.stbl.stsd.entries)\n .flat()\n .find(({ type }) => type === codec) as MP4ABoxParser;\n\n return mp4aBox?.esds;\n}\n\n// 从 ESDS Box 中解析出音频配置信息，解决封装层音频信息标识错误，导致解码异常\nfunction parseAudioInfoFromESDSBox(esds: ESDSBoxParser): {\n codec?: string;\n sampleRate?: number;\n numberOfChannels?: number;\n} {\n let codec = 'mp4a';\n const decConfDesc = esds.esd.descs[0];\n if (decConfDesc == null) return {};\n codec += '.' + decConfDesc.oti.toString(16);\n\n const decSpecInfo = decConfDesc.descs[0];\n if (decSpecInfo == null) {\n if (codec.endsWith('40')) codec += '.2';\n return { codec };\n }\n\n // ref: https://wiki.multimedia.cx/index.php/MPEG-4_Audio#Audio_Specific_Config\n const audioObjectType = (decSpecInfo.data[0] & 0xf8) >> 3;\n codec += '.' + audioObjectType;\n\n const [byte1, byte2] = decSpecInfo.data;\n // sampleRate 是第一字节后 3bit + 第二字节前 1bit\n const sampleRateIdx = ((byte1 & 0x07) << 1) + (byte2 >> 7);\n // numberOfChannels 是第二字节 [2, 5] 4bit\n const numberOfChannels = (byte2 & 0x7f) >> 3;\n const sampleRateEnum = [\n 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025,\n 8000, 7350,\n ] as const;\n\n return {\n codec,\n sampleRate: sampleRateEnum[sampleRateIdx],\n numberOfChannels,\n };\n}\n\n/**\n * 快速解析 mp4 文件，如果是非 fMP4 格式，会优先解析 moov box（略过 mdat）避免占用过多内存\n */\nexport async function quickParseMP4File(\n reader: Awaited<ReturnType<ReturnType<typeof file>['createReader']>>,\n onReady: (data: { mp4boxFile: MP4File; info: MP4Info }) => void,\n onSamples: (\n id: number,\n sampleType: 'video' | 'audio',\n samples: MP4Sample[],\n ) => void,\n) {\n const mp4boxFile = mp4box.createFile(false);\n mp4boxFile.onReady = (info) => {\n onReady({ mp4boxFile, info });\n const vTrackId = info.videoTracks[0]?.id;\n if (vTrackId != null)\n mp4boxFile.setExtractionOptions(vTrackId, 'video', { nbSamples: 100 });\n\n const aTrackId = info.audioTracks[0]?.id;\n if (aTrackId != null)\n mp4boxFile.setExtractionOptions(aTrackId, 'audio', { nbSamples: 100 });\n\n mp4boxFile.start();\n };\n mp4boxFile.onSamples = onSamples;\n\n await parse();\n\n async function parse() {\n let cursor = 0;\n const maxReadSize = 30 * 1024 * 1024;\n while (true) {\n const data = (await reader.read(maxReadSize, {\n at: cursor,\n })) as MP4ArrayBuffer;\n if (data.byteLength === 0) break;\n data.fileStart = cursor;\n const nextPos = mp4boxFile.appendBuffer(data);\n if (nextPos == null) break;\n cursor = nextPos;\n }\n\n mp4boxFile.stop();\n }\n}\n\nexport function parseMatrix(matrix?: Int32Array) {\n if (matrix?.length !== 9) return {};\n\n const signedMatrix = new Int32Array(matrix.buffer);\n\n // 提取并转成浮点数\n const a = signedMatrix[0] / 65536.0;\n const b = signedMatrix[1] / 65536.0;\n const c = signedMatrix[3] / 65536.0;\n const d = signedMatrix[4] / 65536.0;\n const tx = signedMatrix[6] / 65536.0; // 一般是 0\n const ty = signedMatrix[7] / 65536.0; // 一般是 0\n const w = signedMatrix[8] / (1 << 30); // 一般是 1\n\n // 缩放\n const scaleX = Math.sqrt(a * a + c * c);\n const scaleY = Math.sqrt(b * b + d * d);\n\n // 旋转角度（弧度）\n const rotationRad = Math.atan2(c, a);\n const rotationDeg = (rotationRad * 180) / Math.PI;\n\n return {\n scaleX,\n scaleY,\n rotationRad,\n rotationDeg,\n translateX: tx,\n translateY: ty,\n perspective: w,\n };\n}\n\n/**\n * 旋转 VideoFrame\n */\nexport function createVFRotater(\n width: number,\n height: number,\n rotationDeg: number,\n) {\n const normalizedRotation = (Math.round(rotationDeg / 90) * 90 + 360) % 360;\n if (normalizedRotation === 0) return (vf: VideoFrame | null) => vf;\n\n const rotatedWidth =\n normalizedRotation === 90 || normalizedRotation === 270 ? height : width;\n const rotatedHeight =\n normalizedRotation === 90 || normalizedRotation === 270 ? width : height;\n\n const canvas = new OffscreenCanvas(rotatedWidth, rotatedHeight);\n const ctx = canvas.getContext('2d')!;\n\n ctx.translate(rotatedWidth / 2, rotatedHeight / 2);\n ctx.rotate((-normalizedRotation * Math.PI) / 180);\n ctx.translate(-width / 2, -height / 2);\n\n return (vf: VideoFrame | null) => {\n if (vf == null) return null;\n\n ctx.drawImage(vf, 0, 0);\n const newVF = new VideoFrame(canvas, {\n timestamp: vf.timestamp,\n duration: vf.duration ?? undefined,\n });\n vf.close();\n return newVF;\n };\n}\n","import { Log } from '@webav/internal-utils';\nimport { MP4Info, MP4Sample } from '@webav/mp4box.js';\nimport { file, tmpfile, write } from 'opfs-tools';\nimport { audioResample, extractPCM4AudioData, sleep } from '../av-utils';\nimport {\n createVFRotater,\n extractFileConfig,\n parseMatrix,\n quickParseMP4File,\n} from '../mp4-utils/mp4box-utils';\nimport { DEFAULT_AUDIO_CONF, IClip } from './iclip';\n\nlet CLIP_ID = 0;\n\ntype OPFSToolFile = ReturnType<typeof file>;\nfunction isOTFile(obj: any): obj is OPFSToolFile {\n return obj.kind === 'file' && obj.createReader instanceof Function;\n}\n\n// 用于内部创建 MP4Clip 实例\ntype MPClipCloneArgs = Awaited<ReturnType<typeof mp4FileToSamples>> & {\n localFile: OPFSToolFile;\n};\n\ninterface MP4DecoderConf {\n video: VideoDecoderConfig | null;\n audio: AudioDecoderConfig | null;\n}\n\nexport interface IMP4ClipOpts {\n audio?: boolean | { volume: number };\n /**\n * 不安全，随时可能废弃\n */\n __unsafe_hardwareAcceleration__?: HardwarePreference;\n}\n\ntype ExtMP4Sample = Omit<MP4Sample, 'data'> & {\n is_idr: boolean;\n deleted?: boolean;\n data: null | Uint8Array;\n};\n\ntype LocalFileReader = Awaited<ReturnType<OPFSToolFile['createReader']>>;\n\ntype ThumbnailOpts = {\n start: number;\n end: number;\n step: number;\n};\n\n/**\n * MP4 素材，解析 MP4 文件，使用 {@link MP4Clip.tick} 按需解码指定时间的图像帧\n *\n * 可用于实现视频抽帧、生成缩略图、视频编辑等功能\n *\n * @example\n * new MP4Clip((await fetch('<mp4 url>')).body)\n * new MP4Clip(mp4File.stream())\n *\n * @see {@link Combinator}\n * @see [AVCanvas](../../av-canvas/classes/AVCanvas.html)\n *\n * @see [解码播放视频](https://webav-tech.github.io/WebAV/demo/1_1-decode-video)\n */\nexport class MP4Clip implements IClip {\n #insId = CLIP_ID++;\n\n #log = Log.create(`MP4Clip id:${this.#insId},`);\n\n ready: IClip['ready'];\n\n #destroyed = false;\n\n #meta = {\n // 微秒\n duration: 0,\n width: 0,\n height: 0,\n audioSampleRate: 0,\n audioChanCount: 0,\n };\n\n get meta() {\n return { ...this.#meta };\n }\n\n #localFile: OPFSToolFile;\n\n /** 存储视频头（box: ftyp, moov）的二进制数据 */\n #headerBoxPos: Array<{ start: number; size: number }> = [];\n /**\n * 提供视频头（box: ftyp, moov）的二进制数据\n * 使用任意 mp4 demxer 解析即可获得详细的视频信息\n * 单元测试包含使用 mp4box.js 解析示例代码\n */\n async getFileHeaderBinData() {\n await this.ready;\n const oFile = await this.#localFile.getOriginFile();\n if (oFile == null) throw Error('MP4Clip localFile is not origin file');\n\n return await new Blob(\n this.#headerBoxPos.map(({ start, size }) =>\n oFile.slice(start, start + size),\n ),\n ).arrayBuffer();\n }\n\n /**存储视频平移旋转信息，目前只还原旋转 */\n #parsedMatrix = {\n perspective: 1,\n rotationRad: 0,\n rotationDeg: 0,\n scaleX: 1,\n scaleY: 1,\n translateX: 0,\n translateY: 0,\n };\n #vfRotater: (vf: VideoFrame | null) => VideoFrame | null = (vf) => vf;\n\n #volume = 1;\n\n #videoSamples: ExtMP4Sample[] = [];\n\n #audioSamples: ExtMP4Sample[] = [];\n\n #videoFrameFinder: VideoFrameFinder | null = null;\n #audioFrameFinder: AudioFrameFinder | null = null;\n\n #decoderConf: {\n video: VideoDecoderConfig | null;\n audio: AudioDecoderConfig | null;\n } = {\n video: null,\n audio: null,\n };\n\n #opts: IMP4ClipOpts = { audio: true };\n\n constructor(\n source: OPFSToolFile | ReadableStream<Uint8Array> | MPClipCloneArgs,\n opts: IMP4ClipOpts = {},\n ) {\n if (\n !(source instanceof ReadableStream) &&\n !isOTFile(source) &&\n !Array.isArray(source.videoSamples)\n ) {\n throw Error('Illegal argument');\n }\n\n this.#opts = { audio: true, ...opts };\n this.#volume =\n typeof opts.audio === 'object' && 'volume' in opts.audio\n ? opts.audio.volume\n : 1;\n\n const initByStream = async (s: ReadableStream) => {\n await write(this.#localFile, s);\n return this.#localFile;\n };\n\n this.#localFile = isOTFile(source)\n ? source\n : 'localFile' in source\n ? source.localFile // from clone\n : tmpfile();\n\n this.ready = (\n source instanceof ReadableStream\n ? initByStream(source).then((otFile) =>\n mp4FileToSamples(otFile, this.#opts),\n )\n : isOTFile(source)\n ? mp4FileToSamples(source, this.#opts)\n : Promise.resolve(source)\n ).then(\n async ({\n videoSamples,\n audioSamples,\n decoderConf,\n headerBoxPos,\n parsedMatrix,\n }) => {\n this.#videoSamples = videoSamples;\n this.#audioSamples = audioSamples;\n this.#decoderConf = decoderConf;\n this.#headerBoxPos = headerBoxPos;\n this.#parsedMatrix = parsedMatrix;\n\n const { videoFrameFinder, audioFrameFinder } = genDecoder(\n {\n video:\n decoderConf.video == null\n ? null\n : {\n ...decoderConf.video,\n hardwareAcceleration:\n this.#opts.__unsafe_hardwareAcceleration__,\n },\n audio: decoderConf.audio,\n },\n await this.#localFile.createReader(),\n videoSamples,\n audioSamples,\n this.#opts.audio !== false ? this.#volume : 0,\n );\n this.#videoFrameFinder = videoFrameFinder;\n this.#audioFrameFinder = audioFrameFinder;\n\n const { codedWidth, codedHeight } = decoderConf.video ?? {};\n if (codedWidth && codedHeight) {\n this.#vfRotater = createVFRotater(\n codedWidth,\n codedHeight,\n parsedMatrix.rotationDeg,\n );\n }\n\n this.#meta = genMeta(\n decoderConf,\n videoSamples,\n audioSamples,\n parsedMatrix.rotationDeg,\n );\n\n this.#log.info('MP4Clip meta:', this.#meta);\n return { ...this.#meta };\n },\n );\n }\n\n /**\n * 拦截 {@link MP4Clip.tick} 方法返回的数据，用于对图像、音频数据二次处理\n * @param time 调用 tick 的时间\n * @param tickRet tick 返回的数据\n *\n * @see [移除视频绿幕背景](https://webav-tech.github.io/WebAV/demo/3_2-chromakey-video)\n */\n tickInterceptor: <T extends Awaited<ReturnType<MP4Clip['tick']>>>(\n time: number,\n tickRet: T,\n ) => Promise<T> = async (_, tickRet) => tickRet;\n\n /**\n * 获取素材指定时刻的图像帧、音频数据\n * @param time 微秒\n */\n async tick(time: number): Promise<{\n video?: VideoFrame;\n audio: Float32Array[];\n state: 'success' | 'done';\n }> {\n if (time >= this.#meta.duration) {\n return await this.tickInterceptor(time, {\n audio: (await this.#audioFrameFinder?.find(time)) ?? [],\n state: 'done',\n });\n }\n\n const [audio, video] = await Promise.all([\n this.#audioFrameFinder?.find(time) ?? [],\n this.#videoFrameFinder?.find(time).then(this.#vfRotater),\n ]);\n\n if (video == null) {\n return await this.tickInterceptor(time, {\n audio,\n state: 'success',\n });\n }\n\n return await this.tickInterceptor(time, {\n video,\n audio,\n state: 'success',\n });\n }\n\n #thumbAborter = new AbortController();\n /**\n * 生成缩略图，默认每个关键帧生成一个 100px 宽度的缩略图。\n *\n * @param imgWidth 缩略图宽度，默认 100\n * @param opts Partial<ThumbnailOpts>\n * @returns Promise<Array<{ ts: number; img: Blob }>>\n */\n async thumbnails(\n imgWidth = 100,\n opts?: Partial<ThumbnailOpts>,\n ): Promise<Array<{ ts: number; img: Blob }>> {\n this.#thumbAborter.abort();\n this.#thumbAborter = new AbortController();\n const aborterSignal = this.#thumbAborter.signal;\n\n await this.ready;\n const abortMsg = 'generate thumbnails aborted';\n if (aborterSignal.aborted) throw Error(abortMsg);\n\n const { width, height } = this.#meta;\n const convtr = createVF2BlobConvtr(\n imgWidth,\n Math.round(height * (imgWidth / width)),\n { quality: 0.1, type: 'image/png' },\n );\n\n return new Promise<Array<{ ts: number; img: Blob }>>(\n async (resolve, reject) => {\n let pngPromises: Array<{ ts: number; img: Promise<Blob> }> = [];\n const vc = this.#decoderConf.video;\n if (vc == null || this.#videoSamples.length === 0) {\n resolver();\n return;\n }\n aborterSignal.addEventListener('abort', () => {\n reject(Error(abortMsg));\n });\n\n async function resolver() {\n if (aborterSignal.aborted) return;\n resolve(\n await Promise.all(\n pngPromises.map(async (it) => ({\n ts: it.ts,\n img: await it.img,\n })),\n ),\n );\n }\n\n function pushPngPromise(vf: VideoFrame) {\n pngPromises.push({\n ts: vf.timestamp,\n img: convtr(vf),\n });\n }\n\n const { start = 0, end = this.#meta.duration, step } = opts ?? {};\n if (step) {\n let cur = start;\n // 创建一个新的 VideoFrameFinder 实例，避免与 tick 方法共用而导致冲突\n const videoFrameFinder = new VideoFrameFinder(\n await this.#localFile.createReader(),\n this.#videoSamples,\n {\n ...vc,\n hardwareAcceleration: this.#opts.__unsafe_hardwareAcceleration__,\n },\n );\n while (cur <= end && !aborterSignal.aborted) {\n const vf = await videoFrameFinder.find(cur);\n if (vf) pushPngPromise(vf);\n cur += step;\n }\n videoFrameFinder.destroy();\n resolver();\n } else {\n await thumbnailByKeyFrame(\n this.#videoSamples,\n this.#localFile,\n vc,\n aborterSignal,\n { start, end },\n (vf, done) => {\n if (vf != null) pushPngPromise(vf);\n if (done) resolver();\n },\n );\n }\n },\n );\n }\n\n async split(time: number) {\n await this.ready;\n\n if (time <= 0 || time >= this.#meta.duration)\n throw Error('\"time\" out of bounds');\n\n const [preVideoSlice, postVideoSlice] = splitVideoSampleByTime(\n this.#videoSamples,\n time,\n );\n const [preAudioSlice, postAudioSlice] = splitAudioSampleByTime(\n this.#audioSamples,\n time,\n );\n const preClip = new MP4Clip(\n {\n localFile: this.#localFile,\n videoSamples: preVideoSlice ?? [],\n audioSamples: preAudioSlice ?? [],\n decoderConf: this.#decoderConf,\n headerBoxPos: this.#headerBoxPos,\n parsedMatrix: this.#parsedMatrix,\n },\n this.#opts,\n );\n const postClip = new MP4Clip(\n {\n localFile: this.#localFile,\n videoSamples: postVideoSlice ?? [],\n audioSamples: postAudioSlice ?? [],\n decoderConf: this.#decoderConf,\n headerBoxPos: this.#headerBoxPos,\n parsedMatrix: this.#parsedMatrix,\n },\n this.#opts,\n );\n await Promise.all([preClip.ready, postClip.ready]);\n\n return [preClip, postClip] as [this, this];\n }\n\n async clone() {\n await this.ready;\n const clip = new MP4Clip(\n {\n localFile: this.#localFile,\n videoSamples: [...this.#videoSamples],\n audioSamples: [...this.#audioSamples],\n decoderConf: this.#decoderConf,\n headerBoxPos: this.#headerBoxPos,\n parsedMatrix: this.#parsedMatrix,\n },\n this.#opts,\n );\n await clip.ready;\n clip.tickInterceptor = this.tickInterceptor;\n return clip as this;\n }\n\n /**\n * 拆分 MP4Clip 为仅包含视频轨道和音频轨道的 MP4Clip\n * @returns Mp4CLip[]\n */\n async splitTrack() {\n await this.ready;\n const clips: MP4Clip[] = [];\n if (this.#videoSamples.length > 0) {\n const videoClip = new MP4Clip(\n {\n localFile: this.#localFile,\n videoSamples: [...this.#videoSamples],\n audioSamples: [],\n decoderConf: {\n video: this.#decoderConf.video,\n audio: null,\n },\n headerBoxPos: this.#headerBoxPos,\n parsedMatrix: this.#parsedMatrix,\n },\n this.#opts,\n );\n await videoClip.ready;\n videoClip.tickInterceptor = this.tickInterceptor;\n clips.push(videoClip);\n }\n if (this.#audioSamples.length > 0) {\n const audioClip = new MP4Clip(\n {\n localFile: this.#localFile,\n videoSamples: [],\n audioSamples: [...this.#audioSamples],\n decoderConf: {\n audio: this.#decoderConf.audio,\n video: null,\n },\n headerBoxPos: this.#headerBoxPos,\n parsedMatrix: this.#parsedMatrix,\n },\n this.#opts,\n );\n await audioClip.ready;\n audioClip.tickInterceptor = this.tickInterceptor;\n clips.push(audioClip);\n }\n\n return clips;\n }\n\n destroy(): void {\n if (this.#destroyed) return;\n this.#log.info('MP4Clip destroy');\n this.#destroyed = true;\n\n this.#videoFrameFinder?.destroy();\n this.#audioFrameFinder?.destroy();\n }\n}\n\nfunction genMeta(\n decoderConf: MP4DecoderConf,\n videoSamples: ExtMP4Sample[],\n audioSamples: ExtMP4Sample[],\n rotationDeg: number,\n) {\n const meta = {\n duration: 0,\n width: 0,\n height: 0,\n audioSampleRate: 0,\n audioChanCount: 0,\n };\n if (decoderConf.video != null && videoSamples.length > 0) {\n meta.width = decoderConf.video.codedWidth ?? 0;\n meta.height = decoderConf.video.codedHeight ?? 0;\n // 90, 270 度，需要交换宽高\n const normalizedRotation = (Math.round(rotationDeg / 90) * 90 + 360) % 360;\n if (normalizedRotation === 90 || normalizedRotation === 270) {\n [meta.width, meta.height] = [meta.height, meta.width];\n }\n }\n if (decoderConf.audio != null && audioSamples.length > 0) {\n meta.audioSampleRate = DEFAULT_AUDIO_CONF.sampleRate;\n meta.audioChanCount = DEFAULT_AUDIO_CONF.channelCount;\n }\n\n let vDuration = 0;\n let aDuration = 0;\n if (videoSamples.length > 0) {\n for (let i = videoSamples.length - 1; i >= 0; i--) {\n const s = videoSamples[i];\n if (s.deleted) continue;\n vDuration = s.cts + s.duration;\n break;\n }\n }\n if (audioSamples.length > 0) {\n const lastSampele = audioSamples.at(-1)!;\n aDuration = lastSampele.cts + lastSampele.duration;\n }\n meta.duration = Math.max(vDuration, aDuration);\n\n return meta;\n}\n\nfunction genDecoder(\n decoderConf: MP4DecoderConf,\n localFileReader: LocalFileReader,\n videoSamples: ExtMP4Sample[],\n audioSamples: ExtMP4Sample[],\n volume: number,\n) {\n return {\n audioFrameFinder:\n volume === 0 || decoderConf.audio == null || audioSamples.length === 0\n ? null\n : new AudioFrameFinder(\n localFileReader,\n audioSamples,\n decoderConf.audio,\n {\n volume,\n targetSampleRate: DEFAULT_AUDIO_CONF.sampleRate,\n },\n ),\n videoFrameFinder:\n decoderConf.video == null || videoSamples.length === 0\n ? null\n : new VideoFrameFinder(\n localFileReader,\n videoSamples,\n decoderConf.video,\n ),\n };\n}\n\nasync function mp4FileToSamples(otFile: OPFSToolFile, opts: IMP4ClipOpts = {}) {\n let mp4Info: MP4Info | null = null;\n const decoderConf: MP4DecoderConf = { video: null, audio: null };\n let videoSamples: ExtMP4Sample[] = [];\n let audioSamples: ExtMP4Sample[] = [];\n let headerBoxPos: Array<{ start: number; size: number }> = [];\n const parsedMatrix = {\n perspective: 1,\n rotationRad: 0,\n rotationDeg: 0,\n scaleX: 1,\n scaleY: 1,\n translateX: 0,\n translateY: 0,\n };\n\n let videoDeltaTS = -1;\n let audioDeltaTS = -1;\n const reader = await otFile.createReader();\n await quickParseMP4File(\n reader,\n async (data) => {\n mp4Info = data.info;\n const ftyp = data.mp4boxFile.ftyp!;\n headerBoxPos.push({ start: ftyp.start, size: ftyp.size });\n const moov = data.mp4boxFile.moov!;\n headerBoxPos.push({ start: moov.start, size: moov.size });\n\n Object.assign(parsedMatrix, parseMatrix(mp4Info.videoTracks[0]?.matrix));\n\n let { videoDecoderConf: vc, audioDecoderConf: ac } = extractFileConfig(\n data.mp4boxFile,\n data.info,\n );\n decoderConf.video = vc ?? null;\n decoderConf.audio = ac ?? null;\n if (vc == null && ac == null) {\n Log.error('MP4Clip no video and audio track');\n }\n if (ac != null) {\n const { supported } = await AudioDecoder.isConfigSupported(ac);\n if (!supported) {\n Log.error(`MP4Clip audio codec is not supported: ${ac.codec}`);\n }\n }\n if (vc != null) {\n const { supported } = await VideoDecoder.isConfigSupported(vc);\n if (!supported) {\n Log.error(`MP4Clip video codec is not supported: ${vc.codec}`);\n }\n }\n Log.info(\n 'mp4BoxFile moov ready',\n {\n ...data.info,\n tracks: null,\n videoTracks: null,\n audioTracks: null,\n },\n decoderConf,\n );\n },\n (_, type, samples) => {\n if (type === 'video') {\n if (videoDeltaTS === -1) videoDeltaTS = samples[0].dts;\n for (const s of samples) {\n videoSamples.push(normalizeTimescale(s, videoDeltaTS, 'video'));\n }\n } else if (type === 'audio' && opts.audio) {\n if (audioDeltaTS === -1) audioDeltaTS = samples[0].dts;\n for (const s of samples) {\n audioSamples.push(normalizeTimescale(s, audioDeltaTS, 'audio'));\n }\n }\n },\n );\n await reader.close();\n\n const lastSampele = videoSamples.at(-1) ?? audioSamples.at(-1);\n if (mp4Info == null) {\n throw Error('MP4Clip stream is done, but not emit ready');\n } else if (lastSampele == null) {\n throw Error('MP4Clip stream not contain any sample');\n }\n // 修复首帧黑帧\n fixFirstBlackFrame(videoSamples);\n Log.info('mp4 stream parsed');\n return {\n videoSamples,\n audioSamples,\n decoderConf,\n headerBoxPos,\n parsedMatrix,\n };\n}\n\nfunction normalizeTimescale(\n s: MP4Sample,\n delta = 0,\n sampleType: 'video' | 'audio',\n) {\n // todo: perf 丢弃多余字段，小尺寸对象性能更好\n let offset = s.offset;\n // 当 IDR 帧前面包含非图像帧数据（如 SEI），可能导致解码失败\n const idrOffset =\n sampleType === 'video' && s.is_sync\n ? idrNALUOffs