UNPKG

mebox-extractor

Version:

🎬 A powerful and type-safe video metadata extractor for YouTube and Bilibili platforms with full TypeScript support

955 lines (937 loc) • 28.2 kB
"use strict";
// Bundled CommonJS build (esbuild output) of the video metadata extractor.
// Layout: esbuild interop helpers, vendored `retry` / `is-network-error` /
// `p-retry`, then the package's own sources (src/**).
// Vendored code is reproduced without semantic changes.

var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;

// Wraps a CommonJS module factory so it is evaluated lazily, once, on first call.
var __commonJS = (cb, mod) => function __require() {
  return mod || (0, cb[__getOwnPropNames(cb)[0]])((mod = { exports: {} }).exports, mod), mod.exports;
};

// Defines every entry of `all` as an enumerable getter on `target`.
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};

// Copies own properties of `from` onto `to` as getters, skipping `except` and
// anything `to` already owns.
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};

var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));

// Produces the CommonJS export object: `mod`'s properties plus `__esModule: true`.
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);

// node_modules/retry/lib/retry_operation.js
var require_retry_operation = __commonJS({
  "node_modules/retry/lib/retry_operation.js"(exports2, module2) {
    "use strict";
    /**
     * Tracks one retryable operation: remaining timeouts, collected errors
     * and the attempt counter.
     * @param timeouts - Delays (ms) to wait before each retry
     * @param options - Options object, or a boolean shorthand for `{ forever }`
     */
    function RetryOperation(timeouts, options) {
      if (typeof options === "boolean") {
        options = { forever: options };
      }
      this._originalTimeouts = JSON.parse(JSON.stringify(timeouts));
      this._timeouts = timeouts;
      this._options = options || {};
      this._maxRetryTime = options && options.maxRetryTime || Infinity;
      this._fn = null;
      this._errors = [];
      this._attempts = 1;
      this._operationTimeout = null;
      this._operationTimeoutCb = null;
      this._timeout = null;
      this._operationStart = null;
      this._timer = null;
      if (this._options.forever) {
        this._cachedTimeouts = this._timeouts.slice(0);
      }
    }
    module2.exports = RetryOperation;

    // Restores the attempt counter and the original timeout list.
    RetryOperation.prototype.reset = function() {
      this._attempts = 1;
      this._timeouts = this._originalTimeouts.slice(0);
    };

    // Clears pending timers and drops all remaining timeouts.
    RetryOperation.prototype.stop = function() {
      if (this._timeout) {
        clearTimeout(this._timeout);
      }
      if (this._timer) {
        clearTimeout(this._timer);
      }
      this._timeouts = [];
      this._cachedTimeouts = null;
    };

    /**
     * Records `err` and schedules the next attempt.
     * @returns `true` when a retry was scheduled; `false` when `err` is falsy,
     *   the max retry time elapsed, or no timeouts remain
     */
    RetryOperation.prototype.retry = function(err) {
      if (this._timeout) {
        clearTimeout(this._timeout);
      }
      if (!err) {
        return false;
      }
      var currentTime = (/* @__PURE__ */ new Date()).getTime();
      if (err && currentTime - this._operationStart >= this._maxRetryTime) {
        this._errors.push(err);
        this._errors.unshift(new Error("RetryOperation timeout occurred"));
        return false;
      }
      this._errors.push(err);
      var timeout = this._timeouts.shift();
      if (timeout === void 0) {
        if (this._cachedTimeouts) {
          // "forever" mode: keep only the latest error and reuse the last timeout.
          this._errors.splice(0, this._errors.length - 1);
          timeout = this._cachedTimeouts.slice(-1);
        } else {
          return false;
        }
      }
      var self = this;
      this._timer = setTimeout(function() {
        self._attempts++;
        if (self._operationTimeoutCb) {
          self._timeout = setTimeout(function() {
            self._operationTimeoutCb(self._attempts);
          }, self._operationTimeout);
          if (self._options.unref) {
            self._timeout.unref();
          }
        }
        self._fn(self._attempts);
      }, timeout);
      if (this._options.unref) {
        this._timer.unref();
      }
      return true;
    };

    /**
     * Stores `fn`, optionally arms the per-operation timeout callback, and
     * runs the first attempt immediately.
     */
    RetryOperation.prototype.attempt = function(fn, timeoutOps) {
      this._fn = fn;
      if (timeoutOps) {
        if (timeoutOps.timeout) {
          this._operationTimeout = timeoutOps.timeout;
        }
        if (timeoutOps.cb) {
          this._operationTimeoutCb = timeoutOps.cb;
        }
      }
      var self = this;
      if (this._operationTimeoutCb) {
        this._timeout = setTimeout(function() {
          self._operationTimeoutCb();
        }, self._operationTimeout);
      }
      this._operationStart = (/* @__PURE__ */ new Date()).getTime();
      this._fn(this._attempts);
    };

    RetryOperation.prototype.try = function(fn) {
      console.log("Using RetryOperation.try() is deprecated");
      this.attempt(fn);
    };
    RetryOperation.prototype.start = function(fn) {
      console.log("Using RetryOperation.start() is deprecated");
      this.attempt(fn);
    };
    // NOTE(review): this reassignment overrides the `start` defined just above,
    // so `start` is an alias of `try`. Vendored behavior, left as-is.
    RetryOperation.prototype.start = RetryOperation.prototype.try;

    RetryOperation.prototype.errors = function() {
      return this._errors;
    };
    RetryOperation.prototype.attempts = function() {
      return this._attempts;
    };

    // Returns the error whose message occurs most often (ties go to the later
    // error), or null when no errors were recorded.
    RetryOperation.prototype.mainError = function() {
      if (this._errors.length === 0) {
        return null;
      }
      var counts = {};
      var mainError = null;
      var mainErrorCount = 0;
      for (var i = 0; i < this._errors.length; i++) {
        var error = this._errors[i];
        var message = error.message;
        var count = (counts[message] || 0) + 1;
        counts[message] = count;
        if (count >= mainErrorCount) {
          mainError = error;
          mainErrorCount = count;
        }
      }
      return mainError;
    };
  }
});

// node_modules/retry/lib/retry.js
var require_retry = __commonJS({
  "node_modules/retry/lib/retry.js"(exports2) {
    "use strict";
    var RetryOperation = require_retry_operation();

    // Builds a RetryOperation from the timeouts derived from `options`.
    exports2.operation = function(options) {
      var timeouts = exports2.timeouts(options);
      return new RetryOperation(timeouts, {
        forever: options && (options.forever || options.retries === Infinity),
        unref: options && options.unref,
        maxRetryTime: options && options.maxRetryTime
      });
    };

    /**
     * Computes the sorted list of retry delays.
     * An array argument is returned as a shallow copy.
     * @throws Error when `minTimeout` is greater than `maxTimeout`
     */
    exports2.timeouts = function(options) {
      if (options instanceof Array) {
        return [].concat(options);
      }
      var opts = {
        retries: 10,
        factor: 2,
        minTimeout: 1 * 1e3,
        maxTimeout: Infinity,
        randomize: false
      };
      for (var key in options) {
        opts[key] = options[key];
      }
      if (opts.minTimeout > opts.maxTimeout) {
        throw new Error("minTimeout is greater than maxTimeout");
      }
      var timeouts = [];
      for (var i = 0; i < opts.retries; i++) {
        timeouts.push(this.createTimeout(i, opts));
      }
      if (options && options.forever && !timeouts.length) {
        timeouts.push(this.createTimeout(i, opts));
      }
      timeouts.sort(function(a, b) {
        return a - b;
      });
      return timeouts;
    };

    // Exponential backoff for one attempt, capped at `opts.maxTimeout`.
    exports2.createTimeout = function(attempt, opts) {
      var random = opts.randomize ? Math.random() + 1 : 1;
      var timeout = Math.round(random * Math.max(opts.minTimeout, 1) * Math.pow(opts.factor, attempt));
      timeout = Math.min(timeout, opts.maxTimeout);
      return timeout;
    };

    // Replaces callback-style methods of `obj` with retrying wrappers.
    exports2.wrap = function(obj, options, methods) {
      if (options instanceof Array) {
        methods = options;
        options = null;
      }
      if (!methods) {
        methods = [];
        for (var key in obj) {
          if (typeof obj[key] === "function") {
            methods.push(key);
          }
        }
      }
      for (var i = 0; i < methods.length; i++) {
        var method = methods[i];
        var original = obj[method];
        obj[method] = function retryWrapper(original2) {
          var op = exports2.operation(options);
          var args = Array.prototype.slice.call(arguments, 1);
          var callback = args.pop();
          args.push(function(err) {
            if (op.retry(err)) {
              return;
            }
            if (err) {
              arguments[0] = op.mainError();
            }
            callback.apply(this, arguments);
          });
          op.attempt(function() {
            original2.apply(obj, args);
          });
        }.bind(obj, original);
        obj[method].options = options;
      }
    };
  }
});

// node_modules/retry/index.js
var require_retry2 = __commonJS({
  "node_modules/retry/index.js"(exports2, module2) {
    "use strict";
    module2.exports = require_retry();
  }
});

// src/index.ts
var index_exports = {};
__export(index_exports, {
  BilibiliClient: () => BilibiliClient,
  convertURLToWebsiteKey: () => convertURLToWebsiteKey,
  default: () => extract,
  extract: () => extract,
  extractVideoMetadata: () => extractVideoMetadata,
  getVideoIdByURL: () => getVideoIdByURL
});
module.exports = __toCommonJS(index_exports);

// src/utils/http.ts
var import_lodash_es = require("lodash-es");

// node_modules/p-retry/index.js
var import_retry = __toESM(require_retry2(), 1);

// node_modules/is-network-error/index.js
var objectToString = Object.prototype.toString;
var isError = (value) => objectToString.call(value) === "[object Error]";
var errorMessages = /* @__PURE__ */ new Set([
  "network error",
  // Chrome
  "Failed to fetch",
  // Chrome
  "NetworkError when attempting to fetch resource.",
  // Firefox
  "The Internet connection appears to be offline.",
  // Safari 16
  "Load failed",
  // Safari 17+
  "Network request failed",
  // `cross-fetch`
  "fetch failed",
  // Undici (Node.js)
  "terminated"
  // Undici (Node.js)
]);

// True when `error` is a TypeError whose message is one of the known
// network-failure messages listed in `errorMessages`.
function isNetworkError(error) {
  const isValid = error && isError(error) && error.name === "TypeError" && typeof error.message === "string";
  if (!isValid) {
    return false;
  }
  if (error.message === "Load failed") {
    return error.stack === void 0;
  }
  return errorMessages.has(error.message);
}

// node_modules/p-retry/index.js
// Error wrapper: when thrown from the retried function, pRetry rejects with
// `originalError` immediately instead of retrying.
var AbortError = class extends Error {
  constructor(message) {
    super();
    if (message instanceof Error) {
      this.originalError = message;
      ({ message } = message);
    } else {
      this.originalError = new Error(message);
      this.originalError.stack = this.stack;
    }
    this.name = "AbortError";
    this.message = message;
  }
};

// Annotates `error` with `attemptNumber` and `retriesLeft`.
var decorateErrorWithCounts = (error, attemptNumber, options) => {
  const retriesLeft = options.retries - (attemptNumber - 1);
  error.attemptNumber = attemptNumber;
  error.retriesLeft = retriesLeft;
  return error;
};

/**
 * Runs `input` and retries it on failure.
 * @param input - Async function receiving the attempt number
 * @param options - `retries` (default 10), `shouldRetry`, `onFailedAttempt`,
 *   `signal`, plus the options forwarded to `retry.operation`
 * @returns Promise resolving to the first successful result of `input`
 */
async function pRetry(input, options) {
  return new Promise((resolve, reject) => {
    options = { ...options };
    options.onFailedAttempt ??= () => {
    };
    options.shouldRetry ??= () => true;
    options.retries ??= 10;
    const operation = import_retry.default.operation(options);
    const abortHandler = () => {
      operation.stop();
      reject(options.signal?.reason);
    };
    if (options.signal && !options.signal.aborted) {
      options.signal.addEventListener("abort", abortHandler, { once: true });
    }
    const cleanUp = () => {
      options.signal?.removeEventListener("abort", abortHandler);
      operation.stop();
    };
    operation.attempt(async (attemptNumber) => {
      try {
        const result = await input(attemptNumber);
        cleanUp();
        resolve(result);
      } catch (error) {
        try {
          if (!(error instanceof Error)) {
            throw new TypeError(`Non-error was thrown: "${error}". You should only throw errors.`);
          }
          if (error instanceof AbortError) {
            throw error.originalError;
          }
          // Non-network TypeErrors are not retried.
          if (error instanceof TypeError && !isNetworkError(error)) {
            throw error;
          }
          decorateErrorWithCounts(error, attemptNumber, options);
          // NOTE(review): there is no `return` after this reject, so execution
          // continues into onFailedAttempt below. Vendored behavior, left as-is.
          if (!await options.shouldRetry(error)) {
            operation.stop();
            reject(error);
          }
          await options.onFailedAttempt(error);
          if (!operation.retry(error)) {
            throw operation.mainError();
          }
        } catch (finalError) {
          decorateErrorWithCounts(finalError, attemptNumber, options);
          cleanUp();
          reject(finalError);
        }
      }
    });
  });
}

// src/utils/http.ts
var APIClient = class {
  /**
   * Creates a new API client
   * @param url - Base URL for the API
   * @param options - Client configuration options
   */
  constructor(url, options) {
    this.url = url;
    this.options = options;
  }
  /**
   * Static method to send a simple HTTP request with retry logic
   * @param url - Full URL to send request to
   * @param init - Fetch request options
   * @returns Promise resolving to Response
   * @throws Error when the response status is not ok after all retries
   */
  static async send(url, init) {
    return pRetry(
      async () => {
        const response = await fetch(url, init);
        if (!response.ok) {
          throw new Error(`Request failed with status code ${response.status}`);
        }
        return response;
      },
      { retries: 3 }
    );
  }
  /**
   * Sends a request to the configured base URL with optional path
   * @param path - API endpoint path
   * @param init - Fetch request options (not mutated)
   * @returns Promise resolving to processed response
   * @throws Error when the response status is not ok after all retries
   */
  async send(path, init) {
    const endpoint = `${this.url}${path ?? ""}`;
    // Merge into a fresh object: lodash `merge` mutates its first argument, so
    // passing `init` there would leak the client defaults into the caller's
    // object. Client-level options still take precedence over `init`.
    const parsedInit = (0, import_lodash_es.merge)({}, init, this.options?.init);
    const response = await pRetry(
      async () => {
        const res = await fetch(endpoint, parsedInit);
        if (!res.ok) {
          throw new Error(`Request failed with status code ${res.status}`);
        }
        return res;
      },
      { retries: 3 }
    );
    return this.options?.afterResponse?.(response) ?? response;
  }
  /**
   * Sends a GET request
   * @param path - API endpoint path
   * @param init - Additional fetch options (not mutated)
   * @returns Promise resolving to response
   */
  async get(path, init) {
    return this.send(path, (0, import_lodash_es.merge)({}, init, { method: "GET" }));
  }
  /**
   * Sends a POST request with JSON body
   * @param path - API endpoint path
   * @param body - Request body data
   * @param init - Additional fetch options (not mutated)
   * @returns Promise resolving to response
   */
  async post(path, body, init) {
    return this.send(
      path,
      (0, import_lodash_es.merge)({}, init, {
        method: "POST",
        body: JSON.stringify(body),
        headers: {
          "Content-Type": "application/json"
        }
      })
    );
  }
};

// src/utils/cookie.ts
/**
 * Serializes a cookie map into a `name=value;name=value` header string.
 * @param cookies - Object mapping cookie names to values
 * @returns Cookie header value (empty string for an empty object)
 */
var formatCookies = (cookies) => Object.entries(cookies).map(([name, value]) => `${name}=${value}`).join(";");

// src/clients/wbi-sign.ts
var import_md5 = __toESM(require("md5"), 1);
// Index permutation applied to `img_key + sub_key` to derive the mixin key.
var MIXIN_KEY_ENCODING_TAB = [
  46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35,
  27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13,
  37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4,
  22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11, 36, 20, 34, 44, 52
];

/**
 * Permutes `orig` through MIXIN_KEY_ENCODING_TAB and keeps the first 32 characters.
 * @param orig - Concatenated img_key + sub_key
 * @returns The mixin key
 */
var getMixinKey = (orig) => MIXIN_KEY_ENCODING_TAB.map((n) => orig[n]).join("").slice(0, 32);

/**
 * Builds a signed query string: the parameters plus a `wts` timestamp, sorted
 * by key, followed by `w_rid` = md5(query + mixin key).
 * @param originParams - Parameters to sign (not mutated; values must be non-nullish)
 * @param img_key - WBI image key
 * @param sub_key - WBI sub key
 * @returns Query string ending in `&w_rid=<signature>`
 */
var encodeWbi = (originParams, img_key, sub_key) => {
  const mixin_key = getMixinKey(img_key + sub_key);
  const curr_time = Math.round(Date.now() / 1e3);
  // These characters are stripped from values before encoding.
  const chr_filter = /[!'()*]/g;
  const params = Object.assign({}, originParams, { wts: curr_time });
  const query = Object.keys(params).sort().map((key) => {
    const value = params[key].toString().replace(chr_filter, "");
    return `${encodeURIComponent(key)}=${encodeURIComponent(value)}`;
  }).join("&");
  const wbi_sign = (0, import_md5.default)(query + mixin_key);
  return query + "&w_rid=" + wbi_sign;
};

// src/clients/bilibili.ts
// Resolution label -> numeric quality code sent as `qn` and compared with the
// `quality` field of playurl responses.
var RESOLUTION_CODE = {
  "4k": 120,
  "1080p": 80,
  "720p": 64,
  "360p": 16
};
var BilibiliClient = class {
  /**
   * Creates a new Bilibili client
   * @param cookies - Authentication cookies
   */
  constructor(cookies = {}) {
    this.cookies = cookies;
    const cookieString = formatCookies(cookies);
    this.apiClient = new APIClient("https://api.bilibili.com", {
      init: {
        headers: {
          cookie: cookieString
        }
      }
    });
  }
  apiClient;
  /**
   * Checks if user is logged in based on SESSDATA cookie
   */
  get isLogin() {
    return !!this.cookies.SESSDATA;
  }
  /**
   * Fetches video URL based on login status
   * @param bvid - Bilibili video ID
   * @param cid - Video CID
   * @param resolution - Desired resolution
   * @param isPreview - Whether this is for preview
   * @returns Video URL and quality info
   */
  fetchVideo(bvid, cid, resolution, isPreview = false) {
    if (this.isLogin) {
      return this.getHighResolutionDownloadURL(bvid, cid, resolution, isPreview);
    }
    return this.getLowResolutionDownloadURL(bvid, cid);
  }
  /**
   * Gets WBI signing keys from Bilibili API
   * @returns Object containing img_key and sub_key for WBI signing
   */
  async getWbiKeys() {
    const response = await this.apiClient.send("/x/web-interface/nav");
    const {
      data: {
        wbi_img: { img_url, sub_url }
      }
    } = await response.json();
    // Each key is the file name of the URL without its extension.
    return {
      img_key: img_url.slice(
        img_url.lastIndexOf("/") + 1,
        img_url.lastIndexOf(".")
      ),
      sub_key: sub_url.slice(
        sub_url.lastIndexOf("/") + 1,
        sub_url.lastIndexOf(".")
      )
    };
  }
  /**
   * Signs API parameters using WBI algorithm
   * @param params - Parameters to sign
   * @returns Signed query string
   */
  async signParams(params) {
    const { img_key, sub_key } = await this.getWbiKeys();
    return encodeWbi(params, img_key, sub_key);
  }
  /**
   * Gets basic video information
   * @param bvid - Bilibili video ID
   * @returns Video view information
   */
  async getInfo(bvid) {
    const response = await this.apiClient.send(
      `/x/web-interface/view?bvid=${bvid}`
    );
    const { data } = await response.json();
    return data;
  }
  /**
   * Gets available subtitles for a video
   * @param aid - Video AID
   * @param cid - Video CID
   * @returns Array of available subtitles
   */
  async getSubtitles(aid, cid) {
    const response = await this.apiClient.send(
      `/x/player/wbi/v2?aid=${aid}&cid=${cid}`
    );
    const { data } = await response.json();
    return data.subtitle?.subtitles ?? [];
  }
  /**
   * Gets preview URL for a video
   * @param bvid - Bilibili video ID
   * @param cid - Video CID
   * @returns Preview URL and quality info
   */
  async getPreviewURL(bvid, cid) {
    return this.fetchVideo(bvid, cid, "1080p", true);
  }
  /**
   * Requests the playurl endpoint with `platform=html5` and no quality selection.
   * @param bvid - Bilibili video ID
   * @param cid - Video CID
   * @returns Video URL (falling back to the first backup URL) and quality info
   */
  async getLowResolutionDownloadURL(bvid, cid) {
    const signedParams = await this.signParams({
      bvid,
      cid,
      otype: "json",
      platform: "html5"
    });
    const response = await this.apiClient.send(
      `/x/player/wbi/playurl?${signedParams}`
    );
    const { data } = await response.json();
    return {
      url: data.durl?.[0]?.url ?? data.durl?.[0]?.backup_url?.[0],
      quality: data.quality
    };
  }
  /**
   * Requests the playurl endpoint with an explicit quality code.
   * @param bvid - Bilibili video ID
   * @param cid - Video CID
   * @param resolution - Desired resolution; unknown labels fall back to code 80
   * @param isPreview - When true, `platform=html5` is appended
   * @returns Video URL and quality info
   */
  async getHighResolutionDownloadURL(bvid, cid, resolution, isPreview) {
    const signedParams = await this.signParams({
      bvid,
      cid,
      otype: "json",
      // The fallback must apply to the code itself: calling `.toString()` on
      // an unknown resolution threw before `??` could ever take effect.
      qn: String(RESOLUTION_CODE[resolution] ?? 80),
      high_quality: ["4k", "1080p"].includes(resolution) ? "1" : "0"
    });
    // NOTE(review): "fnval&128=128" looks like it was copied from documentation
    // notation for a bit test ((fnval & 128) == 128) rather than a real query
    // parameter, and these parameters are appended after signing. Kept
    // byte-for-byte; confirm the intended value against the API before changing.
    const resolution4KParams = resolution === "4k" ? "&fnval&128=128&fourk=1" : "";
    const platform = isPreview ? "&platform=html5" : "";
    const response = await this.apiClient.send(
      `/x/player/wbi/playurl?${signedParams}${resolution4KParams}${platform}`
    );
    const { data } = await response.json();
    return {
      url: data.durl?.[0]?.url,
      quality: data.quality
    };
  }
  /**
   * Gets download URL for a video with quality validation
   * @param bvid - Bilibili video ID
   * @param cid - Video CID
   * @param resolution - Desired resolution
   * @returns Download URL
   */
  async getDownloadURL(bvid, cid, resolution) {
    const { url, quality } = await this.fetchVideo(bvid, cid, resolution);
    const expectedCode = RESOLUTION_CODE[resolution];
    // Compare as strings so an unknown resolution or a missing `quality`
    // cannot throw while only deciding whether to warn.
    if (expectedCode !== void 0 && String(expectedCode) !== String(quality)) {
      const label = Object.keys(RESOLUTION_CODE).find(
        (key) => String(RESOLUTION_CODE[key]) === String(quality)
      );
      console.warn(
        `The requested resolution is not available, using ${label ?? "lowest quality"} instead.`
      );
    }
    return url;
  }
};

// src/extractors/bilibili.ts
/**
 * Downloads a Bilibili subtitle file and maps its entries.
 * @param subtitleURL - Absolute or protocol-relative subtitle URL
 * @returns Array of `{ start, end, text }` entries
 */
var parseSubtitles = async (subtitleURL) => {
  // Protocol-relative URLs are upgraded to https rather than plain http.
  const parsedSubtitleURL = subtitleURL.startsWith("http") ? subtitleURL : `https:${subtitleURL}`;
  const response = await APIClient.send(parsedSubtitleURL);
  const data = await response.json();
  return data.body.map(({ from, to, content }) => ({
    start: from,
    end: to,
    text: content
  }));
};

/**
 * Collects metadata, subtitles, preview URL and download URL for a Bilibili video.
 * @param url - Original video URL (echoed back in the result)
 * @param bvid - Bilibili video ID
 * @param options - `resolution` (default "720p") and `cookies`
 * @returns Video metadata object
 * @throws Error when the view API returns no data for `bvid`
 */
var extractBilibiliVideoMetadata = async (url, bvid, options = {}) => {
  const { resolution = "720p", cookies = {} } = options;
  const bilibiliClient = new BilibiliClient(cookies);
  const info = await bilibiliClient.getInfo(bvid);
  if (!info) {
    // Destructuring an absent payload would otherwise fail with an opaque TypeError.
    throw new Error(`Could not get video info for ${bvid}`);
  }
  const { aid, cid, duration, desc, title, pic } = info;
  const [subtitle] = await bilibiliClient.getSubtitles(aid, cid);
  const subtitleContent = subtitle ? await parseSubtitles(subtitle.subtitle_url) : void 0;
  const [downloadURL, { url: previewURL }] = await Promise.all([
    bilibiliClient.getDownloadURL(bvid, cid, resolution),
    bilibiliClient.getPreviewURL(bvid, cid)
  ]);
  return {
    title,
    website: "bilibili",
    subtitles: subtitleContent,
    duration,
    videoId: bvid,
    url,
    thumbnail: pic,
    description: desc,
    previewURL,
    downloadURL,
    fps: 25
  };
};

// src/extractors/youtube.ts
var import_youtubei = require("youtubei.js");

// src/utils/browser.ts
// Hostname patterns used to recognize supported websites.
var SUPPORTED_WEBSITES_VALIDATORS = {
  youtube: /(youtube|youtu)/,
  bilibili: /bilibili/
};

/**
 * Maps a URL to the key of the supported website it belongs to.
 * @param url - Absolute URL string
 * @returns "youtube", "bilibili", or undefined when `url` is empty or unsupported
 * @throws TypeError (from `new URL`) when `url` is not a valid absolute URL
 */
var convertURLToWebsiteKey = (url) => {
  if (!url) {
    return;
  }
  const { hostname } = new URL(url);
  const websites = Object.keys(SUPPORTED_WEBSITES_VALIDATORS);
  return websites.find((website) => {
    const regex = SUPPORTED_WEBSITES_VALIDATORS[website];
    return regex?.test(hostname);
  });
};

// Truthy when `process.versions.node` is defined.
var isNodeJS = () => typeof process !== "undefined" && process.versions?.node;

// src/utils/common.ts
// Per-website functions that pull the video id out of a parsed URL.
var WEBSITES_ID_GETTERS = {
  // Third path segment, e.g. /video/<id>
  bilibili: (url) => url.pathname.split("/")[2],
  youtube: (url) => {
    if (url.hostname.includes("youtu.be")) {
      return url.pathname.slice(1);
    }
    if (url.searchParams.get("v")) {
      return url.searchParams.get("v");
    }
    const shortsMatch = url.pathname.split("shorts/")[1];
    return shortsMatch;
  }
};

/**
 * Extracts the video id from a supported video URL.
 * @param url - Absolute URL string
 * @returns The video id
 * @throws Error when the website is unsupported or no id is found
 */
var getVideoIdByURL = (url) => {
  const website = convertURLToWebsiteKey(url);
  if (!website) {
    throw new Error(`The website is not supported ${url}`);
  }
  const videoId = WEBSITES_ID_GETTERS[website](new URL(url));
  if (!videoId) {
    throw new Error(`The video id is not found, please check the URL ${url}`);
  }
  return videoId;
};

// src/extractors/youtube.ts
var import_fast_xml_parser = require("fast-xml-parser");

/**
 * Creates a fetch replacement that rewrites each request to plain http and
 * carries the original host and headers in `__host` / `__headers` query
 * parameters. The caller's URL and Request headers are not mutated.
 * NOTE(review): presumably consumed by a proxy in front of the browser build — confirm.
 */
var createFetchFn = () => (input, init) => {
  // `new URL(...)` copies the input so a caller-owned URL is left untouched.
  const url = new URL(typeof input === "string" || input instanceof URL ? input : input.url);
  url.searchParams.set("__host", url.host);
  url.protocol = "http";
  // Copy Request headers so the `delete` below cannot alter the caller's Request.
  const headers = init?.headers ? new Headers(init.headers) : input instanceof Request ? new Headers(input.headers) : new Headers();
  url.searchParams.set("__headers", JSON.stringify([...headers]));
  const request = new Request(
    url,
    input instanceof Request ? input : void 0
  );
  headers.delete("user-agent");
  return fetch(request, {
    ...init ?? {},
    headers
  });
};

/**
 * Downloads a YouTube caption track (XML) and maps its `<text>` nodes.
 * @param subtitleURL - Caption track URL
 * @returns Array of `{ start, end, text }` entries; empty when the body is
 *   empty or contains no `<text>` nodes
 */
var parseSubtitles2 = async (subtitleURL) => {
  const response = await APIClient.send(subtitleURL);
  const rawXMLData = await response.text();
  if (!rawXMLData) {
    return [];
  }
  const parser = new import_fast_xml_parser.XMLParser({
    ignoreAttributes: false,
    attributeNamePrefix: "",
    textNodeName: "content"
  });
  const textNodes = parser.parse(rawXMLData)?.transcript?.text;
  if (!textNodes) {
    return [];
  }
  // The parser yields a single object (not an array) when only one <text> node exists.
  const data = Array.isArray(textNodes) ? textNodes : [textNodes];
  return data.map(({ content, start, dur }) => ({
    start: parseFloat(start),
    end: parseFloat(start) + parseFloat(dur),
    text: content
  }));
};

/**
 * Resolves separate video and audio stream URLs from the adaptive formats.
 * @param yt - Innertube session
 * @param videoId - YouTube video id
 * @param defaultResolution - Preferred video quality label
 * @returns `{ downloadURL: [{ url, mimeType }, ...], info }`, or undefined
 *   when no adaptive formats are available
 */
var getVideoInfoForHighQualityVideo = async (yt, videoId, defaultResolution) => {
  const info = await pRetry(() => yt.getInfo(videoId), { retries: 3 });
  const { streaming_data } = info;
  const chooseFormatByType = (type) => {
    try {
      return info.chooseFormat({
        type,
        quality: type === "audio" ? "best" : defaultResolution
      });
    } catch {
      console.warn(`Failed to get ${type} format, fallback to best quality`);
      return info.chooseFormat({
        type,
        quality: "best"
      });
    }
  };
  if (!streaming_data?.adaptive_formats.length) {
    return;
  }
  const resourceType = ["video", "audio"];
  const downloadURL = resourceType.map((type) => {
    const format = chooseFormatByType(type);
    return {
      url: format.decipher(yt.session.player),
      mimeType: format.mime_type
    };
  });
  return {
    downloadURL,
    info
  };
};

/**
 * Resolves a single stream URL from the first entry of `streaming_data.formats`.
 * @param yt - Innertube session
 * @param videoId - YouTube video id
 * @returns `{ downloadURL, info }`, or undefined when no format is available
 */
var getVideoInfoForLowQualityVideo = async (yt, videoId) => {
  const info = await yt.getInfo(videoId);
  const { streaming_data } = info;
  const [format] = streaming_data?.formats ?? [];
  if (!format) {
    return;
  }
  return {
    downloadURL: format.decipher(yt.session.player),
    info
  };
};

/**
 * Collects metadata, subtitles and stream URLs for a YouTube video.
 * @param url - Original video URL (echoed back in the result)
 * @param options - `resolution` (default "720p") and `cookies`
 * @returns Video metadata object
 * @throws Error when neither the adaptive nor the plain formats yield a stream
 */
var extractYoutubeVideoMetadata = async (url, options = {}) => {
  const { resolution = "720p", cookies = {} } = options;
  const cookieString = formatCookies(cookies);
  const videoId = getVideoIdByURL(url);
  const createInnrTubeByClient = (clientType) => import_youtubei.Innertube.create({
    fetch: isNodeJS() ? void 0 : createFetchFn(),
    cookie: cookieString,
    generate_session_locally: true,
    enable_session_cache: false,
    retrieve_player: true,
    cache: new import_youtubei.UniversalCache(true),
    client_type: clientType
  });
  // Try the adaptive formats first; fall back to the plain formats list.
  const videoInfo = await getVideoInfoForHighQualityVideo(
    await createInnrTubeByClient(import_youtubei.ClientType.WEB_EMBEDDED),
    videoId,
    resolution
  ) ?? await getVideoInfoForLowQualityVideo(
    await createInnrTubeByClient(),
    videoId
  );
  if (!videoInfo) {
    throw new Error("Could not get correct video format");
  }
  const { basic_info, captions } = videoInfo.info;
  const [caption] = captions?.caption_tracks ?? [];
  const subtitles = caption?.base_url ? await parseSubtitles2(caption.base_url) : void 0;
  const previewURL = typeof videoInfo.downloadURL === "string" ? videoInfo.downloadURL : videoInfo.downloadURL[0].url;
  return {
    previewURL,
    title: basic_info.title ?? "",
    website: "youtube",
    subtitles,
    duration: basic_info.duration ?? 0,
    videoId,
    url,
    // `?.` on the element too: an empty thumbnail list must not throw.
    thumbnail: basic_info.thumbnail?.[0]?.url ?? "",
    description: basic_info.short_description ?? "",
    downloadURL: videoInfo.downloadURL,
    fps: 0
  };
};

// src/index.ts
var VIDEO_METADATA_EXTRACTOR = {
  bilibili: extractBilibiliVideoMetadata,
  youtube: extractYoutubeVideoMetadata
};

/**
 * Extracts video metadata from a supported video URL.
 * @param url - Absolute video URL
 * @param options - Extractor options (`resolution`, `cookies`)
 * @returns Video metadata object
 * @throws Error when the website is unsupported or the video id is missing
 */
var extract = async (url, options = {}) => {
  const website = convertURLToWebsiteKey(url);
  const extractor = VIDEO_METADATA_EXTRACTOR[website];
  if (!extractor) {
    throw new Error(`The website ${website} is not supported`);
  }
  const videoId = getVideoIdByURL(url);
  if (!videoId) {
    throw new Error(`The video id is not found`);
  }
  let metadata;
  // The Bilibili extractor takes the video id explicitly; the YouTube one
  // derives it from the URL itself.
  if (website === "bilibili") {
    metadata = await extractor(url, videoId, options);
  } else {
    metadata = await extractor(url, options);
  }
  return metadata;
};
var extractVideoMetadata = extract;
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  BilibiliClient,
  convertURLToWebsiteKey,
  extract,
  extractVideoMetadata,
  getVideoIdByURL
});
//# sourceMappingURL=index.cjs.map