@imput/youtubei.js

A JavaScript client for YouTube's private API, known as InnerTube. Fork of youtubei.js

StreamingInfo.js (526 lines, 23.8 kB)
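
The file below is the library's streaming-info helper: it turns InnerTube adaptive formats into grouped audio, video, storyboard and text sets that a DASH manifest generator can consume. For orientation, here is a minimal sketch of how streaming data typically reaches this module. It assumes the fork keeps upstream youtubei.js's top-level API (Innertube.create, getInfo); this file alone does not confirm that, so treat it as an illustration rather than documented usage.

// Sketch only: assumes the fork exposes the same entry points as upstream youtubei.js.
import { Innertube } from '@imput/youtubei.js';

const yt = await Innertube.create();
const info = await yt.getInfo('VIDEO_ID'); // placeholder video ID

// The streaming data consumed by getStreamingInfo() below comes from responses like this one.
console.log(info.streaming_data?.adaptive_formats?.length);
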
import PlayerStoryboardSpec from '../parser/classes/PlayerStoryboardSpec.js';
import { getStringBetweenStrings, InnertubeError, Platform } from './Utils.js';
import * as Constants from './Constants.js';
import * as Log from './Log.js';

const TAG_ = 'StreamingInfo';

function getFormatGroupings(formats, is_post_live_dvr) {
  const group_info = new Map();
  const has_multiple_audio_tracks = formats.some((fmt) => !!fmt.audio_track);
  for (const format of formats) {
    if ((!format.index_range || !format.init_range) && !format.is_type_otf && !is_post_live_dvr) {
      continue;
    }
    const mime_type = format.mime_type.split(';')[0];
    // Codec without any profile or level information
    const just_codec = getStringBetweenStrings(format.mime_type, 'codecs="', '"')?.split('.')[0];
    // HDR videos have both SDR and HDR vp9 formats, so we want to stick them in different groups
    const color_info = format.color_info ? Object.values(format.color_info).join('-') : '';
    const audio_track_id = format.audio_track?.id || '';
    const drc = format.is_drc ? 'drc' : '';
    const group_id = `${mime_type}-${just_codec}-${color_info}-${audio_track_id}-${drc}`;
    if (!group_info.has(group_id)) {
      group_info.set(group_id, []);
    }
    group_info.get(group_id)?.push(format);
  }
  return {
    groups: Array.from(group_info.values()),
    has_multiple_audio_tracks
  };
}

function hoistCodecsIfPossible(formats, hoisted) {
  if (formats.length > 1 && new Set(formats.map((format) => getStringBetweenStrings(format.mime_type, 'codecs="', '"'))).size === 1) {
    hoisted.push('codecs');
    return getStringBetweenStrings(formats[0].mime_type, 'codecs="', '"');
  }
}

function hoistNumberAttributeIfPossible(formats, property, hoisted) {
  if (formats.length > 1 && new Set(formats.map((format) => format.fps)).size === 1) {
    hoisted.push(property);
    return Number(formats[0][property]);
  }
}

function hoistAudioChannelsIfPossible(formats, hoisted) {
  if (formats.length > 1 && new Set(formats.map((format) => format.audio_channels || 2)).size === 1) {
    hoisted.push('AudioChannelConfiguration');
    return formats[0].audio_channels;
  }
}

async function getOTFSegmentTemplate(url, actions) {
  // Fetch the first segment, as it contains the segment durations which we need to generate the manifest
  const response = await actions.session.http.fetch_function(`${url}&rn=0&sq=0`, {
    method: 'GET',
    headers: Constants.STREAM_HEADERS,
    redirect: 'follow'
  });
  // Example OTF video: https://www.youtube.com/watch?v=DJ8GQUNUXGM
  // There might have been redirects; if there were, we want to write the resolved URL to the manifest
  // so that the player doesn't have to follow the redirects every time it requests a segment
  const resolved_url = response.url.replace('&rn=0', '').replace('&sq=0', '');
  // In this function we only need the segment durations and how often the durations are repeated.
  // The segment count could be useful for other stuff though.
  // The response body contains a lot of junk, but the useful stuff looks like this:
  // 'Segment-Count: 922\r\n' +
  // 'Segment-Durations-Ms: 5120(r=920),3600,\r\n'
  const response_text = await response.text();
  const segment_duration_strings = getStringBetweenStrings(response_text, 'Segment-Durations-Ms:', '\r\n')?.split(',');
  if (!segment_duration_strings) {
    throw new InnertubeError('Failed to extract the segment durations from this OTF stream', { url });
  }
  const segment_durations = [];
  for (const segment_duration_string of segment_duration_strings) {
    const trimmed_segment_duration = segment_duration_string.trim();
    if (trimmed_segment_duration.length === 0) {
      continue;
    }
    let repeat_count;
    const repeat_count_string = getStringBetweenStrings(trimmed_segment_duration, '(r=', ')');
    if (repeat_count_string) {
      repeat_count = parseInt(repeat_count_string);
    }
    segment_durations.push({
      duration: parseInt(trimmed_segment_duration),
      repeat_count
    });
  }
  return {
    init_url: `${resolved_url}&sq=0`,
    media_url: `${resolved_url}&sq=$Number$`,
    timeline: segment_durations
  };
}

async function getPostLiveDvrInfo(transformed_url, actions) {
  const response = await actions.session.http.fetch_function(`${transformed_url}&rn=0&sq=0`, {
    method: 'HEAD',
    headers: Constants.STREAM_HEADERS,
    redirect: 'follow'
  });
  const duration_ms = parseInt(response.headers.get('X-Head-Time-Millis') || '');
  const segment_count = parseInt(response.headers.get('X-Head-Seqnum') || '');
  if (isNaN(duration_ms) || isNaN(segment_count)) {
    throw new InnertubeError('Failed to extract the duration or segment count for this Post Live DVR video');
  }
  return {
    duration: duration_ms / 1000,
    segment_count
  };
}

async function getPostLiveDvrDuration(shared_post_live_dvr_info, format, url_transformer, actions, player, cpn) {
  if (!shared_post_live_dvr_info.item) {
    const url = new URL(format.decipher(player));
    url.searchParams.set('cpn', cpn || '');
    const transformed_url = url_transformer(url).toString();
    shared_post_live_dvr_info.item = await getPostLiveDvrInfo(transformed_url, actions);
  }
  return shared_post_live_dvr_info.item.duration;
}

function getSegmentInfo(format, url_transformer, actions, player, cpn, shared_post_live_dvr_info, is_sabr) {
  let transformed_url = '';
  if (is_sabr) {
    const formatKey = `${format.itag || ''}:${format.xtags || ''}`;
    transformed_url = `sabr://${format.has_video ? 'video' : 'audio'}?key=${formatKey}`;
  }
  else {
    const url = new URL(format.decipher(player));
    url.searchParams.set('cpn', cpn || '');
    transformed_url = url_transformer(url).toString();
  }
  if (format.is_type_otf) {
    if (!actions)
      throw new InnertubeError('Unable to get segment durations for this OTF stream without an Actions instance', { format });
    const info = {
      is_oft: true,
      is_post_live_dvr: false,
      getSegmentTemplate() {
        return getOTFSegmentTemplate(transformed_url, actions);
      }
    };
    return info;
  }
  if (shared_post_live_dvr_info) {
    if (!actions) {
      throw new InnertubeError('Unable to get segment count for this Post Live DVR video without an Actions instance', { format });
    }
    const target_duration_dec = format.target_duration_dec;
    if (typeof target_duration_dec !== 'number') {
      throw new InnertubeError('Format is missing target_duration_dec', { format });
    }
    const info = {
      is_oft: false,
      is_post_live_dvr: true,
      async getSegmentTemplate() {
        if (!shared_post_live_dvr_info.item) {
          shared_post_live_dvr_info.item = await getPostLiveDvrInfo(transformed_url, actions);
        }
        return {
          media_url: `${transformed_url}&sq=$Number$`,
          timeline: [
            {
              duration: target_duration_dec * 1000,
              repeat_count: shared_post_live_dvr_info.item.segment_count
            }
          ]
        };
      }
    };
    return info;
  }
  if (!format.index_range || !format.init_range)
    throw new InnertubeError('Index and init ranges not available', { format });
  const info = {
    is_oft: false,
    is_post_live_dvr: false,
    base_url: transformed_url,
    index_range: format.index_range,
    init_range: format.init_range
  };
  return info;
}

function getAudioRepresentation(format, hoisted, url_transformer, actions, player, cpn, shared_post_live_dvr_info, is_sabr) {
  const uid_parts = [ format.itag.toString() ];
  if (format.audio_track) {
    uid_parts.push(format.audio_track.id);
  }
  if (format.is_drc) {
    uid_parts.push('drc');
  }
  const rep = {
    uid: uid_parts.join('-'),
    bitrate: format.bitrate,
    codecs: !hoisted.includes('codecs') ? getStringBetweenStrings(format.mime_type, 'codecs="', '"') : undefined,
    audio_sample_rate: !hoisted.includes('audio_sample_rate') ? format.audio_sample_rate : undefined,
    channels: !hoisted.includes('AudioChannelConfiguration') ? format.audio_channels || 2 : undefined,
    segment_info: getSegmentInfo(format, url_transformer, actions, player, cpn, shared_post_live_dvr_info, is_sabr)
  };
  return rep;
}

function getTrackRoles(format, has_drc_streams) {
  if (!format.audio_track && !has_drc_streams) {
    return;
  }
  const roles = [
    format.is_original ? 'main' : 'alternate'
  ];
  if (format.is_dubbed || format.is_auto_dubbed)
    roles.push('dub');
  if (format.is_descriptive)
    roles.push('description');
  if (format.is_drc)
    roles.push('enhanced-audio-intelligibility');
  return roles;
}

function getAudioSet(formats, url_transformer, actions, player, cpn, shared_post_live_dvr_info, drc_labels, is_sabr) {
  const first_format = formats[0];
  const { audio_track } = first_format;
  const hoisted = [];
  const has_drc_streams = !!drc_labels;
  let track_name;
  if (audio_track) {
    if (has_drc_streams && first_format.is_drc) {
      track_name = drc_labels.label_drc_multiple(audio_track.display_name);
    }
    else {
      track_name = audio_track.display_name;
    }
  }
  else if (has_drc_streams) {
    track_name = first_format.is_drc ? drc_labels.label_drc : drc_labels.label_original;
  }
  const set = {
    mime_type: first_format.mime_type.split(';')[0],
    language: first_format.language ?? undefined,
    codecs: hoistCodecsIfPossible(formats, hoisted),
    audio_sample_rate: hoistNumberAttributeIfPossible(formats, 'audio_sample_rate', hoisted),
    track_name,
    track_roles: getTrackRoles(first_format, has_drc_streams),
    channels: hoistAudioChannelsIfPossible(formats, hoisted),
    drm_families: first_format.drm_families,
    drm_track_type: first_format.drm_track_type,
    representations: formats.map((format) => getAudioRepresentation(format, hoisted, url_transformer, actions, player, cpn, shared_post_live_dvr_info, is_sabr))
  };
  return set;
}

const COLOR_PRIMARIES = {
  BT709: '1',
  BT2020: '9'
};

const COLOR_TRANSFER_CHARACTERISTICS = {
  BT709: '1',
  BT2020_10: '14',
  SMPTEST2084: '16',
  ARIB_STD_B67: '18'
};

// This list is incomplete, as the player.js doesn't currently have any code for matrix coefficients,
// so it doesn't have a list like the other two; this is just based on what we've seen in responses
const COLOR_MATRIX_COEFFICIENTS = {
  BT709: '1',
  BT2020_NCL: '14'
};

function getColorInfo(format) {
  // Section 5.5 Video source metadata signalling https://dashif.org/docs/IOP-Guidelines/DASH-IF-IOP-Part7-v5.0.0.pdf
  // Section 8 Video code points https://www.itu.int/rec/T-REC-H.273-202107-I/en
  // The player.js file was also helpful
  const color_info = format.color_info;
  let primaries;
  let transfer_characteristics;
  let matrix_coefficients;
  if (color_info) {
    if (color_info.primaries) {
      primaries = COLOR_PRIMARIES[color_info.primaries];
    }
    if (color_info.transfer_characteristics) {
      transfer_characteristics = COLOR_TRANSFER_CHARACTERISTICS[color_info.transfer_characteristics];
    }
    if (color_info.matrix_coefficients) {
      matrix_coefficients = COLOR_MATRIX_COEFFICIENTS[color_info.matrix_coefficients];
      if (!matrix_coefficients) {
        const url = new URL(format.url);
        const anonymisedFormat = JSON.parse(JSON.stringify(format));
        anonymisedFormat.url = 'REDACTED';
        anonymisedFormat.signature_cipher = 'REDACTED';
        anonymisedFormat.cipher = 'REDACTED';
        Log.warn(TAG_, `Unknown matrix coefficients "${color_info.matrix_coefficients}". The DASH manifest is still usable without this.\n` +
          `Please report it at ${Platform.shim.info.bugs_url} so we can add support for it.\n` +
          `InnerTube client: ${url.searchParams.get('c')}\nformat:`, anonymisedFormat);
      }
    }
  }
  else if (getStringBetweenStrings(format.mime_type, 'codecs="', '"')?.startsWith('avc1')) {
    // YouTube's h264 streams always seem to be SDR, so this is a pretty safe bet.
    transfer_characteristics = COLOR_TRANSFER_CHARACTERISTICS.BT709;
  }
  const info = {
    primaries,
    transfer_characteristics,
    matrix_coefficients
  };
  return info;
}

function getVideoRepresentation(format, url_transformer, hoisted, player, actions, cpn, shared_post_live_dvr_info, is_sabr) {
  const rep = {
    uid: format.itag.toString(),
    bitrate: format.bitrate,
    width: format.width,
    height: format.height,
    codecs: !hoisted.includes('codecs') ? getStringBetweenStrings(format.mime_type, 'codecs="', '"') : undefined,
    fps: !hoisted.includes('fps') ? format.fps : undefined,
    segment_info: getSegmentInfo(format, url_transformer, actions, player, cpn, shared_post_live_dvr_info, is_sabr)
  };
  return rep;
}

function getVideoSet(formats, url_transformer, player, actions, cpn, shared_post_live_dvr_info, is_sabr) {
  const first_format = formats[0];
  const color_info = getColorInfo(first_format);
  const hoisted = [];
  const set = {
    mime_type: first_format.mime_type.split(';')[0],
    color_info,
    codecs: hoistCodecsIfPossible(formats, hoisted),
    fps: hoistNumberAttributeIfPossible(formats, 'fps', hoisted),
    drm_families: first_format.drm_families,
    drm_track_type: first_format.drm_track_type,
    representations: formats.map((format) => getVideoRepresentation(format, url_transformer, hoisted, player, actions, cpn, shared_post_live_dvr_info, is_sabr))
  };
  return set;
}

function getStoryboardInfo(storyboards) {
  const mime_info = new Map();
  const boards = storyboards.is(PlayerStoryboardSpec) ? storyboards.boards : [ storyboards.board ];
  for (const storyboard of boards) {
    const extension = new URL(storyboard.template_url).pathname.split('.').pop();
    const mime_type = `image/${extension === 'jpg' ? 'jpeg' : extension}`;
    if (!mime_info.has(mime_type)) {
      mime_info.set(mime_type, []);
    }
    mime_info.get(mime_type)?.push(storyboard);
  }
  return mime_info;
}

async function getStoryboardMimeType(actions, board, transform_url, probable_mime_type, shared_response) {
  const url = board.template_url;
  const req_url = transform_url(new URL(url.replace('$M', '0')));
  const res_promise = shared_response.response ?
    shared_response.response :
    actions.session.http.fetch_function(req_url, {
      method: 'HEAD',
      headers: Constants.STREAM_HEADERS
    });
  shared_response.response = res_promise;
  const res = await res_promise;
  return res.headers.get('Content-Type') || probable_mime_type;
}

async function getStoryboardBitrate(actions, board, shared_response) {
  const url = board.template_url;
  const response_promises = [];
  // Set a limit so we don't take forever for long videos
  const request_limit = Math.min(board.type === 'vod' ? board.storyboard_count : 5, 10);
  for (let i = 0; i < request_limit; i++) {
    const req_url = new URL(url.replace('$M', i.toString()));
    const response_promise = i === 0 && shared_response.response ?
      shared_response.response :
      actions.session.http.fetch_function(req_url, {
        method: 'HEAD',
        headers: Constants.STREAM_HEADERS
      });
    if (i === 0)
      shared_response.response = response_promise;
    response_promises.push(response_promise);
  }
  // Run the requests in parallel to avoid causing too much delay
  const responses = await Promise.all(response_promises);
  const content_lengths = [];
  for (const response of responses) {
    content_lengths.push(parseInt(response.headers.get('Content-Length') || '0'));
  }
  // This is a rough estimate, so it probably won't reflect the actual peak bitrate.
  // Hopefully it's close enough, because figuring out the actual peak bitrate would require downloading and analysing all storyboard tiles.
  return Math.ceil((Math.max(...content_lengths) / (board.rows * board.columns)) * 8);
}

function getImageRepresentation(duration, actions, board, transform_url, shared_response) {
  const url = board.template_url;
  const template_url = new URL(url.replace('$M', '$Number$'));
  let template_duration;
  if (board.type === 'vod') {
    // Here duration is the duration of the video
    template_duration = duration / board.storyboard_count;
  }
  else {
    // Here duration is the duration of one of the video/audio segments.
    // As there is one tile per segment, we need to multiply it by the number of tiles.
    template_duration = duration * board.columns * board.rows;
  }
  const rep = {
    uid: `thumbnails_${board.thumbnail_width}x${board.thumbnail_height}`,
    getBitrate() {
      return getStoryboardBitrate(actions, board, shared_response);
    },
    sheet_width: board.thumbnail_width * board.columns,
    sheet_height: board.thumbnail_height * board.rows,
    thumbnail_height: board.thumbnail_height,
    thumbnail_width: board.thumbnail_width,
    rows: board.rows,
    columns: board.columns,
    template_duration: Math.round(template_duration),
    template_url: transform_url(template_url).toString(),
    getURL(n) {
      return template_url.toString().replace('$Number$', n.toString());
    }
  };
  return rep;
}

function getImageSets(duration, actions, storyboards, transform_url) {
  const mime_info = getStoryboardInfo(storyboards);
  const shared_response = {};
  return Array.from(mime_info.entries()).map(([ type, boards ]) => ({
    probable_mime_type: type,
    getMimeType() {
      return getStoryboardMimeType(actions, boards[0], transform_url, type, shared_response);
    },
    representations: boards.map((board) => getImageRepresentation(duration, actions, board, transform_url, shared_response))
  }));
}

function getTextSets(caption_tracks, format, transform_url) {
  const mime_type = format === 'vtt' ? 'text/vtt' : 'application/ttml+xml';
  return caption_tracks.map((caption_track) => {
    const url = new URL(caption_track.base_url);
    url.searchParams.set('fmt', format);
    const track_roles = [ 'caption' ];
    if (url.searchParams.has('tlang')) {
      track_roles.push('dub');
    }
    return {
      mime_type,
      language: caption_track.language_code,
      track_name: caption_track.name.toString(),
      track_roles,
      representation: {
        uid: `text-${caption_track.vss_id}`,
        base_url: transform_url(url).toString()
      }
    };
  });
}

export function getStreamingInfo(streaming_data, is_post_live_dvr = false, url_transformer = (url) => url, format_filter, cpn, player, actions, storyboards, caption_tracks, options) {
  if (!streaming_data)
    throw new InnertubeError('Streaming data not available');
  const formats = format_filter ?
    streaming_data.adaptive_formats.filter((fmt) => !format_filter(fmt)) :
    streaming_data.adaptive_formats;
  let getDuration;
  let shared_post_live_dvr_info;
  if (is_post_live_dvr) {
    shared_post_live_dvr_info = {};
    if (!actions) {
      throw new InnertubeError('Unable to get duration or segment count for this Post Live DVR video without an Actions instance');
    }
    getDuration = () => {
      // Should never happen, as we set it just a few lines above, but this stops TypeScript complaining
      if (!shared_post_live_dvr_info) {
        return Promise.resolve(0);
      }
      return getPostLiveDvrDuration(shared_post_live_dvr_info, formats[0], url_transformer, actions, player, cpn);
    };
  }
  else {
    const duration = formats[0].approx_duration_ms / 1000;
    getDuration = () => Promise.resolve(duration);
  }
  const { groups, has_multiple_audio_tracks } = getFormatGroupings(formats, is_post_live_dvr);
  const { video_groups, audio_groups } = groups.reduce((acc, formats) => {
    if (formats[0].has_audio) {
      // Some videos with multiple audio tracks have a broken one that doesn't have any audio track information.
      // It seems to be the same as the default audio track, but broken.
      // We want to ignore it, as it messes up audio track selection in players and YouTube ignores it too.
      // At the time of writing, this video has a broken audio track: https://youtu.be/UJeSWbR6W04
      if (has_multiple_audio_tracks && !formats[0].audio_track)
        return acc;
      acc.audio_groups.push(formats);
      return acc;
    }
    acc.video_groups.push(formats);
    return acc;
  }, {
    video_groups: [],
    audio_groups: []
  });
  let drc_labels;
  if (audio_groups.flat().some((format) => format.is_drc)) {
    drc_labels = {
      label_original: options?.label_original || 'Original',
      label_drc: options?.label_drc || 'Stable Volume',
      label_drc_multiple: options?.label_drc_multiple || ((display_name) => `${display_name} (Stable Volume)`)
    };
  }
  const audio_sets = audio_groups.map((formats) => getAudioSet(formats, url_transformer, actions, player, cpn, shared_post_live_dvr_info, drc_labels, options?.is_sabr));
  const video_sets = video_groups.map((formats) => getVideoSet(formats, url_transformer, player, actions, cpn, shared_post_live_dvr_info, options?.is_sabr));
  let image_sets = [];
  // XXX: We need to make requests to get the image sizes, so we'll skip the storyboards if we don't have an Actions instance
  if (storyboards && actions) {
    let duration;
    if (storyboards.is(PlayerStoryboardSpec)) {
      duration = formats[0].approx_duration_ms / 1000;
    }
    else {
      const target_duration_dec = formats[0].target_duration_dec;
      if (target_duration_dec === undefined)
        throw new InnertubeError('Format is missing target_duration_dec', { format: formats[0] });
      duration = target_duration_dec;
    }
    image_sets = getImageSets(duration, actions, storyboards, url_transformer);
  }
  let text_sets = [];
  if (caption_tracks && options?.captions_format) {
    if (options.captions_format !== 'vtt' && options.captions_format !== 'ttml') {
      throw new InnertubeError('Invalid captions format', options.captions_format);
    }
    text_sets = getTextSets(caption_tracks, options.captions_format, url_transformer);
  }
  const info = {
    getDuration,
    audio_sets,
    video_sets,
    image_sets,
    text_sets
  };
  return info;
}
//# sourceMappingURL=StreamingInfo.js.map
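As the exported function shows, getStreamingInfo returns { getDuration, audio_sets, video_sets, image_sets, text_sets }, and every representation carries a segment_info that is either indexed (base_url plus index/init byte ranges), OTF, or Post Live DVR. The following is a minimal sketch of walking that structure, based only on the code above; streaming_data, cpn, player and actions are assumed to already be in scope (for example from an Innertube session as in the earlier sketch), and they are not defined by this module.

// Sketch: consuming the object returned by getStreamingInfo() (exported above).
// streaming_data, cpn, player and actions are assumed to come from an Innertube session.
const streamInfo = getStreamingInfo(
  streaming_data,   // adaptive formats from a player response
  false,            // is_post_live_dvr
  (url) => url,     // url_transformer: identity, keep googlevideo URLs as-is
  undefined,        // format_filter: formats for which this returns true are dropped
  cpn,              // client playback nonce
  player,           // used by format.decipher() for non-SABR URLs
  actions           // needed for OTF, Post Live DVR and storyboard requests
);

console.log('duration (s):', await streamInfo.getDuration());

for (const set of streamInfo.video_sets) {
  for (const rep of set.representations) {
    const seg = rep.segment_info;
    if (seg.is_oft) { // note: the flag is spelled is_oft in this module
      // OTF stream: the segment layout is fetched lazily from the first segment
      const template = await seg.getSegmentTemplate();
      console.log(rep.uid, 'OTF timeline entries:', template.timeline.length);
    } else if (seg.is_post_live_dvr) {
      const template = await seg.getSegmentTemplate();
      console.log(rep.uid, 'Post Live DVR segments:', template.timeline[0].repeat_count);
    } else {
      // Indexed representation: byte ranges against a single base URL
      console.log(rep.uid, seg.base_url, seg.index_range, seg.init_range);
    }
  }
}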