// opex-yt-id
// Version:
// Extracts YouTube video, channel, and playlist IDs from various URLs
// 233 lines (193 loc) • 10.9 kB
// JavaScript
/*
opex-yt-id/
├── index.js # Main function code
├── index.d.ts # Type declarations file
├── LICENSE # License file (Assuming MIT, not included here)
├── package.json # Package manifest
├── README.md # Documentation
└── test.js # Test file
*/
import { URL, URLSearchParams } from 'url';
// --- Validation Functions ---
/**
 * Checks whether a value has the shape of a YouTube video ID: exactly
 * 11 characters drawn from ASCII letters, digits, `_` and `-`.
 *
 * @param {*} id - Candidate value; non-strings are rejected.
 * @returns {boolean} `true` only when `id` is a string of that shape.
 */
function isValidYouTubeVideoId(id) {
  if (typeof id !== 'string') {
    return false;
  }
  const videoIdShape = /^[a-zA-Z0-9_-]{11}$/;
  return videoIdShape.test(id);
}
/**
 * Checks whether a value has the shape of a standard YouTube channel ID:
 * the literal prefix `UC` followed by 22 characters drawn from ASCII
 * letters, digits, `_` and `-` (24 characters in total).
 *
 * @param {*} id - Candidate value; non-strings are rejected.
 * @returns {boolean} `true` only when `id` is a string of that shape.
 */
function isValidYouTubeChannelId(id) {
  const channelIdShape = /^UC[a-zA-Z0-9_-]{22}$/;
  return typeof id === 'string' ? channelIdShape.test(id) : false;
}
/**
 * Checks whether a value has the shape of a `PL`-prefixed playlist ID:
 * the literal prefix `PL` followed by 10 to 40 characters drawn from
 * ASCII letters, digits, `_` and `-`.
 *
 * @param {*} id - Candidate value; non-strings are rejected.
 * @returns {boolean} `true` only when `id` is a string of that shape.
 */
function isValidYouTubePlaylistId(id) {
  const isString = typeof id === 'string';
  if (!isString) {
    return false;
  }
  return /^PL[a-zA-Z0-9_-]{10,40}$/.test(id);
}
// --- Helper for URL Parsing and Basic Checks ---
/**
 * Parses a raw input string as a URL and returns the pieces the extractor
 * functions need.
 *
 * Before parsing, two common non-standard forms are patched up:
 *   - protocol-relative input (`//host/path`) gets an `https:` prefix;
 *   - input without a leading `letters:` scheme that contains both a `.`
 *     and a `/` also gets an `https:` prefix.
 *
 * @param {*} urlString - Raw input to parse.
 * @returns {null | {url: URL, hostname: string, pathname: string,
 *   searchParams: URLSearchParams, originalInput: string} |
 *   {originalInput: string, error: true}}
 *   `null` for falsy, non-string or whitespace-only input; the parsed pieces
 *   (hostname lower-cased) on success; or `{ originalInput, error: true }`
 *   when the URL constructor rejects the input, so callers can still run
 *   pattern matches against the raw text.
 */
function parseUrlAndBasicInfo(urlString) {
  const isUsableInput =
    Boolean(urlString) && typeof urlString === 'string' && urlString.trim() !== '';
  if (!isUsableInput) {
    return null;
  }

  // Decide whether the input needs an `https:` prefix to be parseable.
  const isProtocolRelative = urlString.startsWith('//');
  const hasScheme = /^[a-zA-Z]+:/.test(urlString);
  const looksLikeBareHostPath =
    !hasScheme && urlString.includes('.') && urlString.includes('/');
  const candidate =
    isProtocolRelative || looksLikeBareHostPath ? `https:${urlString}` : urlString;

  let parsedUrl;
  try {
    parsedUrl = new URL(candidate);
  } catch {
    // Not a parseable URL: hand back the raw text and flag the failure.
    return { originalInput: urlString, error: true };
  }

  return {
    url: parsedUrl,
    hostname: parsedUrl.hostname.toLowerCase(),
    pathname: parsedUrl.pathname,
    searchParams: parsedUrl.searchParams,
    originalInput: urlString,
  };
}
// --- Core Extraction Functions ---
/**
 * Extracts an 11-character YouTube video ID from a URL, an `<iframe>` embed
 * snippet, or a `youtube://` link.
 *
 * Sources are tried in this order; the first valid ID wins:
 *   1. `<iframe ... src=".../embed/ID">` markup, matched on the raw text.
 *   2. `youtube://` links, matched on the raw text.
 *   3. If the input could not be parsed as a URL: a loose pattern match on the
 *      raw text is the only remaining source.
 *   4. Host-specific rules: short-link hosts, the thumbnail host, official
 *      domains (and their subdomains) and a fixed list of third-party
 *      front-ends, including `/attribution_link?u=...` (resolved recursively)
 *      and legacy `/user/...#p/u/N/ID` fragments.
 *   5. `/watch?v=ID` on any host.
 *   6. The loose pattern match from step 3 as a last resort.
 *
 * @param {string} urlString - Input to inspect.
 * @returns {string|null} The video ID, or `null` when none is found.
 */
export function getYouTubeVideoId(urlString) {
  const info = parseUrlAndBasicInfo(urlString);
  if (!info) {
    return null;
  }
  const { url, hostname, pathname, searchParams, originalInput, error } = info;

  // --- Raw-text shortcuts (work whether or not URL parsing succeeded) ---
  if (originalInput.trim().startsWith('<iframe')) {
    const embedSrc = originalInput.match(/src=["'](?:https?:)?\/\/(?:www\.)?(?:youtube\.com|youtube-nocookie\.com)\/embed\/([a-zA-Z0-9_-]{11})/);
    const embedId = embedSrc ? embedSrc[1] : null;
    if (embedId && isValidYouTubeVideoId(embedId)) {
      return embedId;
    }
  }

  if (originalInput.startsWith('youtube://')) {
    const appLink = originalInput.match(/^youtube:\/\/(?:(?:watch\?v=|youtu\.be\/)?)([a-zA-Z0-9_-]{11})/);
    const appLinkId = appLink ? appLink[1] : null;
    if (appLinkId && isValidYouTubeVideoId(appLinkId)) {
      return appLinkId;
    }
  }

  // Loose, unanchored match on the raw text; used when parsing failed and as
  // the final fallback when the structured rules below find nothing.
  const loosePattern = /(?:youtu\.be|y2u\.be)\/([a-zA-Z0-9_-]{11})|(?:v=|vi=|video_id=|docid=|embed\/|shorts\/|live\/|v\/|vi\/|e\/|user\/.+\/|attribution_link.*v(?:%3D|=))([a-zA-Z0-9_-]{11})|i\.ytimg\.com\/(?:vi|an_webp)\/([a-zA-Z0-9_-]{11})\//;
  const looseHit = originalInput.match(loosePattern);
  const looseId = looseHit ? (looseHit[1] || looseHit[2] || looseHit[3]) : null;
  const looseFallback = looseId && isValidYouTubeVideoId(looseId) ? looseId : null;

  if (error) {
    return looseFallback;
  }

  // --- Structured rules on the parsed URL ---
  let candidate = null;
  switch (hostname) {
    case 'youtu.be':
    case 'y2u.be':
      // Short links carry the ID as the first path segment.
      candidate = pathname.split('/')[1];
      break;

    case 'i.ytimg.com': {
      const thumb = pathname.match(/^\/(?:vi|an_webp)\/([a-zA-Z0-9_-]{11})\//);
      if (thumb && thumb[1]) {
        candidate = thumb[1];
      }
      break;
    }

    default: {
      const officialDomains = ['youtube.com', 'youtube-nocookie.com', 'music.youtube.com', 'kids.youtube.com', 'youtubekids.com', 'gaming.youtube.com'];
      const thirdPartyDomains = ['ymusicapp.com', 'yewtu.be', 'vid.puffyan.us', 'invidious.snopyta.org', 'inv.riverside.rocks', 'invidious.io', 'piped.video', 'piped.kavin.rocks', 'piped.syncpundit.io']; // Example list
      const onOfficialHost = officialDomains.some(
        (domain) => hostname === domain || hostname.endsWith(`.${domain}`),
      );
      if (!onOfficialHost && !thirdPartyDomains.includes(hostname)) {
        break;
      }

      // ID embedded directly in the path (/embed/ID, /shorts/ID, ...).
      const directPath = pathname.match(/^\/(?:embed|v|vi|e|shorts|live)\/([a-zA-Z0-9_-]{11})/);
      if (directPath && directPath[1]) {
        candidate = directPath[1];
        break;
      }

      // ID in one of the known query parameters; only accepted when valid.
      const fromQuery = searchParams.get('v') || searchParams.get('vi') || searchParams.get('video_id') || searchParams.get('docid');
      if (fromQuery && isValidYouTubeVideoId(fromQuery)) {
        candidate = fromQuery;
        break;
      }

      if (pathname === '/attribution_link' && searchParams.has('u')) {
        // `u` holds an (already decoded) inner URL or path; resolve it recursively.
        const inner = searchParams.get('u');
        if (inner) {
          try {
            const innerPath = inner.startsWith('/') ? inner : '/' + inner;
            const innerUrl = inner.startsWith('http') ? inner : `https://www.youtube.com${innerPath}`;
            candidate = getYouTubeVideoId(innerUrl);
          } catch (recursiveError) {
            // If resolving the inner URL throws, fall back to the raw parameter text.
            const rawParam = inner.match(/(?:v|vi)(?:%3D|=)([a-zA-Z0-9_-]{11})/);
            if (rawParam && rawParam[1]) {
              candidate = rawParam[1];
            }
          }
        }
      } else if (pathname.startsWith('/user/')) {
        // Legacy user pages reference a video in the fragment: #p/u/<n>/<ID>.
        let fragment = url.hash;
        if (!fragment) {
          const hashAt = originalInput.indexOf('#');
          fragment = hashAt === -1 ? '' : originalInput.substring(hashAt);
        }
        const legacy = fragment.match(/#p\/(?:[a-z]\/)?u\/\d+\/([a-zA-Z0-9_-]{11})/);
        if (legacy && legacy[1]) {
          candidate = legacy[1];
        }
      }
    }
  }

  // --- Generic /watch?v=ID on any host ---
  const stillMissing = !candidate || !isValidYouTubeVideoId(candidate);
  if (stillMissing && pathname === '/watch' && searchParams.has('v')) {
    const watchParam = searchParams.get('v');
    if (isValidYouTubeVideoId(watchParam)) {
      candidate = watchParam;
    }
  }

  // --- Final validation, then the loose match as last resort ---
  if (candidate && isValidYouTubeVideoId(candidate)) {
    return candidate;
  }
  return looseFallback;
}
/**
 * Extracts a YouTube channel ID (`UC` followed by 22 characters).
 *
 * Custom URLs (`/c/name`), legacy usernames (`/user/name`) and handles
 * (`/@name`) are NOT resolved to an ID. Only an ID that literally appears in
 * the input is returned. Note that the raw-text pattern accepts a `UC...` ID
 * placed directly after `channel/`, `c/` or `user/`.
 *
 * Lookup order:
 *   1. If the input could not be parsed as a URL, only the raw-text pattern
 *      is consulted.
 *   2. Otherwise a pathname beginning with `/channel/UC...` (on any host)
 *      takes precedence.
 *   3. The raw-text pattern is the fallback.
 *
 * @param {string} urlString - Input to inspect.
 * @returns {string|null} The channel ID, or `null` when none is found.
 */
export function getYouTubeChannelId(urlString) {
  const parsed = parseUrlAndBasicInfo(urlString);
  if (!parsed) {
    return null;
  }
  const { pathname, originalInput, error } = parsed;

  // Direct match in the original string; the only source when parsing failed.
  const rawMatch = originalInput.match(/(?:channel|c|user)\/(UC[a-zA-Z0-9_-]{22})/);
  const rawId = rawMatch && isValidYouTubeChannelId(rawMatch[1]) ? rawMatch[1] : null;
  if (error) {
    return rawId;
  }

  // The path check is host-agnostic, so it already covers third-party
  // front-ends; no separate per-host lookup is needed.
  const pathMatch = pathname.match(/^\/channel\/(UC[a-zA-Z0-9_-]{22})/);
  if (pathMatch && isValidYouTubeChannelId(pathMatch[1])) {
    return pathMatch[1];
  }
  return rawId;
}
/**
 * Extracts a YouTube playlist ID of the `PL...` form (`PL` followed by
 * 10 to 40 characters) from the `list` query parameter.
 *
 * Lookup order:
 *   1. If the input could not be parsed as a URL, only a raw-text match for
 *      `?list=` / `&list=` is consulted.
 *   2. Otherwise the parsed `list` query parameter (on any host and path)
 *      takes precedence.
 *   3. The raw-text match is the fallback.
 *
 * @param {string} urlString - Input to inspect.
 * @returns {string|null} The playlist ID, or `null` when none is found.
 */
export function getYouTubePlaylistId(urlString) {
  const parsed = parseUrlAndBasicInfo(urlString);
  if (!parsed) {
    return null;
  }
  const { searchParams, originalInput, error } = parsed;

  // Direct match in the original string; the only source when parsing failed.
  const rawMatch = originalInput.match(/[?&]list=(PL[a-zA-Z0-9_-]{10,40})/);
  const rawId = rawMatch && isValidYouTubePlaylistId(rawMatch[1]) ? rawMatch[1] : null;
  if (error) {
    return rawId;
  }

  // `get` returns null when the parameter is absent, and the lookup does not
  // depend on the path, so `/playlist?list=...` needs no special case.
  const listParam = searchParams.get('list');
  if (listParam && isValidYouTubePlaylistId(listParam)) {
    return listParam;
  }
  return rawId;
}