UNPKG

opex-yt-id

Version:

Extracts YouTube video, channel, and playlist IDs from various URLs

233 lines (193 loc) 10.9 kB
/*
 * opex-yt-id/
 * ├── index.js      # Main function code
 * ├── index.d.ts    # Type declarations
 * ├── LICENSE       # License file (Assuming MIT, not included here)
 * ├── package.json  # Package manifest
 * ├── README.md     # Documentation
 * └── test.js       # Tests
 */
import { URL, URLSearchParams } from 'url';

// --- Shared constants ---

/** Shape of a YouTube video ID: exactly 11 characters from the URL-safe base64 alphabet. */
const VIDEO_ID_PATTERN = /^[a-zA-Z0-9_-]{11}$/;

/**
 * Embed endpoint keywords that happen to be 11 characters long and therefore
 * match VIDEO_ID_PATTERN, e.g. /embed/videoseries?list=... and
 * /embed/live_stream?channel=... They are never video IDs.
 */
const RESERVED_EMBED_KEYWORDS = new Set(['videoseries', 'live_stream']);

/** Official YouTube domains; any subdomain of these is accepted as well. */
const OFFICIAL_DOMAINS = [
  'youtube.com',
  'youtube-nocookie.com',
  'music.youtube.com',
  'kids.youtube.com',
  'youtubekids.com',
  'gaming.youtube.com',
];

/** Third-party front-ends that reuse YouTube's URL layout (example list, exact host match only). */
const THIRD_PARTY_VIDEO_DOMAINS = new Set([
  'ymusicapp.com',
  'yewtu.be',
  'vid.puffyan.us',
  'invidious.snopyta.org',
  'inv.riverside.rocks',
  'invidious.io',
  'piped.video',
  'piped.kavin.rocks',
  'piped.syncpundit.io',
]);

/** Query parameters that may carry a video ID, in priority order. */
const VIDEO_ID_QUERY_PARAMS = ['v', 'vi', 'video_id', 'docid'];

/** Loose pattern applied to the raw input; used when URL parsing fails or yields nothing. */
const SIMPLE_VIDEO_PATTERN = /(?:youtu\.be|y2u\.be)\/([a-zA-Z0-9_-]{11})|(?:v=|vi=|video_id=|docid=|embed\/|shorts\/|live\/|v\/|vi\/|e\/|user\/.+\/|attribution_link.*v(?:%3D|=))([a-zA-Z0-9_-]{11})|i\.ytimg\.com\/(?:vi|an_webp)\/([a-zA-Z0-9_-]{11})\//;

// --- Validation Functions ---

/**
 * Checks whether a value looks like a YouTube video ID.
 * @param {*} id - Candidate value.
 * @returns {boolean} True for an 11-character ID that is not a reserved embed keyword.
 */
function isValidYouTubeVideoId(id) {
  return typeof id === 'string' && VIDEO_ID_PATTERN.test(id) && !RESERVED_EMBED_KEYWORDS.has(id);
}

/**
 * Checks whether a value looks like a standard channel ID ('UC' + 22 characters).
 * @param {*} id - Candidate value.
 * @returns {boolean}
 */
function isValidYouTubeChannelId(id) {
  return typeof id === 'string' && /^UC[a-zA-Z0-9_-]{22}$/.test(id);
}

/**
 * Checks whether a value looks like a 'PL' playlist ID.
 * The length after the prefix varies, so a range of 10-40 characters is accepted.
 * @param {*} id - Candidate value.
 * @returns {boolean}
 */
function isValidYouTubePlaylistId(id) {
  return typeof id === 'string' && /^PL[a-zA-Z0-9_-]{10,40}$/.test(id);
}

// --- Helper for URL Parsing and Basic Checks ---

/**
 * Parses the input as a URL after repairing common non-standard forms.
 * @param {*} urlString - Raw user input.
 * @returns {null|{originalInput: string, error: true}|{url: URL, hostname: string, pathname: string, searchParams: URLSearchParams, originalInput: string}}
 *   null for non-string or blank input; an object with `error: true` when the
 *   input cannot be parsed as a URL (the trimmed input is kept for regex
 *   fallbacks); otherwise the parsed URL parts.
 */
function parseUrlAndBasicInfo(urlString) {
  if (typeof urlString !== 'string') return null;

  // Trim once so prefix checks below are not defeated by surrounding whitespace.
  const originalInput = urlString.trim();
  if (originalInput === '') return null;

  let candidate = originalInput;
  if (candidate.startsWith('//')) {
    // Protocol-relative URL.
    candidate = `https:${candidate}`;
  } else if (!/^[a-zA-Z]+:/.test(candidate) && candidate.includes('.') && candidate.includes('/')) {
    // Scheme-less input such as "youtube.com/watch?v=...".
    candidate = `https://${candidate}`;
  }

  try {
    const url = new URL(candidate);
    return {
      url,
      hostname: url.hostname.toLowerCase(),
      pathname: url.pathname,
      searchParams: url.searchParams,
      originalInput,
    };
  } catch (parseError) {
    // Not a parseable URL (e.g. an <iframe> snippet): callers fall back to regex matching.
    return { originalInput, error: true };
  }
}

/** Returns true when the hostname is an official YouTube domain/subdomain or a listed third-party front-end. */
function isKnownVideoHost(hostname) {
  const isOfficial = OFFICIAL_DOMAINS.some(
    (domain) => hostname === domain || hostname.endsWith(`.${domain}`),
  );
  return isOfficial || THIRD_PARTY_VIDEO_DOMAINS.has(hostname);
}

/** Extracts a video ID candidate from a parsed URL whose host is already known to be YouTube-like. */
function extractVideoIdFromKnownHost({ url, pathname, searchParams, originalInput }) {
  // /embed/{ID}, /v/{ID}, /shorts/{ID}, /live/{ID}, ... (reserved keywords are rejected by the validator).
  const pathMatch = pathname.match(/^\/(?:embed|v|vi|e|shorts|live)\/([a-zA-Z0-9_-]{11})/);
  if (pathMatch && isValidYouTubeVideoId(pathMatch[1])) return pathMatch[1];

  // First query parameter that actually holds a valid ID.
  const idFromQuery = VIDEO_ID_QUERY_PARAMS
    .map((name) => searchParams.get(name))
    .find(isValidYouTubeVideoId);
  if (idFromQuery) return idFromQuery;

  if (pathname === '/attribution_link' && searchParams.has('u')) {
    // `u` carries the real target, already percent-decoded by searchParams.get().
    const innerUrlPath = searchParams.get('u');
    if (!innerUrlPath) return null;
    const innerUrlToParse = innerUrlPath.startsWith('http')
      ? innerUrlPath
      : `https://www.youtube.com${innerUrlPath.startsWith('/') ? innerUrlPath : `/${innerUrlPath}`}`;
    return getYouTubeVideoId(innerUrlToParse);
  }

  if (pathname.startsWith('/user/')) {
    // Legacy user channel pages reference videos in the fragment: #p/u/{n}/{ID}.
    const fragment = url.hash
      || (originalInput.includes('#') ? originalInput.substring(originalInput.indexOf('#')) : '');
    const fragmentMatch = fragment.match(/#p\/(?:[a-z]\/)?u\/\d+\/([a-zA-Z0-9_-]{11})/);
    if (fragmentMatch) return fragmentMatch[1];
  }

  return null;
}

// --- Core Extraction Functions ---

/**
 * Extracts YouTube Video ID.
 * @param {string} urlString - A URL, scheme-less URL, youtube:// link, or <iframe> embed snippet.
 * @returns {string|null} The 11-character video ID, or null when none can be found.
 */
export function getYouTubeVideoId(urlString) {
  const parsed = parseUrlAndBasicInfo(urlString);
  if (!parsed) return null;
  const { hostname, pathname, searchParams, originalInput, error } = parsed;

  // Phase 1: direct string matching (works even when URL parsing failed).
  if (originalInput.startsWith('<iframe')) {
    const iframeSrcMatch = originalInput.match(/src=["'](?:https?:)?\/\/(?:www\.)?(?:youtube\.com|youtube-nocookie\.com)\/embed\/([a-zA-Z0-9_-]{11})/);
    if (iframeSrcMatch && isValidYouTubeVideoId(iframeSrcMatch[1])) return iframeSrcMatch[1];
  }
  if (originalInput.startsWith('youtube://')) {
    const schemeMatch = originalInput.match(/^youtube:\/\/(?:(?:watch\?v=|youtu\.be\/)?)([a-zA-Z0-9_-]{11})/);
    if (schemeMatch && isValidYouTubeVideoId(schemeMatch[1])) return schemeMatch[1];
  }

  const simpleMatch = originalInput.match(SIMPLE_VIDEO_PATTERN);
  const simpleCandidate = simpleMatch ? (simpleMatch[1] || simpleMatch[2] || simpleMatch[3]) : null;
  const idFromSimpleMatch = isValidYouTubeVideoId(simpleCandidate) ? simpleCandidate : null;

  // If URL parsing failed, rely solely on the regex match.
  if (error) return idFromSimpleMatch;

  // Phase 2: host-aware extraction from the parsed URL.
  let potentialId = null;
  if (hostname === 'youtu.be' || hostname === 'y2u.be') {
    potentialId = pathname.split('/')[1];
  } else if (hostname === 'i.ytimg.com') {
    const imgMatch = pathname.match(/^\/(?:vi|an_webp)\/([a-zA-Z0-9_-]{11})\//);
    if (imgMatch) potentialId = imgMatch[1];
  } else if (isKnownVideoHost(hostname)) {
    potentialId = extractVideoIdFromKnownHost(parsed);
  }

  // Phase 3: generic fallback for /watch?v={ID} on ANY domain.
  if (!isValidYouTubeVideoId(potentialId) && pathname === '/watch') {
    const genericV = searchParams.get('v');
    if (isValidYouTubeVideoId(genericV)) potentialId = genericV;
  }

  // Phase 4: final validation, then fall back to the loose regex match.
  if (isValidYouTubeVideoId(potentialId)) return potentialId;
  return idFromSimpleMatch;
}

/**
 * Extracts YouTube Channel ID (starting with UC).
 * Does NOT resolve custom URLs (/c/), legacy usernames (/user/), or handles (/@/).
 * @param {string} urlString - A channel URL or any string containing "channel/UC...".
 * @returns {string|null} The 24-character channel ID, or null when none can be found.
 */
export function getYouTubeChannelId(urlString) {
  const parsed = parseUrlAndBasicInfo(urlString);
  if (!parsed) return null;
  const { pathname, originalInput, error } = parsed;

  // Loose match on the raw input (useful if URL parsing fails or for non-standard formats).
  const simpleMatch = originalInput.match(/(?:channel|c|user)\/(UC[a-zA-Z0-9_-]{22})/);
  const idFromSimpleMatch = simpleMatch && isValidYouTubeChannelId(simpleMatch[1])
    ? simpleMatch[1]
    : null;
  if (error) return idFromSimpleMatch;

  // /channel/UC... is the same path layout on youtube.com and on third-party front-ends,
  // so a single host-independent check covers both.
  const pathMatch = pathname.match(/^\/channel\/(UC[a-zA-Z0-9_-]{22})/);
  if (pathMatch && isValidYouTubeChannelId(pathMatch[1])) return pathMatch[1];

  return idFromSimpleMatch;
}

/**
 * Extracts YouTube Playlist ID (starting with PL).
 * @param {string} urlString - Any URL carrying a `list` query parameter.
 * @returns {string|null} The playlist ID, or null when none can be found.
 */
export function getYouTubePlaylistId(urlString) {
  const parsed = parseUrlAndBasicInfo(urlString);
  if (!parsed) return null;
  const { searchParams, originalInput, error } = parsed;

  // Loose match on the raw input.
  const simpleMatch = originalInput.match(/[?&]list=(PL[a-zA-Z0-9_-]{10,40})/);
  const simpleCandidate = simpleMatch ? simpleMatch[1] : null;
  const idFromSimpleMatch = isValidYouTubePlaylistId(simpleCandidate) ? simpleCandidate : null;
  if (error) return idFromSimpleMatch;

  // The `list` parameter is honoured on every path (/watch, /playlist, /embed/videoseries, ...).
  const listParam = searchParams.get('list');
  if (isValidYouTubePlaylistId(listParam)) return listParam;

  return idFromSimpleMatch;
}