UNPKG

oto-scraper

Version:

multi scraper

48 lines (44 loc) 1.85 kB
const cheerio = require('cheerio');
const Util = require('./yt_utils');

/**
 * Tells whether a section entry carries one of the renderer kinds this
 * scraper looks for (video, playlist or channel).
 *
 * @param {object} entry - One element of an `itemSectionRenderer.contents` array.
 * @returns {boolean} True when the entry has a video, playlist or channel renderer.
 */
function is_result_entry(entry) {
  return Boolean(entry?.videoRenderer || entry?.playlistRenderer || entry?.channelRenderer);
}

/**
 * Pulls the list of raw result items out of a parsed `ytInitialData` object.
 *
 * Reads `contents.twoColumnSearchResultsRenderer.primaryContents` and supports
 * two layouts:
 *  - `sectionListRenderer`: the contents of the first section that holds at
 *    least one video/playlist/channel renderer;
 *  - `richGridRenderer`: the `content` of every `richItemRenderer` entry.
 * When both are present the rich-grid result wins (it is assigned last).
 *
 * @param {object} json - Parsed `ytInitialData` object.
 * @returns {Array<object>} Raw renderer items; empty when nothing usable is found.
 */
function extract_data(json) {
  const primary = json.contents.twoColumnSearchResultsRenderer.primaryContents;
  let contents = [];

  if (primary.sectionListRenderer) {
    // `some` rather than a nested `filter`: a filtered array is truthy even
    // when it is empty, which would let sections without any result through.
    const section = primary.sectionListRenderer.contents.find(
      (item) => item?.itemSectionRenderer?.contents?.some(is_result_entry)
    );
    // No matching section means no results, not a crash.
    if (section) {
      contents = section.itemSectionRenderer.contents;
    }
  }

  if (primary.richGridRenderer) {
    contents = primary.richGridRenderer.contents
      .filter((item) => item.richItemRenderer && item.richItemRenderer.content)
      .map((item) => item.richItemRenderer.content);
  }

  return contents;
}

/**
 * Sorts raw renderer items into buckets using the predicates and converters
 * from `./yt_utils`. Each item lands in at most one bucket; the checks run in
 * the order video, playlist, stream, channel. Unrecognised items are dropped.
 *
 * @param {Array<object>} data - Raw items as returned by `extract_data`.
 * @returns {{channels: Array, playlists: Array, streams: Array, videos: Array}}
 *   The converted items grouped by kind.
 */
function parse_data(data) {
  const results = {
    channels: [],
    playlists: [],
    streams: [],
    videos: [],
  };

  for (const item of data) {
    if (Util.isVideo(item)) {
      results.videos.push(Util.getVideoData(item));
    } else if (Util.isPlaylist(item)) {
      results.playlists.push(Util.getPlaylistData(item));
    } else if (Util.isStream(item)) {
      results.streams.push(Util.getStreamData(item));
    } else if (Util.isChannel(item)) {
      results.channels.push(Util.getChannelData(item));
    }
  }

  return results;
}

/**
 * Parses an HTML page, extracts the embedded `ytInitialData` JSON from its
 * `<script>` tags and returns the search results grouped by kind.
 *
 * @param {string} webPage - HTML markup handed to `cheerio.load`
 *   (presumably a YouTube search results page - confirm against callers).
 * @returns {Promise<{channels: Array, playlists: Array, streams: Array, videos: Array}>}
 *   Resolves with the grouped results.
 * @throws {Error} (as a rejection) when no `ytInitialData` assignment is found
 *   in the page scripts.
 * @throws {SyntaxError} (as a rejection) when the extracted text is not valid JSON.
 */
async function yt_page_parse(webPage) {
  const $ = cheerio.load(webPage);

  // Text of all <script> elements, concatenated by cheerio.
  const script_text = $('script').text();

  // Grab the `var ytInitialData = ...` assignment up to the `( ... window.ytcsi ... )`
  // expression that follows it in the same line of script text.
  const found = script_text.match(/var\sytInitialData\s*=\s*.*?\(.*?window.ytcsi.*?\)/);
  if (!found) {
    throw new Error('yt_page_parse: could not find ytInitialData in the page scripts');
  }

  const pre_data = found[0].replace("var ytInitialData =", '');

  // Cut everything from the first ";if" / "; if" onwards so only the object
  // literal remains. NOTE(review): this also truncates the JSON if that
  // character sequence ever occurs inside a string value.
  const json_data = pre_data.replace(/;\sif.*|;if.*/, '');

  const data = extract_data(JSON.parse(json_data));
  return parse_data(data);
}

module.exports = yt_page_parse;