oto-scraper
Version:
multi scraper
48 lines (44 loc) • 1.85 kB
JavaScript
const cheerio = require('cheerio');
const Util = require('./yt_utils');
/**
 * Parse a page that embeds a `var ytInitialData = {...}` script assignment
 * (presumably a YouTube search results page) into categorized result lists.
 *
 * The concatenated text of all `<script>` elements is searched for the
 * `ytInitialData` assignment, the assigned JSON is parsed, the renderer items
 * are located, and each item is classified through the `Util` helpers.
 *
 * @param {string} webPage - HTML source handed to `cheerio.load`.
 * @returns {Promise<{channels: Array, playlists: Array, streams: Array, videos: Array}>}
 *   Extracted items grouped by kind; all lists are empty when nothing matched.
 * @throws {TypeError} (as a rejection) when no `ytInitialData` assignment is
 *   found, or when the parsed object lacks
 *   `contents.twoColumnSearchResultsRenderer.primaryContents`.
 * @throws {SyntaxError} (as a rejection) when the extracted text is not valid JSON.
 */
async function yt_page_parse(webPage) {
  /**
   * Locate the list of renderer items inside the parsed `ytInitialData` object.
   *
   * @param {object} json - Parsed `ytInitialData`.
   * @returns {Array<object>} Renderer items; empty when no result section exists.
   */
  function extract_data(json) {
    const primary = json.contents.twoColumnSearchResultsRenderer.primaryContents;
    let contents = [];
    if (primary.sectionListRenderer) {
      // Pick the first section that really holds at least one result item.
      // `some` is required here: a nested `filter` returns an array, which is
      // truthy even when empty, so it would accept result-less sections.
      const section = primary.sectionListRenderer.contents.find((item) =>
        item?.itemSectionRenderer?.contents?.some(
          (x) => x.videoRenderer || x.playlistRenderer || x.channelRenderer
        )
      );
      // No qualifying section means "no results", not a crash.
      contents = section?.itemSectionRenderer.contents ?? [];
    }
    // When a rich grid is present it takes precedence over the section list.
    if (primary.richGridRenderer) {
      contents = primary.richGridRenderer.contents
        .filter((item) => item.richItemRenderer && item.richItemRenderer.content)
        .map((item) => item.richItemRenderer.content);
    }
    return contents;
  }

  /**
   * Sort renderer items into buckets using the `Util` predicates.
   * Items matching none of the predicates are skipped.
   *
   * @param {Array<object>} data - Renderer items from `extract_data`.
   * @returns {{channels: Array, playlists: Array, streams: Array, videos: Array}}
   */
  function parse_data(data) {
    const results = {
      channels: [],
      playlists: [],
      streams: [],
      videos: [],
    };
    for (const item of data) {
      // The order of these checks is significant: the first matching
      // predicate decides the bucket.
      if (Util.isVideo(item)) {
        results.videos.push(Util.getVideoData(item));
      } else if (Util.isPlaylist(item)) {
        results.playlists.push(Util.getPlaylistData(item));
      } else if (Util.isStream(item)) {
        results.streams.push(Util.getStreamData(item));
      } else if (Util.isChannel(item)) {
        results.channels.push(Util.getChannelData(item));
      }
    }
    return results;
  }

  const $ = cheerio.load(webPage);
  const script_text = $('script').text();
  const match = script_text.match(/var\sytInitialData\s*=\s*.*?\(.*?window.ytcsi.*?\)/);
  if (!match) {
    // TypeError is kept (it is what the unchecked `[0]` access used to raise)
    // so existing handlers keep working, but with an actionable message.
    throw new TypeError('yt_page_parse: no "var ytInitialData = ..." assignment found in the page scripts');
  }
  const json_text = match[0]
    // Strip the assignment prefix with the same whitespace tolerance the
    // match pattern has, so `var ytInitialData={...}` is handled as well.
    .replace(/^var\sytInitialData\s*=\s*/, '')
    // Drop the trailing `;if (window.ytcsi ...` statement following the JSON.
    .replace(/;\sif.*|;if.*/, '');
  return parse_data(extract_data(JSON.parse(json_text)));
}
// The parser function itself is the module's export (CommonJS), so a
// `require` of this file yields `yt_page_parse` directly.
module.exports = yt_page_parse;