UNPKG

itch-scraper

Version:

Itch scraper is a scraping tool to get data from itch.io pages.

74 lines (64 loc) 2.21 kB
const { JSDOM } = require('jsdom'); const checkLink = require('./checkLink.js'); const getAuthorUser = (link) => { const linkType = checkLink(link); if (linkType === 0 || linkType === 1) return link.slice(8).split('.')[0]; }; const getAuthorUrl = (link) => { const linkType = checkLink(link); if (linkType === 1) return link; else { const splitLink = link.split('/'); return splitLink.slice(0, splitLink.length - 1).join('/'); } }; const getAuthorName = async (link) => { const authorLink = getAuthorUrl(link); const dom = await JSDOM.fromURL(authorLink); const { document } = dom.window; const author = document.querySelector('#profile_header > .text_header > h1').textContent; return author; }; const getAuthorBio = async (link) => { const authorLink = getAuthorUrl(link); const dom = await JSDOM.fromURL(authorLink); const { document } = dom.window; const bio = document.querySelector('div.user_profile > p').textContent; return bio; }; const getAuthorGames = async (link) => { const authorLink = getAuthorUrl(link); const dom = await JSDOM.fromURL(authorLink); const { document } = dom.window; const gameDoms = document.querySelectorAll('div.game_cell.has_cover > a'); const games = []; gameDoms.forEach((game) => games.push(game.getAttribute('href'))); return games; }; const getAuthorSocialLinks = async (link) => { const authorLink = getAuthorUrl(link); const dom = await JSDOM.fromURL(authorLink); const { document } = dom.window; const linkDoms = document.querySelectorAll('div.user_links > span.link_group > a'); const socialLinks = []; linkDoms.forEach((link) => socialLinks.push(link.getAttribute('href'))); return socialLinks; }; const getAuthor = async (link) => { const url = getAuthorUrl(link); const user = getAuthorUser(link); const name = await getAuthorName(link); const bio = await getAuthorBio(link); const games = await getAuthorGames(link); const socialLinks = await getAuthorSocialLinks(link); return { url, user, name, bio, games, socialLinks }; }; module.exports = { getAuthorName, getAuthorUrl, getAuthorUser, getAuthorBio, getAuthorGames, getAuthorSocialLinks, getAuthor, };