UNPKG

bandcamp-fetch

Version:

Scrape Bandcamp content

github.com/patrickkfkan/bandcamp-fetch

patrickkfkan/bandcamp-fetch

68 lines • 2.85 kB

JavaScript

import { load as cheerioLoad } from 'cheerio';
import { URLS } from '../utils/Constants.js';
import { isAbsoluteUrl, normalizeUrl, reformatImageUrl, stripLineBreaks } from '../utils/Parse.js';

/**
 * Extracts article entries and paging counters from the HTML of an
 * article listing page.
 */
export default class ArticleListParser {
  /**
   * Parses an article listing page.
   *
   * @param {string} html - Markup of the listing page.
   * @param {object} opts - Parsing options.
   * @param {*} opts.imageFormat - Forwarded as-is to `reformatImageUrl()`
   *   together with each article's `<img src>` value.
   * @returns {{articles: object[], total: number, start: number, end: number}}
   *   `articles` holds one `{ url, title, date }` object per `.list-article`
   *   element that contains an `a.title` link, with `category`
   *   (`{ name, url? }`) and `imageUrl` added when available. `start`, `end`
   *   and `total` come from the "<start> to <end> of <total>" text inside
   *   `#num-results`, and stay 0 when that text is absent or does not match.
   */
  static parseList(html, opts) {
    const $ = cheerioLoad(html);
    const dailyUrl = URLS.DAILY;
    const result = {
      articles: [],
      total: 0,
      start: 0,
      end: 0
    };

    $('articles-list').each((_listIndex, list) => {
      $('.list-article', $(list)).each((_articleIndex, articleEl) => {
        const article = $(articleEl);

        // Title and url.
        // A Cheerio selection is always truthy, so emptiness has to be tested
        // through `.length`. Entries without a title link are skipped before
        // any URL handling, so `undefined` never reaches the URL helpers for
        // a missing link.
        const titleLink = article.find('a.title');
        if (titleLink.length === 0) {
          return;
        }
        const title = titleLink.text();
        let url = titleLink.attr('href');
        if (!isAbsoluteUrl(url)) {
          url = normalizeUrl(url, dailyUrl);
        }

        const imgSrc = article.find('img').attr('src') || null;

        // Category
        const infoText = article.find('.article-info-text');
        const infoTextCategoryLink = infoText.find('a.franchise');
        const infoTextMiddot = infoText.find('.middot');
        const categoryName = infoTextCategoryLink.text();
        const category = categoryName ? { name: categoryName } : null;
        const categoryUrl = infoTextCategoryLink.attr('href');
        if (category && categoryUrl) {
          category.url = isAbsoluteUrl(categoryUrl)
            ? categoryUrl
            : normalizeUrl(categoryUrl, dailyUrl);
        }

        // Date
        // The category link and the separator are removed from the info node
        // first, so the text that remains is treated as the date.
        infoTextCategoryLink.remove();
        infoTextMiddot.remove();
        const date = stripLineBreaks(infoText.text()).trim();

        const parsed = { url, title, date };
        if (category) {
          parsed.category = category;
        }
        const imageUrl = reformatImageUrl(imgSrc, opts.imageFormat);
        if (imageUrl) {
          parsed.imageUrl = imageUrl;
        }
        result.articles.push(parsed);
      });
    });

    // Paging counters, e.g. "1 to 20 of 345".
    const resultsText = stripLineBreaks($('#num-results').text()).trim();
    const rtm = resultsText.match(/(\d+)(?:\s*to\s*)(\d+)(?:\s*of\s*)(\d+)/);
    if (rtm) {
      const [, start, end, total] = rtm;
      result.total = Number.parseInt(total, 10);
      result.start = Number.parseInt(start, 10);
      result.end = Number.parseInt(end, 10);
    }

    return result;
  }
}
//# sourceMappingURL=ArticleListParser.js.map