// bandcamp-fetch
// Version:
// Scrape Bandcamp content
// 64 lines • 2.27 kB
// JavaScript
import { load as cheerioLoad } from 'cheerio';
import { URLS } from '../utils/Constants.js';
import { isAbsoluteUrl, normalizeUrl } from '../utils/Parse.js';
/**
 * Extracts the tree of article categories from an HTML page.
 */
export default class ArticleCategoryParser {
    /**
     * Parses the `section` elements that are direct children of the
     * `#daily-view-all` element into a nested list of category groups.
     *
     * Each returned entry has:
     *  - `name`: the value of the section's `class` attribute;
     *  - `title`: text of the `h2` immediately preceding the section, or `''`
     *    when there is none;
     *  - `sections` (only when non-empty): entries parsed from child `section`
     *    elements, in document order;
     *  - `categories` (only when non-empty): `{ url, name }` objects, one per
     *    anchor found inside the section's child `div` elements.
     *
     * Sections that yield neither nested sections nor categories are omitted.
     *
     * @param {string} html - Markup to parse.
     * @returns {Array<object>} Parsed top-level sections; empty when none match.
     */
    static parseCategories(html) {
        const $ = cheerioLoad(html);
        const baseUrl = URLS.DAILY;

        // Builds one { url, name } entry for every anchor below `container`.
        // hrefs that are not absolute are resolved through normalizeUrl().
        const collectCategories = (container) => container
            .find('a')
            .toArray()
            .map((anchor) => {
                const link = $(anchor);
                const href = link.attr('href');
                return {
                    url: isAbsoluteUrl(href) ? href : normalizeUrl(href, baseUrl),
                    name: link.text()
                };
            });

        // Recursively converts a <section> into its object form; yields null
        // when the section holds neither nested sections nor categories.
        const parseSection = (section) => {
            const heading = section.prev('h2');
            const nestedSections = [];
            const categories = [];

            for (const child of section.children().toArray()) {
                switch (child.tagName) {
                    case 'section': {
                        const nested = parseSection($(child));
                        if (nested !== null) {
                            nestedSections.push(nested);
                        }
                        break;
                    }
                    case 'div':
                        categories.push(...collectCategories($(child)));
                        break;
                    default:
                        // Other child elements carry no category data.
                        break;
                }
            }

            if (nestedSections.length === 0 && categories.length === 0) {
                return null;
            }

            // Optional keys are attached only when populated, keeping the
            // key order name, title, sections, categories.
            const result = {
                name: section.attr('class'),
                title: heading.length > 0 ? heading.text() : ''
            };
            if (nestedSections.length > 0) {
                result.sections = nestedSections;
            }
            if (categories.length > 0) {
                result.categories = categories;
            }
            return result;
        };

        const results = [];
        for (const element of $('#daily-view-all').children('section').toArray()) {
            const parsed = parseSection($(element));
            if (parsed !== null) {
                results.push(parsed);
            }
        }
        return results;
    }
}
//# sourceMappingURL=ArticleCategoryParser.js.map