bandcamp-fetch
Version:
Scrape Bandcamp content
67 lines • 2.41 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
const cheerio_1 = require("cheerio");
const Constants_js_1 = require("../utils/Constants.js");
const Parse_js_1 = require("../utils/Parse.js");
/**
 * Extracts a tree of article category sections from an HTML page.
 *
 * The page is expected to contain an element with id `daily-view-all`
 * whose direct `<section>` children each hold nested `<section>` elements
 * and/or `<div>` elements containing category links.
 */
class ArticleCategoryParser {
    /**
     * Parses category sections out of the given markup.
     *
     * Each returned entry has:
     *  - `name`: the section element's `class` attribute,
     *  - `title`: text of the `<h2>` immediately preceding the section, or `''`,
     *  - `sections` (only when non-empty): nested sections, same shape,
     *  - `categories` (only when non-empty): `{ url, name }` for every `<a>`
     *    found inside the section's direct `<div>` children.
     * Sections with neither nested sections nor categories are omitted.
     *
     * @param {string} html - Markup handed to cheerio's `load`.
     * @returns {Array<object>} Parsed top-level sections, in document order.
     */
    static parseCategories(html) {
        const $ = (0, cheerio_1.load)(html);
        const baseUrl = Constants_js_1.URLS.DAILY;

        // Turns a single <a> element into a { url, name } record; links that
        // are not absolute are resolved against the base URL.
        const toCategory = (anchorEl) => {
            const anchor = $(anchorEl);
            const href = anchor.attr('href');
            const url = (0, Parse_js_1.isAbsoluteUrl)(href)
                ? href
                : (0, Parse_js_1.normalizeUrl)(href, baseUrl);
            return { url, name: anchor.text() };
        };

        // Recursively converts a <section> selection into its record, or
        // null when it yields no nested sections and no categories.
        const parseSection = (sectionNode) => {
            const heading = sectionNode.prev('h2');
            const nestedSections = [];
            const categories = [];

            // Callbacks use block bodies so they never return `false`,
            // which would stop cheerio's `each` iteration early.
            sectionNode.children().each((_childIndex, childEl) => {
                switch (childEl.tagName) {
                    case 'section': {
                        const nested = parseSection($(childEl));
                        if (nested !== null) {
                            nestedSections.push(nested);
                        }
                        break;
                    }
                    case 'div': {
                        $(childEl).find('a').each((_anchorIndex, anchorEl) => {
                            categories.push(toCategory(anchorEl));
                        });
                        break;
                    }
                    default:
                        break;
                }
            });

            if (nestedSections.length === 0 && categories.length === 0) {
                return null;
            }

            // Optional keys are added only when populated, keeping the key
            // order name, title, sections, categories.
            const record = {
                name: sectionNode.attr('class'),
                title: heading.length ? heading.text() : ''
            };
            if (nestedSections.length > 0) {
                record.sections = nestedSections;
            }
            if (categories.length > 0) {
                record.categories = categories;
            }
            return record;
        };

        const parsedSections = [];
        $('#daily-view-all').children('section').each((_sectionIndex, sectionEl) => {
            const record = parseSection($(sectionEl));
            if (record !== null) {
                parsedSections.push(record);
            }
        });
        return parsedSections;
    }
}
// Expose the parser class as this module's default export.
exports.default = ArticleCategoryParser;
//# sourceMappingURL=ArticleCategoryParser.js.map