bandcamp-fetch
Version:
Scrape Bandcamp content
136 lines • 5.31 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
const cheerio_1 = require("cheerio");
const Parse_js_1 = require("../utils/Parse.js");
const Constants_js_1 = require("../utils/Constants.js");
/**
 * Static parsers for the HTML of Bandcamp pages that expose "Discover" data:
 * the filter options embedded in the Discover app's data blob, and the
 * tag / location pills rendered inside the `section#discover` element.
 */
class DiscoverOptionsParser {
    /**
     * Extracts the available Discover filter options from a page.
     *
     * The options are read from the JSON stored in the `data-blob` attribute
     * of the `#DiscoverApp` element, under `appData.initialState`. Any list
     * that is absent from the blob (or is not an array) yields an empty list
     * in the result.
     *
     * @param {string} html - Page markup containing the `#DiscoverApp` element.
     * @returns {{categories: Array, genres: Array, subgenres: Object, customTags: Array, sortBys: Array, locations: Array, times: Array}}
     *   The parsed options. `subgenres` maps a parent genre slug to the list
     *   of its subgenres. `customTags` is always returned empty by this parser.
     * @throws {ParseError} If the blob attribute is missing or empty, is not
     *   valid JSON, or has no object at `appData.initialState`.
     */
    static parseOptions(html) {
        const $ = (0, cheerio_1.load)(html);
        const blob = $('#DiscoverApp[data-blob]').attr('data-blob');
        if (!blob) {
            throw new Parse_js_1.ParseError('Failed to parse discover options: blob not found in data.', html);
        }
        let parsed;
        try {
            parsed = JSON.parse(blob);
        }
        catch (error) {
            throw new Parse_js_1.ParseError('Failed to parse discover options: JSON error.', blob, error);
        }
        const options = parsed?.appData?.initialState;
        if (!options || typeof options !== 'object') {
            throw new Parse_js_1.ParseError("Failed to parse discover options: blob is missing or has invalid 'appData.initialState' field.", parsed);
        }
        // Maps a blob list to option records; anything that is not an array becomes [].
        const mapIfArray = (list, toOption) => (Array.isArray(list) ? list.map((item) => toOption(item)) : []);
        const result = {
            categories: mapIfArray(options.categories, (cat) => ({
                name: cat.label,
                value: cat.id,
                slug: cat.slug
            })),
            genres: mapIfArray(options.genres, (genre) => ({
                name: genre.label,
                value: genre.slug,
                id: genre.id
            })),
            subgenres: {},
            customTags: [],
            // The blob calls the sort choices "slices".
            sortBys: mapIfArray(options.slices, (slice) => ({
                name: slice.label,
                value: slice.slug
            })),
            locations: mapIfArray(options.locations, (loc) => ({
                name: loc.label,
                value: loc.id
            })),
            times: mapIfArray(options.times, (time) => ({
                name: time.label,
                value: time.id,
                slug: time.slug
            }))
        };
        if (Array.isArray(options.subgenres)) {
            // Group in a Map rather than directly on a plain object: a lookup
            // such as obj['constructor'] or obj['__proto__'] finds an inherited
            // value, which would skip the array initialisation and break push().
            const byParent = new Map();
            for (const { id, label, slug, parentSlug } of options.subgenres) {
                // Incomplete entries are skipped.
                if (id === undefined || !parentSlug || !label || !slug) {
                    continue;
                }
                // Stringify so keys collapse exactly as object property keys would.
                const key = String(parentSlug);
                let siblings = byParent.get(key);
                if (!siblings) {
                    siblings = [];
                    byParent.set(key, siblings);
                }
                siblings.push({ name: label, value: slug, id });
            }
            // fromEntries defines own data properties, so any key is stored safely.
            result.subgenres = Object.fromEntries(byParent);
        }
        return result;
    }
    /**
     * Collects the recommended tags and locations shown as pill links
     * (`a.g-pill` inside `.pill-group`) within `section#discover`.
     *
     * A link whose `from` query parameter is `hp_disco_locations` is treated
     * as a location and its numeric `loc` parameter becomes the value; every
     * other link is treated as a tag whose value is the path segment captured
     * between `/discover/` and `?`. Entries with the same name and value are
     * only listed once.
     *
     * @param {string} html - Page markup containing the `section#discover` element.
     * @returns {{tags: Array<{type: string, name: string, value: string}>, locations: Array<{type: string, name: string, value: number}>}}
     *   The tags and locations in the order they were encountered.
     */
    static parseRecommendedTagsAndLocations(html) {
        const $ = (0, cheerio_1.load)(html);
        const list = {
            tags: [],
            locations: []
        };
        const hasTag = (value, name) => list.tags.some((tag) => tag.value === value && tag.name === name);
        const hasLocation = (value, name) => list.locations.some((loc) => loc.value === value && loc.name === name);
        $('section#discover')
            .find('.pill-group')
            .each((_, group) => {
            $(group)
                .find('a.g-pill')
                .each((_, link) => {
                const linkEl = $(link);
                const name = linkEl.text().trim();
                const href = linkEl.attr('href');
                // Hrefs may be relative, so resolve them against the site URL.
                const urlObj = href ? new URL(href, Constants_js_1.URLS.SITE_URL) : null;
                const isLocation = urlObj?.searchParams.get('from') === 'hp_disco_locations';
                if (isLocation) {
                    // A missing `loc` parameter converts to 0 and a non-numeric
                    // one to NaN; both are rejected below.
                    const geo = Number(urlObj.searchParams.get('loc'));
                    if (geo && !Number.isNaN(geo) && !hasLocation(geo, name)) {
                        list.locations.push({
                            type: 'location',
                            name,
                            value: geo
                        });
                    }
                    return;
                }
                // Only hrefs carrying a query string match this pattern.
                const tagMatch = href ? /\/discover\/(.+)\?/.exec(href) : null;
                const value = tagMatch && tagMatch[1];
                if (value && !hasTag(value, name)) {
                    list.tags.push({
                        type: 'tag',
                        name,
                        value
                    });
                }
            });
        });
        return list;
    }
}
exports.default = DiscoverOptionsParser;
//# sourceMappingURL=DiscoverOptionsParser.js.map