// bandcamp-fetch
// Version: (not specified)
// Scrape Bandcamp content
// 194 lines • 8.27 kB
// JavaScript
;
// Interop helper for default imports. An already-installed `this.__importDefault`
// is reused when present; otherwise the local fallback below is used.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // A module flagged with `__esModule` is handed back untouched.
    if (mod && mod.__esModule) {
        return mod;
    }
    // Anything else is wrapped so callers can always read `.default`.
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const cheerio_1 = require("cheerio");
const Parse_js_1 = require("../utils/Parse.js");
const TrackInfoParser_js_1 = __importDefault(require("../track/TrackInfoParser.js"));
const AlbumInfoParser_js_1 = __importDefault(require("../album/AlbumInfoParser.js"));
const html_entities_1 = require("html-entities");
/**
 * Parser for the HTML of a Bandcamp artist / label discography page.
 */
class DiscographyParser {
    /**
     * Extracts the albums and tracks referenced by a discography page.
     *
     * Two layouts are handled:
     *  - One-album / one-track artists have no discography page; the album or
     *    track page is served instead. In that case the page is delegated to
     *    the track / album info parser and a single-element array is returned.
     *  - Otherwise every `<a>` whose target is a `/track/...` or `/album/...`
     *    path is collected (de-duplicated by normalized URL), followed by any
     *    items found in the JSON of the `ol[data-client-items]` attribute.
     *
     * @param {string} html - Markup of the page to parse.
     * @param {object} opts - Parsing options. This method reads:
     *   `bandUrl` (host used to resolve relative links),
     *   `imageBaseUrl` and `imageFormat` (used to build / reformat image URLs).
     * @returns {Array<object>} Parsed items. Items produced by the link scan
     *   have `url`, `type` ('album' | 'track'), `id`, `name`, `artist.name`
     *   and, when an image was found, `imageUrl`.
     */
    static parseDiscography(html, opts) {
        const $ = (0, cheerio_1.load)(html);
        // One-album / one-track artists don't have a discography page.
        // The page for the album or track will be loaded instead.
        // Check if this is the case and handle accordingly.
        const ldJsonEl = $('script[type="application/ld+json"]');
        let isOneTrack = false;
        let isOneAlbum = false;
        if (ldJsonEl.length) {
            let ldJson = null;
            const ldJsonText = ldJsonEl.html();
            if (ldJsonText) {
                try {
                    ldJson = JSON.parse(ldJsonText);
                }
                catch (error) {
                    // Unparseable JSON-LD: fall through to the regular link scan.
                    ldJson = null;
                }
            }
            if (ldJson && typeof ldJson === 'object') {
                // Check if there is a 'discography' element and, if there is, whether
                // it is hidden or has only one track / album child.
                const discographyEl = $('#discography');
                const isSingleItemPage = discographyEl.length === 0 ||
                    discographyEl.css('display') === 'none' ||
                    discographyEl.find('li').length === 1;
                const ldId = ldJson['@id'];
                // Only inspect '@id' when it is actually a string; JSON-LD without
                // a usable id simply means "not a single item page".
                if (isSingleItemPage && typeof ldId === 'string') {
                    // The path may be absent (the label branch below guards for the
                    // same thing), so default it before calling startsWith().
                    const ldPath = (0, Parse_js_1.splitUrl)(ldId).path ?? '';
                    isOneTrack = ldPath.startsWith('/track/');
                    isOneAlbum = ldPath.startsWith('/album/');
                }
            }
        }
        if (isOneTrack || isOneAlbum) {
            const newOpts = {
                imageBaseUrl: opts.imageBaseUrl,
                albumImageFormat: opts.imageFormat,
                artistImageFormat: null,
                includeRawData: false
            };
            const info = isOneTrack ?
                TrackInfoParser_js_1.default.parseInfo(html, newOpts)
                : AlbumInfoParser_js_1.default.parseInfo(html, newOpts);
            if (info.artist !== undefined) {
                // Only the artist's name is kept in the returned item.
                return [
                    {
                        ...info,
                        artist: {
                            name: info.artist.name
                        }
                    }
                ];
            }
            return [info];
        }
        // Regex taken from:
        // https://github.com/masterT/bandcamp-scraper/blob/master/lib/htmlParser.js
        const itemPathRegex = /^\/(track|album)\/(.+)$/;
        // Keyed by normalized URL so several links to the same item are merged.
        const items = new Map();
        const defaultArtistName = $('#band-name-location').find('.title').text();
        $('a').each((index, link) => {
            const linkEl = $(link);
            const href = linkEl.attr('href');
            if (typeof href !== 'string' || href === '') {
                return true;
            }
            let host;
            let pathname;
            if (itemPathRegex.test(href)) {
                // Relative url starting with '/track' or '/album'
                host = opts.bandUrl;
                pathname = href;
            }
            else {
                // Full url (label discography)
                try {
                    const parsedHref = (0, Parse_js_1.splitUrl)(href);
                    if (parsedHref.path && itemPathRegex.test(parsedHref.path)) {
                        host = parsedHref.base;
                        pathname = parsedHref.path;
                    }
                }
                catch (e) {
                    // Not a URL splitUrl() can handle: skip this link.
                    return true;
                }
            }
            if (host === undefined || pathname === undefined) {
                return true;
            }
            const url = (0, Parse_js_1.normalizeUrl)(pathname, host);
            let item = items.get(url);
            if (item === undefined) {
                item = {
                    type: pathname.startsWith('/track/') ? 'track' : 'album'
                };
                items.set(url, item);
            }
            // Link element wraps around img and title
            const img = linkEl.find('img');
            if (img.length) {
                const imgSrc = img.attr('data-original') || img.attr('src');
                const imageUrl = (0, Parse_js_1.reformatImageUrl)(imgSrc, opts.imageFormat);
                if (imageUrl) {
                    item.imageUrl = imageUrl;
                }
            }
            const title = linkEl.find('.title');
            if (title.length) {
                // For labels, title element contains artist name (when it doesn't, then artist = label).
                // For artists, title element may also contain an artist name which overrides the default
                const artistNameEl = title.find('.artist-override');
                if (artistNameEl.length) {
                    const artistName = artistNameEl.text().trim();
                    // Detach the override so it does not leak into the title text below.
                    artistNameEl.remove();
                    item.artist = {
                        name: artistName
                    };
                }
                else {
                    item.artist = {
                        name: defaultArtistName
                    };
                }
                item.name = title.text().trim();
            }
            // Plain text link (neither image nor title element): the link text
            // itself is the item name. Without this fallback such links would
            // never get a name and would be dropped from the results.
            if (!img.length && !title.length) {
                item.name = linkEl.text().trim();
            }
            const idStr = linkEl.parent('li').attr('data-item-id');
            if (idStr) {
                const idMatch = /(?:album|track)-(\d+)$/.exec(idStr);
                if (idMatch && idMatch[1]) {
                    item.id = Number(idMatch[1]);
                }
            }
            return true;
        });
        const results = [];
        for (const [url, props] of items) {
            // Entries that never received a name are not reported.
            if (props.type && props.name) {
                const item = {
                    url,
                    type: props.type,
                    id: props.id,
                    name: props.name,
                    artist: props.artist || {
                        name: defaultArtistName
                    }
                };
                if (props.imageUrl) {
                    item.imageUrl = props.imageUrl;
                }
                results.push(item);
            }
        }
        // Further items may be provided as JSON in the 'data-client-items'
        // attribute of the list element (presumably items that are not rendered
        // as links in the initial markup - verify against a live page).
        const rawExtra = (0, html_entities_1.decode)($('ol[data-client-items]').attr('data-client-items'));
        let extra = null;
        try {
            extra = JSON.parse(rawExtra);
        }
        catch (error) {
            // Attribute missing or not valid JSON: there are no extra items.
            extra = null;
        }
        if (Array.isArray(extra)) {
            for (const itemData of extra) {
                // Skip null entries and anything that is not an album or a track.
                if (!itemData || (itemData.type !== 'album' && itemData.type !== 'track')) {
                    continue;
                }
                const item = {
                    url: (0, Parse_js_1.normalizeUrl)(itemData.page_url),
                    type: itemData.type,
                    id: itemData.id,
                    name: itemData.title,
                    artist: {
                        name: itemData.artist
                    }
                };
                if (itemData.art_id && opts.imageFormat?.id) {
                    item.imageUrl = `${opts.imageBaseUrl}/img/a${itemData.art_id}_${opts.imageFormat.id}.jpg`;
                }
                results.push(item);
            }
        }
        return results;
    }
}
exports.default = DiscographyParser;
//# sourceMappingURL=DiscographyParser.js.map