UNPKG

bandcamp-fetch

Version:
179 lines 7.72 kB
import { load as cheerioLoad } from 'cheerio';
import { normalizeUrl, reformatImageUrl, splitUrl } from '../utils/Parse.js';
import TrackInfoParser from '../track/TrackInfoParser.js';
import AlbumInfoParser from '../album/AlbumInfoParser.js';
import { decode } from 'html-entities';

// Matches a path that points at a track or album page.
// Regex taken from:
// https://github.com/masterT/bandcamp-scraper/blob/master/lib/htmlParser.js
const ITEM_PATH_REGEX = /^\/(track|album)\/(.+)$/;

// Captures the numeric id at the end of a `data-item-id` attribute value.
const ITEM_ID_REGEX = /(?:album|track)-(\d+)$/;

/**
 * Decides whether the loaded page is a single track / album page rather than
 * a discography page.
 *
 * One-album / one-track artists don't have a discography page; the page for
 * the album or track is loaded instead. The decision is based on the page's
 * `application/ld+json` script and on the `#discography` element being
 * absent, hidden, or holding exactly one `li`.
 *
 * @param {Function} $ - Cheerio instance loaded with the page HTML.
 * @returns {'track'|'album'|null} Kind of the single item, or `null` when the
 *   page should be parsed as a regular discography.
 */
function detectSingleItemType($) {
  const ldJsonEl = $('script[type="application/ld+json"]');
  if (ldJsonEl.length === 0) {
    return null;
  }
  const ldJson = ldJsonEl.html();
  if (!ldJson) {
    return null;
  }
  let data;
  try {
    data = JSON.parse(ldJson);
  } catch (error) {
    // Unparseable metadata: best-effort, treat as a regular discography page.
    return null;
  }
  if (!data || typeof data !== 'object') {
    return null;
  }
  // Check if there is a 'discography' element and, if there is, whether
  // it is hidden or has only one track / album child.
  const discographyEl = $('#discography');
  const hasFullDiscography =
    discographyEl.length !== 0 &&
    discographyEl.css('display') !== 'none' &&
    discographyEl.find('li').length !== 1;
  if (hasFullDiscography) {
    return null;
  }
  let path;
  try {
    path = splitUrl(data['@id'])?.path;
  } catch (error) {
    // `splitUrl` is treated as fallible for anchor hrefs as well; a bad '@id'
    // must not abort the whole parse.
    return null;
  }
  if (typeof path !== 'string') {
    return null;
  }
  if (path.startsWith('/track/')) {
    return 'track';
  }
  if (path.startsWith('/album/')) {
    return 'album';
  }
  return null;
}

/**
 * Works out which host and path an anchor `href` refers to, provided it
 * points at a track or album.
 *
 * @param {string} href - Non-empty `href` attribute value.
 * @param {string|undefined} bandUrl - Host used for relative hrefs.
 * @returns {{host: (string|undefined), pathname: string}|null} `null` when the
 *   href is not a track / album link or cannot be split.
 */
function resolveItemLink(href, bandUrl) {
  if (ITEM_PATH_REGEX.test(href)) {
    // Relative url starting with '/track' or '/album'
    return { host: bandUrl, pathname: href };
  }
  // Full url (label discography)
  try {
    const parts = splitUrl(href);
    if (parts.path && ITEM_PATH_REGEX.test(parts.path)) {
      return { host: parts.base, pathname: parts.path };
    }
  } catch (error) {
    // Href that cannot be split: deliberately skipped.
  }
  return null;
}

/**
 * Builds discography items from the JSON stored in the
 * `data-client-items` attribute of the page's `ol` element.
 *
 * @param {Function} $ - Cheerio instance loaded with the page HTML.
 * @param {object} opts - Same options object given to `parseDiscography`.
 * @returns {object[]} Items of type 'album' or 'track'; empty when the
 *   attribute is missing, is not valid JSON, or is not an array.
 */
function parseClientItems($, opts) {
  const rawExtra = decode($('ol[data-client-items]').attr('data-client-items'));
  let extra;
  try {
    extra = JSON.parse(rawExtra);
  } catch (error) {
    // Attribute absent or not JSON: there are simply no extra items.
    return [];
  }
  if (!Array.isArray(extra)) {
    return [];
  }
  return extra
    .filter((itemData) => itemData.type === 'album' || itemData.type === 'track')
    .map((itemData) => {
      const pageUrl = itemData.page_url;
      // Site-relative page URLs are resolved against the band URL so that
      // these items carry the same kind of URL as the anchor-derived ones.
      // Anything else is passed through exactly as before.
      const isSiteRelative =
        typeof pageUrl === 'string' &&
        pageUrl.startsWith('/') &&
        !pageUrl.startsWith('//');
      const item = {
        url: isSiteRelative ? normalizeUrl(pageUrl, opts.bandUrl) : normalizeUrl(pageUrl),
        type: itemData.type,
        id: itemData.id,
        name: itemData.title,
        artist: {
          name: itemData.artist
        }
      };
      if (itemData.art_id && opts.imageFormat?.id) {
        item.imageUrl = `${opts.imageBaseUrl}/img/a${itemData.art_id}_${opts.imageFormat.id}.jpg`;
      }
      return item;
    });
}

export default class DiscographyParser {
  /**
   * Extracts the list of albums and tracks from a band / label page.
   *
   * When the page turns out to be a single track or album page, it is handed
   * to `TrackInfoParser` / `AlbumInfoParser` and a one-element array is
   * returned. Otherwise every anchor pointing at `/track/...` or `/album/...`
   * is collected (one entry per normalized URL), followed by the items listed
   * in the `data-client-items` attribute.
   *
   * @param {string} html - HTML of the page.
   * @param {object} opts - Parsing options.
   * @param {string} [opts.bandUrl] - Host used to resolve relative links.
   * @param {string} [opts.imageBaseUrl] - Prefix for generated image URLs.
   * @param {object} [opts.imageFormat] - Image format; given to
   *   `reformatImageUrl`, and its `id` is used in generated image URLs.
   * @returns {object[]} Discography items (`url`, `type`, `id`, `name`,
   *   `artist`, optional `imageUrl`), or the single parsed track / album info.
   */
  static parseDiscography(html, opts) {
    const $ = cheerioLoad(html);

    const singleItemType = detectSingleItemType($);
    if (singleItemType !== null) {
      const infoOpts = {
        imageBaseUrl: opts.imageBaseUrl,
        albumImageFormat: opts.imageFormat,
        artistImageFormat: null,
        includeRawData: false
      };
      const info = singleItemType === 'track' ?
        TrackInfoParser.parseInfo(html, infoOpts) :
        AlbumInfoParser.parseInfo(html, infoOpts);
      if (info.artist !== undefined) {
        return [{
          ...info,
          artist: {
            name: info.artist.name
          }
        }];
      }
      return [info];
    }

    const items = {};
    const defaultArtistName = $('#band-name-location').find('.title').text();

    $('a').each((index, link) => {
      const linkEl = $(link);
      const href = linkEl.attr('href');
      if (typeof href !== 'string' || href === '') {
        return true;
      }
      const target = resolveItemLink(href, opts.bandUrl);
      if (target === null || target.host === undefined) {
        return true;
      }
      const { host, pathname } = target;
      const url = normalizeUrl(pathname, host);
      if (items[url] === undefined) {
        items[url] = {
          type: pathname.startsWith('/track/') ? 'track' : 'album'
        };
      }
      const entry = items[url];

      // Link element wraps around img and title
      const img = linkEl.find('img');
      if (img.length) {
        const imgSrc = img.attr('data-original') || img.attr('src');
        const imageUrl = reformatImageUrl(imgSrc, opts.imageFormat);
        if (imageUrl) {
          entry.imageUrl = imageUrl;
        }
      }

      const title = linkEl.find('.title');
      if (title.length) {
        // For labels, title element contains artist name (when it doesn't, then artist = label).
        // For artists, title element may also contain an artist name which overrides the default
        const artistNameEl = title.find('.artist-override');
        if (artistNameEl.length) {
          const artistName = artistNameEl.text().trim();
          // Detach the override so it does not end up in the title text.
          artistNameEl.remove();
          entry.artist = {
            name: artistName
          };
        } else {
          entry.artist = {
            name: defaultArtistName
          };
        }
        entry.name = title.text().trim();
      }

      // Plain-text link (wraps neither an image nor a title element):
      // the link text itself is the item name.
      if (!img.length && !title.length) {
        entry.name = linkEl.text().trim();
      }

      const idStr = linkEl.parent('li').attr('data-item-id');
      if (idStr) {
        const idMatch = ITEM_ID_REGEX.exec(idStr);
        if (idMatch && idMatch[1]) {
          entry.id = Number(idMatch[1]);
        }
      }
      return true;
    });

    const results = [];
    for (const [url, props] of Object.entries(items)) {
      // Entries that never received a name (e.g. image-only links) are dropped.
      if (props.type && props.name) {
        const item = {
          url,
          type: props.type,
          id: props.id,
          name: props.name,
          artist: props.artist || { name: defaultArtistName }
        };
        if (props.imageUrl) {
          item.imageUrl = props.imageUrl;
        }
        results.push(item);
      }
    }

    results.push(...parseClientItems($, opts));
    return results;
  }
}
//# sourceMappingURL=DiscographyParser.js.map