bandcamp-fetch
Version:
Scrape Bandcamp content
143 lines • 4.51 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.ParseError = void 0;
exports.parseLabelFromBackToLabelLink = parseLabelFromBackToLabelLink;
exports.parsePublisher = parsePublisher;
exports.normalizeUrl = normalizeUrl;
exports.getAdditionalPropertyValue = getAdditionalPropertyValue;
exports.isAbsoluteUrl = isAbsoluteUrl;
exports.substrAfter = substrAfter;
exports.substrBefore = substrBefore;
exports.splitUrl = splitUrl;
exports.stripTags = stripTags;
exports.stripLineBreaks = stripLineBreaks;
exports.stripMultipleWhitespaces = stripMultipleWhitespaces;
exports.brToNewLine = brToNewLine;
exports.reformatImageUrl = reformatImageUrl;
const Constants_js_1 = require("./Constants.js");
/**
 * @internal
 * Extracts label info from the page's "back to label" link, if one exists.
 *
 * @param $ Query function for the page (called with a selector; the result
 *   is expected to expose `length`, `find()`, `html()` and `attr()`).
 * @returns `{ type: 'label', name, url }` when both a label name and a link
 *   target could be derived; `null` otherwise.
 */
function parseLabelFromBackToLabelLink($) {
    const backLink = $('.back-to-label-link');
    if (!backLink.length) {
        return null;
    }
    const html = backLink.find('.back-link-text').html();
    let name = html;
    if (html) {
        // The label name is either whatever follows a line break in the link
        // text, or whatever precedes one of the Japanese-locale suffixes.
        name = substrAfter(html, '<br/>') ||
            substrAfter(html, '<br>') ||
            substrBefore(html, ' に戻る') ||
            substrBefore(html, ' のアイテムをもっと聴く');
    }
    const href = backLink.attr('href');
    // Only the protocol + host portion of the link identifies the label.
    const baseUrl = href && splitUrl(href).base;
    if (!name || !baseUrl) {
        return null;
    }
    return {
        type: 'label',
        name: name,
        url: baseUrl
    };
}
/**
 * Builds a publisher record from the `publisher` entry of a JSON object.
 *
 * @param json Object that may carry a `publisher` property.
 * @param imageFormat Optional image format passed on to `reformatImageUrl`.
 * @returns An object with `name`, `url` (taken from `@id`) and `description`,
 *   plus `imageUrl` when an image URL is available; `null` if `json` has no
 *   publisher.
 */
function parsePublisher(json, imageFormat) {
    const { publisher: source } = json;
    if (!source) {
        return null;
    }
    const imageUrl = reformatImageUrl(source.image, imageFormat);
    const result = {
        name: source.name,
        url: source['@id'],
        description: source.description
    };
    // `imageUrl` is only attached when there is something to attach.
    if (imageUrl) {
        result.imageUrl = imageUrl;
    }
    return result;
}
/**
 * Turns a possibly-relative URL into an absolute one.
 *
 * @param url URL to normalize.
 * @param baseUrl Base to resolve relative URLs against; when falsy, the
 *   site URL from the constants module is used.
 * @returns `null` for a falsy `url`, `url` unchanged when it is already
 *   absolute, otherwise `url` resolved against the base.
 */
function normalizeUrl(url, baseUrl) {
    if (!url) {
        return null;
    }
    if (isAbsoluteUrl(url)) {
        return url;
    }
    const effectiveBase = baseUrl || Constants_js_1.URLS.SITE_URL;
    return new URL(url, effectiveBase).href;
}
/**
 * Looks up a value in an object's `additionalProperty` list.
 *
 * @param o Object whose `additionalProperty` may be an array of
 *   `{ name, value }` entries.
 * @param propName Name of the entry to look for.
 * @returns The `value` of the first entry whose `name` equals `propName`,
 *   or `undefined` when `additionalProperty` is not an array, no entry
 *   matches, or the matching entry has no value.
 */
function getAdditionalPropertyValue(o, propName) {
    if (!Array.isArray(o.additionalProperty)) {
        return undefined;
    }
    const match = o.additionalProperty.find((entry) => entry.name === propName);
    return match?.value;
}
/**
 * Tells whether a URL is absolute, i.e. starts with `scheme://` or is
 * protocol-relative (`//host/...`).
 *
 * The scheme is matched per RFC 3986 section 3.1: a letter followed by any
 * mix of letters, digits, `+`, `-` and `.`. Schemes such as `s3://` or
 * `git+ssh://` are therefore recognized, not only purely alphabetic ones.
 *
 * @param url The URL string to test.
 * @returns `true` if `url` is absolute or protocol-relative, else `false`.
 */
function isAbsoluteUrl(url) {
    return /^(?:[a-z][a-z\d+.-]*:\/\/|\/\/)/i.test(url);
}
/**
 * Returns the part of `str` that follows the first occurrence of `after`.
 *
 * @param str String to search in.
 * @param after Marker to look for.
 * @returns Everything past the marker, or `null` if the marker is absent.
 */
function substrAfter(str, after) {
    const markerPos = str.indexOf(after);
    if (markerPos < 0) {
        return null;
    }
    return str.substring(markerPos + after.length);
}
/**
 * Returns the part of `str` that precedes the first occurrence of `before`.
 *
 * @param str String to search in.
 * @param before Marker to look for.
 * @returns Everything up to the marker, or `null` if the marker is absent.
 */
function substrBefore(str, before) {
    const markerPos = str.indexOf(before);
    if (markerPos < 0) {
        return null;
    }
    return str.substring(0, markerPos);
}
/**
 * Breaks an absolute URL into its parts.
 *
 * @param url Absolute URL string; `new URL(url)` throws for anything it
 *   cannot parse, including relative URLs.
 * @returns `{ base, path, query, hash, searchParams }`, where `base` is
 *   `protocol//host` and the remaining fields mirror the `URL` object.
 */
function splitUrl(url) {
    const { protocol, host, pathname, search, hash, searchParams } = new URL(url);
    return {
        base: `${protocol}//${host}`,
        path: pathname,
        query: search,
        hash,
        searchParams
    };
}
/**
 * Removes anything that looks like an HTML tag (`<...>`) from a string.
 *
 * @param str Input string.
 * @returns `str` with every `<...>` sequence deleted.
 */
function stripTags(str) {
    // Pattern source: https://css-tricks.com/snippets/javascript/strip-html-tags-in-javascript/
    const tagPattern = /(<([^>]+)>)/gi;
    return str.replace(tagPattern, '');
}
/**
 * Replaces each line break (`\r\n`, `\n` or `\r`) with a single space.
 *
 * @param str Input string.
 * @returns `str` with line breaks turned into spaces.
 */
function stripLineBreaks(str) {
    const lineBreakPattern = /(\r\n|\n|\r)/gm;
    return str.replace(lineBreakPattern, ' ');
}
/**
 * Collapses every run of whitespace characters into a single space.
 *
 * @param str Input string.
 * @returns `str` with each whitespace run replaced by one space.
 */
function stripMultipleWhitespaces(str) {
    const whitespaceRun = /\s+/g;
    return str.replace(whitespaceRun, ' ');
}
/**
 * Converts HTML `<br>` tags (any case, optionally self-closing) to `\n`.
 *
 * @param str Input string.
 * @returns `str` with each `<br>` variant replaced by a newline.
 */
function brToNewLine(str) {
    // Pattern source: https://stackoverflow.com/questions/5959415/jquery-javascript-regex-replace-br-with-n
    const brPattern = /<br\s*[/]?>/gi;
    return str.replace(brPattern, '\n');
}
/**
 * @internal
 * Rewrites an image URL so that it points at the variant identified by
 * `imageFormat`.
 *
 * @param imageUrl Source image URL.
 * @param imageFormat Object whose `id` replaces the numeric `_<digits>.`
 *   suffix in the URL; when falsy, the URL is returned untouched.
 * @returns The rewritten (or original) URL, or `null` if `imageUrl` is falsy.
 */
function reformatImageUrl(imageUrl, imageFormat) {
    if (!imageUrl) {
        return null;
    }
    if (!imageFormat) {
        return imageUrl;
    }
    // Regex from: https://github.com/masterT/bandcamp-scraper/blob/master/lib/htmlParser.js
    return imageUrl.replace(/_\d{1,3}\./, `_${imageFormat.id}.`);
}
/**
 * Error raised when parsing fails.
 *
 * Besides the standard `message`, an instance carries:
 * - `name`: always `'ParseError'`, so logs and `toString()` identify the
 *   error type instead of showing a generic `Error`.
 * - `parseTarget`: whatever was being parsed when the failure occurred.
 * - `cause`: the underlying error, set only when a truthy `cause` is given.
 */
class ParseError extends Error {
    /**
     * @param message Human-readable description of the failure.
     * @param parseTarget The data that could not be parsed.
     * @param cause Optional underlying failure. Non-`Error` values are
     *   wrapped in an `Error` whose message is `cause.toString()`.
     */
    constructor(message, parseTarget, cause) {
        super(message);
        this.name = 'ParseError';
        this.parseTarget = parseTarget;
        if (cause) {
            this.cause = cause instanceof Error ? cause : new Error(cause.toString());
        }
    }
}
exports.ParseError = ParseError;
//# sourceMappingURL=Parse.js.map