x-ray-scraper
Version:
Scraper next gen based on x-ray (2.3.2)
61 lines (50 loc) • 1.59 kB
JavaScript
const debug = require('debug')('x-ray');
const { isUrl, load } = require('./utils/commonHelpers');
const { resolve } = require('./resolve');
function getLoadedSource(source, scope, filters, request) {
if (isUrl(source)) {
debug('starting at: %s', source);
return request(source).then(html => load(html, source));
} else if (scope && ~scope.indexOf('@')) {
debug('resolving to a url: %s', scope);
const url = resolve(source, false, scope, filters);
// ensure that a@href is a URL
if (!isUrl(url)) {
debug('%s is not a url. Skipping!', url);
return Promise.resolve(load(''));
}
debug('resolved "%s" to a %s', scope, url);
return request(url).then(html => load(html, url));
} else if (source) {
return Promise.resolve(load(source));
}
debug('%s is not a url or html. Skipping!', source);
return Promise.resolve(load(''));
}
function getNextUrl($, paginate, limit, filters, currentObj, abortFn, pageNumber) {
if (!paginate) {
debug('no paginate, ending');
return null;
}
if (limit <= 0) {
debug('reached limit, ending');
return null;
}
let url;
if (typeof paginate === 'function') {
url = paginate(pageNumber, $);
} else {
url = resolve($, false, paginate, filters);
}
debug('paginate(%j) => %j', paginate, url);
if (!isUrl(url)) {
debug('%j is not a url, finishing up', url);
return null;
}
if (abortFn && abortFn(currentObj, url)) {
debug('abort check passed, ending');
return null;
}
return url;
}
module.exports = { getLoadedSource, getNextUrl };