UNPKG

x-ray-scraper

Version:
191 lines (170 loc) 5.1 kB
const Promise = require('bluebird'); const { isObject, compact, root } = require('./utils/commonHelpers'); const parse = require('./utils/parseHelper'); const debug = require('debug')('resolve'); const isArray = Array.isArray; /** * Select the attribute based on `attr` * * @param {Cheerio} $ * @param {String} attr * @return {String} */ function attribute($el, attr) { switch (attr) { case 'html': return $el.html(); case 'text': return $el.text(); default: return $el.attr(attr); } } /** * Filter the value(s) * * @param {Object} obj * @param {Cheerio} $ * @param {String} scope * @param {String|Array} selector * @param {Object} filters * @return {Array|String} */ function filter(obj, $, scope, selector, value, filters) { const ctx = { $: $, selector: obj.selector, attribute: obj.attribute }; return (obj.filters || []).reduce((out, filter) => { const fn = filters[filter.name]; if (typeof fn === 'function') { const args = [out].concat(filter.args || []); const filtered = fn.apply(ctx, args); debug('%s.apply(ctx, %j) => %j', filter.name, args, filtered); return filtered; } else { throw new Error('Invalid filter: ' + filter.name); } }, value); } /** * Selector abstraction, deals * with various instances of $ * * @param {Cheerio} $ * @param {String} selector * @return {Cheerio} */ function select($, selector) { if ($.is && $.is(selector)) return $; return $.find ? $.find(selector) : $(selector); } /** * Find the node(s) * * @param {Cheerio} $ * @param {String} scope * @param {String|Array} selector * @param {String} attr * @return {Array|String} */ function find($, scope, selector, attr) { if (scope) { if (isArray(selector)) { const $scope = select($, scope); return $scope .map(i => { const $el = $scope.eq(i); const $selector = select($el, selector[0]); return $selector.map(i => attribute($selector.eq(i), attr)).get(); }) .get(); } const $scope = select($, scope); return attribute($scope.find(selector).eq(0), attr); } else { if (isArray(selector)) { const $selector = select($, selector[0]); return $selector.map(i => attribute($selector.eq(i), attr)).get(); } const $selector = select($, selector); return attribute($selector.eq(0), attr); } } /** * Initialize `resolve` * * @param {$} cheerio object * @param {String} scope * @param {String|Array} selector * @param {Object} filters * @return {Array|String} */ function resolve($, scope, selector, filters = {}) { debug('resolve($j, %j)', scope, selector); const isAnArray = isArray(selector); let obj = parse(isAnArray ? selector[0] : selector); obj.attribute = obj.attribute || 'text'; if (!obj.selector) { obj.selector = scope; scope = null; } let value = find( $, scope, isAnArray ? [obj.selector] : obj.selector, obj.attribute ); debug('resolved($j, %j) => %j', scope, selector, value); if (isAnArray) { return value.map(v => filter(obj, $, scope, selector, v, filters)); } return filter(obj, $, scope, selector, value, filters); } function resolveStringSelector($, scope, selector, filters) { const value = resolve($, root(scope), selector, filters); return Promise.resolve(value); } function resolveXraySelector(xray, $) { return xray($); } function multipleResolve($, node, scope) { const $scope = $.find ? $.find(scope) : $(scope); if (!$scope.length) return Promise.resolve([]); const promises = $scope.map(i => node($scope.eq(i))).get(); return Promise.all(promises).then(compact); } function resolveArraySelector($, xray, scope, selector, filters) { if (typeof selector[0] === 'string') { return resolveStringSelector($, scope, selector, filters); } else if (typeof selector[0] === 'object') { const node = xray(scope, selector[0]); return multipleResolve($, node, scope); } else if (typeof selector[0] === 'function') { return multipleResolve($, selector[0], scope); } throw new Error("can 't resolve array selector"); } function resolveObjectSelectorRecursive($, xray, scope, selector, filters) { return Promise.reduce( Object.keys(selector), (accum, k) => resolveSelector($, xray, selector[k], scope, filters).then(value => { if (value !== undefined && value !== '') { accum[k] = value; } return accum; }), {} ); } function resolveSelector($, xray, selector, scope, filters) { if (typeof selector === 'string') { return resolveStringSelector($, scope, selector, filters); } else if (typeof selector === 'function') { return resolveXraySelector(selector, $); } else if (Array.isArray(selector)) { return resolveArraySelector($, xray, scope, selector, filters); } else if (isObject(selector)) { return resolveObjectSelectorRecursive($, xray, scope, selector, filters); } throw new Error("can 't resolve selector"); } module.exports = { resolve, resolveSelector };