UNPKG

bquery

Version:

bquery is a Node.js module for fetching web pages; it uses CSS selectors to extract and structure the content of the HTML page.

139 lines (128 loc) 3.74 kB
var _ = require("underscore"), q = require('q'), jsonSelect = require('JSONSelect'), bquery; exports._init = function (n) { bquery = n; }; exports.fetch = fetch; exports.select = select; function fetch (url, query) { var deferred = q.defer(); // if (bquery.cache.check(query)) { // deferred.resolve(bquery.cache.get(query).value); // return deferred.promise; // } else { return bquery.fetch(url, query).then(function (data) { try { var parsed = JSON.parse(data); return select(parsed, query); } catch (e) { throw new Error('Could not parse JSON document'); } }); // } } function select (parsed, query) { var deferred = q.defer(), results; try { if (!query.selector) { deferred.resolve(bquery._wrapResults([parsed], query)); } else { results = pick_value(parsed, query); if (results.length === 0) { deferred.reject(new Error('Could not match with that selector')); } else { deferred.resolve(bquery._wrapResults(results, query)); } } } catch (e) { deferred.reject(new Error('Could not match with that selector')); } return deferred.promise; } /** * 将指定的对象按照query选择器的格式进行构造 * @param {[Object]} parsed 数据源对象 * @param {[Object]} query JSON选择器形如:{ * selector: ".media_types>.buckets", * extract: { * media_type: ":root>.key" * } * } * @return {[Object]} 按照query结构构造的JSON对象 * */ function pick_value(parsed, query){ var result = {}, root = null; if(query.selector){ root = jsonSelect.match(query.selector, [], parsed); root = root[0] } else if(typeof query === "string"){ result = jsonSelect.match(query, parsed); if(_.isArray(result) && result.length == 1){ result = result[0]; } } if(query.extract){ if(_.isArray(root)){ root = _.flatten(root); var rs = []; root.forEach(function(rc){ var t = {}; for(var key in query.extract){ t[key] = pick_value(rc, query.extract[key]); } rs.push(t); }); result = rs; } else{ for(var key in query.extract){ result[key] = pick_value(root, query.extract[key]); } } } else{ result = root; } return result; } /** * 将JSON对象展开为对象数组 * @param {[Object]} source 待展开的对象 * @param 
{[Object]} res 递归参数,初始值为空对象{} * @param {[Array]} arrs 引用结果,初始值为空数组[] */ function flatten(source, res, arrs){ var tmp = res; for(var key in source){ if(_.isArray(source[key])){ source[key].forEach(function(obj){ if(_.isObject(obj)){ var t = _.extend({}, res); flatten(obj, res, arrs); res = t; } else{ if(!res[key]){ res[key] = []; } res[key].push(obj); } }); } else{ res[key] = source[key]; } } if(tmp == res){ arrs.push(res); } }