UNPKG

chowdown

Version:

A JavaScript library that allows for the quick transformation of DOM documents into useful formats.

148 lines (136 loc) 5.17 kB
"use strict"; var cheerio = require('cheerio'); var _require = require('lodash'), isString = _require.isString, isFunction = _require.isFunction, castArray = _require.castArray; /** * Methods to handle the manipulation of DOM documents. * Uses cheerio under the hood. * * @class DOMDocument * @extends Document */ module.exports = { /** * Loads the document given it's body. If the body provided is a string, * it will be transformed into a cheerio document. * * @param {(string|cheerio)} body The document body or cheerio object. * @return {cheerio} The loaded cheerio document. */ loadDocument: function loadDocument(body) { return cheerio.load(body); }, /** * Loads the root of the document and wraps it in a cheerio instance. * If no root is specified, it's retrieved from the cheerio document itself. * * @param {cheerio} root The root of the document. * @return {cheerio} The root of the document. */ loadRoot: function loadRoot(root) { if (root === undefined) root = this.options.document.root(); return this.options.document(root); }, /** * Formats and prepares a selector for querying. The method expects a string * in a format inspired by the XPath standard. * * @param {string} selector A string selector for format. * @return {array} The formatted selector; */ formatSelector: function formatSelector(selector) { if (isString(selector)) return selector.split('/'); return ['']; }, /** * Queries the cheerio document given a selector. If the first part of the * formatted selector is an empty string, the root of the document is returned. * * @param {array} selector The selector for query. * @return {cheerio} The result of the query. */ query: function query(selector) { if (selector[0] === '') return this.options.root; return this.options.document(selector[0], this.options.root); }, /** * Calls the given document function with the cheerio object as the first * parameter and the document's root as the second parameter. * * @param {function} fn The document function to call. * @return {any} The result of the document function. */ queryRaw: function queryRaw(fn) { return fn(this.options.document, this.options.root); }, /** * Queries the cheerio document for children given a selector. * If the result of the query is either not a cheerio result set or * the set contains no results, then undefined is returned. * * @param {array} selector The selector for query. * @return {cheerio} The children query set. */ queryChildren: function queryChildren(selector) { var result = this.query(selector); if (!(result instanceof cheerio) || result.length == 0) return undefined; return result.toArray(); }, /** * Queries the document for a URI. Will attempt to grab the href * attribute of a dom element if no other attribute was specified in the selector. * * @param {array} selector The selector for the URI. * @return {*} The retrieved URI. */ queryUri: function queryUri(selector) { return this.queryValue(selector, 'href'); }, /** * Queries the document for an element and attempts to get an attribute from it. * * Accepts a proritised array of attributes to return from the element if no * attribute was specified in the second part of the selector. * * Each attribute in this array has a higher return priority than the one succeeding it. * * If no attribute can be resolved from the retrieved element, undefined is returned instead. * If no attribute is specified at all, then the inner text of the element is returned. * * @param {array} selector The parts of the selector. The second part corresponds to the desired attribute. * @param {string[]} defaultAttrs The fallback array of attributes to try for. * @return {any} The retrieved value. */ queryValue: function queryValue(selector) { var defaultAttrs = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : []; var value = this.query(selector); var attrs = castArray(selector[1] || defaultAttrs); if (!(value !== undefined && value.cheerio != null)) return value; if (value.length === 0) return undefined; if (attrs.length == 0 && value.text() !== '') return value.text(); var _iteratorNormalCompletion = true; var _didIteratorError = false; var _iteratorError = undefined; try { for (var _iterator = attrs[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) { var attr = _step.value; if (value.attr(attr) !== undefined) return value.attr(attr); } } catch (err) { _didIteratorError = true; _iteratorError = err; } finally { try { if (!_iteratorNormalCompletion && _iterator["return"] != null) { _iterator["return"](); } } finally { if (_didIteratorError) { throw _iteratorError; } } } } };