UNPKG

@xcrap/parser

Version:

Xcrap Parser is a package of the Xcrap framework. It was developed to handle data extraction from text files (currently only HTML and JSON are supported) using declarative models.

114 lines (113 loc) 6.2 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.fromPreviousElementSibling = exports.fromNextElementSibling = exports.extractRawAttrs = exports.extarctRawAttributes = exports.extractStructuredText = exports.extractStructure = exports.extractRawText = exports.extractRawTagName = exports.extractNodeType = exports.extractLocalName = exports.extractClassNames = exports.extractChildElementCount = exports.extractAttrs = exports.extarctAttributes = exports.extractRange = exports.extractAttribute = exports.extractAllData = exports.extractAriaDisabled = exports.extractAriaChecked = exports.extractAriaExpanded = exports.extractAriaHidden = exports.extractAriaLabel = exports.extractRequired = exports.extractPattern = exports.extractMinLength = exports.extractMaxLength = exports.extractAutocomplete = exports.extractType = exports.extractName = exports.extractSelected = exports.extractChecked = exports.extractReadonly = exports.extractDisabled = exports.extractPlaceholder = exports.extractTitle = exports.extractRole = exports.extractStyle = exports.extractValue = exports.extractSrc = exports.extractHref = exports.extractId = exports.extractClassList = exports.extractTagName = exports.extractOuterHtml = exports.extractInnerHtml = exports.extractText = exports.extractTextContent = exports.extractInnerText = exports.propertyExtractors = void 0; exports.extract = extract; const errors_1 = require("../errors"); exports.propertyExtractors = { innerText: (element) => element.innerText, textContent: (element) => element.textContent, text: (element) => element.text, innerHTML: (element) => element.innerHTML, outerHTML: (element) => element.outerHTML, tagName: (element) => element.tagName, classList: (element) => Array.from(element.classList.values()), id: (element) => element.id, attributes: (element) => element.attributes, attrs: (element) => element.attrs, childElementCount: (element) => element.childElementCount, classNames: (element) => 
element.classNames, localName: (element) => element.localName, nodeType: (element) => element.localName, range: (element) => element.range, rawAttributes: (element) => element.rawAttributes, rawAttrs: (element) => element.rawAttrs, rawTagName: (element) => element.rawTagName, rawText: (element) => element.rawText, structure: (element) => element.structure, structuredText: (element) => element.structuredText, }; function extract(key, isAttribute = false) { return (element) => { if (isAttribute) { return element.getAttribute(key); } const extractor = exports.propertyExtractors[key]; if (!extractor) { throw new errors_1.ExtractorNotFoundError(key); } return extractor(element); }; } exports.extractInnerText = extract("innerText"); exports.extractTextContent = extract("textContent"); exports.extractText = extract("text"); exports.extractInnerHtml = extract("innerHTML"); exports.extractOuterHtml = extract("outerHTML"); exports.extractTagName = extract("tagName"); exports.extractClassList = extract("classList"); exports.extractId = extract("id"); exports.extractHref = extract("href", true); exports.extractSrc = extract("src", true); exports.extractValue = extract("value", true); exports.extractStyle = extract("style", true); exports.extractRole = extract("role", true); exports.extractTitle = extract("title", true); exports.extractPlaceholder = extract("placeholder", true); exports.extractDisabled = extract("disabled", true); exports.extractReadonly = extract("readonly", true); exports.extractChecked = extract("checked", true); exports.extractSelected = extract("selected", true); exports.extractName = extract("name", true); exports.extractType = extract("type", true); exports.extractAutocomplete = extract("autocomplete", true); exports.extractMaxLength = extract("maxlength", true); exports.extractMinLength = extract("minlength", true); exports.extractPattern = extract("pattern", true); exports.extractRequired = extract("required", true); exports.extractAriaLabel = 
extract("aria-label", true); exports.extractAriaHidden = extract("aria-hidden", true); exports.extractAriaExpanded = extract("aria-expanded", true); exports.extractAriaChecked = extract("aria-checked", true); exports.extractAriaDisabled = extract("aria-disabled", true); exports.extractAllData = extract("data-*", true); const extractAttribute = (name) => extract(name, true); exports.extractAttribute = extractAttribute; exports.extractRange = extract("range"); exports.extarctAttributes = extract("attributes"); exports.extractAttrs = extract("attrs"); exports.extractChildElementCount = extract("childElementCount"); exports.extractClassNames = extract("classNames"); exports.extractLocalName = extract("localName"); exports.extractNodeType = extract("nodeType"); exports.extractRawTagName = extract("rawTagName"); exports.extractRawText = extract("rawText"); exports.extractStructure = extract("structure"); exports.extractStructuredText = extract("structuredText"); exports.extarctRawAttributes = extract("rawAttributes"); exports.extractRawAttrs = extract("rawAttrs"); const fromNextElementSibling = (extractor, { shouldExists } = { shouldExists: true }) => { return (element) => { const nextElementSibling = element.nextElementSibling; if (!nextElementSibling) { if (shouldExists) { throw new errors_1.HTMLElementNotFoundError(); } return undefined; } return extractor(nextElementSibling); }; }; exports.fromNextElementSibling = fromNextElementSibling; const fromPreviousElementSibling = (extractor, { shouldExists } = { shouldExists: true }) => { return (element) => { const previousElementSibling = element.previousElementSibling; if (!previousElementSibling) { if (shouldExists) { throw new errors_1.HTMLElementNotFoundError(); } return undefined; } return extractor(previousElementSibling); }; }; exports.fromPreviousElementSibling = fromPreviousElementSibling;