UNPKG

@xcrap/parser

Version:

Xcrap Parser is a package of the Xcrap framework. It was developed to handle the data-extraction step for text files (currently only HTML and JSON are supported) using declarative models.

60 lines (59 loc) 5.29 kB
// Type declarations for the HTML extractor helpers of @xcrap/parser.
// This file contains declarations only; the runtime implementations live in
// the compiled JavaScript that accompanies it, so behavioral notes below are
// limited to what the signatures themselves establish.
import { Attributes, RawAttributes } from "node-html-parser/dist/nodes/html";
import { HTMLElement, NodeType } from "node-html-parser";

/**
 * Default result type of an extractor: a string or `undefined`, delivered
 * either directly or wrapped in a `Promise`.
 */
export type ExtractorFunctionReturnType = (string | undefined) | Promise<string | undefined>;

/**
 * A function that receives a node-html-parser `HTMLElement` and returns `T`.
 * When `T` is omitted it defaults to `ExtractorFunctionReturnType`, i.e. the
 * extractor may be synchronous or asynchronous.
 */
export type ExtractorFunction<T = ExtractorFunctionReturnType> = (element: HTMLElement) => T;

/**
 * String-literal union of the element property names accepted as keys of
 * `propertyExtractors` and by `extract`.
 */
export type HtmlProperty = "innerText" | "textContent" | "text" | "innerHTML" | "outerHTML" | "tagName" | "classList" | "classNames" | "id" | "childElementCount" | "structure" | "structuredText" | "attributes" | "attrs" | "localName" | "nodeType" | "range" | "rawAttributes" | "rawAttrs" | "rawTagName" | "rawText";

/**
 * Attribute names accepted by `extract`.
 *
 * The trailing `(string & {})` member makes the union accept any string while
 * still letting editors suggest the listed literals. Note that `"data-*"` is
 * an ordinary string literal at the type level, not a wildcard pattern.
 */
export type HtmlAttribute = "href" | "src" | "value" | "style" | "role" | "alt" | "title" | "placeholder" | "disabled" | "readonly" | "checked" | "selected" | "name" | "type" | "autocomplete" | "maxlength" | "minlength" | "pattern" | "required" | "aria-label" | "aria-hidden" | "aria-expanded" | "aria-checked" | "aria-disabled" | "data-*" | (string & {});

/**
 * Lookup table with one reader function per `HtmlProperty` key.
 *
 * The declared return type `unknown | undefined` is equivalent to `unknown`,
 * so callers must narrow the value before using it.
 */
export declare const propertyExtractors: Record<HtmlProperty, (element: HTMLElement) => unknown | undefined>;

/**
 * Builds an extractor for the given property or attribute name.
 *
 * @param key - A `HtmlProperty` or `HtmlAttribute` name.
 * @param isAttribute - Optional flag; presumably forces `key` to be read as an
 *   attribute rather than a property (implementation not visible here — TODO
 *   confirm, including the behavior when omitted).
 * @returns An extractor yielding `R | undefined`. `R` defaults to `string` and
 *   is not tied to `key` by this signature, so a caller-supplied `R` is an
 *   unchecked claim about the runtime value.
 */
export declare function extract<T extends HtmlProperty | HtmlAttribute, R = string>(key: T, isAttribute?: boolean): ExtractorFunction<R | undefined>;

// ---------------------------------------------------------------------------
// Pre-declared extractors named after element properties. Each is typed as a
// synchronous extractor; presumably each reads the property its name refers
// to (implementations not visible in this file).
// ---------------------------------------------------------------------------
export declare const extractInnerText: ExtractorFunction<string | undefined>;
export declare const extractTextContent: ExtractorFunction<string | undefined>;
export declare const extractText: ExtractorFunction<string | undefined>;
export declare const extractInnerHtml: ExtractorFunction<string | undefined>;
export declare const extractOuterHtml: ExtractorFunction<string | undefined>;
export declare const extractTagName: ExtractorFunction<string | undefined>;
// Unlike its neighbors, this one is typed to yield an array of strings.
export declare const extractClassList: ExtractorFunction<string[] | undefined>;
export declare const extractId: ExtractorFunction<string | undefined>;

// ---------------------------------------------------------------------------
// Pre-declared extractors named after HTML attributes. All are typed as
// `string | undefined`, including the ones named after boolean attributes
// (disabled, readonly, checked, selected, required).
// NOTE(review): "alt" is listed in `HtmlAttribute` but no `extractAlt`
// constant is declared in this file.
// ---------------------------------------------------------------------------
export declare const extractHref: ExtractorFunction<string | undefined>;
export declare const extractSrc: ExtractorFunction<string | undefined>;
export declare const extractValue: ExtractorFunction<string | undefined>;
export declare const extractStyle: ExtractorFunction<string | undefined>;
export declare const extractRole: ExtractorFunction<string | undefined>;
export declare const extractTitle: ExtractorFunction<string | undefined>;
export declare const extractPlaceholder: ExtractorFunction<string | undefined>;
export declare const extractDisabled: ExtractorFunction<string | undefined>;
export declare const extractReadonly: ExtractorFunction<string | undefined>;
export declare const extractChecked: ExtractorFunction<string | undefined>;
export declare const extractSelected: ExtractorFunction<string | undefined>;
export declare const extractName: ExtractorFunction<string | undefined>;
export declare const extractType: ExtractorFunction<string | undefined>;
export declare const extractAutocomplete: ExtractorFunction<string | undefined>;
export declare const extractMaxLength: ExtractorFunction<string | undefined>;
export declare const extractMinLength: ExtractorFunction<string | undefined>;
export declare const extractPattern: ExtractorFunction<string | undefined>;
export declare const extractRequired: ExtractorFunction<string | undefined>;
export declare const extractAriaLabel: ExtractorFunction<string | undefined>;
export declare const extractAriaHidden: ExtractorFunction<string | undefined>;
export declare const extractAriaExpanded: ExtractorFunction<string | undefined>;
export declare const extractAriaChecked: ExtractorFunction<string | undefined>;
export declare const extractAriaDisabled: ExtractorFunction<string | undefined>;
// NOTE(review): presumably related to the "data-*" entry of `HtmlAttribute`;
// the declared result is a single string, not a map — confirm against the
// implementation what that string contains.
export declare const extractAllData: ExtractorFunction<string | undefined>;

/**
 * Factory that takes an attribute name and returns a synchronous extractor
 * typed as `string | undefined`.
 *
 * @param name - The attribute name; any string is accepted.
 */
export declare const extractAttribute: <T extends string>(name: T) => ExtractorFunction<string | undefined>;

// ---------------------------------------------------------------------------
// Pre-declared extractors whose results are not plain strings, plus the
// remaining property-named extractors.
// ---------------------------------------------------------------------------
// Typed to yield a two-number tuple.
export declare const extractRange: ExtractorFunction<[number, number] | undefined>;
// NOTE(review): the exported name is spelled "extarct" (sic). It is part of
// the public API, so it must stay as-is here unless the runtime module also
// exports a corrected alias.
export declare const extarctAttributes: ExtractorFunction<Record<string, string> | undefined>;
export declare const extractAttrs: ExtractorFunction<Attributes | undefined>;
export declare const extractChildElementCount: ExtractorFunction<number | undefined>;
export declare const extractClassNames: ExtractorFunction<string | undefined>;
export declare const extractLocalName: ExtractorFunction<string | undefined>;
export declare const extractNodeType: ExtractorFunction<NodeType | undefined>;
export declare const extractRawTagName: ExtractorFunction<string | undefined>;
export declare const extractRawText: ExtractorFunction<string | undefined>;
export declare const extractStructure: ExtractorFunction<string | undefined>;
export declare const extractStructuredText: ExtractorFunction<string | undefined>;
// NOTE(review): same "extarct" (sic) misspelling as `extarctAttributes`.
export declare const extarctRawAttributes: ExtractorFunction<RawAttributes | undefined>;
export declare const extractRawAttrs: ExtractorFunction<string | undefined>;

/**
 * Options accepted by `fromNextElementSibling` and
 * `fromPreviousElementSibling`.
 *
 * `shouldExists` is optional. Presumably it controls whether a missing
 * sibling is treated as an error instead of yielding `undefined` — TODO
 * confirm against the implementation, including its default value.
 */
export type FromNextOrPreviousElementSiblingOptions = { shouldExists?: boolean; };

/**
 * Wraps an extractor and returns a new extractor of the default
 * `ExtractorFunction` type (sync or async string result).
 *
 * Judging by the name, the wrapped extractor is presumably applied to the
 * element's next element sibling rather than to the element itself
 * (implementation not visible here).
 *
 * @param extractor - The extractor to wrap.
 * @param options - Optional `FromNextOrPreviousElementSiblingOptions`; the
 *   whole argument may be omitted.
 */
export declare const fromNextElementSibling: (extractor: ExtractorFunction, { shouldExists }?: FromNextOrPreviousElementSiblingOptions) => ExtractorFunction;

/**
 * Counterpart of `fromNextElementSibling` with an identical signature;
 * presumably targets the previous element sibling instead (implementation
 * not visible here).
 *
 * @param extractor - The extractor to wrap.
 * @param options - Optional `FromNextOrPreviousElementSiblingOptions`; the
 *   whole argument may be omitted.
 */
export declare const fromPreviousElementSibling: (extractor: ExtractorFunction, { shouldExists }?: FromNextOrPreviousElementSiblingOptions) => ExtractorFunction;