UNPKG

xscrape

Version:

A flexible and powerful library designed to extract and transform data from HTML documents using user-defined schemas

33 lines (28 loc) 1.27 kB
import type { StandardSchemaV1 } from '@standard-schema/spec';
import type { Element } from 'domhandler';

/**
 * Callback form of an extraction rule: receives a DOM element, a key and a
 * record, and returns a value of any type.
 *
 * NOTE(review): presumably `key` is the field being populated and `obj` is the
 * result object built so far — confirm against the implementation.
 */
type ExtractDescriptorFn = (
  el: Element,
  key: string,
  obj: Record<string, unknown>,
) => unknown;

/**
 * Object form of an extraction rule.
 *
 * `selector` is required. `value` is optional and may be a string, a callback,
 * or a nested {@link ExtractMap}.
 *
 * NOTE(review): `selector` looks like a CSS selector and a string `value`
 * looks like an attribute name — neither is established by these typings.
 */
interface ExtractDescriptor {
  selector: string;
  value?: string | ExtractDescriptorFn | ExtractMap;
}

/**
 * A single extraction rule: a bare string, a descriptor object, or a
 * one-element tuple wrapping either of those.
 *
 * NOTE(review): the tuple form presumably requests "all matches" rather than
 * the first one — verify before relying on it.
 */
type ExtractValue =
  | string
  | ExtractDescriptor
  | [string | ExtractDescriptor];

/** Extraction rules keyed by arbitrary field name. */
type ExtractMap = Record<string, ExtractValue>;

/**
 * Extraction rules keyed by the properties of `T`, so the rule set lines up
 * with the fields of the schema's output type. Written as a mapped type over
 * `keyof T`, which keeps the optional/readonly modifiers of `T`'s properties.
 */
type SchemaAwareExtractMap<T> = {
  [K in keyof T]: ExtractValue;
};

/**
 * Configuration accepted by {@link defineScraper}.
 *
 * @typeParam S - A Standard Schema describing the extracted data.
 * @typeParam R - Type produced by `transform`; constrained to (and defaulting
 *   to) the schema's output type.
 */
type ScraperConfig<
  S extends StandardSchemaV1,
  R extends StandardSchemaV1.InferOutput<S> = StandardSchemaV1.InferOutput<S>,
> = {
  /** Schema whose inferred output type drives the shape of `extract`. */
  schema: S;
  /** One extraction rule per property of the schema's output type. */
  extract: SchemaAwareExtractMap<StandardSchemaV1.InferOutput<S>>;
  /**
   * Optional hook that receives the schema output and returns `R`, either
   * directly or through a promise.
   */
  transform?: (data: StandardSchemaV1.InferOutput<S>) => Promise<R> | R;
};

/**
 * Outcome of a validation step: a `success` flag plus optional `data` and
 * `error` fields. The typings do not tie the presence of `data` or `error`
 * to the value of `success`, so callers must check the fields themselves.
 */
type ValidationResult<T> = {
  success: boolean;
  data?: T;
  error?: unknown;
};

/**
 * Value the scraper function resolves to. Both fields are optional.
 *
 * NOTE(review): presumably exactly one of `data` / `error` is set per call —
 * the typings do not enforce this.
 */
type ScraperResult<T> = {
  data?: T;
  error?: unknown;
};

/**
 * Creates a scraper from the given configuration.
 *
 * @typeParam S - Standard Schema describing the extracted data.
 * @typeParam T - Defaults to the schema's output type; in this signature it
 *   is used only as the upper bound (and default) for `R`.
 * @typeParam R - Type carried by the result's `data` field.
 * @param config - Schema, extraction rules and optional transform.
 * @returns A function that takes an HTML string and returns a promise of a
 *   {@link ScraperResult}.
 */
declare function defineScraper<
  S extends StandardSchemaV1,
  T extends StandardSchemaV1.InferOutput<S> = StandardSchemaV1.InferOutput<S>,
  R extends T = T,
>(config: ScraperConfig<S, R>): (html: string) => Promise<ScraperResult<R>>;

export type { ScraperConfig, ScraperResult, ValidationResult };
export { defineScraper };