UNPKG

xscrape

Version:

A flexible and powerful library designed to extract and transform data from HTML documents using user-defined schemas

67 lines (62 loc) 2.37 kB
import { z } from 'zod';
import { Element } from 'domhandler';

/**
 * Callback form of an extraction value: receives a DOM element, a key and a
 * record, and returns the extracted value.
 *
 * NOTE(review): presumably `key` is the field being extracted and `obj` the
 * record under construction — confirm against the implementation, which is
 * not part of this declaration file.
 */
type ExtractDescriptorFn = (el: Element, key: string, obj: Record<string, unknown>) => unknown;

/** Extraction rule consisting of a required selector and an optional `value`. */
interface ExtractDescriptor {
  selector: string;
  /** A string, a callback, or a nested extraction map. */
  value?: string | ExtractDescriptorFn | ExtractMap;
}

/**
 * A single extraction rule: a string, a descriptor, or a one-element tuple
 * wrapping either of them.
 */
type ExtractValue = string | ExtractDescriptor | [string | ExtractDescriptor];

/** Maps arbitrary string keys to extraction rules. */
interface ExtractMap {
  [key: string]: ExtractValue;
}

/** Identifiers of the supported validation libraries; currently only `'zod'`. */
type ValidatorType = 'zod';

/** Type of the `z` export of the `zod` package. */
type ZodBuilder = typeof z;

/** Resolves a validator identifier to the builder object handed to the schema function. */
type SchemaBuilder<V extends ValidatorType> = V extends 'zod' ? ZodBuilder : never;

/**
 * Function that receives the builder for validator `V` and returns a schema
 * describing `T` (a `z.ZodSchema<T>` when `V` is `'zod'`).
 */
type SchemaFunction<V extends ValidatorType, T> = (
  builder: SchemaBuilder<V>,
) => V extends 'zod' ? z.ZodSchema<T> : never;

/**
 * Configuration accepted by `defineScraper`.
 *
 * @template T - The shape of the extracted data.
 * @template V - The validator identifier.
 * @template R - The type produced by `transform`; defaults to `T`.
 */
type ScraperConfig<T extends Record<string, unknown>, V extends ValidatorType, R extends T = T> = {
  /** Which validation library the schema is written for. */
  validator: V;
  /** Builds the schema for `T` from the validator's builder. */
  schema: SchemaFunction<V, T>;
  /** Extraction rules keyed by field name. */
  extract: ExtractMap;
  /** Optional post-processing step; may return its result directly or as a promise. */
  transform?: (data: T) => Promise<R> | R;
};

/** Options shared by every leaf field. */
type BaseFieldOptions = {
  attribute?: string;
};

/**
 * Leaf field configuration. The trailing union requires exactly one of
 * `selector` or `selectorAll` to be provided; supplying both, or neither, is
 * a type error.
 */
type LeafFieldConfig = BaseFieldOptions & {
  selector?: string;
  selectorAll?: string;
} & (
    | {
        selector: string;
        selectorAll?: never;
      }
    | {
        selector?: never;
        selectorAll: string;
      }
  );

/**
 * Field configuration for a value of type `T`: non-array object types get a
 * nested `fields` map, while arrays and non-object types get a
 * `LeafFieldConfig`.
 */
type FieldConfig<T> = T extends object
  ? // `unknown[]` matches exactly what `Array<infer U>` matched, without
    // binding an element type that is never used.
    T extends unknown[]
    ? LeafFieldConfig
    : {
        fields: Fields<T>;
      }
  : LeafFieldConfig;

/** Maps every key of `T` to the field configuration for that key's type. */
type Fields<T> = {
  [K in keyof T]: FieldConfig<T[K]>;
};

/** Result shape of a validation: a success flag with optional data and error. */
type ValidationResult<T> = {
  success: boolean;
  data?: T;
  error?: unknown;
};

/** Result shape of a scraper run: optional data and optional error. */
type ScraperResult<T> = {
  data?: T;
  error?: unknown;
};

/**
 * Defines a scraper with the provided configuration.
 *
 * @template T - The shape of the extracted data.
 * @template V - The type of the validator used for validation.
 * @template R - The type of the result after optional transformation, defaults to T.
 *
 * @param config - The configuration object for the scraper.
 * @returns A function that takes an HTML string and returns a promise of the
 * scraping result.
 */
declare function defineScraper<T extends Record<string, unknown>, V extends ValidatorType, R extends T = T>(
  config: ScraperConfig<T, V, R>,
): (html: string) => Promise<ScraperResult<R>>;

export {
  type FieldConfig,
  type Fields,
  type LeafFieldConfig,
  type SchemaBuilder,
  type SchemaFunction,
  type ScraperConfig,
  type ScraperResult,
  type ValidationResult,
  type ValidatorType,
  defineScraper,
};