xscrape
Version:
A flexible and powerful library designed to extract and transform data from HTML documents using user-defined schemas
33 lines (28 loc) • 1.27 kB
text/typescript
import { StandardSchemaV1 } from '@standard-schema/spec';
import { Element } from 'domhandler';
/**
 * Callback accepted as the `value` of an {@link ExtractDescriptor}.
 *
 * @param el - A `domhandler` element (presumably the one matched by the
 *   descriptor's `selector` — confirm against the implementation).
 * @param key - NOTE(review): presumably the property name currently being
 *   extracted — confirm.
 * @param obj - NOTE(review): presumably the result object built so far —
 *   confirm.
 * @returns The extracted value; typed `unknown`, so no shape is guaranteed
 *   at the type level.
 */
type ExtractDescriptorFn = (
  el: Element,
  key: string,
  obj: Record<string, unknown>
) => unknown;
/**
 * Object form of an extraction rule: a required `selector` plus an optional
 * `value` that refines what is taken from the selection.
 */
interface ExtractDescriptor {
/** Selector string (NOTE(review): presumably a CSS selector — confirm). */
selector: string;
/**
 * Optional refinement: a string, an {@link ExtractDescriptorFn} callback, or
 * a nested {@link ExtractMap}.
 * NOTE(review): how the string form is interpreted (e.g. as an attribute or
 * property name) is not visible in this file — confirm.
 */
value?: string | ExtractDescriptorFn | ExtractMap;
}
/**
 * A single extraction rule. It is one of:
 * - a bare string,
 * - an {@link ExtractDescriptor}, or
 * - a one-element tuple wrapping either of the above.
 *
 * NOTE(review): the tuple form presumably requests every match rather than
 * only the first — confirm against the implementation.
 */
type ExtractValue =
  | string
  | ExtractDescriptor
  | [string | ExtractDescriptor];
/**
 * Dictionary of extraction rules keyed by arbitrary string names. It may be
 * nested through `ExtractDescriptor.value`, which accepts an `ExtractMap`.
 */
type ExtractMap = Record<string, ExtractValue>;
/**
 * Extraction map whose keys mirror the keys of `T`.
 *
 * Each property of `T` is mapped to an extraction rule; `ExtractMap[string]`
 * is the value type of {@link ExtractMap}. Because the mapping iterates
 * `keyof T` directly, optional and readonly modifiers on `T`'s properties
 * carry over to the resulting type.
 *
 * @typeParam T - Object type whose keys the extraction map must provide.
 */
type SchemaAwareExtractMap<T> = {
  [Field in keyof T]: ExtractMap[string];
};
/**
 * Configuration object accepted by `defineScraper`.
 *
 * @typeParam S - A Standard Schema (`StandardSchemaV1`) describing the data.
 * @typeParam R - Type produced by `transform`; constrained to, and defaulting
 *   to, the output type inferred from `S`.
 */
type ScraperConfig<
  S extends StandardSchemaV1,
  R extends StandardSchemaV1.InferOutput<S> = StandardSchemaV1.InferOutput<S>
> = {
  /** The schema instance whose inferred output type drives `extract` and `transform`. */
  schema: S;
  /** Extraction rules, one entry per key of the schema's inferred output type. */
  extract: SchemaAwareExtractMap<StandardSchemaV1.InferOutput<S>>;
  /**
   * Optional post-processing step. Receives a value of the schema's inferred
   * output type and returns `R`, either directly or through a promise.
   */
  transform?: (data: StandardSchemaV1.InferOutput<S>) => R | Promise<R>;
};
/**
 * Outcome of a validation step: a `success` flag plus optional `data` and
 * `error` fields. The type does not tie `data`/`error` to the value of
 * `success`, so consumers must check the fields themselves.
 *
 * @typeParam T - Type of the validated data.
 */
type ValidationResult<T> = {
/** Whether validation succeeded. */
success: boolean;
/** Validated value, when present. */
data?: T;
/** Failure details, when present; typed `unknown`, so narrow before use. */
error?: unknown;
};
/**
 * Value that the function returned by `defineScraper` resolves to. Both
 * fields are optional at the type level, so callers should check for `error`
 * (or the absence of `data`) before using the result.
 *
 * @typeParam T - Type of the scraped data.
 */
type ScraperResult<T> = {
/** Scraped data, when present. */
data?: T;
/** Failure details, when present; typed `unknown`, so narrow before use. */
error?: unknown;
};
/**
 * Builds a scraper function from a {@link ScraperConfig}.
 *
 * @typeParam S - The Standard Schema type supplied in `config.schema`.
 * @typeParam T - Defaults to the output type inferred from `S`; serves as the
 *   upper bound for `R`.
 * @typeParam R - Type of the data carried by the result; defaults to `T`.
 * @param config - Schema, extraction rules and optional transform.
 * @returns A function that takes an HTML string and resolves to a
 *   {@link ScraperResult} carrying `R`.
 */
declare function defineScraper<
  S extends StandardSchemaV1,
  T extends StandardSchemaV1.InferOutput<S> = StandardSchemaV1.InferOutput<S>,
  R extends T = T
>(
  config: ScraperConfig<S, R>
): (html: string) => Promise<ScraperResult<R>>;
// Exported names: three type-only exports plus the `defineScraper` function.
// The other type declarations visible above are not part of this export list.
export { type ScraperConfig, type ScraperResult, type ValidationResult, defineScraper };