UNPKG

xscrape

Version:

A flexible and powerful library designed to extract and transform data from HTML documents using user-defined schemas

80 lines (76 loc) 1.97 kB
// src/defineScraper.ts
import * as cheerio from "cheerio";

// src/validators.ts
import { z } from "zod";

/**
 * Couples a schema with the function used to check data against it and
 * converts that function's thrown errors into a result object.
 */
var Validator = class {
  /**
   * @param schema - Schema object handed back to `validateFunction` on every call.
   * @param validateFunction - Called as `(schema, data)`; expected to return the
   *   validated value or throw when the data does not match.
   */
  constructor(schema, validateFunction) {
    this.schema = schema;
    this.validateFunction = validateFunction;
  }

  /**
   * Validates `data` without throwing.
   *
   * @param data - Value to check against the stored schema.
   * @returns `{ success: true, data }` with whatever `validateFunction` returned,
   *   or `{ success: false, error }` with whatever it threw.
   */
  validate(data) {
    try {
      const result = this.validateFunction(this.schema, data);
      return { success: true, data: result };
    } catch (error) {
      return { success: false, error };
    }
  }
};

/**
 * Returns the schema-builder namespace for a validator type.
 *
 * @param type - Validator identifier; only `"zod"` is supported.
 * @returns The `z` export of zod.
 * @throws {Error} When `type` is not a supported validator.
 */
function getSchemaBuilder(type) {
  switch (type) {
    case "zod":
      return z;
    default:
      throw new Error(`Unsupported validator type: ${type}`);
  }
}

/**
 * Builds a `Validator` for the given validator type.
 *
 * @param type - Validator identifier; only `"zod"` is supported.
 * @param schemaFn - Receives the schema builder and returns the schema.
 * @returns A `Validator` whose check calls `schema.parse(data)`.
 * @throws {Error} When `type` is not a supported validator (raised before
 *   `schemaFn` is invoked).
 */
function createValidator(type, schemaFn) {
  const builder = getSchemaBuilder(type);
  const schema = schemaFn(builder);
  switch (type) {
    case "zod":
      return new Validator(schema, (schema2, data) => schema2.parse(data));
    default:
      throw new Error(`Unsupported validator type: ${type}`);
  }
}

// src/defineScraper.ts
/**
 * Creates a scraper function from a configuration object.
 *
 * The validator is built immediately, so an unsupported `config.validator`
 * throws here rather than when the scraper is run.
 *
 * @param config - Reads `validator` (validator type), `schema` (schema factory),
 *   `extract` (passed to cheerio's `$.extract`) and the optional `transform`
 *   (sync or async post-processing of the validated data).
 * @returns An async function taking an HTML string. It resolves to `{ data }`
 *   on success or `{ error }` on any failure (load/extract, validation or
 *   transform); failures are reported in the result instead of rejecting.
 */
function defineScraper(config) {
  const validator = createValidator(config.validator, config.schema);

  return async (html) => {
    try {
      const $ = cheerio.load(html);
      const extractedData = $.extract(config.extract);

      const validationResult = validator.validate(extractedData);
      if (!validationResult.success) {
        return { error: validationResult.error };
      }

      // Only a missing value counts as "no data". A truthiness test would
      // wrongly reject valid falsy results such as 0, "" or false.
      if (validationResult.data == null) {
        return {
          error: new Error("Validation succeeded but no data was returned")
        };
      }

      if (config.transform) {
        // A throw or rejection from transform lands in the catch below,
        // which reports it as `{ error }`.
        const transformed = await config.transform(validationResult.data);
        return { data: transformed };
      }

      return { data: validationResult.data };
    } catch (error) {
      return { error };
    }
  };
}

// src/types/main.ts
import "zod";

export { defineScraper };