UNPKG

xscrape

Version:

A flexible and powerful library designed to extract and transform data from HTML documents using user-defined schemas

117 lines (111 loc) 3.65 kB
"use strict"; var __create = Object.create; var __defProp = Object.defineProperty; var __getOwnPropDesc = Object.getOwnPropertyDescriptor; var __getOwnPropNames = Object.getOwnPropertyNames; var __getProtoOf = Object.getPrototypeOf; var __hasOwnProp = Object.prototype.hasOwnProperty; var __export = (target, all) => { for (var name in all) __defProp(target, name, { get: all[name], enumerable: true }); }; var __copyProps = (to, from, except, desc) => { if (from && typeof from === "object" || typeof from === "function") { for (let key of __getOwnPropNames(from)) if (!__hasOwnProp.call(to, key) && key !== except) __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable }); } return to; }; var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps( // If the importer is in node compatibility mode or this is not an ESM // file that has been converted to a CommonJS file using a Babel- // compatible transform (i.e. "__esModule" has not been set), then set // "default" to the CommonJS "module.exports" for node compatibility. isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target, mod )); var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod); // src/index.ts var src_exports = {}; __export(src_exports, { defineScraper: () => defineScraper }); module.exports = __toCommonJS(src_exports); // src/defineScraper.ts var cheerio = __toESM(require("cheerio"), 1); // src/validators.ts var import_zod = require("zod"); var Validator = class { constructor(schema, validateFunction) { this.schema = schema; this.validateFunction = validateFunction; } validate(data) { try { const result = this.validateFunction(this.schema, data); return { success: true, data: result }; } catch (error) { return { success: false, error }; } } }; function getSchemaBuilder(type) { switch (type) { case "zod": return import_zod.z; default: throw new Error(`Unsupported validator type: ${type}`); } } function createValidator(type, schemaFn) { const builder = getSchemaBuilder(type); const schema = schemaFn(builder); switch (type) { case "zod": return new Validator( schema, (schema2, data) => schema2.parse(data) ); default: throw new Error(`Unsupported validator type: ${type}`); } } // src/defineScraper.ts function defineScraper(config) { const validator = createValidator(config.validator, config.schema); return async (html) => { try { const $ = cheerio.load(html); const extractedData = $.extract(config.extract); const validationResult = validator.validate(extractedData); if (!validationResult.success) { return { error: validationResult.error }; } if (!validationResult.data) { return { error: new Error("Validation succeeded but no data was returned") }; } if (config.transform) { try { const transformed = await Promise.resolve( config.transform(validationResult.data) ); return { data: transformed }; } catch (error) { return { error }; } } return { data: validationResult.data }; } catch (error) { return { error }; } }; } // src/types/main.ts var import_zod2 = require("zod"); // Annotate the CommonJS export names for ESM import in node: 0 && (module.exports = { defineScraper });