UNPKG

xscrape

Version:

A flexible and powerful library designed to extract and transform data from HTML documents using user-defined schemas

36 lines (35 loc) 951 B
// src/defineScraper.ts
import * as cheerio from "cheerio";

/**
 * Builds a scraper function from a configuration object.
 *
 * The configuration is read as follows:
 *  - `config.extract` is handed to cheerio's `extract()` on the loaded document.
 *  - `config.schema["~standard"].validate` is called with the extracted data;
 *    its result may be returned synchronously or as a promise.
 *  - `config.transform` (optional) is called with the validated value; it may
 *    also be synchronous or asynchronous.
 *
 * @param config Scraper configuration (`extract`, `schema`, optional `transform`).
 * @returns An async function taking an HTML string and resolving to either
 *   `{ data }` on success or `{ error }` on failure. The returned function
 *   does not reject: anything thrown while loading, extracting, validating
 *   or transforming is caught and reported through `error`.
 */
function defineScraper(config) {
  return async (html) => {
    try {
      const page = cheerio.load(html);
      const rawFields = page.extract(config.extract);

      // `await` adopts plain values and thenables alike, so the validator may
      // answer synchronously or asynchronously.
      const outcome = await config.schema["~standard"].validate(rawFields);

      // A truthy `issues` property marks a validation failure; it is passed
      // through unchanged as the error.
      if (outcome.issues) {
        return { error: outcome.issues };
      }

      if ("value" in outcome) {
        // Run the optional post-processing step only when one is configured.
        return config.transform
          ? { data: await config.transform(outcome.value) }
          : { data: outcome.value };
      }

      // Neither issues nor a value: the validator produced an unusable result.
      return {
        error: new Error(
          "xscrape: Validation succeeded but no data was returned"
        )
      };
    } catch (failure) {
      // Convert any thrown value into the error branch of the result.
      return { error: failure };
    }
  };
}

export { defineScraper };