xscrape
Version: 
A flexible and powerful library designed to extract and transform data from HTML documents using user-defined schemas
36 lines (35 loc) • 951 B
JavaScript
// src/defineScraper.ts
import * as cheerio from "cheerio";
function defineScraper(config) {
  return async (html) => {
    try {
      const $ = cheerio.load(html);
      const extractedData = $.extract(config.extract);
      const validationResult = await Promise.resolve(
        config.schema["~standard"].validate(extractedData)
      );
      if (validationResult.issues) {
        return { error: validationResult.issues };
      }
      if (!("value" in validationResult)) {
        return {
          error: new Error(
            "xscrape: Validation succeeded but no data was returned"
          )
        };
      }
      if (config.transform) {
        const transformed = await Promise.resolve(
          config.transform(validationResult.value)
        );
        return { data: transformed };
      }
      return { data: validationResult.value };
    } catch (error) {
      return { error };
    }
  };
}
// Named export: makes `defineScraper` available to importers of this module.
export {
  defineScraper
};