xscrape
Version:
A flexible and powerful library designed to extract and transform data from HTML documents using user-defined schemas.
80 lines (76 loc) • 1.97 kB
JavaScript
// src/defineScraper.ts
import * as cheerio from "cheerio";
// src/validators.ts
import { z } from "zod";
var Validator = class {
constructor(schema, validateFunction) {
this.schema = schema;
this.validateFunction = validateFunction;
}
validate(data) {
try {
const result = this.validateFunction(this.schema, data);
return { success: true, data: result };
} catch (error) {
return { success: false, error };
}
}
};
function getSchemaBuilder(type) {
switch (type) {
case "zod":
return z;
default:
throw new Error(`Unsupported validator type: ${type}`);
}
}
/**
 * Builds a Validator for the given validator type.
 *
 * The schema is produced by calling `schemaFn` with the builder returned by
 * `getSchemaBuilder(type)`.
 *
 * @param type Validator type name; only "zod" is handled.
 * @param schemaFn Receives the schema builder and returns the schema.
 * @returns A Validator whose validate function calls `schema.parse(data)`.
 * @throws {Error} When `type` is not supported.
 */
function createValidator(type, schemaFn) {
  const schema = schemaFn(getSchemaBuilder(type));
  if (type === "zod") {
    return new Validator(schema, (zodSchema, input) => zodSchema.parse(input));
  }
  // getSchemaBuilder already throws for other types; kept as a defensive guard.
  throw new Error(`Unsupported validator type: ${type}`);
}
// src/defineScraper.ts
/**
 * Creates an async scraper function from a configuration object.
 *
 * The validator is built once, up front, from `config.validator` and
 * `config.schema`; errors raised while building it propagate to the caller.
 *
 * @param config Reads `validator`, `schema`, `extract` and the optional
 *   `transform` callback.
 * @returns An async function taking an HTML string. It resolves to `{ data }`
 *   on success or `{ error }` on any failure (load/extract, validation,
 *   transform); failures inside it are returned rather than thrown.
 */
function defineScraper(config) {
  const validator = createValidator(config.validator, config.schema);

  return async (html) => {
    try {
      const $ = cheerio.load(html);
      const outcome = validator.validate($.extract(config.extract));

      if (!outcome.success) {
        return { error: outcome.error };
      }

      const { data } = outcome;
      // Any falsy validated value is treated as "no data".
      if (!data) {
        return {
          error: new Error("Validation succeeded but no data was returned")
        };
      }

      if (!config.transform) {
        return { data };
      }

      // transform may be sync or async; a throw or rejection lands in the
      // catch below and is returned as { error }.
      return { data: await config.transform(data) };
    } catch (error) {
      return { error };
    }
  };
}
// src/types/main.ts
import "zod";
export {
defineScraper
};