@xcrap/factory
Version:
Xcrap Factory is a set of utilities for dynamically creating instances of clients, extractors, and parsing models, making it easier to configure and extend scraping and parsing pipelines.
67 lines (66 loc) • 2.74 kB
JavaScript
;
// Module-interop helper: reuses a helper already present on `this` when there
// is one, otherwise defines it. A module flagged with `__esModule` is returned
// untouched; anything else is wrapped so it can be read through `.default`.
var __importDefault = (this && this.__importDefault) || function (imported) {
    if (imported && imported.__esModule) {
        return imported;
    }
    return { "default": imported };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.createParsingModel = createParsingModel;
const zod_1 = require("zod");
const create_extractor_1 = __importDefault(require("./create-extractor"));
// Schema for one parsing model: a string `type` plus a `model` record whose
// values are field definitions. Both this schema and the field reference are
// wrapped in `lazy` because the two schemas refer to each other
// (a field may carry a `nested` model).
const parsingModelSchema = zod_1.z.lazy(() => {
    const { z } = zod_1;
    const fieldSchemaRef = z.lazy(() => parsingModelFieldSchema);
    return z.object({
        type: z.string(),
        model: z.record(fieldSchemaRef),
    });
});
// Schema for a single field of a parsing model. Every key is optional, but a
// field that declares a `nested` model must also provide a (non-empty) `query`;
// otherwise a custom issue is reported on the `query` path.
const parsingModelFieldSchema = zod_1.z.lazy(() => {
    const { z } = zod_1;
    const fieldShape = z.object({
        query: z.string().optional(),
        extractor: z.string().optional(),
        multiple: z.boolean().optional(),
        default: z.union([z.string(), z.number(), z.null()]).optional(),
        nested: parsingModelSchema.optional(),
    });
    return fieldShape.superRefine((candidate, ctx) => {
        // Nothing to enforce unless a nested model is declared.
        if (!candidate.nested) {
            return;
        }
        if (candidate.query) {
            return;
        }
        ctx.addIssue({
            code: z.ZodIssueCode.custom,
            message: "`query` is required when `nested` is present.",
            path: ["query"],
        });
    });
});
/**
 * Ensures that `type` names a model registered directly on `allowedModels`.
 *
 * Only own properties are accepted. A plain `in` check would also match
 * members inherited from `Object.prototype` (for example `"constructor"`,
 * `"toString"` or `"__proto__"`), letting an unregistered type slip through
 * and later be used as a constructor.
 *
 * @param {string} type - Model type name taken from the model description.
 * @param {object} allowedModels - Map of type name to model constructor.
 * @returns {void}
 * @throws {Error} When `type` is not an own key of `allowedModels`.
 */
function validateModelType(type, allowedModels) {
    const isRegistered = Object.prototype.hasOwnProperty.call(allowedModels, type);
    if (!isRegistered) {
        throw new Error(`Unsupported model type: "${type}"`);
    }
}
/**
 * Builds a parsing-model instance from a declarative model description.
 *
 * The description is first run through `parsingModelSchema.parse` (so any
 * error raised by that call propagates) and its `type` is checked with
 * `validateModelType`. Each field is then translated into an entry holding,
 * when present: `query`, an `extractor` produced by the extractor factory,
 * `multiple: true`, `default`, and a recursively built `model` for `nested`.
 *
 * @param {object} options
 * @param {object} options.config
 * @param {object} options.config.allowedExtractors - Forwarded to the extractor factory.
 * @param {object} options.config.allowedModels - Map of type name to model constructor.
 * @param {*} options.config.extractorArgumentSeparator - Forwarded to the
 *     extractor factory as `argumentSeparator`.
 * @param {object} options.model - Raw model description (`type` + `model` fields).
 * @returns {object} `new allowedModels[type](fields)` for the parsed description.
 */
function createParsingModel({ config: { allowedExtractors, allowedModels, extractorArgumentSeparator }, model: rawModel, }) {
    const { type, model: fieldSpecs } = parsingModelSchema.parse(rawModel);
    validateModelType(type, allowedModels);
    const parsedModel = {};
    for (const [name, spec] of Object.entries(fieldSpecs)) {
        const entry = {};
        if (spec.query) {
            entry.query = spec.query;
        }
        if (spec.extractor) {
            const extractor = (0, create_extractor_1.default)({
                extractorText: spec.extractor,
                config: {
                    allowedExtractors,
                    argumentSeparator: extractorArgumentSeparator,
                },
            });
            if (extractor) {
                entry.extractor = extractor;
            }
        }
        if (spec.multiple) {
            entry.multiple = true;
        }
        // `null` is a legitimate default, so only `undefined` means "absent".
        if (spec.default !== undefined) {
            entry.default = spec.default;
        }
        if (spec.nested) {
            entry.model = createParsingModel({
                model: spec.nested,
                config: {
                    allowedExtractors,
                    allowedModels,
                    extractorArgumentSeparator,
                },
            });
        }
        parsedModel[name] = entry;
    }
    const ModelClass = allowedModels[type];
    return new ModelClass(parsedModel);
}
exports.default = createParsingModel;