UNPKG

@xcrap/factory

Version:

Xcrap Factory is a set of utilities for dynamically creating instances of clients, extractors, and parsing models, making it easier to configure and extend scraping and parsing pipelines.

67 lines (66 loc) 2.74 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.createParsingModel = createParsingModel; const zod_1 = require("zod"); const create_extractor_1 = __importDefault(require("./create-extractor")); const parsingModelSchema = zod_1.z.lazy(() => zod_1.z.object({ type: zod_1.z.string(), model: zod_1.z.record(zod_1.z.lazy(() => parsingModelFieldSchema)), })); const parsingModelFieldSchema = zod_1.z.lazy(() => zod_1.z.object({ query: zod_1.z.string().optional(), extractor: zod_1.z.string().optional(), multiple: zod_1.z.boolean().optional(), default: zod_1.z.union([zod_1.z.string(), zod_1.z.number(), zod_1.z.null()]).optional(), nested: parsingModelSchema.optional(), }).superRefine((field, ctx) => { if (field.nested && !field.query) { ctx.addIssue({ code: zod_1.z.ZodIssueCode.custom, message: "`query` is required when `nested` is present.", path: ["query"], }); } })); function validateModelType(type, allowedModels) { if (!(type in allowedModels)) { throw new Error(`Unsupported model type: "${type}"`); } } function createParsingModel({ config: { allowedExtractors, allowedModels, extractorArgumentSeparator }, model: root, }) { root = parsingModelSchema.parse(root); const { type, model } = root; validateModelType(type, allowedModels); const parsedModel = {}; for (const [fieldName, field] of Object.entries(model)) { const extractorText = field.extractor; const extractor = extractorText ? (0, create_extractor_1.default)({ extractorText: extractorText, config: { allowedExtractors: allowedExtractors, argumentSeparator: extractorArgumentSeparator } }) : undefined; parsedModel[fieldName] = { ...(field.query && { query: field.query }), ...(extractor && { extractor }), ...(field.multiple && { multiple: true }), ...(field.default !== undefined && { default: field.default }), ...(field.nested && { model: createParsingModel({ model: field.nested, config: { allowedExtractors: allowedExtractors, allowedModels: allowedModels, extractorArgumentSeparator: extractorArgumentSeparator } }) }) }; } return new allowedModels[type](parsedModel); } exports.default = createParsingModel;