UNPKG

semantic-ds-toolkit

Version:

Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference

360 lines 13.9 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.PackValidator = void 0; const yaml_1 = require("yaml"); class PackValidator { static RESERVED_FACET_KEYS = [ 'pii', 'temporal', 'numerical', 'categorical', 'identifier' ]; static VERSION_PATTERN = /^\d+\.\d+\.\d+(-[a-zA-Z0-9-]+)?$/; static CID_PATTERN = /^[a-z][a-z0-9]*(\.[a-z][a-z0-9]*)*$/; static PACK_NAME_PATTERN = /^[a-z][a-z0-9]*(\.[a-z][a-z0-9]*)*$/; validatePack(packData) { const errors = []; const warnings = []; const info = []; this.validatePackStructure(packData, errors, warnings, info); if (errors.length === 0) { this.validatePackMetadata(packData, errors, warnings, info); this.validateConcepts(packData.concepts || [], errors, warnings, info); this.validateDependencies(packData.depends_on || [], errors, warnings, info); this.validateSemanticConsistency(packData, errors, warnings, info); } return { valid: errors.length === 0, errors, warnings, info }; } validateYamlStructure(yamlContent) { const errors = []; const warnings = []; const info = []; try { const parsed = (0, yaml_1.parse)(yamlContent); return this.validatePack(parsed); } catch (error) { errors.push({ path: 'root', message: `Invalid YAML structure: ${error}`, severity: 'error', code: 'YAML_PARSE_ERROR' }); } return { valid: false, errors, warnings, info }; } validatePackStructure(packData, errors, warnings, info) { const requiredFields = ['pack', 'version', 'concepts']; for (const field of requiredFields) { if (!(field in packData)) { errors.push({ path: `root.${field}`, message: `Required field '${field}' is missing`, severity: 'error', code: 'MISSING_REQUIRED_FIELD' }); } } if (typeof packData.pack !== 'string') { errors.push({ path: 'root.pack', message: 'Pack name must be a string', severity: 'error', code: 'INVALID_TYPE' }); } if (typeof packData.version !== 'string') { errors.push({ path: 'root.version', message: 'Version must be a string', severity: 'error', code: 'INVALID_TYPE' }); } if (!Array.isArray(packData.concepts)) { errors.push({ path: 'root.concepts', message: 'Concepts must be an array', severity: 'error', code: 'INVALID_TYPE' }); } } validatePackMetadata(packData, errors, warnings, info) { if (packData.pack && !PackValidator.PACK_NAME_PATTERN.test(packData.pack)) { errors.push({ path: 'root.pack', message: 'Pack name must follow naming convention (lowercase, dots allowed)', severity: 'error', code: 'INVALID_PACK_NAME' }); } if (packData.version && !PackValidator.VERSION_PATTERN.test(packData.version)) { errors.push({ path: 'root.version', message: 'Version must follow semantic versioning (x.y.z)', severity: 'error', code: 'INVALID_VERSION' }); } if (packData.description && typeof packData.description !== 'string') { warnings.push({ path: 'root.description', message: 'Description should be a string', severity: 'warning', code: 'INVALID_DESCRIPTION_TYPE' }); } if (!packData.description) { info.push({ path: 'root.description', message: 'Consider adding a description for better documentation', severity: 'info', code: 'MISSING_DESCRIPTION' }); } } validateConcepts(concepts, errors, warnings, info) { const cidSet = new Set(); concepts.forEach((concept, index) => { const path = `concepts[${index}]`; this.validateConcept(concept, path, errors, warnings, info); if (concept.cid) { if (cidSet.has(concept.cid)) { errors.push({ path: `${path}.cid`, message: `Duplicate CID '${concept.cid}'`, severity: 'error', code: 'DUPLICATE_CID' }); } else { cidSet.add(concept.cid); } } }); this.validateCidReferences(concepts, errors, warnings, info); } validateConcept(concept, path, errors, warnings, info) { const requiredFields = ['cid', 'labels', 'facets']; for (const field of requiredFields) { if (!(field in concept)) { errors.push({ path: `${path}.${field}`, message: `Required field '${field}' is missing`, severity: 'error', code: 'MISSING_REQUIRED_FIELD' }); } } if (concept.cid && !PackValidator.CID_PATTERN.test(concept.cid)) { errors.push({ path: `${path}.cid`, message: 'CID must follow naming convention (lowercase, dots allowed)', severity: 'error', code: 'INVALID_CID_FORMAT' }); } if (!Array.isArray(concept.labels)) { errors.push({ path: `${path}.labels`, message: 'Labels must be an array of strings', severity: 'error', code: 'INVALID_LABELS_TYPE' }); } else { concept.labels.forEach((label, labelIndex) => { if (typeof label !== 'string') { errors.push({ path: `${path}.labels[${labelIndex}]`, message: 'Each label must be a string', severity: 'error', code: 'INVALID_LABEL_TYPE' }); } }); if (concept.labels.length === 0) { warnings.push({ path: `${path}.labels`, message: 'Concept should have at least one label', severity: 'warning', code: 'EMPTY_LABELS' }); } } this.validateFacets(concept.facets, `${path}.facets`, errors, warnings, info); if (concept.inference) { this.validateInference(concept.inference, `${path}.inference`, errors, warnings, info); } if (concept.parent_cid && !PackValidator.CID_PATTERN.test(concept.parent_cid)) { errors.push({ path: `${path}.parent_cid`, message: 'Parent CID must follow naming convention', severity: 'error', code: 'INVALID_PARENT_CID_FORMAT' }); } } validateFacets(facets, path, errors, warnings, info) { if (typeof facets !== 'object' || facets === null) { errors.push({ path, message: 'Facets must be an object', severity: 'error', code: 'INVALID_FACETS_TYPE' }); return; } for (const [key, value] of Object.entries(facets)) { if (PackValidator.RESERVED_FACET_KEYS.includes(key)) { if (typeof value !== 'boolean') { errors.push({ path: `${path}.${key}`, message: `Reserved facet '${key}' must be a boolean`, severity: 'error', code: 'INVALID_RESERVED_FACET_TYPE' }); } } } if (Object.keys(facets).length === 0) { warnings.push({ path, message: 'Consider adding facets to improve concept classification', severity: 'warning', code: 'EMPTY_FACETS' }); } } validateInference(inference, path, errors, warnings, info) { if (!inference.rules || !Array.isArray(inference.rules)) { errors.push({ path: `${path}.rules`, message: 'Inference rules must be an array', severity: 'error', code: 'INVALID_INFERENCE_RULES_TYPE' }); return; } inference.rules.forEach((rule, ruleIndex) => { const rulePath = `${path}.rules[${ruleIndex}]`; this.validateInferenceRule(rule, rulePath, errors, warnings, info); }); } validateInferenceRule(rule, path, errors, warnings, info) { const requiredFields = ['condition', 'action', 'confidence']; for (const field of requiredFields) { if (!(field in rule)) { errors.push({ path: `${path}.${field}`, message: `Required field '${field}' is missing in inference rule`, severity: 'error', code: 'MISSING_REQUIRED_FIELD' }); } } if (typeof rule.condition !== 'string') { errors.push({ path: `${path}.condition`, message: 'Rule condition must be a string', severity: 'error', code: 'INVALID_CONDITION_TYPE' }); } if (typeof rule.action !== 'string') { errors.push({ path: `${path}.action`, message: 'Rule action must be a string', severity: 'error', code: 'INVALID_ACTION_TYPE' }); } if (typeof rule.confidence !== 'number' || rule.confidence < 0 || rule.confidence > 1) { errors.push({ path: `${path}.confidence`, message: 'Rule confidence must be a number between 0 and 1', severity: 'error', code: 'INVALID_CONFIDENCE_VALUE' }); } } validateDependencies(dependencies, errors, warnings, info) { dependencies.forEach((dep, index) => { if (typeof dep !== 'string') { errors.push({ path: `depends_on[${index}]`, message: 'Each dependency must be a string', severity: 'error', code: 'INVALID_DEPENDENCY_TYPE' }); return; } const depPattern = /^[a-z][a-z0-9]*(\.[a-z][a-z0-9]*)*(@|>=|<=|>|<|=).+$/; if (!depPattern.test(dep)) { errors.push({ path: `depends_on[${index}]`, message: `Invalid dependency format: '${dep}'. Expected format: 'package@version' or 'package>=version'`, severity: 'error', code: 'INVALID_DEPENDENCY_FORMAT' }); } }); } validateCidReferences(concepts, errors, warnings, info) { const cidSet = new Set(concepts.map(c => c.cid).filter(Boolean)); concepts.forEach((concept, index) => { if (concept.parent_cid && !cidSet.has(concept.parent_cid)) { warnings.push({ path: `concepts[${index}].parent_cid`, message: `Parent CID '${concept.parent_cid}' not found in this pack`, severity: 'warning', code: 'UNKNOWN_PARENT_CID' }); } }); } validateSemanticConsistency(packData, errors, warnings, info) { const concepts = packData.concepts || []; const labelCounts = new Map(); concepts.forEach((concept) => { if (concept.labels) { concept.labels.forEach((label) => { const normalizedLabel = label.toLowerCase(); labelCounts.set(normalizedLabel, (labelCounts.get(normalizedLabel) || 0) + 1); }); } }); for (const [label, count] of labelCounts) { if (count > 1) { warnings.push({ path: 'concepts', message: `Label '${label}' appears in ${count} concepts, which may cause ambiguity`, severity: 'warning', code: 'DUPLICATE_LABEL' }); } } } static createPackFromValidatedData(validatedData) { return { pack: validatedData.pack, version: validatedData.version, description: validatedData.description, depends_on: validatedData.depends_on || [], concepts: validatedData.concepts.map((concept) => ({ cid: concept.cid, labels: concept.labels, description: concept.description, facets: concept.facets, inference: concept.inference, examples: concept.examples, parent_cid: concept.parent_cid })) }; } } exports.PackValidator = PackValidator; //# sourceMappingURL=pack-validator.js.map