semantic-ds-toolkit
Version:
Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference
360 lines • 13.9 kB
JavaScript
;
// Marks the exports object with `__esModule`, the flag transpiled modules
// conventionally set so importers can apply ES-module interop.
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder for the export; the real value is assigned once the class has
// been defined at the bottom of this file.
exports.PackValidator = void 0;
// YAML parser used by PackValidator.validateYamlStructure.
const yaml_1 = require("yaml");
/**
 * Validates semantic pack definitions (pack metadata, concepts, facets,
 * inference rules and dependencies).
 *
 * Every `validate*` helper appends findings to the `errors`, `warnings` and
 * `info` arrays it receives. A finding has the shape
 * `{ path, message, severity, code }`.
 *
 * The validator is defensive: malformed input (null root, non-object concepts,
 * non-string labels, ...) is reported as a finding instead of raising a
 * TypeError.
 */
class PackValidator {
    /** Facet keys whose values are required to be booleans. */
    static RESERVED_FACET_KEYS = [
        'pii', 'temporal', 'numerical', 'categorical', 'identifier'
    ];
    /** `x.y.z` with an optional `-suffix`. */
    static VERSION_PATTERN = /^\d+\.\d+\.\d+(-[a-zA-Z0-9-]+)?$/;
    /** Lowercase dot-separated segments, each starting with a letter. */
    static CID_PATTERN = /^[a-z][a-z0-9]*(\.[a-z][a-z0-9]*)*$/;
    /** Same shape as CID_PATTERN, applied to the pack name. */
    static PACK_NAME_PATTERN = /^[a-z][a-z0-9]*(\.[a-z][a-z0-9]*)*$/;

    /**
     * Tells whether `value` is a non-null, non-array object, i.e. something
     * that is safe to probe with the `in` operator and property access.
     *
     * @param {*} value - Any value.
     * @returns {boolean} True for plain-object-like values.
     */
    static isRecord(value) {
        return typeof value === 'object' && value !== null && !Array.isArray(value);
    }

    /**
     * Runs the full validation pipeline on an already-parsed pack.
     *
     * Structural checks run first; the remaining checks only run when the
     * structure is sound, because they rely on the field types it verified.
     *
     * @param {*} packData - Parsed pack definition (untrusted).
     * @returns {{valid: boolean, errors: object[], warnings: object[], info: object[]}}
     *   `valid` is true exactly when no errors were collected.
     */
    validatePack(packData) {
        const errors = [];
        const warnings = [];
        const info = [];
        this.validatePackStructure(packData, errors, warnings, info);
        if (errors.length === 0) {
            this.validatePackMetadata(packData, errors, warnings, info);
            this.validateConcepts(packData.concepts || [], errors, warnings, info);
            // Only a missing/null `depends_on` means "no dependencies"; any other
            // non-array value is reported by validateDependencies.
            this.validateDependencies(packData.depends_on ?? [], errors, warnings, info);
            this.validateSemanticConsistency(packData, errors, warnings, info);
        }
        return {
            valid: errors.length === 0,
            errors,
            warnings,
            info
        };
    }

    /**
     * Parses a YAML document and validates the result as a pack.
     *
     * Only the parsing step is guarded: a parser failure is reported as a
     * single `YAML_PARSE_ERROR`, while validation findings come straight from
     * `validatePack` and are never mislabelled as parse errors.
     *
     * @param {string} yamlContent - Raw YAML text.
     * @returns {{valid: boolean, errors: object[], warnings: object[], info: object[]}}
     */
    validateYamlStructure(yamlContent) {
        let parsed;
        try {
            parsed = (0, yaml_1.parse)(yamlContent);
        }
        catch (error) {
            return {
                valid: false,
                errors: [{
                        path: 'root',
                        message: `Invalid YAML structure: ${error}`,
                        severity: 'error',
                        code: 'YAML_PARSE_ERROR'
                    }],
                warnings: [],
                info: []
            };
        }
        return this.validatePack(parsed);
    }

    /**
     * Checks that the root value is an object carrying `pack`, `version` and
     * `concepts` with the expected primitive/array types.
     *
     * @param {*} packData - Parsed pack definition.
     * @param {object[]} errors - Error sink.
     * @param {object[]} warnings - Warning sink (unused here).
     * @param {object[]} info - Info sink (unused here).
     */
    validatePackStructure(packData, errors, warnings, info) {
        // `field in x` throws for null and primitives (e.g. an empty YAML
        // document parses to null), so reject those up front.
        if (!PackValidator.isRecord(packData)) {
            errors.push({
                path: 'root',
                message: 'Pack definition must be an object',
                severity: 'error',
                code: 'INVALID_TYPE'
            });
            return;
        }
        const requiredFields = ['pack', 'version', 'concepts'];
        for (const field of requiredFields) {
            if (!(field in packData)) {
                errors.push({
                    path: `root.${field}`,
                    message: `Required field '${field}' is missing`,
                    severity: 'error',
                    code: 'MISSING_REQUIRED_FIELD'
                });
            }
        }
        if (typeof packData.pack !== 'string') {
            errors.push({
                path: 'root.pack',
                message: 'Pack name must be a string',
                severity: 'error',
                code: 'INVALID_TYPE'
            });
        }
        if (typeof packData.version !== 'string') {
            errors.push({
                path: 'root.version',
                message: 'Version must be a string',
                severity: 'error',
                code: 'INVALID_TYPE'
            });
        }
        if (!Array.isArray(packData.concepts)) {
            errors.push({
                path: 'root.concepts',
                message: 'Concepts must be an array',
                severity: 'error',
                code: 'INVALID_TYPE'
            });
        }
    }

    /**
     * Validates the pack name, version format and description.
     *
     * Missing (null/undefined) name or version are left to
     * `validatePackStructure`; anything else, including the empty string,
     * must match the corresponding pattern.
     *
     * @param {object} packData - Pack definition whose structure was verified.
     * @param {object[]} errors - Error sink.
     * @param {object[]} warnings - Warning sink.
     * @param {object[]} info - Info sink.
     */
    validatePackMetadata(packData, errors, warnings, info) {
        const { pack, version, description } = packData;
        const packOk = typeof pack === 'string' && PackValidator.PACK_NAME_PATTERN.test(pack);
        if (pack != null && !packOk) {
            errors.push({
                path: 'root.pack',
                message: 'Pack name must follow naming convention (lowercase, dots allowed)',
                severity: 'error',
                code: 'INVALID_PACK_NAME'
            });
        }
        const versionOk = typeof version === 'string' && PackValidator.VERSION_PATTERN.test(version);
        if (version != null && !versionOk) {
            errors.push({
                path: 'root.version',
                message: 'Version must follow semantic versioning (x.y.z)',
                severity: 'error',
                code: 'INVALID_VERSION'
            });
        }
        if (description && typeof description !== 'string') {
            warnings.push({
                path: 'root.description',
                message: 'Description should be a string',
                severity: 'warning',
                code: 'INVALID_DESCRIPTION_TYPE'
            });
        }
        if (!description) {
            info.push({
                path: 'root.description',
                message: 'Consider adding a description for better documentation',
                severity: 'info',
                code: 'MISSING_DESCRIPTION'
            });
        }
    }

    /**
     * Validates every concept, reports duplicate CIDs and then checks
     * parent references.
     *
     * @param {Array} concepts - Concept entries (each entry is untrusted).
     * @param {object[]} errors - Error sink.
     * @param {object[]} warnings - Warning sink.
     * @param {object[]} info - Info sink.
     */
    validateConcepts(concepts, errors, warnings, info) {
        const seenCids = new Set();
        concepts.forEach((concept, index) => {
            const path = `concepts[${index}]`;
            this.validateConcept(concept, path, errors, warnings, info);
            // Optional chaining: a null entry was already reported above.
            const cid = concept?.cid;
            if (!cid) {
                return;
            }
            if (seenCids.has(cid)) {
                errors.push({
                    path: `${path}.cid`,
                    message: `Duplicate CID '${cid}'`,
                    severity: 'error',
                    code: 'DUPLICATE_CID'
                });
            }
            else {
                seenCids.add(cid);
            }
        });
        this.validateCidReferences(concepts, errors, warnings, info);
    }

    /**
     * Validates a single concept: required fields, CID format, labels,
     * facets, optional inference block and optional parent CID.
     *
     * @param {*} concept - Concept entry (untrusted).
     * @param {string} path - Location prefix used in reported findings.
     * @param {object[]} errors - Error sink.
     * @param {object[]} warnings - Warning sink.
     * @param {object[]} info - Info sink.
     */
    validateConcept(concept, path, errors, warnings, info) {
        // Guard before using `in`, which throws on null and primitives.
        if (!PackValidator.isRecord(concept)) {
            errors.push({
                path,
                message: 'Concept must be an object',
                severity: 'error',
                code: 'INVALID_CONCEPT_TYPE'
            });
            return;
        }
        const requiredFields = ['cid', 'labels', 'facets'];
        for (const field of requiredFields) {
            if (!(field in concept)) {
                errors.push({
                    path: `${path}.${field}`,
                    message: `Required field '${field}' is missing`,
                    severity: 'error',
                    code: 'MISSING_REQUIRED_FIELD'
                });
            }
        }
        // A present CID must be a pattern-matching string. Checking the type
        // explicitly matters because RegExp#test stringifies its argument
        // (`true` would otherwise match as "true").
        const cidOk = typeof concept.cid === 'string' && PackValidator.CID_PATTERN.test(concept.cid);
        if ('cid' in concept && !cidOk) {
            errors.push({
                path: `${path}.cid`,
                message: 'CID must follow naming convention (lowercase, dots allowed)',
                severity: 'error',
                code: 'INVALID_CID_FORMAT'
            });
        }
        if (!Array.isArray(concept.labels)) {
            errors.push({
                path: `${path}.labels`,
                message: 'Labels must be an array of strings',
                severity: 'error',
                code: 'INVALID_LABELS_TYPE'
            });
        }
        else {
            concept.labels.forEach((label, labelIndex) => {
                if (typeof label !== 'string') {
                    errors.push({
                        path: `${path}.labels[${labelIndex}]`,
                        message: 'Each label must be a string',
                        severity: 'error',
                        code: 'INVALID_LABEL_TYPE'
                    });
                }
            });
            if (concept.labels.length === 0) {
                warnings.push({
                    path: `${path}.labels`,
                    message: 'Concept should have at least one label',
                    severity: 'warning',
                    code: 'EMPTY_LABELS'
                });
            }
        }
        this.validateFacets(concept.facets, `${path}.facets`, errors, warnings, info);
        if (concept.inference) {
            this.validateInference(concept.inference, `${path}.inference`, errors, warnings, info);
        }
        // Falsy parent_cid (absent, null, '') means "no parent" and is skipped.
        const parentCid = concept.parent_cid;
        if (parentCid && !(typeof parentCid === 'string' && PackValidator.CID_PATTERN.test(parentCid))) {
            errors.push({
                path: `${path}.parent_cid`,
                message: 'Parent CID must follow naming convention',
                severity: 'error',
                code: 'INVALID_PARENT_CID_FORMAT'
            });
        }
    }

    /**
     * Validates a concept's facets: must be a (non-array) object, reserved
     * keys must hold booleans, and an empty object yields a warning.
     *
     * @param {*} facets - Facets value (untrusted).
     * @param {string} path - Location used in reported findings.
     * @param {object[]} errors - Error sink.
     * @param {object[]} warnings - Warning sink.
     * @param {object[]} info - Info sink (unused here).
     */
    validateFacets(facets, path, errors, warnings, info) {
        // Arrays are `typeof 'object'` too, hence the dedicated record check.
        if (!PackValidator.isRecord(facets)) {
            errors.push({
                path,
                message: 'Facets must be an object',
                severity: 'error',
                code: 'INVALID_FACETS_TYPE'
            });
            return;
        }
        const entries = Object.entries(facets);
        for (const [key, value] of entries) {
            if (PackValidator.RESERVED_FACET_KEYS.includes(key) && typeof value !== 'boolean') {
                errors.push({
                    path: `${path}.${key}`,
                    message: `Reserved facet '${key}' must be a boolean`,
                    severity: 'error',
                    code: 'INVALID_RESERVED_FACET_TYPE'
                });
            }
        }
        if (entries.length === 0) {
            warnings.push({
                path,
                message: 'Consider adding facets to improve concept classification',
                severity: 'warning',
                code: 'EMPTY_FACETS'
            });
        }
    }

    /**
     * Validates an inference block: `rules` must be an array and each rule
     * is checked individually.
     *
     * @param {*} inference - Truthy inference value from a concept.
     * @param {string} path - Location prefix used in reported findings.
     * @param {object[]} errors - Error sink.
     * @param {object[]} warnings - Warning sink.
     * @param {object[]} info - Info sink.
     */
    validateInference(inference, path, errors, warnings, info) {
        if (!Array.isArray(inference.rules)) {
            errors.push({
                path: `${path}.rules`,
                message: 'Inference rules must be an array',
                severity: 'error',
                code: 'INVALID_INFERENCE_RULES_TYPE'
            });
            return;
        }
        inference.rules.forEach((rule, ruleIndex) => {
            const rulePath = `${path}.rules[${ruleIndex}]`;
            this.validateInferenceRule(rule, rulePath, errors, warnings, info);
        });
    }

    /**
     * Validates one inference rule: required fields, string `condition` and
     * `action`, and a `confidence` number within [0, 1].
     *
     * @param {*} rule - Rule entry (untrusted).
     * @param {string} path - Location prefix used in reported findings.
     * @param {object[]} errors - Error sink.
     * @param {object[]} warnings - Warning sink (unused here).
     * @param {object[]} info - Info sink (unused here).
     */
    validateInferenceRule(rule, path, errors, warnings, info) {
        // Guard before using `in`, which throws on null and primitives.
        if (!PackValidator.isRecord(rule)) {
            errors.push({
                path,
                message: 'Inference rule must be an object',
                severity: 'error',
                code: 'INVALID_INFERENCE_RULE_TYPE'
            });
            return;
        }
        const requiredFields = ['condition', 'action', 'confidence'];
        for (const field of requiredFields) {
            if (!(field in rule)) {
                errors.push({
                    path: `${path}.${field}`,
                    message: `Required field '${field}' is missing in inference rule`,
                    severity: 'error',
                    code: 'MISSING_REQUIRED_FIELD'
                });
            }
        }
        if (typeof rule.condition !== 'string') {
            errors.push({
                path: `${path}.condition`,
                message: 'Rule condition must be a string',
                severity: 'error',
                code: 'INVALID_CONDITION_TYPE'
            });
        }
        if (typeof rule.action !== 'string') {
            errors.push({
                path: `${path}.action`,
                message: 'Rule action must be a string',
                severity: 'error',
                code: 'INVALID_ACTION_TYPE'
            });
        }
        const { confidence } = rule;
        // NaN is a 'number' and fails both range comparisons, so it needs its
        // own check to be rejected.
        const confidenceOk = typeof confidence === 'number'
            && !Number.isNaN(confidence)
            && confidence >= 0
            && confidence <= 1;
        if (!confidenceOk) {
            errors.push({
                path: `${path}.confidence`,
                message: 'Rule confidence must be a number between 0 and 1',
                severity: 'error',
                code: 'INVALID_CONFIDENCE_VALUE'
            });
        }
    }

    /**
     * Validates the dependency list: it must be an array of strings shaped
     * like `package@version` or `package>=version` (also `<=`, `>`, `<`, `=`).
     *
     * @param {*} dependencies - Value of `depends_on` (untrusted).
     * @param {object[]} errors - Error sink.
     * @param {object[]} warnings - Warning sink (unused here).
     * @param {object[]} info - Info sink (unused here).
     */
    validateDependencies(dependencies, errors, warnings, info) {
        if (!Array.isArray(dependencies)) {
            errors.push({
                path: 'depends_on',
                message: 'Dependencies must be an array',
                severity: 'error',
                code: 'INVALID_DEPENDENCIES_TYPE'
            });
            return;
        }
        // Built once instead of once per dependency.
        const depPattern = /^[a-z][a-z0-9]*(\.[a-z][a-z0-9]*)*(@|>=|<=|>|<|=).+$/;
        dependencies.forEach((dep, index) => {
            if (typeof dep !== 'string') {
                errors.push({
                    path: `depends_on[${index}]`,
                    message: 'Each dependency must be a string',
                    severity: 'error',
                    code: 'INVALID_DEPENDENCY_TYPE'
                });
                return;
            }
            if (!depPattern.test(dep)) {
                errors.push({
                    path: `depends_on[${index}]`,
                    message: `Invalid dependency format: '${dep}'. Expected format: 'package@version' or 'package>=version'`,
                    severity: 'error',
                    code: 'INVALID_DEPENDENCY_FORMAT'
                });
            }
        });
    }

    /**
     * Warns about `parent_cid` values that do not match any CID declared in
     * the same concept list.
     *
     * @param {Array} concepts - Concept entries (each entry is untrusted).
     * @param {object[]} errors - Error sink (unused here).
     * @param {object[]} warnings - Warning sink.
     * @param {object[]} info - Info sink (unused here).
     */
    validateCidReferences(concepts, errors, warnings, info) {
        // `?.` tolerates null entries, which validateConcept reports itself.
        const knownCids = new Set(concepts.map((c) => c?.cid).filter(Boolean));
        concepts.forEach((concept, index) => {
            const parentCid = concept?.parent_cid;
            if (parentCid && !knownCids.has(parentCid)) {
                warnings.push({
                    path: `concepts[${index}].parent_cid`,
                    message: `Parent CID '${parentCid}' not found in this pack`,
                    severity: 'warning',
                    code: 'UNKNOWN_PARENT_CID'
                });
            }
        });
    }

    /**
     * Warns when the same label (compared case-insensitively) is used by more
     * than one concept.
     *
     * Labels are de-duplicated per concept first so the reported number is
     * really a count of concepts; malformed label data is skipped because
     * `validateConcept` already reports it.
     *
     * @param {object} packData - Pack definition.
     * @param {object[]} errors - Error sink (unused here).
     * @param {object[]} warnings - Warning sink.
     * @param {object[]} info - Info sink (unused here).
     */
    validateSemanticConsistency(packData, errors, warnings, info) {
        const concepts = Array.isArray(packData?.concepts) ? packData.concepts : [];
        const conceptsPerLabel = new Map();
        for (const concept of concepts) {
            if (!Array.isArray(concept?.labels)) {
                continue;
            }
            const labelsOfConcept = new Set();
            for (const label of concept.labels) {
                if (typeof label === 'string') {
                    labelsOfConcept.add(label.toLowerCase());
                }
            }
            for (const label of labelsOfConcept) {
                conceptsPerLabel.set(label, (conceptsPerLabel.get(label) ?? 0) + 1);
            }
        }
        for (const [label, count] of conceptsPerLabel) {
            if (count > 1) {
                warnings.push({
                    path: 'concepts',
                    message: `Label '${label}' appears in ${count} concepts, which may cause ambiguity`,
                    severity: 'warning',
                    code: 'DUPLICATE_LABEL'
                });
            }
        }
    }

    /**
     * Builds a pack object from data that already passed validation, keeping
     * only the known pack and concept fields.
     *
     * @param {object} validatedData - Pack definition; `concepts` must be an array.
     * @returns {object} Pack with `pack`, `version`, `description`,
     *   `depends_on` (defaults to `[]`) and the projected `concepts`.
     */
    static createPackFromValidatedData(validatedData) {
        return {
            pack: validatedData.pack,
            version: validatedData.version,
            description: validatedData.description,
            depends_on: validatedData.depends_on || [],
            concepts: validatedData.concepts.map((concept) => ({
                cid: concept.cid,
                labels: concept.labels,
                description: concept.description,
                facets: concept.facets,
                inference: concept.inference,
                examples: concept.examples,
                parent_cid: concept.parent_cid
            }))
        };
    }
}
// Publish the class on the CommonJS exports object.
exports.PackValidator = PackValidator;
//# sourceMappingURL=pack-validator.js.map