UNPKG

semantic-ds-toolkit

Version:

Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference

223 lines 8.25 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.CIDRegistry = void 0;

/**
 * In-memory registry of concepts, keyed by their `cid`.
 *
 * Concepts arrive in "packs" (`{ pack: <name>, concepts: [...] }`). Each
 * concept is expected to expose `cid` and `labels`, and may expose `facets`
 * and `inference.rules`. The registry supports lookup by cid, by label
 * (exact first, then fuzzy) and by a criteria object (facets, patterns,
 * inference rules).
 */
class CIDRegistry {
    /** cid -> `{ concept, pack }`, where `pack` is the owning pack's name. */
    concepts = new Map();
    /** normalized label -> array of cids that carry that label. */
    labelIndex = new Map();
    /** pack name -> pack object as passed to `registerPack`. */
    packs = new Map();
    /** Names of every pack registered so far. */
    loadedPackNames = new Set();

    /**
     * Registers a pack and indexes all of its concepts and their labels.
     * Registering a cid that already exists overwrites the stored concept.
     *
     * @param {{pack: string, concepts: Iterable<object>}} pack
     */
    registerPack(pack) {
        this.packs.set(pack.pack, pack);
        this.loadedPackNames.add(pack.pack);
        for (const concept of pack.concepts) {
            this.concepts.set(concept.cid, { concept, pack: pack.pack });
            // A concept without labels is still reachable by cid / criteria.
            for (const label of concept.labels ?? []) {
                const key = this.normalizeLabel(label);
                const cids = this.labelIndex.get(key);
                if (cids === undefined) {
                    this.labelIndex.set(key, [concept.cid]);
                } else if (!cids.includes(concept.cid)) {
                    // Several labels can normalize to the same key ("user id",
                    // "user_id"), and a pack can be registered twice; index
                    // each cid only once so lookups do not return duplicates.
                    cids.push(concept.cid);
                }
            }
        }
    }

    /**
     * @param {string} cid
     * @returns {object|null} The concept registered under `cid`, or null.
     */
    getConcept(cid) {
        const entry = this.concepts.get(cid);
        return entry ? entry.concept : null;
    }

    /**
     * Finds concepts by label. Exact matches on the normalized label are
     * returned with confidence 1.0; fuzzy matching is only attempted when
     * there is no exact match.
     *
     * @param {string} label
     * @returns {Array<{concept: object, pack: string, confidence: number, match_type: string}>}
     *   Matches sorted by descending confidence.
     */
    lookupByLabel(label) {
        const normalizedLabel = this.normalizeLabel(label);
        const results = [];
        for (const cid of this.labelIndex.get(normalizedLabel) ?? []) {
            const entry = this.concepts.get(cid);
            if (entry) {
                results.push({
                    concept: entry.concept,
                    pack: entry.pack,
                    confidence: 1.0,
                    match_type: 'exact',
                });
            }
        }
        if (results.length === 0) {
            results.push(...this.fuzzyLabelSearch(normalizedLabel));
        }
        return results.sort((a, b) => b.confidence - a.confidence);
    }

    /**
     * Scores every registered concept against `criteria` and returns those
     * whose combined score exceeds 0.3.
     *
     * The score is the sum of: the facet match ratio, 0.8 x the best pattern
     * match, and 0.9 x the best satisfied inference rule. It is capped at 1.0
     * in the returned result.
     *
     * @param {{facets?: object, patterns?: string[]}} criteria Also passed as
     *   the evaluation context for each concept's inference rules.
     * @returns {Array<{concept: object, pack: string, confidence: number, match_type: string}>}
     *   Matches sorted by descending confidence; `match_type` is 'inference'
     *   when an inference rule contributed, otherwise 'semantic'.
     */
    lookupByCriteria(criteria) {
        const results = [];
        for (const { concept, pack } of this.concepts.values()) {
            let confidence = 0;
            let matchedInference = false;
            if (criteria.facets) {
                confidence += this.matchFacets(concept.facets, criteria.facets).score;
            }
            if (criteria.patterns) {
                confidence += this.matchPatterns(concept.labels, criteria.patterns) * 0.8;
            }
            if (concept.inference?.rules) {
                const inferenceMatch = this.evaluateInferenceRules(concept.inference.rules, criteria);
                confidence += inferenceMatch * 0.9;
                matchedInference = inferenceMatch > 0;
            }
            if (confidence > 0.3) {
                results.push({
                    concept,
                    pack,
                    confidence: Math.min(confidence, 1.0),
                    match_type: matchedInference ? 'inference' : 'semantic',
                });
            }
        }
        return results.sort((a, b) => b.confidence - a.confidence);
    }

    /** @returns {string[]} Names of all registered packs. */
    getLoadedPacks() {
        return [...this.loadedPackNames];
    }

    /**
     * @param {string} packName
     * @returns {object|null} The pack registered under `packName`, or null.
     */
    getPack(packName) {
        return this.packs.get(packName) ?? null;
    }

    /** @returns {object[]} Every registered concept. */
    getAllConcepts() {
        return Array.from(this.concepts.values(), (entry) => entry.concept);
    }

    /**
     * Lower-cases a label, collapses runs of `_`, `-` and whitespace into a
     * single `_`, and strips leading/trailing underscores.
     *
     * @param {string} label
     * @returns {string}
     */
    normalizeLabel(label) {
        return label.toLowerCase()
            .replace(/[_\-\s]+/g, '_')
            .replace(/^_+|_+$/g, '');
    }

    /**
     * Compares an already-normalized label against every indexed label, using
     * the better of token Jaccard similarity and edit-distance similarity.
     * Labels scoring above 0.5 contribute their concepts.
     *
     * @param {string} normalizedLabel
     * @returns {Array<{concept: object, pack: string, confidence: number, match_type: 'label'}>}
     *   At most one result per concept, carrying its best similarity. Unsorted.
     */
    fuzzyLabelSearch(normalizedLabel) {
        // Keyed by cid so a concept reachable through several similar labels
        // is reported once, with the highest similarity found.
        const bestByCid = new Map();
        const searchTerms = normalizedLabel.split('_');
        for (const [label, cids] of this.labelIndex) {
            const similarity = Math.max(
                this.calculateSimilarity(searchTerms, label.split('_')),
                this.calculateStringSimilarity(normalizedLabel, label),
            );
            if (!(similarity > 0.5)) {
                continue;
            }
            for (const cid of cids) {
                const entry = this.concepts.get(cid);
                if (!entry) {
                    continue;
                }
                const previous = bestByCid.get(cid);
                if (previous === undefined || similarity > previous.confidence) {
                    bestByCid.set(cid, {
                        concept: entry.concept,
                        pack: entry.pack,
                        confidence: similarity,
                        match_type: 'label',
                    });
                }
            }
        }
        return [...bestByCid.values()];
    }

    /**
     * Jaccard similarity of two term lists (|intersection| / |union|).
     *
     * @param {string[]} terms1
     * @param {string[]} terms2
     * @returns {number} Value in [0, 1]; 0 when both lists are empty.
     */
    calculateSimilarity(terms1, terms2) {
        const set1 = new Set(terms1);
        const set2 = new Set(terms2);
        const unionSize = new Set([...set1, ...set2]).size;
        if (unionSize === 0) {
            return 0;
        }
        let shared = 0;
        for (const term of set1) {
            if (set2.has(term)) {
                shared++;
            }
        }
        return shared / unionSize;
    }

    /**
     * Fraction of the criteria facets whose value strictly equals the
     * concept's value for the same key.
     *
     * @param {object|null|undefined} conceptFacets Treated as empty when absent.
     * @param {object} criteriaFacets
     * @returns {{score: number}} `score` is 0 when no criteria facets are given.
     */
    matchFacets(conceptFacets, criteriaFacets) {
        // Concepts without facets simply match nothing instead of throwing.
        const facets = conceptFacets ?? {};
        const wanted = Object.entries(criteriaFacets);
        if (wanted.length === 0) {
            return { score: 0 };
        }
        const matches = wanted.filter(([key, value]) => facets[key] === value).length;
        return { score: matches / wanted.length };
    }

    /**
     * Best score of any (label, pattern) pair. A pattern that matches the
     * label as a case-insensitive regex scores 0.9; otherwise (including when
     * the pattern is not a valid regex) the pair scores its edit-distance
     * similarity.
     *
     * NOTE(review): patterns are compiled as regular expressions verbatim; if
     * they can come from untrusted input this is a ReDoS vector.
     *
     * @param {string[]|null|undefined} labels Treated as empty when absent.
     * @param {string[]} patterns
     * @returns {number}
     */
    matchPatterns(labels, patterns) {
        // Compile each pattern once rather than once per label.
        const compiled = patterns.map((pattern) => {
            try {
                return { pattern, regex: new RegExp(pattern, 'i') };
            } catch {
                // Not a valid regex: fall back to plain string similarity.
                return { pattern, regex: null };
            }
        });
        let bestMatch = 0;
        for (const label of labels ?? []) {
            for (const { pattern, regex } of compiled) {
                const score = regex !== null && regex.test(label)
                    ? 0.9
                    : this.calculateStringSimilarity(label, pattern);
                bestMatch = Math.max(bestMatch, score);
            }
        }
        return bestMatch;
    }

    /**
     * Case-insensitive similarity derived from Levenshtein distance:
     * `(maxLen - distance) / maxLen`.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number} Value in [0, 1]; 1 when both strings are empty.
     */
    calculateStringSimilarity(str1, str2) {
        // Measure lengths after lower-casing: case mapping can change a
        // string's length (e.g. U+0130), and using pre-mapping lengths with a
        // post-mapping distance could yield a negative score.
        const a = str1.toLowerCase();
        const b = str2.toLowerCase();
        const maxLen = Math.max(a.length, b.length);
        if (maxLen === 0) {
            return 1;
        }
        return (maxLen - this.levenshteinDistance(a, b)) / maxLen;
    }

    /**
     * Levenshtein edit distance (insert / delete / substitute, each cost 1),
     * compared per UTF-16 code unit.
     *
     * @param {string} str1
     * @param {string} str2
     * @returns {number}
     */
    levenshteinDistance(str1, str2) {
        // Only the previous row of the DP table is ever read, so keep two rows
        // instead of the full (len2 + 1) x (len1 + 1) matrix.
        let previousRow = Array.from({ length: str1.length + 1 }, (_, i) => i);
        for (let j = 1; j <= str2.length; j++) {
            const currentRow = [j];
            for (let i = 1; i <= str1.length; i++) {
                const substitutionCost = str1[i - 1] === str2[j - 1] ? 0 : 1;
                currentRow[i] = Math.min(
                    currentRow[i - 1] + 1,
                    previousRow[i] + 1,
                    previousRow[i - 1] + substitutionCost,
                );
            }
            previousRow = currentRow;
        }
        return previousRow[str1.length];
    }

    /**
     * Returns the highest `confidence` among the rules whose `condition`
     * evaluates truthy against `criteria`, or 0 when none apply.
     *
     * @param {Array<{condition: string, confidence: number}>} rules
     * @param {object} criteria
     * @returns {number}
     */
    evaluateInferenceRules(rules, criteria) {
        let bestConfidence = 0;
        for (const rule of rules) {
            try {
                if (!this.evaluateCondition(rule.condition, criteria)) {
                    continue;
                }
                const ruleConfidence = Number(rule.confidence);
                // A rule without a usable confidence must not poison the
                // running maximum with NaN (which would discard the concept).
                if (!Number.isNaN(ruleConfidence)) {
                    bestConfidence = Math.max(bestConfidence, ruleConfidence);
                }
            } catch {
                // Best-effort: a malformed rule entry is skipped, not fatal.
                continue;
            }
        }
        return bestConfidence;
    }

    /**
     * Evaluates a rule condition (a JavaScript expression string) with the
     * properties of `context` in scope. Any error, including a reference to a
     * name the context does not define, yields `false`.
     *
     * SECURITY: this executes `condition` as arbitrary JavaScript via the
     * Function constructor. Conditions come from registered packs, so packs
     * must only be loaded from trusted sources. Replacing this with a
     * restricted expression evaluator would change which conditions are
     * accepted and is therefore left as a flagged follow-up.
     *
     * @param {string} condition
     * @param {object} context
     * @returns {*} The expression's value, or `false` on error.
     */
    evaluateCondition(condition, context) {
        try {
            // Functions built with the Function constructor do not inherit this
            // file's strict mode, which is why `with` is permitted here.
            const func = new Function('context', `with (context) { return ${condition}; }`);
            return func(context);
        } catch {
            return false;
        }
    }
}
exports.CIDRegistry = CIDRegistry;
//# sourceMappingURL=cid-registry.js.map