UNPKG

semantic-ds-toolkit

Version:

Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference

285 lines 10.6 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); Object.defineProperty(exports, "__esModule", { value: true }); exports.AnchorStoreManager = void 0; const fs = __importStar(require("fs/promises")); const path = __importStar(require("path")); const yaml_1 = require("yaml"); class AnchorStoreManager { storePath; cache = new Map(); datasetIndex = new Map(); dirty = false; constructor(storePath = './semantics/anchors') { this.storePath = storePath; } async ensureStoreDirectory() { try { await fs.access(this.storePath); } catch { await fs.mkdir(this.storePath, { recursive: true }); } } getAnchorFilePath(anchorId) { const prefix = anchorId.substring(4, 6); // Extract first 2 chars after 'sca_' return path.join(this.storePath, `${prefix}.yml`); } getDatasetIndexPath() { return path.join(this.storePath, 'index.yml'); } anchorToYaml(anchor) { return (0, yaml_1.stringify)({ anchor }); } yamlToAnchor(yamlContent) { try { const obj = (0, yaml_1.parse)(yamlContent); if (!obj || typeof obj !== 'object' || !('anchor' in obj)) return null; const a = obj.anchor; if (a && a.dataset && a.column_name && a.anchor_id && a.fingerprint && a.first_seen && a.last_seen) { return a; } return null; } catch (error) { console.error('Error parsing YAML anchor:', error); return null; } } async loadAnchors() { await this.ensureStoreDirectory(); try { const files = await fs.readdir(this.storePath); const yamlFiles = files.filter(file => file.endsWith('.yml') && file !== 'index.yml'); this.cache.clear(); this.datasetIndex.clear(); for (const file of yamlFiles) { const filePath = path.join(this.storePath, file); const content = await fs.readFile(filePath, 'utf-8'); const sections = content.split(/\n---\n/g); for (const section of sections) { if (section.trim()) { const anchor = this.yamlToAnchor(section); if (anchor) { this.cache.set(anchor.anchor_id, anchor); if (!this.datasetIndex.has(anchor.dataset)) { this.datasetIndex.set(anchor.dataset, []); } this.datasetIndex.get(anchor.dataset).push(anchor.anchor_id); } } } } await this.loadDatasetIndex(); } catch (error) { console.error('Error loading anchors:', error); } } async loadDatasetIndex() { try { const indexPath = this.getDatasetIndexPath(); const content = await fs.readFile(indexPath, 'utf-8'); const obj = (0, yaml_1.parse)(content); if (obj && Array.isArray(obj.datasets)) { for (const entry of obj.datasets) { const ds = entry.dataset; const ids = Array.isArray(entry.anchors) ? entry.anchors : []; if (!this.datasetIndex.has(ds)) this.datasetIndex.set(ds, []); const existing = this.datasetIndex.get(ds); for (const id of ids) { if (!existing.includes(id)) existing.push(id); } } } } catch (error) { // Index file doesn't exist yet, that's okay } } async saveAnchor(anchor) { await this.ensureStoreDirectory(); const filePath = this.getAnchorFilePath(anchor.anchor_id); const yamlContent = this.anchorToYaml(anchor); let existingContent = ''; try { existingContent = await fs.readFile(filePath, 'utf-8'); } catch { // File doesn't exist, that's fine } const sections = existingContent ? existingContent.split('\n---\n') : []; let updated = false; for (let i = 0; i < sections.length; i++) { const existingAnchor = this.yamlToAnchor(sections[i]); if (existingAnchor && existingAnchor.anchor_id === anchor.anchor_id) { sections[i] = yamlContent; updated = true; break; } } if (!updated) { sections.push(yamlContent); } const finalContent = sections.filter(s => s.trim()).join('\n---\n'); await fs.writeFile(filePath, finalContent, 'utf-8'); this.cache.set(anchor.anchor_id, anchor); if (!this.datasetIndex.has(anchor.dataset)) { this.datasetIndex.set(anchor.dataset, []); } const datasetAnchors = this.datasetIndex.get(anchor.dataset); if (!datasetAnchors.includes(anchor.anchor_id)) { datasetAnchors.push(anchor.anchor_id); this.dirty = true; } if (this.dirty) { await this.saveDatasetIndex(); } } async saveDatasetIndex() { const indexPath = this.getDatasetIndexPath(); const datasets = Array.from(this.datasetIndex.entries()).map(([dataset, anchors]) => ({ dataset, anchors })); const yaml = (0, yaml_1.stringify)({ datasets }); await fs.writeFile(indexPath, yaml, 'utf-8'); this.dirty = false; } async getAnchor(anchorId) { if (this.cache.size === 0) { await this.loadAnchors(); } return this.cache.get(anchorId) || null; } async getAnchorsForDataset(dataset) { if (this.cache.size === 0) { await this.loadAnchors(); } const anchorIds = this.datasetIndex.get(dataset) || []; const anchors = []; for (const anchorId of anchorIds) { const anchor = this.cache.get(anchorId); if (anchor) { anchors.push(anchor); } } return anchors; } async getAllAnchors() { if (this.cache.size === 0) { await this.loadAnchors(); } return Array.from(this.cache.values()); } async deleteAnchor(anchorId) { const anchor = await this.getAnchor(anchorId); if (!anchor) return false; this.cache.delete(anchorId); const datasetAnchors = this.datasetIndex.get(anchor.dataset); if (datasetAnchors) { const index = datasetAnchors.indexOf(anchorId); if (index >= 0) { datasetAnchors.splice(index, 1); this.dirty = true; } } const filePath = this.getAnchorFilePath(anchorId); try { const content = await fs.readFile(filePath, 'utf-8'); const sections = content.split('\n---\n'); const filteredSections = sections.filter(section => { const parsedAnchor = this.yamlToAnchor(section); return !parsedAnchor || parsedAnchor.anchor_id !== anchorId; }); if (filteredSections.length === 0) { await fs.unlink(filePath); } else { const newContent = filteredSections.join('\n---\n'); await fs.writeFile(filePath, newContent, 'utf-8'); } if (this.dirty) { await this.saveDatasetIndex(); } return true; } catch (error) { console.error('Error deleting anchor:', error); return false; } } async getStats() { if (this.cache.size === 0) { await this.loadAnchors(); } const anchorsPerDataset = {}; for (const [dataset, anchorIds] of this.datasetIndex.entries()) { anchorsPerDataset[dataset] = anchorIds.length; } const allAnchors = Array.from(this.cache.values()); const lastUpdated = allAnchors.length > 0 ? Math.max(...allAnchors.map(a => new Date(a.last_seen).getTime())) : Date.now(); return { total_anchors: this.cache.size, datasets: this.datasetIndex.size, anchors_per_dataset: anchorsPerDataset, last_updated: new Date(lastUpdated).toISOString() }; } async bulkSaveAnchors(anchors) { for (const anchor of anchors) { await this.saveAnchor(anchor); } } async findAnchorsByPattern(pattern) { if (this.cache.size === 0) { await this.loadAnchors(); } return Array.from(this.cache.values()).filter(anchor => pattern.test(anchor.column_name) || pattern.test(anchor.fingerprint) || (anchor.mapped_cid && pattern.test(anchor.mapped_cid))); } } exports.AnchorStoreManager = AnchorStoreManager; //# sourceMappingURL=anchor-store.js.map