semantic-ds-toolkit
Version:
Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference
285 lines • 10.6 kB
JavaScript
"use strict";
// TypeScript interop helper: exposes property `key` of `source` on `target`
// under the name `alias` (which defaults to `key`). Reuses an already-defined
// helper from `this` when one is present.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        const exportedName = alias === undefined ? key : alias;
        let descriptor = Object.getOwnPropertyDescriptor(source, key);
        // Fall back to a forwarding getter when the property is missing, is a
        // plain (writable/configurable) data property, or is an accessor on a
        // module that is not flagged as an ES module.
        const needsForwardingGetter = !descriptor ||
            ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsForwardingGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportedName, descriptor);
    }
    : function (target, source, key, alias) {
        // Environments without Object.create: copy the current value instead.
        target[alias === undefined ? key : alias] = source[key];
    });
// TypeScript interop helper: attaches `value` to `target` as its `default`
// export. Reuses an already-defined helper from `this` when one is present.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        const descriptor = { enumerable: true, value: value };
        Object.defineProperty(target, "default", descriptor);
    }
    : function (target, value) {
        // Environments without Object.create: plain assignment.
        target["default"] = value;
    });
// TypeScript interop helper: turns a CommonJS module into a namespace-like
// object. Modules already flagged `__esModule` are returned untouched;
// otherwise every own key except "default" is bound onto a fresh object and
// the module itself becomes that object's `default`.
var __importStar = (this && this.__importStar) || (function () {
    // Picks the key-listing strategy on first use and replaces itself with it.
    let listOwnKeys = function (firstTarget) {
        listOwnKeys = Object.getOwnPropertyNames || function (source) {
            const names = [];
            for (const name in source) {
                if (Object.prototype.hasOwnProperty.call(source, name)) {
                    names.push(name);
                }
            }
            return names;
        };
        return listOwnKeys(firstTarget);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        const namespace = {};
        if (mod != null) {
            const keys = listOwnKeys(mod);
            for (let index = 0; index < keys.length; index++) {
                const key = keys[index];
                if (key !== "default") {
                    __createBinding(namespace, mod, key);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
// Flag this CommonJS module as originating from an ES module so interop
// helpers (see the `__esModule` check in __importStar) pass it through as-is.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the export slot; the class is assigned to it at the end of the file.
exports.AnchorStoreManager = void 0;
// Node built-ins, wrapped into namespace objects by __importStar.
const fs = __importStar(require("fs/promises"));
const path = __importStar(require("path"));
// YAML library; this file uses its `parse` and `stringify` functions.
const yaml_1 = require("yaml");
/**
 * File-backed store for column anchors.
 *
 * Anchors are persisted as YAML under `storePath`, sharded into
 * `<prefix>.yml` files (prefix = characters 4-5 of the anchor id), with one
 * `anchor:` document per section and sections separated by a `---` line.
 * `index.yml` maps each dataset to the anchor ids recorded for it.
 *
 * Reads are served from an in-memory cache that is populated from disk on
 * first use (tracked by the `loaded` flag).
 */
class AnchorStoreManager {
    storePath;
    /** anchor_id -> anchor object */
    cache = new Map();
    /** dataset -> array of anchor ids */
    datasetIndex = new Map();
    /** True when `datasetIndex` has changes not yet written to index.yml. */
    dirty = false;
    /** True once the on-disk store has been read into memory successfully. */
    loaded = false;

    /**
     * @param {string} [storePath='./semantics/anchors'] Directory holding the YAML files.
     */
    constructor(storePath = './semantics/anchors') {
        this.storePath = storePath;
    }

    /** Creates the store directory (recursively) when it is not accessible. */
    async ensureStoreDirectory() {
        try {
            await fs.access(this.storePath);
        }
        catch {
            await fs.mkdir(this.storePath, { recursive: true });
        }
    }

    /**
     * Loads the store from disk unless that already happened.
     *
     * An explicit flag is used instead of "cache is empty": the cache becomes
     * non-empty after a single save, which must not suppress the initial load.
     */
    async ensureLoaded() {
        if (!this.loaded) {
            await this.loadAnchors();
        }
    }

    /**
     * @param {string} anchorId
     * @returns {string} Path of the shard file that holds this anchor.
     */
    getAnchorFilePath(anchorId) {
        const prefix = anchorId.substring(4, 6); // first 2 chars after 'sca_'
        return path.join(this.storePath, `${prefix}.yml`);
    }

    /** @returns {string} Path of the dataset index file. */
    getDatasetIndexPath() {
        return path.join(this.storePath, 'index.yml');
    }

    /**
     * Splits shard-file content into its `---`-separated sections.
     * Accepts both LF and CRLF line endings.
     * @param {string} content
     * @returns {string[]}
     */
    splitSections(content) {
        return content.split(/\r?\n---\r?\n/);
    }

    /**
     * @param {object} anchor
     * @returns {string} YAML document of the form `anchor: {...}`.
     */
    anchorToYaml(anchor) {
        return (0, yaml_1.stringify)({ anchor });
    }

    /**
     * Parses one YAML section into an anchor.
     * @param {string} yamlContent
     * @returns {object|null} The anchor, or null when the section cannot be
     *   parsed or lacks any of dataset, column_name, anchor_id, fingerprint,
     *   first_seen, last_seen.
     */
    yamlToAnchor(yamlContent) {
        try {
            const obj = (0, yaml_1.parse)(yamlContent);
            if (!obj || typeof obj !== 'object' || !('anchor' in obj)) {
                return null;
            }
            const a = obj.anchor;
            const isComplete = a &&
                a.dataset &&
                a.column_name &&
                a.anchor_id &&
                a.fingerprint &&
                a.first_seen &&
                a.last_seen;
            return isComplete ? a : null;
        }
        catch (error) {
            console.error('Error parsing YAML anchor:', error);
            return null;
        }
    }

    /**
     * Removes `anchorId` from the id list of `dataset`.
     * @returns {boolean} True when the index was modified.
     */
    removeFromDatasetIndex(dataset, anchorId) {
        const ids = this.datasetIndex.get(dataset);
        const position = ids ? ids.indexOf(anchorId) : -1;
        if (position < 0) {
            return false;
        }
        ids.splice(position, 1);
        return true;
    }

    /**
     * Records an anchor in the cache and the dataset index. When the anchor
     * was previously cached under another dataset, the stale index entry is
     * dropped so the id is not listed under two datasets.
     * @returns {boolean} True when the dataset index was modified.
     */
    trackAnchor(anchor) {
        let indexChanged = false;
        const previous = this.cache.get(anchor.anchor_id);
        if (previous && previous.dataset !== anchor.dataset) {
            indexChanged = this.removeFromDatasetIndex(previous.dataset, anchor.anchor_id);
        }
        this.cache.set(anchor.anchor_id, anchor);
        let ids = this.datasetIndex.get(anchor.dataset);
        if (!ids) {
            ids = [];
            this.datasetIndex.set(anchor.dataset, ids);
        }
        if (!ids.includes(anchor.anchor_id)) {
            ids.push(anchor.anchor_id);
            indexChanged = true;
        }
        return indexChanged;
    }

    /**
     * Rebuilds the cache and dataset index from every `*.yml` shard in the
     * store (excluding index.yml), then merges index.yml on top.
     * Errors are logged, not thrown; after a failure the store is still
     * considered not loaded, so the next read retries.
     */
    async loadAnchors() {
        await this.ensureStoreDirectory();
        try {
            const files = await fs.readdir(this.storePath);
            const shardFiles = files.filter((file) => file.endsWith('.yml') && file !== 'index.yml');
            this.cache.clear();
            this.datasetIndex.clear();
            for (const file of shardFiles) {
                const content = await fs.readFile(path.join(this.storePath, file), 'utf-8');
                for (const section of this.splitSections(content)) {
                    if (!section.trim()) {
                        continue;
                    }
                    const anchor = this.yamlToAnchor(section);
                    if (anchor) {
                        this.trackAnchor(anchor);
                    }
                }
            }
            await this.loadDatasetIndex();
            this.loaded = true;
        }
        catch (error) {
            console.error('Error loading anchors:', error);
        }
    }

    /**
     * Merges the ids listed in index.yml into the in-memory dataset index.
     * A missing index file is expected and ignored; any other read or parse
     * problem is logged and the merge is skipped.
     */
    async loadDatasetIndex() {
        let content;
        try {
            content = await fs.readFile(this.getDatasetIndexPath(), 'utf-8');
        }
        catch (error) {
            // Index file doesn't exist yet, that's okay
            if (error?.code !== 'ENOENT') {
                console.error('Error reading dataset index:', error);
            }
            return;
        }
        try {
            const obj = (0, yaml_1.parse)(content);
            if (!obj || !Array.isArray(obj.datasets)) {
                return;
            }
            for (const entry of obj.datasets) {
                if (!entry || entry.dataset == null) {
                    continue; // malformed entry
                }
                const ids = Array.isArray(entry.anchors) ? entry.anchors : [];
                if (!this.datasetIndex.has(entry.dataset)) {
                    this.datasetIndex.set(entry.dataset, []);
                }
                const existing = this.datasetIndex.get(entry.dataset);
                for (const id of ids) {
                    if (!existing.includes(id)) {
                        existing.push(id);
                    }
                }
            }
        }
        catch (error) {
            console.error('Error parsing dataset index:', error);
        }
    }

    /**
     * Inserts or replaces an anchor in its shard file, then updates the cache
     * and (when it changed) index.yml.
     * @param {object} anchor Must carry at least `anchor_id` and `dataset`.
     * @throws When the shard file exists but cannot be read, or when writing fails.
     */
    async saveAnchor(anchor) {
        await this.ensureStoreDirectory();
        // Load first so index.yml is rewritten from the full index and later
        // reads do not see a cache containing only this anchor.
        await this.ensureLoaded();
        const filePath = this.getAnchorFilePath(anchor.anchor_id);
        const yamlContent = this.anchorToYaml(anchor);
        let existingContent = '';
        try {
            existingContent = await fs.readFile(filePath, 'utf-8');
        }
        catch (error) {
            // A missing file is fine. Anything else must abort: continuing
            // would overwrite the shard and lose the anchors already in it.
            if (error?.code !== 'ENOENT') {
                throw error;
            }
        }
        const sections = existingContent ? this.splitSections(existingContent) : [];
        const slot = sections.findIndex((section) => {
            const existingAnchor = this.yamlToAnchor(section);
            return Boolean(existingAnchor) && existingAnchor.anchor_id === anchor.anchor_id;
        });
        if (slot >= 0) {
            sections[slot] = yamlContent;
        }
        else {
            sections.push(yamlContent);
        }
        const finalContent = sections.filter((s) => s.trim()).join('\n---\n');
        await fs.writeFile(filePath, finalContent, 'utf-8');
        if (this.trackAnchor(anchor)) {
            this.dirty = true;
        }
        if (this.dirty) {
            await this.saveDatasetIndex();
        }
    }

    /** Writes the in-memory dataset index to index.yml and clears `dirty`. */
    async saveDatasetIndex() {
        const datasets = Array.from(this.datasetIndex.entries(), ([dataset, anchors]) => ({ dataset, anchors }));
        const yaml = (0, yaml_1.stringify)({ datasets });
        await fs.writeFile(this.getDatasetIndexPath(), yaml, 'utf-8');
        this.dirty = false;
    }

    /**
     * @param {string} anchorId
     * @returns {Promise<object|null>} The cached anchor, or null when unknown.
     */
    async getAnchor(anchorId) {
        await this.ensureLoaded();
        return this.cache.get(anchorId) ?? null;
    }

    /**
     * @param {string} dataset
     * @returns {Promise<object[]>} Anchors indexed under `dataset`; indexed ids
     *   that are not in the cache are skipped.
     */
    async getAnchorsForDataset(dataset) {
        await this.ensureLoaded();
        const anchorIds = this.datasetIndex.get(dataset) ?? [];
        return anchorIds
            .map((anchorId) => this.cache.get(anchorId))
            .filter((anchor) => Boolean(anchor));
    }

    /** @returns {Promise<object[]>} Every cached anchor. */
    async getAllAnchors() {
        await this.ensureLoaded();
        return Array.from(this.cache.values());
    }

    /**
     * Removes an anchor from its shard file, the cache and the dataset index.
     * In-memory state is only touched after the disk update succeeded, so a
     * failed delete leaves cache and disk consistent.
     * @param {string} anchorId
     * @returns {Promise<boolean>} False when the anchor is unknown or an I/O
     *   error occurred (the error is logged); true otherwise.
     */
    async deleteAnchor(anchorId) {
        const anchor = await this.getAnchor(anchorId);
        if (!anchor) {
            return false;
        }
        const filePath = this.getAnchorFilePath(anchorId);
        try {
            let content = null;
            try {
                content = await fs.readFile(filePath, 'utf-8');
            }
            catch (error) {
                // Shard already gone: nothing to rewrite on disk.
                if (error?.code !== 'ENOENT') {
                    throw error;
                }
            }
            if (content !== null) {
                const remaining = this.splitSections(content).filter((section) => {
                    if (!section.trim()) {
                        return false; // blank sections must not keep an empty shard alive
                    }
                    const parsedAnchor = this.yamlToAnchor(section);
                    // Unparseable sections are preserved untouched.
                    return !parsedAnchor || parsedAnchor.anchor_id !== anchorId;
                });
                if (remaining.length === 0) {
                    await fs.unlink(filePath);
                }
                else {
                    await fs.writeFile(filePath, remaining.join('\n---\n'), 'utf-8');
                }
            }
            this.cache.delete(anchorId);
            if (this.removeFromDatasetIndex(anchor.dataset, anchorId)) {
                this.dirty = true;
            }
            if (this.dirty) {
                await this.saveDatasetIndex();
            }
            return true;
        }
        catch (error) {
            console.error('Error deleting anchor:', error);
            return false;
        }
    }

    /**
     * @returns {Promise<{total_anchors: number, datasets: number,
     *   anchors_per_dataset: Object<string, number>, last_updated: string}>}
     *   `last_updated` is the newest parseable `last_seen` as an ISO string,
     *   or the current time when no anchor has a parseable `last_seen`.
     */
    async getStats() {
        await this.ensureLoaded();
        const anchorsPerDataset = {};
        for (const [dataset, anchorIds] of this.datasetIndex.entries()) {
            anchorsPerDataset[dataset] = anchorIds.length;
        }
        // Plain loop instead of Math.max(...values): no argument-count limit,
        // and unparseable dates are skipped instead of poisoning the result.
        let latest = -Infinity;
        for (const { last_seen: lastSeen } of this.cache.values()) {
            const timestamp = new Date(lastSeen).getTime();
            if (!Number.isNaN(timestamp) && timestamp > latest) {
                latest = timestamp;
            }
        }
        const lastUpdated = Number.isFinite(latest) ? latest : Date.now();
        return {
            total_anchors: this.cache.size,
            datasets: this.datasetIndex.size,
            anchors_per_dataset: anchorsPerDataset,
            last_updated: new Date(lastUpdated).toISOString()
        };
    }

    /**
     * Saves anchors one after another (sequentially, because several anchors
     * may share a shard file).
     * @param {Iterable<object>} anchors
     */
    async bulkSaveAnchors(anchors) {
        for (const anchor of anchors) {
            await this.saveAnchor(anchor);
        }
    }

    /**
     * @param {RegExp} pattern
     * @returns {Promise<object[]>} Anchors whose column_name, fingerprint or
     *   (when present) mapped_cid matches `pattern`.
     */
    async findAnchorsByPattern(pattern) {
        await this.ensureLoaded();
        // Global/sticky regexes remember `lastIndex` between test() calls;
        // reset it so every value is matched from the start.
        const matches = (value) => {
            pattern.lastIndex = 0;
            return pattern.test(value);
        };
        return Array.from(this.cache.values()).filter((anchor) => matches(anchor.column_name) ||
            matches(anchor.fingerprint) ||
            Boolean(anchor.mapped_cid && matches(anchor.mapped_cid)));
    }
}
exports.AnchorStoreManager = AnchorStoreManager;
//# sourceMappingURL=anchor-store.js.map