UNPKG

semantic-ds-toolkit

Version:

Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference

412 lines 14.4 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.globalLazyEvaluator = exports.LazyComputation = exports.LazyEvaluator = void 0; class LazyEvaluator { computations = new Map(); evaluationQueue = []; runningComputations = new Set(); batchJobs = new Map(); maxConcurrency; resultCache = new Map(); constructor(maxConcurrency = 10) { this.maxConcurrency = maxConcurrency; } // Register a lazy computation lazy(id, computation, context = { priority: 'medium' }) { const node = { id, computation, context, state: 'pending', dependencies: new Set(), dependents: new Set() }; // Handle dependencies if (context.dependencies) { context.dependencies.forEach(depId => { node.dependencies.add(depId); const depNode = this.computations.get(depId); if (depNode) { depNode.dependents.add(id); } }); } this.computations.set(id, node); return new LazyComputation(this, id); } // Lazy column fingerprinting lazyFingerprint(columnId, column, fingerprintFn) { return this.lazy(`fingerprint:${columnId}`, () => fingerprintFn(column), { priority: 'high', cacheable: true, timeout: 5000 }); } // Lazy anchor creation lazyAnchor(anchorId, datasetName, column, anchorFn) { const fingerprintId = `fingerprint:${anchorId}`; return this.lazy(`anchor:${anchorId}`, () => anchorFn(datasetName, column), { priority: 'medium', dependencies: [fingerprintId], cacheable: true, timeout: 10000 }); } // Lazy batch processing lazyBatch(batchId, items, processFn, batchSize = 100) { return this.lazy(`batch:${batchId}`, () => { const results = []; for (let i = 0; i < items.length; i += batchSize) { const batch = items.slice(i, i + batchSize); const batchResults = processFn(batch); results.push(...batchResults); } return results; }, { priority: 'low', cacheable: true, timeout: 60000 }); } // Execute a specific computation async evaluate(computationId) { const node = this.computations.get(computationId); if (!node) { throw new Error(`Computation ${computationId} not found`); } if (node.state === 'completed') { return node.result; } if (node.state === 'failed') { throw node.error || new Error(`Computation ${computationId} failed`); } if (node.state === 'running') { // Wait for completion return this.waitForCompletion(computationId); } // Check cache first if (node.context.cacheable) { const cached = this.getCachedResult(computationId); if (cached !== undefined) { node.result = cached; node.state = 'completed'; return cached; } } // Ensure dependencies are satisfied await this.resolveDependencies(computationId); // Execute the computation return this.executeComputation(computationId); } async resolveDependencies(computationId) { const node = this.computations.get(computationId); if (!node) return; const depPromises = Array.from(node.dependencies).map(depId => this.evaluate(depId)); await Promise.all(depPromises); } async executeComputation(computationId) { const node = this.computations.get(computationId); if (!node) { throw new Error(`Computation ${computationId} not found`); } if (this.runningComputations.size >= this.maxConcurrency) { await this.waitForSlot(); } node.state = 'running'; node.startTime = Date.now(); this.runningComputations.add(computationId); try { const result = await this.executeWithTimeout(node); node.result = result; node.state = 'completed'; node.endTime = Date.now(); // Cache result if configured if (node.context.cacheable) { this.cacheResult(computationId, result); } return result; } catch (error) { node.error = error; node.state = 'failed'; node.endTime = Date.now(); // Retry logic if (node.context.retryAttempts && node.context.retryAttempts > 0) { node.context.retryAttempts--; node.state = 'pending'; return this.executeComputation(computationId); } throw error; } finally { this.runningComputations.delete(computationId); } } async executeWithTimeout(node) { const computation = node.computation; const timeout = node.context.timeout; if (!timeout) { return this.isPromiseFunction(computation) ? await computation() : computation(); } return new Promise((resolve, reject) => { const timeoutHandle = setTimeout(() => { reject(new Error(`Computation ${node.id} timed out after ${timeout}ms`)); }, timeout); const execute = async () => { try { const result = this.isPromiseFunction(computation) ? await computation() : computation(); clearTimeout(timeoutHandle); resolve(result); } catch (error) { clearTimeout(timeoutHandle); reject(error); } }; execute(); }); } isPromiseFunction(fn) { const result = fn.constructor.name === 'AsyncFunction' || fn.toString().includes('async') || fn.toString().includes('Promise'); return result; } async waitForCompletion(computationId) { return new Promise((resolve, reject) => { const checkCompletion = () => { const node = this.computations.get(computationId); if (!node) { reject(new Error(`Computation ${computationId} not found`)); return; } if (node.state === 'completed') { resolve(node.result); } else if (node.state === 'failed') { reject(node.error || new Error(`Computation ${computationId} failed`)); } else { setTimeout(checkCompletion, 10); } }; checkCompletion(); }); } async waitForSlot() { return new Promise(resolve => { const checkSlot = () => { if (this.runningComputations.size < this.maxConcurrency) { resolve(); } else { setTimeout(checkSlot, 10); } }; checkSlot(); }); } getCachedResult(computationId) { const cached = this.resultCache.get(computationId); if (!cached) return undefined; // Check TTL if (cached.ttl && Date.now() > cached.timestamp + cached.ttl) { this.resultCache.delete(computationId); return undefined; } return cached.value; } cacheResult(computationId, result, ttl) { this.resultCache.set(computationId, { value: result, timestamp: Date.now(), ttl }); } // Batch evaluation for improved performance async evaluateBatch(computationIds) { const batchId = `batch_${Date.now()}`; const job = { id: batchId, computations: computationIds, priority: 'medium', parallelism: Math.min(computationIds.length, this.maxConcurrency), state: 'queued' }; this.batchJobs.set(batchId, job); try { job.state = 'running'; // Group computations by dependencies for optimal execution order const sortedIds = this.topologicalSort(computationIds); const results = new Map(); // Execute in parallel batches respecting dependencies for (let i = 0; i < sortedIds.length; i += job.parallelism) { const batch = sortedIds.slice(i, i + job.parallelism); const batchPromises = batch.map(async (id) => { try { const result = await this.evaluate(id); return { id, result, error: null }; } catch (error) { return { id, result: null, error }; } }); const batchResults = await Promise.all(batchPromises); batchResults.forEach(({ id, result, error }) => { if (error) { results.set(id, { error }); } else { results.set(id, result); } }); } job.state = 'completed'; return results; } catch (error) { job.state = 'failed'; throw error; } } // Topological sort for dependency-aware execution topologicalSort(computationIds) { const visited = new Set(); const recursionStack = new Set(); const result = []; const visit = (id) => { if (recursionStack.has(id)) { throw new Error(`Circular dependency detected involving ${id}`); } if (visited.has(id)) { return; } visited.add(id); recursionStack.add(id); const node = this.computations.get(id); if (node) { node.dependencies.forEach(depId => { if (computationIds.includes(depId)) { visit(depId); } }); } recursionStack.delete(id); result.push(id); }; computationIds.forEach(id => { if (!visited.has(id)) { visit(id); } }); return result; } // Cleanup and resource management cleanup(olderThanMs = 3600000) { const cutoff = Date.now() - olderThanMs; // Clean completed computations for (const [id, node] of this.computations.entries()) { if (node.state === 'completed' && node.endTime && node.endTime < cutoff) { this.computations.delete(id); } } // Clean cache entries for (const [id, cached] of this.resultCache.entries()) { if (cached.timestamp < cutoff) { this.resultCache.delete(id); } } // Clean batch jobs for (const [id, job] of this.batchJobs.entries()) { if (job.state === 'completed' || job.state === 'failed') { this.batchJobs.delete(id); } } } getStats() { let completed = 0; let failed = 0; let totalExecutionTime = 0; for (const node of this.computations.values()) { if (node.state === 'completed') { completed++; if (node.startTime && node.endTime) { totalExecutionTime += node.endTime - node.startTime; } } else if (node.state === 'failed') { failed++; } } return { totalComputations: this.computations.size, completedComputations: completed, failedComputations: failed, runningComputations: this.runningComputations.size, cacheSize: this.resultCache.size, averageExecutionTime: completed > 0 ? totalExecutionTime / completed : 0 }; } } exports.LazyEvaluator = LazyEvaluator; class LazyComputation { evaluator; computationId; constructor(evaluator, computationId) { this.evaluator = evaluator; this.computationId = computationId; } async getValue() { return this.evaluator.evaluate(this.computationId); } // Transform the computation map(fn) { const newId = `${this.computationId}:map:${Date.now()}`; return this.evaluator.lazy(newId, async () => { const value = await this.getValue(); return fn(value); }, { priority: 'medium', dependencies: [this.computationId], cacheable: true }); } // Chain computations flatMap(fn) { const newId = `${this.computationId}:flatMap:${Date.now()}`; return this.evaluator.lazy(newId, async () => { const value = await this.getValue(); const nextComputation = fn(value); return nextComputation.getValue(); }, { priority: 'medium', dependencies: [this.computationId], cacheable: true }); } // Combine with another computation combine(other, combiner) { const newId = `${this.computationId}:combine:${Date.now()}`; return this.evaluator.lazy(newId, async () => { const [valueA, valueB] = await Promise.all([ this.getValue(), other.getValue() ]); return combiner(valueA, valueB); }, { priority: 'medium', dependencies: [this.computationId], cacheable: true }); } } exports.LazyComputation = LazyComputation; // Global lazy evaluator instance exports.globalLazyEvaluator = new LazyEvaluator(20); //# sourceMappingURL=lazy-evaluator.js.map