semantic-ds-toolkit
Version:
Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference
412 lines • 14.4 kB
JavaScript
// Opt the whole CommonJS module into strict-mode semantics.
"use strict";
// Mark the exports object with `__esModule` so ES-module interop helpers can
// recognise it as a transpiled ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the three public names as `undefined`; the real values are
// assigned further down, once each class / instance has been created.
exports.globalLazyEvaluator = exports.LazyComputation = exports.LazyEvaluator = void 0;
/**
 * Registry and scheduler for lazily evaluated computations.
 *
 * A computation is registered under an id with `lazy()` and runs only when
 * `evaluate()` or `evaluateBatch()` asks for its value. Declared dependencies
 * are evaluated first, at most `maxConcurrency` computations execute at the
 * same time, and results of `cacheable` computations are kept in a result
 * cache keyed by computation id.
 */
class LazyEvaluator {
    /** Registered computation nodes, keyed by computation id. */
    computations = new Map();
    /** Retained for compatibility; nothing in this class reads or writes it. */
    evaluationQueue = [];
    /** Ids of the computations that currently hold a concurrency slot. */
    runningComputations = new Set();
    /** Batch job records created by `evaluateBatch`, keyed by batch id. */
    batchJobs = new Map();
    /** Upper bound on simultaneously executing computations. */
    maxConcurrency;
    /** Cached results (`{ value, timestamp, ttl }`), keyed by computation id. */
    resultCache = new Map();
    /** One shared promise per computation that is being evaluated right now. */
    inFlightEvaluations = new Map();
    /** Ids whose dependency lists are being walked synchronously (cycle guard). */
    resolvingIds = new Set();
    /** Counter that keeps batch ids unique within one millisecond. */
    batchSequence = 0;

    /**
     * @param {number} [maxConcurrency=10] Maximum number of computations that
     *   may execute at the same time.
     */
    constructor(maxConcurrency = 10) {
        this.maxConcurrency = maxConcurrency;
    }

    /**
     * Register a lazy computation. Registering an id that already exists
     * replaces the previous node.
     *
     * @param {*} id Unique computation id.
     * @param {Function} computation Sync or async function producing the value.
     * @param {object} [context] Options read by this class: `dependencies`,
     *   `cacheable`, `timeout` (ms) and `retryAttempts`.
     * @returns {LazyComputation} Handle bound to this evaluator and `id`.
     */
    lazy(id, computation, context = { priority: 'medium' }) {
        const node = {
            id,
            computation,
            context,
            state: 'pending',
            dependencies: new Set(),
            dependents: new Set()
        };
        if (context.dependencies) {
            context.dependencies.forEach((depId) => {
                node.dependencies.add(depId);
                // Back-link only dependencies that are already registered.
                const depNode = this.computations.get(depId);
                if (depNode) {
                    depNode.dependents.add(id);
                }
            });
        }
        this.computations.set(id, node);
        return new LazyComputation(this, id);
    }

    /**
     * Register `fingerprintFn(column)` as the computation `fingerprint:<columnId>`
     * (high priority, cacheable, 5000 ms timeout).
     */
    lazyFingerprint(columnId, column, fingerprintFn) {
        return this.lazy(`fingerprint:${columnId}`, () => fingerprintFn(column), {
            priority: 'high',
            cacheable: true,
            timeout: 5000
        });
    }

    /**
     * Register `anchorFn(datasetName, column)` as the computation
     * `anchor:<anchorId>`. It depends on `fingerprint:<anchorId>`, which must
     * be registered before the anchor is evaluated.
     */
    lazyAnchor(anchorId, datasetName, column, anchorFn) {
        const fingerprintId = `fingerprint:${anchorId}`;
        return this.lazy(`anchor:${anchorId}`, () => anchorFn(datasetName, column), {
            priority: 'medium',
            dependencies: [fingerprintId],
            cacheable: true,
            timeout: 10000
        });
    }

    /**
     * Register a computation that feeds `items` to `processFn` in slices of
     * `batchSize` and concatenates the returned collections.
     *
     * @param {*} batchId Suffix of the computation id (`batch:<batchId>`).
     * @param {Array} items Items to process.
     * @param {Function} processFn Receives one slice and returns (or resolves
     *   to) an iterable of results.
     * @param {number} [batchSize=100] Slice length; must be positive.
     * @returns {LazyComputation} Handle resolving to the flattened results. The
     *   computation rejects with `RangeError` when `items` is non-empty and
     *   `batchSize` is not positive.
     */
    lazyBatch(batchId, items, processFn, batchSize = 100) {
        return this.lazy(`batch:${batchId}`, async () => {
            // A non-positive step would never advance the loop below.
            if (items.length > 0 && !(batchSize > 0)) {
                throw new RangeError(`batchSize must be a positive number, got ${batchSize}`);
            }
            const results = [];
            for (let start = 0; start < items.length; start += batchSize) {
                const chunkResults = await processFn(items.slice(start, start + batchSize));
                // Element-wise push: spreading a very large array into push()
                // can exceed the engine's argument limit.
                for (const item of chunkResults) {
                    results.push(item);
                }
            }
            return results;
        }, {
            priority: 'low',
            cacheable: true,
            timeout: 60000
        });
    }

    /**
     * Produce the value of a registered computation, running it (and its
     * dependencies) if necessary. Concurrent calls for the same id share one
     * execution.
     *
     * @param {*} computationId Id passed to `lazy()`.
     * @returns {Promise<*>} The computation's result.
     * @throws {Error} If the id is unknown, the computation has failed, or the
     *   dependency graph contains a cycle through this id.
     */
    async evaluate(computationId) {
        const node = this.computations.get(computationId);
        if (!node) {
            throw new Error(`Computation ${computationId} not found`);
        }
        if (node.state === 'completed') {
            return node.result;
        }
        if (node.state === 'failed') {
            throw node.error || new Error(`Computation ${computationId} failed`);
        }
        // Reaching an id whose own dependency walk is still on the call stack
        // means it (transitively) depends on itself.
        if (this.resolvingIds.has(computationId)) {
            throw new Error(`Circular dependency detected involving ${computationId}`);
        }
        const inFlight = this.inFlightEvaluations.get(computationId);
        if (inFlight) {
            return inFlight;
        }
        if (node.state === 'running') {
            // Started through executeComputation() directly; poll for the outcome.
            return this.waitForCompletion(computationId);
        }
        if (node.context.cacheable) {
            const cached = this.getCachedResult(computationId);
            if (cached !== undefined) {
                node.result = cached;
                node.state = 'completed';
                return cached;
            }
        }
        return this.startEvaluation(computationId);
    }

    /**
     * Resolve dependencies and execute a pending computation, publishing the
     * shared promise before any dependency is touched so that every later
     * `evaluate()` call for the same id joins this run.
     *
     * @param {*} computationId Id of a registered, pending computation.
     * @returns {Promise<*>} The shared evaluation promise.
     */
    startEvaluation(computationId) {
        let settle;
        const evaluation = new Promise((resolve, reject) => {
            settle = { resolve, reject };
        });
        this.inFlightEvaluations.set(computationId, evaluation);
        const release = () => {
            if (this.inFlightEvaluations.get(computationId) === evaluation) {
                this.inFlightEvaluations.delete(computationId);
            }
        };
        // resolveDependencies() calls evaluate() on every dependency before its
        // first await, so the whole pending sub-graph is walked synchronously
        // while this id is marked as "being resolved".
        let dependenciesReady;
        this.resolvingIds.add(computationId);
        try {
            dependenciesReady = this.resolveDependencies(computationId);
        }
        catch (error) {
            dependenciesReady = Promise.reject(error);
        }
        finally {
            this.resolvingIds.delete(computationId);
        }
        dependenciesReady
            .then(() => this.executeComputation(computationId))
            .then((value) => {
                release();
                settle.resolve(value);
            }, (error) => {
                release();
                settle.reject(error);
            });
        return evaluation;
    }

    /**
     * Evaluate every declared dependency of a computation in parallel.
     * Resolves immediately when the id is unknown.
     */
    async resolveDependencies(computationId) {
        const node = this.computations.get(computationId);
        if (!node) {
            return;
        }
        await Promise.all(Array.from(node.dependencies, (depId) => this.evaluate(depId)));
    }

    /**
     * Run a computation once a concurrency slot is free, retrying up to
     * `context.retryAttempts` additional times on failure. The slot is held
     * for the whole sequence of attempts and the caller's context object is
     * not modified.
     *
     * @param {*} computationId Id of a registered computation.
     * @returns {Promise<*>} The computation's result.
     * @throws {Error} If the id is unknown, or the last attempt's error.
     */
    async executeComputation(computationId) {
        const node = this.computations.get(computationId);
        if (!node) {
            throw new Error(`Computation ${computationId} not found`);
        }
        // Re-check after every wake-up: another waiter may have taken the slot.
        while (this.runningComputations.size >= this.maxConcurrency) {
            await this.waitForSlot();
        }
        let retriesLeft = node.context.retryAttempts > 0 ? node.context.retryAttempts : 0;
        this.runningComputations.add(computationId);
        try {
            for (;;) {
                node.state = 'running';
                node.startTime = Date.now();
                try {
                    const result = await this.executeWithTimeout(node);
                    node.result = result;
                    node.error = undefined;
                    node.state = 'completed';
                    node.endTime = Date.now();
                    if (node.context.cacheable) {
                        this.cacheResult(computationId, result);
                    }
                    return result;
                }
                catch (error) {
                    node.error = error;
                    node.endTime = Date.now();
                    if (retriesLeft <= 0) {
                        node.state = 'failed';
                        throw error;
                    }
                    retriesLeft -= 1;
                }
            }
        }
        finally {
            this.runningComputations.delete(computationId);
        }
    }

    /**
     * Invoke a node's computation, rejecting if it has not settled within
     * `context.timeout` milliseconds. The result is always awaited, so the
     * deadline also covers plain functions that return a promise.
     *
     * @param {object} node Computation node created by `lazy()`.
     * @returns {Promise<*>} The computation's result.
     */
    async executeWithTimeout(node) {
        const { computation } = node;
        const { timeout } = node.context;
        if (!timeout) {
            return await computation();
        }
        let timer;
        const deadline = new Promise((_, reject) => {
            timer = setTimeout(() => {
                reject(new Error(`Computation ${node.id} timed out after ${timeout}ms`));
            }, timeout);
        });
        // The async wrapper starts the computation synchronously and turns a
        // synchronous throw into a rejection.
        const outcome = (async () => computation())();
        try {
            return await Promise.race([outcome, deadline]);
        }
        finally {
            clearTimeout(timer);
        }
    }

    /**
     * Heuristic: is `fn` an async function, or does its source text mention
     * `async` or `Promise`? Retained for compatibility; `executeWithTimeout`
     * no longer consults it because the heuristic misses plain functions that
     * return a promise.
     */
    isPromiseFunction(fn) {
        const source = fn.toString();
        return fn.constructor.name === 'AsyncFunction' ||
            source.includes('async') ||
            source.includes('Promise');
    }

    /**
     * Poll every 10 ms until the computation is completed or failed.
     *
     * @returns {Promise<*>} The result; rejects with the stored error, or if
     *   the id is no longer registered.
     */
    async waitForCompletion(computationId) {
        return new Promise((resolve, reject) => {
            const poll = () => {
                const node = this.computations.get(computationId);
                if (!node) {
                    reject(new Error(`Computation ${computationId} not found`));
                    return;
                }
                if (node.state === 'completed') {
                    resolve(node.result);
                    return;
                }
                if (node.state === 'failed') {
                    reject(node.error || new Error(`Computation ${computationId} failed`));
                    return;
                }
                setTimeout(poll, 10);
            };
            poll();
        });
    }

    /** Poll every 10 ms until fewer than `maxConcurrency` computations run. */
    async waitForSlot() {
        return new Promise((resolve) => {
            const poll = () => {
                if (this.runningComputations.size < this.maxConcurrency) {
                    resolve();
                    return;
                }
                setTimeout(poll, 10);
            };
            poll();
        });
    }

    /**
     * Look up a cached result, evicting it when its TTL has elapsed.
     *
     * @returns {*} The cached value, or `undefined` on a miss.
     */
    getCachedResult(computationId) {
        const entry = this.resultCache.get(computationId);
        if (!entry) {
            return undefined;
        }
        if (entry.ttl && Date.now() > entry.timestamp + entry.ttl) {
            this.resultCache.delete(computationId);
            return undefined;
        }
        return entry.value;
    }

    /**
     * Store a result in the cache.
     *
     * @param {*} computationId Cache key.
     * @param {*} result Value to store.
     * @param {number} [ttl] Lifetime in ms; entries without one never expire
     *   on read.
     */
    cacheResult(computationId, result, ttl) {
        this.resultCache.set(computationId, {
            value: result,
            timestamp: Date.now(),
            ttl
        });
    }

    /**
     * Evaluate several computations, dependencies first, in chunks of at most
     * `maxConcurrency`.
     *
     * @param {Array} computationIds Ids to evaluate.
     * @returns {Promise<Map>} Map from id to its result, or to `{ error }`
     *   when that computation rejected.
     * @throws {Error} If the requested ids contain a dependency cycle.
     */
    async evaluateBatch(computationIds) {
        this.batchSequence += 1;
        // The sequence number keeps ids distinct for batches started within
        // the same millisecond.
        const batchId = `batch_${Date.now()}_${this.batchSequence}`;
        const job = {
            id: batchId,
            computations: computationIds,
            priority: 'medium',
            parallelism: Math.min(computationIds.length, this.maxConcurrency),
            state: 'queued'
        };
        this.batchJobs.set(batchId, job);
        try {
            job.state = 'running';
            const sortedIds = this.topologicalSort(computationIds);
            const results = new Map();
            // Never step by zero, or the loop below would not advance.
            const step = Math.max(1, job.parallelism);
            for (let offset = 0; offset < sortedIds.length; offset += step) {
                const chunk = sortedIds.slice(offset, offset + step);
                const outcomes = await Promise.allSettled(chunk.map((id) => this.evaluate(id)));
                outcomes.forEach((outcome, index) => {
                    const id = chunk[index];
                    if (outcome.status === 'rejected') {
                        results.set(id, { error: outcome.reason });
                    }
                    else {
                        results.set(id, outcome.value);
                    }
                });
            }
            job.state = 'completed';
            return results;
        }
        catch (error) {
            job.state = 'failed';
            throw error;
        }
    }

    /**
     * Order the given ids so that each one comes after those of its
     * dependencies that are also in the list. Duplicates are dropped.
     *
     * @param {Array} computationIds Ids to order.
     * @returns {Array} Ids in dependency-first order.
     * @throws {Error} If the ids contain a dependency cycle.
     */
    topologicalSort(computationIds) {
        const requested = new Set(computationIds);
        const visited = new Set();
        const recursionStack = new Set();
        const ordered = [];
        const visit = (id) => {
            if (recursionStack.has(id)) {
                throw new Error(`Circular dependency detected involving ${id}`);
            }
            if (visited.has(id)) {
                return;
            }
            visited.add(id);
            recursionStack.add(id);
            const node = this.computations.get(id);
            if (node) {
                for (const depId of node.dependencies) {
                    if (requested.has(depId)) {
                        visit(depId);
                    }
                }
            }
            recursionStack.delete(id);
            ordered.push(id);
        };
        for (const id of computationIds) {
            visit(id);
        }
        return ordered;
    }

    /**
     * Drop completed computations and cache entries older than `olderThanMs`,
     * and every finished (completed or failed) batch job.
     *
     * @param {number} [olderThanMs=3600000] Age threshold in milliseconds.
     */
    cleanup(olderThanMs = 3600000) {
        const cutoff = Date.now() - olderThanMs;
        for (const [id, node] of this.computations.entries()) {
            if (node.state === 'completed' && node.endTime && node.endTime < cutoff) {
                this.computations.delete(id);
            }
        }
        for (const [id, entry] of this.resultCache.entries()) {
            if (entry.timestamp < cutoff) {
                this.resultCache.delete(id);
            }
        }
        for (const [id, job] of this.batchJobs.entries()) {
            if (job.state === 'completed' || job.state === 'failed') {
                this.batchJobs.delete(id);
            }
        }
    }

    /**
     * Summarise the registry: totals per state, cache size and the mean
     * wall-clock execution time (ms) of completed computations.
     */
    getStats() {
        let completed = 0;
        let failed = 0;
        let totalExecutionTime = 0;
        for (const node of this.computations.values()) {
            if (node.state === 'completed') {
                completed += 1;
                if (node.startTime && node.endTime) {
                    totalExecutionTime += node.endTime - node.startTime;
                }
            }
            else if (node.state === 'failed') {
                failed += 1;
            }
        }
        return {
            totalComputations: this.computations.size,
            completedComputations: completed,
            failedComputations: failed,
            runningComputations: this.runningComputations.size,
            cacheSize: this.resultCache.size,
            averageExecutionTime: completed > 0 ? totalExecutionTime / completed : 0
        };
    }
}
// Publish the evaluator class on the CommonJS exports object.
exports.LazyEvaluator = LazyEvaluator;
/**
 * Handle to one computation registered with an evaluator.
 *
 * The handle stores the evaluator and the computation id. `getValue()` asks
 * the evaluator for the result, and `map` / `flatMap` / `combine` register new
 * computations derived from this one and return handles to them.
 */
class LazyComputation {
    /** Class-wide counter that keeps derived ids unique within a millisecond. */
    static #derivedSequence = 0;
    evaluator;
    computationId;

    /**
     * @param {object} evaluator Object providing `lazy(id, fn, context)` and
     *   `evaluate(id)`.
     * @param {*} computationId Id of the computation this handle refers to.
     */
    constructor(evaluator, computationId) {
        this.evaluator = evaluator;
        this.computationId = computationId;
    }

    /**
     * Build the id for a derived computation. The timestamp alone is not
     * unique: two derivations from the same source in one millisecond would
     * share an id and the second registration would replace the first.
     */
    static #deriveId(sourceId, operation) {
        LazyComputation.#derivedSequence += 1;
        return `${sourceId}:${operation}:${Date.now()}:${LazyComputation.#derivedSequence}`;
    }

    /**
     * Evaluate this computation through its evaluator.
     *
     * @returns {Promise<*>} The computation's result.
     */
    async getValue() {
        return this.evaluator.evaluate(this.computationId);
    }

    /**
     * Register a computation that applies `fn` to this computation's value.
     *
     * @param {Function} fn Receives the value; may return a value or a promise.
     * @returns {LazyComputation} Handle to the derived computation.
     */
    map(fn) {
        const derivedId = LazyComputation.#deriveId(this.computationId, 'map');
        return this.evaluator.lazy(derivedId, async () => fn(await this.getValue()), {
            priority: 'medium',
            dependencies: [this.computationId],
            cacheable: true
        });
    }

    /**
     * Register a computation that passes this value to `fn` and then
     * evaluates the handle `fn` returns.
     *
     * @param {Function} fn Receives the value and returns an object with a
     *   `getValue()` method (typically another `LazyComputation`).
     * @returns {LazyComputation} Handle to the derived computation.
     */
    flatMap(fn) {
        const derivedId = LazyComputation.#deriveId(this.computationId, 'flatMap');
        return this.evaluator.lazy(derivedId, async () => {
            const next = fn(await this.getValue());
            return next.getValue();
        }, {
            priority: 'medium',
            dependencies: [this.computationId],
            cacheable: true
        });
    }

    /**
     * Register a computation that merges this value with another handle's.
     *
     * @param {LazyComputation} other Second operand.
     * @param {Function} combiner Called as `combiner(thisValue, otherValue)`.
     * @returns {LazyComputation} Handle to the derived computation.
     */
    combine(other, combiner) {
        const derivedId = LazyComputation.#deriveId(this.computationId, 'combine');
        const dependencies = [this.computationId];
        // Declare `other` as a dependency when the same evaluator owns it, so
        // it is evaluated before this computation starts rather than from
        // inside it. A handle owned by a different evaluator cannot be listed,
        // because its id would be unknown to this one.
        if (other?.evaluator === this.evaluator && other.computationId !== undefined) {
            dependencies.push(other.computationId);
        }
        return this.evaluator.lazy(derivedId, async () => {
            const [valueA, valueB] = await Promise.all([
                this.getValue(),
                other.getValue()
            ]);
            return combiner(valueA, valueB);
        }, {
            priority: 'medium',
            dependencies,
            cacheable: true
        });
    }
}
// Publish the computation-handle class on the CommonJS exports object.
exports.LazyComputation = LazyComputation;
// Shared evaluator instance created at module load; the constructor argument
// sets its concurrency limit to 20 (the class default is 10).
exports.globalLazyEvaluator = new LazyEvaluator(20);
//# sourceMappingURL=lazy-evaluator.js.map