semantic-ds-toolkit
Version:
Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference
424 lines • 19.3 kB
JavaScript
;
// Flag the exports object with `__esModule: true` (the interop marker conventionally
// emitted when ES-module syntax is compiled down to CommonJS).
Object.defineProperty(exports, "__esModule", { value: true });
// Placeholder only: the class export is assigned for real right after the class
// declaration further down in this file.
exports.ShadowSystemBenchmark = void 0;
// Both names below are function declarations defined at the bottom of this file;
// function hoisting is what makes them usable here, before their definitions.
exports.runShadowSystemBenchmark = runShadowSystemBenchmark;
exports.generateBenchmarkReport = generateBenchmarkReport;
const perf_hooks_1 = require("perf_hooks");
const shadow_semantics_1 = require("../core/shadow-semantics");
const reconciler_1 = require("../core/reconciler");
const attachment_api_1 = require("../core/attachment-api");
/**
 * Benchmark harness around `attachSemanticsShadow` and `reconcileAnchors`.
 *
 * Each `benchmark*` method returns one result record with the shape
 * `{ operation, execution_time_ms, memory_usage_mb, throughput,
 *    accuracy_metrics, scalability_metrics }`; `runComprehensiveBenchmark`
 * runs all of them and adds an aggregated summary.
 */
class ShadowSystemBenchmark {
    shadowLayer;
    reconciler;

    /**
     * @param {object} [options] Passed through unchanged to the
     *   `ShadowSemanticsLayer` constructor.
     */
    constructor(options) {
        this.shadowLayer = new shadow_semantics_1.ShadowSemanticsLayer(options);
        this.reconciler = new reconciler_1.SmartAnchorReconciler();
    }

    /**
     * Runs every benchmark one after another and aggregates the results.
     *
     * @returns {Promise<object>} `{ suite_name, total_execution_time_ms, results, summary }`.
     */
    async runComprehensiveBenchmark() {
        const suiteStartTime = perf_hooks_1.performance.now();
        console.log('š Starting Shadow Semantics Performance Benchmark Suite');
        // Deliberately sequential: overlapping runs would contaminate each
        // other's timing and heap-delta measurements.
        const stages = [
            () => this.benchmarkBasicAttachment(),
            () => this.benchmarkLargeDataset(),
            () => this.benchmarkComplexReconciliation(),
            () => this.benchmarkMemoryEfficiency(),
            () => this.benchmarkConcurrentOperations(),
            () => this.benchmarkScalability()
        ];
        const results = [];
        for (const stage of stages) {
            results.push(await stage());
        }
        const totalTime = perf_hooks_1.performance.now() - suiteStartTime;
        return {
            suite_name: 'Shadow Semantics Comprehensive Benchmark',
            total_execution_time_ms: totalTime,
            results: results,
            summary: this.calculateSummary(results)
        };
    }

    /**
     * Converts a count and an elapsed time into a per-second rate.
     *
     * @param {number} count Number of items processed.
     * @param {number} elapsedMs Elapsed wall time in milliseconds.
     * @returns {number} `count / seconds`, or 0 when the elapsed time is not a
     *   positive number (avoids `NaN` / `Infinity` leaking into reports).
     */
    perSecond(count, elapsedMs) {
        return elapsedMs > 0 ? count / (elapsedMs / 1000) : 0;
    }

    /**
     * Shared body of the single-call attachment benchmarks: generates a
     * synthetic frame, times one `attachSemanticsShadow` call with the
     * 'balanced' strategy and packages the measurements.
     *
     * @param {string} operation Value for the result's `operation` field.
     * @param {number} rows Row count of the generated frame.
     * @param {number} cols Column count of the generated frame.
     * @param {number} confidenceThreshold Threshold passed to the API and to
     *   `calculateAccuracyMetrics`.
     * @returns {object} One benchmark result record.
     */
    runAttachmentBenchmark(operation, rows, cols, confidenceThreshold) {
        const testDataFrame = this.generateTestDataFrame(rows, cols);
        const startTime = perf_hooks_1.performance.now();
        const startMemory = this.getMemoryUsage();
        const result = (0, attachment_api_1.attachSemanticsShadow)(testDataFrame, {
            confidence_threshold: confidenceThreshold,
            reconciliation_strategy: 'balanced'
        });
        const endTime = perf_hooks_1.performance.now();
        const endMemory = this.getMemoryUsage();
        const executionTime = endTime - startTime;
        const rowCount = testDataFrame.shape[0];
        const columnCount = testDataFrame.columns.length;
        return {
            operation: operation,
            execution_time_ms: executionTime,
            memory_usage_mb: endMemory - startMemory,
            throughput: {
                columns_per_second: this.perSecond(columnCount, executionTime),
                rows_per_second: this.perSecond(rowCount, executionTime)
            },
            accuracy_metrics: this.calculateAccuracyMetrics(result.semantic_attachments, confidenceThreshold),
            scalability_metrics: {
                dataset_size: rowCount,
                column_count: columnCount,
                performance_degradation: 0
            }
        };
    }

    /**
     * Times a single attachment on a 1000 x 10 frame at threshold 0.8.
     *
     * @returns {Promise<object>} Result record with operation 'basic_attachment'.
     */
    async benchmarkBasicAttachment() {
        console.log(' š Benchmarking basic semantic attachment...');
        return this.runAttachmentBenchmark('basic_attachment', 1000, 10, 0.8);
    }

    /**
     * Times a single attachment on a 100000 x 50 frame at threshold 0.75.
     *
     * @returns {Promise<object>} Result record with operation 'large_dataset'.
     */
    async benchmarkLargeDataset() {
        console.log(' š Benchmarking large dataset performance...');
        return this.runAttachmentBenchmark('large_dataset', 100000, 50, 0.75);
    }

    /**
     * Times one `reconcileAnchors` call of 25 generated columns against 30
     * generated anchors using the 'aggressive' strategy.
     *
     * @returns {Promise<object>} Result record with operation 'complex_reconciliation'.
     */
    async benchmarkComplexReconciliation() {
        console.log(' š Benchmarking complex reconciliation scenarios...');
        const newColumns = this.generateTestColumns(25);
        const existingAnchors = this.generateTestAnchors(30);
        const startTime = perf_hooks_1.performance.now();
        const startMemory = this.getMemoryUsage();
        const result = (0, attachment_api_1.reconcileAnchors)('benchmark_dataset', newColumns, existingAnchors, {
            strategy: 'aggressive',
            confidence_threshold: 0.7,
            drift_tolerance: 0.3
        });
        const endTime = perf_hooks_1.performance.now();
        const endMemory = this.getMemoryUsage();
        const executionTime = endTime - startTime;
        const matchedAnchors = result.reconciliation_result.matched_anchors;
        return {
            operation: 'complex_reconciliation',
            execution_time_ms: executionTime,
            memory_usage_mb: endMemory - startMemory,
            throughput: {
                columns_per_second: this.perSecond(newColumns.length, executionTime),
                rows_per_second: 0
            },
            accuracy_metrics: {
                confidence_threshold: 0.7,
                high_confidence_matches: matchedAnchors.filter(m => m.confidence >= 0.9).length,
                low_confidence_matches: matchedAnchors.filter(m => m.confidence < 0.9).length,
                false_positives: 0,
                false_negatives: 0
            },
            scalability_metrics: {
                dataset_size: 0,
                column_count: newColumns.length,
                performance_degradation: 0
            }
        };
    }

    /**
     * Repeats an uncached attachment on fresh 5000 x 20 frames ten times and
     * reports the summed time and the largest per-iteration heap delta.
     *
     * @returns {Promise<object>} Result record with operation 'memory_efficiency'.
     */
    async benchmarkMemoryEfficiency() {
        console.log(' š¾ Benchmarking memory efficiency...');
        const iterations = 10;
        const rowsPerFrame = 5000;
        const colsPerFrame = 20;
        let totalTime = 0;
        let peakMemory = 0;
        let totalColumns = 0;
        for (let i = 0; i < iterations; i++) {
            const testDataFrame = this.generateTestDataFrame(rowsPerFrame, colsPerFrame);
            const startTime = perf_hooks_1.performance.now();
            const startMemory = this.getMemoryUsage();
            (0, attachment_api_1.attachSemanticsShadow)(testDataFrame, {
                confidence_threshold: 0.8,
                shadow_options: { enable_caching: false }
            });
            const endTime = perf_hooks_1.performance.now();
            const endMemory = this.getMemoryUsage();
            totalTime += (endTime - startTime);
            peakMemory = Math.max(peakMemory, endMemory - startMemory);
            totalColumns += testDataFrame.columns.length;
            // `global.gc` only exists when the runtime exposes it; collect
            // between iterations when it does.
            if (global.gc) {
                global.gc();
            }
        }
        return {
            operation: 'memory_efficiency',
            execution_time_ms: totalTime,
            memory_usage_mb: peakMemory,
            throughput: {
                columns_per_second: this.perSecond(totalColumns, totalTime),
                rows_per_second: 0
            },
            accuracy_metrics: {
                confidence_threshold: 0.8,
                high_confidence_matches: 0,
                low_confidence_matches: 0,
                false_positives: 0,
                false_negatives: 0
            },
            scalability_metrics: {
                dataset_size: rowsPerFrame * iterations,
                column_count: totalColumns,
                performance_degradation: 0
            }
        };
    }

    /**
     * Issues five attachments (2000 x 15 frames) and awaits them together.
     *
     * NOTE(review): the other benchmarks consume the return value of
     * `attachSemanticsShadow` synchronously; if the call is synchronous the
     * five calls here run back-to-back rather than concurrently — confirm.
     *
     * @returns {Promise<object>} Result record with operation 'concurrent_operations'.
     */
    async benchmarkConcurrentOperations() {
        console.log(' ā” Benchmarking concurrent operations...');
        const testDataFrames = Array.from({ length: 5 }, () => this.generateTestDataFrame(2000, 15));
        const startTime = perf_hooks_1.performance.now();
        const startMemory = this.getMemoryUsage();
        const promises = testDataFrames.map(df => (0, attachment_api_1.attachSemanticsShadow)(df, {
            confidence_threshold: 0.8,
            reconciliation_strategy: 'balanced'
        }));
        const results = await Promise.all(promises);
        const endTime = perf_hooks_1.performance.now();
        const endMemory = this.getMemoryUsage();
        const executionTime = endTime - startTime;
        const totalColumns = testDataFrames.reduce((sum, df) => sum + df.columns.length, 0);
        const totalRows = testDataFrames.reduce((sum, df) => sum + df.shape[0], 0);
        return {
            operation: 'concurrent_operations',
            execution_time_ms: executionTime,
            memory_usage_mb: endMemory - startMemory,
            throughput: {
                columns_per_second: this.perSecond(totalColumns, executionTime),
                rows_per_second: this.perSecond(totalRows, executionTime)
            },
            accuracy_metrics: {
                confidence_threshold: 0.8,
                high_confidence_matches: results.reduce((sum, r) => sum + r.semantic_attachments.filter(a => a.confidence_score >= 0.9).length, 0),
                low_confidence_matches: results.reduce((sum, r) => sum + r.semantic_attachments.filter(a => a.confidence_score < 0.9).length, 0),
                false_positives: 0,
                false_negatives: 0
            },
            scalability_metrics: {
                dataset_size: totalRows,
                column_count: totalColumns,
                performance_degradation: 0
            }
        };
    }

    /**
     * Runs the attachment on four progressively larger frames.
     *
     * `performance_degradation` is the last run's time divided by the first
     * run's time. Time and throughput cover ALL runs, matching the row/column
     * totals they are reported next to (previously only the final run's time
     * was used, which overstated throughput). Memory is the largest
     * per-run heap delta, consistent with the other benchmarks, instead of
     * the absolute heap size.
     *
     * @returns {Promise<object>} Result record with operation 'scalability_analysis'.
     */
    async benchmarkScalability() {
        console.log(' š Benchmarking scalability characteristics...');
        const baseSizes = [
            { rows: 1000, cols: 5 },
            { rows: 10000, cols: 15 },
            { rows: 50000, cols: 30 },
            { rows: 100000, cols: 50 }
        ];
        let baselineTime = 0;
        let currentTime = 0;
        let totalTime = 0;
        let peakMemory = 0;
        let totalColumns = 0;
        let totalRows = 0;
        for (let i = 0; i < baseSizes.length; i++) {
            const { rows, cols } = baseSizes[i];
            const testDataFrame = this.generateTestDataFrame(rows, cols);
            const startTime = perf_hooks_1.performance.now();
            const startMemory = this.getMemoryUsage();
            (0, attachment_api_1.attachSemanticsShadow)(testDataFrame, {
                confidence_threshold: 0.8
            });
            const endTime = perf_hooks_1.performance.now();
            const endMemory = this.getMemoryUsage();
            const executionTime = endTime - startTime;
            if (i === 0) {
                baselineTime = executionTime;
            }
            currentTime = executionTime;
            totalTime += executionTime;
            peakMemory = Math.max(peakMemory, endMemory - startMemory);
            totalColumns += cols;
            totalRows += rows;
        }
        const performanceDegradation = baselineTime > 0 ? (currentTime / baselineTime) : 1;
        return {
            operation: 'scalability_analysis',
            execution_time_ms: totalTime,
            memory_usage_mb: peakMemory,
            throughput: {
                columns_per_second: this.perSecond(totalColumns, totalTime),
                rows_per_second: this.perSecond(totalRows, totalTime)
            },
            accuracy_metrics: {
                confidence_threshold: 0.8,
                high_confidence_matches: 0,
                low_confidence_matches: 0,
                false_positives: 0,
                false_negatives: 0
            },
            scalability_metrics: {
                dataset_size: totalRows,
                column_count: totalColumns,
                performance_degradation: performanceDegradation
            }
        };
    }

    /**
     * Builds an in-memory synthetic data frame.
     *
     * Column kinds cycle through id, name, email, amount, date, status, code,
     * value; the last three share the generic string generator. Columns are
     * named `<kind>_<index>`.
     *
     * @param {number} rows Number of values per column.
     * @param {number} cols Number of columns.
     * @returns {{columns: string[], dtypes: object, shape: number[],
     *   sample: function(number=): object, getColumn: function(string): Array}}
     */
    generateTestDataFrame(rows, cols) {
        const columns = [];
        const dtypes = {};
        const data = {};
        const columnTypes = ['id', 'name', 'email', 'amount', 'date', 'status', 'code', 'value'];
        const yearMs = 365 * 24 * 60 * 60 * 1000;
        for (let i = 0; i < cols; i++) {
            const colType = columnTypes[i % columnTypes.length];
            const colName = `${colType}_${i}`;
            columns.push(colName);
            switch (colType) {
                case 'id':
                    dtypes[colName] = 'int64';
                    data[colName] = Array.from({ length: rows }, (_, idx) => idx + 1);
                    break;
                case 'name':
                    dtypes[colName] = 'string';
                    data[colName] = Array.from({ length: rows }, (_, idx) => `Name_${idx}`);
                    break;
                case 'email':
                    dtypes[colName] = 'string';
                    data[colName] = Array.from({ length: rows }, (_, idx) => `user${idx}@example.com`);
                    break;
                case 'amount':
                    dtypes[colName] = 'float64';
                    data[colName] = Array.from({ length: rows }, () => Math.random() * 1000);
                    break;
                case 'date':
                    dtypes[colName] = 'datetime';
                    // Random instant within the past year, as an ISO-8601 string.
                    data[colName] = Array.from({ length: rows }, () => new Date(Date.now() - Math.random() * yearMs).toISOString());
                    break;
                default:
                    dtypes[colName] = 'string';
                    data[colName] = Array.from({ length: rows }, (_, idx) => `Value_${idx}`);
            }
        }
        return {
            columns: columns,
            dtypes: dtypes,
            shape: [rows, cols],
            sample: (n = 100) => {
                const result = {};
                for (const col of columns) {
                    result[col] = data[col].slice(0, n);
                }
                return result;
            },
            // Own-property check so names such as 'toString' or 'constructor'
            // yield [] instead of an inherited Object.prototype member.
            getColumn: (name) => (Object.prototype.hasOwnProperty.call(data, name) ? data[name] : [])
        };
    }

    /**
     * Builds `count` column descriptors of 100 values each, cycling through
     * string, int64, float64, boolean and datetime.
     *
     * @param {number} count Number of columns to generate.
     * @returns {Array<{name: string, values: Array, data_type: string}>}
     */
    generateTestColumns(count) {
        const columns = [];
        const types = ['string', 'int64', 'float64', 'boolean', 'datetime'];
        const valueCount = 100;
        for (let i = 0; i < count; i++) {
            const dataType = types[i % types.length];
            let values;
            switch (dataType) {
                case 'int64':
                    values = Array.from({ length: valueCount }, (_, idx) => idx);
                    break;
                case 'float64':
                    values = Array.from({ length: valueCount }, () => Math.random() * 1000);
                    break;
                case 'boolean':
                    values = Array.from({ length: valueCount }, () => Math.random() > 0.5);
                    break;
                case 'datetime':
                    values = Array.from({ length: valueCount }, () => new Date().toISOString());
                    break;
                default:
                    values = Array.from({ length: valueCount }, (_, idx) => `Value_${idx}`);
            }
            columns.push({
                name: `test_column_${i}`,
                values: values,
                data_type: dataType
            });
        }
        return columns;
    }

    /**
     * Builds `count` anchor records that all share one fixed fingerprint and
     * carry a random confidence in [0.8, 1.0).
     *
     * @param {number} count Number of anchors to generate.
     * @returns {Array<object>}
     */
    generateTestAnchors(count) {
        return Array.from({ length: Math.max(count, 0) }, (_, i) => ({
            dataset: 'test_dataset',
            column_name: `anchor_column_${i}`,
            anchor_id: `anchor_${i}`,
            fingerprint: `dtype=string;card=100;null_ratio=0.000;unique_ratio=0.900`,
            first_seen: '2024-01-01',
            last_seen: '2024-01-01',
            confidence: 0.8 + Math.random() * 0.2
        }));
    }

    /**
     * Buckets attachments by `confidence_score`.
     *
     * @param {Array<{confidence_score: number}>} attachments
     * @param {number} threshold Lower bound for the low-confidence bucket.
     * @returns {object} High = score >= 0.9; low = threshold <= score < 0.9;
     *   scores below the threshold are not counted. False positive/negative
     *   counts are always 0 (no ground truth is available here).
     */
    calculateAccuracyMetrics(attachments, threshold) {
        let highConfidence = 0;
        let lowConfidence = 0;
        for (const attachment of attachments) {
            const score = attachment.confidence_score;
            if (score >= 0.9) {
                highConfidence += 1;
            }
            else if (score >= threshold) {
                lowConfidence += 1;
            }
        }
        return {
            confidence_threshold: threshold,
            high_confidence_matches: highConfidence,
            low_confidence_matches: lowConfidence,
            false_positives: 0,
            false_negatives: 0
        };
    }

    /**
     * Aggregates individual result records into suite-level figures.
     *
     * @param {Array<object>} results Benchmark result records.
     * @returns {{average_confidence: number, total_columns_processed: number,
     *   overall_throughput: number, memory_efficiency: number}}
     *   `average_confidence` is the mean high/(high+low) share over results
     *   that have at least one high-confidence match. `overall_throughput`
     *   is 0 (not NaN/Infinity) when no time was recorded, e.g. for an empty
     *   result list.
     */
    calculateSummary(results) {
        let totalColumns = 0;
        let totalTime = 0;
        let totalMemory = 0;
        let confidenceShareSum = 0;
        let confidenceResultCount = 0;
        for (const r of results) {
            totalColumns += r.scalability_metrics.column_count;
            totalTime += r.execution_time_ms;
            totalMemory += r.memory_usage_mb;
            const high = r.accuracy_metrics.high_confidence_matches;
            if (high > 0) {
                const matched = high + r.accuracy_metrics.low_confidence_matches;
                confidenceShareSum += high / (matched || 1);
                confidenceResultCount += 1;
            }
        }
        return {
            average_confidence: confidenceResultCount > 0 ? confidenceShareSum / confidenceResultCount : 0,
            total_columns_processed: totalColumns,
            overall_throughput: this.perSecond(totalColumns, totalTime),
            // Heap deltas can sum to <= 0 after a collection; clamp the divisor to 1 MB.
            memory_efficiency: totalColumns / Math.max(totalMemory, 1)
        };
    }

    /**
     * @returns {number} Current `heapUsed` in MiB, or 0 when
     *   `process.memoryUsage` is unavailable.
     */
    getMemoryUsage() {
        if (typeof process !== 'undefined' && process.memoryUsage) {
            return process.memoryUsage().heapUsed / 1024 / 1024;
        }
        return 0;
    }

    /**
     * Renders a benchmark suite as a plain-text report.
     *
     * Match counts are printed only for results with at least one
     * high-confidence match; degradation only when it exceeds 1x.
     *
     * @param {object} benchmarkSuite Object shaped like the return value of
     *   `runComprehensiveBenchmark`.
     * @returns {string} The formatted report.
     */
    generateReport(benchmarkSuite) {
        const { summary } = benchmarkSuite;
        const heavyRule = '='.repeat(50);
        const parts = [
            `\nš Shadow Semantics Performance Report\n`,
            `${heavyRule}\n\n`,
            `Suite: ${benchmarkSuite.suite_name}\n`,
            `Total Execution Time: ${benchmarkSuite.total_execution_time_ms.toFixed(2)}ms\n`,
            `Overall Throughput: ${summary.overall_throughput.toFixed(2)} columns/sec\n`,
            `Memory Efficiency: ${summary.memory_efficiency.toFixed(2)} columns/MB\n`,
            `Average Confidence: ${(summary.average_confidence * 100).toFixed(1)}%\n\n`,
            `Individual Benchmark Results:\n`,
            `${'-'.repeat(30)}\n`
        ];
        for (const result of benchmarkSuite.results) {
            parts.push(`\n${result.operation.replace(/_/g, ' ').toUpperCase()}\n`);
            parts.push(` Execution Time: ${result.execution_time_ms.toFixed(2)}ms\n`);
            parts.push(` Memory Usage: ${result.memory_usage_mb.toFixed(2)}MB\n`);
            parts.push(` Throughput: ${result.throughput.columns_per_second.toFixed(2)} cols/sec\n`);
            if (result.accuracy_metrics.high_confidence_matches > 0) {
                parts.push(` High Confidence Matches: ${result.accuracy_metrics.high_confidence_matches}\n`);
                parts.push(` Low Confidence Matches: ${result.accuracy_metrics.low_confidence_matches}\n`);
            }
            if (result.scalability_metrics.performance_degradation > 1) {
                parts.push(` Performance Degradation: ${result.scalability_metrics.performance_degradation.toFixed(2)}x\n`);
            }
        }
        parts.push(`\n${heavyRule}\n`);
        return parts.join('');
    }
}
// Publish the class on the module's exports, replacing the `void 0` placeholder set near the top of the file.
exports.ShadowSystemBenchmark = ShadowSystemBenchmark;
/**
 * Convenience entry point: builds a fresh ShadowSystemBenchmark with the given
 * options and runs its comprehensive suite.
 *
 * Kept as a function declaration because the export assignment near the top
 * of the file relies on hoisting.
 *
 * @param {object} [options] Forwarded to the ShadowSystemBenchmark constructor.
 * @returns {Promise<object>} Whatever `runComprehensiveBenchmark` resolves to.
 */
async function runShadowSystemBenchmark(options) {
    return await new ShadowSystemBenchmark(options).runComprehensiveBenchmark();
}
/**
 * Formats a benchmark suite as text by delegating to
 * `ShadowSystemBenchmark#generateReport` on a default-constructed instance.
 *
 * Kept as a function declaration because the export assignment near the top
 * of the file relies on hoisting.
 *
 * @param {object} benchmarkSuite Suite object to render.
 * @returns {string} The rendered report.
 */
function generateBenchmarkReport(benchmarkSuite) {
    return new ShadowSystemBenchmark().generateReport(benchmarkSuite);
}
//# sourceMappingURL=shadow-performance.js.map