/*
 * semantic-ds-toolkit
 * Version: (not captured)
 * Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference
 * 240 lines • 9.45 kB
 * JavaScript
 */
;
// Conventional flag emitted by transpilers to mark this CommonJS module as
// having originated from an ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export names up front; the real values are assigned at the
// bottom of the file once the class has been defined.
exports.globalProfiler = exports.PerformanceProfiler = void 0;
// perf_hooks supplies the `performance.now()` clock used for all timings below.
const perf_hooks_1 = require("perf_hooks");
/**
 * Records timing, throughput and memory metrics for named operations, and
 * exposes the hashing / vector helpers that the profiling routines exercise.
 *
 * The helpers are plain scalar JavaScript (manually unrolled loops and BigInt
 * arithmetic); no actual SIMD instructions are issued from this class.
 */
class PerformanceProfiler {
    /** Completed metrics, in the order the operations finished. */
    metrics = [];
    /** Operations that were started but not yet ended, keyed by operation ID. */
    activeOperations = new Map();
    /** Hash and vector helpers created by initializeSimdOperations(). */
    simdOps;

    constructor() {
        this.simdOps = this.initializeSimdOperations();
    }

    /**
     * Builds the helper table stored in `this.simdOps`.
     *
     * @returns {{hash64: Function, vectorSum: Function, vectorDot: Function,
     *            vectorDistance: Function, batchHash: Function}}
     */
    initializeSimdOperations() {
        // Constants borrowed from xxHash64. Hoisted so they are created once
        // per profiler rather than on every hash64 call.
        const PRIME64_1 = 0x9e3779b185ebca87n;
        const PRIME64_2 = 0xc2b2ae3d27d4eb4fn;
        const PRIME64_3 = 0x165667b19e3779f9n;
        const PRIME64_4 = 0x85ebca77c2b2ae63n;
        const PRIME64_5 = 0x27d4eb2f165667c5n;
        // BigInt is arbitrary precision, so every multiply/add must be wrapped
        // back to 64 bits explicitly; otherwise the "64-bit" state grows
        // without bound as more input is consumed.
        const toU64 = (value) => BigInt.asUintN(64, value);

        return {
            /**
             * Non-cryptographic 64-bit hash of a byte buffer. Uses xxHash64's
             * constants and final avalanche, but is NOT bit-compatible with
             * reference XXH64 (for example, the input length is not mixed in).
             *
             * @param {ArrayBuffer|ArrayBufferView} data Bytes to hash.
             * @returns {bigint} Unsigned value in the range [0, 2^64).
             */
            hash64: (data) => {
                // Accept typed arrays / DataViews as well as raw ArrayBuffers,
                // hashing only the bytes the view actually covers.
                const view = ArrayBuffer.isView(data)
                    ? new DataView(data.buffer, data.byteOffset, data.byteLength)
                    : new DataView(data);
                const len = view.byteLength;
                let hash = PRIME64_5;
                let pos = 0;
                // Consume 8 bytes at a time as a little-endian 64-bit word.
                while (pos + 8 <= len) {
                    const k1 = view.getBigUint64(pos, true);
                    hash ^= this.mixHash64(k1);
                    hash = toU64(this.rotateLeft64(hash, 27n) * PRIME64_1 + PRIME64_4);
                    pos += 8;
                }
                // Fold in the trailing 0-7 bytes one at a time.
                while (pos < len) {
                    hash ^= toU64(BigInt(view.getUint8(pos)) * PRIME64_5);
                    hash = toU64(this.rotateLeft64(hash, 11n) * PRIME64_1);
                    pos++;
                }
                // Final avalanche so every input bit influences the result.
                hash ^= hash >> 33n;
                hash = toU64(hash * PRIME64_2);
                hash ^= hash >> 29n;
                hash = toU64(hash * PRIME64_3);
                hash ^= hash >> 32n;
                return hash;
            },
            /**
             * Sums a numeric array using a 4-way unrolled loop.
             *
             * @param {ArrayLike<number>} values
             * @returns {number} The sum (0 for an empty input).
             */
            vectorSum: (values) => {
                const len = values.length;
                const unrolledEnd = len - (len % 4);
                let sum = 0;
                let i = 0;
                for (; i < unrolledEnd; i += 4) {
                    sum += values[i] + values[i + 1] + values[i + 2] + values[i + 3];
                }
                // Leftover 0-3 elements.
                for (; i < len; i++) {
                    sum += values[i];
                }
                return sum;
            },
            /**
             * Dot product of two equally sized numeric arrays.
             *
             * @param {ArrayLike<number>} a
             * @param {ArrayLike<number>} b
             * @returns {number}
             * @throws {Error} If the lengths differ.
             */
            vectorDot: (a, b) => {
                if (a.length !== b.length) {
                    throw new Error('Vector dimensions must match');
                }
                const len = a.length;
                const unrolledEnd = len - (len % 4);
                let dot = 0;
                let i = 0;
                for (; i < unrolledEnd; i += 4) {
                    dot += a[i] * b[i] + a[i + 1] * b[i + 1] +
                        a[i + 2] * b[i + 2] + a[i + 3] * b[i + 3];
                }
                for (; i < len; i++) {
                    dot += a[i] * b[i];
                }
                return dot;
            },
            /**
             * Euclidean distance between two equally sized numeric arrays.
             *
             * @param {ArrayLike<number>} a
             * @param {ArrayLike<number>} b
             * @returns {number}
             * @throws {Error} If the lengths differ.
             */
            vectorDistance: (a, b) => {
                if (a.length !== b.length) {
                    throw new Error('Vector dimensions must match');
                }
                const len = a.length;
                const unrolledEnd = len - (len % 4);
                let distSq = 0;
                let i = 0;
                for (; i < unrolledEnd; i += 4) {
                    const d0 = a[i] - b[i];
                    const d1 = a[i + 1] - b[i + 1];
                    const d2 = a[i + 2] - b[i + 2];
                    const d3 = a[i + 3] - b[i + 3];
                    distSq += d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3;
                }
                for (; i < len; i++) {
                    const d = a[i] - b[i];
                    distSq += d * d;
                }
                return Math.sqrt(distSq);
            },
            /**
             * Hashes each buffer with hash64, sequentially.
             *
             * @param {Array<ArrayBuffer|ArrayBufferView>} datasets
             * @returns {bigint[]} One hash per input, in input order.
             */
            batchHash: (datasets) => datasets.map((data) => this.simdOps.hash64(data))
        };
    }

    /**
     * Per-word mixing step used by hash64: multiply, rotate, multiply, with
     * each product wrapped to 64 bits.
     *
     * @param {bigint} k Input word.
     * @returns {bigint} Mixed word in [0, 2^64).
     */
    mixHash64(k) {
        const PRIME64_2 = 0xc2b2ae3d27d4eb4fn;
        const PRIME64_3 = 0x165667b19e3779f9n;
        let mixed = BigInt.asUintN(64, k * PRIME64_2);
        mixed = this.rotateLeft64(mixed, 31n);
        return BigInt.asUintN(64, mixed * PRIME64_3);
    }

    /**
     * Rotates a value left within a 64-bit window.
     *
     * @param {bigint} value Value to rotate; truncated to its low 64 bits first.
     * @param {bigint} shift Rotation amount (callers in this class pass 11n, 27n, 31n).
     * @returns {bigint} Rotated value in [0, 2^64).
     */
    rotateLeft64(value, shift) {
        // Truncate before and after: without this, bits shifted past bit 63
        // would be kept instead of wrapping around.
        const word = BigInt.asUintN(64, value);
        return BigInt.asUintN(64, (word << shift) | (word >> (64n - shift)));
    }

    /**
     * Marks the start of an operation. Starting an ID that is already active
     * replaces its start time and metadata.
     *
     * @param {string} operationId Key used later by endOperation.
     * @param {*} [metadata] Arbitrary data copied onto the resulting metric.
     */
    startOperation(operationId, metadata) {
        this.activeOperations.set(operationId, {
            start: perf_hooks_1.performance.now(),
            metadata
        });
    }

    /**
     * Finishes an active operation, records its metric and returns it.
     *
     * @param {string} operationId ID previously passed to startOperation.
     * @param {number} [rowsProcessed] When truthy, used to derive rows/second;
     *   otherwise `throughput` is left undefined.
     * @returns {{operation: string, duration: number, throughput: (number|undefined),
     *            memoryUsage: object, timestamp: number, metadata: *}}
     * @throws {Error} If no operation with that ID is active.
     */
    endOperation(operationId, rowsProcessed) {
        const operation = this.activeOperations.get(operationId);
        if (!operation) {
            throw new Error(`No active operation found for ID: ${operationId}`);
        }
        const duration = perf_hooks_1.performance.now() - operation.start;
        // duration is in milliseconds; convert to seconds for rows/second.
        const throughput = rowsProcessed ? (rowsProcessed / (duration / 1000)) : undefined;
        const metric = {
            operation: operationId,
            duration,
            throughput,
            memoryUsage: process.memoryUsage(),
            timestamp: Date.now(),
            metadata: operation.metadata
        };
        this.metrics.push(metric);
        this.activeOperations.delete(operationId);
        return metric;
    }

    /**
     * Times a representative workload over one column: fingerprint hashing,
     * plus a numeric sum for 'float64' / 'int64' columns.
     *
     * @param {{name: string, data_type: string, values: Array}} column
     * @param {string} operation Label used as the prefix of the metric ID.
     * @returns {object} The metric recorded by endOperation.
     */
    profileColumnProcessing(column, operation) {
        const operationId = `${operation}_${Date.now()}`;
        this.startOperation(operationId, {
            columnName: column.name,
            dataType: column.data_type,
            rowCount: column.values.length
        });
        try {
            // Results are intentionally discarded: only the elapsed time matters.
            this.simdOps.hash64(this.columnToBuffer(column));
            if (column.data_type === 'float64' || column.data_type === 'int64') {
                const values = new Float64Array(column.values.filter((v) => v !== null).map(Number));
                this.simdOps.vectorSum(values);
            }
        }
        catch (err) {
            // Do not leave a dangling entry behind when the workload fails.
            this.activeOperations.delete(operationId);
            throw err;
        }
        return this.endOperation(operationId, column.values.length);
    }

    /**
     * Times fingerprint hashing across several columns as a single operation.
     *
     * @param {Array<{name: string, data_type: string, values: Array}>} columns
     * @param {string} operation Label embedded in the metric ID and metadata.
     * @returns {object} The metric recorded by endOperation.
     */
    profileBatchOperation(columns, operation) {
        const operationId = `batch_${operation}_${Date.now()}`;
        const totalRows = columns.reduce((sum, col) => sum + col.values.length, 0);
        this.startOperation(operationId, {
            columnCount: columns.length,
            totalRows,
            operation
        });
        try {
            const buffers = columns.map((col) => this.columnToBuffer(col));
            // Result discarded: only the elapsed time matters.
            this.simdOps.batchHash(buffers);
        }
        catch (err) {
            this.activeOperations.delete(operationId);
            throw err;
        }
        return this.endOperation(operationId, totalRows);
    }

    /**
     * Serialises a column's name, type and first 100 values to UTF-8 JSON
     * bytes for fingerprinting.
     *
     * @param {{name: string, data_type: string, values: Array}} column
     * @returns {ArrayBuffer} Buffer holding the encoded JSON.
     */
    columnToBuffer(column) {
        const encoder = new TextEncoder();
        const serialized = JSON.stringify({
            name: column.name,
            type: column.data_type,
            sample: column.values.slice(0, 100) // Use sample for fingerprinting
        }, 
        // JSON.stringify throws on BigInt; emit the decimal string instead so
        // columns holding BigInt values can still be fingerprinted.
        (_key, value) => (typeof value === 'bigint' ? value.toString() : value));
        return encoder.encode(serialized).buffer;
    }

    /**
     * Runs `operation` repeatedly and reports average latency, derived
     * operations/second, and the change in heap usage across the run.
     *
     * @param {Function} operation Synchronous function to benchmark.
     * @param {number} [iterations=1000] Number of runs requested.
     * @returns {{avgDuration: number, throughput: number, memoryDelta: number}}
     *   `avgDuration` is in milliseconds. Both `avgDuration` and `throughput`
     *   are 0 when no run was executed (iterations <= 0).
     */
    benchmarkThroughput(operation, iterations = 1000) {
        const initialMemory = process.memoryUsage().heapUsed;
        const start = perf_hooks_1.performance.now();
        let completed = 0;
        for (; completed < iterations; completed++) {
            operation();
        }
        const end = perf_hooks_1.performance.now();
        const finalMemory = process.memoryUsage().heapUsed;
        // Average over the runs that actually happened, so zero, negative or
        // fractional iteration counts cannot produce NaN or negative averages.
        const avgDuration = completed > 0 ? (end - start) / completed : 0;
        const throughput = completed > 0 ? 1000 / avgDuration : 0; // operations per second
        const memoryDelta = finalMemory - initialMemory;
        return {
            avgDuration,
            throughput,
            memoryDelta
        };
    }

    /**
     * @returns {object[]} Shallow copy of all recorded metrics.
     */
    getMetrics() {
        return [...this.metrics];
    }

    /**
     * @param {string} operation Substring to look for in each metric's operation ID.
     * @returns {object[]} Metrics whose ID contains `operation`.
     */
    getMetricsByOperation(operation) {
        return this.metrics.filter((m) => m.operation.includes(operation));
    }

    /**
     * Summarises throughput across all metrics that recorded one.
     *
     * @returns {{avgThroughput: number, maxThroughput: number, p95Throughput: number}}
     *   All zeros when no metric has a throughput.
     */
    getThroughputStats() {
        const throughputs = this.metrics
            .filter((m) => m.throughput !== undefined)
            .map((m) => m.throughput)
            .sort((a, b) => a - b);
        if (throughputs.length === 0) {
            return { avgThroughput: 0, maxThroughput: 0, p95Throughput: 0 };
        }
        const avgThroughput = throughputs.reduce((sum, t) => sum + t, 0) / throughputs.length;
        // The list is sorted ascending, so the maximum is the last element.
        // Spreading the array into Math.max() can exceed the engine's
        // argument limit and throw once many metrics have been collected.
        const maxThroughput = throughputs[throughputs.length - 1];
        const p95Index = Math.floor(throughputs.length * 0.95);
        const p95Throughput = throughputs[p95Index];
        return { avgThroughput, maxThroughput, p95Throughput };
    }

    /** Discards all recorded metrics; active operations are left untouched. */
    clearMetrics() {
        this.metrics = [];
    }

    /** The hash64 helper, for callers that only need hashing. */
    get hashFunction() {
        return this.simdOps.hash64;
    }

    /** The vector helpers under shorter names: `sum`, `dot`, `distance`. */
    get vectorOps() {
        return {
            sum: this.simdOps.vectorSum,
            dot: this.simdOps.vectorDot,
            distance: this.simdOps.vectorDistance
        };
    }
}
// Export the class so callers can create their own, isolated profiler instances.
exports.PerformanceProfiler = PerformanceProfiler;
// A single instance constructed here, when this module's code runs, and exported
// for callers that want one shared profiler.
exports.globalProfiler = new PerformanceProfiler();
//# sourceMappingURL=performance-profiler.js.map