UNPKG

semantic-ds-toolkit

Version:

Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference

github.com/kneelinghorse/semantic-ds-toolkit

240 lines • 9.45 kB

JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.globalProfiler = exports.PerformanceProfiler = void 0;
const perf_hooks_1 = require("perf_hooks");

// 64-bit odd multiplicative constants (the prime set used by xxHash64).
const PRIME64_1 = 0x9e3779b185ebca87n;
const PRIME64_2 = 0xc2b2ae3d27d4eb4fn;
const PRIME64_3 = 0x165667b19e3779f9n;
const PRIME64_4 = 0x85ebca77c2b2ae63n;
const PRIME64_5 = 0x27d4eb2f165667c5n;

/**
 * Truncates a BigInt to an unsigned 64-bit value.
 *
 * BigInt arithmetic is arbitrary-precision, so every multiply/add/shift in
 * the hash must be wrapped explicitly to emulate 64-bit overflow.
 */
const toUint64 = (value) => BigInt.asUintN(64, value);

/** JSON replacer that stringifies BigInt values, which JSON.stringify rejects. */
const bigIntSafeReplacer = (_key, value) => (typeof value === 'bigint' ? value.toString() : value);

/**
 * Collects timing, throughput and memory metrics for named operations and
 * exposes a few numeric helpers (hashing and unrolled vector arithmetic).
 */
class PerformanceProfiler {
    /** Completed metrics, in the order operations were ended. */
    metrics = [];
    /** Operations started but not yet ended, keyed by operation id. */
    activeOperations = new Map();
    /** Hash / vector helper functions created by initializeSimdOperations(). */
    simdOps;

    constructor() {
        this.simdOps = this.initializeSimdOperations();
    }

    /**
     * Builds the helper table stored in `simdOps`.
     *
     * The helpers are arrow functions so they stay bound to this instance
     * even when handed out through the `hashFunction` / `vectorOps` getters.
     *
     * @returns {{hash64: Function, vectorSum: Function, vectorDot: Function,
     *            vectorDistance: Function, batchHash: Function}}
     */
    initializeSimdOperations() {
        return {
            /**
             * Hashes binary data to an unsigned 64-bit BigInt.
             *
             * Modelled on xxHash64 (same primes and final avalanche) but it
             * uses a single accumulator and mixes in neither a seed nor the
             * input length, so results are not expected to match reference
             * XXH64 digests.
             *
             * @param {ArrayBuffer|ArrayBufferView} data - bytes to hash.
             * @returns {bigint} value in the range [0, 2^64).
             */
            hash64: (data) => {
                // Accept typed arrays / Buffers as well as raw ArrayBuffers,
                // honouring the view's offset and length.
                const view = ArrayBuffer.isView(data)
                    ? new DataView(data.buffer, data.byteOffset, data.byteLength)
                    : new DataView(data);
                const len = view.byteLength;
                let hash = PRIME64_5;
                let pos = 0;
                // Consume little-endian 8-byte words.
                while (pos + 8 <= len) {
                    const k1 = view.getBigUint64(pos, true);
                    hash ^= this.mixHash64(k1);
                    hash = toUint64(this.rotateLeft64(hash, 27n) * PRIME64_1 + PRIME64_4);
                    pos += 8;
                }
                // Consume the 0-7 trailing bytes one at a time.
                while (pos < len) {
                    hash ^= toUint64(BigInt(view.getUint8(pos)) * PRIME64_5);
                    hash = toUint64(this.rotateLeft64(hash, 11n) * PRIME64_1);
                    pos++;
                }
                // Final avalanche; each multiply is wrapped back to 64 bits.
                hash ^= hash >> 33n;
                hash = toUint64(hash * PRIME64_2);
                hash ^= hash >> 29n;
                hash = toUint64(hash * PRIME64_3);
                hash ^= hash >> 32n;
                return hash;
            },
            /**
             * Sums a numeric array, four elements per iteration.
             * The grouping is kept as-is so floating-point results are stable.
             *
             * @param {ArrayLike<number>} values
             * @returns {number}
             */
            vectorSum: (values) => {
                const len = values.length;
                const unrolledEnd = len - (len % 4);
                let sum = 0;
                let i = 0;
                for (; i < unrolledEnd; i += 4) {
                    sum += values[i] + values[i + 1] + values[i + 2] + values[i + 3];
                }
                for (; i < len; i++) {
                    sum += values[i];
                }
                return sum;
            },
            /**
             * Dot product of two equal-length numeric arrays.
             *
             * @param {ArrayLike<number>} a
             * @param {ArrayLike<number>} b
             * @returns {number}
             * @throws {Error} when the lengths differ.
             */
            vectorDot: (a, b) => {
                if (a.length !== b.length) {
                    throw new Error('Vector dimensions must match');
                }
                const len = a.length;
                const unrolledEnd = len - (len % 4);
                let dot = 0;
                let i = 0;
                for (; i < unrolledEnd; i += 4) {
                    dot += a[i] * b[i] + a[i + 1] * b[i + 1] + a[i + 2] * b[i + 2] + a[i + 3] * b[i + 3];
                }
                for (; i < len; i++) {
                    dot += a[i] * b[i];
                }
                return dot;
            },
            /**
             * Euclidean distance between two equal-length numeric arrays.
             *
             * @param {ArrayLike<number>} a
             * @param {ArrayLike<number>} b
             * @returns {number}
             * @throws {Error} when the lengths differ.
             */
            vectorDistance: (a, b) => {
                if (a.length !== b.length) {
                    throw new Error('Vector dimensions must match');
                }
                const len = a.length;
                const unrolledEnd = len - (len % 4);
                let distSq = 0;
                let i = 0;
                for (; i < unrolledEnd; i += 4) {
                    const d0 = a[i] - b[i];
                    const d1 = a[i + 1] - b[i + 1];
                    const d2 = a[i + 2] - b[i + 2];
                    const d3 = a[i + 3] - b[i + 3];
                    distSq += d0 * d0 + d1 * d1 + d2 * d2 + d3 * d3;
                }
                for (; i < len; i++) {
                    const d = a[i] - b[i];
                    distSq += d * d;
                }
                return Math.sqrt(distSq);
            },
            /**
             * Hashes each dataset in turn (sequentially) with hash64.
             *
             * @param {Array<ArrayBuffer|ArrayBufferView>} datasets
             * @returns {bigint[]}
             */
            batchHash: (datasets) => datasets.map((data) => this.simdOps.hash64(data)),
        };
    }

    /**
     * Mixes one 64-bit input word: multiply, rotate left by 31, multiply.
     *
     * @param {bigint} k
     * @returns {bigint} value in the range [0, 2^64).
     */
    mixHash64(k) {
        const scaled = toUint64(k * PRIME64_2);
        return toUint64(this.rotateLeft64(scaled, 31n) * PRIME64_3);
    }

    /**
     * Rotates the low 64 bits of `value` left by `shift` bits.
     *
     * @param {bigint} value - truncated to 64 bits before rotating.
     * @param {bigint} shift - rotation amount, taken modulo 64.
     * @returns {bigint} value in the range [0, 2^64).
     */
    rotateLeft64(value, shift) {
        const word = toUint64(value);
        const amount = BigInt.asUintN(6, shift); // shift mod 64
        return toUint64((word << amount) | (word >> (64n - amount)));
    }

    /**
     * Marks an operation as started and records its start time.
     * Starting an id that is already active overwrites the earlier entry.
     *
     * @param {string} operationId
     * @param {object} [metadata] - stored and copied onto the final metric.
     */
    startOperation(operationId, metadata) {
        this.activeOperations.set(operationId, {
            start: perf_hooks_1.performance.now(),
            metadata,
        });
    }

    /**
     * Finishes an active operation, records its metric and returns it.
     *
     * @param {string} operationId - id previously passed to startOperation.
     * @param {number} [rowsProcessed] - when truthy, used to derive rows/second;
     *   a value of 0 (or omission) leaves `throughput` undefined.
     * @returns {object} the recorded metric.
     * @throws {Error} when no operation with this id is active.
     */
    endOperation(operationId, rowsProcessed) {
        const operation = this.activeOperations.get(operationId);
        if (!operation) {
            throw new Error(`No active operation found for ID: ${operationId}`);
        }
        const duration = perf_hooks_1.performance.now() - operation.start;
        // duration is in milliseconds; throughput is expressed per second.
        const throughput = rowsProcessed ? (rowsProcessed / (duration / 1000)) : undefined;
        const metric = {
            operation: operationId,
            duration,
            throughput,
            memoryUsage: process.memoryUsage(),
            timestamp: Date.now(),
            metadata: operation.metadata,
        };
        this.metrics.push(metric);
        this.activeOperations.delete(operationId);
        return metric;
    }

    /**
     * Times fingerprinting of a single column (plus a sum for numeric columns).
     *
     * @param {{name: string, data_type: string, values: Array}} column
     * @param {string} operation - label used as the metric id prefix.
     * @returns {object} the recorded metric.
     */
    profileColumnProcessing(column, operation) {
        const operationId = `${operation}_${Date.now()}`;
        this.startOperation(operationId, {
            columnName: column.name,
            dataType: column.data_type,
            rowCount: column.values.length,
        });
        try {
            // Results are discarded on purpose: only the elapsed time matters.
            this.simdOps.hash64(this.columnToBuffer(column));
            if (column.data_type === 'float64' || column.data_type === 'int64') {
                const values = new Float64Array(column.values.filter((v) => v !== null).map(Number));
                this.simdOps.vectorSum(values);
            }
        }
        catch (err) {
            // Do not leave a dangling entry behind when the workload fails.
            this.activeOperations.delete(operationId);
            throw err;
        }
        return this.endOperation(operationId, column.values.length);
    }

    /**
     * Times fingerprinting of several columns as one operation.
     *
     * @param {Array<{name: string, data_type: string, values: Array}>} columns
     * @param {string} operation - label embedded in the metric id.
     * @returns {object} the recorded metric.
     */
    profileBatchOperation(columns, operation) {
        const operationId = `batch_${operation}_${Date.now()}`;
        const totalRows = columns.reduce((sum, col) => sum + col.values.length, 0);
        this.startOperation(operationId, {
            columnCount: columns.length,
            totalRows,
            operation,
        });
        try {
            const buffers = columns.map((col) => this.columnToBuffer(col));
            this.simdOps.batchHash(buffers);
        }
        catch (err) {
            // Do not leave a dangling entry behind when the workload fails.
            this.activeOperations.delete(operationId);
            throw err;
        }
        return this.endOperation(operationId, totalRows);
    }

    /**
     * Serialises a column's name, type and first 100 values to UTF-8 JSON.
     *
     * BigInt values are written as decimal strings because JSON.stringify
     * cannot serialise them natively.
     *
     * @param {{name: string, data_type: string, values: Array}} column
     * @returns {ArrayBuffer} buffer containing exactly the encoded bytes.
     */
    columnToBuffer(column) {
        const serialized = JSON.stringify({
            name: column.name,
            type: column.data_type,
            sample: column.values.slice(0, 100), // Use sample for fingerprinting
        }, bigIntSafeReplacer);
        const bytes = new TextEncoder().encode(serialized);
        const coversWholeBuffer = bytes.byteOffset === 0 && bytes.byteLength === bytes.buffer.byteLength;
        // Only copy when the view is a window into a larger buffer.
        return coversWholeBuffer
            ? bytes.buffer
            : bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength);
    }

    /**
     * Runs `operation` repeatedly and reports average cost.
     *
     * @param {Function} operation - synchronous function to benchmark.
     * @param {number} [iterations=1000] - number of calls; 0 yields NaN averages.
     * @returns {{avgDuration: number, throughput: number, memoryDelta: number}}
     *   average milliseconds per call, calls per second, and heapUsed change in bytes.
     */
    benchmarkThroughput(operation, iterations = 1000) {
        const initialMemory = process.memoryUsage().heapUsed;
        const start = perf_hooks_1.performance.now();
        for (let i = 0; i < iterations; i++) {
            operation();
        }
        const end = perf_hooks_1.performance.now();
        const finalMemory = process.memoryUsage().heapUsed;
        const avgDuration = (end - start) / iterations;
        const throughput = 1000 / avgDuration; // operations per second
        const memoryDelta = finalMemory - initialMemory;
        return { avgDuration, throughput, memoryDelta };
    }

    /** @returns {object[]} a shallow copy of all recorded metrics. */
    getMetrics() {
        return [...this.metrics];
    }

    /**
     * @param {string} operation - substring to look for in metric ids.
     * @returns {object[]} metrics whose operation id contains `operation`.
     */
    getMetricsByOperation(operation) {
        return this.metrics.filter((m) => m.operation.includes(operation));
    }

    /**
     * Summarises throughput over metrics that recorded one.
     *
     * @returns {{avgThroughput: number, maxThroughput: number, p95Throughput: number}}
     *   all zeros when no metric has a throughput.
     */
    getThroughputStats() {
        const throughputs = this.metrics
            .filter((m) => m.throughput !== undefined)
            .map((m) => m.throughput)
            .sort((a, b) => a - b);
        if (throughputs.length === 0) {
            return { avgThroughput: 0, maxThroughput: 0, p95Throughput: 0 };
        }
        const avgThroughput = throughputs.reduce((sum, t) => sum + t, 0) / throughputs.length;
        // The array is sorted ascending, so the maximum is its last element
        // (avoids spreading a potentially huge array into Math.max).
        const maxThroughput = throughputs[throughputs.length - 1];
        const p95Index = Math.floor(throughputs.length * 0.95);
        const p95Throughput = throughputs[p95Index];
        return { avgThroughput, maxThroughput, p95Throughput };
    }

    /** Discards all recorded metrics (active operations are untouched). */
    clearMetrics() {
        this.metrics = [];
    }

    /** The instance's hash64 helper. */
    get hashFunction() {
        return this.simdOps.hash64;
    }

    /** The instance's vector helpers as `{ sum, dot, distance }`. */
    get vectorOps() {
        return {
            sum: this.simdOps.vectorSum,
            dot: this.simdOps.vectorDot,
            distance: this.simdOps.vectorDistance,
        };
    }
}
exports.PerformanceProfiler = PerformanceProfiler;
exports.globalProfiler = new PerformanceProfiler();
//# sourceMappingURL=performance-profiler.js.map