datapilot-cli
Enterprise-grade streaming data analysis for CSV, JSON, Excel, TSV, and Parquet files, with comprehensive statistical insights and intelligent relationship detection. Memory-efficient and cross-platform.
"use strict";
/**
* Adaptive Streaming Engine
* Intelligent chunk sizing and streaming optimization for large files
*/
Object.defineProperty(exports, "__esModule", { value: true });
exports.AdaptiveStreamer = void 0;
exports.getGlobalAdaptiveStreamer = getGlobalAdaptiveStreamer;
exports.shutdownGlobalAdaptiveStreamer = shutdownGlobalAdaptiveStreamer;
const events_1 = require("events");
const perf_hooks_1 = require("perf_hooks");
const fs_1 = require("fs");
const memory_optimizer_1 = require("./memory-optimizer");
const logger_1 = require("../utils/logger");
/**
* Adaptive streaming engine with intelligent chunk sizing
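*
* A usage sketch (illustrative only; the file path and processor below are hypothetical):
* @example
* const streamer = getGlobalAdaptiveStreamer({ initialChunkSize: 128 * 1024 });
* const sessionId = await streamer.createSession('./data.csv');
* for await (const lineCount of streamer.streamFile(sessionId, async (chunk) => {
*   // Hypothetical processor: count newline-delimited records in this chunk.
*   return chunk.toString('utf8').split('\n').length - 1;
* })) {
*   // consume per-chunk results here
* }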
*/
class AdaptiveStreamer extends events_1.EventEmitter {
options;
memoryOptimizer;
activeSessions = new Map();
performanceHistory = [];
adaptationHistory = [];
constructor(options = {}) {
super();
this.options = {
initialChunkSize: options.initialChunkSize || 64 * 1024, // 64KB
minChunkSize: options.minChunkSize || 4 * 1024, // 4KB
maxChunkSize: options.maxChunkSize || 16 * 1024 * 1024, // 16MB
adaptationInterval: options.adaptationInterval || 5, // Every 5 chunks
performanceTargetMBps: options.performanceTargetMBps || 50, // 50 MB/s target
memoryPressureThreshold: options.memoryPressureThreshold || 0.8,
enableProgressiveLoading: options.enableProgressiveLoading ?? true,
maxConcurrentChunks: options.maxConcurrentChunks || 3,
};
this.memoryOptimizer = (0, memory_optimizer_1.getGlobalMemoryOptimizer)();
// Listen to memory pressure events
this.memoryOptimizer.on('memory-pressure', (data) => {
this.handleMemoryPressure(data.pressure);
});
this.memoryOptimizer.on('memory-critical', (data) => {
this.handleCriticalMemory(data.pressure);
});
logger_1.logger.info(`Adaptive streamer initialized with ${this.formatBytes(this.options.initialChunkSize)} initial chunk size`);
}
/**
* Create a new streaming session
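* Stats the file, estimates the chunk count, and registers initial metrics.
* @param filePath - path to the file to stream
* @returns resolves to the new session id for use with streamFile() and getSessionStats()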
*/
async createSession(filePath) {
// Using Math.random() is acceptable here as session IDs are used only for
// internal performance tracking and session management, not for security purposes.
// These IDs don't control access to sensitive resources or authentication.
const sessionId = `stream-${Date.now()}-${Math.random().toString(36).slice(2, 11)}`;
try {
const fileStats = await fs_1.promises.stat(filePath);
const estimatedChunks = Math.ceil(fileStats.size / this.options.initialChunkSize);
const session = {
id: sessionId,
filePath,
fileSize: fileStats.size,
totalChunks: estimatedChunks,
processedChunks: 0,
metrics: {
bytesProcessed: 0,
chunksProcessed: 0,
averageChunkSize: this.options.initialChunkSize,
processingRate: 0,
memoryEfficiency: 1,
adaptationCount: 0,
totalTime: 0,
},
startTime: perf_hooks_1.performance.now(),
adaptiveChunkSize: this.options.initialChunkSize,
};
this.activeSessions.set(sessionId, session);
logger_1.logger.info(`Created streaming session ${sessionId} for ${this.formatBytes(fileStats.size)} file`);
this.emit('session-created', { sessionId, fileSize: fileStats.size, estimatedChunks });
return sessionId;
}
catch (error) {
throw new Error(`Failed to create streaming session: ${error instanceof Error ? error.message : String(error)}`);
}
}
/**
* Process file with adaptive streaming
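* Async generator that yields the processor's result for each chunk read from the file.
* The chunk size is re-evaluated as the stream progresses based on throughput and memory pressure.
* @param sessionId - id returned by createSession()
* @param processor - async callback (chunk: Buffer, metadata) invoked once per chunk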
*/
async *streamFile(sessionId, processor) {
const session = this.activeSessions.get(sessionId);
if (!session) {
throw new Error(`Session ${sessionId} not found`);
}
const fileHandle = await fs_1.promises.open(session.filePath, 'r');
let currentPosition = 0;
let chunkIndex = 0;
try {
while (currentPosition < session.fileSize) {
const chunkStartTime = perf_hooks_1.performance.now();
// Determine optimal chunk size
const optimalChunkSize = this.calculateOptimalChunkSize(session, chunkIndex);
const actualChunkSize = Math.floor(Math.min(optimalChunkSize, session.fileSize - currentPosition));
// Get buffer from memory optimizer
const buffer = this.memoryOptimizer.getBuffer(actualChunkSize);
try {
// Read chunk - ensure position is an integer for Windows compatibility
const readResult = await fileHandle.read(buffer, 0, actualChunkSize, Math.floor(currentPosition));
const chunk = buffer.subarray(0, readResult.bytesRead);
// Process chunk
const metadata = {
chunkIndex,
chunkSize: readResult.bytesRead,
filePosition: currentPosition,
isLastChunk: currentPosition + readResult.bytesRead >= session.fileSize,
sessionId,
adaptiveSize: optimalChunkSize,
};
const processingStartTime = perf_hooks_1.performance.now();
const result = await processor(chunk, metadata);
const processingTime = perf_hooks_1.performance.now() - processingStartTime;
// Update session metrics
this.updateSessionMetrics(session, readResult.bytesRead, processingTime, chunkStartTime);
// Adapt chunk size if needed
if (chunkIndex % this.options.adaptationInterval === 0 && chunkIndex > 0) {
await this.adaptChunkSize(session, processingTime, actualChunkSize);
}
currentPosition += readResult.bytesRead;
chunkIndex++;
// Return buffer to pool
this.memoryOptimizer.returnBuffer(buffer);
// Emit progress
this.emit('chunk-processed', {
sessionId,
chunkIndex,
progress: currentPosition / session.fileSize,
processingRate: session.metrics.processingRate,
adaptiveChunkSize: session.adaptiveChunkSize,
});
yield result;
}
catch (processingError) {
this.memoryOptimizer.returnBuffer(buffer);
throw processingError;
}
}
}
finally {
await fileHandle.close();
this.finalizeSession(session);
}
}
/**
* Calculate optimal chunk size based on current conditions
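* Combines the memory optimizer's recommendation, current throughput versus the
* MB/s target, and the remaining file size, then clamps to [minChunkSize, maxChunkSize].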
*/
calculateOptimalChunkSize(session, chunkIndex) {
let baseSize = session.adaptiveChunkSize;
// Get memory optimizer recommendation
const memoryRecommendation = this.memoryOptimizer.getAdaptiveChunkSize(baseSize);
baseSize = memoryRecommendation.recommendedSize;
// Apply performance-based adaptations
if (session.metrics.processingRate > 0) {
const performanceRatio = session.metrics.processingRate / this.options.performanceTargetMBps;
if (performanceRatio < 0.5) {
// Performance is poor - reduce chunk size for better parallelization
baseSize = Math.max(this.options.minChunkSize, Math.floor(baseSize * 0.7));
}
else if (performanceRatio > 1.5 && memoryRecommendation.memoryPressure < 0.6) {
// Performance is good and memory is available - can increase chunk size
baseSize = Math.min(this.options.maxChunkSize, Math.floor(baseSize * 1.3));
}
}
// File size consideration
const remainingSize = session.fileSize - session.metrics.bytesProcessed;
const remainingChunks = Math.max(1, Math.ceil(remainingSize / baseSize));
// If near end of file, adjust to avoid tiny last chunk
if (remainingChunks <= 2 && remainingSize > 0) {
baseSize = Math.floor(remainingSize / 2);
}
// Ensure within bounds
return Math.max(this.options.minChunkSize, Math.min(this.options.maxChunkSize, baseSize));
}
/**
* Update session metrics
*/
updateSessionMetrics(session, bytesProcessed, processingTime, chunkStartTime) {
const totalChunkTime = perf_hooks_1.performance.now() - chunkStartTime;
const currentTime = perf_hooks_1.performance.now();
const elapsedTime = (currentTime - session.startTime) / 1000; // seconds
session.metrics.bytesProcessed += bytesProcessed;
session.metrics.chunksProcessed++;
session.metrics.totalTime = elapsedTime;
// Calculate average chunk size
session.metrics.averageChunkSize =
session.metrics.bytesProcessed / session.metrics.chunksProcessed;
// Calculate processing rate (MB/s)
if (elapsedTime > 0) {
session.metrics.processingRate = session.metrics.bytesProcessed / 1024 / 1024 / elapsedTime;
}
// Calculate memory efficiency
const memoryPressure = this.memoryOptimizer.getMemoryPressure();
session.metrics.memoryEfficiency = 1 - memoryPressure;
// Update performance history for trend analysis
this.performanceHistory.push(session.metrics.processingRate);
if (this.performanceHistory.length > 20) {
this.performanceHistory.shift();
}
}
/**
* Adapt chunk size based on performance
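* A new size is applied only when it differs from the current size by more than 10%;
* each adaptation is logged and emitted as a 'chunk-size-adapted' event.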
*/
async adaptChunkSize(session, processingTime, currentChunkSize) {
const currentRate = session.metrics.processingRate;
const targetRate = this.options.performanceTargetMBps;
const memoryPressure = this.memoryOptimizer.getMemoryPressure();
let newChunkSize = session.adaptiveChunkSize;
let adaptationReason = 'No change needed';
// Performance-based adaptation
if (currentRate > 0) {
const performanceRatio = currentRate / targetRate;
if (performanceRatio < 0.6) {
// Performance is significantly below target
if (memoryPressure < 0.5) {
// Try larger chunks for better throughput
newChunkSize = Math.min(this.options.maxChunkSize, Math.floor(session.adaptiveChunkSize * 1.4));
adaptationReason = 'Low performance, increasing chunk size';
}
else {
// Memory pressure is high, reduce chunk size
newChunkSize = Math.max(this.options.minChunkSize, Math.floor(session.adaptiveChunkSize * 0.8));
adaptationReason = 'Low performance with memory pressure, reducing chunk size';
}
}
else if (performanceRatio > 1.5 && memoryPressure < 0.3) {
// Performance is good, memory is available - optimize for efficiency
newChunkSize = Math.min(this.options.maxChunkSize, Math.floor(session.adaptiveChunkSize * 1.2));
adaptationReason = 'Good performance, optimizing chunk size';
}
}
// Memory pressure adaptation
if (memoryPressure > this.options.memoryPressureThreshold) {
const pressureReduction = Math.max(0.5, 1 - memoryPressure);
newChunkSize = Math.max(this.options.minChunkSize, Math.floor(newChunkSize * pressureReduction));
adaptationReason = `Memory pressure adaptation (${(memoryPressure * 100).toFixed(1)}%)`;
}
// Apply adaptation if significant change
const changeRatio = Math.abs(newChunkSize - session.adaptiveChunkSize) / session.adaptiveChunkSize;
if (changeRatio > 0.1) {
// 10% change threshold
const oldSize = session.adaptiveChunkSize;
session.adaptiveChunkSize = newChunkSize;
session.metrics.adaptationCount++;
this.adaptationHistory.push(newChunkSize);
if (this.adaptationHistory.length > 10) {
this.adaptationHistory.shift();
}
logger_1.logger.info(`Session ${session.id}: Adapted chunk size from ${this.formatBytes(oldSize)} to ${this.formatBytes(newChunkSize)} - ${adaptationReason}`);
this.emit('chunk-size-adapted', {
sessionId: session.id,
oldSize,
newSize: newChunkSize,
reason: adaptationReason,
performanceRate: currentRate,
memoryPressure,
});
}
}
/**
* Handle memory pressure by reducing chunk sizes
*/
handleMemoryPressure(pressure) {
const reductionFactor = Math.max(0.3, 1 - pressure);
for (const session of this.activeSessions.values()) {
const oldSize = session.adaptiveChunkSize;
session.adaptiveChunkSize = Math.max(this.options.minChunkSize, Math.floor(session.adaptiveChunkSize * reductionFactor));
if (session.adaptiveChunkSize !== oldSize) {
logger_1.logger.warn(`Session ${session.id}: Reduced chunk size due to memory pressure: ${this.formatBytes(oldSize)} → ${this.formatBytes(session.adaptiveChunkSize)}`);
}
}
}
/**
* Handle critical memory by aggressively reducing chunk sizes
*/
handleCriticalMemory(pressure) {
logger_1.logger.error(`Critical memory pressure (${(pressure * 100).toFixed(1)}%) - emergency chunk size reduction`);
for (const session of this.activeSessions.values()) {
session.adaptiveChunkSize = this.options.minChunkSize;
}
// Force garbage collection
this.memoryOptimizer.forceGarbageCollection();
}
/**
* Finalize session and cleanup
*/
finalizeSession(session) {
const finalTime = perf_hooks_1.performance.now();
session.metrics.totalTime = (finalTime - session.startTime) / 1000;
logger_1.logger.info(`Session ${session.id} completed: ${this.formatBytes(session.metrics.bytesProcessed)} in ${session.metrics.totalTime.toFixed(2)}s (${session.metrics.processingRate.toFixed(2)} MB/s)`);
this.emit('session-completed', {
sessionId: session.id,
metrics: session.metrics,
adaptationCount: session.metrics.adaptationCount,
});
this.activeSessions.delete(session.id);
}
/**
* Get session statistics
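* @returns the live session object, or null if the id is unknown or the session has completed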
*/
getSessionStats(sessionId) {
return this.activeSessions.get(sessionId) || null;
}
/**
* Get overall streaming statistics
*/
getOverallStats() {
const activeSessions = Array.from(this.activeSessions.values());
const totalBytesProcessed = activeSessions.reduce((sum, s) => sum + s.metrics.bytesProcessed, 0);
const avgProcessingRate = activeSessions.length > 0
? activeSessions.reduce((sum, s) => sum + s.metrics.processingRate, 0) /
activeSessions.length
: 0;
return {
activeSessions: activeSessions.length,
totalBytesProcessed,
averageProcessingRate: avgProcessingRate,
performanceHistory: [...this.performanceHistory],
adaptationHistory: [...this.adaptationHistory],
memoryStats: this.memoryOptimizer.getDetailedStats(),
options: this.options,
};
}
/**
* Format bytes for human-readable output
*/
formatBytes(bytes) {
const units = ['B', 'KB', 'MB', 'GB'];
let size = bytes;
let unitIndex = 0;
while (size >= 1024 && unitIndex < units.length - 1) {
size /= 1024;
unitIndex++;
}
return `${size.toFixed(unitIndex > 0 ? 1 : 0)}${units[unitIndex]}`;
}
/**
* Cleanup all sessions and resources
*/
cleanup() {
this.activeSessions.clear();
this.performanceHistory = [];
this.adaptationHistory = [];
logger_1.logger.info('Adaptive streamer cleanup completed');
}
}
exports.AdaptiveStreamer = AdaptiveStreamer;
/**
* Global adaptive streamer instance
*/
let globalAdaptiveStreamer = null;
/**
* Get or create global adaptive streamer
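* Note: options are applied only when the singleton is first created; subsequent
* calls return the existing instance unchanged.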
*/
function getGlobalAdaptiveStreamer(options) {
if (!globalAdaptiveStreamer) {
globalAdaptiveStreamer = new AdaptiveStreamer(options);
}
return globalAdaptiveStreamer;
}
/**
* Shutdown global adaptive streamer
*/
function shutdownGlobalAdaptiveStreamer() {
if (globalAdaptiveStreamer) {
globalAdaptiveStreamer.cleanup();
globalAdaptiveStreamer = null;
}
}
//# sourceMappingURL=adaptive-streamer.js.map