/*
 * datapilot-cli
 * Version:
 * Enterprise-grade streaming multi-format data analysis with comprehensive statistical insights and intelligent relationship detection - supports CSV, JSON, Excel, TSV, Parquet - memory-efficient, cross-platform
 * 540 lines • 21.1 kB
 * JavaScript
 */
"use strict";
/**
* Section Cache Manager - Intelligent caching of section results
* Addresses GitHub issue #23: Prevent re-processing files for each section
*/
/**
 * TypeScript emit helper: exposes property `k` of module object `m` on `o`
 * under the name `k2` (defaults to `k`).
 *
 * When Object.create is available the property is defined through a
 * descriptor: the source descriptor is reused as-is unless it is missing,
 * is an accessor on a non-ES module, or is a writable/configurable data
 * property - in those cases an enumerable getter reading `m[k]` is installed
 * so later changes on `m` stay visible. Otherwise the value is copied.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy path: plain one-off value copy.
        return function (o, m, k, k2) {
            if (k2 === undefined) k2 = k;
            o[k2] = m[k];
        };
    }
    return function (o, m, k, k2) {
        var targetKey = k2 === undefined ? k : k2;
        var descriptor = Object.getOwnPropertyDescriptor(m, k);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !m.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return m[k]; } };
        }
        Object.defineProperty(o, targetKey, descriptor);
    };
})();
/**
 * TypeScript emit helper: stores `v` as the "default" member of `o`.
 * Uses an enumerable property definition when Object.create is available,
 * and a plain assignment otherwise.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            var descriptor = { enumerable: true, value: v };
            Object.defineProperty(o, "default", descriptor);
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
/**
 * TypeScript emit helper implementing `import * as ns from "..."` for
 * CommonJS modules.
 *
 * A module flagged with `__esModule` is returned unchanged. Anything else is
 * wrapped in a fresh object that re-exposes every own property except
 * "default" (via __createBinding) and carries the original module as its
 * "default" member (via __setModuleDefault).
 */
var __importStar = (this && this.__importStar) || (function () {
    // Resolved on first use, then reused for every later call.
    var resolvedKeyLister = null;
    var listOwnKeys = function (target) {
        if (resolvedKeyLister === null) {
            resolvedKeyLister = Object.getOwnPropertyNames || function (obj) {
                var names = [];
                for (var name in obj) {
                    if (Object.prototype.hasOwnProperty.call(obj, name)) {
                        names[names.length] = name;
                    }
                }
                return names;
            };
        }
        return resolvedKeyLister(target);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                var key = keys[idx];
                if (key === "default") {
                    continue;
                }
                __createBinding(namespace, mod, key);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.SectionCacheManager = void 0;
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const os = __importStar(require("os"));
const crypto = __importStar(require("crypto"));
const logger_1 = require("../utils/logger");
/**
 * Caches per-section analysis results so a file does not have to be
 * re-processed for every section.
 *
 * Entries live in an in-memory Map and/or as `<cacheKey>.json` files inside
 * `config.cacheDirectory`. The cache key is a SHA-256 digest derived from the
 * file hash, section, file size, modification time and cache version.
 * All public operations are best-effort: failures are logged and never thrown.
 */
class SectionCacheManager {
    /** In-memory entries keyed by cache key. */
    memoryCache = new Map();
    /** Running counters for hits, misses and memory evictions. */
    cacheStats = {
        hits: 0,
        misses: 0,
        evictions: 0,
    };
    /** Effective configuration (defaults merged with caller overrides). */
    config;
    /** Entries written with a different version are treated as invalid. */
    CACHE_VERSION = '1.0.0';
    /**
     * @param {object} [config] - Overrides for any of the default settings
     *   (enabled, maxSizeBytes, maxEntries, ttlMs, cacheDirectory,
     *   enableDiskCache, enableMemoryCache, compressionLevel).
     */
    constructor(config) {
        this.config = {
            enabled: true,
            maxSizeBytes: 500 * 1024 * 1024, // 500MB default
            maxEntries: 1000,
            ttlMs: 24 * 60 * 60 * 1000, // 24 hours
            cacheDirectory: path.join(os.tmpdir(), 'datapilot-cache'),
            enableDiskCache: true,
            enableMemoryCache: true,
            compressionLevel: 6, // gzip compression level
            ...config,
        };
        this.ensureCacheDirectory();
        // A constructor cannot await; attach a handler so a failure of the
        // background cleanup never becomes an unhandled rejection.
        this.cleanupExpiredEntries().catch((error) => {
            logger_1.logger.warn(`Cache cleanup error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        });
    }
    /**
     * Get cached result for a section.
     *
     * Looks in the memory cache first, then on disk. A disk hit is promoted
     * to the memory cache when memory caching is enabled.
     *
     * @param {string} filePath - Path of the analysed file.
     * @param {string} section - Section identifier (part of the cache key).
     * @returns {Promise<*>} The cached data, or null on a miss, when caching
     *   is disabled, or when any error occurs.
     */
    async get(filePath, section) {
        if (!this.config.enabled)
            return null;
        try {
            const cacheKey = await this.generateCacheKey(filePath, section);
            // Try memory cache first
            if (this.config.enableMemoryCache) {
                const memoryEntry = this.memoryCache.get(cacheKey);
                if (memoryEntry) {
                    // isValidEntry is async: it must be awaited, otherwise the
                    // pending Promise is always truthy and nothing is ever rejected.
                    if (await this.isValidEntry(memoryEntry, filePath)) {
                        this.cacheStats.hits++;
                        logger_1.logger.debug(`Cache hit (memory): ${section} for ${path.basename(filePath)}`);
                        return memoryEntry.data;
                    }
                    // Stale entry: drop it so it stops occupying a slot.
                    this.memoryCache.delete(cacheKey);
                }
            }
            // Try disk cache
            if (this.config.enableDiskCache) {
                const diskEntry = await this.getDiskEntry(cacheKey);
                if (diskEntry && (await this.isValidEntry(diskEntry, filePath))) {
                    // Promote to memory cache if enabled
                    if (this.config.enableMemoryCache) {
                        this.setMemoryEntry(cacheKey, diskEntry);
                    }
                    this.cacheStats.hits++;
                    logger_1.logger.debug(`Cache hit (disk): ${section} for ${path.basename(filePath)}`);
                    return diskEntry.data;
                }
            }
            this.cacheStats.misses++;
            logger_1.logger.debug(`Cache miss: ${section} for ${path.basename(filePath)}`);
            return null;
        }
        catch (error) {
            logger_1.logger.warn(`Cache get error for ${section}: ${error instanceof Error ? error.message : 'Unknown error'}`);
            this.cacheStats.misses++;
            return null;
        }
    }
    /**
     * Set cached result for a section.
     *
     * Stores the entry in memory (only when its serialized size is below
     * 10MB) and on disk, according to the configuration, then runs cleanup.
     * Errors are logged as warnings and not propagated.
     *
     * @param {string} filePath - Path of the analysed file.
     * @param {string} section - Section identifier.
     * @param {*} data - JSON-serializable result to cache.
     * @returns {Promise<void>}
     */
    async set(filePath, section, data) {
        if (!this.config.enabled)
            return;
        try {
            const cacheKey = await this.generateCacheKey(filePath, section);
            const entry = await this.createCacheEntry(filePath, section, data);
            // Store in memory cache if enabled and data is not too large
            if (this.config.enableMemoryCache) {
                const isSmallEnough = entry.dataSize < 10 * 1024 * 1024; // 10MB threshold
                if (isSmallEnough) {
                    this.setMemoryEntry(cacheKey, entry);
                    logger_1.logger.debug(`Cached in memory: ${section} for ${path.basename(filePath)} (${this.formatBytes(entry.dataSize)})`);
                }
            }
            // Store in disk cache if enabled
            if (this.config.enableDiskCache) {
                await this.setDiskEntry(cacheKey, entry);
                logger_1.logger.debug(`Cached to disk: ${section} for ${path.basename(filePath)} (${this.formatBytes(entry.dataSize)})`);
            }
            // Trigger cleanup if needed
            await this.cleanup();
        }
        catch (error) {
            logger_1.logger.warn(`Cache set error for ${section}: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Check if cache entry exists and is valid.
     * Implemented through get(), so it also updates the hit/miss counters.
     *
     * @param {string} filePath - Path of the analysed file.
     * @param {string} section - Section identifier.
     * @returns {Promise<boolean>} True when get() yields a non-null value.
     */
    async has(filePath, section) {
        const cached = await this.get(filePath, section);
        return cached !== null;
    }
    /**
     * Clear cache for a specific file.
     *
     * Removes every memory entry and every disk entry whose stored
     * `fileHash` equals the current hash of `filePath`.
     *
     * @param {string} filePath - Path of the file whose entries are dropped.
     * @returns {Promise<void>}
     */
    async clearFile(filePath) {
        try {
            const fileHash = await this.calculateFileHash(filePath);
            const clearedKeys = new Set();
            // Remove from memory cache
            for (const [key, entry] of this.memoryCache.entries()) {
                if (entry.fileHash === fileHash) {
                    clearedKeys.add(key);
                }
            }
            clearedKeys.forEach(key => this.memoryCache.delete(key));
            // Remove from disk cache
            if (this.config.enableDiskCache) {
                const files = await fs.promises.readdir(this.config.cacheDirectory);
                // Sequential on purpose: avoids opening every cache file at once.
                for (const file of files) {
                    const entryPath = path.join(this.config.cacheDirectory, file);
                    // Disk file names are digests of the key data and do not
                    // contain the file hash, so the stored entry has to be
                    // inspected. The name check is kept for compatibility.
                    let belongsToFile = file.includes(fileHash);
                    if (!belongsToFile) {
                        try {
                            const entry = JSON.parse(await fs.promises.readFile(entryPath, 'utf8'));
                            belongsToFile = entry !== null && typeof entry === 'object' && entry.fileHash === fileHash;
                        }
                        catch (_error) {
                            // Unreadable or non-JSON file: it cannot be attributed
                            // to this source file, so leave it to regular cleanup.
                        }
                    }
                    if (belongsToFile) {
                        await fs.promises.unlink(entryPath).catch(() => { });
                        clearedKeys.add(path.basename(file, '.json'));
                    }
                }
            }
            logger_1.logger.info(`Cleared cache for ${path.basename(filePath)} (${clearedKeys.size} entries)`);
        }
        catch (error) {
            logger_1.logger.warn(`Cache clear error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Clear all cache entries (memory and every file in the cache
     * directory) and reset the statistics counters.
     *
     * @returns {Promise<void>}
     */
    async clearAll() {
        try {
            // Clear memory cache
            this.memoryCache.clear();
            // Clear disk cache
            if (this.config.enableDiskCache) {
                const files = await fs.promises.readdir(this.config.cacheDirectory);
                await Promise.all(files.map(file => fs.promises.unlink(path.join(this.config.cacheDirectory, file)).catch(() => { })));
            }
            // Reset stats
            this.cacheStats = { hits: 0, misses: 0, evictions: 0 };
            logger_1.logger.info('Cleared all cache entries');
        }
        catch (error) {
            logger_1.logger.warn(`Cache clear all error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Get cache statistics.
     *
     * Entry counts and sizes are the sum of the memory cache and the files
     * found in the cache directory; oldest/newest timestamps are taken from
     * memory entries only (0 when the memory cache is empty).
     *
     * @returns {Promise<{totalEntries: number, totalSizeBytes: number,
     *   hitRate: number, totalHits: number, totalMisses: number,
     *   oldestEntry: number, newestEntry: number}>}
     */
    async getStats() {
        const memoryEntries = Array.from(this.memoryCache.values());
        const memorySizeBytes = memoryEntries.reduce((sum, entry) => sum + entry.dataSize, 0);
        let diskEntries = 0;
        let diskSizeBytes = 0;
        if (this.config.enableDiskCache) {
            try {
                const files = await fs.promises.readdir(this.config.cacheDirectory);
                diskEntries = files.length;
                const stats = await Promise.all(files.map(file => fs.promises.stat(path.join(this.config.cacheDirectory, file)).catch(() => null)));
                diskSizeBytes = stats
                    .filter(stat => stat !== null)
                    .reduce((sum, stat) => sum + stat.size, 0);
            }
            catch (error) {
                // Ignore errors for stats
            }
        }
        const hasMemoryEntries = memoryEntries.length > 0;
        // Folded instead of spread into Math.min/Math.max so a very large
        // cache cannot exceed the engine's argument-count limit.
        const oldestEntry = hasMemoryEntries
            ? memoryEntries.reduce((min, entry) => Math.min(min, entry.timestamp), Infinity)
            : 0;
        const newestEntry = hasMemoryEntries
            ? memoryEntries.reduce((max, entry) => Math.max(max, entry.timestamp), -Infinity)
            : 0;
        const totalRequests = this.cacheStats.hits + this.cacheStats.misses;
        return {
            totalEntries: memoryEntries.length + diskEntries,
            totalSizeBytes: memorySizeBytes + diskSizeBytes,
            hitRate: totalRequests > 0 ? this.cacheStats.hits / totalRequests : 0,
            totalHits: this.cacheStats.hits,
            totalMisses: this.cacheStats.misses,
            oldestEntry,
            newestEntry,
        };
    }
    /**
     * Enable or disable caching.
     * @param {boolean} enabled - When false, get() returns null and set() is a no-op.
     */
    setEnabled(enabled) {
        this.config.enabled = enabled;
        logger_1.logger.info(`Cache ${enabled ? 'enabled' : 'disabled'}`);
    }
    /**
     * Update cache configuration by shallow-merging `newConfig` over the
     * current settings, then make sure the cache directory exists.
     * @param {object} newConfig - Partial configuration overrides.
     */
    updateConfig(newConfig) {
        this.config = { ...this.config, ...newConfig };
        this.ensureCacheDirectory();
        logger_1.logger.info('Cache configuration updated');
    }
    /**
     * Generate cache key for file and section.
     *
     * @param {string} filePath - Path of the analysed file.
     * @param {string} section - Section identifier.
     * @returns {Promise<string>} Hex SHA-256 digest of the key data.
     *   Rejects when the file cannot be stat'ed.
     */
    async generateCacheKey(filePath, section) {
        const fileHash = await this.calculateFileHash(filePath);
        const stats = await fs.promises.stat(filePath);
        const keyData = {
            fileHash,
            section,
            fileSize: stats.size,
            lastModified: stats.mtime.getTime(),
            version: this.CACHE_VERSION,
        };
        return crypto
            .createHash('sha256')
            .update(JSON.stringify(keyData))
            .digest('hex');
    }
    /**
     * Calculate file hash for cache invalidation.
     *
     * Files above 100MB are hashed from path, size and modification time;
     * smaller files from their content. If the file cannot be read, the hash
     * of the path alone is returned.
     *
     * @param {string} filePath - Path of the file to hash.
     * @returns {Promise<string>} Hex SHA-256 digest.
     */
    async calculateFileHash(filePath) {
        try {
            const stats = await fs.promises.stat(filePath);
            // For large files, use file metadata instead of content hash for performance
            if (stats.size > 100 * 1024 * 1024) { // 100MB
                return crypto
                    .createHash('sha256')
                    .update(`${filePath}-${stats.size}-${stats.mtime.getTime()}`)
                    .digest('hex');
            }
            // For smaller files, use content hash
            const content = await fs.promises.readFile(filePath);
            return crypto.createHash('sha256').update(content).digest('hex');
        }
        catch (error) {
            // Fallback to path-based hash
            return crypto.createHash('sha256').update(filePath).digest('hex');
        }
    }
    /**
     * Create cache entry.
     *
     * @param {string} filePath - Path of the analysed file.
     * @param {string} section - Section identifier.
     * @param {*} data - Result to store; its JSON length determines dataSize.
     * @returns {Promise<object>} Entry holding the data plus the file and
     *   version metadata used for validation.
     */
    async createCacheEntry(filePath, section, data) {
        const stats = await fs.promises.stat(filePath);
        const fileHash = await this.calculateFileHash(filePath);
        const serializedData = JSON.stringify(data);
        return {
            data,
            timestamp: Date.now(),
            fileHash,
            fileSize: stats.size,
            lastModified: stats.mtime.getTime(),
            dataSize: Buffer.byteLength(serializedData, 'utf8'),
            section,
            version: this.CACHE_VERSION,
        };
    }
    /**
     * Check if cache entry is valid: same cache version, within the TTL, and
     * the file's size and modification time still match the entry.
     *
     * @param {object} entry - Cache entry to validate.
     * @param {string} filePath - Path of the file the entry was created for.
     * @returns {Promise<boolean>} False on any mismatch or error. Callers
     *   must await the result.
     */
    async isValidEntry(entry, filePath) {
        try {
            // Check version compatibility
            if (entry.version !== this.CACHE_VERSION) {
                return false;
            }
            // Check TTL. A missing or non-numeric timestamp would make the
            // comparison below always false, so reject it explicitly.
            if (!Number.isFinite(entry.timestamp)) {
                return false;
            }
            if (Date.now() - entry.timestamp > this.config.ttlMs) {
                return false;
            }
            // Check file modification
            const stats = await fs.promises.stat(filePath);
            if (stats.mtime.getTime() !== entry.lastModified || stats.size !== entry.fileSize) {
                return false;
            }
            return true;
        }
        catch (error) {
            return false;
        }
    }
    /**
     * Set entry in memory cache with eviction.
     *
     * Evicts oldest entries until there is room for a new key. Replacing an
     * existing key evicts nothing, and a numeric `maxEntries` of zero or less
     * means the memory cache has no capacity.
     *
     * @param {string} key - Cache key.
     * @param {object} entry - Cache entry to store.
     */
    setMemoryEntry(key, entry) {
        const maxEntries = this.config.maxEntries;
        if (typeof maxEntries === 'number' && maxEntries <= 0) {
            return;
        }
        if (!this.memoryCache.has(key)) {
            while (this.memoryCache.size >= maxEntries) {
                // Stop if nothing could be evicted, rather than spinning forever.
                if (!this.evictOldestMemoryEntry()) {
                    break;
                }
            }
        }
        this.memoryCache.set(key, entry);
    }
    /**
     * Evict oldest entry from memory cache.
     * Entries without a finite timestamp are treated as the oldest.
     *
     * @returns {boolean} True when an entry was removed, false when the
     *   cache was empty.
     */
    evictOldestMemoryEntry() {
        let oldestKey;
        let oldestTimestamp = Infinity;
        for (const [key, entry] of this.memoryCache.entries()) {
            const timestamp = Number.isFinite(entry.timestamp) ? entry.timestamp : -Infinity;
            if (oldestKey === undefined || timestamp < oldestTimestamp) {
                oldestTimestamp = timestamp;
                oldestKey = key;
            }
        }
        if (oldestKey === undefined) {
            return false;
        }
        this.memoryCache.delete(oldestKey);
        this.cacheStats.evictions++;
        return true;
    }
    /**
     * Date fields that need restoration during deserialization
     */
    static DATE_FIELDS = [
        'generatedAt',
        'lastModified',
        'analysisStartTimestamp',
        'timestamp',
        'createdAt',
        'modifiedAt'
    ];
    /**
     * Custom JSON reviver to restore Date objects from strings.
     * Only string values under a key listed in DATE_FIELDS are converted;
     * strings that do not parse to a valid date are returned unchanged.
     *
     * @param {string} key - Property name being revived.
     * @param {*} value - Parsed value.
     * @returns {*} A Date for recognised date strings, otherwise `value`.
     */
    static dateReviver(key, value) {
        if (typeof value === 'string' && SectionCacheManager.DATE_FIELDS.includes(key)) {
            const date = new Date(value);
            return isNaN(date.getTime()) ? value : date;
        }
        return value;
    }
    /**
     * Get entry from disk cache.
     *
     * @param {string} key - Cache key (file name without the .json suffix).
     * @returns {Promise<object|null>} Parsed entry, or null when the file is
     *   missing or cannot be parsed.
     */
    async getDiskEntry(key) {
        try {
            const filePath = path.join(this.config.cacheDirectory, `${key}.json`);
            const content = await fs.promises.readFile(filePath, 'utf8');
            return JSON.parse(content, SectionCacheManager.dateReviver);
        }
        catch (error) {
            return null;
        }
    }
    /**
     * Set entry in disk cache as `<key>.json`. Write failures are logged as
     * warnings and not propagated.
     *
     * @param {string} key - Cache key.
     * @param {object} entry - Cache entry to serialize.
     * @returns {Promise<void>}
     */
    async setDiskEntry(key, entry) {
        try {
            const filePath = path.join(this.config.cacheDirectory, `${key}.json`);
            const content = JSON.stringify(entry);
            await fs.promises.writeFile(filePath, content, 'utf8');
        }
        catch (error) {
            logger_1.logger.warn(`Failed to write disk cache entry: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Ensure cache directory exists (created recursively, synchronously).
     * Failures are logged as warnings.
     */
    ensureCacheDirectory() {
        try {
            if (!fs.existsSync(this.config.cacheDirectory)) {
                fs.mkdirSync(this.config.cacheDirectory, { recursive: true });
            }
        }
        catch (error) {
            logger_1.logger.warn(`Failed to create cache directory: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Clean up expired entries and enforce size limits.
     * @returns {Promise<void>}
     */
    async cleanup() {
        try {
            await this.cleanupExpiredEntries();
            await this.enforceSizeLimit();
        }
        catch (error) {
            logger_1.logger.warn(`Cache cleanup error: ${error instanceof Error ? error.message : 'Unknown error'}`);
        }
    }
    /**
     * Remove expired entries from memory and, when disk caching is enabled,
     * from the cache directory. Files that cannot be read or parsed are
     * treated as expired and deleted.
     *
     * @returns {Promise<void>}
     */
    async cleanupExpiredEntries() {
        const now = Date.now();
        // Clean memory cache
        const expiredKeys = [];
        for (const [key, entry] of this.memoryCache.entries()) {
            if (now - entry.timestamp > this.config.ttlMs) {
                expiredKeys.push(key);
            }
        }
        expiredKeys.forEach(key => this.memoryCache.delete(key));
        // Clean disk cache
        if (this.config.enableDiskCache) {
            try {
                const files = await fs.promises.readdir(this.config.cacheDirectory);
                const expiredFiles = [];
                for (const file of files) {
                    try {
                        const filePath = path.join(this.config.cacheDirectory, file);
                        const content = await fs.promises.readFile(filePath, 'utf8');
                        const entry = JSON.parse(content);
                        if (now - entry.timestamp > this.config.ttlMs) {
                            expiredFiles.push(file);
                        }
                    }
                    catch (error) {
                        // If we can't read the file, consider it expired
                        expiredFiles.push(file);
                    }
                }
                await Promise.all(expiredFiles.map(file => fs.promises.unlink(path.join(this.config.cacheDirectory, file)).catch(() => { })));
                if (expiredFiles.length > 0) {
                    logger_1.logger.debug(`Cleaned up ${expiredFiles.length} expired cache entries`);
                }
            }
            catch (error) {
                // Ignore cleanup errors
            }
        }
    }
    /**
     * Enforce cache size limits.
     *
     * When the combined memory + disk size exceeds `maxSizeBytes`, the
     * memory cache is trimmed to the most recent 80% of `maxEntries`, and
     * the oldest disk files are removed until the disk total fits the limit.
     *
     * @returns {Promise<void>}
     */
    async enforceSizeLimit() {
        // Get current total size
        const stats = await this.getStats();
        if (stats.totalSizeBytes <= this.config.maxSizeBytes) {
            return; // Within limits
        }
        logger_1.logger.info(`Cache size (${this.formatBytes(stats.totalSizeBytes)}) exceeds limit (${this.formatBytes(this.config.maxSizeBytes)}), cleaning up...`);
        // First, clean memory cache (keep only most recent entries)
        const memoryEntries = Array.from(this.memoryCache.entries());
        memoryEntries.sort(([, a], [, b]) => b.timestamp - a.timestamp);
        const keepCount = Math.floor(this.config.maxEntries * 0.8); // Keep 80%
        const toRemove = memoryEntries.slice(keepCount);
        toRemove.forEach(([key]) => this.memoryCache.delete(key));
        // Keep the eviction counter consistent with evictOldestMemoryEntry.
        this.cacheStats.evictions += toRemove.length;
        // Then clean disk cache if still over limit
        if (this.config.enableDiskCache) {
            try {
                const files = await fs.promises.readdir(this.config.cacheDirectory);
                const fileEntries = [];
                for (const file of files) {
                    try {
                        const filePath = path.join(this.config.cacheDirectory, file);
                        const content = await fs.promises.readFile(filePath, 'utf8');
                        const entry = JSON.parse(content);
                        const fileStat = await fs.promises.stat(filePath);
                        fileEntries.push({
                            file,
                            timestamp: entry.timestamp,
                            size: fileStat.size,
                        });
                    }
                    catch (error) {
                        // Remove corrupted files
                        try {
                            await fs.promises.unlink(path.join(this.config.cacheDirectory, file));
                        }
                        catch (_error) {
                            // Ignore unlink errors - file may not exist or be locked
                        }
                    }
                }
                // Sort by timestamp (oldest first) and remove until under size limit
                fileEntries.sort((a, b) => a.timestamp - b.timestamp);
                let currentSize = fileEntries.reduce((sum, entry) => sum + entry.size, 0);
                const toRemoveFiles = [];
                // Index walk instead of repeated shift() keeps this linear.
                for (let i = 0; i < fileEntries.length && currentSize > this.config.maxSizeBytes; i++) {
                    toRemoveFiles.push(fileEntries[i].file);
                    currentSize -= fileEntries[i].size;
                }
                await Promise.all(toRemoveFiles.map(file => fs.promises.unlink(path.join(this.config.cacheDirectory, file)).catch(() => { })));
                if (toRemoveFiles.length > 0) {
                    logger_1.logger.info(`Removed ${toRemoveFiles.length} cache files to enforce size limit`);
                }
            }
            catch (error) {
                // Ignore cleanup errors
            }
        }
    }
    /**
     * Format bytes for display using 1024-based units up to GB,
     * with one decimal place (e.g. 1536 -> "1.5KB").
     *
     * @param {number} bytes - Size in bytes.
     * @returns {string} Human-readable size.
     */
    formatBytes(bytes) {
        const units = ['B', 'KB', 'MB', 'GB'];
        let size = bytes;
        let unitIndex = 0;
        while (size >= 1024 && unitIndex < units.length - 1) {
            size /= 1024;
            unitIndex++;
        }
        return `${size.toFixed(1)}${units[unitIndex]}`;
    }
}
exports.SectionCacheManager = SectionCacheManager;
//# sourceMappingURL=section-cache-manager.js.map