UNPKG

@kaifronsdal/transcript-viewer

Version:

A web-based viewer for AI conversation transcripts with rollback support

444 lines (378 loc) 13.8 kB
import { promises as fs } from 'fs'; import path from 'path'; import { watch } from 'chokidar'; import { EventEmitter } from 'events'; import type { TranscriptDisplayFull, TranscriptMetadata } from '$lib/shared/types'; import { loadTranscriptFromFile, loadMetadataFromFile } from '$lib/server/core/transcript-loader'; import { DEFAULT_TRANSCRIPT_DIR } from '$lib/shared/constants'; import { TRANSCRIPT_DIR } from '$lib/server/config'; // Server-side check if (typeof window !== 'undefined') { throw new Error('transcript-cache can only be used on the server side'); } interface CacheEntry<T> { data: T; filePath: string; lastModified: number; accessTime: number; } interface CacheStats { metadataCount: number; fullTranscriptCount: number; cacheHits: number; cacheMisses: number; fileWatcherActive: boolean; } /** * Intelligent transcript caching system with file watching * - Unlimited metadata cache * - LRU cache for 200 full transcripts * - File system watching for automatic invalidation */ export class TranscriptCache { private metadataCache = new Map<string, CacheEntry<TranscriptMetadata>>(); private fullTranscriptCache = new Map<string, CacheEntry<TranscriptDisplayFull>>(); private maxFullTranscripts: number; private watcherInitialized = false; private watcher: any = null; private changeEmitter = new EventEmitter(); private versionToken: string = `${Date.now()}`; private stats: CacheStats = { metadataCount: 0, fullTranscriptCount: 0, cacheHits: 0, cacheMisses: 0, fileWatcherActive: false }; constructor(maxFullTranscripts = 200) { this.maxFullTranscripts = maxFullTranscripts; console.log(`🗄️ [CACHE] TranscriptCache initialized with max ${maxFullTranscripts} full transcripts`); } /** * Initialize file system watcher for the transcript directory */ async initializeWatcher(transcriptDir: string = DEFAULT_TRANSCRIPT_DIR): Promise<void> { if (this.watcherInitialized) { console.log('🔍 [CACHE] File watcher already initialized'); return; } try { // Check if directory exists await fs.access(transcriptDir); console.log(`🔍 [CACHE] Initializing file watcher for: ${transcriptDir}`); // Watch for JSON files in the transcript directory and subdirectories this.watcher = watch(path.join(transcriptDir, '**/*.json'), { ignored: /(^|[\/\\])\../, // ignore dotfiles persistent: true, ignoreInitial: true // Don't trigger for existing files }); // Handle file changes this.watcher .on('add', (filePath: string) => { console.log(`📄 [CACHE] File added: ${filePath}`); this.bumpVersionAndEmit('add', filePath); this.invalidateFile(filePath); }) .on('change', (filePath: string) => { console.log(`📝 [CACHE] File changed: ${filePath}`); this.bumpVersionAndEmit('change', filePath); this.invalidateFile(filePath); }) .on('unlink', (filePath: string) => { console.log(`🗑️ [CACHE] File deleted: ${filePath}`); this.bumpVersionAndEmit('unlink', filePath); this.invalidateFile(filePath); }) .on('error', (error: any) => { console.error('🚨 [CACHE] File watcher error:', error); }) .on('ready', () => { console.log('✅ [CACHE] File watcher is ready'); this.stats.fileWatcherActive = true; }); this.watcherInitialized = true; } catch (error) { console.error(`🚨 [CACHE] Failed to initialize file watcher for ${transcriptDir}:`, error); // Don't throw - cache should work without file watching } } /** * Increment version token and emit a structured change event */ private bumpVersionAndEmit(eventType: 'add' | 'change' | 'unlink', absoluteFilePath: string): void { // Monotonic version token for clients to detect any change this.versionToken = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; let relativePath: string; try { const baseDir = TRANSCRIPT_DIR; relativePath = path.relative(baseDir, absoluteFilePath); } catch { relativePath = absoluteFilePath; } this.changeEmitter.emit('change', { type: eventType, absolutePath: absoluteFilePath, relativePath, version: this.versionToken, updatedAt: Date.now() }); } /** * Get metadata from cache or load from file */ async getMetadata(filePath: string): Promise<TranscriptMetadata | null> { const normalizedPath = path.normalize(filePath); const cached = this.metadataCache.get(normalizedPath); // Check if cached data is still valid if (cached && await this.isCacheValid(cached)) { cached.accessTime = Date.now(); this.stats.cacheHits++; // console.log(`💾 [CACHE] Metadata cache hit: ${normalizedPath}`); return cached.data; } // Load from file // console.log(`📁 [CACHE] Loading metadata from file: ${normalizedPath}`); this.stats.cacheMisses++; const metadata = await loadMetadataFromFile(normalizedPath); if (metadata === null) { return null; } // Cache the result const stat = await fs.stat(normalizedPath); this.metadataCache.set(normalizedPath, { data: metadata, filePath: normalizedPath, lastModified: stat.mtime.getTime(), accessTime: Date.now() }); this.stats.metadataCount = this.metadataCache.size; return metadata; } /** * Get full transcript from cache or load from file */ async getFullTranscript(filePath: string): Promise<TranscriptDisplayFull | null> { const normalizedPath = path.normalize(filePath); const cached = this.fullTranscriptCache.get(normalizedPath); // Check if cached data is still valid if (cached && await this.isCacheValid(cached)) { cached.accessTime = Date.now(); this.stats.cacheHits++; console.log(`💾 [CACHE] Full transcript cache hit: ${normalizedPath}`); return cached.data; } // Load from file console.log(`📁 [CACHE] Loading full transcript from file: ${normalizedPath}`); this.stats.cacheMisses++; const transcript = await loadTranscriptFromFile(normalizedPath); if (transcript === null) { return null; } // Convert absolute path back to relative path for _filePath // Get the transcript directory from environment or default const absoluteBaseDir = TRANSCRIPT_DIR; // Normalize to POSIX separators for client URL safety const relativePath = path.relative(absoluteBaseDir, normalizedPath).replace(/\\/g, '/'); // Update the transcript's _filePath to be relative const updatedTranscript = { ...transcript, _filePath: relativePath }; // Cache the result with LRU eviction const stat = await fs.stat(normalizedPath); const entry: CacheEntry<TranscriptDisplayFull> = { data: updatedTranscript, filePath: normalizedPath, lastModified: stat.mtime.getTime(), accessTime: Date.now() }; this.fullTranscriptCache.set(normalizedPath, entry); // Enforce LRU eviction for full transcripts await this.enforceFullTranscriptLimit(); this.stats.fullTranscriptCount = this.fullTranscriptCache.size; return updatedTranscript; } /** * Check if a cached entry is still valid by comparing modification times */ private async isCacheValid(cached: CacheEntry<any>): Promise<boolean> { try { const stat = await fs.stat(cached.filePath); return cached.lastModified >= stat.mtime.getTime(); } catch (error) { // File doesn't exist anymore, cache is invalid return false; } } /** * Enforce LRU eviction for full transcript cache */ private async enforceFullTranscriptLimit(): Promise<void> { if (this.fullTranscriptCache.size <= this.maxFullTranscripts) { return; } // Sort by access time and remove oldest entries const entries = Array.from(this.fullTranscriptCache.entries()); entries.sort(([, a], [, b]) => a.accessTime - b.accessTime); const toRemove = entries.slice(0, entries.length - this.maxFullTranscripts); for (const [key] of toRemove) { this.fullTranscriptCache.delete(key); console.log(`🗑️ [CACHE] Evicted full transcript from cache: ${key}`); } console.log(`📊 [CACHE] LRU eviction completed. Full transcript cache size: ${this.fullTranscriptCache.size}`); } /** * Invalidate cache entries for a specific file */ private invalidateFile(filePath: string): void { const normalizedPath = path.normalize(filePath); let invalidated = false; if (this.metadataCache.has(normalizedPath)) { this.metadataCache.delete(normalizedPath); invalidated = true; console.log(`🗑️ [CACHE] Invalidated metadata cache for: ${normalizedPath}`); } if (this.fullTranscriptCache.has(normalizedPath)) { this.fullTranscriptCache.delete(normalizedPath); invalidated = true; console.log(`🗑️ [CACHE] Invalidated full transcript cache for: ${normalizedPath}`); } if (invalidated) { this.updateStats(); } } /** * Subscribe to change events (file add/change/delete) */ onChange(listener: (evt: { type: 'add' | 'change' | 'unlink'; absolutePath: string; relativePath: string; version: string; updatedAt: number }) => void): () => void { this.changeEmitter.on('change', listener); return () => this.changeEmitter.off('change', listener); } /** * Get current version token that changes whenever any transcript file changes */ getVersion(): string { return this.versionToken; } /** * Clear all cache entries */ clearCache(): void { const metadataCount = this.metadataCache.size; const fullTranscriptCount = this.fullTranscriptCache.size; this.metadataCache.clear(); this.fullTranscriptCache.clear(); this.updateStats(); console.log(`🗑️ [CACHE] Cleared all cache entries (${metadataCount} metadata, ${fullTranscriptCount} full transcripts)`); } /** * Get cache statistics */ getStats(): CacheStats { return { ...this.stats }; } /** * Update internal statistics */ private updateStats(): void { this.stats.metadataCount = this.metadataCache.size; this.stats.fullTranscriptCount = this.fullTranscriptCache.size; } /** * Shutdown the cache and cleanup resources */ async shutdown(): Promise<void> { if (this.watcher) { console.log('🔍 [CACHE] Shutting down file watcher'); await this.watcher.close(); this.watcher = null; this.stats.fileWatcherActive = false; } this.clearCache(); this.watcherInitialized = false; console.log('🗄️ [CACHE] TranscriptCache shutdown completed'); } /** * Preload metadata for all files in a directory (for bulk operations) */ async preloadMetadata(transcriptDir: string = DEFAULT_TRANSCRIPT_DIR): Promise<void> { console.log(`🔄 [CACHE] Preloading metadata from: ${transcriptDir}`); try { const files = await this.findAllTranscriptFiles(transcriptDir); console.log(`📁 [CACHE] Found ${files.length} transcript files to preload`); // Load metadata for all files in parallel (with concurrency limit) const concurrency = 10; const chunks = this.chunkArray(files, concurrency); for (const chunk of chunks) { await Promise.all( chunk.map(async (filePath) => { try { await this.getMetadata(filePath); } catch (error) { console.warn(`⚠️ [CACHE] Failed to preload metadata for ${filePath}:`, error); } }) ); } console.log(`✅ [CACHE] Preloaded metadata for ${this.metadataCache.size} transcripts`); } catch (error) { console.error('🚨 [CACHE] Failed to preload metadata:', error); } } /** * Find all transcript files recursively */ private async findAllTranscriptFiles(dir: string): Promise<string[]> { const files: string[] = []; async function scanDir(currentDir: string): Promise<void> { const entries = await fs.readdir(currentDir, { withFileTypes: true }); for (const entry of entries) { const fullPath = path.join(currentDir, entry.name); if (entry.isDirectory()) { await scanDir(fullPath); } else if (entry.isFile() && entry.name.endsWith('.json')) { files.push(fullPath); } } } await scanDir(dir); return files; } /** * Utility function to chunk an array */ private chunkArray<T>(array: T[], chunkSize: number): T[][] { const chunks: T[][] = []; for (let i = 0; i < array.length; i += chunkSize) { chunks.push(array.slice(i, i + chunkSize)); } return chunks; } } // Global cache instance let globalCache: TranscriptCache | null = null; /** * Get or create the global transcript cache instance */ export function getTranscriptCache(): TranscriptCache { if (!globalCache) { globalCache = new TranscriptCache(); } return globalCache; } /** * Initialize the global cache with file watching */ export async function initializeGlobalCache(transcriptDir?: string): Promise<void> { const cache = getTranscriptCache(); await cache.initializeWatcher(transcriptDir); } /** * Shutdown the global cache */ export async function shutdownGlobalCache(): Promise<void> { if (globalCache) { await globalCache.shutdown(); globalCache = null; } }