@kaifronsdal/transcript-viewer
Version:
A web-based viewer for AI conversation transcripts with rollback support
444 lines (378 loc) • 13.8 kB
text/typescript
import { promises as fs } from 'fs';
import path from 'path';
import { watch } from 'chokidar';
import { EventEmitter } from 'events';
import type { TranscriptDisplayFull, TranscriptMetadata } from '$lib/shared/types';
import { loadTranscriptFromFile, loadMetadataFromFile } from '$lib/server/core/transcript-loader';
import { DEFAULT_TRANSCRIPT_DIR } from '$lib/shared/constants';
import { TRANSCRIPT_DIR } from '$lib/server/config';
// Import-time guard: this module pulls in Node's `fs` and a file watcher,
// so fail loudly if it ever gets evaluated where a global `window` exists.
if (typeof window === 'undefined') {
  // No browser global present — safe to continue loading.
} else {
  throw new Error('transcript-cache can only be used on the server side');
}
/**
 * One cached value together with the bookkeeping needed to validate and
 * evict it.
 */
interface CacheEntry<T> {
// The cached payload handed back to callers on a cache hit.
data: T;
// Normalized path of the backing file; re-stat'ed when validating the entry.
filePath: string;
// File mtime (epoch milliseconds) recorded when the entry was stored.
lastModified: number;
// Epoch milliseconds of the most recent read; drives LRU eviction ordering.
accessTime: number;
}
/**
 * Counters describing the cache; callers receive a copy via `getStats()`.
 */
interface CacheStats {
// Number of entries currently held in the metadata cache.
metadataCount: number;
// Number of entries currently held in the full-transcript cache.
fullTranscriptCount: number;
// Reads answered from a still-valid cached entry.
cacheHits: number;
// Reads that had to go to the file loader.
cacheMisses: number;
// Set to true on the watcher's 'ready' event and back to false on shutdown.
fileWatcherActive: boolean;
}
/** File-system events that affect cached transcripts. */
type TranscriptFileEvent = 'add' | 'change' | 'unlink';

/**
 * Transcript cache with optional file watching.
 *
 * - Metadata cache: unbounded, keyed by normalized file path.
 * - Full-transcript cache: bounded (200 entries by default); the least
 *   recently accessed entries are evicted first.
 * - Every read re-checks the file's mtime, so results stay correct even when
 *   the watcher is not running. When the watcher is running it additionally
 *   invalidates entries eagerly and notifies `onChange` subscribers.
 */
export class TranscriptCache {
  /** Log prefix for each watcher event, so all events share one handler. */
  private static readonly fileEventLabels: Record<TranscriptFileEvent, string> = {
    add: '📄 [CACHE] File added',
    change: '📝 [CACHE] File changed',
    unlink: '🗑️ [CACHE] File deleted'
  };

  private readonly metadataCache = new Map<string, CacheEntry<TranscriptMetadata>>();
  private readonly fullTranscriptCache = new Map<string, CacheEntry<TranscriptDisplayFull>>();
  private readonly maxFullTranscripts: number;
  private watcherInitialized = false;
  private watcher: ReturnType<typeof watch> | null = null;
  private readonly changeEmitter = new EventEmitter();
  private versionToken = `${Date.now()}`;
  private readonly stats: CacheStats = {
    metadataCount: 0,
    fullTranscriptCount: 0,
    cacheHits: 0,
    cacheMisses: 0,
    fileWatcherActive: false
  };

  /**
   * @param maxFullTranscripts Maximum number of full transcripts kept in memory.
   */
  constructor(maxFullTranscripts = 200) {
    this.maxFullTranscripts = maxFullTranscripts;
    console.log(`🗄️ [CACHE] TranscriptCache initialized with max ${maxFullTranscripts} full transcripts`);
  }

  /**
   * Start watching `transcriptDir` (recursively) for JSON file changes.
   *
   * Failures are logged, not thrown: the cache keeps working without a
   * watcher because every read validates entries against the file's mtime.
   *
   * @param transcriptDir Directory to watch.
   */
  async initializeWatcher(transcriptDir: string = DEFAULT_TRANSCRIPT_DIR): Promise<void> {
    if (this.watcherInitialized) {
      console.log('🔍 [CACHE] File watcher already initialized');
      return;
    }
    try {
      // Check if directory exists
      await fs.access(transcriptDir);
      console.log(`🔍 [CACHE] Initializing file watcher for: ${transcriptDir}`);

      // Watch the directory itself and filter by extension in the handler
      // instead of passing a `**/*.json` glob: a glob built with path.join
      // contains backslashes on Windows, and chokidar v4 dropped glob support.
      const watcher = watch(transcriptDir, {
        ignored: /(^|[\/\\])\../, // ignore dotfiles
        persistent: true,
        ignoreInitial: true // Don't trigger for existing files
      });

      watcher
        .on('add', (filePath: string) => this.handleFileEvent('add', filePath))
        .on('change', (filePath: string) => this.handleFileEvent('change', filePath))
        .on('unlink', (filePath: string) => this.handleFileEvent('unlink', filePath))
        .on('error', (error: unknown) => {
          console.error('🚨 [CACHE] File watcher error:', error);
        })
        .on('ready', () => {
          console.log('✅ [CACHE] File watcher is ready');
          // Ignore a late 'ready' from a watcher that was already shut down.
          if (this.watcher === watcher) {
            this.stats.fileWatcherActive = true;
          }
        });

      this.watcher = watcher;
      this.watcherInitialized = true;
    } catch (error) {
      console.error(`🚨 [CACHE] Failed to initialize file watcher for ${transcriptDir}:`, error);
      // Don't throw - cache should work without file watching
    }
  }

  /**
   * Shared handler for watcher events on a single file.
   *
   * Invalidation runs BEFORE subscribers are notified: listeners are invoked
   * synchronously, so a listener that immediately re-reads the cache must not
   * be able to see the stale entry, and a throwing listener must not prevent
   * invalidation.
   */
  private handleFileEvent(eventType: TranscriptFileEvent, filePath: string): void {
    // The directory watch reports every file; only JSON transcripts matter.
    if (!filePath.endsWith('.json')) {
      return;
    }
    console.log(`${TranscriptCache.fileEventLabels[eventType]}: ${filePath}`);
    this.invalidateFile(filePath);
    this.bumpVersionAndEmit(eventType, filePath);
  }

  /**
   * Path of `absolutePath` relative to the configured transcript directory,
   * using POSIX separators so it is safe to embed in client URLs.
   */
  private toClientRelativePath(absolutePath: string): string {
    return path.relative(TRANSCRIPT_DIR, absolutePath).replace(/\\/g, '/');
  }

  /**
   * File mtime in epoch milliseconds, or null when the file cannot be stat'ed.
   */
  private async readMtime(filePath: string): Promise<number | null> {
    try {
      const stat = await fs.stat(filePath);
      return stat.mtime.getTime();
    } catch {
      return null;
    }
  }

  /**
   * Replace the version token and emit a structured change event.
   */
  private bumpVersionAndEmit(eventType: TranscriptFileEvent, absoluteFilePath: string): void {
    // Opaque token: timestamp plus a random suffix so two changes within the
    // same millisecond still differ. Clients should compare for equality only.
    this.versionToken = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

    let relativePath: string;
    try {
      relativePath = this.toClientRelativePath(absoluteFilePath);
    } catch {
      relativePath = absoluteFilePath;
    }

    this.changeEmitter.emit('change', {
      type: eventType,
      absolutePath: absoluteFilePath,
      relativePath,
      version: this.versionToken,
      updatedAt: Date.now()
    });
  }

  /**
   * Get metadata from cache or load it from the file.
   *
   * @param filePath Path of the transcript file.
   * @returns The metadata, or null when the loader returns null.
   */
  async getMetadata(filePath: string): Promise<TranscriptMetadata | null> {
    const normalizedPath = path.normalize(filePath);
    const cached = this.metadataCache.get(normalizedPath);

    if (cached && (await this.isCacheValid(cached))) {
      cached.accessTime = Date.now();
      this.stats.cacheHits++;
      return cached.data;
    }

    this.stats.cacheMisses++;

    // Record the mtime BEFORE reading. If the file is rewritten while it is
    // being loaded, the stored mtime is then older than the file's and the
    // next read reloads, instead of pairing old content with the new mtime.
    const lastModified = await this.readMtime(normalizedPath);
    const metadata = await loadMetadataFromFile(normalizedPath);
    if (metadata === null) {
      return null;
    }

    // Without a usable mtime the entry could never be validated; skip caching.
    if (lastModified !== null) {
      this.metadataCache.set(normalizedPath, {
        data: metadata,
        filePath: normalizedPath,
        lastModified,
        accessTime: Date.now()
      });
      this.stats.metadataCount = this.metadataCache.size;
    }
    return metadata;
  }

  /**
   * Get a full transcript from cache or load it from the file.
   *
   * The returned transcript's `_filePath` is rewritten to be relative to the
   * configured transcript directory, with POSIX separators.
   *
   * @param filePath Path of the transcript file.
   * @returns The transcript, or null when the loader returns null.
   */
  async getFullTranscript(filePath: string): Promise<TranscriptDisplayFull | null> {
    const normalizedPath = path.normalize(filePath);
    const cached = this.fullTranscriptCache.get(normalizedPath);

    if (cached && (await this.isCacheValid(cached))) {
      cached.accessTime = Date.now();
      this.stats.cacheHits++;
      console.log(`💾 [CACHE] Full transcript cache hit: ${normalizedPath}`);
      return cached.data;
    }

    console.log(`📁 [CACHE] Loading full transcript from file: ${normalizedPath}`);
    this.stats.cacheMisses++;

    // See getMetadata: the mtime is captured before the read on purpose.
    const lastModified = await this.readMtime(normalizedPath);
    const transcript = await loadTranscriptFromFile(normalizedPath);
    if (transcript === null) {
      return null;
    }

    const updatedTranscript = {
      ...transcript,
      _filePath: this.toClientRelativePath(normalizedPath)
    };

    if (lastModified !== null) {
      const entry: CacheEntry<TranscriptDisplayFull> = {
        data: updatedTranscript,
        filePath: normalizedPath,
        lastModified,
        accessTime: Date.now()
      };
      this.fullTranscriptCache.set(normalizedPath, entry);
      await this.enforceFullTranscriptLimit();
      this.stats.fullTranscriptCount = this.fullTranscriptCache.size;
    }
    return updatedTranscript;
  }

  /**
   * Whether a cached entry still matches the file on disk.
   *
   * The comparison is exact: a file replaced by one with an OLDER mtime
   * (restored backup, `cp -p`, archive extraction) must also invalidate.
   */
  private async isCacheValid(cached: CacheEntry<unknown>): Promise<boolean> {
    const currentMtime = await this.readMtime(cached.filePath);
    // null means the file no longer exists or is unreadable.
    return currentMtime !== null && currentMtime === cached.lastModified;
  }

  /**
   * Evict least-recently-accessed full transcripts until the cache is within
   * `maxFullTranscripts`.
   */
  private async enforceFullTranscriptLimit(): Promise<void> {
    const excess = this.fullTranscriptCache.size - this.maxFullTranscripts;
    if (excess <= 0) {
      return;
    }

    // Oldest access first; the first `excess` entries are the victims.
    const oldestFirst = Array.from(this.fullTranscriptCache.entries()).sort(
      ([, a], [, b]) => a.accessTime - b.accessTime
    );
    for (const [key] of oldestFirst.slice(0, excess)) {
      this.fullTranscriptCache.delete(key);
      console.log(`🗑️ [CACHE] Evicted full transcript from cache: ${key}`);
    }
    console.log(`📊 [CACHE] LRU eviction completed. Full transcript cache size: ${this.fullTranscriptCache.size}`);
  }

  /**
   * Drop the metadata and full-transcript entries for one file, if present.
   */
  private invalidateFile(filePath: string): void {
    const normalizedPath = path.normalize(filePath);
    const removedMetadata = this.metadataCache.delete(normalizedPath);
    if (removedMetadata) {
      console.log(`🗑️ [CACHE] Invalidated metadata cache for: ${normalizedPath}`);
    }
    const removedFull = this.fullTranscriptCache.delete(normalizedPath);
    if (removedFull) {
      console.log(`🗑️ [CACHE] Invalidated full transcript cache for: ${normalizedPath}`);
    }
    if (removedMetadata || removedFull) {
      this.updateStats();
    }
  }

  /**
   * Subscribe to change events (file add/change/delete).
   *
   * @param listener Called synchronously for every watcher event, after the
   *   affected cache entries have been invalidated.
   * @returns A function that removes the listener.
   */
  onChange(listener: (evt: { type: 'add' | 'change' | 'unlink'; absolutePath: string; relativePath: string; version: string; updatedAt: number }) => void): () => void {
    this.changeEmitter.on('change', listener);
    return () => this.changeEmitter.off('change', listener);
  }

  /**
   * Current version token; it changes whenever the watcher reports a
   * transcript file change.
   */
  getVersion(): string {
    return this.versionToken;
  }

  /**
   * Clear all cache entries (hit/miss counters are kept).
   */
  clearCache(): void {
    const metadataCount = this.metadataCache.size;
    const fullTranscriptCount = this.fullTranscriptCache.size;
    this.metadataCache.clear();
    this.fullTranscriptCache.clear();
    this.updateStats();
    console.log(`🗑️ [CACHE] Cleared all cache entries (${metadataCount} metadata, ${fullTranscriptCount} full transcripts)`);
  }

  /**
   * Snapshot of the cache statistics; mutating it does not affect the cache.
   */
  getStats(): CacheStats {
    return { ...this.stats };
  }

  /**
   * Re-derive the entry counts from the underlying maps.
   */
  private updateStats(): void {
    this.stats.metadataCount = this.metadataCache.size;
    this.stats.fullTranscriptCount = this.fullTranscriptCache.size;
  }

  /**
   * Stop the watcher and clear the cache.
   *
   * State is reset even when closing the watcher rejects, so the instance can
   * be re-initialized afterwards; the rejection still propagates to the caller.
   */
  async shutdown(): Promise<void> {
    const watcher = this.watcher;
    try {
      if (watcher) {
        console.log('🔍 [CACHE] Shutting down file watcher');
        await watcher.close();
      }
    } finally {
      this.watcher = null;
      this.stats.fileWatcherActive = false;
      this.clearCache();
      this.watcherInitialized = false;
    }
    console.log('🗄️ [CACHE] TranscriptCache shutdown completed');
  }

  /**
   * Preload metadata for every `.json` file under a directory.
   *
   * Files are processed in batches of 10 so a large directory does not open
   * every file at once. Per-file failures are logged and skipped.
   *
   * @param transcriptDir Directory to scan recursively.
   */
  async preloadMetadata(transcriptDir: string = DEFAULT_TRANSCRIPT_DIR): Promise<void> {
    console.log(`🔄 [CACHE] Preloading metadata from: ${transcriptDir}`);
    try {
      const files = await this.findAllTranscriptFiles(transcriptDir);
      console.log(`📁 [CACHE] Found ${files.length} transcript files to preload`);

      const concurrency = 10;
      for (const batch of this.chunkArray(files, concurrency)) {
        await Promise.all(
          batch.map(async (filePath) => {
            try {
              await this.getMetadata(filePath);
            } catch (error) {
              console.warn(`⚠️ [CACHE] Failed to preload metadata for ${filePath}:`, error);
            }
          })
        );
      }
      console.log(`✅ [CACHE] Preloaded metadata for ${this.metadataCache.size} transcripts`);
    } catch (error) {
      console.error('🚨 [CACHE] Failed to preload metadata:', error);
    }
  }

  /**
   * Recursively collect the paths of all `.json` files under `dir`.
   */
  private async findAllTranscriptFiles(dir: string): Promise<string[]> {
    const files: string[] = [];
    const scanDir = async (currentDir: string): Promise<void> => {
      const entries = await fs.readdir(currentDir, { withFileTypes: true });
      for (const entry of entries) {
        const fullPath = path.join(currentDir, entry.name);
        if (entry.isDirectory()) {
          await scanDir(fullPath);
        } else if (entry.isFile() && entry.name.endsWith('.json')) {
          files.push(fullPath);
        }
      }
    };
    await scanDir(dir);
    return files;
  }

  /**
   * Split `array` into consecutive slices of at most `chunkSize` items.
   *
   * @throws RangeError when `chunkSize` is not a positive integer (a
   *   non-positive step would otherwise loop forever).
   */
  private chunkArray<T>(array: T[], chunkSize: number): T[][] {
    if (!Number.isInteger(chunkSize) || chunkSize < 1) {
      throw new RangeError(`chunkSize must be a positive integer, got ${chunkSize}`);
    }
    const chunks: T[][] = [];
    for (let i = 0; i < array.length; i += chunkSize) {
      chunks.push(array.slice(i, i + chunkSize));
    }
    return chunks;
  }
}
// Module-level singleton: created lazily by getTranscriptCache() and reset to
// null by shutdownGlobalCache().
let globalCache: TranscriptCache | null = null;
/**
 * Return the shared TranscriptCache, constructing it on first use.
 */
export function getTranscriptCache(): TranscriptCache {
  return (globalCache ??= new TranscriptCache());
}
/**
 * Start file watching on the shared cache instance.
 *
 * @param transcriptDir Directory to watch; when omitted, the watcher's own
 *   default directory is used.
 */
export async function initializeGlobalCache(transcriptDir?: string): Promise<void> {
  await getTranscriptCache().initializeWatcher(transcriptDir);
}
/**
 * Shut down the shared cache instance, if one exists, and forget it so the
 * next getTranscriptCache() call builds a fresh one.
 */
export async function shutdownGlobalCache(): Promise<void> {
  const active = globalCache;
  if (active === null) {
    return;
  }
  await active.shutdown();
  globalCache = null;
}