@kaifronsdal/transcript-viewer
Version:
A web-based viewer for AI conversation transcripts with rollback support
263 lines (231 loc) • 9.93 kB
text/typescript
import type { TranscriptDisplay } from '$lib/shared/types';
import { scanDirectoryForTranscripts, checkPathAccess } from './file-scanner';
import type { LoadingResult, LoadingError, LoadingStats } from './types';
import { getTranscriptCache } from '$lib/server/cache/transcript-cache';
import { DEFAULT_CONCURRENCY } from '$lib/shared/constants';
import { extractModelName } from '$lib/shared/utils/transcript-utils';
import path from 'path';
// Server-side check
if (typeof window !== 'undefined') {
throw new Error('cached-bulk-loader can only be used on the server side');
}
/**
* Load transcripts metadata using cache for optimal performance
*/
export async function loadCachedTranscriptsMetadataOnly(
outputDir: string = './transcripts',
includeErrors: boolean = true
): Promise<LoadingResult> {
console.log(`📂 [CACHED-LOADER] Starting cached metadata loading from: ${outputDir}`);
// Check if directory exists
const directoryExists = await checkPathAccess(outputDir);
if (!directoryExists) {
const error: LoadingError = {
type: 'file_not_found',
message: `Directory not found: ${outputDir}`,
file: outputDir
};
return {
transcripts: [],
errors: [error],
directories: [],
stats: {
totalFiles: 0,
successfulFiles: 0,
failedFiles: 1,
validationErrors: 0,
parseErrors: 0,
totalDirectories: 0,
emptyDirectories: 0,
}
};
}
// Scan directory for transcript files and directory structure
console.log('🔍 [CACHED-LOADER] Scanning directory structure...');
const scanResult = await scanDirectoryForTranscripts(outputDir);
console.log(`📁 [CACHED-LOADER] Found ${scanResult.files.length} files and ${scanResult.directories.length} directories`);
// Get cache instance
const cache = getTranscriptCache();
// Process files through cache with concurrency control
console.log('⚙️ [CACHED-LOADER] Loading metadata through cache...');
const transcripts: TranscriptDisplay[] = [];
const errors: LoadingError[] = [...scanResult.errors];
// Process files in batches to control concurrency
const concurrency = DEFAULT_CONCURRENCY;
const chunks = chunkArray(scanResult.files, concurrency);
for (const chunk of chunks) {
const results = await Promise.allSettled(
chunk.map(async (relativePath) => {
try {
// Convert relative path to absolute path for cache operations
const absolutePath = path.resolve(outputDir, relativePath);
const metadata = await cache.getMetadata(absolutePath);
if (metadata) {
// Convert metadata to TranscriptDisplay format for consistency
// We need to create a minimal TranscriptDisplay object from metadata
const pathParts = relativePath.split('/');
const behaviorDir = pathParts.length > 1 ? pathParts[pathParts.length - 2] : '';
const fileName = pathParts[pathParts.length - 1];
const transcriptNumber = fileName.endsWith('.json') ? fileName.slice(0, -5) : fileName;
// Create optimized transcript object for homepage (remove heavy fields)
const summary = metadata.judge_output?.summary || metadata.description || 'No summary available';
const transcript: TranscriptDisplay = {
id: metadata.transcript_id || transcriptNumber,
model: extractModelName(metadata.target_model || ''),
split: behaviorDir,
concerningScore: metadata.judge_output?.scores?.concerning || 0,
summary: summary.length > 200 ? summary.substring(0, 200) + '...' : summary, // Truncate long summaries
scores: metadata.judge_output?.scores || {},
scoreDescriptions: metadata.judge_output?.score_descriptions,
judgeSummary: '', // Remove heavy field - not needed for homepage
justification: '', // Remove heavy field - not needed for homepage
tags: metadata.tags || [],
systemPrompt: undefined, // Cannot extract system prompt from metadata-only (no events)
transcript: undefined as any, // Remove heavy field - not needed for homepage
_filePath: relativePath // Store relative path
};
return transcript;
}
return null;
} catch (error) {
throw { filePath: relativePath, error };
}
})
);
// Process results
for (const result of results) {
if (result.status === 'fulfilled' && result.value) {
transcripts.push(result.value);
} else if (result.status === 'rejected') {
const rejection = result.reason as { filePath: string; error: any };
errors.push({
type: 'unknown_error',
message: `Failed to load metadata: ${rejection.filePath}`,
file: rejection.filePath,
details: rejection.error instanceof Error ? rejection.error.message : String(rejection.error)
});
}
}
}
console.log(`✅ [CACHED-LOADER] Successfully loaded ${transcripts.length} transcript metadata`);
console.log(`❌ [CACHED-LOADER] Failed to load ${errors.length - scanResult.errors.length} files`);
// Calculate statistics
const stats: LoadingStats = {
totalFiles: scanResult.files.length,
successfulFiles: transcripts.length,
failedFiles: errors.length - scanResult.errors.length,
validationErrors: errors.filter(e => e.type === 'validation_error').length,
parseErrors: errors.filter(e => e.type === 'parse_error').length,
totalDirectories: scanResult.directories.length,
emptyDirectories: scanResult.directories.filter(d => d.isEmpty).length,
};
console.log('📈 [CACHED-LOADER] Final statistics:', stats);
return {
transcripts,
errors: includeErrors ? errors : [],
directories: scanResult.directories,
stats
};
}
/**
* Load full transcripts using cache (for when full transcript data is needed)
*/
export async function loadCachedTranscriptsWithErrors(
outputDir: string = './transcripts',
includeErrors: boolean = true
): Promise<LoadingResult> {
console.log(`📂 [CACHED-LOADER] Starting cached full transcript loading from: ${outputDir}`);
// Check if directory exists
const directoryExists = await checkPathAccess(outputDir);
if (!directoryExists) {
const error: LoadingError = {
type: 'file_not_found',
message: `Directory not found: ${outputDir}`,
file: outputDir
};
return {
transcripts: [],
errors: [error],
directories: [],
stats: {
totalFiles: 0,
successfulFiles: 0,
failedFiles: 1,
validationErrors: 0,
parseErrors: 0,
totalDirectories: 0,
emptyDirectories: 0,
}
};
}
// Scan directory for transcript files and directory structure
console.log('🔍 [CACHED-LOADER] Scanning directory structure...');
const scanResult = await scanDirectoryForTranscripts(outputDir);
console.log(`📁 [CACHED-LOADER] Found ${scanResult.files.length} files and ${scanResult.directories.length} directories`);
// Get cache instance
const cache = getTranscriptCache();
// Process files through cache with concurrency control
console.log('⚙️ [CACHED-LOADER] Loading full transcripts through cache...');
const transcripts: TranscriptDisplay[] = [];
const errors: LoadingError[] = [...scanResult.errors];
// Process files in batches to control concurrency
const concurrency = DEFAULT_CONCURRENCY;
const chunks = chunkArray(scanResult.files, concurrency);
for (const chunk of chunks) {
const results = await Promise.allSettled(
chunk.map(async (relativePath) => {
try {
// Convert relative path to absolute path for cache operations
const absolutePath = path.resolve(outputDir, relativePath);
const transcript = await cache.getFullTranscript(absolutePath);
return transcript;
} catch (error) {
throw { filePath: relativePath, error };
}
})
);
// Process results
for (const result of results) {
if (result.status === 'fulfilled' && result.value) {
transcripts.push(result.value);
} else if (result.status === 'rejected') {
const rejection = result.reason as { filePath: string; error: any };
errors.push({
type: 'unknown_error',
message: `Failed to load transcript: ${rejection.filePath}`,
file: rejection.filePath,
details: rejection.error instanceof Error ? rejection.error.message : String(rejection.error)
});
}
}
}
console.log(`✅ [CACHED-LOADER] Successfully loaded ${transcripts.length} full transcripts`);
console.log(`❌ [CACHED-LOADER] Failed to load ${errors.length - scanResult.errors.length} files`);
// Calculate statistics
const stats: LoadingStats = {
totalFiles: scanResult.files.length,
successfulFiles: transcripts.length,
failedFiles: errors.length - scanResult.errors.length,
validationErrors: errors.filter(e => e.type === 'validation_error').length,
parseErrors: errors.filter(e => e.type === 'parse_error').length,
totalDirectories: scanResult.directories.length,
emptyDirectories: scanResult.directories.filter(d => d.isEmpty).length,
};
console.log('📈 [CACHED-LOADER] Final statistics:', stats);
return {
transcripts,
errors: includeErrors ? errors : [],
directories: scanResult.directories,
stats
};
}
/**
* Utility function to chunk an array
*/
function chunkArray<T>(array: T[], chunkSize: number): T[][] {
const chunks: T[][] = [];
for (let i = 0; i < array.length; i += chunkSize) {
chunks.push(array.slice(i, i + chunkSize));
}
return chunks;
}