@kaifronsdal/transcript-viewer
Version:
A web-based viewer for AI conversation transcripts with rollback support
564 lines (492 loc) • 18.9 kB
text/typescript
import type { Transcript, TranscriptDisplay } from './types';
import { promises as fs } from 'fs';
import path from 'path';
import { validateAndParseTranscript, formatValidationErrors, extractTranscriptInfo, type ValidationResult } from './schema-validator';
import type { TranscriptMetadata } from './types';
// This module only works on the server side: it imports Node's `fs` and `path`
// modules (see the imports above) to read transcript files from disk.
// A defined `window` global is taken as the sign of a browser-like environment,
// so fail at import time rather than later at the first file-system call.
if (typeof window !== 'undefined') {
throw new Error('data-loader can only be used on the server side');
}
/**
 * One problem recorded while scanning for or loading transcript files.
 * The loaders in this module collect these in an array instead of throwing.
 */
export interface LoadingError {
/** Category of the failure. */
type: 'file_not_found' | 'permission_denied' | 'parse_error' | 'validation_error' | 'unknown_error';
/** Human-readable summary of what went wrong. */
message: string;
/** Path of the file or directory the problem relates to, when one is known. */
file?: string;
/** Extra detail; in this module usually the message of the underlying `Error`. */
details?: string;
/** Textual summary of schema validation problems (this module stores a count such as "3 validation errors"). */
validationErrors?: string;
/** Whatever could still be extracted from an invalid file (this module stores the result of `extractTranscriptInfo`). */
partialData?: any;
}
/** Summary of one directory visited during a transcript scan. */
export interface DirectoryInfo {
/** Path of the directory as it was scanned (the scan root joined with nested entry names). */
path: string;
/** Path relative to the scan root; the root itself is reported as 'root'. */
relativePath: string;
/** True only when the directory listing contained no entries at all. */
isEmpty: boolean;
/** True when at least one `transcript_*.json` file sits directly in this directory, whether or not it loaded. */
hasTranscripts: boolean;
/** Number of transcript files directly in this directory that were loaded successfully. */
transcriptCount: number;
/** Number of immediate subdirectories. */
subdirectoryCount: number;
}
/** Everything produced by one scan of a transcript directory tree. */
export interface LoadingResult {
/** Successfully loaded transcripts; the loaders in this module sort them by `concerningScore`, highest first. */
transcripts: TranscriptDisplay[];
/** Every problem encountered during the scan. */
errors: LoadingError[];
/** One entry per directory whose listing could be read; the loaders sort them by `relativePath`. */
directories: DirectoryInfo[];
/** Counters accumulated over the whole scan. */
stats: {
/** Number of `transcript_*.json` files discovered. */
totalFiles: number;
/** Files that were loaded into `transcripts`. */
successfulFiles: number;
/** Files that were rejected, plus directory entries that could not be inspected. */
failedFiles: number;
/** Files whose content did not pass schema validation. */
validationErrors: number;
/** Files that could not be read or processed. */
parseErrors: number;
/** Directories whose listing was read, including the scan root. */
totalDirectories: number;
/** Directories that contained no entries. */
emptyDirectories: number;
};
}
/**
 * Builds the flattened record the viewer displays for one transcript.
 *
 * Missing judge data is replaced by placeholder text (or a score of 0) so the
 * returned record always has every display field populated.
 *
 * @param transcript - Transcript to summarise; `metadata.judge_output` may be absent.
 * @param behaviorDir - Directory label stored as the record's `split`.
 * @param transcriptNumber - Identifier stored as the record's `id`,
 *   e.g. "2025-07-15_14-40-37_5e6f1e03".
 * @param filePath - Source file path, kept on the record for debugging.
 * @returns The display record, including a reference to the full transcript.
 */
function createTranscriptDisplay(transcript: Transcript, behaviorDir: string, transcriptNumber: string, filePath: string): TranscriptDisplay {
  const { metadata } = transcript;
  const judge = metadata.judge_output;
  const judgeScores = judge?.scores;
  const judgeSummaryText = judge?.summary;

  const display: TranscriptDisplay = {
    id: transcriptNumber,
    model: extractModelName(extractTargetModel(transcript)),
    split: behaviorDir,
    concerningScore: judgeScores?.concerning || 0,
    // Prefer the judge's summary, then the transcript description.
    summary: judgeSummaryText || metadata.description || 'No summary available',
    scores: judgeScores || {},
    judgeSummary: judgeSummaryText || 'No judgment summary available',
    justification: judge?.justification || 'No justification available',
    systemPrompt: extractSystemPrompt(transcript),
    transcript,
    _filePath: filePath
  };
  return display;
}
/**
 * Finds the system prompt in a transcript's event list.
 *
 * @param transcript - Transcript whose `events` are searched in order.
 * @returns The content of the first added message of type 'system'; non-string
 *   content is serialised with `JSON.stringify`. `undefined` when no such event exists.
 */
function extractSystemPrompt(transcript: Transcript): string | undefined {
  for (const { edit } of transcript.events) {
    // Only 'add' edits are inspected for a message.
    if (edit.operation !== 'add') continue;

    const message = edit.message;
    if (message?.type !== 'system') continue;

    const { content } = message;
    if (typeof content === 'string') {
      return content;
    }
    return JSON.stringify(content);
  }
  return undefined;
}
/**
 * Determines which model a transcript targeted.
 *
 * The metadata field wins when it is set to something other than 'unknown'.
 * Otherwise the system prompt is searched for a sentence of the form
 * "The target AI is a `anthropic:claude-3-5-sonnet-latest` model".
 *
 * @param transcript - Transcript to inspect.
 * @returns The model identifier, or 'unknown' when neither source provides one.
 */
function extractTargetModel(transcript: Transcript): string {
  const declaredModel = transcript.metadata.target_model;
  if (declaredModel && declaredModel !== 'unknown') {
    return declaredModel;
  }

  // Metadata gave nothing usable: look for the model name quoted in the system prompt.
  const promptText = extractSystemPrompt(transcript);
  const captured = promptText ? promptText.match(/The target AI is a `([^`]+)` model/) : null;
  return captured?.[1] || 'unknown';
}
/**
 * Backward-compatible loader that returns only the transcripts that loaded.
 *
 * Delegates to `loadRealTranscriptsWithErrors`; when any errors were collected,
 * a warning with the error count and the loading stats are written to the console.
 *
 * @param outputDir - Root directory to scan. Defaults to './transcripts'.
 * @returns The successfully loaded transcripts.
 */
export async function loadRealTranscripts(outputDir: string = './transcripts'): Promise<TranscriptDisplay[]> {
  const { transcripts, errors, stats } = await loadRealTranscriptsWithErrors(outputDir);

  const errorCount = errors.length;
  if (errorCount > 0) {
    console.warn(`Transcript loading completed with ${errorCount} errors`);
    console.log(`Loading stats:`, stats);
  }

  return transcripts;
}
/**
 * Scans `outputDir` recursively for transcript files and reports everything found.
 *
 * Failures are collected in the result's `errors` rather than thrown. When the
 * root directory cannot be accessed, the result holds a single 'file_not_found'
 * error and nothing else.
 *
 * @param outputDir - Root directory to scan. Defaults to './transcripts'.
 * @returns Transcripts sorted by concerning score (highest first), directories
 *   sorted by relative path, the collected errors, and scan counters.
 */
export async function loadRealTranscriptsWithErrors(outputDir: string = './transcripts'): Promise<LoadingResult> {
  const result: LoadingResult = {
    transcripts: [],
    errors: [],
    directories: [],
    stats: {
      totalFiles: 0,
      successfulFiles: 0,
      failedFiles: 0,
      validationErrors: 0,
      parseErrors: 0,
      totalDirectories: 0,
      emptyDirectories: 0
    }
  };
  const reasonFor = (cause: unknown): string => (cause instanceof Error ? cause.message : 'Unknown error');

  try {
    // An inaccessible root short-circuits the whole scan.
    try {
      await fs.access(outputDir);
    } catch (accessFailure) {
      const missingRoot: LoadingError = {
        type: 'file_not_found',
        message: `Directory does not exist: ${outputDir}`,
        details: reasonFor(accessFailure)
      };
      return { transcripts: [], errors: [missingRoot], directories: [], stats: result.stats };
    }

    await scanDirectoryForTranscriptsAndFolders(
      outputDir,
      outputDir,
      result.transcripts,
      result.errors,
      result.directories,
      result.stats
    );
  } catch (scanFailure) {
    result.errors.push({
      type: 'unknown_error',
      message: 'Failed to load real transcripts',
      details: reasonFor(scanFailure)
    });
  }

  // Most concerning transcripts first; directories in a stable, path-based order.
  result.transcripts.sort((left, right) => right.concerningScore - left.concerningScore);
  result.directories.sort((left, right) => left.relativePath.localeCompare(right.relativePath));

  return result;
}
/**
 * Recursively walks `currentDir`, loading every `transcript_*.json` file found and
 * recording one `DirectoryInfo` for each directory whose listing could be read.
 *
 * Nothing is returned: loaded transcripts are appended to `transcripts`, problems
 * to `errors`, directory summaries to `directories`, and counters are incremented
 * on `stats`. I/O failures are recorded in `errors` instead of being thrown.
 *
 * @param currentDir - Directory to scan on this call.
 * @param rootDir - Root of the whole scan; relative paths are computed against it.
 * @param transcripts - Accumulator for successfully loaded transcripts.
 * @param errors - Accumulator for every problem encountered.
 * @param directories - Accumulator for directory summaries (a subdirectory is added before its parent).
 * @param stats - Running counters shared across the whole scan.
 */
async function scanDirectoryForTranscriptsAndFolders(
  currentDir: string,
  rootDir: string,
  transcripts: TranscriptDisplay[],
  errors: LoadingError[],
  directories: DirectoryInfo[],
  stats: { totalFiles: number; successfulFiles: number; failedFiles: number; validationErrors: number; parseErrors: number; totalDirectories: number; emptyDirectories: number }
): Promise<void> {
  try {
    const items = await fs.readdir(currentDir);

    const directoryInfo: DirectoryInfo = {
      path: currentDir,
      relativePath: path.relative(rootDir, currentDir) || 'root',
      isEmpty: true,
      hasTranscripts: false,
      transcriptCount: 0,
      subdirectoryCount: 0
    };
    stats.totalDirectories++;

    const transcriptFiles: string[] = [];

    for (const item of items) {
      const fullPath = path.join(currentDir, item);
      // Any entry at all, even one we cannot inspect, makes the directory non-empty.
      directoryInfo.isEmpty = false;

      try {
        const stat = await fs.stat(fullPath);
        if (stat.isDirectory()) {
          directoryInfo.subdirectoryCount++;
          // Descend immediately; the subdirectory records its own DirectoryInfo.
          await scanDirectoryForTranscriptsAndFolders(fullPath, rootDir, transcripts, errors, directories, stats);
        } else if (item.startsWith('transcript_') && item.endsWith('.json')) {
          transcriptFiles.push(fullPath);
          directoryInfo.hasTranscripts = true;
        }
      } catch (err) {
        const error: LoadingError = {
          type: 'permission_denied',
          message: `Failed to access file or directory: ${fullPath}`,
          file: fullPath,
          details: err instanceof Error ? err.message : 'Unknown error'
        };
        errors.push(error);
        stats.failedFiles++;
      }
    }

    // Load the transcript files found directly in this directory.
    for (const filePath of transcriptFiles) {
      stats.totalFiles++;
      const success = await processTranscriptFile(filePath, rootDir, transcripts, errors, stats);
      if (success) {
        directoryInfo.transcriptCount++;
      }
    }

    if (directoryInfo.isEmpty) {
      stats.emptyDirectories++;
    }

    // Empty directories are listed too.
    directories.push(directoryInfo);
  } catch (err) {
    // The listing itself failed. Record which directory, like the per-entry errors do.
    const error: LoadingError = {
      type: 'permission_denied',
      message: `Failed to scan directory: ${currentDir}`,
      file: currentDir,
      details: err instanceof Error ? err.message : 'Unknown error'
    };
    errors.push(error);
  }
}
/**
 * Reads, validates and loads a single transcript file.
 *
 * Schema validation problems are always recorded in `errors`. The file is only
 * rejected when it lacks metadata or events; a file that fails validation but has
 * both is still loaded on a best-effort basis, and counts as a success.
 *
 * @param filePath - Path of the transcript file to load.
 * @param rootDir - Scan root; the file's directory relative to it becomes the transcript's split.
 * @param transcripts - Accumulator the loaded transcript is appended to.
 * @param errors - Accumulator for validation and read/processing problems.
 * @param stats - Running counters updated to reflect the outcome.
 * @returns `true` when a transcript was appended to `transcripts`, otherwise `false`.
 */
async function processTranscriptFile(
  filePath: string,
  rootDir: string,
  transcripts: TranscriptDisplay[],
  errors: LoadingError[],
  stats: { totalFiles: number; successfulFiles: number; failedFiles: number; validationErrors: number; parseErrors: number }
): Promise<boolean> {
  const filename = path.basename(filePath);

  try {
    const content = await fs.readFile(filePath, 'utf-8');
    const validationResult: ValidationResult = validateAndParseTranscript(content, filename);

    if (!validationResult.valid) {
      const info = extractTranscriptInfo(validationResult.data);
      // Usable means the essential structure (metadata and events) is present.
      const usable = info.hasMetadata && info.hasEvents;

      const error: LoadingError = {
        type: 'validation_error',
        message: usable
          ? `Schema validation warnings for ${filename}`
          : `Schema validation failed for ${filename} - missing essential structure`,
        file: filePath,
        details: `File has ${info.hasMetadata ? 'metadata' : 'no metadata'}, ${info.hasEvents ? `${info.eventCount} events` : 'no events'}, version: ${info.version || 'unknown'}`,
        validationErrors: `${validationResult.errors.length} validation errors`,
        partialData: info
      };
      errors.push(error);
      stats.validationErrors++;

      if (!usable) {
        stats.failedFiles++;
        return false;
      }
      // Usable despite validation problems: the warning is recorded and loading
      // continues. Previously this path returned false, silently dropping the file.
    }

    // When validation failed, the data is only known to have metadata and events,
    // so it is treated as a Transcript on a best-effort basis.
    const transcript = validationResult.data as Transcript;

    // The identifier is everything between "transcript_" and ".json":
    //   transcript_1.json                      -> "1"
    //   transcript_2025-07-11_20-01-36_2.json  -> "2025-07-11_20-01-36_2"
    const transcriptNumber = filename.match(/^transcript_(.+)\.json$/)?.[1] || '0';

    // The file's directory relative to the scan root is used as the split/category.
    const splitPath = path.relative(rootDir, path.dirname(filePath)) || 'root';

    transcripts.push(createTranscriptDisplay(transcript, splitPath, transcriptNumber, filePath));
    stats.successfulFiles++;
    return true;
  } catch (err) {
    // Read failures and anything thrown while building the display record end up here.
    const error: LoadingError = {
      type: 'parse_error',
      message: `Failed to process transcript file: ${filename}`,
      file: filePath,
      details: err instanceof Error ? err.message : 'Unknown error'
    };
    errors.push(error);
    stats.failedFiles++;
    stats.parseErrors++;
    return false;
  }
}
/**
 * Loads only the `metadata` section of a transcript file.
 *
 * The whole file is still read and parsed; only the returned value is limited to
 * the metadata. Failures are logged to the console and reported as `null`, never thrown.
 *
 * @param filePath - Path of the transcript JSON file.
 * @returns The file's `metadata` object, or `null` when the file cannot be read,
 *   is not valid JSON, or has no object-valued `metadata` property.
 */
export async function loadTranscriptMetadataOnly(filePath: string): Promise<TranscriptMetadata | null> {
  try {
    const content = await fs.readFile(filePath, 'utf-8');
    const data: unknown = JSON.parse(content);

    const metadata =
      typeof data === 'object' && data !== null
        ? (data as { metadata?: unknown }).metadata
        : undefined;

    // Metadata must be a plain JSON object. A string, number, boolean or array
    // would otherwise be passed on as if it were real metadata.
    if (typeof metadata !== 'object' || metadata === null || Array.isArray(metadata)) {
      console.warn(`File ${filePath} does not have valid metadata structure`);
      return null;
    }

    return metadata as TranscriptMetadata;
  } catch (err) {
    console.error(`Failed to load metadata from ${filePath}:`, err);
    return null;
  }
}
/**
 * Metadata-only variant of the transcript loader: scans `outputDir` recursively
 * using the metadata-only directory scanner.
 *
 * Failures are collected in the result's `errors` rather than thrown. When the
 * root directory cannot be accessed, the result holds a single 'file_not_found'
 * error and nothing else.
 *
 * @param outputDir - Root directory to scan. Defaults to './transcripts'.
 * @returns Transcripts sorted by concerning score (highest first), directories
 *   sorted by relative path, the collected errors, and scan counters.
 */
export async function loadRealTranscriptsMetadataOnly(outputDir: string = './transcripts'): Promise<LoadingResult> {
  const loaded: TranscriptDisplay[] = [];
  const problems: LoadingError[] = [];
  const visited: DirectoryInfo[] = [];
  const stats = {
    totalFiles: 0,
    successfulFiles: 0,
    failedFiles: 0,
    validationErrors: 0,
    parseErrors: 0,
    totalDirectories: 0,
    emptyDirectories: 0
  };
  const explain = (cause: unknown): string => {
    if (cause instanceof Error) {
      return cause.message;
    }
    return 'Unknown error';
  };

  try {
    // Probe the root first so an inaccessible directory yields one clear error.
    let rootReadable = true;
    let accessFailure: unknown;
    try {
      await fs.access(outputDir);
    } catch (err) {
      rootReadable = false;
      accessFailure = err;
    }

    if (!rootReadable) {
      const missingRoot: LoadingError = {
        type: 'file_not_found',
        message: `Directory does not exist: ${outputDir}`,
        details: explain(accessFailure)
      };
      return { transcripts: [], errors: [missingRoot], directories: [], stats };
    }

    await scanDirectoryForTranscriptsAndFoldersMetadataOnly(outputDir, outputDir, loaded, problems, visited, stats);
  } catch (err) {
    problems.push({
      type: 'unknown_error',
      message: 'Failed to load real transcripts (metadata only)',
      details: explain(err)
    });
  }

  // Highest concerning score first; directories ordered by relative path.
  loaded.sort((first, second) => second.concerningScore - first.concerningScore);
  visited.sort((first, second) => first.relativePath.localeCompare(second.relativePath));

  return { transcripts: loaded, errors: problems, directories: visited, stats };
}
/**
 * Recursively walks `currentDir`, loading the metadata of every `transcript_*.json`
 * file found and recording one `DirectoryInfo` for each directory whose listing
 * could be read.
 *
 * Nothing is returned: loaded transcripts are appended to `transcripts`, problems
 * to `errors`, directory summaries to `directories`, and counters are incremented
 * on `stats`. I/O failures are recorded in `errors` instead of being thrown.
 *
 * @param currentDir - Directory to scan on this call.
 * @param rootDir - Root of the whole scan; relative paths are computed against it.
 * @param transcripts - Accumulator for successfully loaded transcripts.
 * @param errors - Accumulator for every problem encountered.
 * @param directories - Accumulator for directory summaries (a subdirectory is added before its parent).
 * @param stats - Running counters shared across the whole scan.
 */
async function scanDirectoryForTranscriptsAndFoldersMetadataOnly(
  currentDir: string,
  rootDir: string,
  transcripts: TranscriptDisplay[],
  errors: LoadingError[],
  directories: DirectoryInfo[],
  stats: { totalFiles: number; successfulFiles: number; failedFiles: number; validationErrors: number; parseErrors: number; totalDirectories: number; emptyDirectories: number }
): Promise<void> {
  try {
    const items = await fs.readdir(currentDir);

    stats.totalDirectories++;
    const directoryInfo: DirectoryInfo = {
      path: currentDir,
      relativePath: path.relative(rootDir, currentDir) || 'root',
      isEmpty: true,
      hasTranscripts: false,
      transcriptCount: 0,
      subdirectoryCount: 0
    };

    const transcriptFiles: string[] = [];

    for (const item of items) {
      const fullPath = path.join(currentDir, item);
      // Any entry at all, even one we cannot inspect, makes the directory non-empty.
      directoryInfo.isEmpty = false;

      try {
        const stat = await fs.stat(fullPath);
        if (stat.isDirectory()) {
          directoryInfo.subdirectoryCount++;
          // Descend immediately; the subdirectory records its own DirectoryInfo.
          await scanDirectoryForTranscriptsAndFoldersMetadataOnly(fullPath, rootDir, transcripts, errors, directories, stats);
        } else if (item.startsWith('transcript_') && item.endsWith('.json')) {
          transcriptFiles.push(fullPath);
          directoryInfo.hasTranscripts = true;
        }
      } catch (err) {
        const error: LoadingError = {
          type: 'permission_denied',
          message: `Failed to access file or directory: ${fullPath}`,
          file: fullPath,
          details: err instanceof Error ? err.message : 'Unknown error'
        };
        errors.push(error);
        stats.failedFiles++;
      }
    }

    // Load the transcript files found directly in this directory (metadata only).
    for (const filePath of transcriptFiles) {
      stats.totalFiles++;
      const success = await processTranscriptFileMetadataOnly(filePath, rootDir, transcripts, errors, stats);
      if (success) {
        directoryInfo.transcriptCount++;
      }
    }

    if (directoryInfo.isEmpty) {
      stats.emptyDirectories++;
    }

    // Empty directories are listed too.
    directories.push(directoryInfo);
  } catch (err) {
    // The listing itself failed. Record which directory, like the per-entry errors do.
    const error: LoadingError = {
      type: 'permission_denied',
      message: `Failed to scan directory: ${currentDir}`,
      file: currentDir,
      details: err instanceof Error ? err.message : 'Unknown error'
    };
    errors.push(error);
  }
}
/**
 * Loads one transcript file in metadata-only mode and appends its display record.
 *
 * The resulting transcript carries the file's metadata and an empty `events`
 * array. Any failure is recorded in `errors` as a 'parse_error' and counted in
 * `stats.failedFiles` and `stats.parseErrors`.
 *
 * @param filePath - Path of the transcript file to load.
 * @param rootDir - Scan root; the file's directory relative to it becomes the transcript's split.
 * @param transcripts - Accumulator the loaded transcript is appended to.
 * @param errors - Accumulator for load failures.
 * @param stats - Running counters updated to reflect the outcome.
 * @returns `true` when a transcript was appended to `transcripts`, otherwise `false`.
 */
async function processTranscriptFileMetadataOnly(
  filePath: string,
  rootDir: string,
  transcripts: TranscriptDisplay[],
  errors: LoadingError[],
  stats: { totalFiles: number; successfulFiles: number; failedFiles: number; validationErrors: number; parseErrors: number }
): Promise<boolean> {
  // Both failure paths record the same kind of error and bump the same counters.
  const recordParseFailure = (message: string, details: string): false => {
    errors.push({ type: 'parse_error', message, file: filePath, details });
    stats.failedFiles++;
    stats.parseErrors++;
    return false;
  };

  try {
    const metadata = await loadTranscriptMetadataOnly(filePath);
    if (!metadata) {
      return recordParseFailure(
        `Failed to load metadata from ${path.basename(filePath)}`,
        'Invalid metadata structure'
      );
    }

    // Events are deliberately left empty: only the metadata was loaded.
    const metadataOnly: Transcript = { metadata, events: [] };

    // The identifier is everything between "transcript_" and ".json" in the filename.
    const baseName = path.basename(filePath);
    const identifier = baseName.match(/^transcript_(.+)\.json$/)?.[1] || '0';

    // The file's directory relative to the scan root is used as the split/category.
    const split = path.relative(rootDir, path.dirname(filePath)) || 'root';

    transcripts.push(createTranscriptDisplay(metadataOnly, split, identifier, filePath));
    stats.successfulFiles++;
    return true;
  } catch (err) {
    return recordParseFailure(
      `Failed to process transcript file (metadata only): ${path.basename(filePath)}`,
      err instanceof Error ? err.message : 'Unknown error'
    );
  }
}
/**
 * Turns a target-model identifier into a short display name.
 *
 * - Empty or 'unknown' input yields 'Unknown'.
 * - Identifiers containing 'claude' use the part after the first ':' (or the
 *   whole string when there is none), with '-latest' and '-20241022' removed,
 *   e.g. "anthropic:claude-3-5-sonnet-latest" becomes "claude-3-5-sonnet".
 * - Anything else uses the part after the last ':'.
 *
 * @param targetModel - Identifier such as "provider:model-name".
 * @returns The display name.
 */
function extractModelName(targetModel: string): string {
  if (!targetModel || targetModel === 'unknown') {
    return 'Unknown';
  }

  const segments = targetModel.split(':');

  if (!targetModel.includes('claude')) {
    return segments.pop() || targetModel;
  }

  const withoutProvider = segments[1] || targetModel;
  return withoutProvider.replace('-latest', '').replace('-20241022', '');
}
/**
 * Static transcript source for development and testing.
 *
 * Currently a placeholder: it always returns a new empty array. In production
 * the list would be populated by a build-time script.
 *
 * @returns An empty list of transcripts.
 */
export function loadStaticTranscripts(): Transcript[] {
  const bundled: Transcript[] = [];
  return bundled;
}