// @kaifronsdal/transcript-viewer
// Version:
// A web-based viewer for AI conversation transcripts with rollback support
// 440 lines (375 loc) • 14.8 kB
// text/typescript
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import { loadRealTranscriptsWithErrors, loadRealTranscriptsMetadataOnly, type LoadingResult, type DirectoryInfo } from '$lib/data-loader';
import { existsSync } from 'fs';
import { promises as fs } from 'fs';
import type { TranscriptDisplay } from '$lib/types';
/**
 * One node of the folder tree returned by this endpoint.
 *
 * A node is either a folder (which may hold child nodes) or a leaf that wraps
 * a single transcript.
 */
interface FolderNode {
// Folders: the path segment. Transcripts: the label `Transcript <id>`.
name: string;
// Folders: slash-joined path relative to the root ('' for the root itself).
// Transcripts: the viewer URL `/transcript/<encoded segments>`.
path: string;
// Discriminates folder nodes from transcript leaves.
type: 'folder' | 'transcript';
// Child nodes; the tree builder sets this to an array on every folder node
// and leaves it unset on transcript nodes.
children?: FolderNode[];
// The transcript payload; set by the tree builder on transcript nodes only.
transcript?: TranscriptDisplay;
// Copied from the matching DirectoryInfo; folders created without one default to false.
isEmpty?: boolean;
// Copied from the matching DirectoryInfo; folders created without one default to 0.
transcriptCount?: number;
}
/**
 * Everything cached for one root directory: both loader results, the folder
 * tree derived from them, and the values used to decide whether the entry is
 * still fresh.
 */
interface CacheEntry {
// The root directory this entry was loaded from (also used as the cache key).
rootDir: string;
// Value returned by getDirectoryLastModified(rootDir) when the entry was built.
lastModified: number;
// Number of entries readdir(rootDir) returned when the entry was built.
// NOTE: despite the name this counts every entry, not only directories.
directoryCount: number;
// Result of loadRealTranscriptsMetadataOnly(rootDir).
metadataOnlyResult: LoadingResult;
// Result of loadRealTranscriptsWithErrors(rootDir).
fullResult: LoadingResult;
// Folder tree built from metadataOnlyResult with empty folders included.
folderTree: FolderNode[];
}
// Module-level in-memory cache, keyed by root directory path.
// Entries are written by getCachedTranscriptData and read by
// getCachedFolderTree and loadSpecificTranscript.
const transcriptCache = new Map<string, CacheEntry>();
/**
 * Returns the newest modification time (in milliseconds) among a directory
 * and its immediate subdirectories.
 *
 * The scan is intentionally shallow: plain files are ignored and
 * subdirectories are not descended into.
 *
 * @param dirPath - Directory to inspect.
 * @returns The largest `mtimeMs` found, or `Date.now()` when the directory
 *          itself cannot be read (so that callers treat their cache as stale).
 */
async function getDirectoryLastModified(dirPath: string): Promise<number> {
  try {
    // Start from the directory's own timestamp.
    let newest = Math.max(0, (await fs.stat(dirPath)).mtimeMs);

    for (const entryName of await fs.readdir(dirPath)) {
      // Entries that cannot be stat'ed are skipped rather than failing the scan.
      const entryStat = await fs.stat(`${dirPath}/${entryName}`).catch(() => null);
      if (entryStat === null || !entryStat.isDirectory()) {
        continue;
      }
      // Only the subdirectory's own mtime counts; no further recursion.
      newest = Math.max(newest, entryStat.mtimeMs);
    }

    return newest;
  } catch {
    // Unreadable directory: report "now" to force a refresh.
    return Date.now();
  }
}
/**
 * Returns transcript data for a root directory, served from the in-memory
 * cache when the cached entry still looks fresh and reloaded otherwise.
 *
 * An entry is considered fresh when its recorded modification time is not
 * older than the directory's current one and the number of entries directly
 * inside the root directory is unchanged. On a reload both the metadata-only
 * and the full result are loaded and cached together with the folder tree.
 *
 * @param rootDir - Root directory of the transcripts (also the cache key).
 * @param metadataOnly - When true the metadata-only result is returned,
 *                       otherwise the full result.
 * @returns The requested loading result.
 */
async function getCachedTranscriptData(rootDir: string, metadataOnly: boolean): Promise<LoadingResult> {
  const cached = transcriptCache.get(rootDir);

  // Freshness signals: newest mtime and the number of entries in the root.
  const newestMtime = await getDirectoryLastModified(rootDir);
  const rootEntries = await fs.readdir(rootDir);
  const entryCount = rootEntries.length;

  if (cached && cached.lastModified >= newestMtime && cached.directoryCount === entryCount) {
    console.log(`Using cached data for ${rootDir} (metadata only: ${metadataOnly})`);
    if (metadataOnly) {
      return cached.metadataOnlyResult;
    }
    return cached.fullResult;
  }

  const reason = cached ? 'stale' : 'miss';
  console.log(`Loading fresh data for ${rootDir} (cache ${reason})`);

  const metadataOnlyResult = await loadRealTranscriptsMetadataOnly(rootDir);
  const fullResult = await loadRealTranscriptsWithErrors(rootDir);

  // The tree is built once, with empty folders kept; filtering happens on read.
  const folderTree = buildFolderTreeFromTranscriptsAndDirectories(
    metadataOnlyResult.transcripts,
    metadataOnlyResult.directories,
    true
  );

  const freshEntry: CacheEntry = {
    rootDir,
    lastModified: newestMtime,
    directoryCount: entryCount,
    metadataOnlyResult,
    fullResult,
    folderTree
  };
  transcriptCache.set(rootDir, freshEntry);

  if (metadataOnly) {
    return metadataOnlyResult;
  }
  return fullResult;
}
/**
 * Returns the folder tree cached for a root directory.
 *
 * @param rootDir - Root directory whose cache entry should be read.
 * @param includeEmptyFolders - When false, empty folders are filtered out of
 *                              the returned tree; the cached tree is untouched.
 * @returns The cached tree, or an empty array when nothing is cached for
 *          `rootDir` (not expected if getCachedTranscriptData ran first).
 */
function getCachedFolderTree(rootDir: string, includeEmptyFolders: boolean = true): FolderNode[] {
  const entry = transcriptCache.get(rootDir);
  if (!entry) {
    return [];
  }
  return includeEmptyFolders ? entry.folderTree : filterEmptyFolders(entry.folderTree);
}
/**
 * Produces a copy of a folder tree with empty folders removed.
 *
 * A folder is dropped only when it is flagged `isEmpty` and has no children
 * left after its own subtree has been filtered. Surviving folders are shallow
 * copies carrying the filtered children; transcript nodes are passed through
 * as-is. The input array and its nodes are not modified.
 *
 * @param nodes - Sibling nodes to filter.
 * @returns A new array with the surviving nodes, in their original order.
 */
function filterEmptyFolders(nodes: FolderNode[]): FolderNode[] {
  const kept: FolderNode[] = [];

  for (const entry of nodes) {
    if (entry.type !== 'folder') {
      // Transcript leaves are always kept.
      kept.push(entry);
      continue;
    }

    const survivingChildren = filterEmptyFolders(entry.children || []);
    const shouldPrune = survivingChildren.length === 0 && entry.isEmpty;
    if (!shouldPrune) {
      kept.push({ ...entry, children: survivingChildren });
    }
  }

  return kept;
}
/**
 * Loads one transcript, including its full event data.
 *
 * Lookup order:
 *  1. If a cache entry exists for `rootDir`, the metadata-only transcripts are
 *     searched for a matching id and directory. On a hit, the transcript's
 *     source file (its `_filePath`) is read and parsed, and the parsed content
 *     is attached as `transcript` on a copy of the cached entry.
 *  2. Otherwise, or if step 1 fails, every transcript under `rootDir` is
 *     loaded and searched.
 *
 * @param rootDir - Root directory of the transcripts (also the cache key).
 * @param transcriptId - Id of the transcript to load.
 * @param directoryPath - Directory of the transcript; compared for exact
 *                        equality with the transcript's `split` field.
 * @returns The transcript, or `null` when it is not found or loading fails
 *          (failures are logged, never thrown).
 */
async function loadSpecificTranscript(rootDir: string, transcriptId: string, directoryPath: string = ''): Promise<TranscriptDisplay | null> {
  try {
    console.log(`Loading specific transcript: ${transcriptId} from rootDir: ${rootDir}, directoryPath: ${directoryPath}`);

    // First, try to get the transcript from cache if available
    const existingCache = transcriptCache.get(rootDir);

    if (existingCache) {
      console.log(`Found cache for ${rootDir}, searching for transcript ${transcriptId} in directory ${directoryPath}`);

      // Match on both id and directory: ids alone may repeat across directories
      const cachedTranscript = existingCache.metadataOnlyResult.transcripts.find(
        t => t.id === transcriptId && t.split === directoryPath
      );

      if (cachedTranscript) {
        console.log(`Found cached transcript: ${transcriptId}`);

        // The cached entry only carries metadata; the full event data is read
        // from the file recorded on the entry. `_filePath` is read through a
        // narrow cast instead of `any`.
        const { _filePath: filePath } = cachedTranscript as TranscriptDisplay & { _filePath?: string };

        if (filePath) {
          try {
            // Uses the module-level promise-based `fs`; no dynamic import needed.
            const content = await fs.readFile(filePath, 'utf-8');
            const fullData = JSON.parse(content);

            // Copy of the cached entry with the full event data attached
            return {
              ...cachedTranscript,
              transcript: fullData
            };
          } catch (err) {
            // Fall through to the full-load fallback below
            console.error(`Failed to load full transcript data for ${transcriptId}:`, err);
          }
        }
      } else {
        console.log(`Transcript ${transcriptId} not found in cache. Available IDs:`, existingCache.metadataOnlyResult.transcripts.map(t => t.id).slice(0, 10));
      }
    } else {
      console.log(`No cache found for ${rootDir}`);
    }

    // Fallback: load the full result and search for the transcript
    console.log(`Loading full result from ${rootDir} to find transcript ${transcriptId} in directory ${directoryPath}`);
    const fullResult = await loadRealTranscriptsWithErrors(rootDir);
    console.log(`Loaded ${fullResult.transcripts.length} transcripts, searching for ${transcriptId}`);

    const found = fullResult.transcripts.find(t => t.id === transcriptId && t.split === directoryPath);
    if (found) {
      console.log(`Found transcript ${transcriptId} in full result`);
    } else {
      console.log(`Transcript ${transcriptId} not found in directory ${directoryPath}. Available transcripts:`,
        fullResult.transcripts.filter(t => t.split === directoryPath).map(t => t.id).slice(0, 10));
    }
    return found || null;
  } catch (err) {
    console.error(`Failed to load specific transcript ${transcriptId}:`, err);
    return null;
  }
}
/**
 * GET handler for transcript listing and single-transcript loading.
 *
 * The transcript root is `process.env.TRANSCRIPT_DIR`, falling back to
 * `./transcripts`. Recognised query parameters:
 *  - `transcriptId` / `directoryPath`: load one transcript with full data.
 *  - `metadataOnly=true`: list using the metadata-only result.
 *  - `flat=true`: return a flat transcript list instead of the folder tree.
 *  - `includeEmptyFolders=false`: drop empty folders from the tree (default keeps them).
 *  - `includeErrors=true`: wrap the payload in an object that also carries
 *    `errors`, `stats` and `directories` (or `stats` only for a single transcript).
 *
 * Responds with 404 when the root directory or the requested transcript does
 * not exist, and with 500 on any unexpected failure.
 */
export const GET: RequestHandler = async ({ url }) => {
  try {
    // Resolve the transcript root: environment variable first, then the default
    const envDir = process.env.TRANSCRIPT_DIR;
    const rootDir = envDir || './transcripts';

    console.log('Environment variable TRANSCRIPT_DIR:', envDir);
    console.log('Using rootDir:', rootDir);

    const params = url.searchParams;
    const isTrue = (name: string): boolean => params.get(name) === 'true';

    const flat = isTrue('flat');
    const includeErrors = isTrue('includeErrors');
    const metadataOnly = isTrue('metadataOnly');
    // Opt-out flag: anything other than the literal 'false' keeps empty folders
    const includeEmptyFolders = params.get('includeEmptyFolders') !== 'false';
    const transcriptId = params.get('transcriptId');
    const directoryPath = params.get('directoryPath') || '';

    console.log(`Loading transcripts from: ${rootDir} (metadata only: ${metadataOnly}, transcript ID: ${transcriptId || 'none'})`);

    if (!existsSync(rootDir)) {
      console.warn(`Transcript directory does not exist: ${rootDir}`);
      return json(
        {
          error: 'Transcript directory not found',
          details: `The directory "${rootDir}" does not exist. Please create it or specify a different directory using the --dir option when starting the server.`,
          suggestedPath: rootDir
        },
        { status: 404 }
      );
    }

    // Single-transcript mode
    if (transcriptId) {
      const transcript = await loadSpecificTranscript(rootDir, transcriptId, directoryPath);

      if (!transcript) {
        return json(
          {
            error: 'Transcript not found',
            details: `Transcript with ID "${transcriptId}" not found in directory "${directoryPath}" within "${rootDir}"`
          },
          { status: 404 }
        );
      }

      // Only basic stats are available here; there are no per-file errors to report
      return json(
        includeErrors
          ? { transcript, stats: { found: true, transcriptId } }
          : { transcript }
      );
    }

    // Listing mode (served from the cache when fresh)
    const result: LoadingResult = await getCachedTranscriptData(rootDir, metadataOnly);

    console.log(`Loading completed: ${result.stats.successfulFiles}/${result.stats.totalFiles} files successful, ${result.errors.length} errors`);

    // Summary only; individual errors are not logged here
    if (result.errors.length > 0) {
      console.warn(`${result.errors.length} transcript loading errors occurred`);
    }

    // Extra fields attached to the payload when includeErrors is set
    const diagnostics = {
      errors: result.errors,
      stats: result.stats,
      directories: result.directories
    };

    if (flat) {
      return json(
        includeErrors
          ? { transcripts: result.transcripts, ...diagnostics }
          : result.transcripts
      );
    }

    const folderTree = getCachedFolderTree(rootDir, includeEmptyFolders);
    console.log(`Built folder tree with ${folderTree.length} root nodes`);

    return json(includeErrors ? { folderTree, ...diagnostics } : folderTree);
  } catch (error: any) {
    console.error('Failed to load transcripts:', error);
    // NOTE(review): the stack trace is sent to the client; confirm this
    // endpoint is only ever exposed for local/development use.
    return json(
      {
        error: 'Failed to load transcripts',
        details: error?.message || 'Unknown error',
        stack: error?.stack
      },
      { status: 500 }
    );
  }
};
/**
 * Builds the folder tree shown by the viewer from a list of directories and
 * a list of transcripts.
 *
 * Directories are materialised first so that folders without transcripts can
 * appear; each transcript is then attached to the folder named by its `split`
 * path (missing folders are created on demand). The finished tree is sorted
 * with `sortFolderTree`.
 *
 * @param transcripts - Transcripts to place in the tree.
 * @param directories - Known directories; an entry whose `relativePath` is the
 *                      literal `'root'` is skipped.
 * @param includeEmptyFolders - When false, directories flagged `isEmpty` are
 *                              not added.
 * @returns The top-level nodes (the synthetic root itself is not returned).
 */
function buildFolderTreeFromTranscriptsAndDirectories(
  transcripts: TranscriptDisplay[],
  directories: DirectoryInfo[],
  includeEmptyFolders: boolean = true
): FolderNode[] {
  const rootNode: FolderNode = {
    name: 'root',
    path: '',
    type: 'folder',
    children: [],
    isEmpty: false,
    transcriptCount: 0
  };

  // Folder lookup by normalised slash-joined path; '' is the root
  const folderMap = new Map<string, FolderNode>([['', rootNode]]);

  // Walks `relativePath` from the root, creating any missing folder along the
  // way, and returns the folder the path ends at. Empty segments (leading,
  // trailing or doubled slashes) are ignored.
  const ensureFolder = (relativePath: string): FolderNode => {
    let currentNode = rootNode;
    let currentPath = '';

    for (const part of relativePath.split('/')) {
      if (part === '') continue;
      currentPath = currentPath ? `${currentPath}/${part}` : part;

      let folder = folderMap.get(currentPath);
      if (!folder) {
        folder = {
          name: part,
          path: currentPath,
          type: 'folder',
          children: [],
          isEmpty: false,
          transcriptCount: 0
        };
        const siblings = currentNode.children ?? (currentNode.children = []);
        siblings.push(folder);
        folderMap.set(currentPath, folder);
      }
      currentNode = folder;
    }

    return currentNode;
  };

  // First, create all directories (including empty ones)
  for (const dirInfo of directories) {
    if (dirInfo.relativePath === 'root') continue;
    if (!includeEmptyFolders && dirInfo.isEmpty) continue;

    // Apply the metadata to the folder the walk ended at. Looking the folder
    // up again by the raw relativePath would miss it whenever that path is
    // not in normalised form (e.g. a trailing slash).
    const folderNode = ensureFolder(dirInfo.relativePath);
    folderNode.isEmpty = dirInfo.isEmpty;
    folderNode.transcriptCount = dirInfo.transcriptCount;
  }

  // Then, add transcripts to their respective folders
  for (const transcript of transcripts) {
    // Folders are normally already present; they are created here only if the
    // directory list was incomplete
    const folderNode = ensureFolder(transcript.split);

    // URL = directory segments + transcript id, each segment encoded on its
    // own so that '/' stays a separator. An empty split contributes no segment.
    const pathSegments = transcript.split
      ? transcript.split.split('/').concat([transcript.id])
      : [transcript.id];
    const encodedPath = pathSegments.map(segment => encodeURIComponent(segment)).join('/');

    const siblings = folderNode.children ?? (folderNode.children = []);
    siblings.push({
      name: `Transcript ${transcript.id}`,
      path: `/transcript/${encodedPath}`,
      type: 'transcript',
      transcript: transcript
    });
  }

  // Sort all folders and transcripts recursively
  sortFolderTree(rootNode);

  // Only the root's children are exposed; the root itself is synthetic
  return rootNode.children || [];
}
/**
 * Sorts a folder tree in place, recursively.
 *
 * Within each folder the order is: folders first, ordered by name
 * (`localeCompare`), then transcripts ordered by `concerningScore` from
 * highest to lowest (a missing score counts as 0).
 *
 * @param node - Root of the subtree to sort; nodes without `children` are
 *               left untouched.
 */
function sortFolderTree(node: FolderNode): void {
  const kids = node.children;
  if (!kids) {
    return;
  }

  kids.sort((left, right) => {
    switch (`${left.type}:${right.type}`) {
      case 'folder:transcript':
        return -1;
      case 'transcript:folder':
        return 1;
      case 'folder:folder':
        return left.name.localeCompare(right.name);
      case 'transcript:transcript':
        // Descending by score
        return (right.transcript?.concerningScore || 0) - (left.transcript?.concerningScore || 0);
      default:
        return 0;
    }
  });

  // Descend into subfolders only; transcript leaves have nothing to sort
  kids
    .filter(kid => kid.type === 'folder')
    .forEach(subfolder => sortFolderTree(subfolder));
}