UNPKG

@kaifronsdal/transcript-viewer

Version:

A web-based viewer for AI conversation transcripts with rollback support

440 lines (375 loc) 14.8 kB
import { json } from '@sveltejs/kit';
import type { RequestHandler } from './$types';
import {
  loadRealTranscriptsWithErrors,
  loadRealTranscriptsMetadataOnly,
  type LoadingResult,
  type DirectoryInfo
} from '$lib/data-loader';
import { existsSync } from 'fs';
import { promises as fs } from 'fs';
import type { TranscriptDisplay } from '$lib/types';

/** A node of the folder tree returned to the client: either a folder or a transcript leaf. */
interface FolderNode {
  name: string;
  path: string;
  type: 'folder' | 'transcript';
  children?: FolderNode[];
  transcript?: TranscriptDisplay;
  isEmpty?: boolean;
  transcriptCount?: number;
}

/** Cache structure storing everything loaded for one root directory. */
interface CacheEntry {
  rootDir: string;
  lastModified: number;
  directoryCount: number;
  metadataOnlyResult: LoadingResult;
  fullResult: LoadingResult;
  folderTree: FolderNode[];
}

// Simple in-memory cache, keyed by root directory.
const transcriptCache = new Map<string, CacheEntry>();

/**
 * Returns the most recent modification time (ms) of `dirPath` and of its
 * immediate subdirectories. Recursion is deliberately limited to one level
 * for performance.
 *
 * @param dirPath Directory to inspect.
 * @returns The latest `mtimeMs` seen, or `Date.now()` when the directory
 *          cannot be inspected (which forces callers to treat caches as stale).
 */
async function getDirectoryLastModified(dirPath: string): Promise<number> {
  try {
    const dirStat = await fs.stat(dirPath);
    const entries = await fs.readdir(dirPath);

    // The per-entry stats are independent of each other, so run them concurrently.
    const entryTimes = await Promise.all(
      entries.map(async (entry) => {
        try {
          const entryStat = await fs.stat(`${dirPath}/${entry}`);
          // Only subdirectories contribute; plain files are ignored.
          return entryStat.isDirectory() ? entryStat.mtimeMs : 0;
        } catch {
          // Skip entries we can't access.
          return 0;
        }
      })
    );

    // reduce (not Math.max(...spread)) so very large directories cannot overflow the call stack.
    return entryTimes.reduce((latest, time) => Math.max(latest, time), Math.max(0, dirStat.mtimeMs));
  } catch {
    // If we can't access the directory, return current time to force refresh.
    return Date.now();
  }
}

/**
 * Returns transcript data for `rootDir`, served from the in-memory cache when
 * neither the directory modification time nor the number of top-level entries
 * has changed; otherwise reloads both result variants, rebuilds the folder
 * tree and replaces the cache entry.
 *
 * @param rootDir Root transcript directory (also the cache key).
 * @param metadataOnly When true, the metadata-only result is returned.
 * @returns The metadata-only or the full loading result.
 */
async function getCachedTranscriptData(rootDir: string, metadataOnly: boolean): Promise<LoadingResult> {
  const cacheKey = rootDir;
  const existingCache = transcriptCache.get(cacheKey);

  // Get current directory modification time.
  const currentLastModified = await getDirectoryLastModified(rootDir);

  // Also check if the entry count has changed (to detect new directories).
  const currentItems = await fs.readdir(rootDir);
  const currentDirectoryCount = currentItems.length;

  // Check if we have valid cached data.
  if (
    existingCache &&
    existingCache.lastModified >= currentLastModified &&
    existingCache.directoryCount === currentDirectoryCount
  ) {
    console.log(`Using cached data for ${rootDir} (metadata only: ${metadataOnly})`);
    return metadataOnly ? existingCache.metadataOnlyResult : existingCache.fullResult;
  }

  // Load fresh data.
  console.log(`Loading fresh data for ${rootDir} (cache ${existingCache ? 'stale' : 'miss'})`);
  const metadataOnlyResult = await loadRealTranscriptsMetadataOnly(rootDir);
  const fullResult = await loadRealTranscriptsWithErrors(rootDir);

  // Build the folder tree once; it is shared by both result variants.
  const folderTree = buildFolderTreeFromTranscriptsAndDirectories(
    metadataOnlyResult.transcripts,
    metadataOnlyResult.directories,
    true // includeEmptyFolders
  );

  // Cache the results.
  transcriptCache.set(cacheKey, {
    rootDir,
    lastModified: currentLastModified,
    directoryCount: currentDirectoryCount,
    metadataOnlyResult,
    fullResult,
    folderTree
  });

  return metadataOnly ? metadataOnlyResult : fullResult;
}

/**
 * Returns the cached folder tree for `rootDir`.
 *
 * @param rootDir Root transcript directory (cache key).
 * @param includeEmptyFolders When false, folders flagged empty that have no
 *        remaining children are filtered out of the returned tree.
 * @returns The cached tree, or an empty array when nothing is cached for
 *          `rootDir` (not expected when getCachedTranscriptData ran first).
 */
function getCachedFolderTree(rootDir: string, includeEmptyFolders: boolean = true): FolderNode[] {
  const existingCache = transcriptCache.get(rootDir);

  if (!existingCache) {
    // Fallback - shouldn't happen if getCachedTranscriptData was called first.
    return [];
  }

  return includeEmptyFolders ? existingCache.folderTree : filterEmptyFolders(existingCache.folderTree);
}

/**
 * Recursively removes folders that are flagged `isEmpty` and have no children
 * left after filtering. Folder nodes are shallow-copied, so the input tree is
 * not mutated; transcript nodes are returned as-is.
 */
function filterEmptyFolders(nodes: FolderNode[]): FolderNode[] {
  const kept: FolderNode[] = [];

  for (const node of nodes) {
    if (node.type !== 'folder') {
      kept.push(node); // Keep transcript nodes.
      continue;
    }

    const filteredChildren = filterEmptyFolders(node.children || []);
    if (filteredChildren.length === 0 && node.isEmpty) {
      continue; // Remove empty folder.
    }
    kept.push({ ...node, children: filteredChildren });
  }

  return kept;
}

/**
 * Loads a single transcript, including its full event data.
 *
 * The cached metadata is consulted first: when a matching entry exposes a
 * `_filePath`, that file is read and parsed and its content replaces the
 * `transcript` field. Otherwise (or when that read fails) all transcripts
 * under `rootDir` are loaded and searched.
 *
 * @param rootDir Root transcript directory.
 * @param transcriptId ID of the transcript to load.
 * @param directoryPath Directory path that must equal the transcript's `split`.
 * @returns The transcript, or `null` when it is not found or loading fails.
 */
async function loadSpecificTranscript(
  rootDir: string,
  transcriptId: string,
  directoryPath: string = ''
): Promise<TranscriptDisplay | null> {
  try {
    console.log(`Loading specific transcript: ${transcriptId} from rootDir: ${rootDir}, directoryPath: ${directoryPath}`);

    // First, try to get the transcript from cache if available.
    const existingCache = transcriptCache.get(rootDir);

    if (existingCache) {
      console.log(`Found cache for ${rootDir}, searching for transcript ${transcriptId} in directory ${directoryPath}`);

      // Look for the transcript in cached data, matching both ID and directory path.
      const cachedTranscript = existingCache.metadataOnlyResult.transcripts.find(
        t => t.id === transcriptId && t.split === directoryPath
      );

      if (cachedTranscript) {
        console.log(`Found cached transcript: ${transcriptId}`);

        // We found it in cache, but we need to load the full event data.
        // `_filePath` is not part of the declared TranscriptDisplay shape as used here,
        // hence the narrow cast. NOTE(review): presumably attached by the data loader — confirm.
        const filePath = (cachedTranscript as TranscriptDisplay & { _filePath?: string })._filePath;
        if (filePath) {
          try {
            // Uses the module-level fs promises API.
            const content = await fs.readFile(filePath, 'utf-8');
            const fullData = JSON.parse(content);

            // Create a new TranscriptDisplay with full event data.
            return { ...cachedTranscript, transcript: fullData };
          } catch (err) {
            // Best effort: fall through to the full reload below.
            console.error(`Failed to load full transcript data for ${transcriptId}:`, err);
          }
        }
      } else {
        console.log(
          `Transcript ${transcriptId} not found in cache. Available IDs:`,
          existingCache.metadataOnlyResult.transcripts.map(t => t.id).slice(0, 10)
        );
      }
    } else {
      console.log(`No cache found for ${rootDir}`);
    }

    // Fallback: load the full result and search for the transcript.
    console.log(`Loading full result from ${rootDir} to find transcript ${transcriptId} in directory ${directoryPath}`);
    const fullResult = await loadRealTranscriptsWithErrors(rootDir);

    console.log(`Loaded ${fullResult.transcripts.length} transcripts, searching for ${transcriptId}`);
    const found = fullResult.transcripts.find(t => t.id === transcriptId && t.split === directoryPath);

    if (found) {
      console.log(`Found transcript ${transcriptId} in full result`);
    } else {
      console.log(
        `Transcript ${transcriptId} not found in directory ${directoryPath}. Available transcripts:`,
        fullResult.transcripts.filter(t => t.split === directoryPath).map(t => t.id).slice(0, 10)
      );
    }

    return found || null;
  } catch (err) {
    console.error(`Failed to load specific transcript ${transcriptId}:`, err);
    return null;
  }
}

/** Extracts a message and stack from an arbitrary thrown value without assuming its type. */
function describeError(error: unknown): { message: string; stack?: string } {
  if (typeof error === 'object' && error !== null) {
    const { message, stack } = error as { message?: unknown; stack?: unknown };
    return {
      message: typeof message === 'string' && message ? message : 'Unknown error',
      stack: typeof stack === 'string' ? stack : undefined
    };
  }
  return { message: 'Unknown error' };
}

/**
 * GET handler for the transcripts endpoint.
 *
 * Query parameters:
 * - `flat=true`: return a flat transcript list instead of a folder tree.
 * - `includeErrors=true`: wrap the payload in an object that also carries
 *   errors, stats and directories.
 * - `includeEmptyFolders=false`: drop empty folders from the tree (default: keep).
 * - `metadataOnly=true`: use the metadata-only loading result.
 * - `transcriptId` (+ optional `directoryPath`): return a single transcript.
 *
 * Responds 404 when the root directory or the requested transcript is missing
 * and 500 on unexpected failures.
 */
export const GET: RequestHandler = async ({ url }) => {
  try {
    // Get transcript directory from environment variable or default.
    const envDir = process.env.TRANSCRIPT_DIR;
    const rootDir = envDir || './transcripts';

    console.log('Environment variable TRANSCRIPT_DIR:', envDir);
    console.log('Using rootDir:', rootDir);

    const flat = url.searchParams.get('flat') === 'true';
    const includeErrors = url.searchParams.get('includeErrors') === 'true';
    const includeEmptyFolders = url.searchParams.get('includeEmptyFolders') !== 'false'; // Default to true
    const metadataOnly = url.searchParams.get('metadataOnly') === 'true';
    const transcriptId = url.searchParams.get('transcriptId');
    const directoryPath = url.searchParams.get('directoryPath') || '';

    console.log(`Loading transcripts from: ${rootDir} (metadata only: ${metadataOnly}, transcript ID: ${transcriptId || 'none'})`);

    // Check if directory exists.
    if (!existsSync(rootDir)) {
      console.warn(`Transcript directory does not exist: ${rootDir}`);
      return json(
        {
          error: 'Transcript directory not found',
          details: `The directory "${rootDir}" does not exist. Please create it or specify a different directory using the --dir option when starting the server.`,
          suggestedPath: rootDir
        },
        { status: 404 }
      );
    }

    // Handle specific transcript loading.
    if (transcriptId) {
      const transcript = await loadSpecificTranscript(rootDir, transcriptId, directoryPath);

      if (!transcript) {
        return json(
          {
            error: 'Transcript not found',
            details: `Transcript with ID "${transcriptId}" not found in directory "${directoryPath}" within "${rootDir}"`
          },
          { status: 404 }
        );
      }

      return json({
        transcript,
        ...(includeErrors && {
          // We can include basic stats, but no detailed errors for single transcript.
          stats: { found: true, transcriptId }
        })
      });
    }

    // Load transcripts with caching.
    const result: LoadingResult = await getCachedTranscriptData(rootDir, metadataOnly);

    console.log(`Loading completed: ${result.stats.successfulFiles}/${result.stats.totalFiles} files successful, ${result.errors.length} errors`);

    // Log errors for server-side debugging (summary only).
    if (result.errors.length > 0) {
      console.warn(`${result.errors.length} transcript loading errors occurred`);
    }

    // Extra diagnostics attached to the payload only when the client asks for them.
    const diagnostics = includeErrors && {
      errors: result.errors,
      stats: result.stats,
      directories: result.directories
    };

    if (flat) {
      // Return flat list of all transcripts.
      return json(includeErrors ? { transcripts: result.transcripts, ...diagnostics } : result.transcripts);
    }

    // Return cached folder tree structure.
    const folderTree = getCachedFolderTree(rootDir, includeEmptyFolders);
    console.log(`Built folder tree with ${folderTree.length} root nodes`);

    return json(includeErrors ? { folderTree, ...diagnostics } : folderTree);
  } catch (error: unknown) {
    console.error('Failed to load transcripts:', error);
    const { message, stack } = describeError(error);
    // NOTE(review): the stack trace is sent to the client; kept for backward
    // compatibility, but consider omitting it if this server is ever exposed publicly.
    return json(
      {
        error: 'Failed to load transcripts',
        details: message,
        stack
      },
      { status: 500 }
    );
  }
};

/**
 * Walks `relativePath` segment by segment from `rootNode`, creating any folder
 * node that is not yet registered in `folderMap`, and returns the node for the
 * final segment (or `rootNode` when the path has no non-empty segments).
 */
function ensureFolderPath(
  rootNode: FolderNode,
  folderMap: Map<string, FolderNode>,
  relativePath: string
): FolderNode {
  let currentPath = '';
  let currentNode = rootNode;

  for (const part of relativePath.split('/').filter(segment => segment !== '')) {
    currentPath = currentPath ? `${currentPath}/${part}` : part;

    let folder = folderMap.get(currentPath);
    if (!folder) {
      folder = {
        name: part,
        path: currentPath,
        type: 'folder',
        children: [],
        isEmpty: false,
        transcriptCount: 0
      };
      currentNode.children = currentNode.children ?? [];
      currentNode.children.push(folder);
      folderMap.set(currentPath, folder);
    }
    currentNode = folder;
  }

  return currentNode;
}

/**
 * Builds the folder tree shown to the client from the loaded transcripts and
 * the directory listing.
 *
 * @param transcripts Transcripts to place in the tree; `split` is the folder path.
 * @param directories Directory descriptors (used so empty folders appear too).
 * @param includeEmptyFolders When false, directories flagged empty are not created.
 * @returns The sorted children of the implicit root folder.
 */
function buildFolderTreeFromTranscriptsAndDirectories(
  transcripts: TranscriptDisplay[],
  directories: DirectoryInfo[],
  includeEmptyFolders: boolean = true
): FolderNode[] {
  const rootNode: FolderNode = {
    name: 'root',
    path: '',
    type: 'folder',
    children: [],
    isEmpty: false,
    transcriptCount: 0
  };

  // Map for quick folder lookup by normalized path ('' is the root folder).
  const folderMap = new Map<string, FolderNode>();
  folderMap.set('', rootNode);

  // First, create all directories (including empty ones).
  for (const dirInfo of directories) {
    if (dirInfo.relativePath === 'root') continue; // Skip root

    // Skip empty directories if not including them.
    if (!includeEmptyFolders && dirInfo.isEmpty) continue;

    // Use the node reached by the walk rather than looking the raw path up again:
    // the map is keyed by the normalized path, so a raw path with a trailing or
    // doubled slash would otherwise miss and lose its metadata.
    const folderNode = ensureFolderPath(rootNode, folderMap, dirInfo.relativePath);
    folderNode.isEmpty = dirInfo.isEmpty;
    folderNode.transcriptCount = dirInfo.transcriptCount;
  }

  // Then, add transcripts to their respective folders.
  for (const transcript of transcripts) {
    // Folders are created on demand (shouldn't be needed if directories are complete).
    const parentFolder = ensureFolderPath(rootNode, folderMap, transcript.split);

    // Construct URL with directory path + transcript ID, encoding each segment separately.
    const pathSegments = transcript.split
      ? transcript.split.split('/').concat([transcript.id])
      : [transcript.id];
    const encodedPath = pathSegments.map(segment => encodeURIComponent(segment)).join('/');

    parentFolder.children = parentFolder.children ?? [];
    parentFolder.children.push({
      name: `Transcript ${transcript.id}`,
      path: `/transcript/${encodedPath}`,
      type: 'transcript',
      transcript: transcript
    });
  }

  // Sort all folders and transcripts recursively.
  sortFolderTree(rootNode);

  // Return the children of the root node (we don't want to show the root itself).
  return rootNode.children || [];
}

/**
 * Sorts a folder's children in place, recursively: folders first (by name),
 * then transcripts by `concerningScore`, highest first (missing scores count as 0).
 */
function sortFolderTree(node: FolderNode): void {
  if (!node.children) return;

  node.children.sort((a, b) => {
    // Folders come before transcripts.
    if (a.type === 'folder' && b.type === 'transcript') return -1;
    if (a.type === 'transcript' && b.type === 'folder') return 1;

    // Sort folders by name.
    if (a.type === 'folder' && b.type === 'folder') {
      return a.name.localeCompare(b.name);
    }

    // Sort transcripts by concerning score (highest first).
    if (a.type === 'transcript' && b.type === 'transcript') {
      return (b.transcript?.concerningScore || 0) - (a.transcript?.concerningScore || 0);
    }

    return 0;
  });

  // Recursively sort subfolders.
  for (const child of node.children) {
    if (child.type === 'folder') {
      sortFolderTree(child);
    }
  }
}