@kaifronsdal/transcript-viewer
Version:
A web-based viewer for AI conversation transcripts with rollback support
203 lines (181 loc) • 5.89 kB
text/typescript
import Ajv from 'ajv';
import addFormats from 'ajv-formats';
import transcriptSchema from './transcript-schema.json';
// Create AJV instance with proper configuration
const ajv = new Ajv({
allErrors: true, // Collect all errors, not just the first one
verbose: true, // Include schema and data information in errors
strict: false // Allow unknown keywords (for Pydantic-generated schemas)
});
// Add standard formats (including date-time)
addFormats(ajv);
// Add a more lenient date-time format that accepts various formats
ajv.addFormat('date-time', {
type: 'string',
validate: function(dateTimeString: string) {
// Accept ISO 8601 format with "T" separator
if (/^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})?$/.test(dateTimeString)) {
return !isNaN(Date.parse(dateTimeString));
}
// Accept space-separated ISO format (common in transcript files), with optional fractional seconds and timezone
if (/^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:\d{2})?$/.test(dateTimeString)) {
const normalized = dateTimeString.replace(' ', 'T');
return !isNaN(Date.parse(normalized));
}
return false;
}
});
// Compile the transcript schema
const validateTranscript = ajv.compile(transcriptSchema);
export interface ValidationError {
message: string;
path: string;
value: any;
schema: any;
keyword?: string;
params?: any;
schemaPath?: string;
}
export interface ValidationResult {
isValid: boolean;
valid: boolean;
errors: ValidationError[];
transcript?: any;
partialData?: any;
data?: any;
}
/** Convert an AJV instancePath like "/events/5/edit/patch/0/path" to "events[5].edit.patch[0].path" */
function jsonPointerToPath(pointer: string): string {
if (!pointer || pointer === '/') return 'root';
const parts = pointer.split('/').filter(Boolean).map(decodeURIComponent);
let path = '';
for (const part of parts) {
const isIndex = /^\d+$/.test(part);
if (path === '') {
path = isIndex ? `[${part}]` : part;
} else if (isIndex) {
path += `[${part}]`;
} else {
path += `.${part}`;
}
}
return path || 'root';
}
/**
* Validate transcript data against the JSON schema
*/
export function validateTranscriptData(data: any): ValidationResult {
const valid = validateTranscript(data);
if (valid) {
return {
isValid: true,
valid: true,
errors: [],
transcript: data,
data
};
}
const errors: ValidationError[] = (validateTranscript.errors || []).map((error: any) => {
// Improve messages for common keywords
let message = error.message || 'Unknown validation error';
if (error.keyword === 'required' && error.params?.missingProperty) {
message = `missing required property "${error.params.missingProperty}"`;
} else if (error.keyword === 'const' && error.params?.allowedValue !== undefined) {
message = `must be equal to constant (${JSON.stringify(error.params.allowedValue)})`;
}
return {
message,
path: jsonPointerToPath(error.instancePath || error.schemaPath || ''),
value: error.data,
schema: error.schema,
keyword: error.keyword,
params: error.params,
schemaPath: error.schemaPath
} as ValidationError;
});
// Extract partial data for debugging
const partialData = extractTranscriptInfo(data);
return {
isValid: false,
valid: false,
errors,
partialData,
data
};
}
/**
* Format validation errors into a human-readable message
*/
export function formatValidationErrors(errors: ValidationError[]): string {
if (errors.length === 0) return 'No validation errors';
const errorMessages = errors.map((error, index) => {
const path = error.path || 'root';
// Include a short preview of the offending value, if available
let valueSnippet = '';
try {
const json = JSON.stringify(error.value);
if (json && json.length <= 80) valueSnippet = ` (value: ${json})`;
} catch {}
return `${index + 1}. ${path}: ${error.message}${valueSnippet}`;
});
return `Schema validation failed (${errors.length} error${errors.length === 1 ? '' : 's'}):\n${errorMessages.join('\n')}`;
}
/**
* Validate and parse transcript file content
*/
export function validateAndParseTranscript(content: string, filename?: string): ValidationResult {
try {
// First, try to parse as JSON
const data = JSON.parse(content);
// Then validate against schema
const result = validateTranscriptData(data);
if (!result.valid) {
console.error(
`Transcript validation failed${filename ? ` for ${filename}` : ''}:\n` +
formatValidationErrors(result.errors)
);
}
return result;
} catch (parseError) {
return {
isValid: false,
valid: false,
errors: [{
message: `JSON parsing failed: ${parseError instanceof Error ? parseError.message : 'Unknown parse error'}`,
path: 'root',
value: content,
schema: null
}],
data: null
};
}
}
/**
* Check if data looks like a transcript (basic structure check)
*/
export function isTranscriptLike(data: any): boolean {
return (
data &&
typeof data === 'object' &&
data.metadata &&
Array.isArray(data.events)
);
}
/**
* Extract basic info from potentially invalid transcript data
*/
export function extractTranscriptInfo(data: any): {
hasMetadata: boolean;
hasEvents: boolean;
eventCount: number;
version: string | null;
sessionId: string | null;
} {
return {
hasMetadata: !!(data && data.metadata),
hasEvents: !!(data && Array.isArray(data.events)),
eventCount: (data && Array.isArray(data.events)) ? data.events.length : 0,
version: data?.metadata?.version || null,
sessionId: data?.metadata?.session_id || null
};
}