codecrucible-synth
Version:
Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability
1,718 lines (1,487 loc) • 48.8 kB
text/typescript
/**
* Structured Output Manager - JSON Schema Validation System
* Implements modern structured output generation with validation and type safety
*
* Features:
* - JSON Schema validation with detailed error reporting
* - Type-safe response handling with TypeScript inference
* - Streaming structured output with partial validation
* - Schema inference from examples and descriptions
* - Multiple output formats (JSON, YAML, XML, etc.)
* - Schema evolution and version management
* - Confidence scoring for generated structures
* - Auto-correction for common schema violations
*/
import { EventEmitter } from 'events';
import { logger } from '../logger.js';
import { getTelemetryProvider } from '../observability/observability-system.js';
// JSON Schema types
/**
 * Shape of a JSON Schema document as handled by this module.
 *
 * Every keyword is optional. Field names mirror JSON Schema keywords, plus the
 * non-standard `'xml-attributes'` flag that `adaptSchemaForFormat` sets when a
 * schema is adapted for XML output.
 */
export interface JsonSchema {
$schema?: string;
$id?: string;
title?: string;
description?: string;
// A single type name or a list of alternative type names.
type?: JsonSchemaType | JsonSchemaType[];
// Object keywords.
properties?: Record<string, JsonSchema>;
required?: string[];
// Array item schema (single-schema form only).
items?: JsonSchema;
additionalProperties?: boolean | JsonSchema;
enum?: any[];
const?: any;
// Numeric bounds.
minimum?: number;
maximum?: number;
// String constraints; `pattern` is compiled with `new RegExp` by the string validator.
minLength?: number;
maxLength?: number;
pattern?: string;
format?: string;
// Array constraints.
minItems?: number;
maxItems?: number;
uniqueItems?: boolean;
// Composition and conditional keywords.
anyOf?: JsonSchema[];
oneOf?: JsonSchema[];
allOf?: JsonSchema[];
not?: JsonSchema;
if?: JsonSchema;
then?: JsonSchema;
else?: JsonSchema;
definitions?: Record<string, JsonSchema>;
// When present, the first example is used verbatim as simulated output and in prompts.
examples?: any[];
// Used by auto-correction when a missing required property has to be filled in.
default?: any;
// Non-standard marker added for the 'xml' output format.
'xml-attributes'?: boolean;
}
/** Type names accepted by the `type` keyword of {@link JsonSchema}. */
export type JsonSchemaType =
| 'null'
| 'boolean'
| 'object'
| 'array'
| 'number'
| 'integer'
| 'string';
/**
 * Per-call options for structured output generation.
 *
 * All fields are optional; `StructuredOutputManager` spreads the supplied
 * options over its `defaultConfig`.
 */
export interface StructuredOutputConfig {
enablePartialValidation?: boolean;
// When true, output that fails validation is passed through auto-correction.
enableAutoCorrection?: boolean;
maxValidationErrors?: number;
// Minimum correction confidence required before corrected data replaces the raw output.
confidenceThreshold?: number;
outputFormat?: OutputFormat;
// Adds a "strictly adhere to the schema" instruction to the generated prompt.
strictMode?: boolean;
schemaValidation?: SchemaValidationLevel;
}
/** Serialisation formats named by the API; `convertFormat` decides which conversions are actually supported. */
export type OutputFormat = 'json' | 'yaml' | 'xml' | 'toml' | 'csv';
/** Named validation strictness levels selectable through `StructuredOutputConfig.schemaValidation`. */
export type SchemaValidationLevel = 'strict' | 'moderate' | 'lenient' | 'disabled';
/**
 * Result of a structured output generation call.
 *
 * @typeParam T - Caller-asserted shape of `data`; the manager casts to it
 *   without a runtime check beyond schema validation.
 */
export interface StructuredResponse<T = any> {
data: T;
schema: JsonSchema;
// True when the raw output validated, or when an auto-correction was accepted.
valid: boolean;
confidence: number;
metadata: StructuredResponseMetadata;
errors?: ValidationError[];
warnings?: ValidationWarning[];
partial?: boolean;
// True when `data` is the auto-corrected value rather than the raw output.
corrected?: boolean;
}
/** Timing and diagnostic details attached to a {@link StructuredResponse}. */
export interface StructuredResponseMetadata {
// Elapsed time values are differences of `Date.now()` readings, i.e. milliseconds.
generationTime: number;
validationTime: number;
// Score produced by `calculateSchemaComplexity` for the response's schema.
schemaComplexity: number;
outputFormat: OutputFormat;
schemaVersion?: string;
// Human-readable summary of the validation outcome.
reasoning?: string;
alternatives?: AlternativeOutput[];
}
/** An alternative candidate output that can be listed in response metadata. */
export interface AlternativeOutput {
data: any;
confidence: number;
reasoning: string;
// Count of validation errors for this candidate.
validationErrors: number;
}
/** A single finding produced while validating data against a schema. */
export interface ValidationError {
// Location of the offending value, e.g. `user.tags[2]`; empty string for the root.
path: string;
message: string;
// Machine-readable code such as 'TYPE_MISMATCH' or 'MISSING_REQUIRED'.
code: string;
// Only entries with severity 'error' make `validateOutput` report the data as invalid.
severity: 'error' | 'warning';
suggestions?: string[];
actualValue?: any;
expectedType?: string;
}
/** A non-fatal validation finding, e.g. a property the schema does not declare. */
export interface ValidationWarning {
path: string;
message: string;
// Recommended action for resolving the warning.
suggestion: string;
impact: 'low' | 'medium' | 'high';
}
/**
 * Options for `inferSchemaFromData`.
 *
 * Defaults applied there: includeExamples false, strictTypes false,
 * minimumOccurrence 1, inferOptionalFields true, generateDescriptions false.
 */
export interface SchemaInferenceOptions {
// When true, up to the first three samples are copied into the schema's `examples`.
includeExamples?: boolean;
// When true, the inferred schema sets `additionalProperties` to false.
strictTypes?: boolean;
// Number of samples a field must appear in before it can be marked required.
minimumOccurrence?: number;
// When true (the default), no field is added to `required`.
inferOptionalFields?: boolean;
// When true, `generateSchema` fills in placeholder property descriptions.
generateDescriptions?: boolean;
}
/** Input to `generateSchema`. */
export interface SchemaGenerationRequest {
// Used as the schema description; its first sentence becomes the title.
description: string;
// Sample values from which properties are inferred.
examples?: any[];
constraints?: SchemaConstraint[];
// Any value other than 'json' triggers format-specific schema adaptation.
outputFormat?: OutputFormat;
inferenceOptions?: SchemaInferenceOptions;
}
/** A constraint applied to one top-level property while generating a schema. */
export interface SchemaConstraint {
type: 'required' | 'optional' | 'pattern' | 'range' | 'enum';
// Name of the property the constraint targets.
field: string;
// Interpretation depends on `type`: 'enum' takes a value or array of values,
// 'pattern' a regex source string, 'range' an object with optional `min`/`max`.
value: any;
// When set, overwrites the property's description.
description?: string;
}
/**
 * Public contract of the structured output manager: schema generation and
 * inference, output generation (one-shot and streamed), validation,
 * auto-correction, format conversion and a small schema registry.
 */
export interface IStructuredOutputManager {
// Schema operations
generateSchema(request: SchemaGenerationRequest): Promise<JsonSchema>;
validateSchema(schema: JsonSchema): Promise<SchemaValidationResult>;
inferSchemaFromData(data: any[], options?: SchemaInferenceOptions): Promise<JsonSchema>;
// Output generation
generateStructuredOutput<T>(
prompt: string,
schema: JsonSchema,
options?: StructuredOutputConfig
): Promise<StructuredResponse<T>>;
// `onPartial` is invoked with progressively more complete data before the promise resolves.
streamStructuredOutput<T>(
prompt: string,
schema: JsonSchema,
onPartial: (partial: Partial<T>) => void,
options?: StructuredOutputConfig
): Promise<StructuredResponse<T>>;
// Validation
validateOutput(data: any, schema: JsonSchema): Promise<ValidationResult>;
autoCorrectOutput(data: any, schema: JsonSchema): Promise<CorrectionResult>;
// Format conversion
convertFormat(data: any, fromFormat: OutputFormat, toFormat: OutputFormat): Promise<string>;
// Schema management
registerSchema(id: string, schema: JsonSchema): void;
// Returns null when no schema is registered under `id`.
getSchema(id: string): JsonSchema | null;
updateSchema(id: string, schema: JsonSchema, version?: string): void;
// Utilities
calculateSchemaComplexity(schema: JsonSchema): number;
generateSchemaDocumentation(schema: JsonSchema): string;
}
/** Outcome of checking a schema definition itself (not data against it). */
export interface SchemaValidationResult {
// True when `errors` is empty.
valid: boolean;
errors: SchemaValidationError[];
warnings: string[];
// Complexity score of the schema; 0 when validation itself threw.
complexity: number;
}
/** A problem found in a schema definition. */
export interface SchemaValidationError {
path: string;
message: string;
// Machine-readable code such as 'MISSING_TYPE' or 'VALIDATION_ERROR'.
code: string;
}
/** Outcome of validating data against a schema. */
export interface ValidationResult {
// True when no finding has severity 'error'.
valid: boolean;
errors: ValidationError[];
warnings: ValidationWarning[];
// Starts at 1 and is reduced per error and per warning; 0 when validation itself threw.
confidence: number;
// Set by `validateOutput` to "invalid and at least one warning was recorded".
partiallyValid?: boolean;
}
/** Outcome of an auto-correction attempt. */
export interface CorrectionResult {
// True when at least one correction was applied.
corrected: boolean;
// The corrected copy, or the original input when correction failed.
data: any;
corrections: Correction[];
confidence: number;
metadata: CorrectionMetadata;
}
/** One change made to the data during auto-correction. */
export interface Correction {
// Location of the corrected value; empty string for the root.
path: string;
originalValue: any;
correctedValue: any;
// Short explanation of why the change was made.
reasoning: string;
// Confidence assigned by the correction strategy that produced this entry.
confidence: number;
}
/** Summary counters for a set of corrections, as filled in by `autoCorrectOutput`. */
export interface CorrectionMetadata {
totalCorrections: number;
// Corrections whose confidence is below 0.7.
criticalCorrections: number;
// Corrections whose confidence is at least 0.8.
automaticCorrections: number;
// True when any correction has confidence below 0.6, or when correction failed.
manualReviewNeeded: boolean;
}
/**
* Structured Output Manager Implementation
* Provides comprehensive structured output generation and validation
*/
export class StructuredOutputManager extends EventEmitter implements IStructuredOutputManager {
// Registry of schemas keyed by id. `registerSchema` stores no version;
// `updateSchema` stores the version it is given.
private schemas: Map<string, { schema: JsonSchema; version?: string }> = new Map();
// Telemetry provider obtained once per instance.
// NOTE(review): no use of this field is visible in the reviewed portion of the class.
private telemetry = getTelemetryProvider();
// Baseline options; per-call options are spread over these.
private readonly defaultConfig: StructuredOutputConfig = {
enablePartialValidation: true,
enableAutoCorrection: true,
maxValidationErrors: 50,
confidenceThreshold: 0.8,
outputFormat: 'json',
strictMode: false,
schemaValidation: 'moderate',
};
/** Create the manager and attach its internal logging listeners. */
constructor() {
super();
this.setupEventHandlers();
}
/**
 * Subscribe the instance to its own lifecycle events so each one is logged:
 * 'schema-generated' and 'output-validated' at debug level, 'output-corrected'
 * at info level.
 */
private setupEventHandlers(): void {
  const onSchemaGenerated = (complexity: number): void => {
    logger.debug('Schema generated', { complexity });
  };
  const onOutputValidated = (valid: boolean, errors: number): void => {
    logger.debug('Output validated', { valid, errors });
  };
  const onOutputCorrected = (corrections: number): void => {
    logger.info('Output auto-corrected', { corrections });
  };

  // Registration order matches the order in which the events are documented above.
  this.on('schema-generated', onSchemaGenerated);
  this.on('output-validated', onOutputValidated);
  this.on('output-corrected', onOutputCorrected);
}
/**
 * Build a JSON Schema from a description, optionally refined by example
 * data, explicit constraints and the target output format.
 *
 * Steps, in order: start from a closed object schema titled after the
 * description, merge in properties inferred from `request.examples`, apply
 * `request.constraints`, adapt for non-JSON formats, and finally add
 * placeholder property descriptions when requested.
 *
 * @param request - Description plus optional examples, constraints and options.
 * @returns The assembled schema. A 'schema-generated' event carrying the
 *   schema's complexity is emitted before returning.
 * @throws Error prefixed with "Schema generation failed: " wrapping any failure.
 */
async generateSchema(request: SchemaGenerationRequest): Promise<JsonSchema> {
  const startedAt = Date.now();
  try {
    logger.info('Generating schema', {
      hasExamples: Boolean(request.examples?.length),
      hasConstraints: Boolean(request.constraints?.length),
      outputFormat: request.outputFormat,
    });

    let draft: JsonSchema = {
      type: 'object',
      title: this.extractTitleFromDescription(request.description),
      description: request.description,
      properties: {},
      required: [],
      additionalProperties: false,
    };

    // Examples, when present, contribute inferred properties.
    const samples = request.examples;
    if (samples && samples.length > 0) {
      const inferred = await this.inferSchemaFromData(samples, request.inferenceOptions);
      draft = this.mergeSchemas(draft, inferred);
    }

    // Explicit constraints are layered on top of whatever was inferred.
    if (request.constraints) {
      draft = this.applyConstraints(draft, request.constraints);
    }

    // JSON needs no adaptation; every other format gets a chance to adjust the schema.
    const targetFormat = request.outputFormat;
    if (targetFormat && targetFormat !== 'json') {
      draft = this.adaptSchemaForFormat(draft, targetFormat);
    }

    if (request.inferenceOptions?.generateDescriptions) {
      draft = await this.generatePropertyDescriptions(draft, request.description);
    }

    const complexity = this.calculateSchemaComplexity(draft);
    this.emit('schema-generated', complexity);
    logger.info('Schema generation completed', {
      complexity,
      properties: Object.keys(draft.properties || {}).length,
      generationTime: Date.now() - startedAt,
    });
    return draft;
  } catch (error) {
    logger.error('Schema generation failed', error);
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Schema generation failed: ${reason}`);
  }
}
/**
 * Generate structured output for a prompt and validate it against a schema.
 *
 * Pipeline: build a schema-aware prompt, obtain raw output (currently from the
 * simulated generator), validate it, optionally auto-correct it, then compute
 * a confidence score.
 *
 * @param prompt - Caller's prompt; schema instructions are appended to it.
 * @param schema - Schema the output is validated against.
 * @param options - Per-call overrides spread over `defaultConfig`.
 * @returns Response holding the final data, the validation findings of the
 *   raw (pre-correction) output, a confidence score and timing metadata.
 *   `valid` is true when the raw output validated or a correction was accepted.
 * @throws Error prefixed with "Structured output generation failed: " wrapping any failure.
 */
async generateStructuredOutput<T>(
  prompt: string,
  schema: JsonSchema,
  options?: StructuredOutputConfig
): Promise<StructuredResponse<T>> {
  const config = { ...this.defaultConfig, ...options };
  const startTime = Date.now();
  try {
    logger.info('Generating structured output', {
      schemaTitle: schema.title,
      outputFormat: config.outputFormat,
      strictMode: config.strictMode,
    });

    // Enhanced prompt with schema information
    const enhancedPrompt = this.createSchemaAwarePrompt(prompt, schema, config);

    // Simulate LLM call with structured generation
    // In production, this would call the actual LLM with schema constraints
    const rawOutput = await this.simulateStructuredGeneration(enhancedPrompt, schema, config);
    const generationTime = Date.now() - startTime;

    // Validate generated output
    const validationStartTime = Date.now();
    const validationResult = await this.validateOutput(rawOutput, schema);
    const validationTime = Date.now() - validationStartTime;

    // Auto-correct if enabled and validation failed
    let finalData = rawOutput;
    let corrected = false;
    let corrections: Correction[] = [];
    if (!validationResult.valid && config.enableAutoCorrection) {
      const correctionResult = await this.autoCorrectOutput(rawOutput, schema);
      // `??` rather than `||`: an explicit threshold of 0 ("accept any
      // correction") must not be replaced by the 0.8 fallback.
      const threshold = config.confidenceThreshold ?? 0.8;
      if (correctionResult.corrected && correctionResult.confidence >= threshold) {
        finalData = correctionResult.data;
        corrected = true;
        corrections = correctionResult.corrections;
        this.emit('output-corrected', corrections.length);
      }
    }

    // Calculate confidence
    const confidence = this.calculateOutputConfidence(
      finalData,
      schema,
      validationResult,
      corrected
    );
    this.emit(
      'output-validated',
      validationResult.valid || corrected,
      validationResult.errors.length
    );

    const response: StructuredResponse<T> = {
      data: finalData as T,
      schema,
      valid: validationResult.valid || corrected,
      confidence,
      metadata: {
        generationTime,
        validationTime,
        schemaComplexity: this.calculateSchemaComplexity(schema),
        outputFormat: config.outputFormat ?? 'json',
        reasoning: this.generateReasoningExplanation(finalData, schema, validationResult),
        alternatives: [],
      },
      // Findings describe the raw output, even when a correction was accepted.
      errors: validationResult.errors,
      warnings: validationResult.warnings,
      corrected,
    };
    if (corrected) {
      response.metadata.reasoning = `Output was auto-corrected with ${corrections.length} changes. ${response.metadata.reasoning}`;
    }
    return response;
  } catch (error) {
    logger.error('Structured output generation failed', error);
    throw new Error(
      `Structured output generation failed: ${error instanceof Error ? error.message : String(error)}`
    );
  }
}
/**
 * Streaming variant of structured output generation.
 *
 * Delegates to the simulated streaming generator, which calls `onPartial`
 * with progressively larger partial objects before the full response is
 * produced.
 *
 * @param prompt - Caller's prompt.
 * @param schema - Schema the output should follow.
 * @param onPartial - Callback receiving each partial object.
 * @param options - Per-call overrides spread over `defaultConfig`.
 * @returns The final structured response.
 */
async streamStructuredOutput<T>(
  prompt: string,
  schema: JsonSchema,
  onPartial: (partial: Partial<T>) => void,
  options?: StructuredOutputConfig
): Promise<StructuredResponse<T>> {
  const effectiveConfig = { ...this.defaultConfig, ...options };
  const logContext = {
    schemaTitle: schema.title,
    partialValidation: effectiveConfig.enablePartialValidation,
  };
  logger.info('Starting structured output streaming', logContext);

  // Simulate streaming generation
  const response = await this.simulateStreamingGeneration(
    prompt,
    schema,
    onPartial,
    effectiveConfig
  );
  return response;
}
/**
 * Validate a value against a schema and summarise the findings.
 *
 * Never rejects: an exception thrown during validation is converted into a
 * single 'VALIDATION_ERROR' entry and a confidence of 0.
 *
 * @param data - Value to check.
 * @param schema - Schema to check against.
 * @returns Findings, a validity flag (no finding of severity 'error') and a
 *   confidence score.
 */
async validateOutput(data: any, schema: JsonSchema): Promise<ValidationResult> {
  const errors: ValidationError[] = [];
  const warnings: ValidationWarning[] = [];

  try {
    // Walk data and schema together, collecting findings into both lists.
    this.validateRecursive(data, schema, '', errors, warnings);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    errors.push({
      path: '',
      message: `Validation error: ${reason}`,
      code: 'VALIDATION_ERROR',
      severity: 'error',
    });
    return { valid: false, errors, warnings, confidence: 0 };
  }

  const valid = !errors.some(entry => entry.severity === 'error');
  const confidence = this.calculateValidationConfidence(errors, warnings);
  const partiallyValid = !valid && warnings.length > 0;
  return { valid, errors, warnings, confidence, partiallyValid };
}
/**
 * Try to repair data so that it conforms to a schema.
 *
 * Works on a JSON deep copy of `data`, so the caller's value is never
 * mutated. Four strategies run in order: type conversion, filling in missing
 * required properties, fixing enum mismatches, and removing properties the
 * schema forbids.
 *
 * Never rejects: any failure, including input that cannot be copied through
 * JSON (e.g. `undefined` or circular structures), is logged and reported as
 * an uncorrected result with `manualReviewNeeded` set.
 *
 * @param data - Value to repair.
 * @param schema - Schema the value should satisfy.
 * @returns The corrected copy plus the list of corrections and summary counters.
 */
async autoCorrectOutput(data: any, schema: JsonSchema): Promise<CorrectionResult> {
  const corrections: Correction[] = [];
  try {
    // Deep copy inside the try so a non-serialisable input takes the failure
    // path below instead of escaping as a rejected promise.
    const correctedData = JSON.parse(JSON.stringify(data));

    // Apply various correction strategies
    this.correctTypes(correctedData, schema, '', corrections);
    this.correctMissingRequired(correctedData, schema, '', corrections);
    this.correctInvalidValues(correctedData, schema, '', corrections);
    this.removeExtraProperties(correctedData, schema, '', corrections);

    const confidence = this.calculateCorrectionConfidence(corrections);
    const metadata: CorrectionMetadata = {
      totalCorrections: corrections.length,
      criticalCorrections: corrections.filter(c => c.confidence < 0.7).length,
      automaticCorrections: corrections.filter(c => c.confidence >= 0.8).length,
      manualReviewNeeded: corrections.some(c => c.confidence < 0.6),
    };
    return {
      corrected: corrections.length > 0,
      data: correctedData,
      corrections,
      confidence,
      metadata,
    };
  } catch (error) {
    logger.error('Auto-correction failed', error);
    return {
      corrected: false,
      data: data,
      corrections: [],
      confidence: 0,
      metadata: {
        totalCorrections: 0,
        criticalCorrections: 0,
        automaticCorrections: 0,
        manualReviewNeeded: true,
      },
    };
  }
}
/**
 * Derive an object schema from a list of sample values.
 *
 * @param data - Samples to analyse. An empty list yields an empty, closed
 *   object schema.
 * @param options - Inference options; unspecified fields fall back to
 *   includeExamples=false, strictTypes=false, minimumOccurrence=1,
 *   inferOptionalFields=true, generateDescriptions=false.
 * @returns Schema with one property per analysed field. Fields are added to
 *   `required` only when `inferOptionalFields` is false and the field meets
 *   the `minimumOccurrence` threshold.
 */
async inferSchemaFromData(data: any[], options?: SchemaInferenceOptions): Promise<JsonSchema> {
  const opts = {
    includeExamples: false,
    strictTypes: false,
    minimumOccurrence: 1,
    inferOptionalFields: true,
    generateDescriptions: false,
    ...options,
  };

  const sampleCount = data.length;
  if (sampleCount === 0) {
    return { type: 'object', properties: {}, additionalProperties: false };
  }

  const properties: Record<string, JsonSchema> = {};
  const required: string[] = [];
  const schema: JsonSchema = {
    type: 'object',
    properties,
    required,
    additionalProperties: !opts.strictTypes,
  };

  // Analyze all data samples, then turn each field's analysis into a property.
  const fieldAnalysis = this.analyzeFields(data, opts);
  for (const [fieldName, analysis] of fieldAnalysis.entries()) {
    properties[fieldName] = this.createPropertySchema(analysis, opts);

    // Both sides are divided by the sample count, so this compares the
    // field's occurrence count with `minimumOccurrence`.
    const occurrenceRatio = analysis.occurrences / sampleCount;
    const meetsThreshold = occurrenceRatio >= opts.minimumOccurrence / sampleCount;
    if (meetsThreshold && !opts.inferOptionalFields) {
      required.push(fieldName);
    }
  }

  if (opts.includeExamples) {
    schema.examples = data.slice(0, 3); // Include first 3 examples
  }
  return schema;
}
/**
 * Convert data between output formats.
 *
 * String input is parsed according to `fromFormat` ('json' and 'yaml' only);
 * non-string input is taken as already parsed. Supported targets are 'json',
 * 'yaml', 'xml' and 'csv'. When both formats are equal, strings are returned
 * unchanged and other values are pretty-printed as JSON.
 *
 * @param data - Serialised text or an already-parsed value.
 * @param fromFormat - Format of `data` when it is a string.
 * @param toFormat - Desired output format.
 * @returns The serialised result.
 * @throws Error prefixed with "Format conversion failed: " for unsupported
 *   formats or parse/serialise failures.
 */
async convertFormat(
  data: any,
  fromFormat: OutputFormat,
  toFormat: OutputFormat
): Promise<string> {
  if (fromFormat === toFormat) {
    if (typeof data === 'string') {
      return data;
    }
    return JSON.stringify(data, null, 2);
  }

  try {
    // Parse data from source format
    let value = data;
    if (typeof data === 'string') {
      if (fromFormat === 'json') {
        value = JSON.parse(data);
      } else if (fromFormat === 'yaml') {
        // Would use yaml parser in production
        value = this.parseSimpleYaml(data);
      } else {
        throw new Error(`Unsupported source format: ${fromFormat}`);
      }
    }

    // Convert to target format
    if (toFormat === 'json') {
      return JSON.stringify(value, null, 2);
    }
    if (toFormat === 'yaml') {
      return this.toSimpleYaml(value);
    }
    if (toFormat === 'xml') {
      return this.toSimpleXml(value);
    }
    if (toFormat === 'csv') {
      return this.toCsv(value);
    }
    throw new Error(`Unsupported target format: ${toFormat}`);
  } catch (error) {
    logger.error('Format conversion failed', { fromFormat, toFormat, error });
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Format conversion failed: ${reason}`);
  }
}
/**
 * Compute a recursive complexity score for a schema.
 *
 * The score is the number of declared properties, plus the complexity of
 * each property schema, of the `items` schema, and of every sub-schema listed
 * under `anyOf`, `oneOf` and `allOf`. All three composition keywords are
 * counted when present together (previously only the first one was).
 *
 * @param schema - Schema to score.
 * @returns Non-negative integer; 0 for a schema without properties, items or composition.
 */
calculateSchemaComplexity(schema: JsonSchema): number {
  let complexity = 0;

  if (schema.properties) {
    const propertySchemas = Object.values(schema.properties);
    complexity += propertySchemas.length;
    for (const property of propertySchemas) {
      complexity += this.calculateSchemaComplexity(property);
    }
  }

  if (schema.items) {
    complexity += this.calculateSchemaComplexity(schema.items);
  }

  // A schema may combine several composition keywords; each contributes.
  for (const branches of [schema.anyOf, schema.oneOf, schema.allOf]) {
    if (!branches) continue;
    for (const branch of branches) {
      complexity += this.calculateSchemaComplexity(branch);
    }
  }

  return complexity;
}
/**
 * Store a schema in the registry under `id`, replacing any existing entry.
 * No version is recorded for the entry.
 *
 * @param id - Registry key.
 * @param schema - Schema to store.
 */
registerSchema(id: string, schema: JsonSchema): void {
  const entry: { schema: JsonSchema; version?: string } = { schema };
  this.schemas.set(id, entry);

  const complexity = this.calculateSchemaComplexity(schema);
  logger.debug('Schema registered', { id, complexity });
}
/**
 * Look up a schema in the registry.
 *
 * @param id - Registry key.
 * @returns The stored schema, or null when nothing is registered under `id`.
 */
getSchema(id: string): JsonSchema | null {
  const registered = this.schemas.get(id);
  if (!registered) {
    return null;
  }
  return registered.schema;
}
/**
 * Replace (or create) the registry entry for `id`, recording an optional version.
 *
 * @param id - Registry key.
 * @param schema - New schema for the entry.
 * @param version - Version label stored alongside the schema.
 */
updateSchema(id: string, schema: JsonSchema, version?: string): void {
  const entry = { schema, version };
  this.schemas.set(id, entry);
  logger.info('Schema updated', { id, version });
}
/**
 * Render Markdown documentation for a schema.
 *
 * Output consists of a level-1 heading (the schema title, or a generic
 * fallback), the description when present, and a "Properties" section with
 * one entry per top-level property.
 *
 * @param schema - Schema to document.
 * @returns Markdown text.
 */
generateSchemaDocumentation(schema: JsonSchema): string {
  const heading = `# ${schema.title || 'Schema Documentation'}\n\n`;
  const summary = schema.description ? `${schema.description}\n\n` : '';
  let docs = heading + summary + '## Properties\n\n';

  const properties = schema.properties;
  if (properties) {
    for (const name of Object.keys(properties)) {
      const isRequired = schema.required?.includes(name) || false;
      docs += this.generatePropertyDocumentation(name, properties[name], isRequired);
    }
  }
  return docs;
}
/**
 * Check a schema definition for structural problems.
 *
 * Reports an error when the schema has neither a `type` nor a composition
 * keyword, validates each property of object schemas, and adds warnings for
 * circular references and for complexity above 100.
 *
 * Never rejects: an exception during checking becomes a 'VALIDATION_ERROR'
 * entry with complexity 0.
 *
 * @param schema - Schema to check.
 * @returns Errors, warnings, the complexity score and a validity flag.
 */
async validateSchema(schema: JsonSchema): Promise<SchemaValidationResult> {
  const errors: SchemaValidationError[] = [];
  const warnings: string[] = [];

  try {
    // Basic schema validation
    const declaresShape = Boolean(schema.type || schema.anyOf || schema.oneOf || schema.allOf);
    if (!declaresShape) {
      errors.push({
        path: '',
        message: 'Schema must specify a type or use composition (anyOf, oneOf, allOf)',
        code: 'MISSING_TYPE',
      });
    }

    // Validate properties if it's an object schema
    const properties = schema.type === 'object' ? schema.properties : undefined;
    if (properties) {
      for (const name of Object.keys(properties)) {
        this.validatePropertySchema(properties[name], name, errors, warnings);
      }
    }

    // Check for circular references
    if (this.hasCircularReference(schema)) {
      warnings.push('Schema contains circular references which may cause issues');
    }

    const complexity = this.calculateSchemaComplexity(schema);
    if (complexity > 100) {
      warnings.push(`Schema is quite complex (${complexity} points) - consider simplifying`);
    }

    const valid = errors.length === 0;
    return { valid, errors, warnings, complexity };
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    errors.push({
      path: '',
      message: `Schema validation error: ${reason}`,
      code: 'VALIDATION_ERROR',
    });
    return { valid: false, errors, warnings, complexity: 0 };
  }
}
// Private helper methods
/**
 * Derive a short title from a description: the text before the first '.',
 * cut to 47 characters plus '...' when it is longer than 50 characters.
 */
private extractTitleFromDescription(description: string): string {
  const [firstSentence] = description.split('.');
  if (firstSentence.length > 50) {
    return `${firstSentence.slice(0, 47)}...`;
  }
  return firstSentence;
}
/**
 * Combine a base schema with an inferred one.
 *
 * All base keywords are kept; properties are merged with inferred entries
 * taking precedence on name clashes, and `required` becomes the de-duplicated
 * union of both lists (base names first). Other keywords of `inferred` are
 * not carried over.
 */
private mergeSchemas(base: JsonSchema, inferred: JsonSchema): JsonSchema {
  const mergedProperties = { ...base.properties, ...inferred.properties };
  // A Set keeps first-insertion order, so base names stay ahead of inferred ones.
  const requiredNames = new Set<string>([
    ...(base.required || []),
    ...(inferred.required || []),
  ]);
  return {
    ...base,
    properties: mergedProperties,
    required: Array.from(requiredNames),
  };
}
/**
 * Return a copy of `schema` with the given constraints applied to its
 * top-level properties.
 *
 * The input schema is left untouched: the `properties` map, the `required`
 * list and every field schema that gets modified are copied first.
 *
 * Constraint handling:
 * - 'required': adds the field to `required` (no duplicates).
 * - 'optional': removes the field from `required`.
 * - 'enum': sets `enum` to the value, wrapped in an array when it is not one.
 * - 'pattern': sets `pattern`, only for fields of type 'string'.
 * - 'range': sets `minimum`/`maximum` from `value.min`/`value.max`, only for
 *   fields of type 'number' or 'integer'.
 * A field that does not exist yet is created as `{ type: 'string' }`, and a
 * constraint's description, when given, overwrites the field description.
 *
 * @param schema - Schema to start from.
 * @param constraints - Constraints applied in order.
 * @returns The constrained copy.
 */
private applyConstraints(schema: JsonSchema, constraints: SchemaConstraint[]): JsonSchema {
  const updatedSchema: JsonSchema = { ...schema };
  // Copy the containers that are mutated below so the caller's schema is not changed.
  if (schema.properties) updatedSchema.properties = { ...schema.properties };
  if (schema.required) updatedSchema.required = [...schema.required];

  for (const constraint of constraints) {
    if (!updatedSchema.properties) updatedSchema.properties = {};
    const existing = updatedSchema.properties[constraint.field];
    const fieldSchema: JsonSchema = existing ? { ...existing } : { type: 'string' };

    switch (constraint.type) {
      case 'required':
        if (!updatedSchema.required) updatedSchema.required = [];
        if (!updatedSchema.required.includes(constraint.field)) {
          updatedSchema.required.push(constraint.field);
        }
        break;
      case 'optional':
        if (updatedSchema.required) {
          updatedSchema.required = updatedSchema.required.filter(
            name => name !== constraint.field
          );
        }
        break;
      case 'enum':
        fieldSchema.enum = Array.isArray(constraint.value)
          ? constraint.value
          : [constraint.value];
        break;
      case 'pattern':
        if (fieldSchema.type === 'string') {
          fieldSchema.pattern = constraint.value;
        }
        break;
      case 'range':
        if (fieldSchema.type === 'number' || fieldSchema.type === 'integer') {
          // Optional chaining tolerates a range constraint without a value object.
          if (constraint.value?.min !== undefined) fieldSchema.minimum = constraint.value.min;
          if (constraint.value?.max !== undefined) fieldSchema.maximum = constraint.value.max;
        }
        break;
    }

    if (constraint.description) {
      fieldSchema.description = constraint.description;
    }
    updatedSchema.properties[constraint.field] = fieldSchema;
  }
  return updatedSchema;
}
/**
 * Adjust a schema for a specific output format.
 *
 * - 'xml': returns a copy flagged with `'xml-attributes': true`.
 * - 'csv': for object schemas with properties, returns a copy with
 *   `additionalProperties` forced to false.
 * - anything else: returns the schema unchanged (same reference).
 */
private adaptSchemaForFormat(schema: JsonSchema, format: OutputFormat): JsonSchema {
  if (format === 'xml') {
    // XML schemas might need special handling for attributes
    return { ...schema, 'xml-attributes': true };
  }

  const isObjectWithProperties = schema.type === 'object' && Boolean(schema.properties);
  if (format === 'csv' && isObjectWithProperties) {
    // CSV rows are flat, so the schema is closed to undeclared properties.
    return { ...schema, additionalProperties: false };
  }

  return schema;
}
/**
 * Give every top-level property without a description a placeholder of the
 * form "The <name> property for <context>".
 *
 * Returns a shallow copy of the schema; the property schemas themselves are
 * shared with the input and are updated in place.
 */
private async generatePropertyDescriptions(
  schema: JsonSchema,
  context: string
): Promise<JsonSchema> {
  // In production, this would use AI to generate descriptions
  const copy = { ...schema };
  const properties = copy.properties;
  if (!properties) {
    return copy;
  }

  for (const name of Object.keys(properties)) {
    const property = properties[name];
    if (!property.description) {
      property.description = `The ${name} property for ${context}`;
    }
  }
  return copy;
}
/**
 * Append schema instructions to a prompt.
 *
 * The result is the original prompt followed by the pretty-printed schema,
 * a strict-adherence notice when `config.strictMode` is set, and the first
 * schema example when one exists.
 */
private createSchemaAwarePrompt(
  prompt: string,
  schema: JsonSchema,
  config: StructuredOutputConfig
): string {
  const schemaSection =
    '\n\nPlease respond with a JSON object that follows this schema:\n' +
    JSON.stringify(schema, null, 2);

  const strictSection = config.strictMode
    ? '\n\nIMPORTANT: The response must strictly adhere to the schema. No additional properties are allowed.'
    : '';

  const examples = schema.examples;
  const exampleSection =
    examples && examples.length > 0
      ? '\n\nExample format:\n' + JSON.stringify(examples[0], null, 2)
      : '';

  return prompt + schemaSection + strictSection + exampleSection;
}
/**
 * Stand-in for a real LLM call: produces an example value from the schema.
 *
 * Outside strict mode, roughly one call in five (Math.random() > 0.8) returns
 * object output with an `extra_property` key added, to exercise validation
 * and auto-correction. The extra key is added to a shallow copy, because the
 * example may be `schema.examples[0]` itself and the caller's schema must not
 * be modified.
 *
 * @param prompt - Prompt that would be sent to the model (unused by the simulation).
 * @param schema - Schema the example is generated from.
 * @param config - Effective configuration; only `strictMode` is consulted.
 * @returns The example value, possibly with the injected extra property.
 */
private async simulateStructuredGeneration(
  prompt: string,
  schema: JsonSchema,
  config: StructuredOutputConfig
): Promise<any> {
  // Simulate LLM response generation - in production this would call actual LLM
  const example = this.generateExampleFromSchema(schema);

  // Add some realistic variation/errors for testing
  if (!config.strictMode && Math.random() > 0.8) {
    if (typeof example === 'object' && example !== null) {
      // Copy first: never write into an object owned by the schema.
      const noisy: any = Array.isArray(example) ? [...example] : { ...example };
      noisy.extra_property = 'test';
      return noisy;
    }
  }
  return example;
}
/**
 * Simulated streaming: generates a value, replays it key by key through
 * `onPartial` with a 100 ms pause after each key, then produces the final
 * response with a separate `generateStructuredOutput` call.
 *
 * The same accumulating object is passed to `onPartial` on every call.
 * Non-object values trigger no partial callbacks.
 */
private async simulateStreamingGeneration<T>(
  prompt: string,
  schema: JsonSchema,
  onPartial: (partial: Partial<T>) => void,
  config: StructuredOutputConfig
): Promise<StructuredResponse<T>> {
  const generated = await this.simulateStructuredGeneration(prompt, schema, config);

  // Simulate streaming by sending partial objects
  const isObjectLike = typeof generated === 'object' && generated !== null;
  if (isObjectLike) {
    const accumulated: any = {};
    for (const key of Object.keys(generated)) {
      accumulated[key] = generated[key];
      onPartial(accumulated as Partial<T>);
      // Add delay to simulate streaming
      await new Promise(resolve => setTimeout(resolve, 100));
    }
  }

  // Return full response
  return await this.generateStructuredOutput(prompt, schema, config);
}
/**
 * Produce a sample value for a schema.
 *
 * The schema's first example wins when present (returned by reference).
 * Otherwise the value depends on `type`: strings use the first enum value or
 * 'example string', numbers and integers use a truthy `minimum` or 42,
 * booleans are true, arrays hold one generated item (or 'item'), and objects
 * get one generated value per declared property. Any other type, including a
 * type list, yields null.
 */
private generateExampleFromSchema(schema: JsonSchema): any {
  const examples = schema.examples;
  if (examples && examples.length > 0) {
    return examples[0];
  }

  const kind = schema.type;
  if (kind === 'string') {
    return schema.enum ? schema.enum[0] : 'example string';
  }
  if (kind === 'number' || kind === 'integer') {
    return schema.minimum || 42;
  }
  if (kind === 'boolean') {
    return true;
  }
  if (kind === 'array') {
    const element = schema.items ? this.generateExampleFromSchema(schema.items) : 'item';
    return [element];
  }
  if (kind === 'object') {
    const result: any = {};
    const properties = schema.properties;
    if (properties) {
      for (const name of Object.keys(properties)) {
        result[name] = this.generateExampleFromSchema(properties[name]);
      }
    }
    return result;
  }
  return null;
}
/**
 * Validate one value against one schema node, appending findings to
 * `errors` and `warnings`.
 *
 * A type mismatch is recorded as 'TYPE_MISMATCH' and stops further checks
 * for this node. Otherwise the type-specific validator runs; this happens
 * only when `schema.type` is a single type name, not a list.
 */
private validateRecursive(
  data: any,
  schema: JsonSchema,
  path: string,
  errors: ValidationError[],
  warnings: ValidationWarning[]
): void {
  // Type validation
  const typeMatches = !schema.type || this.validateType(data, schema.type);
  if (!typeMatches) {
    errors.push({
      path,
      message: `Expected type ${schema.type} but got ${typeof data}`,
      code: 'TYPE_MISMATCH',
      severity: 'error',
      actualValue: data,
      expectedType: schema.type as string,
    });
    return;
  }

  // Dispatch to the validator for the declared type, guarding on the runtime
  // type because `validateType` is skipped when no type check applies.
  switch (schema.type) {
    case 'object':
      if (typeof data === 'object' && data !== null) {
        this.validateObject(data, schema, path, errors, warnings);
      }
      break;
    case 'array':
      if (Array.isArray(data)) {
        this.validateArray(data, schema, path, errors, warnings);
      }
      break;
    case 'string':
      if (typeof data === 'string') {
        this.validateString(data, schema, path, errors, warnings);
      }
      break;
    case 'number':
    case 'integer':
      if (typeof data === 'number') {
        this.validateNumber(data, schema, path, errors, warnings);
      }
      break;
    default:
      break;
  }
}
/**
 * Check whether a value matches a type name, or any name in a list.
 *
 * 'object' excludes null and arrays; 'integer' requires a number for which
 * `Number.isInteger` holds; 'number' accepts any value of JS type number.
 */
private validateType(data: any, type: JsonSchemaType | JsonSchemaType[]): boolean {
  const candidates = Array.isArray(type) ? type : [type];
  return candidates.some(candidate => {
    if (candidate === 'null') return data === null;
    if (candidate === 'boolean') return typeof data === 'boolean';
    if (candidate === 'object') {
      return typeof data === 'object' && data !== null && !Array.isArray(data);
    }
    if (candidate === 'array') return Array.isArray(data);
    if (candidate === 'number') return typeof data === 'number';
    if (candidate === 'integer') return typeof data === 'number' && Number.isInteger(data);
    if (candidate === 'string') return typeof data === 'string';
    return false;
  });
}
/**
 * Validate an object value: required properties first, then each present
 * property against its declared schema.
 *
 * Missing required names produce 'MISSING_REQUIRED' errors. Properties not
 * declared in the schema produce a warning (not an error), and only when
 * `additionalProperties` is exactly false. Nothing beyond the required check
 * runs when the schema declares no `properties`.
 */
private validateObject(
  data: Record<string, any>,
  schema: JsonSchema,
  path: string,
  errors: ValidationError[],
  warnings: ValidationWarning[]
): void {
  const childPath = (key: string): string => (path ? `${path}.${key}` : key);

  // Required properties
  if (schema.required) {
    for (const required of schema.required) {
      if (required in data) continue;
      errors.push({
        path: childPath(required),
        message: `Missing required property: ${required}`,
        code: 'MISSING_REQUIRED',
        severity: 'error',
      });
    }
  }

  // Property validation
  const declared = schema.properties;
  if (!declared) return;

  for (const [key, value] of Object.entries(data)) {
    const propertySchema = declared[key];
    if (propertySchema) {
      this.validateRecursive(value, propertySchema, childPath(key), errors, warnings);
      continue;
    }
    if (schema.additionalProperties === false) {
      warnings.push({
        path: childPath(key),
        message: `Unexpected property: ${key}`,
        suggestion: 'Remove this property or update schema to allow additional properties',
        impact: 'medium',
      });
    }
  }
}
/**
 * Validate an array value: `minItems`, `maxItems`, each element against
 * `items`, then `uniqueItems`. Findings are appended in that order.
 */
private validateArray(
  data: any[],
  schema: JsonSchema,
  path: string,
  errors: ValidationError[],
  warnings: ValidationWarning[]
): void {
  const count = data.length;
  const { minItems, maxItems, items: itemSchema } = schema;

  // Length constraints
  if (minItems !== undefined && count < minItems) {
    errors.push({
      path,
      message: `Array must have at least ${minItems} items but has ${count}`,
      code: 'MIN_ITEMS',
      severity: 'error',
      actualValue: count,
    });
  }
  if (maxItems !== undefined && count > maxItems) {
    errors.push({
      path,
      message: `Array must have at most ${maxItems} items but has ${count}`,
      code: 'MAX_ITEMS',
      severity: 'error',
      actualValue: count,
    });
  }

  // Item validation
  if (itemSchema) {
    data.forEach((element, position) => {
      this.validateRecursive(element, itemSchema, `${path}[${position}]`, errors, warnings);
    });
  }

  // Unique items
  const hasDuplicates = Boolean(schema.uniqueItems) && !this.areItemsUnique(data);
  if (hasDuplicates) {
    errors.push({
      path,
      message: 'Array items must be unique',
      code: 'UNIQUE_ITEMS',
      severity: 'error',
    });
  }
}
/**
 * Validate a string value against `minLength`, `maxLength`, `pattern` and
 * `enum`, appending one error per violated constraint in that order.
 *
 * The pattern is compiled with `new RegExp`, so an invalid pattern throws
 * out of this method. `warnings` is accepted for signature parity with the
 * other validators and is not written to.
 */
private validateString(
  data: string,
  schema: JsonSchema,
  path: string,
  errors: ValidationError[],
  warnings: ValidationWarning[]
): void {
  const length = data.length;
  const { minLength, maxLength, pattern, enum: allowed } = schema;

  // Length constraints
  if (minLength !== undefined && length < minLength) {
    errors.push({
      path,
      message: `String must be at least ${minLength} characters but is ${length}`,
      code: 'MIN_LENGTH',
      severity: 'error',
      actualValue: length,
    });
  }
  if (maxLength !== undefined && length > maxLength) {
    errors.push({
      path,
      message: `String must be at most ${maxLength} characters but is ${length}`,
      code: 'MAX_LENGTH',
      severity: 'error',
      actualValue: length,
    });
  }

  // Pattern validation
  if (pattern) {
    const matches = new RegExp(pattern).test(data);
    if (!matches) {
      errors.push({
        path,
        message: `String does not match pattern: ${pattern}`,
        code: 'PATTERN_MISMATCH',
        severity: 'error',
        actualValue: data,
      });
    }
  }

  // Enum validation
  if (allowed && !allowed.includes(data)) {
    errors.push({
      path,
      message: `Value must be one of: ${allowed.join(', ')}`,
      code: 'ENUM_MISMATCH',
      severity: 'error',
      actualValue: data,
      suggestions: allowed.map(String),
    });
  }
}
/**
 * Validate a numeric value against the inclusive `minimum` and `maximum`
 * bounds. `warnings` is accepted for signature parity and is not written to.
 */
private validateNumber(
  data: number,
  schema: JsonSchema,
  path: string,
  errors: ValidationError[],
  warnings: ValidationWarning[]
): void {
  const { minimum: lowerBound, maximum: upperBound } = schema;

  // Range validation
  const belowMinimum = lowerBound !== undefined && data < lowerBound;
  if (belowMinimum) {
    errors.push({
      path,
      message: `Number must be at least ${lowerBound} but is ${data}`,
      code: 'MIN_VALUE',
      severity: 'error',
      actualValue: data,
    });
  }

  const aboveMaximum = upperBound !== undefined && data > upperBound;
  if (aboveMaximum) {
    errors.push({
      path,
      message: `Number must be at most ${upperBound} but is ${data}`,
      code: 'MAX_VALUE',
      severity: 'error',
      actualValue: data,
    });
  }
}
/**
 * Report whether all items of an array are distinct.
 *
 * Primitives are compared by value (Set semantics). Objects, arrays and null
 * are compared by their JSON serialisation, which is sensitive to key order.
 * The two groups are tracked separately so a string that happens to equal
 * the serialisation of another item (e.g. 'null' next to null, or '{"a":1}'
 * next to {a: 1}) is not mistaken for a duplicate.
 *
 * @param array - Items to inspect.
 * @returns True when no two items are equal under the rules above.
 */
private areItemsUnique(array: any[]): boolean {
  const seenPrimitives = new Set<unknown>();
  const seenSerialized = new Set<string | undefined>();

  for (const item of array) {
    if (typeof item === 'object') {
      // Covers objects, arrays and null (typeof null === 'object').
      const serialized: string | undefined = JSON.stringify(item);
      if (seenSerialized.has(serialized)) return false;
      seenSerialized.add(serialized);
    } else {
      if (seenPrimitives.has(item)) return false;
      seenPrimitives.add(item);
    }
  }
  return true;
}
/**
 * Score generated output between 0 and 1.
 *
 * Starts at 0.8, adds 0.2 when validation passed or subtracts 0.1 per
 * validation error otherwise, subtracts 0.1 when the output was
 * auto-corrected, subtracts 0.05 per warning, then clamps to [0, 1].
 * `data` and `schema` are part of the signature but do not affect the score.
 */
private calculateOutputConfidence(
  data: any,
  schema: JsonSchema,
  validationResult: ValidationResult,
  corrected: boolean
): number {
  const baseline = 0.8; // Base confidence
  const errorCount = validationResult.errors.length;
  const warningCount = validationResult.warnings.length;

  const afterValidation = validationResult.valid ? baseline + 0.2 : baseline - 0.1 * errorCount;
  // Reduced confidence for corrected output
  const afterCorrection = corrected ? afterValidation - 0.1 : afterValidation;
  const score = warningCount > 0 ? afterCorrection - 0.05 * warningCount : afterCorrection;

  return Math.max(0, Math.min(1, score));
}
/**
 * Score a validation run: 1.0 minus 0.1 per error and 0.05 per warning,
 * never below 0.
 */
private calculateValidationConfidence(
  errors: ValidationError[],
  warnings: ValidationWarning[]
): number {
  const errorPenalty = errors.length * 0.1;
  const warningPenalty = warnings.length * 0.05;
  const score = 1.0 - errorPenalty - warningPenalty;
  return Math.max(0, score);
}
/**
 * Summarise a validation outcome in one sentence.
 *
 * For valid output the message reports the number of own enumerable keys of
 * `data`. Values that are not objects (null, strings, numbers, booleans) are
 * reported as having 0 properties; previously null data made this method
 * throw and strings were counted character by character.
 *
 * @param data - The validated (possibly corrected) output.
 * @param schema - Schema the output was validated against (unused here).
 * @param validation - Result of the validation run.
 * @returns Human-readable explanation.
 */
private generateReasoningExplanation(
  data: any,
  schema: JsonSchema,
  validation: ValidationResult
): string {
  if (validation.valid) {
    // Object.keys(null) throws, and a valid value may legitimately be null
    // (schema type 'null') or a primitive.
    const propertyCount =
      typeof data === 'object' && data !== null ? Object.keys(data).length : 0;
    return `Output successfully matches the provided schema with ${propertyCount} properties.`;
  }

  const errorCount = validation.errors.length;
  const warningCount = validation.warnings.length;
  return `Output has ${errorCount} validation errors and ${warningCount} warnings that need attention.`;
}
// Auto-correction methods
/**
 * Auto-correction step: when `data` does not match the schema's type, try to
 * convert it and record the change.
 *
 * Only the node passed in is examined; the method does not descend into
 * nested properties. The converted value is written back through
 * `setValueAtPath`.
 */
private correctTypes(
  data: any,
  schema: JsonSchema,
  path: string,
  corrections: Correction[]
): void {
  const expectedType = schema.type;
  if (!expectedType || this.validateType(data, expectedType)) {
    return;
  }

  const converted = this.attemptTypeConversion(data, expectedType);
  if (converted === undefined) {
    return;
  }

  corrections.push({
    path,
    originalValue: data,
    correctedValue: converted,
    reasoning: `Converted ${typeof data} to ${expectedType}`,
    confidence: 0.8,
  });
  // Apply correction to data object
  this.setValueAtPath(data, path, converted);
}
/**
 * Auto-correction step: add every required property that is absent from an
 * object, using a default derived from the property's schema, and record
 * each addition with confidence 0.7.
 *
 * Does nothing unless the schema is an object schema with a `required` list
 * and `data` is a non-null object.
 */
private correctMissingRequired(
  data: any,
  schema: JsonSchema,
  path: string,
  corrections: Correction[]
): void {
  if (schema.type !== 'object' || !schema.required) return;
  if (typeof data !== 'object' || data === null) return;

  for (const required of schema.required) {
    if (required in data) continue;

    const filler = this.generateDefaultValue(schema.properties?.[required]);
    data[required] = filler;
    corrections.push({
      path: path ? `${path}.${required}` : required,
      originalValue: undefined,
      correctedValue: filler,
      reasoning: `Added missing required property with default value`,
      confidence: 0.7,
    });
  }
}
/**
 * Auto-correction step: when the schema has an `enum` and `data` is not one
 * of its values, replace it with the closest enum value (if one is found)
 * and record the change with confidence 0.6.
 *
 * Only the node passed in is examined; nested properties are not visited.
 */
private correctInvalidValues(
  data: any,
  schema: JsonSchema,
  path: string,
  corrections: Correction[]
): void {
  // Correct enum mismatches
  const allowed = schema.enum;
  if (!allowed || allowed.includes(data)) {
    return;
  }

  const nearest = this.findClosestEnumValue(data, allowed);
  if (nearest === null) {
    return;
  }

  corrections.push({
    path,
    originalValue: data,
    correctedValue: nearest,
    reasoning: `Corrected to closest valid enum value`,
    confidence: 0.6,
  });
  this.setValueAtPath(data, path, nearest);
}
/**
 * Deletes properties that a closed object schema does not declare.
 *
 * Applies only when the schema's `type` is exactly `'object'`,
 * `additionalProperties` is `false`, `properties` is present, and `data` is a
 * non-null, non-array object. Each removed key is recorded as a correction
 * (confidence 0.9) carrying the value that was removed.
 *
 * @param data - Object to prune in place.
 * @param schema - Object schema listing the allowed properties.
 * @param path - Dot-separated path of `data`; used to build correction paths.
 * @param corrections - Accumulator that receives one record per removed key.
 */
private removeExtraProperties(
  data: any,
  schema: JsonSchema,
  path: string,
  corrections: Correction[]
): void {
  const allowed = schema.properties;
  if (
    schema.type !== 'object' ||
    schema.additionalProperties !== false ||
    !allowed ||
    typeof data !== 'object' ||
    data === null ||
    // Deleting indices from an array would leave holes; an array under an
    // object schema is a type mismatch, not an "extra property" problem.
    Array.isArray(data)
  ) {
    return;
  }

  for (const key of Object.keys(data)) {
    // Own-property check: a plain lookup would treat inherited names such as
    // "constructor" or "toString" as declared properties.
    if (Object.prototype.hasOwnProperty.call(allowed, key)) {
      continue;
    }

    // Capture the value before deleting so the correction can report it.
    const removedValue = data[key];
    delete data[key];

    corrections.push({
      path: path ? `${path}.${key}` : key,
      originalValue: removedValue,
      correctedValue: undefined,
      reasoning: `Removed extra property not allowed by schema`,
      confidence: 0.9,
    });
  }
}
/**
 * Tries to coerce `value` into one of the requested JSON Schema types.
 *
 * Candidate types are tried in order; the first one that yields a value wins.
 * Conversions are deliberately conservative so a correction never invents
 * data:
 * - `string`: objects/arrays are serialised with `JSON.stringify`, everything
 *   else with `String`.
 * - `number` / `integer`: only non-blank strings and other non-object
 *   primitives that `Number()` turns into a finite number are accepted;
 *   `integer` results are truncated toward zero.
 * - `boolean`: strings must be `"true"` or `"false"` (case-insensitive,
 *   trimmed); non-strings use `Boolean()`.
 * - `array`: non-arrays are wrapped in a single-element array.
 * - `object` / `null`: no conversion is attempted.
 *
 * @param value - The value to convert.
 * @param targetType - A single type or a list of acceptable types.
 * @returns The converted value, or `undefined` when no candidate applies.
 */
private attemptTypeConversion(value: any, targetType: JsonSchemaType | JsonSchemaType[]): any {
  const candidates = Array.isArray(targetType) ? targetType : [targetType];

  for (const candidate of candidates) {
    try {
      let converted: any;

      switch (candidate) {
        case 'string':
          // String({}) would give "[object Object]"; serialise structures instead.
          converted =
            value !== null && typeof value === 'object' ? JSON.stringify(value) : String(value);
          break;

        case 'number':
        case 'integer': {
          // Number('') , Number(null) and Number([]) all coerce to 0, which
          // would fabricate a value; treat those inputs as unconvertible.
          const isBlankString = typeof value === 'string' && value.trim() === '';
          const isNonPrimitive = value === null || typeof value === 'object';
          const parsed = isBlankString || isNonPrimitive ? NaN : Number(value);
          if (Number.isFinite(parsed)) {
            converted = candidate === 'integer' ? Math.trunc(parsed) : parsed;
          }
          break;
        }

        case 'boolean':
          if (typeof value === 'string') {
            const normalized = value.trim().toLowerCase();
            if (normalized === 'true') {
              converted = true;
            } else if (normalized === 'false') {
              converted = false;
            }
            // Any other string is ambiguous; leave it unconverted.
          } else {
            converted = Boolean(value);
          }
          break;

        case 'array':
          converted = Array.isArray(value) ? value : [value];
          break;

        default:
          // 'object' and 'null' have no sensible coercion.
          break;
      }

      if (converted !== undefined) {
        return converted;
      }
      // Fall through to the next candidate type instead of giving up.
    } catch {
      // e.g. JSON.stringify on a circular structure, or Number(Symbol()).
      continue;
    }
  }

  return undefined;
}
/**
 * Picks a placeholder value for a property described by `schema`.
 *
 * Precedence: explicit `default`, then `const`, then the first `enum` member
 * (for any type), then a type-based fallback. When `type` is a list, the
 * first entry decides the fallback. Numeric fallbacks start at `minimum`
 * (or 0) and are pulled down to `maximum` when they exceed it; integer
 * fallbacks are rounded so they stay integral.
 *
 * @param schema - Property schema; may be omitted when the property is not
 *   declared.
 * @returns The chosen value, or `null` when there is no schema or the type is
 *   absent/unrecognised.
 */
private generateDefaultValue(schema?: JsonSchema): any {
  if (!schema) return null;
  if (schema.default !== undefined) return schema.default;
  if (schema.const !== undefined) return schema.const;
  // An enum constrains every type, not just strings.
  if (Array.isArray(schema.enum) && schema.enum.length > 0) return schema.enum[0];

  const primaryType = Array.isArray(schema.type) ? schema.type[0] : schema.type;

  switch (primaryType) {
    case 'string':
      return '';

    case 'number':
    case 'integer': {
      const wantsInteger = primaryType === 'integer';
      let candidate = schema.minimum ?? 0;
      if (wantsInteger) {
        // Round up so the result is integral and still >= minimum.
        candidate = Math.ceil(candidate);
      }
      if (schema.maximum !== undefined && candidate > schema.maximum) {
        candidate = wantsInteger ? Math.floor(schema.maximum) : schema.maximum;
      }
      return candidate;
    }

    case 'boolean':
      return false;

    case 'array':
      return [];

    case 'object':
      return {};

    default:
      return null;
  }
}
/**
 * Chooses the enum member that best matches an invalid value.
 *
 * For string input, members are compared case-insensitively by edit distance
 * and the nearest one is returned only if it is within 3 edits. For
 * non-string input the first member is returned.
 *
 * @param value - The value that failed the enum check.
 * @param enumValues - Allowed values from the schema.
 * @returns The chosen member, or `null` when the enum is empty or no member
 *   is close enough.
 */
private findClosestEnumValue(value: any, enumValues: any[]): any {
  // An empty enum has nothing to suggest. Returning `undefined` here would
  // slip past callers that test the result against `null`.
  if (!Array.isArray(enumValues) || enumValues.length === 0) {
    return null;
  }

  if (typeof value !== 'string') {
    return enumValues[0]; // Default to first value
  }

  // Lower-case the input once rather than on every comparison.
  const needle = value.toLowerCase();
  let closest = enumValues[0];
  let minDistance = this.editDistance(needle, String(closest).toLowerCase());

  for (let i = 1; i < enumValues.length; i++) {
    const distance = this.editDistance(needle, String(enumValues[i]).toLowerCase());
    if (distance < minDistance) {
      minDistance = distance;
      closest = enumValues[i];
    }
  }

  return minDistance <= 3 ? closest : null; // Only suggest if reasonably close
}
/**
 * Computes the Levenshtein edit distance between two strings.
 *
 * Insertions, deletions and substitutions each cost 1. Characters are
 * compared via `charAt`, i.e. per UTF-16 code unit.
 *
 * @param a - First string.
 * @param b - Second string.
 * @returns The minimum number of single-character edits turning one string
 *   into the other.
 */
private editDistance(a: string, b: string): number {
  // Only the previous row of the DP table is needed to compute the next one.
  let previousRow: number[] = Array.from({ length: a.length + 1 }, (_, col) => col);

  for (let row = 1; row <= b.length; row++) {
    const currentRow: number[] = [row];
    const bChar = b.charAt(row - 1);

    for (let col = 1; col <= a.length; col++) {
      if (bChar === a.charAt(col - 1)) {
        currentRow[col] = previousRow[col - 1];
      } else {
        const substitution = previousRow[col - 1] + 1;
        const insertion = currentRow[col - 1] + 1;
        const deletion = previousRow[col] + 1;
        currentRow[col] = Math.min(substitution, insertion, deletion);
      }
    }

    previousRow = currentRow;
  }

  return previousRow[a.length];
}
/**
 * Writes `value` into `obj` at a dot-separated `path`, creating intermediate
 * plain objects as needed.
 *
 * The call is a no-op when `path` is empty, when `obj` is not a non-null
 * object, or when any path segment is `__proto__`. Intermediate segments are
 * only followed when they are own properties holding an object; anything
 * else (missing, inherited, `null`, or a primitive) is replaced with a fresh
 * `{}` so the write can proceed.
 *
 * @param obj - Root object to mutate.
 * @param path - Dot-separated property path, e.g. `"user.address.city"`.
 * @param value - Value to assign at the final segment.
 */
private setValueAtPath(obj: any, path: string, value: any): void {
  if (!path) return;
  // Property assignment on null/primitives throws in strict mode.
  if (obj === null || typeof obj !== 'object') return;

  const keys = path.split('.');
  // Assigning through `__proto__` would rewrite a prototype chain.
  if (keys.includes('__proto__')) return;

  let current = obj;
  for (const key of keys.slice(0, -1)) {
    const isOwn = Object.prototype.hasOwnProperty.call(current, key);
    const next = isOwn ? current[key] : undefined;
    if (next === null || typeof next !== 'object') {
      // Never descend into inherited members (e.g. `constructor`), and never
      // try to index into null or a primitive.
      current[key] = {};
    }
    current = current[key];
  }

  current[keys[keys.length - 1]] = value;
}
/**
 * Aggregates per-correction confidences into a single score.
 *
 * With no corrections the score is 1.0. Otherwise it is the mean confidence
 * minus 0.1 for every correction beyond the fifth, with a floor of 0.3.
 *
 * @param corrections - Corrections that were applied.
 * @returns The aggregate confidence.
 */
private calculateCorrectionConfidence(corrections: Correction[]): number {
  const count = corrections.length;
  if (count === 0) {
    return 1.0;
  }

  let total = 0;
  for (const correction of corrections) {
    total += correction.confidence;
  }
  const mean = total / count;

  // Many corrections suggest the output was far from the schema.
  const excess = count - 5;
  const complexityPenalty = excess > 0 ? excess * 0.1 : 0;

  return Math.max(0.3, mean - complexityPenalty);
}
// Field analysis for schema inference
/**
 * Collects per-key statistics across a list of sample values.
 *
 * Samples that are not non-null objects are skipped. For every enumerable
 * key of each remaining sample, the occurrence count is incremented; `null`
 * and `undefined` values are tallied separately, and any other value
 * contributes its JSON Schema type plus (for the first three) an example.
 *
 * @param data - Sample values to inspect.
 * @param options - Inference options (not consulted by this method).
 * @returns A map from property name to its accumulated analysis.
 */
private analyzeFields(data: any[], options: SchemaInferenceOptions): Map<string, FieldAnalysis> {
  const fields = new Map<string, FieldAnalysis>();

  for (const sample of data) {
    if (sample === null || typeof sample !== 'object') {
      continue;
    }

    for (const [key, value] of Object.entries(sample)) {
      let field = fields.get(key);
      if (!field) {
        field = {
          name: key,
          types: new Set(),
          occurrences: 0,
          examples: [],
          nullCount: 0,
          undefinedCount: 0,
        };
        fields.set(key, field);
      }

      field.occurrences += 1;

      if (value === null) {
        field.nullCount += 1;
        continue;
      }
      if (value === undefined) {
        field.undefinedCount += 1;
        continue;
      }

      field.types.add(this.getJsonSchemaType(value));
      // Keep at most three example values per field.
      if (field.examples.length < 3) {
        field.examples.push(value);
      }
    }
  }

  return fields;
}
/**
 * Maps a runtime value to its JSON Schema type name.
 *
 * Whole numbers map to `'integer'`, other numbers to `'number'`. Values with
 * no JSON counterpart (for example `undefined` or functions) fall back to
 * `'string'`.
 *
 * @param value - Any runtime value.
 * @returns The matching JSON Schema type name.
 */
private getJsonSchemaType(value: any): JsonSchemaType {
  if (value === null) return 'null';
  if (Array.isArray(value)) return 'array';

  switch (typeof value) {
    case 'boolean':
      return 'boolean';
    case 'object':
      return 'object';
    case 'number':
      return Number.isInteger(value) ? 'integer' : 'number';
    default:
      // Strings, plus the fallback for everything else.
      return 'string';
  }
}
/**
 * Builds a property schema from the statistics gathered for one field.
 *
 * The `type` is the single observed type, the list of observed types when
 * there are several, or `'string'` when none were observed. Examples and a
 * generated description are attached only when the corresponding option is
 * enabled.
 *
 * @param analysis - Accumulated statistics for the field.
 * @param options - Controls `includeExamples` and `generateDescriptions`.
 * @returns The inferred property schema.
 */
private createPropertySchema(
  analysis: FieldAnalysis,
  options: SchemaInferenceOptions
): JsonSchema {
  const observedTypes = Array.from(analysis.types);
  const schema: JsonSchema = {};

  switch (observedTypes.length) {
    case 0:
      schema.type = 'string'; // fallback
      break;
    case 1:
      schema.type = observedTypes[0];
      break;
    default:
      schema.type = observedTypes;
      break;
  }

  const hasExamples = analysis.examples.length > 0;
  if (options.includeExamples && hasExamples) {
    schema.examples = analysis.examples;
  }

  if (options.generateDescriptions) {
    schema.description = `Property ${analysis.name} (appears in ${analysis.occurrences} samples)`;
  }

  return schema;
}
/**
 * Checks that a property schema declares what kind of value it accepts, then
 * applies the same check to every nested entry in `properties`.
 *
 * A schema passes when it has `type`, `anyOf`, `oneOf` or `allOf`; otherwise
 * a `MISSING_TYPE` error is appended for its path.
 *
 * @param schema - Schema to check.
 * @param path - Path of `schema`, used in error records and extended with
 *   `.name` for nested properties.
 * @param errors - Accumulator for schema errors.
 * @param warnings - Accumulator for warnings; passed through to nested calls.
 */
private validatePropertySchema(
  schema: JsonSchema,
  path: string,
  errors: SchemaValidationError[],
  warnings: string[]
): void {
  const declaresShape = Boolean(schema.type || schema.anyOf || schema.oneOf || schema.allOf);
  if (!declaresShape) {
    errors.push({
      path,
      message: 'Property schema must specify a type',
      code: 'MISSING_TYPE',
    });
  }

  const nested = schema.properties;
  if (!nested) {
    return;
  }

  // Descend into each declared child property.
  Object.entries(nested).forEach(([childName, childSchema]) => {
    this.validatePropertySchema(childSchema, `${path}.${childName}`, errors, warnings);
  });
}
/**
 * Detects whether a schema object is reachable from itself through
 * `properties` or `items`.
 *
 * `visited` holds the schemas on the current descent path: a schema is added
 * before its children are explored and removed again once they have all been
 * cleared, so the same sub-schema appearing in two sibling branches is not
 * reported.
 *
 * @param schema - Schema to inspect.
 * @param visited - Schemas on the current path; callers normally omit it.
 * @returns `true` when a cycle is found.
 */
private hasCircularReference(schema: JsonSchema, visited = new Set<any>()): boolean {
  if (visited.has(schema)) {
    return true;
  }
  visited.add(schema);

  // Children in the order they are explored: properties first, then items.
  const children: JsonSchema[] = [];
  if (schema.properties) {
    children.push(...Object.values(schema.properties));
  }
  if (schema.items) {
    children.push(schema.items);
  }

  const cycleFound = children.some(child => this.hasCircularReference(child, visited));
  if (cycleFound) {
    return true;
  }

  visited.delete(schema);
  return false;
}
/**
 * Renders a Markdown section describing one schema property.
 *
 * The section has a level-3 heading (suffixed with " (required)" when
 * applicable), the description if present, a type bullet, and optional
 * bullets for allowed values, minimum, maximum and pattern, followed by a
 * blank line.
 *
 * @param name - Property name used in the heading.
 * @param schema - Schema of the property.
 * @param required - Whether the property is required.
 * @returns The Markdown fragment.
 */
private generatePropertyDocumentation(
  name: string,
  schema: JsonSchema,
  required: boolean
): string {
  const requiredSuffix = required ? ' (required)' : '';
  const parts: string[] = [`### ${name}${requiredSuffix}\n\n`];

  if (schema.description) {
    parts.push(`${schema.description}\n\n`);
  }

  parts.push(`- Type: \`${schema.type || 'any'}\`\n`);

  if (schema.enum) {
    const allowed = schema.enum.map(v => `\`${v}\``).join(', ');
    parts.push(`- Allowed values: ${allowed}\n`);
  }
  if (schema.minimum !== undefined) {
    parts.push(`- Minimum: ${schema.minimum}\n`);
  }
  if (schema.maximum !== undefined) {
    parts.push(`- Maximum: ${schema.maximum}\n`);
  }
  if (schema.pattern) {
    parts.push(`- Pattern: \`${schema.pattern}\`\n`);
  }

  parts.push('\n');
  return parts.join('');
}
// Simple format converters (would use proper libraries in production)
/**
 * Parses flat `key: value` lines into an object of strings.
 *
 * Each non-blank line is split at its first colon; the trimmed text before
 * it becomes the key and the trimmed remainder the value. Lines without a
 * colon, or starting with one, are ignored. Nesting, lists and typed scalars
 * are not supported (a real YAML library would be needed for that).
 *
 * @param yaml - Text to parse.
 * @returns An object mapping each key to its string value.
 */
private parseSimpleYaml(yaml: string): any {
  const parsed: any = {};

  for (const rawLine of yaml.split('\n')) {
    if (!rawLine.trim()) {
      continue;
    }

    // Split at the first colon only, so values may themselves contain colons.
    const colonAt = rawLine.indexOf(':');
    if (colonAt <= 0) {
      continue;
    }

    const key = rawLine.slice(0, colonAt).trim();
    parsed[key] = rawLine.slice(colonAt + 1).trim();
  }

  return parsed;
}
/**
 * Serialises a value as flat `key: value` lines.
 *
 * Non-object input (and `null`) is returned via `String()`. For objects,
 * each enumerable entry becomes one newline-terminated line; object-typed
 * values (including arrays and `null`) are embedded as JSON.
 *
 * @param data - Value to serialise.
 * @returns The rendered text.
 */
private toSimpleYaml(data: any): string {
  if (data === null || typeof data !== 'object') {
    return String(data);
  }

  return Object.entries(data)
    .map(([key, value]) => {
      const rendered = typeof value === 'object' ? JSON.stringify(value) : value;
      return `${key}: ${rendered}\n`;
    })
    .join('');
}
/**
 * Serialises a value as a single-level XML document under `<root>`.
 *
 * Non-object input (and `null`) becomes the text content of `<root>`. For
 * objects, each enumerable entry becomes one child element; object-typed
 * values are embedded as JSON text. Text content is escaped (`&`, `<`, `>`)
 * and keys are normalised into usable element names so the output stays
 * well-formed for arbitrary input.
 *
 * @param data - Value to serialise.
 * @returns The rendered XML text.
 */
private toSimpleXml(data: any): string {
  // `&` must be replaced first so the entities added below are not re-escaped.
  const escapeText = (text: string): string =>
    text.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');

  // Element names may not contain spaces or markup and must start with a
  // letter or underscore; keys that already comply are left untouched.
  const toElementName = (key: string): string => {
    const cleaned = key.replace(/[^\p{L}\p{N}_.-]/gu, '_');
    return /^[\p{L}_]/u.test(cleaned) ? cleaned : `_${cleaned}`;
  };

  if (typeof data !== 'object' || data === null) {
    return `<root>${escapeText(String(data))}</root>`;
  }

  let xml = '<root>\n';
  for (const [key, value] of Object.entries(data)) {
    const text = typeof value === 'object' ? String(JSON.stringify(value)) : String(value);
    const tag = toElementName(key);
    xml += ` <${tag}>${escapeText(text)}</${tag}>\n`;
  }
  xml += '</root>';
  return xml;
}
/**
 * Serialises an array of row objects as CSV.
 *
 * Column headers are the keys of the first row. Every line, including the
 * last, ends with `\n`. Fields containing a comma, double quote, CR or LF are
 * wrapped in double quotes with embedded quotes doubled; `null`/`undefined`
 * become empty fields and object values are embedded as JSON.
 *
 * Input that is not a non-empty array of objects is returned as JSON text.
 *
 * @param data - Rows to serialise.
 * @returns CSV text, or the JSON form of `data` when it is not tabular.
 */
private toCsv(data: any): string {
  if (!Array.isArray(data) || data.length === 0) {
    return JSON.stringify(data);
  }

  const firstRow = data[0];
  // Headers come from the first row; without an object there are no columns.
  if (firstRow === null || typeof firstRow !== 'object') {
    return JSON.stringify(data);
  }

  const escapeField = (field: unknown): string => {
    if (field === null || field === undefined) {
      return '';
    }
    const text =
      typeof field === 'object' ? (JSON.stringify(field) ?? '') : String(field);
    // Quote whenever the delimiter, a quote or a line break is present.
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  const headers = Object.keys(firstRow);
  const lines: string[] = [headers.map(escapeField).join(',')];
  for (const row of data) {
    // `row` may be null/undefined in ragged input; treat its cells as empty.
    lines.push(headers.map(header => escapeField(row?.[header])).join(','));
  }

  return lines.join('\n') + '\n';
}
}
/**
 * Per-property statistics accumulated while scanning sample objects during
 * schema inference.
 */
interface FieldAnalysis {
  /** Property key as it appears in the samples. */
  name: string;
  /** JSON Schema types seen for this key, excluding null/undefined values. */
  types: Set<JsonSchemaType>;
  /** Number of samples in which the key was present, whatever its value. */
  occurrences: number;
  /** Up to three non-null, non-undefined values seen for this key. */
  examples: any[];
  /** Number of times the key's value was `null`. */
  nullCount: number;
  /** Number of times the key was present with an `undefined` value. */
  undefinedCount: number;
}
// Factory function
/**
 * Creates a new `StructuredOutputManager`, constructed without arguments.
 *
 * @returns The new instance, typed as `IStructuredOutputManager`.
 */
export function createStructuredOutputManager(): IStructuredOutputManager {
  return new StructuredOutputManager();
}
// Default export: the StructuredOutputManager class itself (the factory
// function is available as a named export).
export default StructuredOutputManager;