codecrucible-synth
Version:
Production-Ready AI Development Platform with Multi-Voice Synthesis, Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability
1,192 lines • 46.9 kB
JavaScript
/**
* Structured Output Manager - JSON Schema Validation System
* Implements modern structured output generation with validation and type safety
*
* Features:
* - JSON Schema validation with detailed error reporting
* - Type-safe response handling with TypeScript inference
* - Streaming structured output with partial validation
* - Schema inference from examples and descriptions
* - Multiple output formats (JSON, YAML, XML, etc.)
* - Schema evolution and version management
* - Confidence scoring for generated structures
* - Auto-correction for common schema violations
*/
import { EventEmitter } from 'events';
import { logger } from '../logger.js';
import { getTelemetryProvider } from '../observability/observability-system.js';
/**
* Structured Output Manager Implementation
* Provides comprehensive structured output generation and validation
*/
export class StructuredOutputManager extends EventEmitter {
  /** Registered schemas keyed by id; each entry is `{ schema, version? }`. */
  schemas = new Map();
  /** Telemetry provider captured at construction time. */
  telemetry = getTelemetryProvider();
  /** Defaults that caller-supplied options are merged over. */
  defaultConfig = {
    enablePartialValidation: true,
    enableAutoCorrection: true,
    maxValidationErrors: 50,
    confidenceThreshold: 0.8,
    outputFormat: 'json',
    strictMode: false,
    schemaValidation: 'moderate',
  };

  constructor() {
    super();
    this.setupEventHandlers();
  }

  /**
   * Wire the manager's own events to the logger.
   */
  setupEventHandlers() {
    this.on('schema-generated', (complexity) => {
      logger.debug('Schema generated', { complexity });
    });
    this.on('output-validated', (valid, errors) => {
      logger.debug('Output validated', { valid, errors });
    });
    this.on('output-corrected', (corrections) => {
      logger.info('Output auto-corrected', { corrections });
    });
  }

  /**
   * Generate JSON Schema from description and examples
   *
   * @param {object} request - `{ description, examples?, constraints?, outputFormat?, inferenceOptions? }`.
   * @returns {Promise<object>} The generated schema.
   * @throws {Error} Wrapping any failure, with the original error as `cause`.
   */
  async generateSchema(request) {
    const startTime = Date.now();
    try {
      logger.info('Generating schema', {
        hasExamples: !!request.examples?.length,
        hasConstraints: !!request.constraints?.length,
        outputFormat: request.outputFormat,
      });
      let schema = {
        type: 'object',
        title: this.extractTitleFromDescription(request.description),
        description: request.description,
        properties: {},
        required: [],
        additionalProperties: false,
      };
      // Infer from examples if provided
      if (request.examples && request.examples.length > 0) {
        const inferredSchema = await this.inferSchemaFromData(request.examples, request.inferenceOptions);
        schema = this.mergeSchemas(schema, inferredSchema);
      }
      // Apply constraints
      if (request.constraints) {
        schema = this.applyConstraints(schema, request.constraints);
      }
      // Add format-specific properties
      if (request.outputFormat && request.outputFormat !== 'json') {
        schema = this.adaptSchemaForFormat(schema, request.outputFormat);
      }
      // Generate descriptions if requested
      if (request.inferenceOptions?.generateDescriptions) {
        schema = await this.generatePropertyDescriptions(schema, request.description);
      }
      const complexity = this.calculateSchemaComplexity(schema);
      this.emit('schema-generated', complexity);
      logger.info('Schema generation completed', {
        complexity,
        properties: Object.keys(schema.properties || {}).length,
        generationTime: Date.now() - startTime,
      });
      return schema;
    }
    catch (error) {
      logger.error('Schema generation failed', error);
      throw new Error(`Schema generation failed: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
    }
  }

  /**
   * Generate structured output with schema validation
   *
   * The raw output is validated; when invalid and auto-correction is enabled,
   * a correction is attempted and accepted only if its confidence reaches the
   * configured threshold. Accepted corrections are re-validated so `valid`
   * reflects the data actually returned. `errors`/`warnings` always describe
   * the raw (pre-correction) output.
   *
   * @param {string} prompt - User prompt.
   * @param {object} schema - JSON Schema the output must follow.
   * @param {object} [options] - Overrides for `defaultConfig`.
   * @returns {Promise<object>} Response with `data`, `valid`, `confidence`, `metadata`, `errors`, `warnings`, `corrected`.
   * @throws {Error} Wrapping any failure, with the original error as `cause`.
   */
  async generateStructuredOutput(prompt, schema, options) {
    const config = { ...this.defaultConfig, ...options };
    const startTime = Date.now();
    try {
      logger.info('Generating structured output', {
        schemaTitle: schema.title,
        outputFormat: config.outputFormat,
        strictMode: config.strictMode,
      });
      // Enhanced prompt with schema information
      const enhancedPrompt = this.createSchemaAwarePrompt(prompt, schema, config);
      // Simulate LLM call with structured generation
      // In production, this would call the actual LLM with schema constraints
      const rawOutput = await this.simulateStructuredGeneration(enhancedPrompt, schema, config);
      const generationTime = Date.now() - startTime;
      const validationStartTime = Date.now();
      // Validate generated output
      const validationResult = await this.validateOutput(rawOutput, schema);
      const validationTime = Date.now() - validationStartTime;
      // Auto-correct if enabled and validation failed
      let finalData = rawOutput;
      let corrected = false;
      let corrections = [];
      let valid = validationResult.valid;
      if (!validationResult.valid && config.enableAutoCorrection) {
        const correctionResult = await this.autoCorrectOutput(rawOutput, schema);
        // `??` so that an explicit threshold of 0 is honoured.
        const threshold = config.confidenceThreshold ?? 0.8;
        if (correctionResult.corrected && correctionResult.confidence >= threshold) {
          finalData = correctionResult.data;
          corrected = true;
          corrections = correctionResult.corrections;
          // A correction pass is not a guarantee of validity; check again.
          const recheck = await this.validateOutput(finalData, schema);
          valid = recheck.valid;
          this.emit('output-corrected', corrections.length);
        }
      }
      // Calculate confidence
      const confidence = this.calculateOutputConfidence(finalData, schema, validationResult, corrected);
      this.emit('output-validated', valid, validationResult.errors.length);
      const response = {
        data: finalData,
        schema,
        valid,
        confidence,
        metadata: {
          generationTime,
          validationTime,
          schemaComplexity: this.calculateSchemaComplexity(schema),
          outputFormat: config.outputFormat || 'json',
          reasoning: this.generateReasoningExplanation(finalData, schema, validationResult),
          alternatives: [],
        },
        errors: validationResult.errors,
        warnings: validationResult.warnings,
        corrected,
      };
      if (corrected) {
        response.metadata.reasoning = `Output was auto-corrected with ${corrections.length} changes. ${response.metadata.reasoning}`;
      }
      return response;
    }
    catch (error) {
      logger.error('Structured output generation failed', error);
      throw new Error(`Structured output generation failed: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
    }
  }

  /**
   * Stream structured output with partial validation
   *
   * @param {string} prompt - User prompt.
   * @param {object} schema - JSON Schema the output must follow.
   * @param {Function} onPartial - Called with each partial object.
   * @param {object} [options] - Overrides for `defaultConfig`.
   * @returns {Promise<object>} The final structured response.
   */
  async streamStructuredOutput(prompt, schema, onPartial, options) {
    const config = { ...this.defaultConfig, ...options };
    logger.info('Starting structured output streaming', {
      schemaTitle: schema.title,
      partialValidation: config.enablePartialValidation,
    });
    // Simulate streaming generation
    return await this.simulateStreamingGeneration(prompt, schema, onPartial, config);
  }

  /**
   * Validate data against schema
   *
   * @param {*} data - Value to validate.
   * @param {object} schema - JSON Schema.
   * @returns {Promise<object>} `{ valid, errors, warnings, confidence, partiallyValid }`.
   */
  async validateOutput(data, schema) {
    const errors = [];
    const warnings = [];
    try {
      // Perform comprehensive validation
      this.validateRecursive(data, schema, '', errors, warnings);
      const valid = !errors.some((e) => e.severity === 'error');
      const confidence = this.calculateValidationConfidence(errors, warnings);
      return {
        valid,
        errors,
        warnings,
        confidence,
        partiallyValid: !valid && warnings.length > 0,
      };
    }
    catch (error) {
      errors.push({
        path: '',
        message: `Validation error: ${error instanceof Error ? error.message : String(error)}`,
        code: 'VALIDATION_ERROR',
        severity: 'error',
      });
      return {
        valid: false,
        errors,
        warnings,
        confidence: 0,
        partiallyValid: false,
      };
    }
  }

  /**
   * Auto-correct common validation issues
   *
   * Works on a deep copy of `data`. Runs, in order: type coercion, filling
   * missing required properties, enum repair, removal of disallowed extra
   * properties. Each pass walks nested objects and array items.
   *
   * @param {*} data - JSON-serializable value to correct.
   * @param {object} schema - JSON Schema.
   * @returns {Promise<object>} `{ corrected, data, corrections, confidence, metadata }`;
   *   on failure the original `data` is returned with `corrected: false`.
   */
  async autoCorrectOutput(data, schema) {
    const corrections = [];
    try {
      // Deep copy inside the try so non-serializable input yields the fallback result.
      let correctedData = JSON.parse(JSON.stringify(data));
      // Apply various correction strategies
      correctedData = this.correctTypes(correctedData, schema, '', corrections);
      this.correctMissingRequired(correctedData, schema, '', corrections);
      correctedData = this.correctInvalidValues(correctedData, schema, '', corrections);
      this.removeExtraProperties(correctedData, schema, '', corrections);
      const confidence = this.calculateCorrectionConfidence(corrections);
      const metadata = {
        totalCorrections: corrections.length,
        criticalCorrections: corrections.filter((c) => c.confidence < 0.7).length,
        automaticCorrections: corrections.filter((c) => c.confidence >= 0.8).length,
        manualReviewNeeded: corrections.some((c) => c.confidence < 0.6),
      };
      return {
        corrected: corrections.length > 0,
        data: correctedData,
        corrections,
        confidence,
        metadata,
      };
    }
    catch (error) {
      logger.error('Auto-correction failed', error);
      return {
        corrected: false,
        data: data,
        corrections: [],
        confidence: 0,
        metadata: {
          totalCorrections: 0,
          criticalCorrections: 0,
          automaticCorrections: 0,
          manualReviewNeeded: true,
        },
      };
    }
  }

  /**
   * Infer schema from data examples
   *
   * NOTE(review): a field is only marked required when `inferOptionalFields`
   * is false, so with the defaults nothing is ever required. Kept as-is;
   * confirm whether that is the intended meaning of the option.
   *
   * @param {Array} data - Sample objects.
   * @param {object} [options] - Inference options overriding the defaults below.
   * @returns {Promise<object>} Inferred object schema.
   */
  async inferSchemaFromData(data, options) {
    const opts = {
      includeExamples: false,
      strictTypes: false,
      minimumOccurrence: 1,
      inferOptionalFields: true,
      generateDescriptions: false,
      ...options,
    };
    if (data.length === 0) {
      return { type: 'object', properties: {}, additionalProperties: false };
    }
    const schema = {
      type: 'object',
      properties: {},
      required: [],
      additionalProperties: !opts.strictTypes,
    };
    // Analyze all data samples
    const fieldAnalysis = this.analyzeFields(data, opts);
    // Build schema from analysis
    for (const [fieldName, analysis] of fieldAnalysis.entries()) {
      schema.properties[fieldName] = this.createPropertySchema(analysis, opts);
      // Determine if field should be required
      const occurrenceRatio = analysis.occurrences / data.length;
      if (occurrenceRatio >= opts.minimumOccurrence / data.length && !opts.inferOptionalFields) {
        schema.required.push(fieldName);
      }
    }
    if (opts.includeExamples) {
      schema.examples = data.slice(0, 3); // Include first 3 examples
    }
    return schema;
  }

  /**
   * Convert output format
   *
   * @param {*} data - String in `fromFormat`, or an already-parsed value.
   * @param {string} fromFormat - 'json' or 'yaml' (only consulted for string input).
   * @param {string} toFormat - 'json', 'yaml', 'xml' or 'csv'.
   * @returns {Promise<string>} Serialized data.
   * @throws {Error} On unsupported formats or parse failures.
   */
  async convertFormat(data, fromFormat, toFormat) {
    if (fromFormat === toFormat) {
      return typeof data === 'string' ? data : JSON.stringify(data, null, 2);
    }
    try {
      // Parse data from source format
      let parsedData = data;
      if (typeof data === 'string') {
        switch (fromFormat) {
          case 'json':
            parsedData = JSON.parse(data);
            break;
          case 'yaml':
            // Would use yaml parser in production
            parsedData = this.parseSimpleYaml(data);
            break;
          default:
            throw new Error(`Unsupported source format: ${fromFormat}`);
        }
      }
      // Convert to target format
      switch (toFormat) {
        case 'json':
          return JSON.stringify(parsedData, null, 2);
        case 'yaml':
          return this.toSimpleYaml(parsedData);
        case 'xml':
          return this.toSimpleXml(parsedData);
        case 'csv':
          return this.toCsv(parsedData);
        default:
          throw new Error(`Unsupported target format: ${toFormat}`);
      }
    }
    catch (error) {
      logger.error('Format conversion failed', { fromFormat, toFormat, error });
      throw new Error(`Format conversion failed: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
    }
  }

  /**
   * Calculate schema complexity
   *
   * One point per declared property, summed recursively through `properties`,
   * `items` and every branch of `anyOf`/`oneOf`/`allOf`.
   *
   * @param {object} schema - JSON Schema.
   * @param {Set} [visited] - Schemas on the current recursion path; guards
   *   against circular schemas. Callers normally omit it.
   * @returns {number} Complexity score.
   */
  calculateSchemaComplexity(schema, visited = new Set()) {
    if (schema === null || typeof schema !== 'object' || visited.has(schema)) {
      return 0;
    }
    visited.add(schema);
    let complexity = 0;
    if (schema.properties) {
      const propertySchemas = Object.values(schema.properties);
      complexity += propertySchemas.length;
      for (const property of propertySchemas) {
        complexity += this.calculateSchemaComplexity(property, visited);
      }
    }
    if (schema.items) {
      complexity += this.calculateSchemaComplexity(schema.items, visited);
    }
    // Count every composition keyword, not just the first one present.
    for (const keyword of ['anyOf', 'oneOf', 'allOf']) {
      const branches = schema[keyword];
      if (Array.isArray(branches)) {
        for (const branch of branches) {
          complexity += this.calculateSchemaComplexity(branch, visited);
        }
      }
    }
    // Path-based tracking: shared (non-circular) sub-schemas are still counted each time.
    visited.delete(schema);
    return complexity;
  }

  /**
   * Register schema for reuse
   */
  registerSchema(id, schema) {
    this.schemas.set(id, { schema });
    logger.debug('Schema registered', { id, complexity: this.calculateSchemaComplexity(schema) });
  }

  /**
   * Get registered schema
   *
   * @returns {object|null} The schema, or null when `id` is unknown.
   */
  getSchema(id) {
    const entry = this.schemas.get(id);
    return entry ? entry.schema : null;
  }

  /**
   * Update registered schema with versioning
   */
  updateSchema(id, schema, version) {
    this.schemas.set(id, { schema, version });
    logger.info('Schema updated', { id, version });
  }

  /**
   * Generate schema documentation
   *
   * @param {object} schema - JSON Schema.
   * @returns {string} Markdown describing the top-level properties.
   */
  generateSchemaDocumentation(schema) {
    let docs = `# ${schema.title || 'Schema Documentation'}\n\n`;
    if (schema.description) {
      docs += `${schema.description}\n\n`;
    }
    docs += '## Properties\n\n';
    if (schema.properties) {
      for (const [name, prop] of Object.entries(schema.properties)) {
        docs += this.generatePropertyDocumentation(name, prop, schema.required?.includes(name) || false);
      }
    }
    return docs;
  }

  /**
   * Validate schema itself
   *
   * @param {object} schema - JSON Schema to check.
   * @returns {Promise<object>} `{ valid, errors, warnings, complexity }`.
   */
  async validateSchema(schema) {
    const errors = [];
    const warnings = [];
    try {
      // Basic schema validation
      if (!schema.type && !schema.anyOf && !schema.oneOf && !schema.allOf) {
        errors.push({
          path: '',
          message: 'Schema must specify a type or use composition (anyOf, oneOf, allOf)',
          code: 'MISSING_TYPE',
        });
      }
      // Validate properties if it's an object schema
      if (schema.type === 'object' && schema.properties) {
        for (const [name, prop] of Object.entries(schema.properties)) {
          this.validatePropertySchema(prop, name, errors, warnings);
        }
      }
      // Check for circular references
      if (this.hasCircularReference(schema)) {
        warnings.push('Schema contains circular references which may cause issues');
      }
      const complexity = this.calculateSchemaComplexity(schema);
      if (complexity > 100) {
        warnings.push(`Schema is quite complex (${complexity} points) - consider simplifying`);
      }
      return {
        valid: errors.length === 0,
        errors,
        warnings,
        complexity,
      };
    }
    catch (error) {
      errors.push({
        path: '',
        message: `Schema validation error: ${error instanceof Error ? error.message : String(error)}`,
        code: 'VALIDATION_ERROR',
      });
      return {
        valid: false,
        errors,
        warnings,
        complexity: 0,
      };
    }
  }

  // Private helper methods

  /** Own-property test that ignores inherited keys such as `constructor`. */
  hasOwnKey(target, key) {
    return target !== null && target !== undefined && Object.prototype.hasOwnProperty.call(target, key);
  }

  /**
   * Invoke `visit(parent, key, childSchema, childPath)` for every child of
   * `data` that has a schema: own properties listed in `schema.properties`,
   * or array items when `schema.items` is a single schema object.
   */
  visitChildren(data, schema, path, visit) {
    if (data === null || typeof data !== 'object' || schema === null || typeof schema !== 'object') {
      return;
    }
    if (Array.isArray(data)) {
      const itemSchema = schema.items;
      if (itemSchema && typeof itemSchema === 'object' && !Array.isArray(itemSchema)) {
        for (let index = 0; index < data.length; index++) {
          visit(data, index, itemSchema, `${path}[${index}]`);
        }
      }
      return;
    }
    if (schema.properties) {
      for (const [key, propertySchema] of Object.entries(schema.properties)) {
        if (propertySchema && typeof propertySchema === 'object' && this.hasOwnKey(data, key)) {
          visit(data, key, propertySchema, path ? `${path}.${key}` : key);
        }
      }
    }
  }

  /**
   * Title = first sentence of the description, truncated to 50 characters.
   * A missing description yields an empty title instead of throwing.
   */
  extractTitleFromDescription(description) {
    const firstSentence = String(description ?? '').split('.')[0];
    return firstSentence.length > 50 ? `${firstSentence.substring(0, 47)}...` : firstSentence;
  }

  /**
   * Merge inferred properties/required into the base schema (inferred
   * properties win on conflict; required names are de-duplicated).
   */
  mergeSchemas(base, inferred) {
    return {
      ...base,
      properties: { ...base.properties, ...inferred.properties },
      required: [...new Set([...(base.required || []), ...(inferred.required || [])])],
    };
  }

  /**
   * Apply field constraints (`required`, `enum`, `pattern`, `range`) and
   * return a new schema. The input schema and its property objects are not
   * mutated.
   */
  applyConstraints(schema, constraints) {
    const updatedSchema = { ...schema };
    if (schema.properties) {
      updatedSchema.properties = { ...schema.properties };
    }
    if (Array.isArray(schema.required)) {
      updatedSchema.required = [...schema.required];
    }
    for (const constraint of constraints) {
      if (!updatedSchema.properties) {
        updatedSchema.properties = {};
      }
      // Copy so the caller's property schema is left untouched.
      const fieldSchema = { ...(updatedSchema.properties[constraint.field] || { type: 'string' }) };
      switch (constraint.type) {
        case 'required':
          if (!updatedSchema.required) {
            updatedSchema.required = [];
          }
          if (!updatedSchema.required.includes(constraint.field)) {
            updatedSchema.required.push(constraint.field);
          }
          break;
        case 'enum':
          fieldSchema.enum = Array.isArray(constraint.value)
            ? constraint.value
            : [constraint.value];
          break;
        case 'pattern':
          if (fieldSchema.type === 'string') {
            fieldSchema.pattern = constraint.value;
          }
          break;
        case 'range':
          if (fieldSchema.type === 'number' || fieldSchema.type === 'integer') {
            const { min, max } = constraint.value ?? {};
            if (min !== undefined) {
              fieldSchema.minimum = min;
            }
            if (max !== undefined) {
              fieldSchema.maximum = max;
            }
          }
          break;
      }
      if (constraint.description) {
        fieldSchema.description = constraint.description;
      }
      updatedSchema.properties[constraint.field] = fieldSchema;
    }
    return updatedSchema;
  }

  /**
   * Add format-specific adaptations. CSV forces `additionalProperties: false`
   * on object schemas (no flattening is actually performed); XML adds an
   * `xml-attributes` marker; other formats are returned unchanged.
   */
  adaptSchemaForFormat(schema, format) {
    if (format === 'xml') {
      // XML schemas might need special handling for attributes
      return { ...schema, 'xml-attributes': true };
    }
    if (format === 'csv' && schema.type === 'object' && schema.properties) {
      return { ...schema, additionalProperties: false };
    }
    return schema;
  }

  /**
   * Fill in a placeholder description for every property lacking one.
   * Returns a new schema; the input's property objects are not mutated.
   * In production, this would use AI to generate descriptions.
   */
  async generatePropertyDescriptions(schema, context) {
    if (!schema.properties) {
      return { ...schema };
    }
    const properties = {};
    for (const [name, prop] of Object.entries(schema.properties)) {
      const needsDescription = prop !== null && typeof prop === 'object' && !prop.description;
      properties[name] = needsDescription
        ? { ...prop, description: `The ${name} property for ${context}` }
        : prop;
    }
    return { ...schema, properties };
  }

  /**
   * Append the schema (and first example, if any) to the prompt.
   */
  createSchemaAwarePrompt(prompt, schema, config) {
    let enhancedPrompt = prompt;
    enhancedPrompt += '\n\nPlease respond with a JSON object that follows this schema:\n';
    enhancedPrompt += JSON.stringify(schema, null, 2);
    if (config.strictMode) {
      enhancedPrompt +=
        '\n\nIMPORTANT: The response must strictly adhere to the schema. No additional properties are allowed.';
    }
    if (schema.examples && schema.examples.length > 0) {
      enhancedPrompt += '\n\nExample format:\n';
      enhancedPrompt += JSON.stringify(schema.examples[0], null, 2);
    }
    return enhancedPrompt;
  }

  /**
   * Simulate LLM response generation - in production this would call actual LLM.
   * Outside strict mode, roughly one call in five adds an `extra_property`
   * to object results to exercise validation. The variation is applied to a
   * copy so a schema-provided example is never mutated.
   */
  async simulateStructuredGeneration(prompt, schema, config) {
    const example = this.generateExampleFromSchema(schema);
    const isPlainObject = example !== null && typeof example === 'object' && !Array.isArray(example);
    if (!config.strictMode && Math.random() > 0.8 && isPlainObject) {
      return { ...example, extra_property: 'test' };
    }
    return example;
  }

  /**
   * Simulate streaming: emit a growing partial object key by key (100 ms
   * apart), then return a full `generateStructuredOutput` response. Each
   * callback receives its own snapshot.
   */
  async simulateStreamingGeneration(prompt, schema, onPartial, config) {
    const fullData = await this.simulateStructuredGeneration(prompt, schema, config);
    // Simulate streaming by sending partial objects
    if (typeof fullData === 'object' && fullData !== null) {
      const partial = {};
      for (const key of Object.keys(fullData)) {
        partial[key] = fullData[key];
        onPartial({ ...partial });
        // Add delay to simulate streaming
        await new Promise((resolve) => setTimeout(resolve, 100));
      }
    }
    // Return full response
    return await this.generateStructuredOutput(prompt, schema, config);
  }

  /**
   * Build a sample value for a schema. Uses the first schema example when
   * present; for union types the first listed type is used; numeric samples
   * respect `minimum` (including 0) and are clamped to `maximum`.
   */
  generateExampleFromSchema(schema) {
    if (schema.examples && schema.examples.length > 0) {
      return schema.examples[0];
    }
    const type = Array.isArray(schema.type) ? schema.type[0] : schema.type;
    switch (type) {
      case 'string':
        return schema.enum ? schema.enum[0] : 'example string';
      case 'number':
      case 'integer': {
        const candidate = schema.minimum ?? 42;
        return schema.maximum !== undefined && candidate > schema.maximum ? schema.maximum : candidate;
      }
      case 'boolean':
        return true;
      case 'array': {
        const item = schema.items ? this.generateExampleFromSchema(schema.items) : 'item';
        return [item];
      }
      case 'object': {
        const obj = {};
        if (schema.properties) {
          for (const [name, prop] of Object.entries(schema.properties)) {
            obj[name] = this.generateExampleFromSchema(prop);
          }
        }
        return obj;
      }
      default:
        return null;
    }
  }

  /**
   * Validate `data` against `schema`, appending to `errors`/`warnings`.
   * A type mismatch stops validation of that branch.
   */
  validateRecursive(data, schema, path, errors, warnings) {
    // Type validation
    if (schema.type && !this.validateType(data, schema.type)) {
      // Report the JSON type (array/null/integer...) rather than `typeof`.
      const actualType = data === undefined ? 'undefined' : this.getJsonSchemaType(data);
      errors.push({
        path,
        message: `Expected type ${schema.type} but got ${actualType}`,
        code: 'TYPE_MISMATCH',
        severity: 'error',
        actualValue: data,
        expectedType: schema.type,
      });
      return;
    }
    // Union types (`type: [...]`) must still get keyword validation.
    const allows = (name) => (Array.isArray(schema.type) ? schema.type.includes(name) : schema.type === name);
    // Object validation
    if (allows('object') && typeof data === 'object' && data !== null && !Array.isArray(data)) {
      this.validateObject(data, schema, path, errors, warnings);
    }
    // Array validation
    if (allows('array') && Array.isArray(data)) {
      this.validateArray(data, schema, path, errors, warnings);
    }
    // String validation
    if (allows('string') && typeof data === 'string') {
      this.validateString(data, schema, path, errors, warnings);
    }
    // Number validation
    if ((allows('number') || allows('integer')) && typeof data === 'number') {
      this.validateNumber(data, schema, path, errors, warnings);
    }
  }

  /**
   * @returns {boolean} True when `data` matches the type or any member of a type list.
   */
  validateType(data, type) {
    const types = Array.isArray(type) ? type : [type];
    return types.some((t) => {
      switch (t) {
        case 'null':
          return data === null;
        case 'boolean':
          return typeof data === 'boolean';
        case 'object':
          return typeof data === 'object' && data !== null && !Array.isArray(data);
        case 'array':
          return Array.isArray(data);
        case 'number':
          return typeof data === 'number';
        case 'integer':
          return typeof data === 'number' && Number.isInteger(data);
        case 'string':
          return typeof data === 'string';
        default:
          return false;
      }
    });
  }

  /**
   * Check required properties and validate each known property. Properties
   * not in the schema produce a warning (not an error) when
   * `additionalProperties` is false.
   */
  validateObject(data, schema, path, errors, warnings) {
    // Required properties
    if (schema.required) {
      for (const required of schema.required) {
        if (!this.hasOwnKey(data, required)) {
          errors.push({
            path: path ? `${path}.${required}` : required,
            message: `Missing required property: ${required}`,
            code: 'MISSING_REQUIRED',
            severity: 'error',
          });
        }
      }
    }
    // Property validation
    if (schema.properties) {
      for (const [key, value] of Object.entries(data)) {
        const childPath = path ? `${path}.${key}` : key;
        // Own-key lookup so data keys like "constructor" don't resolve to inherited members.
        const propertySchema = this.hasOwnKey(schema.properties, key) ? schema.properties[key] : undefined;
        if (propertySchema) {
          this.validateRecursive(value, propertySchema, childPath, errors, warnings);
        }
        else if (schema.additionalProperties === false) {
          warnings.push({
            path: childPath,
            message: `Unexpected property: ${key}`,
            suggestion: 'Remove this property or update schema to allow additional properties',
            impact: 'medium',
          });
        }
      }
    }
  }

  /**
   * Validate array length, items and uniqueness.
   */
  validateArray(data, schema, path, errors, warnings) {
    // Length constraints
    if (schema.minItems !== undefined && data.length < schema.minItems) {
      errors.push({
        path,
        message: `Array must have at least ${schema.minItems} items but has ${data.length}`,
        code: 'MIN_ITEMS',
        severity: 'error',
        actualValue: data.length,
      });
    }
    if (schema.maxItems !== undefined && data.length > schema.maxItems) {
      errors.push({
        path,
        message: `Array must have at most ${schema.maxItems} items but has ${data.length}`,
        code: 'MAX_ITEMS',
        severity: 'error',
        actualValue: data.length,
      });
    }
    // Item validation
    if (schema.items) {
      data.forEach((item, index) => {
        this.validateRecursive(item, schema.items, `${path}[${index}]`, errors, warnings);
      });
    }
    // Unique items
    if (schema.uniqueItems && !this.areItemsUnique(data)) {
      errors.push({
        path,
        message: 'Array items must be unique',
        code: 'UNIQUE_ITEMS',
        severity: 'error',
      });
    }
  }

  /**
   * Validate string length, pattern and enum membership.
   */
  validateString(data, schema, path, errors, warnings) {
    // Length constraints
    if (schema.minLength !== undefined && data.length < schema.minLength) {
      errors.push({
        path,
        message: `String must be at least ${schema.minLength} characters but is ${data.length}`,
        code: 'MIN_LENGTH',
        severity: 'error',
        actualValue: data.length,
      });
    }
    if (schema.maxLength !== undefined && data.length > schema.maxLength) {
      errors.push({
        path,
        message: `String must be at most ${schema.maxLength} characters but is ${data.length}`,
        code: 'MAX_LENGTH',
        severity: 'error',
        actualValue: data.length,
      });
    }
    // Pattern validation
    if (schema.pattern) {
      const regex = new RegExp(schema.pattern);
      if (!regex.test(data)) {
        errors.push({
          path,
          message: `String does not match pattern: ${schema.pattern}`,
          code: 'PATTERN_MISMATCH',
          severity: 'error',
          actualValue: data,
        });
      }
    }
    // Enum validation
    if (schema.enum && !schema.enum.includes(data)) {
      errors.push({
        path,
        message: `Value must be one of: ${schema.enum.join(', ')}`,
        code: 'ENUM_MISMATCH',
        severity: 'error',
        actualValue: data,
        suggestions: schema.enum.map(String),
      });
    }
  }

  /**
   * Validate numeric range.
   */
  validateNumber(data, schema, path, errors, warnings) {
    // Range validation
    if (schema.minimum !== undefined && data < schema.minimum) {
      errors.push({
        path,
        message: `Number must be at least ${schema.minimum} but is ${data}`,
        code: 'MIN_VALUE',
        severity: 'error',
        actualValue: data,
      });
    }
    if (schema.maximum !== undefined && data > schema.maximum) {
      errors.push({
        path,
        message: `Number must be at most ${schema.maximum} but is ${data}`,
        code: 'MAX_VALUE',
        severity: 'error',
        actualValue: data,
      });
    }
  }

  /**
   * Objects/arrays are compared by their JSON text, primitives by value.
   * The two are tracked separately so `{a:1}` never collides with the
   * string '{"a":1}'.
   */
  areItemsUnique(array) {
    const seenPrimitives = new Set();
    const seenSerialized = new Set();
    for (const item of array) {
      const isStructured = typeof item === 'object' && item !== null;
      const bucket = isStructured ? seenSerialized : seenPrimitives;
      const key = isStructured ? JSON.stringify(item) : item;
      if (bucket.has(key)) {
        return false;
      }
      bucket.add(key);
    }
    return true;
  }

  /**
   * Confidence in [0, 1]: base 0.8, +0.2 when valid, -0.1 per error
   * otherwise, -0.1 when corrected, -0.05 per warning.
   */
  calculateOutputConfidence(data, schema, validationResult, corrected) {
    let confidence = 0.8; // Base confidence
    if (validationResult.valid) {
      confidence += 0.2;
    }
    else {
      confidence -= 0.1 * validationResult.errors.length;
    }
    if (corrected) {
      confidence -= 0.1; // Reduced confidence for corrected output
    }
    if (validationResult.warnings.length > 0) {
      confidence -= 0.05 * validationResult.warnings.length;
    }
    return Math.max(0, Math.min(1, confidence));
  }

  /**
   * Confidence starting at 1.0, minus 0.1 per error and 0.05 per warning, floored at 0.
   */
  calculateValidationConfidence(errors, warnings) {
    const confidence = 1.0 - errors.length * 0.1 - warnings.length * 0.05;
    return Math.max(0, confidence);
  }

  /**
   * Human-readable summary of the validation outcome. Non-object data
   * (including null) is reported as having 0 properties.
   */
  generateReasoningExplanation(data, schema, validation) {
    if (validation.valid) {
      const propertyCount = data !== null && typeof data === 'object' ? Object.keys(data).length : 0;
      return `Output successfully matches the provided schema with ${propertyCount} properties.`;
    }
    const errorCount = validation.errors.length;
    const warningCount = validation.warnings.length;
    return `Output has ${errorCount} validation errors and ${warningCount} warnings that need attention.`;
  }

  // Auto-correction methods

  /**
   * Coerce values whose type does not match the schema, recursing into
   * object properties and array items. Nested fixes are written in place.
   *
   * @returns {*} The (possibly replaced) value for `data`; callers must use
   *   it because a root-level value cannot be replaced in place.
   */
  correctTypes(data, schema, path, corrections) {
    let value = data;
    if (schema.type && !this.validateType(value, schema.type)) {
      const correctedValue = this.attemptTypeConversion(value, schema.type);
      if (correctedValue !== undefined) {
        corrections.push({
          path,
          originalValue: value,
          correctedValue,
          reasoning: `Converted ${typeof value} to ${schema.type}`,
          confidence: 0.8,
        });
        value = correctedValue;
      }
    }
    this.visitChildren(value, schema, path, (parent, key, childSchema, childPath) => {
      parent[key] = this.correctTypes(parent[key], childSchema, childPath, corrections);
    });
    return value;
  }

  /**
   * Add defaults for missing required properties, recursing into nested
   * objects and array items (including defaults that were just added).
   */
  correctMissingRequired(data, schema, path, corrections) {
    if (typeof data !== 'object' || data === null) {
      return;
    }
    if (schema.type === 'object' && schema.required) {
      for (const required of schema.required) {
        if (this.hasOwnKey(data, required)) {
          continue;
        }
        let defaultValue = this.generateDefaultValue(schema.properties?.[required]);
        if (typeof defaultValue === 'object' && defaultValue !== null) {
          // Copy so later corrections never mutate a `default` held by the schema.
          defaultValue = JSON.parse(JSON.stringify(defaultValue));
        }
        data[required] = defaultValue;
        corrections.push({
          path: path ? `${path}.${required}` : required,
          originalValue: undefined,
          correctedValue: defaultValue,
          reasoning: `Added missing required property with default value`,
          confidence: 0.7,
        });
      }
    }
    this.visitChildren(data, schema, path, (parent, key, childSchema, childPath) => {
      this.correctMissingRequired(parent[key], childSchema, childPath, corrections);
    });
  }

  /**
   * Replace values that violate an `enum` with the closest allowed value,
   * recursing into object properties and array items.
   *
   * @returns {*} The (possibly replaced) value for `data`.
   */
  correctInvalidValues(data, schema, path, corrections) {
    let value = data;
    // Correct enum mismatches
    if (Array.isArray(schema.enum) && !schema.enum.includes(value)) {
      const closest = this.findClosestEnumValue(value, schema.enum);
      if (closest !== null) {
        corrections.push({
          path,
          originalValue: value,
          correctedValue: closest,
          reasoning: `Corrected to closest valid enum value`,
          confidence: 0.6,
        });
        value = closest;
      }
    }
    this.visitChildren(value, schema, path, (parent, key, childSchema, childPath) => {
      parent[key] = this.correctInvalidValues(parent[key], childSchema, childPath, corrections);
    });
    return value;
  }

  /**
   * Delete properties not declared by an object schema with
   * `additionalProperties: false`, recursing into nested values.
   */
  removeExtraProperties(data, schema, path, corrections) {
    if (typeof data !== 'object' || data === null) {
      return;
    }
    if (schema.type === 'object' && schema.additionalProperties === false && schema.properties) {
      for (const key of Object.keys(data)) {
        const declared = this.hasOwnKey(schema.properties, key) && schema.properties[key];
        if (declared) {
          continue;
        }
        // Capture before deleting so the correction records the real value.
        const originalValue = data[key];
        delete data[key];
        corrections.push({
          path: path ? `${path}.${key}` : key,
          originalValue,
          correctedValue: undefined,
          reasoning: `Removed extra property not allowed by schema`,
          confidence: 0.9,
        });
      }
    }
    this.visitChildren(data, schema, path, (parent, key, childSchema, childPath) => {
      this.removeExtraProperties(parent[key], childSchema, childPath, corrections);
    });
  }

  /**
   * Try to convert `value` to the target type, or to the first member of a
   * type list for which conversion succeeds.
   *
   * @returns {*} Converted value, or undefined when no conversion applies.
   */
  attemptTypeConversion(value, targetType) {
    const types = Array.isArray(targetType) ? targetType : [targetType];
    for (const type of types) {
      let converted;
      try {
        switch (type) {
          case 'string':
            converted = String(value);
            break;
          case 'number': {
            const num = Number(value);
            converted = Number.isNaN(num) ? undefined : num;
            break;
          }
          case 'integer': {
            const int = Number.parseInt(String(value), 10);
            converted = Number.isNaN(int) ? undefined : int;
            break;
          }
          case 'boolean':
            converted = typeof value === 'string' ? value.toLowerCase() === 'true' : Boolean(value);
            break;
          case 'array':
            converted = Array.isArray(value) ? value : [value];
            break;
        }
      }
      catch {
        // e.g. String() of an object with a throwing toString; try the next type.
        continue;
      }
      if (converted !== undefined) {
        return converted;
      }
    }
    return undefined;
  }

  /**
   * Default for a property schema: explicit `default`, else the first enum
   * value, else a type-based empty value (numbers use `minimum` when set).
   */
  generateDefaultValue(schema) {
    if (!schema) {
      return null;
    }
    if (schema.default !== undefined) {
      return schema.default;
    }
    if (Array.isArray(schema.enum) && schema.enum.length > 0) {
      return schema.enum[0];
    }
    const type = Array.isArray(schema.type) ? schema.type[0] : schema.type;
    switch (type) {
      case 'string':
        return '';
      case 'number':
      case 'integer':
        return schema.minimum ?? 0;
      case 'boolean':
        return false;
      case 'array':
        return [];
      case 'object':
        return {};
      default:
        return null;
    }
  }

  /**
   * For strings, the enum value with the smallest case-insensitive edit
   * distance (null when the best distance exceeds 3). For other values the
   * first enum entry. Null when the enum is empty.
   */
  findClosestEnumValue(value, enumValues) {
    if (!Array.isArray(enumValues) || enumValues.length === 0) {
      return null;
    }
    if (typeof value !== 'string') {
      return enumValues[0]; // Default to first value
    }
    // Find string with minimum edit distance
    const needle = value.toLowerCase();
    let closest = enumValues[0];
    let minDistance = this.editDistance(needle, String(closest).toLowerCase());
    for (let i = 1; i < enumValues.length; i++) {
      const distance = this.editDistance(needle, String(enumValues[i]).toLowerCase());
      if (distance < minDistance) {
        minDistance = distance;
        closest = enumValues[i];
      }
    }
    return minDistance <= 3 ? closest : null; // Only suggest if reasonably close
  }

  /**
   * Levenshtein distance between two strings.
   */
  editDistance(a, b) {
    const matrix = [];
    for (let i = 0; i <= b.length; i++) {
      matrix[i] = [i];
    }
    for (let j = 0; j <= a.length; j++) {
      matrix[0][j] = j;
    }
    for (let i = 1; i <= b.length; i++) {
      for (let j = 1; j <= a.length; j++) {
        if (b.charAt(i - 1) === a.charAt(j - 1)) {
          matrix[i][j] = matrix[i - 1][j - 1];
        }
        else {
          matrix[i][j] = Math.min(matrix[i - 1][j - 1] + 1, matrix[i][j - 1] + 1, matrix[i - 1][j] + 1);
        }
      }
    }
    return matrix[b.length][a.length];
  }

  /**
   * Set a value at a path such as `a.b[0].c`. Missing or non-object
   * intermediate nodes are created as objects. Empty paths and paths
   * containing `__proto__` are ignored.
   */
  setValueAtPath(obj, path, value) {
    if (!path) {
      return;
    }
    const keys = path.match(/[^.[\]]+/g) ?? [];
    if (keys.length === 0 || keys.includes('__proto__')) {
      return;
    }
    let current = obj;
    for (const key of keys.slice(0, -1)) {
      // Own-key check keeps the walk off inherited members such as `constructor`.
      const next = this.hasOwnKey(current, key) ? current[key] : undefined;
      if (next === null || typeof next !== 'object') {
        current[key] = {};
      }
      current = current[key];
    }
    current[keys[keys.length - 1]] = value;
  }

  /**
   * Mean correction confidence minus 0.1 per correction beyond five,
   * floored at 0.3; 1.0 when there are no corrections.
   */
  calculateCorrectionConfidence(corrections) {
    if (corrections.length === 0) {
      return 1.0;
    }
    const avgConfidence = corrections.reduce((sum, c) => sum + c.confidence, 0) / corrections.length;
    const complexityPenalty = Math.max(0, (corrections.length - 5) * 0.1);
    return Math.max(0.3, avgConfidence - complexityPenalty);
  }

  // Field analysis for schema inference

  /**
   * Collect, per field name, the observed JSON types, occurrence count,
   * up to three example values, and null/undefined counts.
   *
   * @returns {Map<string, object>} Analysis keyed by field name.
   */
  analyzeFields(data, options) {
    const analysis = new Map();
    for (const item of data) {
      if (typeof item !== 'object' || item === null) {
        continue;
      }
      for (const [key, value] of Object.entries(item)) {
        if (!analysis.has(key)) {
          analysis.set(key, {
            name: key,
            types: new Set(),
            occurrences: 0,
            examples: [],
            nullCount: 0,
            undefinedCount: 0,
          });
        }
        const fieldAnalysis = analysis.get(key);
        fieldAnalysis.occurrences++;
        if (value === null) {
          fieldAnalysis.nullCount++;
        }
        else if (value === undefined) {
          fieldAnalysis.undefinedCount++;
        }
        else {
          fieldAnalysis.types.add(this.getJsonSchemaType(value));
          if (fieldAnalysis.examples.length < 3) {
            fieldAnalysis.examples.push(value);
          }
        }
      }
    }
    return analysis;
  }

  /**
   * Map a JS value to its JSON Schema type name ('string' as fallback).
   */
  getJsonSchemaType(value) {
    if (value === null) {
      return 'null';
    }
    if (Array.isArray(value)) {
      return 'array';
    }
    switch (typeof value) {
      case 'boolean':
        return 'boolean';
      case 'object':
        return 'object';
      case 'number':
        return Number.isInteger(value) ? 'integer' : 'number';
      default:
        return 'string'; // strings and fallback
    }
  }

  /**
   * Build a property schema from a field analysis. Observed nulls add
   * 'null' to the type so the samples validate against the inferred schema.
   */
  createPropertySchema(analysis, options) {
    const schema = {};
    const observedTypes = new Set(analysis.types);
    if (analysis.nullCount > 0) {
      observedTypes.add('null');
    }
    // Determine type
    const typeList = Array.from(observedTypes);
    if (typeList.length === 1) {
      schema.type = typeList[0];
    }
    else if (typeList.length > 1) {
      schema.type = typeList;
    }
    else {
      schema.type = 'string'; // fallback
    }
    // Add examples if requested
    if (options.includeExamples && analysis.examples.length > 0) {
      schema.examples = analysis.examples;
    }
    // Generate description if requested
    if (options.generateDescriptions) {
      schema.description = `Property ${analysis.name} (appears in ${analysis.occurrences} samples)`;
    }
    return schema;
  }

  /**
   * Check that a property schema (and its nested properties) declares a type
   * or a composition keyword.
   *
   * @param {Set} [visited] - Schemas on the current recursion path; stops
   *   the walk on circular schemas. Callers normally omit it.
   */
  validatePropertySchema(schema, path, errors, warnings, visited = new Set()) {
    if (visited.has(schema)) {
      return;
    }
    visited.add(schema);
    if (!schema.type && !schema.anyOf && !schema.oneOf && !schema.allOf) {
      errors.push({
        path,
        message: 'Property schema must specify a type',
        code: 'MISSING_TYPE',
      });
    }
    // Recursively validate nested schemas
    if (schema.properties) {
      for (const [name, prop] of Object.entries(schema.properties)) {
        this.validatePropertySchema(prop, `${path}.${name}`, errors, warnings, visited);
      }
    }
    visited.delete(schema);
  }

  /**
   * @returns {boolean} True when a schema object is reachable from itself via `properties`/`items`.
   */
  hasCircularReference(schema, visited = new Set()) {
    if (visited.has(schema)) {
      return true;
    }
    visited.add(schema);
    if (schema.properties) {
      for (const prop of Object.values(schema.properties)) {
        if (this.hasCircularReference(prop, visited)) {
          return true;
        }
      }
    }
    if (schema.items && this.hasCircularReference(schema.items, visited)) {
      return true;
    }
    visited.delete(schema);
    return false;
  }

  /**
   * Markdown section for a single property.
   */
  generatePropertyDocumentation(name, schema, required) {
    let doc = `### ${name}${required ? ' (required)' : ''}\n\n`;
    if (schema.description) {
      doc += `${schema.description}\n\n`;
    }
    doc += `- Type: \`${schema.type || 'any'}\`\n`;
    if (schema.enum) {
      doc += `- Allowed values: ${schema.enum.map((v) => `\`${v}\``).join(', ')}\n`;
    }
    if (schema.minimum !== undefined) {
      doc += `- Minimum: ${schema.minimum}\n`;
    }
    if (schema.maximum !== undefined) {
      doc += `- Maximum: ${schema.maximum}\n`;
    }
    if (schema.pattern) {
      doc += `- Pattern: \`${schema.pattern}\`\n`;
    }
    doc += '\n';
    return doc;
  }

  // Simple format converters (would use proper libraries in production)

  /**
   * Very basic YAML parser - would use js-yaml in production.
   * Handles flat `key: value` lines only; all values are strings.
   */
  parseSimpleYaml(yaml) {
    const result = {};
    for (const line of yaml.split('\n')) {
      if (!line.trim()) {
        continue;
      }
      const [key, ...valueParts] = line.split(':');
      if (key && valueParts.length > 0) {
        result[key.trim()] = valueParts.join(':').trim();
      }
    }
    return result;
  }

  /**
   * Flat `key: value` YAML; nested values are emitted as inline JSON.
   */
  toSimpleYaml(data) {
    if (typeof data !== 'object' || data === null) {
      return String(data);
    }
    let yaml = '';
    for (const [key, value] of Object.entries(data)) {
      yaml += `${key}: ${typeof value === 'object' ? JSON.stringify(value) : value}\n`;
    }
    return yaml;
  }

  /** Escape text for use as XML element content. */
  escapeXml(text) {
    return String(text)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;');
  }

  /**
   * One element per top-level key under `<root>`; nested values are emitted
   * as JSON text. Element content is XML-escaped.
   */
  toSimpleXml(data) {
    if (typeof data !== 'object' || data === null) {
      return `<root>${this.escapeXml(data)}</root>`;
    }
    let xml = '<root>\n';
    for (const [key, value] of Object.entries(data)) {
      const text = typeof value === 'object' ? JSON.stringify(value) : value;
      xml += ` <${key}>${this.escapeXml(text)}</${key}>\n`;
    }
    xml += '</root>';
    return xml;
  }

  /**
   * CSV for a non-empty array of records, using the first record's keys as
   * the header. Fields containing commas, quotes or line breaks are quoted
   * with embedded quotes doubled; null/undefined become empty fields.
   * Any other input is returned as JSON text.
   */
  toCsv(data) {
    if (!Array.isArray(data) || data.length === 0) {
      return JSON.stringify(data);
    }
    const formatCell = (value) => {
      if (value === null || value === undefined) {
        return '';
      }
      const text = String(value);
      return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
    };
    const headers = Object.keys(data[0]);
    const lines = [headers.map(formatCell).join(',')];
    for (const row of data) {
      lines.push(headers.map((header) => formatCell(row?.[header])).join(','));
    }
    return `${lines.join('\n')}\n`;
  }
}
/**
 * Factory function: build a fresh StructuredOutputManager.
 *
 * @returns {StructuredOutputManager} A newly constructed manager instance.
 */
export function createStructuredOutputManager() {
  const manager = new StructuredOutputManager();
  return manager;
}
// Default export
export default StructuredOutputManager;
//# sourceMappingURL=structured-output-manager.js.map