agent-team-composer
Version:
Transform README files into GitHub project plans with AI-powered agent teams
335 lines (332 loc) • 12.8 kB
JavaScript
import { TelemetryService } from './telemetry.js';
/**
 * Best-effort extractor that pulls a JSON value out of free-form LLM output.
 *
 * `extract` runs a fixed list of strategies, from strictest to most lenient,
 * and returns the first value that the supplied schema accepts.
 */
export class BulletproofJSONExtractor {
    /**
     * Strategies tried by `extract`, in order.
     * They are invoked as plain functions (without a `this` receiver), so none
     * of them may rely on `this`; shared helpers are reached through the class name.
     */
    static extractionMethods = [
        this.extractWithCodeBlocks,
        this.extractWithoutCodeBlocks,
        this.extractWithRegexCleanup,
        this.extractWithJSONRepair,
        this.extractWithManualParsing,
        this.extractWithFuzzyMatching
    ];
    /**
     * Main extraction method that tries multiple strategies.
     *
     * @param {string} content - Raw model output. Non-string input is coerced to
     *   a string (null/undefined become '') so the failure path cannot throw.
     * @param {{ safeParse: Function }} schema - Validator whose `safeParse(value)`
     *   returns `{ success, data }`.
     * @param {string} [context] - Label forwarded to telemetry ('unknown' when falsy).
     * @returns {Promise<object>} `{ success: true, data, extractionMethod }` on the
     *   first schema-valid result, otherwise `{ success: false, error, rawContent }`.
     */
    static async extract(content, schema, context) {
        const startTime = Date.now();
        const telemetry = TelemetryService.getInstance();
        // Returned untouched in the failure result for debugging.
        const originalContent = content;
        const text = typeof content === 'string' ? content : String(content ?? '');
        const contextLabel = context || 'unknown';
        for (const [index, method] of this.extractionMethods.entries()) {
            try {
                const extracted = method(text);
                if (extracted) {
                    const parsed = schema.safeParse(extracted);
                    if (parsed.success) {
                        telemetry.trackExtractionMetrics({
                            method: method.name,
                            success: true,
                            duration: Date.now() - startTime,
                            retryCount: index,
                            context: contextLabel,
                            contentLength: text.length
                        });
                        return {
                            success: true,
                            data: parsed.data,
                            extractionMethod: method.name
                        };
                    }
                }
            }
            catch {
                // Deliberate best effort: a strategy that throws simply did not
                // apply to this input, so fall through to the next one.
                continue;
            }
        }
        // Every strategy failed or produced a value the schema rejected.
        telemetry.trackExtractionMetrics({
            method: 'all_failed',
            success: false,
            duration: Date.now() - startTime,
            retryCount: this.extractionMethods.length,
            context: contextLabel,
            errorType: 'extraction_failed',
            contentLength: text.length
        });
        return {
            success: false,
            error: 'Failed to extract valid JSON after trying all methods',
            rawContent: originalContent
        };
    }
    /**
     * Method 1: Standard code block extraction.
     *
     * Looks at fenced blocks tagged `json` (any letter case) first, then at
     * untagged fences whose body is an object or an array. A fence whose body
     * does not parse is skipped so that a later, valid fence can still win.
     *
     * @param {string} content
     * @returns {*} The first fenced body that parses as JSON, or null.
     */
    static extractWithCodeBlocks(content) {
        const fences = [
            /```json\s*\n([\s\S]*?)\n```/gi,
            /```\s*\n(\{[\s\S]*?\})\n```/g,
            /```\s*\n(\[[\s\S]*?\])\n```/g
        ];
        for (const fence of fences) {
            for (const match of content.matchAll(fence)) {
                try {
                    return JSON.parse(match[1].trim());
                }
                catch {
                    // Not valid JSON; try the next fenced block.
                }
            }
        }
        return null;
    }
    /**
     * Method 2: Extract JSON without code blocks.
     *
     * Parses everything from the first `{` or `[` up to the last `}` or `]`.
     *
     * @param {string} content
     * @returns {*} The parsed value, or null when no such span exists.
     * @throws {SyntaxError} When the span is not valid JSON.
     */
    static extractWithoutCodeBlocks(content) {
        const firstOpener = content.search(/[{[]/);
        const lastCloser = Math.max(content.lastIndexOf('}'), content.lastIndexOf(']'));
        if (firstOpener === -1 || lastCloser <= firstOpener) {
            return null;
        }
        return JSON.parse(content.slice(firstOpener, lastCloser + 1));
    }
    /**
     * Method 3: Clean up common formatting issues.
     *
     * Drops text before the first opener and after the last closer, then fixes
     * trailing commas, single-quoted strings, unquoted keys and `undefined`
     * values. The fixes are applied only OUTSIDE string literals, so values such
     * as "http://example.com" or "it's" are left intact.
     *
     * @param {string} content
     * @returns {*} The parsed value.
     * @throws {SyntaxError} When the cleaned text is still not valid JSON.
     */
    static extractWithRegexCleanup(content) {
        const firstOpener = content.search(/[{[]/);
        const body = firstOpener === -1 ? content : content.slice(firstOpener);
        const lastCloser = Math.max(body.lastIndexOf('}'), body.lastIndexOf(']'));
        const candidate = lastCloser === -1 ? body : body.slice(0, lastCloser + 1);
        const fixGap = (gap) => gap
            .replace(/,(\s*[}\]])/g, '$1') // trailing commas
            .replace(/([\w$]+)\s*:/g, '"$1":') // unquoted keys
            .replace(/:\s*undefined\b/g, ': null'); // undefined is not JSON
        const cleaned = BulletproofJSONExtractor.#rewriteOutsideStrings(candidate, fixGap)
            // Raw line breaks and tabs are illegal inside JSON strings.
            .replace(/[\n\r\t]/g, ' ');
        return JSON.parse(cleaned);
    }
    /**
     * Method 4: Use JSON repair library logic.
     *
     * Takes the first `{...}` or `[...]` span and repairs single-quoted strings,
     * trailing commas, unquoted identifier keys and wrongly cased
     * `true`/`false`/`null` literals, again without touching string contents.
     *
     * @param {string} content
     * @returns {*} The parsed value, or null when no bracketed span exists.
     * @throws {SyntaxError} When the repaired text is still not valid JSON.
     */
    static extractWithJSONRepair(content) {
        const jsonMatch = content.match(/\{[\s\S]*\}|\[[\s\S]*\]/);
        if (!jsonMatch) {
            return null;
        }
        const fixGap = (gap) => gap
            // Lower-case literals such as True / NULL, but not when used as a key.
            .replace(/\b(?:true|false|null)\b(?!\s*:)/gi, (literal) => literal.toLowerCase())
            .replace(/,(\s*[}\]])/g, '$1')
            .replace(/([{,]\s*)([a-zA-Z_$][a-zA-Z0-9_$]*)\s*:/g, '$1"$2":');
        return JSON.parse(BulletproofJSONExtractor.#rewriteOutsideStrings(jsonMatch[0], fixGap));
    }
    /**
     * Method 5: Manual character-by-character parsing.
     *
     * Tracks bracket depth while skipping double-quoted strings and escaped
     * characters. Each time a top-level bracket pair closes, the span is parsed;
     * a span that is not valid JSON (for example "[draft]" in prose) is
     * discarded and scanning continues.
     *
     * @param {string} content
     * @returns {*} The first balanced span that parses as JSON, or null.
     */
    static extractWithManualParsing(content) {
        const stack = [];
        let inString = false;
        let escaped = false;
        let spanStart = -1;
        for (let i = 0; i < content.length; i++) {
            const char = content[i];
            if (escaped) {
                escaped = false;
                continue;
            }
            if (char === '\\') {
                escaped = true;
                continue;
            }
            if (char === '"') {
                inString = !inString;
                continue;
            }
            if (inString) {
                continue;
            }
            if (char === '{' || char === '[') {
                if (stack.length === 0) {
                    spanStart = i;
                }
                stack.push(char);
                continue;
            }
            if (char !== '}' && char !== ']') {
                continue;
            }
            const opener = char === '}' ? '{' : '[';
            if (stack[stack.length - 1] !== opener) {
                // Mismatched closer: ignore it, as stray brackets are common in prose.
                continue;
            }
            stack.pop();
            if (stack.length > 0) {
                continue;
            }
            try {
                return JSON.parse(content.slice(spanStart, i + 1));
            }
            catch {
                // Balanced but not JSON; look for a later candidate.
                spanStart = -1;
            }
        }
        return null;
    }
    /**
     * Method 6: Fuzzy matching with structure detection.
     *
     * Last resort. Rebuilds `{"phases": [...]}` or `{"tasks": [...]}` from a
     * bare `phases:` / `tasks:` fragment, or parses the first small object found.
     * The patterns are non-greedy, so fragments containing nested arrays or
     * objects are cut short and will usually fail to parse.
     *
     * @param {string} content
     * @returns {*} The reconstructed value, or null.
     */
    static extractWithFuzzyMatching(content) {
        const arrayFragments = [
            { key: 'phases', pattern: /["']?phases["']?\s*:\s*\[([\s\S]*?)\]/ },
            { key: 'tasks', pattern: /["']?tasks["']?\s*:\s*\[([\s\S]*?)\]/ }
        ];
        for (const { key, pattern } of arrayFragments) {
            const match = content.match(pattern);
            if (!match) {
                continue;
            }
            // Re-wrap with a properly quoted key; the captured group is the array body.
            const rebuilt = BulletproofJSONExtractor.#parseLenient(`{"${key}": [${match[1]}]}`);
            if (rebuilt !== null) {
                return rebuilt;
            }
        }
        const genericObject = content.match(/\{\s*"?\w+"?\s*:\s*[\s\S]*?\}/);
        if (genericObject) {
            // The match already includes its own braces, so parse it as-is.
            return BulletproofJSONExtractor.#parseLenient(genericObject[0]);
        }
        return null;
    }
    /**
     * Retry with explicit format instructions.
     *
     * @param {string} originalPrompt - Prompt that produced the unparseable reply.
     * @param {string} error - Description of the previous failure.
     * @returns {string} The original prompt followed by strict JSON-only instructions.
     */
    static createRetryPrompt(originalPrompt, error) {
        return `${originalPrompt}
CRITICAL: Your previous response could not be parsed as JSON. Error: ${error}
Please respond with ONLY valid JSON, no additional text. Start your response with { or [ and end with } or ].
Example of correct format:
{
"field": "value",
"array": ["item1", "item2"]
}`;
    }
    /**
     * Log extraction failures for monitoring.
     *
     * Writes the context, content length, first 200 characters of the content,
     * the error and an ISO timestamp to `console.error`.
     *
     * @param {string} context
     * @param {string} content
     * @param {*} error
     */
    static logFailure(context, content, error) {
        // In production, this would send to telemetry service
        console.error('JSON Extraction Failure:', {
            context,
            contentLength: content.length,
            contentPreview: content.substring(0, 200),
            error,
            timestamp: new Date().toISOString()
        });
    }
    /**
     * Rebuilds `text`, applying `fixGap` to everything between string literals.
     * Double-quoted literals are copied verbatim and single-quoted literals are
     * converted to double-quoted ones.
     *
     * @param {string} text
     * @param {(gap: string) => string} fixGap
     * @returns {string}
     */
    static #rewriteOutsideStrings(text, fixGap) {
        const stringLiteral = /"(?:[^"\\]|\\[\s\S])*"|'(?:[^'\\]|\\[\s\S])*'/g;
        let rebuilt = '';
        let cursor = 0;
        for (const match of text.matchAll(stringLiteral)) {
            const literal = match[0];
            rebuilt += fixGap(text.slice(cursor, match.index));
            rebuilt += literal[0] === '"'
                ? literal
                : BulletproofJSONExtractor.#toDoubleQuoted(literal);
            cursor = match.index + literal.length;
        }
        return rebuilt + fixGap(text.slice(cursor));
    }
    /**
     * Converts a single-quoted literal to a double-quoted one: `\'` becomes `'`,
     * bare `"` becomes `\"`, and every other escape sequence is kept.
     *
     * @param {string} literal - Text including its surrounding single quotes.
     * @returns {string}
     */
    static #toDoubleQuoted(literal) {
        const inner = literal.slice(1, -1).replace(/\\([\s\S])|"/g, (whole, escapedChar) => {
            if (escapedChar === undefined) {
                return '\\"';
            }
            return escapedChar === "'" ? "'" : whole;
        });
        return `"${inner}"`;
    }
    /**
     * Parses `candidate` strictly, then through the repair strategy.
     *
     * @param {string} candidate
     * @returns {*} The parsed value, or null when both attempts fail.
     */
    static #parseLenient(candidate) {
        try {
            return JSON.parse(candidate);
        }
        catch {
            // Fall through to the repairing parser.
        }
        try {
            return BulletproofJSONExtractor.extractWithJSONRepair(candidate);
        }
        catch {
            return null;
        }
    }
}
import { ResilienceManager } from './resilience.js';
/**
* Enhanced LLM response handler with retries and resilience
*/
export class RobustLLMHandler {
    /** Maximum number of model calls made for one request. */
    static MAX_RETRIES = 3;
    static telemetry = TelemetryService.getInstance();
    static resilience = ResilienceManager.getInstance();
    /**
     * Requests a schema-valid structured response from the model.
     *
     * The retry loop runs inside the 'anthropic-api' circuit breaker and rate
     * limiter obtained from the ResilienceManager (limiter configured here with
     * maxRequests 50 per 60000 ms window).
     *
     * @param {object} anthropic - Client exposing `messages.create(...)`.
     * @param {string} prompt - User prompt sent on the first attempt.
     * @param {{ safeParse: Function }} schema - Validator passed to the extractor.
     * @param {string} [context] - Label used for telemetry and failure logs.
     * @returns {Promise<*>} The validated data.
     * @throws {Error} When no attempt yields valid data (or when the breaker or
     *   limiter rejects — NOTE(review): their behaviour is defined elsewhere).
     */
    static async getStructuredResponse(anthropic, prompt, schema, context) {
        const circuitBreaker = this.resilience.getCircuitBreaker('anthropic-api');
        const rateLimiter = this.resilience.getRateLimiter('anthropic-api', {
            maxRequests: 50, // Adjust based on your API limits
            windowMs: 60000 // 1 minute
        });
        return await circuitBreaker.execute(async () => {
            return await rateLimiter.execute(async () => {
                return await this.executeWithRetries(anthropic, prompt, schema, context);
            });
        });
    }
    /**
     * Calls the model up to MAX_RETRIES times until the reply yields valid data.
     *
     * The first attempt sends the prompt as-is at temperature 0.7. Later attempts
     * append strict JSON instructions (including the last error), switch to a
     * JSON-only system prompt and lower the temperature to 0.3.
     *
     * @param {object} anthropic - Client exposing `messages.create(...)`.
     * @param {string} prompt
     * @param {{ safeParse: Function }} schema
     * @param {string} [context]
     * @returns {Promise<*>} The validated data from the first successful attempt.
     * @throws {Error} After MAX_RETRIES failed attempts; the message carries the last error.
     */
    static async executeWithRetries(anthropic, prompt, schema, context) {
        let lastError = '';
        const startTime = Date.now();
        for (let attempt = 0; attempt < this.MAX_RETRIES; attempt++) {
            try {
                // Add explicit JSON instructions on retry
                const enhancedPrompt = attempt === 0
                    ? prompt
                    : BulletproofJSONExtractor.createRetryPrompt(prompt, lastError);
                this.telemetry.trackEvent('llm_api_call', {
                    context,
                    attempt,
                    promptLength: enhancedPrompt.length
                });
                const response = await anthropic.messages.create({
                    model: 'claude-3-sonnet-20240229',
                    max_tokens: 4000,
                    temperature: attempt === 0 ? 0.7 : 0.3, // Lower temperature on retry
                    system: attempt === 0
                        ? 'You are a helpful assistant that generates project plans. Always respond with valid JSON in the specified format.'
                        : 'You MUST respond with ONLY valid JSON. No explanations, no markdown, just JSON.',
                    messages: [{
                            role: 'user',
                            content: enhancedPrompt
                        }]
                });
                // An empty or text-less reply becomes '' and is reported as an
                // extraction failure rather than as a thrown API error.
                const content = RobustLLMHandler.#collectText(response);
                const result = await BulletproofJSONExtractor.extract(content, schema, context);
                if (result.success && result.data) {
                    this.telemetry.trackEvent('llm_api_success', {
                        context,
                        attempt,
                        duration: Date.now() - startTime,
                        extractionMethod: result.extractionMethod
                    });
                    return result.data;
                }
                lastError = result.error || 'Unknown extraction error';
            }
            catch (error) {
                // Keep retrying: the next attempt tells the model what went wrong.
                lastError = error instanceof Error ? error.message : 'Unknown error';
                this.telemetry.trackEvent('llm_api_error', {
                    context,
                    attempt,
                    error: lastError,
                    duration: Date.now() - startTime
                });
            }
        }
        // Note: the failure log receives the prompt, not the model's reply.
        BulletproofJSONExtractor.logFailure(context, prompt, lastError);
        throw new Error(`Failed to get valid response after ${this.MAX_RETRIES} attempts. Last error: ${lastError}`);
    }
    /**
     * Joins the text of every `type: 'text'` block in a response.
     *
     * @param {{ content?: Array<{ type?: string, text?: string }> }} response
     * @returns {string} Concatenated text, or '' when there are no text blocks.
     */
    static #collectText(response) {
        const blocks = Array.isArray(response?.content) ? response.content : [];
        return blocks
            .filter((block) => block?.type === 'text' && typeof block.text === 'string')
            .map((block) => block.text)
            .join('');
    }
}
//# sourceMappingURL=json-extractor.js.map