UNPKG

@flexabrain/mcp-server

Version:

Advanced electrical schematic analysis MCP server with rail engineering expertise

475 lines 20.1 kB
/**
 * FlexaBrain MCP Server - Enhanced OCR Service
 *
 * Advanced OCR service specifically designed for complex technical drawings
 * and multi-page traction generator monitoring control schematics.
 *
 * Extends the base OCR service with:
 * - Dense layout text extraction
 * - Line and connection detection
 * - Generator monitoring component patterns
 * - Multi-resolution processing
 * - Technical drawing feature recognition
 */
import { createWorker, PSM } from 'tesseract.js';

/**
 * OCR service wrapping a single Tesseract worker.
 *
 * The worker is created asynchronously from the constructor; callers of
 * {@link EnhancedOCRService#processSchematicPage} do not need to wait for it
 * explicitly because that method awaits the pending initialization itself.
 *
 * Line detection, symbol recognition, image preprocessing and title-block
 * extraction are mock implementations that return fixed data.
 */
export class EnhancedOCRService {
    /** Tesseract worker, or null before initialization / after terminate(). */
    worker = null;
    /** True once the worker has been created and configured. */
    isInitialized = false;
    /** Map of `${page_number}_${JSON.stringify(options)}` -> raw OCR result. */
    processingCache = new Map();
    /**
     * Promise for the constructor-triggered initialization. It never rejects
     * (failures are logged and leave `isInitialized` false), so it is safe to
     * await from any method.
     */
    initPromise = null;

    // Enhanced patterns for generator monitoring systems
    GENERATOR_COMPONENT_PATTERNS = {
        GENERATOR_CONTROL: {
            component_type: 'generator_control',
            patterns: [
                /^GEN\d{1,4}[A-Z]?$/i, // GEN001, GEN205A
                /^G\d{1,4}[A-Z]?$/i, // G001, G205A
                /^GENMON\d+[A-Z]?$/i, // GENMON01
                /^GENCTRL\d+[A-Z]?$/i // GENCTRL01
            ],
            prefixes: ['GEN', 'G', 'GENMON', 'GENCTRL'],
            context_clues: ['generator', 'monitoring', 'control', 'output', 'excitation'],
            confidence_boost: 0.94,
            typical_locations: ['control_room', 'generator_bay', 'monitoring_panel'],
            associated_parameters: ['voltage', 'frequency', 'power', 'temperature', 'current']
        },
        MONITORING_SENSOR: {
            component_type: 'monitoring_sensor',
            patterns: [
                /^TE\d{1,4}[A-Z]?$/i, // TE001 (Temperature Element)
                /^PE\d{1,4}[A-Z]?$/i, // PE001 (Pressure Element)
                /^VE\d{1,4}[A-Z]?$/i, // VE001 (Vibration Element)
                /^FE\d{1,4}[A-Z]?$/i, // FE001 (Flow Element)
                /^MON\d{1,4}[A-Z]?$/i // MON001 (General Monitor)
            ],
            prefixes: ['TE', 'PE', 'VE', 'FE', 'MON'],
            context_clues: ['temperature', 'pressure', 'vibration', 'flow', 'sensor', 'monitor'],
            confidence_boost: 0.88,
            typical_locations: ['generator', 'bearing', 'cooling_system', 'lubrication'],
            associated_parameters: ['range', 'alarm', 'trip', 'units', 'calibration']
        },
        CONTROL_RELAY: {
            component_type: 'control_relay',
            patterns: [
                /^[A-Z]{1,3}R\d*[A-Z]?$/i, // KR101, TR205A, AR01
                /^R\d{1,4}[A-Z]?$/i, // R101, R205A
                /^REL\d+[A-Z]?$/i, // REL101A
                /^CR\d+[A-Z]?$/i // CR101 (Control Relay)
            ],
            prefixes: ['R', 'KR', 'TR', 'AR', 'REL', 'CR'],
            context_clues: ['relay', 'control', 'auxiliary', 'protection', 'interlock'],
            confidence_boost: 0.85,
            typical_locations: ['control_cabinet', 'relay_panel', 'protection_panel'],
            associated_parameters: ['coil_voltage', 'contact_rating', 'pickup', 'dropout']
        },
        PROTECTION_DEVICE: {
            component_type: 'protection_device',
            patterns: [
                /^[0-9]{2}[A-Z]{1,3}\d*$/i, // 27GEN, 59GEN, 81GEN (IEEE device numbers)
                /^[0-9]{2}\/[0-9]{2}[A-Z]*$/i, // 87/87N (Differential protection)
                /^PROT\d+[A-Z]?$/i // PROT01
            ],
            prefixes: ['27', '59', '81', '87', 'PROT'],
            context_clues: ['protection', 'undervoltage', 'overvoltage', 'frequency', 'differential'],
            confidence_boost: 0.92,
            typical_locations: ['protection_panel', 'control_room'],
            associated_parameters: ['setpoint', 'time_delay', 'pickup', 'reset']
        }
    };

    /**
     * Start worker initialization in the background.
     *
     * initializeWorker() logs and rethrows on failure. Nobody can await a
     * constructor, so the rejection is swallowed here to avoid an unhandled
     * promise rejection at import time; the failure remains observable through
     * `isInitialized === false` and the "not initialized" error raised by
     * processSchematicPage().
     */
    constructor() {
        this.initPromise = this.initializeWorker().catch(() => { });
    }

    /**
     * Create the Tesseract worker for English and configure it for technical
     * drawing text.
     *
     * @returns {Promise<void>}
     * @throws {Error} "Enhanced OCR initialization failed: ..." when worker
     *   creation or configuration fails (original error attached as `cause`).
     */
    async initializeWorker() {
        try {
            console.error('FlexaBrain Enhanced OCR: Initializing Tesseract worker...');
            this.worker = await createWorker('eng');
            // Configure for technical drawing text recognition
            await this.worker.setParameters({
                tessedit_char_whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-+_/()',
                tessedit_pageseg_mode: PSM.SPARSE_TEXT,
                tessedit_ocr_engine_mode: 1, // Neural nets LSTM engine
                // Enhanced settings for technical drawings
                tessedit_enable_bigram_correction: '1',
                tessedit_enable_dict_correction: '0', // Disable dictionary for technical terms
                classify_enable_learning: '0', // Disable learning for consistent results
                textord_really_old_xheight: '1' // Better handling of mixed text sizes
            });
            this.isInitialized = true;
            console.error('FlexaBrain Enhanced OCR: Worker initialized successfully');
        }
        catch (error) {
            console.error('FlexaBrain Enhanced OCR: Failed to initialize:', error);
            throw new Error(`Enhanced OCR initialization failed: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
        }
    }

    /**
     * Process a complete schematic page with enhanced OCR capabilities.
     *
     * @param {object} page - Page descriptor; reads `page_number`,
     *   `image_data` and `dimensions` (`width`/`height`).
     * @param {object} [options] - Reads `generator_monitoring_mode`,
     *   `enable_line_detection`, `enable_symbol_recognition`, plus the options
     *   consumed by preprocessImage() and performMultiResolutionOCR().
     * @returns {Promise<object>} `{ ocr_result }`, plus `line_detection` and
     *   `symbols` when the corresponding option is enabled.
     * @throws {Error} "Enhanced OCR service not initialized" if the worker is
     *   unavailable, or "Enhanced OCR failed: ..." wrapping any processing error.
     */
    async processSchematicPage(page, options = {}) {
        // Wait for a start-up that is still in flight instead of failing a call
        // that merely arrived early.
        await this.initPromise;
        if (!this.isInitialized || !this.worker) {
            throw new Error('Enhanced OCR service not initialized');
        }
        const startTime = Date.now();
        console.error(`FlexaBrain Enhanced OCR: Processing page ${page.page_number}`);
        try {
            // Apply preprocessing filters
            const preprocessedImage = await this.preprocessImage(page.image_data, options);
            // Perform multi-resolution OCR for better accuracy
            const ocrResult = await this.performMultiResolutionOCR(preprocessedImage, options);
            // Enhance results with generator monitoring patterns
            if (options.generator_monitoring_mode) {
                ocrResult.words = this.enhanceGeneratorComponentRecognition(ocrResult.words);
            }
            // Extract technical text regions.
            // NOTE(review): the regions are not included in the returned result.
            // The call is kept so behaviour (including its failure when
            // page.dimensions is missing) stays unchanged - confirm whether the
            // regions should be exposed.
            await this.extractTechnicalTextRegions(ocrResult, page.dimensions);
            // Detect lines and connections if enabled
            let lineDetection;
            if (options.enable_line_detection) {
                lineDetection = await this.detectLinesAndConnections(preprocessedImage, ocrResult);
            }
            // Recognize electrical symbols if enabled
            let symbolRecognition;
            if (options.enable_symbol_recognition) {
                symbolRecognition = await this.recognizeElectricalSymbols(preprocessedImage);
            }
            // Record the result for getProcessingStats().
            // NOTE(review): the cache is never read back to skip work, and the key
            // does not include the image itself.
            const cacheKey = `${page.page_number}_${JSON.stringify(options)}`;
            this.processingCache.set(cacheKey, ocrResult);
            const totalTime = Date.now() - startTime;
            console.error(`FlexaBrain Enhanced OCR: Page ${page.page_number} processed in ${totalTime}ms`);
            const result = {
                ocr_result: {
                    ...ocrResult,
                    processing_time: totalTime
                }
            };
            if (lineDetection) {
                result.line_detection = lineDetection;
            }
            if (symbolRecognition) {
                result.symbols = symbolRecognition;
            }
            return result;
        }
        catch (error) {
            console.error(`Enhanced OCR processing failed for page ${page.page_number}:`, error);
            throw new Error(`Enhanced OCR failed: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
        }
    }

    /**
     * Preprocess image for better OCR results on technical drawings.
     *
     * Mock implementation: logs each requested filter and returns the input
     * unchanged.
     *
     * @param {*} imageData - Image passed through untouched.
     * @param {object} [options] - Reads `preprocessing_filters`; defaults to
     *   `['denoise', 'sharpen', 'contrast']`.
     * @returns {Promise<*>} The same `imageData`.
     */
    async preprocessImage(imageData, options = {}) {
        // Mock implementation - in production would use Sharp or similar
        console.error('FlexaBrain Enhanced OCR: Applying image preprocessing filters');
        const filters = options.preprocessing_filters || ['denoise', 'sharpen', 'contrast'];
        // Simulate preprocessing operations
        for (const filter of filters) {
            switch (filter) {
                case 'denoise':
                    console.error(' - Applying noise reduction');
                    break;
                case 'sharpen':
                    console.error(' - Applying sharpening filter');
                    break;
                case 'contrast':
                    console.error(' - Enhancing contrast');
                    break;
                case 'binarize':
                    console.error(' - Converting to binary image');
                    break;
                case 'deskew':
                    console.error(' - Correcting skew');
                    break;
            }
        }
        return imageData; // Return original for mock - would return processed image
    }

    /**
     * Run recognition once per candidate DPI and keep the most confident result.
     *
     * Candidates are `options.dpi || 300`, plus `options.dpi * 1.5` when an
     * explicit dpi below 400 was given. A failure at one DPI is logged and the
     * next candidate is tried.
     *
     * NOTE(review): the image itself is not rescaled; the DPI value only
     * selects the x-height parameters sent to the worker.
     *
     * @param {*} imageData - Image handed to `worker.recognize`.
     * @param {object} [options] - Reads `dpi`, `psm` and `language`.
     * @returns {Promise<object>} `{ text, confidence, words, processing_time, language }`
     *   with confidences scaled to 0..1 and `processing_time` left at 0 for the
     *   caller to fill in.
     * @throws {Error} "OCR worker not available" or "OCR failed at all resolutions".
     */
    async performMultiResolutionOCR(imageData, options = {}) {
        if (!this.worker) {
            throw new Error('OCR worker not available');
        }
        const resolutions = [options.dpi || 300];
        if (options.dpi && options.dpi < 400) {
            resolutions.push(options.dpi * 1.5); // Try higher resolution for small text
        }
        let bestResult = null;
        let bestConfidence = 0;
        for (const dpi of resolutions) {
            try {
                console.error(`FlexaBrain Enhanced OCR: Processing at ${dpi} DPI`);
                // Configure OCR parameters for current resolution
                await this.worker.setParameters({
                    tessedit_pageseg_mode: options.psm || PSM.SPARSE_TEXT,
                    // Adjust parameters based on DPI
                    textord_min_xheight: dpi < 200 ? 8 : 12,
                    textord_max_xheight: dpi < 200 ? 40 : 60
                });
                const result = await this.worker.recognize(imageData);
                // Accept the first successful result whatever its score, so a
                // recognition with confidence 0 (e.g. a blank page) is not
                // misreported as a total failure.
                if (bestResult === null || result.data.confidence > bestConfidence) {
                    bestConfidence = result.data.confidence;
                    bestResult = result;
                }
            }
            catch (error) {
                console.error(`OCR failed at ${dpi} DPI:`, error);
            }
        }
        if (!bestResult) {
            throw new Error('OCR failed at all resolutions');
        }
        // Extract words with enhanced bounding boxes
        const words = bestResult.data.words
            .filter((word) => word.text.trim().length > 0)
            .map((word) => ({
                text: word.text.trim(),
                confidence: word.confidence / 100,
                bbox: {
                    x: word.bbox.x0,
                    y: word.bbox.y0,
                    width: word.bbox.x1 - word.bbox.x0,
                    height: word.bbox.y1 - word.bbox.y0
                }
            }));
        return {
            text: bestResult.data.text.trim(),
            confidence: bestResult.data.confidence / 100,
            words,
            processing_time: 0, // Will be set by caller
            language: options.language || 'eng'
        };
    }

    /**
     * Adjust word confidences for words matching a generator monitoring pattern.
     *
     * For each word, the matching category with the highest `confidence_boost`
     * wins and the word's confidence is multiplied by that factor (capped at
     * 1.0). Non-matching words are returned as-is.
     *
     * NOTE(review): every configured `confidence_boost` is below 1, so the
     * multiplication lowers the confidence of matched words although the log
     * line says "Boosted", and the 1.0 cap can never trigger. Numeric behaviour
     * is left unchanged here - confirm the intended semantics.
     *
     * @param {Array<object>} words - Words with `text` and `confidence`.
     * @returns {Array<object>} New array; matched words are shallow copies with
     *   an updated `confidence`.
     */
    enhanceGeneratorComponentRecognition(words) {
        return words.map(word => {
            const upperText = word.text.toUpperCase();
            let maxConfidenceBoost = 0;
            let bestPattern = '';
            // Check against generator monitoring patterns
            for (const [patternName, pattern] of Object.entries(this.GENERATOR_COMPONENT_PATTERNS)) {
                const matches = pattern.patterns.some(regex => regex.test(upperText));
                if (matches && pattern.confidence_boost > maxConfidenceBoost) {
                    maxConfidenceBoost = pattern.confidence_boost;
                    bestPattern = patternName;
                }
            }
            // Apply confidence boost if pattern matched
            if (maxConfidenceBoost > 0) {
                const enhancedConfidence = Math.min(word.confidence * maxConfidenceBoost, 1.0);
                console.error(`FlexaBrain Enhanced OCR: Boosted confidence for ${word.text} (${bestPattern}): ${word.confidence.toFixed(3)} -> ${enhancedConfidence.toFixed(3)}`);
                return {
                    ...word,
                    confidence: enhancedConfidence
                };
            }
            return word;
        });
    }

    /**
     * Extract technical text regions (title blocks, component lists, etc.)
     *
     * Always emits a mock title-block region positioned relative to the page's
     * bottom-right corner. It adds a `component_list` region spanning the
     * bounding box of all single-letter designators (e.g. `A601`, `K12B`) when
     * more than five are present.
     *
     * @param {object} ocrResult - Reads `words` (`text`, `bbox`).
     * @param {object} dimensions - Page size; reads `width` and `height`.
     * @returns {Promise<Array<object>>} Region descriptors.
     */
    async extractTechnicalTextRegions(ocrResult, dimensions) {
        const regions = [];
        // Mock implementation - would analyze text positioning and content
        const titleBlockRegion = {
            region_type: 'title_block',
            location: {
                x: dimensions.width - 400,
                y: dimensions.height - 200,
                width: 380,
                height: 180
            },
            text_content: 'SCHEMATIC TRACTION F6.00.32 Generator Monitor Control P01',
            confidence: 0.9
        };
        regions.push(titleBlockRegion);
        // Look for component lists based on text patterns
        const componentListWords = ocrResult.words.filter(word => /^[A-Z]\d+[A-Z]?$/.test(word.text.toUpperCase()));
        if (componentListWords.length > 5) { // Likely has a component list
            const minX = Math.min(...componentListWords.map(w => w.bbox.x));
            const maxX = Math.max(...componentListWords.map(w => w.bbox.x + w.bbox.width));
            const minY = Math.min(...componentListWords.map(w => w.bbox.y));
            const maxY = Math.max(...componentListWords.map(w => w.bbox.y + w.bbox.height));
            regions.push({
                region_type: 'component_list',
                location: {
                    x: minX,
                    y: minY,
                    width: maxX - minX,
                    height: maxY - minY
                },
                text_content: componentListWords.map(w => w.text).join(' '),
                confidence: 0.85
            });
        }
        return regions;
    }

    /**
     * Detect lines and electrical connections.
     *
     * Mock implementation: ignores both arguments and returns two fixed lines
     * and one fixed connection.
     *
     * @param {*} imageData - Unused by the mock.
     * @param {object} ocrResult - Unused by the mock.
     * @returns {Promise<object>} `{ lines, connections, processing_time }`.
     */
    async detectLinesAndConnections(imageData, ocrResult) {
        const startTime = Date.now();
        // Mock implementation - would use computer vision for line detection
        console.error('FlexaBrain Enhanced OCR: Detecting lines and connections');
        const mockLines = [
            {
                id: 'line_001',
                start_point: { x: 100, y: 150 },
                end_point: { x: 300, y: 150 },
                line_type: 'power',
                confidence: 0.85,
                thickness: 2,
                style: 'solid'
            },
            {
                id: 'line_002',
                start_point: { x: 300, y: 150 },
                end_point: { x: 500, y: 150 },
                line_type: 'control',
                confidence: 0.78,
                thickness: 1,
                style: 'dashed'
            }
        ];
        const mockConnections = [
            {
                line_id: 'line_001',
                connected_components: ['A601', 'CB101'],
                connection_type: 'bidirectional',
                wire_number: 'W001'
            }
        ];
        return {
            lines: mockLines,
            connections: mockConnections,
            processing_time: Date.now() - startTime
        };
    }

    /**
     * Recognize electrical symbols in the schematic.
     *
     * Mock implementation: ignores the image and returns two fixed symbols.
     *
     * @param {*} imageData - Unused by the mock.
     * @returns {Promise<object>} `{ symbols, processing_time }`.
     */
    async recognizeElectricalSymbols(imageData) {
        const startTime = Date.now();
        // Mock implementation - would use computer vision for symbol recognition
        console.error('FlexaBrain Enhanced OCR: Recognizing electrical symbols');
        const mockSymbols = [
            {
                id: 'symbol_001',
                symbol_type: 'transformer',
                location: { x: 200, y: 100, width: 50, height: 40 },
                confidence: 0.88,
                parameters: { 'turns_ratio': '1:10', 'rating': '1000kVA' }
            },
            {
                id: 'symbol_002',
                symbol_type: 'switch',
                location: { x: 400, y: 200, width: 30, height: 20 },
                confidence: 0.82,
                parameters: { 'type': 'SPDT', 'rating': '600V' }
            }
        ];
        return {
            symbols: mockSymbols,
            processing_time: Date.now() - startTime
        };
    }

    /**
     * Extract specific generator monitoring components.
     *
     * Emits one component per (word, category) pair whose text matches any of
     * the category's patterns; the inner `break` only stops checking further
     * regexes of the same category.
     *
     * @param {object} ocrResult - Reads `words` (`text`, `bbox`, `confidence`).
     * @returns {Promise<Array<object>>} Component descriptors whose confidence
     *   is the word confidence multiplied by the category's `confidence_boost`.
     */
    async extractGeneratorMonitoringComponents(ocrResult) {
        const components = [];
        for (const word of ocrResult.words) {
            const upperText = word.text.toUpperCase();
            for (const [patternName, pattern] of Object.entries(this.GENERATOR_COMPONENT_PATTERNS)) {
                for (const regex of pattern.patterns) {
                    if (regex.test(upperText)) {
                        components.push({
                            id: word.text,
                            type: pattern.component_type,
                            pattern_matched: patternName,
                            location: word.bbox,
                            confidence: word.confidence * pattern.confidence_boost,
                            associated_parameters: pattern.associated_parameters,
                            typical_locations: pattern.typical_locations
                        });
                        break;
                    }
                }
            }
        }
        return components;
    }

    /**
     * Batch process multiple pages.
     *
     * All pages are submitted concurrently. A page that fails is logged and
     * reported as `{ error: message }` instead of aborting the batch.
     *
     * @param {Array<object>} pages - Page descriptors (see processSchematicPage).
     * @param {object} [options] - Forwarded to processSchematicPage for every page.
     * @returns {Promise<Map<*, object>>} Map keyed by `page_number`, filled in
     *   input order.
     */
    async batchProcessPages(pages, options = {}) {
        const results = new Map();
        console.error(`FlexaBrain Enhanced OCR: Batch processing ${pages.length} pages`);
        // Process pages in parallel for better performance
        const outcomes = await Promise.all(pages.map(async (page) => {
            try {
                return await this.processSchematicPage(page, options);
            }
            catch (error) {
                console.error(`Failed to process page ${page.page_number}:`, error);
                return { error: error instanceof Error ? error.message : String(error) };
            }
        }));
        // Insert in input order so Map iteration does not depend on which page
        // happened to finish first.
        pages.forEach((page, index) => {
            results.set(page.page_number, outcomes[index]);
        });
        console.error(`FlexaBrain Enhanced OCR: Batch processing completed - ${results.size} pages processed`);
        return results;
    }

    /**
     * Get processing statistics and performance metrics.
     *
     * @returns {{cache_size: number, total_processed: number, average_confidence: number, worker_status: boolean}}
     *   Statistics derived from the cached OCR results; `average_confidence`
     *   is 0 when the cache is empty.
     */
    getProcessingStats() {
        let totalConfidence = 0;
        let count = 0;
        for (const result of this.processingCache.values()) {
            totalConfidence += result.confidence;
            count++;
        }
        return {
            cache_size: this.processingCache.size,
            total_processed: count,
            average_confidence: count > 0 ? totalConfidence / count : 0,
            worker_status: this.isInitialized && this.worker !== null
        };
    }

    /**
     * Clear processing cache to free memory.
     */
    clearCache() {
        this.processingCache.clear();
        console.error('FlexaBrain Enhanced OCR: Cache cleared');
    }

    /**
     * Terminate the OCR service and clean up resources.
     *
     * Terminates the worker if one exists, resets the initialization flag and
     * clears the cache.
     *
     * @returns {Promise<void>}
     */
    async terminate() {
        if (this.worker) {
            await this.worker.terminate();
            this.worker = null;
            this.isInitialized = false;
        }
        this.clearCache();
        console.error('FlexaBrain Enhanced OCR: Service terminated');
    }
}

// Export singleton instance
export const enhancedOCRService = new EnhancedOCRService();

// Graceful cleanup on process termination.
// 'exit' listeners may only do synchronous work, so this merely kicks off
// worker termination; nothing after the first await inside terminate() runs.
process.on('exit', () => {
    enhancedOCRService.terminate().catch(console.error);
});

// Installing a SIGINT/SIGTERM listener disables Node's default "exit on
// signal" behaviour. After cleanup, re-raise the signal when no other listener
// has taken responsibility for it, so the process still terminates with the
// conventional signal status. `once` removes this listener before it runs,
// which lets the re-raised signal reach the default handler.
for (const signal of ['SIGINT', 'SIGTERM']) {
    process.once(signal, () => {
        enhancedOCRService
            .terminate()
            .catch(console.error)
            .finally(() => {
                if (process.listenerCount(signal) === 0) {
                    process.kill(process.pid, signal);
                }
            });
    });
}
//# sourceMappingURL=enhanced-ocr-service.js.map