UNPKG

@flexabrain/mcp-server

Version:

Advanced electrical schematic analysis MCP server with rail engineering expertise

202 lines 8.12 kB
/**
 * FlexaBrain MCP Server - OCR Service
 *
 * OCR service for electrical schematic text extraction, built on Tesseract.js.
 * A single worker is configured with character whitelists and page
 * segmentation modes suited to component labels and reference designators.
 */
import { createWorker, PSM } from 'tesseract.js';

/** Language loaded by default and reported when the caller gives none. */
const DEFAULT_LANGUAGE = 'eng';

/** Value sent as `tessedit_ocr_engine_mode` (1 = neural nets LSTM engine). */
const LSTM_ENGINE_MODE = 1;

/** Whitelist applied once when the worker is first created. */
const INIT_WHITELIST = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-+_/';

/** Whitelist used by extractText() when the caller does not supply one. */
const DEFAULT_WHITELIST = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-+_/()[]';

/** Whitelist used by extractComponentLabels(). */
const COMPONENT_WHITELIST = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-+_/()';

/** Words at or below this confidence (0-1 scale) are not treated as component labels. */
const MIN_COMPONENT_CONFIDENCE = 0.6;

/**
 * Rail electrical component patterns, hoisted so they are built once rather
 * than once per recognized word. None carries the `g`/`y` flag, so `.test()`
 * is stateless and the literals are safe to share.
 */
const COMPONENT_PATTERNS = [
    /^[A-Z]\d+[A-Z]?$/, // A601, K12B, etc.
    /^\d{6,8}$/, // Signal references (99010101)
    /^[A-Z]{2,4}\d+[A-Z]?$/, // CB101A, TR205B, CONV01, etc.
    /^[A-Z]+[A-Z0-9.-]+$/, // General electrical identifiers
];

/**
 * Format an unknown thrown value for inclusion in an error message.
 * @param {unknown} error
 * @returns {string}
 */
const describeError = (error) => (error instanceof Error ? error.message : String(error));

export class OCRService {
    /** Active Tesseract worker, or null when none is available. */
    worker = null;
    /** True once a worker has been created and is ready for recognition. */
    isInitialized = false;
    /** Language the current worker was created with, or null when there is no worker. */
    currentLanguage = null;
    /** Settles (never rejects) when the initialization started by the constructor finishes. */
    initPromise = null;

    constructor() {
        // A constructor cannot await. initializeWorker() rethrows on failure, so
        // without a handler a failed start-up would surface as an unhandled
        // rejection. The failure is already logged there and leaves
        // isInitialized false, which extractText() reports to the caller.
        this.initPromise = this.initializeWorker().catch(() => { });
    }

    /**
     * Create the Tesseract worker and apply the schematic-oriented defaults.
     * @returns {Promise<void>}
     * @throws {Error} "OCR initialization failed: ..." (original error attached as `cause`).
     */
    async initializeWorker() {
        let worker = null;
        try {
            console.error('FlexaBrain OCR: Initializing Tesseract worker...');
            worker = await createWorker(DEFAULT_LANGUAGE);
            // Configure for electrical schematic text recognition
            await worker.setParameters({
                tessedit_char_whitelist: INIT_WHITELIST,
                tessedit_pageseg_mode: PSM.SPARSE_TEXT,
                // NOTE(review): recent tesseract.js releases document the engine mode as
                // an init-only option of createWorker(); confirm this entry has an effect.
                tessedit_ocr_engine_mode: LSTM_ENGINE_MODE,
            });
            this.worker = worker;
            this.currentLanguage = DEFAULT_LANGUAGE;
            this.isInitialized = true;
            console.error('FlexaBrain OCR: Tesseract worker initialized successfully');
        }
        catch (error) {
            console.error('FlexaBrain OCR: Failed to initialize Tesseract worker:', error);
            if (worker) {
                // Best-effort release of a worker that was created but could not be
                // configured; the configuration error is the one worth reporting.
                await worker.terminate().catch(() => { });
            }
            throw new Error(`OCR initialization failed: ${describeError(error)}`, { cause: error });
        }
    }

    /**
     * Replace the current worker with one loaded for `language`.
     *
     * State is cleared before the old worker is terminated, so a failure while
     * creating the new worker leaves the service reporting "not initialized"
     * instead of holding a terminated worker.
     * @param {string} language - Tesseract language code.
     * @returns {Promise<void>}
     */
    async switchLanguage(language) {
        const previous = this.worker;
        this.worker = null;
        this.isInitialized = false;
        this.currentLanguage = null;
        if (previous) {
            await previous.terminate();
        }
        this.worker = await createWorker(language);
        this.currentLanguage = language;
        this.isInitialized = true;
    }

    /**
     * Extract text from electrical schematic image.
     * Optimized for component labels and reference designators.
     *
     * @param {*} imagePath - Image passed straight to the worker's `recognize()`.
     * @param {object} [options]
     * @param {string} [options.language] - Language code; defaults to 'eng'.
     * @param {*} [options.psm] - Page segmentation mode; defaults to PSM.SPARSE_TEXT.
     * @param {string} [options.whitelist] - Allowed characters; defaults to the schematic whitelist.
     * @param {string} [options.blacklist] - Disallowed characters; defaults to none.
     * @returns {Promise<{text: string, confidence: number, words: Array, processing_time: number, language: string}>}
     *   `confidence` values are on a 0-1 scale; `processing_time` is in milliseconds.
     * @throws {Error} "OCR service not initialized" when no worker is available.
     * @throws {Error} "OCR text extraction failed: ..." (original error attached as `cause`).
     */
    async extractText(imagePath, options = {}) {
        // Wait for the initialization started by the constructor so a call made
        // right after import does not fail merely because start-up is in flight.
        await this.initPromise;
        if (!this.isInitialized || !this.worker) {
            throw new Error('OCR service not initialized');
        }
        const startTime = Date.now();
        try {
            console.error(`FlexaBrain OCR: Processing image: ${imagePath}`);
            // Compare against the language actually loaded, not against 'eng':
            // otherwise one non-English call would leave every later default call
            // running on the wrong model.
            const language = options.language || DEFAULT_LANGUAGE;
            if (language !== this.currentLanguage) {
                await this.switchLanguage(language);
            }
            // Every parameter is sent on every call because the worker is shared;
            // in particular the blacklist is reset so one caller's blacklist does
            // not leak into the next call.
            const ocrParams = {
                tessedit_pageseg_mode: options.psm ?? PSM.SPARSE_TEXT,
                tessedit_ocr_engine_mode: LSTM_ENGINE_MODE,
                tessedit_char_whitelist: options.whitelist || DEFAULT_WHITELIST,
                tessedit_char_blacklist: options.blacklist || '',
            };
            await this.worker.setParameters(ocrParams);
            // Perform OCR recognition
            const { data } = await this.worker.recognize(imagePath);
            const processingTime = Date.now() - startTime;
            console.error(`FlexaBrain OCR: Processed in ${processingTime}ms, confidence: ${data.confidence.toFixed(1)}%`);
            // Extract words with bounding boxes for component location mapping.
            // `words` may be missing from the result, so fall back to an empty list.
            const words = (data.words ?? [])
                .map((word) => ({
                    text: word.text.trim(),
                    confidence: word.confidence / 100, // Convert to 0-1 scale
                    bbox: {
                        x: word.bbox.x0,
                        y: word.bbox.y0,
                        width: word.bbox.x1 - word.bbox.x0,
                        height: word.bbox.y1 - word.bbox.y0,
                    },
                }))
                .filter((word) => word.text.length > 0); // Filter out empty text
            return {
                text: data.text.trim(),
                confidence: data.confidence / 100,
                words,
                processing_time: processingTime,
                language: options.language || 'eng',
            };
        }
        catch (error) {
            console.error('FlexaBrain OCR: Text extraction failed:', error);
            throw new Error(`OCR text extraction failed: ${describeError(error)}`, { cause: error });
        }
    }

    /**
     * Extract text from specific regions of interest.
     *
     * Currently `region` is ignored and the whole image is processed.
     * @param {*} imagePath - Image passed to extractText().
     * @param {*} region - Unused for now.
     * @param {object} [options] - Same options as extractText().
     * @returns {Promise<object>} Same result shape as extractText().
     */
    async extractTextFromRegion(imagePath, region, options = {}) {
        // TODO: Implement region-specific extraction. Cropping is not done yet
        // because the shape of `region` is not defined in this module.
        return this.extractText(imagePath, options);
    }

    /**
     * Batch process multiple images for component extraction.
     *
     * Images are processed one after another. A failure on one image is logged
     * and replaced by an empty result so the output stays aligned with the input.
     * @param {Iterable} imagePaths - Images to process, in order.
     * @param {object} [options] - Same options as extractText(), applied to every image.
     * @returns {Promise<Array<object>>} One result per input image.
     */
    async extractTextBatch(imagePaths, options = {}) {
        const results = [];
        for (const imagePath of imagePaths) {
            try {
                results.push(await this.extractText(imagePath, options));
            }
            catch (error) {
                console.error(`FlexaBrain OCR: Failed to process ${imagePath}:`, error);
                // Add error result to maintain array consistency
                results.push({
                    text: '',
                    confidence: 0,
                    words: [],
                    processing_time: 0,
                    language: options.language || 'eng',
                });
            }
        }
        return results;
    }

    /**
     * Specialized extraction for electrical component patterns.
     *
     * Runs extractText() with a component-oriented whitelist, then keeps only
     * words that match one of COMPONENT_PATTERNS with confidence above 0.6.
     * @param {*} imagePath - Image passed to extractText().
     * @returns {Promise<object>} extractText() result with `words` filtered and
     *   `text` rebuilt as the kept words joined by single spaces.
     */
    async extractComponentLabels(imagePath) {
        const result = await this.extractText(imagePath, {
            language: 'eng',
            psm: PSM.SPARSE_TEXT,
            // Optimized for electrical component naming conventions
            whitelist: COMPONENT_WHITELIST,
        });
        // Filter words to focus on likely component identifiers
        const componentWords = result.words.filter((word) => {
            const text = word.text.toUpperCase();
            return COMPONENT_PATTERNS.some((pattern) => pattern.test(text))
                && word.confidence > MIN_COMPONENT_CONFIDENCE;
        });
        return {
            ...result,
            words: componentWords,
            text: componentWords.map((w) => w.text).join(' '),
        };
    }

    /**
     * Extract numerical values from schematics (voltages, currents, etc.)
     * using a digit/unit whitelist and single-word segmentation.
     * @param {*} imagePath - Image passed to extractText().
     * @returns {Promise<object>} Same result shape as extractText().
     */
    async extractNumericalValues(imagePath) {
        return this.extractText(imagePath, {
            language: 'eng',
            psm: PSM.SINGLE_WORD,
            whitelist: '0123456789.-+kMmVAWΩμ°',
        });
    }

    /**
     * Clean up resources. Does nothing when there is no worker.
     * @returns {Promise<void>}
     */
    async terminate() {
        const worker = this.worker;
        if (!worker) {
            return;
        }
        // Clear state before awaiting so overlapping calls (e.g. a signal handler
        // followed by the 'exit' handler) do not terminate the same worker twice.
        this.worker = null;
        this.isInitialized = false;
        this.currentLanguage = null;
        await worker.terminate();
        console.error('FlexaBrain OCR: Tesseract worker terminated');
    }

    /**
     * Get service status.
     * @returns {{initialized: boolean, worker: boolean}}
     */
    getStatus() {
        return {
            initialized: this.isInitialized,
            worker: this.worker !== null,
        };
    }
}

// Export singleton instance for use across the application
export const ocrService = new OCRService();

// Graceful cleanup on process termination. 'exit' listeners cannot wait for
// asynchronous work, so termination there is started on a best-effort basis.
// NOTE(review): registering SIGINT/SIGTERM listeners replaces Node's default
// exit-on-signal behaviour and these handlers do not call process.exit();
// confirm that another part of the application ends the process.
for (const event of ['exit', 'SIGINT', 'SIGTERM']) {
    process.on(event, () => {
        ocrService.terminate().catch(console.error);
    });
}
//# sourceMappingURL=ocr-service.js.map