@flexabrain/mcp-server
Version:
Advanced electrical schematic analysis MCP server with rail engineering expertise
202 lines • 8.12 kB
JavaScript
/**
* FlexaBrain MCP Server - OCR Service
*
* Advanced OCR service optimized for electrical schematic text extraction.
* Uses Tesseract.js with preprocessing for improved component label recognition.
*/
import { createWorker, PSM } from 'tesseract.js';
// Identifier shapes accepted by extractComponentLabels(). Built once at module
// load instead of once per recognized word.
const COMPONENT_LABEL_PATTERNS = [
  /^[A-Z]\d+[A-Z]?$/, // One letter + digits + optional suffix letter: A601, K12B
  /^\d{6,8}$/, // Signal references (99010101)
  /^[A-Z]{2,4}\d+[A-Z]?$/, // TR205B, CB101A, CONV01, etc.
  /^[A-Z]+[A-Z0-9.-]+$/, // General electrical identifiers
];

/**
 * Wrapper around a single Tesseract.js worker, configured for reading labels
 * on electrical schematics.
 *
 * One worker is shared by every call. Recognition parameters are re-applied on
 * each `extractText` call, so options from one call never leak into the next.
 */
export class OCRService {
  /** Active Tesseract worker, or null before initialization / after terminate(). */
  worker = null;
  /** True once the worker has been created and configured. */
  isInitialized = false;
  /** Promise for the initialization started by the constructor. */
  initPromise = null;
  /** Language the current worker was created with. */
  currentLanguage = 'eng';

  /**
   * Starts worker initialization in the background. Callers do not need to wait:
   * `extractText` awaits the pending initialization before using the worker.
   */
  constructor() {
    this.initPromise = this.initializeWorker();
    // initializeWorker() already logs the failure, and extractText() reports it
    // to the caller. This handler only stops the rejection from surfacing as an
    // unhandled rejection at construction time.
    this.initPromise.catch(() => {});
  }

  /**
   * Create the English worker and apply the default schematic parameters.
   *
   * @returns {Promise<void>}
   * @throws {Error} "OCR initialization failed: ..." when worker creation or
   *   configuration fails.
   */
  async initializeWorker() {
    try {
      console.error('FlexaBrain OCR: Initializing Tesseract worker...');
      this.worker = await createWorker('eng');
      this.currentLanguage = 'eng';
      // Configure for electrical schematic text recognition
      await this.worker.setParameters({
        tessedit_char_whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-+_/',
        tessedit_pageseg_mode: PSM.SPARSE_TEXT,
        tessedit_ocr_engine_mode: 1, // Neural nets LSTM engine
      });
      this.isInitialized = true;
      console.error('FlexaBrain OCR: Tesseract worker initialized successfully');
    } catch (error) {
      console.error('FlexaBrain OCR: Failed to initialize Tesseract worker:', error);
      throw new Error(`OCR initialization failed: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Wait for the constructor-started initialization and verify the service is usable.
   *
   * @returns {Promise<void>}
   * @throws {Error} "OCR service not initialized" when initialization failed
   *   (original failure attached as `cause`) or the service was terminated.
   */
  async waitUntilReady() {
    if (this.initPromise) {
      try {
        await this.initPromise;
      } catch (error) {
        throw new Error('OCR service not initialized', { cause: error });
      }
    }
    if (!this.isInitialized || !this.worker) {
      throw new Error('OCR service not initialized');
    }
  }

  /**
   * Replace the current worker with one created for `language`.
   *
   * The new worker is created before the old one is terminated, so a failed
   * `createWorker` call leaves the existing worker usable.
   *
   * @param {string} language - Language code passed to `createWorker`.
   * @returns {Promise<void>}
   */
  async switchLanguage(language) {
    const replacement = await createWorker(language);
    const previous = this.worker;
    this.worker = replacement;
    this.currentLanguage = language;
    if (previous) {
      await previous.terminate();
    }
  }

  /**
   * Extract text from electrical schematic image.
   * Optimized for component labels and reference designators.
   *
   * @param {*} imagePath - Image reference handed to `worker.recognize`.
   * @param {object} [options]
   * @param {string} [options.language] - Language code; defaults to 'eng'. The
   *   worker is recreated whenever this differs from the current worker's language.
   * @param {*} [options.psm] - Page segmentation mode; defaults to `PSM.SPARSE_TEXT`.
   * @param {string} [options.whitelist] - Allowed characters; defaults to
   *   uppercase letters, digits and `.-+_/()[]`.
   * @param {string} [options.blacklist] - Forbidden characters; defaults to none.
   * @returns {Promise<{text: string, confidence: number, words: Array, processing_time: number, language: string}>}
   *   `confidence` values are scaled to 0-1; each word carries an
   *   `{x, y, width, height}` bounding box; `processing_time` is in milliseconds.
   * @throws {Error} "OCR service not initialized" or "OCR text extraction failed: ...".
   */
  async extractText(imagePath, options = {}) {
    await this.waitUntilReady();
    const startTime = Date.now();
    const language = options.language || 'eng';
    try {
      console.error(`FlexaBrain OCR: Processing image: ${imagePath}`);
      // Compare against the worker's actual language, not against 'eng', so a
      // default call after a non-English call switches back.
      if (language !== this.currentLanguage) {
        await this.switchLanguage(language);
      }
      // Every parameter is set on every call. The worker is shared, so a value
      // left unset here would keep whatever the previous call configured.
      const ocrParams = {
        tessedit_pageseg_mode: options.psm || PSM.SPARSE_TEXT,
        tessedit_ocr_engine_mode: 1,
        tessedit_char_whitelist: options.whitelist || 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-+_/()[]',
        tessedit_char_blacklist: options.blacklist || '',
      };
      await this.worker.setParameters(ocrParams);
      // Perform OCR recognition
      const result = await this.worker.recognize(imagePath);
      const processingTime = Date.now() - startTime;
      console.error(`FlexaBrain OCR: Processed in ${processingTime}ms, confidence: ${result.data.confidence.toFixed(1)}%`);
      // Extract words with bounding boxes for component location mapping.
      // A result without a `words` array is treated as "no words".
      const words = (result.data.words ?? [])
        .map((word) => ({
          text: word.text.trim(),
          confidence: word.confidence / 100, // Convert to 0-1 scale
          bbox: {
            x: word.bbox.x0,
            y: word.bbox.y0,
            width: word.bbox.x1 - word.bbox.x0,
            height: word.bbox.y1 - word.bbox.y0,
          },
        }))
        .filter((word) => word.text.length > 0); // Filter out empty text
      return {
        text: result.data.text.trim(),
        confidence: result.data.confidence / 100,
        words,
        processing_time: processingTime,
        language,
      };
    } catch (error) {
      console.error('FlexaBrain OCR: Text extraction failed:', error);
      throw new Error(`OCR text extraction failed: ${error instanceof Error ? error.message : String(error)}`);
    }
  }

  /**
   * Extract text from specific regions of interest.
   *
   * NOTE: `region` is currently ignored and the whole image is processed; the
   * call is forwarded unchanged to `extractText`.
   * TODO: Implement region-specific extraction with image preprocessing
   *
   * @param {*} imagePath - Image reference handed to `extractText`.
   * @param {*} region - Unused for now.
   * @param {object} [options] - Same options as `extractText`.
   * @returns {Promise<object>} Same shape as `extractText`.
   */
  async extractTextFromRegion(imagePath, region, options = {}) {
    return this.extractText(imagePath, options);
  }

  /**
   * Batch process multiple images for component extraction.
   *
   * Images are processed one after another on the shared worker. A failed image
   * is logged and yields an empty placeholder, so `results[i]` always
   * corresponds to `imagePaths[i]`.
   *
   * @param {Iterable} imagePaths - Image references to process in order.
   * @param {object} [options] - Same options as `extractText`.
   * @returns {Promise<Array<object>>} One result per input image.
   */
  async extractTextBatch(imagePaths, options = {}) {
    const results = [];
    for (const imagePath of imagePaths) {
      try {
        const result = await this.extractText(imagePath, options);
        results.push(result);
      } catch (error) {
        console.error(`FlexaBrain OCR: Failed to process ${imagePath}:`, error);
        // Add error result to maintain array consistency
        results.push({
          text: '',
          confidence: 0,
          words: [],
          processing_time: 0,
          language: options.language || 'eng',
        });
      }
    }
    return results;
  }

  /**
   * Specialized extraction for electrical component patterns.
   *
   * Runs `extractText` with a fixed English / sparse-text configuration, then
   * keeps only words that match one of the component-label patterns and have a
   * confidence above 0.6.
   *
   * @param {*} imagePath - Image reference handed to `extractText`.
   * @returns {Promise<object>} The `extractText` result with `words` filtered
   *   and `text` rebuilt from the kept words joined by single spaces.
   */
  async extractComponentLabels(imagePath) {
    const electricalOptions = {
      language: 'eng',
      psm: PSM.SPARSE_TEXT,
      // Optimized for electrical component naming conventions
      whitelist: 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.-+_/()',
    };
    const result = await this.extractText(imagePath, electricalOptions);
    // Filter words to focus on likely component identifiers
    const componentWords = result.words.filter((word) => {
      const text = word.text.toUpperCase();
      return COMPONENT_LABEL_PATTERNS.some((pattern) => pattern.test(text)) && word.confidence > 0.6;
    });
    return {
      ...result,
      words: componentWords,
      text: componentWords.map((w) => w.text).join(' '),
    };
  }

  /**
   * Extract numerical values from schematics (voltages, currents, etc.).
   *
   * Runs `extractText` in single-word mode with a whitelist limited to digits,
   * sign/decimal characters and a few unit symbols.
   *
   * @param {*} imagePath - Image reference handed to `extractText`.
   * @returns {Promise<object>} Same shape as `extractText`.
   */
  async extractNumericalValues(imagePath) {
    const numericalOptions = {
      language: 'eng',
      psm: PSM.SINGLE_WORD,
      whitelist: '0123456789.-+kMmVAWΩμ°',
    };
    return this.extractText(imagePath, numericalOptions);
  }

  /**
   * Clean up resources.
   *
   * If initialization is still running, waits for it first so the worker it
   * creates is terminated instead of leaked. Safe to call more than once.
   *
   * @returns {Promise<void>}
   */
  async terminate() {
    if (!this.isInitialized && this.initPromise) {
      // An init failure is reported elsewhere; here we only need it settled.
      await this.initPromise.catch(() => {});
    }
    if (this.worker) {
      const worker = this.worker;
      // Clear state before awaiting so an overlapping terminate() call cannot
      // terminate the same worker twice.
      this.worker = null;
      this.isInitialized = false;
      await worker.terminate();
      console.error('FlexaBrain OCR: Tesseract worker terminated');
    }
  }

  /**
   * Get service status.
   *
   * @returns {{initialized: boolean, worker: boolean}} Whether initialization
   *   has completed and whether a worker instance currently exists.
   */
  getStatus() {
    return {
      initialized: this.isInitialized,
      worker: this.worker !== null,
    };
  }
}
// Shared instance used across the application; constructing it starts
// Tesseract worker initialization.
export const ocrService = new OCRService();

// Best-effort cleanup: ask the service to terminate its worker and log any
// failure instead of throwing from inside a process event handler.
// NOTE(review): the signal listeners do not call process.exit(); confirm that
// another part of the application ends the process on SIGINT/SIGTERM.
const releaseOcrWorker = () => {
  ocrService.terminate().catch(console.error);
};

for (const eventName of ['exit', 'SIGINT', 'SIGTERM']) {
  process.on(eventName, releaseOcrWorker);
}
//# sourceMappingURL=ocr-service.js.map