easy-tesseract-ocr
Version:
This is a tiny OCR project and just provide a caller method to interact with Tesseract (Which is a known open source OCR library project written in C++).
106 lines (99 loc) • 3.32 kB
JavaScript
const robot = require('robotjs');
const sleep = require('sleep');
const jimp = require('jimp');
const fs = require('fs');
const exec = require('child-process-promise').exec;
var easyTesseractOCR = {
getStaticOCRPromise: function (params) {
let imagePath = params.imagePath ? params.imagePath : 'image-not-found.jpg';
let compare = params.compare ? params.compare : null;
let trainedData = params.trainedData ? params.trainedData : null;
return new Promise(function (resolve, reject) {
exec('tesseract ' + imagePath + ' tesseract -l ' + trainedData)
.then(function (result) {
let stdout = result.stdout;
let stderr = result.stderr;
//console.log('stdout: ', stdout);
//console.log('stderr: ', stderr);
let out = fs.readFileSync('./tesseract.txt').toString().replace(' ', '');
fs.unlink('./tesseract.txt');
if(compare) {
let compareList = [];
for(let c of compare) {
compareList.push(c);
}
let findText = 0;
for(let c of compareList) {
if(out.indexOf(c) >=0) {
//console.log('equal:' + c);
findText++;
}
}
//calculate probability
let prob = (findText / compareList.length) * 100;
resolve(prob);
} else {
//pass the ocr result(text)
resolve(out.toString());
}
})
.catch(function (err) {
//console.error('error: ', err);
reject(err);
});
});
},
scan: function (params) {
let imagePath = params.imagePath ? params.imagePath : null;
let compare = params.compare ? params.compare : null;
let trainedData = params.trainedData ? params.trainedData : 'chi_tra';
let reservedSample = params.reservedSample ? params.reservedSample : false;
//check ocr progress type
if(typeof(imagePath) === 'object') {
//live screen ocr
return new Promise((resolve, reject) => {
try {
let screenshot = null;
if('x' in imagePath && 'y' in imagePath && 'width' in imagePath && 'height' in imagePath) {
screenshot = robot.screen.capture(imagePath.x, imagePath.y, imagePath.width, imagePath.height);
}
if('x1' in imagePath && 'y1' in imagePath && 'x2' in imagePath && 'y2' in imagePath) {
screenshot = robot.screen.capture(imagePath.x1, imagePath.y1, imagePath.x2 - imagePath.x1, imagePath.y2 - imagePath.y1);
}
new jimp(screenshot.width, screenshot.height, function (err, img) {
img.bitmap.data = screenshot.image;
//img.greyscale();
//img.invert();
img.scale(2);
//img.contrast(0.25);
img.write('./eto.tmp.jpg');
easyTesseractOCR.getStaticOCRPromise({
imagePath: './eto.tmp.jpg',
compare: compare,
trainedData: trainedData
})
.then(function (result) {
resolve(result);
if(!reservedSample) {
fs.unlink('./eto.tmp.jpg');
}
})
.catch(function (err) {
reject(err);
});
});
} catch(e) {
reject(e);
}
});
} else {
//static ocr promise
return easyTesseractOCR.getStaticOCRPromise({
imagePath: imagePath,
compare: compare,
trainedData: trainedData
});
}
}
};
module.exports = easyTesseractOCR;