// cea-core
// Version:
// basic cea api
// 46 lines • 1.67 kB
// JavaScript
import fs from 'fs';
import fetch from 'node-fetch';
import * as tesseract from 'tesseract.js';
// Pull the worker factory off the tesseract.js namespace import.
const { createWorker } = tesseract;
// Destination of the downloaded English trained-data archive; its presence on
// disk is what downloadTessdata() uses to decide whether to download again.
const tessdataPath = '/tmp/eng.traineddata.gz';
/**
 * Make sure the English trained-data archive is available at `tessdataPath`.
 *
 * Sets the TESSDATA_PREFIX environment variable to /tmp, creates /tmp when it
 * is missing, and downloads the archive unless the file is already on disk.
 * Whatever download() resolves to is written to the console.
 *
 * @returns {Promise<void>}
 */
async function downloadTessdata() {
  process.env.TESSDATA_PREFIX = '/tmp';

  const tmpDirMissing = !fs.existsSync('/tmp');
  if (tmpDirMissing) {
    fs.mkdirSync('/tmp');
  } else if (fs.existsSync(tessdataPath)) {
    // Archive is already present; nothing to fetch.
    return;
  }

  const sourceUrl = 'https://beetcb.gitee.io/filetransfer/tmp/eng.traineddata.gz';
  const outcome = await download(sourceUrl, tessdataPath);
  console.log(outcome);
}
/**
 * Stream the response body of `url` into the file `filename`.
 *
 * Best effort: a non-OK HTTP status, a missing body, or a read/write error
 * during the transfer is logged with console.error and reported by resolving
 * to undefined instead of throwing. A rejection of fetch() itself propagates
 * to the caller. No partial file is left behind on failure, so a later
 * existence check does not mistake a broken download for a complete one.
 *
 * @param {string} url - Address to download from.
 * @param {string} filename - Path of the file to write.
 * @returns {Promise<string|undefined>} A confirmation message, or undefined
 *   when the transfer failed.
 */
async function download(url, filename) {
  // Request first: opening the file before fetch() would leave an empty file
  // on disk whenever the request itself fails.
  const res = await fetch(url);
  let fileOpened = false;
  try {
    if (!res.ok) {
      throw new Error(`Download of ${url} failed with HTTP status ${res.status}`);
    }
    if (!res.body) {
      throw new Error(`Download of ${url} returned no response body`);
    }
    await new Promise((resolve, reject) => {
      fileOpened = true;
      const stream = fs.createWriteStream(filename);
      let failure = null;
      const fail = (err) => {
        if (failure === null) {
          failure = err;
        }
        // Tear the file stream down; settling is deferred to 'close' so the
        // descriptor is released before the partial file gets removed.
        stream.destroy();
      };
      res.body.on('error', fail);
      stream.on('error', fail);
      stream.on('close', () => {
        if (failure === null) {
          resolve();
        } else {
          reject(failure);
        }
      });
      res.body.pipe(stream);
    });
    return `Downloaded tess data as ${filename}`;
  } catch (err) {
    console.error(err);
    // Only remove what this call created; an untouched pre-existing file stays.
    if (fileOpened && fs.existsSync(filename)) {
      fs.unlinkSync(filename);
    }
    return undefined;
  }
}
/**
 * Recognize the text of a captcha image with a tesseract.js worker.
 *
 * Ensures the English trained data has been downloaded, creates a worker
 * that reads language data from /tmp, restricts recognition to ASCII letters
 * and digits, and returns the text the worker reports.
 *
 * @param {string} captchaUrl - Image source handed unchanged to
 *   worker.recognize (presumably a URL, going by the name).
 * @returns {Promise<string>} The recognized text.
 */
async function ocr(captchaUrl) {
  await downloadTessdata();
  const worker = createWorker({
    langPath: '/tmp',
    cachePath: '/tmp',
  });
  try {
    await worker.load();
    await worker.loadLanguage('eng');
    await worker.initialize('eng');
    await worker.setParameters({
      tessedit_char_whitelist: 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890',
    });
    const { data: { text } } = await worker.recognize(captchaUrl);
    return text;
  } finally {
    // Release the worker on every path; previously a failure in any step
    // above skipped terminate() and leaked the worker.
    await worker.terminate();
  }
}
export default ocr;
//# sourceMappingURL=capcha.js.map