docparse-upload-process
Version:
process upload api request for the docparse server
114 lines (99 loc) • 3.18 kB
JavaScript
var inspect = require('eyespect').inspector();
var fs = require('fs');
var request = require('request');
var async = require('async');
// Module-level settings store. performOCR assigns it from data.config on each
// call and getPdferURL reads the pdfer:* keys from it through config.get(key).
var config;
module.exports = function performOCR(data, callback) {
config = data.config;
var self = this;
var file_name = data.file_name;
var upload = data.upload;
async.series([
function(cb) {
markUploadOCRStatusAsRunning(upload, cb);
},
function(cb) {
// perfomr the ocr with the remote pdfer service
getPdferURL(function (err, reply) {
if (err) { return cb(err); }
data.url = reply;
cb();
});
},
function(cb) {
sendToPDFer(data, function (err, upload) {
if (err) { return cb(err); }
data.upload = upload;
cb();
});
},
], function(err) {
if (err) { return callback(err); }
callback(null, data.upload);
});
}
/**
 * Post the upload's file to the remote pdfer service as a multipart form
 * (fields: type="ocr", upload=<file stream>) and store the status pdfer
 * reports on the upload record.
 *
 * @param {Object} data
 * @param {string} data.url - pdfer endpoint to post to.
 * @param {string} data.filePath - path of the file to stream to pdfer.
 * @param {Object} data.upload - upload record; its status is overwritten with
 *   body.status from the pdfer response and then persisted with save(cb).
 * @param {Function} cb - called exactly once, as cb(errorMessage) on failure
 *   (errors are strings) or cb(null, data.upload) on success.
 */
function sendToPDFer(data, cb) {
  inspect('sending upload data to pdfer');

  // Both the request callback and the file stream's error handler can end
  // this operation; the guard makes sure cb fires exactly once.
  var finished = false;
  function finish(err, upload) {
    if (finished) { return; }
    finished = true;
    cb(err, upload);
  }

  var opts = {
    url: data.url,
    timeout: 120 * 1000, // give pdfer up to two minutes to answer
    json: true
  };
  var r = request.post(opts, function (err, response, body) {
    inspect('response recieved from pdfer');
    if (err) {
      // NOTE(review): when data.url comes from getPdferURL it embeds the pdfer
      // credentials, so this log line exposes them; consider redacting.
      inspect(err, 'error during upload create api request. The request to the remote pdfer service failed at url: ' + opts.url);
      return finish('error performing ocr on your upload: ' + err);
    }
    if (response.statusCode !== 200) {
      inspect(body, 'body');
      // json:true parses the body, so serialize objects instead of letting
      // string concatenation print "[object Object]".
      var bodyText = typeof body === 'string' ? body : JSON.stringify(body);
      return finish('error performing ocr on your upload. Invalid status code returned from pdfer service, body:' + bodyText);
    }
    if (!body || typeof body !== 'object') {
      // An empty or non-JSON 200 response carries no status; reading
      // body.status would throw or wipe the upload's current status.
      inspect(body, 'body');
      return finish('error performing ocr on your upload. Invalid response body returned from pdfer service');
    }
    data.upload.status = body.status;
    data.upload.save(function (saveErr) {
      if (saveErr) {
        return finish('error performing ocr on your upload: ' + saveErr);
      }
      finish(null, data.upload);
    });
  });

  // The multipart form is attached after request.post() returns; this relies
  // on request deferring the actual send until the form has been populated.
  var form = r.form();
  form.append('type', 'ocr');
  var readStream = fs.createReadStream(data.filePath);
  readStream.on('error', function (err) {
    inspect(err, 'error reading test pdf file');
    if (finished) { return; }
    // Without the file there is nothing to OCR: cancel the request and report
    // the failure now instead of leaving the caller waiting for the timeout.
    r.abort();
    finish('error performing ocr on your upload: ' + err);
  });
  form.append('upload', readStream);
}
/**
 * Flag an upload as having OCR under way and persist the change.
 *
 * @param {Object} upload - record exposing ocr_status, status and save(cb).
 * @param {Function} cb - handed straight to upload.save.
 */
function markUploadOCRStatusAsRunning(upload, cb) {
  var ocrStatus = upload.ocr_status;

  upload.status = 'ocr in progress';
  ocrStatus.message = 'ocr running';
  ocrStatus.started = true;

  upload.save(cb);
}
/**
 * Flag an upload as having finished OCR (and awaiting parsing), then persist it.
 *
 * @param {Object} upload - record exposing ocr_status, status and save(cb).
 * @param {Function} cb - handed straight to upload.save.
 */
function markUploadOCRStatusAsComplete(upload, cb) {
  // The same text is stored on both the OCR sub-status and the upload itself.
  var completedMessage = 'ocr complete, waiting to parse';
  var ocrStatus = upload.ocr_status;

  ocrStatus.complete = true;
  ocrStatus.started = false;
  ocrStatus.running = false;
  ocrStatus.message = completedMessage;
  upload.status = completedMessage;

  upload.save(cb);
}
/**
 * Build the URL of the remote pdfer "send" endpoint from the module-level
 * config (keys pdfer:username, pdfer:password, pdfer:host, pdfer:port).
 *
 * The result has the form http://<username>:<password>@<host>[:<port>]/api/send.
 * The callback is invoked synchronously.
 *
 * @param {Function} cb - called as cb(errorMessage) when username, password or
 *   host is missing from the config (the error is a string), otherwise as
 *   cb(null, url).
 */
function getPdferURL(cb) {
  var username = config.get('pdfer:username');
  var password = config.get('pdfer:password');
  var host = config.get('pdfer:host');
  var port = config.get('pdfer:port');
  if (!username || !password || !host) {
    inspect('failed to create new docparse upload via api, pdfer settings missing from config parameter');
    return cb('failed to perform ocr on upload, pdfer service not available');
  }
  // Percent-encode the credentials so characters such as '@', ':' or '/'
  // cannot corrupt the authority part of the URL.
  // NOTE(review): assumes the configured values are stored raw, not already
  // percent-encoded - confirm against the deployed config.
  var credentials = encodeURIComponent(username) + ':' + encodeURIComponent(password);
  // The port is optional; leave it out rather than emitting "host:undefined".
  var authority = port ? host + ':' + port : host;
  var url = 'http://' + credentials + '@' + authority + '/api/send';
  return cb(null, url);
}