nfv
Version:
[Updated to use the ndv package instead of dv] FormVision is a Node.js library for extracting data from scanned forms.
188 lines (165 loc) • 7.7 kB
JavaScript
// Generated by CoffeeScript 1.12.7
// Module-scope bindings. The require() calls and the definitions further down
// in this file assign to these names.
var Form, FormReader, async, distinctColor, dv, estimateTransform, findBarcodes, findCheckboxes, findText, matchBarcodes, matchByPath, matchCheckboxes, matchText, unpack;

/**
 * Produces a wrapper around `method` that always runs it with `context` as
 * `this`, forwarding every argument and returning the method's result.
 *
 * @param {Function} method - function to pin to a receiver.
 * @param {*} context - value used as `this` on every call.
 * @returns {Function} the pinned wrapper.
 */
var bind = function(method, context) {
  return function() {
    return method.apply(context, arguments);
  };
};
dv = require('ndv');
async = require('async');
findBarcodes = require('./find_barcodes').findBarcodes;
findCheckboxes = require('./find_checkboxes').findCheckboxes;
findText = require('./find_text').findText;
estimateTransform = require('./estimate_transform').estimateTransform;
matchBarcodes = require('./match_barcodes').matchBarcodes;
matchText = require('./match_text').matchText;
matchCheckboxes = require('./match_checkboxes').matchCheckboxes;
unpack = require('./schema').unpack;
/**
 * Looks up the form-data entry addressed by a field's dotted `path`.
 *
 * The lookup is only attempted when the field has both a `box` and a `path`.
 * Each path segment is used as a property key, starting from `formData`.
 *
 * @param {Object} field - schema field; may be null/undefined.
 * @param {Object} formData - object to walk.
 * @returns {Object|undefined} the entry found at the path when it has a
 *   `box`; otherwise undefined (also when any step of the path is missing).
 */
matchByPath = function(field, formData) {
  var keys, node, depth;
  if (field == null || field.box == null || field.path == null) {
    return;
  }
  keys = field.path.split('.');
  node = formData;
  for (depth = 0; depth < keys.length; depth += 1) {
    node = node[keys[depth]];
    if (node == null) {
      // A missing link anywhere along the path means there is nothing to match.
      return;
    }
  }
  return node != null && node.box != null ? node : void 0;
};
/**
 * Picks a color from a fixed palette of 128 entries and returns it as RGB.
 *
 * The index is floored and wrapped into the palette range, so negative and
 * fractional indices select a palette entry instead of failing on an
 * out-of-range lookup. Non-negative integer indices behave exactly as before
 * (`index % palette size`).
 *
 * @param {number} index - finite number selecting the palette entry.
 * @returns {number[]} `[red, green, blue]`, each component in 0..255.
 */
distinctColor = function(index) {
  var colors, hex, slot;
  colors = [
    "#000000", "#FFFF00", "#1CE6FF", "#FF34FF", "#FF4A46", "#008941", "#006FA6", "#A30059",
    "#FFDBE5", "#7A4900", "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87",
    "#5A0007", "#809693", "#FEFFE6", "#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80",
    "#61615A", "#BA0900", "#6B7900", "#00C2A0", "#FFAA92", "#FF90C9", "#B903AA", "#D16100",
    "#DDEFFF", "#000035", "#7B4F4B", "#A1C299", "#300018", "#0AA6D8", "#013349", "#00846F",
    "#372101", "#FFB500", "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2", "#C2FF99", "#001E09",
    "#00489C", "#6F0062", "#0CBD66", "#EEC3FF", "#456D75", "#B77B68", "#7A87A1", "#788D66",
    "#885578", "#FAD09F", "#FF8A9A", "#D157A0", "#BEC459", "#456648", "#0086ED", "#886F4C",
    "#34362D", "#B4A8BD", "#00A6AA", "#452C2C", "#636375", "#A3C8C9", "#FF913F", "#938A81",
    "#575329", "#00FECF", "#B05B6F", "#8CD0FF", "#3B9700", "#04F757", "#C8A1A1", "#1E6E00",
    "#7900D7", "#A77500", "#6367A9", "#A05837", "#6B002C", "#772600", "#D790FF", "#9B9700",
    "#549E79", "#FFF69F", "#201625", "#72418F", "#BC23FF", "#99ADC0", "#3A2465", "#922329",
    "#5B4534", "#FDE8DC", "#404E55", "#0089A3", "#CB7E98", "#A4E804", "#324E72", "#6A3A4C",
    "#83AB58", "#001C1E", "#D1F7CE", "#004B28", "#C8D0F6", "#A3A489", "#806C66", "#222800",
    "#BF5650", "#E83000", "#66796D", "#DA007C", "#FF1A59", "#8ADBB4", "#1E0200", "#5B4E51",
    "#C895C5", "#320033", "#FF6832", "#66E1D3", "#CFCDAC", "#D0AC94", "#7ED379", "#012C58"
  ];
  // `%` keeps the sign of the dividend and any fractional part, so floor first
  // and shift negative remainders back into 0..colors.length-1.
  slot = Math.floor(index) % colors.length;
  if (slot < 0) {
    slot += colors.length;
  }
  hex = colors[slot];
  // "#RRGGBB" -> three hexadecimal byte pairs.
  return [parseInt(hex.slice(1, 3), 16), parseInt(hex.slice(3, 5), 16), parseInt(hex.slice(5, 7), 16)];
};
Form = (function() {
  /**
   * Result of one recognition pass: detection lists plus the images they
   * belong to.
   *
   * `data[1]`, `data[2]` and `data[3]` are exposed by `toObject` as barcodes,
   * text and checkboxes; `data[0]` is not read by this class. `images[0]`
   * supplies the page width/height used for scaling and drawing.
   *
   * @param {Array} data1 - detection lists, indexed as described above.
   * @param {Array} images1 - images; entries may be null.
   */
  function Form(data1, images1) {
    this.data = data1;
    this.images = images1;
    // Pin the public methods to this instance so they keep working when
    // handed around as bare function references.
    this.toObject = bind(this.toObject, this);
    this.toImage = bind(this.toImage, this);
    this.match = bind(this.match, this);
  }

  // Outlines every entry of one detection list on `canvas`, shifted to the
  // given panel. Each detection contributes its `candidate` boxes (thick,
  // palette-colored by the detection's position) and then its own `box`
  // (thin, blue).
  function drawDetections(canvas, detections, panel, place) {
    var total = detections.length;
    var position, detection, candidates, c, candidateCount, rgb;
    for (position = 0; position < total; position++) {
      detection = detections[position];
      // Best effort: a detection without candidates, or a failing draw call,
      // skips the rest of this detection's candidates and nothing more.
      try {
        candidates = detection.candidate;
        for (c = 0, candidateCount = candidates.length; c < candidateCount; c++) {
          rgb = distinctColor(position);
          canvas.drawBox(place(candidates[c], panel), 6, rgb[0], rgb[1], rgb[2], 0.5);
        }
      } catch (ignored) {}
      // Best effort as well: a detection without a box is simply not outlined.
      try {
        canvas.drawBox(place(detection.box, panel), 2, 0, 0, 255, 0.5);
      } catch (ignored) {}
    }
  }

  /**
   * Matches the detections against a form schema and reports the extracted
   * form data through `cb`.
   *
   * The schema-to-page transform is `formSchema.schemaToPage` when that is a
   * function; otherwise, when `formSchema.page` is set, it is estimated from
   * the schema words and the text detections, with the width ratio of the
   * first image to the schema page as fallback scale. If neither applies the
   * transform stays undefined and is passed on as such.
   *
   * Side effect: stores the anchors returned by `matchText` in `this.anchors`.
   *
   * @param {Object} formSchema - schema with `fields` and optionally
   *   `schemaToPage`, `page`, `words`.
   * @param {Function} cb - called as `cb(err)` when a field's
   *   `formValidator` reports an error, else as `cb(null, formData)`.
   */
  Form.prototype.match = function(formSchema, cb) {
    var form = this;
    var formData = {};
    var schemaToPage, schemaToFields, pageScale, textMatch;

    if (typeof formSchema.schemaToPage === 'function') {
      schemaToPage = formSchema.schemaToPage;
    } else if (formSchema.page != null) {
      pageScale = form.images[0].width / formSchema.page.width;
      schemaToPage = estimateTransform(formSchema.words, form.data[2], pageScale);
    }

    matchBarcodes(formData, formSchema, form.data[1], schemaToPage);
    textMatch = matchText(formData, formSchema, form.data[2], schemaToPage, form.images[0]);
    form.anchors = textMatch.anchors;

    // Second transform: schema fields against what has been matched so far,
    // pairing entries through matchByPath.
    schemaToFields = estimateTransform(formSchema.fields, formData, 1, 1, matchByPath);
    matchCheckboxes(formData, formSchema, form.data[3], form.data[2], schemaToPage, schemaToFields);

    // Give every field that declares a formValidator a chance to inspect the
    // complete form data before the result is handed to the caller.
    async.forEach(formSchema.fields, function(field, nextField) {
      if (field.formValidator == null) {
        return nextField();
      }
      return field.formValidator(formData, nextField);
    }, function(err) {
      return err != null ? cb(err) : cb(null, formData);
    });
  };

  /**
   * Renders a debug image: all images side by side (one panel per entry of
   * `this.images`, each as wide as the first image), overlaid with the
   * detection boxes. Detection list `data[k + 1]` is drawn on panel `k`.
   * Anchors stored by `match`, if any, are drawn on panel 1.
   *
   * @returns {dv.Image} the composed image.
   */
  Form.prototype.toImage = function() {
    var form = this;
    var pages = form.images;
    var canvas = new dv.Image(pages[0].width * pages.length, pages[0].height, 32);
    var fullPage, layers, anchors, anchor, moved, panel, count, a;

    // Moves a box `panel` page widths to the right; the input is not modified.
    var shiftToPanel = function(box, panel) {
      return {
        x: box.x + form.images[0].width * panel,
        y: box.y,
        width: box.width,
        height: box.height
      };
    };

    fullPage = {
      x: 0,
      y: 0,
      width: pages[0].width,
      height: pages[0].height
    };

    // Background: one panel per available image; missing images stay blank.
    for (panel = 0, count = pages.length; panel < count; panel++) {
      if (pages[panel] != null) {
        canvas.drawImage(pages[panel].toColor(), shiftToPanel(fullPage, panel));
      }
    }

    // Overlays: data[0] is skipped, so layer k corresponds to data[k + 1].
    layers = form.data.slice(1);
    for (panel = 0, count = layers.length; panel < count; panel++) {
      if (layers[panel] != null) {
        drawDetections(canvas, layers[panel], panel, shiftToPanel);
      }
    }

    // Anchors: the word box displaced by the anchor offset, plus a line from
    // the displaced box to the word box itself.
    anchors = form.anchors != null ? form.anchors : [];
    for (a = 0, count = anchors.length; a < count; a++) {
      anchor = anchors[a];
      moved = {
        x: anchor.word.box.x + anchor.offset.x,
        y: anchor.word.box.y + anchor.offset.y,
        width: anchor.word.box.width,
        height: anchor.word.box.height
      };
      canvas.drawBox(shiftToPanel(moved, 1), 4, 0, 255, 50, 0.5);
      canvas.drawLine(shiftToPanel(moved, 1), shiftToPanel(anchor.word.box, 1), 4, 0, 255, 255, 0.5);
    }

    return canvas;
  };

  /**
   * Returns the raw detection lists under descriptive keys.
   *
   * @returns {{barcodes: *, text: *, checkboxes: *}} `data[1]`, `data[2]`
   *   and `data[3]` respectively.
   */
  Form.prototype.toObject = function() {
    var found = this.data;
    return {
      barcodes: found[1],
      text: found[2],
      checkboxes: found[3]
    };
  };

  return Form;
})();
module.exports = FormReader = (function() {
function FormReader(language, image1) {
if (language == null) {
language = 'eng';
}
this.image = image1 != null ? image1 : null;
this.find = bind(this.find, this);
this.tesseract = new dv.Tesseract(language);
this.tesseract.pageSegMode = 'single_block';
this.tesseract.classify_enable_learning = 0;
this.tesseract.classify_enable_adaptive_matcher = 0;
this.zxing = new dv.ZXing();
}
FormReader.prototype.find = function() {
var data, images, ref, ref1, ref2;
data = [null, null, null, null];
images = [this.image, null, null, null];
ref = findBarcodes(images[0], this.zxing), data[1] = ref[0], images[1] = ref[1];
ref1 = findText(images[1], this.tesseract), data[2] = ref1[0], images[2] = ref1[1];
ref2 = findCheckboxes(images[2]), data[3] = ref2[0], images[3] = ref2[1];
return new Form(data, images);
};
return FormReader;
})();