UNPKG

nfv

Version:

[Updated to use the ndv package instead of dv] FormVision is a Node.js library for extracting data from scanned forms

188 lines (165 loc) 7.7 kB
// Generated by CoffeeScript 1.12.7 var Form, FormReader, async, distinctColor, dv, estimateTransform, findBarcodes, findCheckboxes, findText, matchBarcodes, matchByPath, matchCheckboxes, matchText, unpack, bind = function(fn, me){ return function(){ return fn.apply(me, arguments); }; }; dv = require('ndv'); async = require('async'); findBarcodes = require('./find_barcodes').findBarcodes; findCheckboxes = require('./find_checkboxes').findCheckboxes; findText = require('./find_text').findText; estimateTransform = require('./estimate_transform').estimateTransform; matchBarcodes = require('./match_barcodes').matchBarcodes; matchText = require('./match_text').matchText; matchCheckboxes = require('./match_checkboxes').matchCheckboxes; unpack = require('./schema').unpack; matchByPath = function(field, formData) { var i, item, len, ref, tail; if (!(((field != null ? field.box : void 0) != null) && ((field != null ? field.path : void 0) != null))) { return; } tail = formData; ref = field.path.split('.'); for (i = 0, len = ref.length; i < len; i++) { item = ref[i]; tail = tail[item]; if (tail == null) { return; } } if ((tail != null ? 
tail.box : void 0) != null) { return tail; } }; distinctColor = function(index) { var color, colors; colors = ["#000000", "#FFFF00", "#1CE6FF", "#FF34FF", "#FF4A46", "#008941", "#006FA6", "#A30059", "#FFDBE5", "#7A4900", "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87", "#5A0007", "#809693", "#FEFFE6", "#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80", "#61615A", "#BA0900", "#6B7900", "#00C2A0", "#FFAA92", "#FF90C9", "#B903AA", "#D16100", "#DDEFFF", "#000035", "#7B4F4B", "#A1C299", "#300018", "#0AA6D8", "#013349", "#00846F", "#372101", "#FFB500", "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2", "#C2FF99", "#001E09", "#00489C", "#6F0062", "#0CBD66", "#EEC3FF", "#456D75", "#B77B68", "#7A87A1", "#788D66", "#885578", "#FAD09F", "#FF8A9A", "#D157A0", "#BEC459", "#456648", "#0086ED", "#886F4C", "#34362D", "#B4A8BD", "#00A6AA", "#452C2C", "#636375", "#A3C8C9", "#FF913F", "#938A81", "#575329", "#00FECF", "#B05B6F", "#8CD0FF", "#3B9700", "#04F757", "#C8A1A1", "#1E6E00", "#7900D7", "#A77500", "#6367A9", "#A05837", "#6B002C", "#772600", "#D790FF", "#9B9700", "#549E79", "#FFF69F", "#201625", "#72418F", "#BC23FF", "#99ADC0", "#3A2465", "#922329", "#5B4534", "#FDE8DC", "#404E55", "#0089A3", "#CB7E98", "#A4E804", "#324E72", "#6A3A4C", "#83AB58", "#001C1E", "#D1F7CE", "#004B28", "#C8D0F6", "#A3A489", "#806C66", "#222800", "#BF5650", "#E83000", "#66796D", "#DA007C", "#FF1A59", "#8ADBB4", "#1E0200", "#5B4E51", "#C895C5", "#320033", "#FF6832", "#66E1D3", "#CFCDAC", "#D0AC94", "#7ED379", "#012C58"]; color = colors[index % colors.length]; return [parseInt(color.slice(1, 3), 16), parseInt(color.slice(3, 5), 16), parseInt(color.slice(5, 7), 16)]; }; Form = (function() { function Form(data1, images1) { this.data = data1; this.images = images1; this.toObject = bind(this.toObject, this); this.toImage = bind(this.toImage, this); this.match = bind(this.match, this); } Form.prototype.match = function(formSchema, cb) { var anchors, fallbackScale, formData, schemaToFields, 
schemaToPage; formData = {}; if (typeof formSchema.schemaToPage === 'function') { schemaToPage = formSchema.schemaToPage; } else if (formSchema.page != null) { fallbackScale = this.images[0].width / formSchema.page.width; schemaToPage = estimateTransform(formSchema.words, this.data[2], fallbackScale); } matchBarcodes(formData, formSchema, this.data[1], schemaToPage); anchors = matchText(formData, formSchema, this.data[2], schemaToPage, this.images[0]).anchors; this.anchors = anchors; schemaToFields = estimateTransform(formSchema.fields, formData, 1, 1, matchByPath); matchCheckboxes(formData, formSchema, this.data[3], this.data[2], schemaToPage, schemaToFields); async.forEach(formSchema.fields, function(field, nextField) { if (field.formValidator != null) { return field.formValidator(formData, nextField); } else { return nextField(); } }, function(err) { if (err != null) { return cb(err); } return cb(null, formData); }); }; Form.prototype.toImage = function() { var anchor, box, boxed, boxedIndex, candidateBox, color, data, i, image, imageBox, imageOffset, index, j, k, l, len, len1, len2, len3, len4, m, ref, ref1, ref2, ref3, ref4, resultImage; resultImage = new dv.Image(this.images[0].width * this.images.length, this.images[0].height, 32); imageOffset = (function(_this) { return function(box, index) { return { x: box.x + _this.images[0].width * index, y: box.y, width: box.width, height: box.height }; }; })(this); imageBox = { x: 0, y: 0, width: this.images[0].width, height: this.images[0].height }; ref = this.images; for (index = i = 0, len = ref.length; i < len; index = ++i) { image = ref[index]; if (image != null) { resultImage.drawImage(image.toColor(), imageOffset(imageBox, index)); } } ref1 = this.data.slice(1); for (index = j = 0, len1 = ref1.length; j < len1; index = ++j) { data = ref1[index]; if (data != null) { for (boxedIndex = k = 0, len2 = data.length; k < len2; boxedIndex = ++k) { boxed = data[boxedIndex]; try { ref2 = boxed.candidate; for (l = 0, len3 
= ref2.length; l < len3; l++) { candidateBox = ref2[l]; color = distinctColor(boxedIndex); resultImage.drawBox(imageOffset(candidateBox, index), 6, color[0], color[1], color[2], 0.5); } } catch (error) {} try { resultImage.drawBox(imageOffset(boxed.box, index), 2, 0, 0, 255, 0.5); } catch (error) {} } } } ref4 = (ref3 = this.anchors) != null ? ref3 : []; for (index = m = 0, len4 = ref4.length; m < len4; index = ++m) { anchor = ref4[index]; box = { x: anchor.word.box.x + anchor.offset.x, y: anchor.word.box.y + anchor.offset.y, width: anchor.word.box.width, height: anchor.word.box.height }; resultImage.drawBox(imageOffset(box, 1), 4, 0, 255, 50, 0.5); resultImage.drawLine(imageOffset(box, 1), imageOffset(anchor.word.box, 1), 4, 0, 255, 255, 0.5); } return resultImage; }; Form.prototype.toObject = function() { return { barcodes: this.data[1], text: this.data[2], checkboxes: this.data[3] }; }; return Form; })(); module.exports = FormReader = (function() { function FormReader(language, image1) { if (language == null) { language = 'eng'; } this.image = image1 != null ? image1 : null; this.find = bind(this.find, this); this.tesseract = new dv.Tesseract(language); this.tesseract.pageSegMode = 'single_block'; this.tesseract.classify_enable_learning = 0; this.tesseract.classify_enable_adaptive_matcher = 0; this.zxing = new dv.ZXing(); } FormReader.prototype.find = function() { var data, images, ref, ref1, ref2; data = [null, null, null, null]; images = [this.image, null, null, null]; ref = findBarcodes(images[0], this.zxing), data[1] = ref[0], images[1] = ref[1]; ref1 = findText(images[1], this.tesseract), data[2] = ref1[0], images[2] = ref1[1]; ref2 = findCheckboxes(images[2]), data[3] = ref2[0], images[3] = ref2[1]; return new Form(data, images); }; return FormReader; })();