UNPKG

fv

Version:

FormVision is a Node.js library for extracting data from scanned forms.

184 lines (162 loc) 7.45 kB
// Generated by CoffeeScript 2.3.1
const dv = require('dv');
const async = require('async');
const {findBarcodes} = require('./find_barcodes');
const {findCheckboxes} = require('./find_checkboxes');
const {findText} = require('./find_text');
const {estimateTransform} = require('./estimate_transform');
const {matchBarcodes} = require('./match_barcodes');
const {matchText} = require('./match_text');
const {matchCheckboxes} = require('./match_checkboxes');
// Not referenced in this module; the require is kept so module loading is unchanged.
const {unpack} = require('./schema');

/**
 * Palette used by `distinctColor`. Kept at module level so it is built once
 * instead of on every call.
 */
const DISTINCT_COLORS = Object.freeze([
  "#000000", "#FFFF00", "#1CE6FF", "#FF34FF", "#FF4A46", "#008941", "#006FA6", "#A30059",
  "#FFDBE5", "#7A4900", "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87",
  "#5A0007", "#809693", "#FEFFE6", "#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80",
  "#61615A", "#BA0900", "#6B7900", "#00C2A0", "#FFAA92", "#FF90C9", "#B903AA", "#D16100",
  "#DDEFFF", "#000035", "#7B4F4B", "#A1C299", "#300018", "#0AA6D8", "#013349", "#00846F",
  "#372101", "#FFB500", "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2", "#C2FF99", "#001E09",
  "#00489C", "#6F0062", "#0CBD66", "#EEC3FF", "#456D75", "#B77B68", "#7A87A1", "#788D66",
  "#885578", "#FAD09F", "#FF8A9A", "#D157A0", "#BEC459", "#456648", "#0086ED", "#886F4C",
  "#34362D", "#B4A8BD", "#00A6AA", "#452C2C", "#636375", "#A3C8C9", "#FF913F", "#938A81",
  "#575329", "#00FECF", "#B05B6F", "#8CD0FF", "#3B9700", "#04F757", "#C8A1A1", "#1E6E00",
  "#7900D7", "#A77500", "#6367A9", "#A05837", "#6B002C", "#772600", "#D790FF", "#9B9700",
  "#549E79", "#FFF69F", "#201625", "#72418F", "#BC23FF", "#99ADC0", "#3A2465", "#922329",
  "#5B4534", "#FDE8DC", "#404E55", "#0089A3", "#CB7E98", "#A4E804", "#324E72", "#6A3A4C",
  "#83AB58", "#001C1E", "#D1F7CE", "#004B28", "#C8D0F6", "#A3A489", "#806C66", "#222800",
  "#BF5650", "#E83000", "#66796D", "#DA007C", "#FF1A59", "#8ADBB4", "#1E0200", "#5B4E51",
  "#C895C5", "#320033", "#FF6832", "#66E1D3", "#CFCDAC", "#D0AC94", "#7ED379", "#012C58"
]);

/**
 * Resolve a schema field to its already-matched counterpart in `formData` by
 * walking the field's dot-separated `path`.
 *
 * @param {?{box: *, path: string}} field - Schema field; ignored unless it has both `box` and `path`.
 * @param {Object} formData - Matched form data to walk.
 * @returns {Object|undefined} The value found at `field.path` when it carries a
 *   `box`; `undefined` when the field is unusable, a path segment is missing,
 *   or the value has no `box`.
 */
function matchByPath(field, formData) {
  if (field == null || field.box == null || field.path == null) {
    return;
  }
  let tail = formData;
  for (const item of field.path.split('.')) {
    tail = tail[item];
    if (tail == null) {
      return;
    }
  }
  if (tail != null && tail.box != null) {
    return tail;
  }
}

/**
 * Pick a palette colour for `index`, cycling through the palette.
 *
 * @param {number} index - Integer index; negative values wrap around instead of throwing.
 * @returns {number[]} A fresh `[red, green, blue]` array with components in 0-255.
 */
function distinctColor(index) {
  const size = DISTINCT_COLORS.length;
  // `%` keeps the sign of the dividend, so normalise to a non-negative slot.
  const color = DISTINCT_COLORS[((index % size) + size) % size];
  return [
    parseInt(color.slice(1, 3), 16),
    parseInt(color.slice(3, 5), 16),
    parseInt(color.slice(5, 7), 16)
  ];
}

/**
 * Result of `FormReader#find`: the per-stage detections and the image each
 * stage returned.
 *
 * `data[1]` holds barcodes, `data[2]` text and `data[3]` checkboxes; slot 0 is
 * unused. `images[0]` is the input image and `images[1..3]` are the images
 * returned by the barcode, text and checkbox stages respectively.
 */
class Form {
  /**
   * @param {Array} data - Detection results, indexed as described on the class.
   * @param {Array} images - Stage images, indexed as described on the class.
   */
  constructor(data, images) {
    // Bound so the methods can be handed around as bare callbacks.
    this.match = this.match.bind(this);
    this.toImage = this.toImage.bind(this);
    this.toObject = this.toObject.bind(this);
    this.data = data;
    this.images = images;
  }

  /**
   * Match the detections against `formSchema` and hand the result to `cb`.
   *
   * Also stores the anchors reported by `matchText` on `this.anchors`, which
   * `toImage` draws.
   *
   * @param {Object} formSchema - Schema; reads `schemaToPage`, `page`, `words` and `fields`.
   * @param {function(?Error, Object=)} cb - Called with `(err)` when a form
   *   validator fails, otherwise with `(null, formData)`.
   */
  match(formSchema, cb) {
    const formData = {};

    // Test if schema to page mapping was provided, estimate otherwise.
    // When neither is available `schemaToPage` stays undefined and is passed on as such.
    let schemaToPage;
    if (typeof formSchema.schemaToPage === 'function') {
      schemaToPage = formSchema.schemaToPage;
    } else if (formSchema.page != null) {
      const fallbackScale = this.images[0].width / formSchema.page.width;
      schemaToPage = estimateTransform(formSchema.words, this.data[2], fallbackScale);
    }

    // Match barcodes invariant to transformation changes.
    matchBarcodes(formData, formSchema, this.data[1], schemaToPage);

    // Match text and verify cleanliness of empty text fields.
    const {anchors} = matchText(formData, formSchema, this.data[2], schemaToPage, this.images[0]);
    this.anchors = anchors;

    // Estimate schema to fields transform and match checkboxes.
    const schemaToFields = estimateTransform(formSchema.fields, formData, 1, 1, matchByPath);
    matchCheckboxes(formData, formSchema, this.data[3], this.data[2], schemaToPage, schemaToFields);

    // Call form validators.
    async.forEach(formSchema.fields, function(field, nextField) {
      if (field.formValidator != null) {
        return field.formValidator(formData, nextField);
      }
      return nextField();
    }, function(err) {
      if (err != null) {
        return cb(err);
      }
      return cb(null, formData);
    });
  }

  /**
   * Render a debug image: every stage image side by side, with the detected
   * boxes and (after `match`) the text anchors drawn on top.
   *
   * @returns {dv.Image} A new 32-bit image, `images.length` panels wide.
   */
  toImage() {
    const panelWidth = this.images[0].width;
    const panelHeight = this.images[0].height;
    const resultImage = new dv.Image(panelWidth * this.images.length, panelHeight, 32);

    // Shift a box horizontally into the panel with the given index.
    const toPanel = (box, index) => ({
      x: box.x + panelWidth * index,
      y: box.y,
      width: box.width,
      height: box.height
    });

    const panelBox = {x: 0, y: 0, width: panelWidth, height: panelHeight};
    this.images.forEach((image, index) => {
      if (image != null) {
        resultImage.drawImage(image.toColor(), toPanel(panelBox, index));
      }
    });

    // data[1..3] are drawn onto panels 0..2, i.e. onto the image each stage received.
    const stages = this.data.slice(1);
    for (let index = 0; index < stages.length; index++) {
      const found = stages[index];
      if (found == null) {
        continue;
      }
      for (let boxedIndex = 0, total = found.length; boxedIndex < total; boxedIndex++) {
        const boxed = found[boxedIndex];
        try {
          const candidates = boxed.candidate;
          const count = candidates.length;
          // The colour depends only on the entry, not on the individual candidate.
          const [red, green, blue] = distinctColor(boxedIndex);
          for (let i = 0; i < count; i++) {
            resultImage.drawBox(toPanel(candidates[i], index), 6, red, green, blue, 0.5);
          }
        } catch (error) {
          // Best effort: entries without candidates (or undrawable ones) are skipped.
        }
        try {
          resultImage.drawBox(toPanel(boxed.box, index), 2, 0, 0, 255, 0.5);
        } catch (error) {
          // Best effort: entries without a drawable box are skipped.
        }
      }
    }

    // Anchors go on panel 1, the same panel the text detections (data[2]) are drawn on.
    const anchors = this.anchors != null ? this.anchors : [];
    for (let i = 0, total = anchors.length; i < total; i++) {
      const anchor = anchors[i];
      const wordBox = anchor.word.box;
      const shifted = {
        x: wordBox.x + anchor.offset.x,
        y: wordBox.y + anchor.offset.y,
        width: wordBox.width,
        height: wordBox.height
      };
      resultImage.drawBox(toPanel(shifted, 1), 4, 0, 255, 50, 0.5);
      resultImage.drawLine(toPanel(shifted, 1), toPanel(wordBox, 1), 4, 0, 255, 255, 0.5);
    }
    return resultImage;
  }

  /**
   * @returns {{barcodes: *, text: *, checkboxes: *}} The raw detections by kind.
   */
  toObject() {
    return {
      barcodes: this.data[1],
      text: this.data[2],
      checkboxes: this.data[3]
    };
  }
}

/**
 * Runs barcode, text and checkbox detection on an image and wraps the results
 * in a `Form`.
 */
class FormReader {
  /**
   * @param {string} [language='eng'] - Language passed to `dv.Tesseract`.
   * @param {?dv.Image} [image=null] - Image to read; may also be assigned to `this.image` later.
   */
  constructor(language = 'eng', image = null) {
    this.find = this.find.bind(this);
    this.image = image;
    this.tesseract = new dv.Tesseract(language);
    this.tesseract.pageSegMode = 'single_block';
    // NOTE(review): presumably disables Tesseract's adaptive classifier so results
    // do not depend on previously read pages - confirm against the Tesseract docs.
    this.tesseract.classify_enable_learning = 0;
    this.tesseract.classify_enable_adaptive_matcher = 0;
    this.zxing = new dv.ZXing();
  }

  /**
   * Run the detection stages in order; each stage receives the image returned
   * by the previous one.
   *
   * @returns {Form} The detections and per-stage images.
   */
  find() {
    const data = [null, null, null, null];
    const images = [this.image, null, null, null];
    [data[1], images[1]] = findBarcodes(images[0], this.zxing);
    [data[2], images[2]] = findText(images[1], this.tesseract);
    [data[3], images[3]] = findCheckboxes(images[2]);
    return new Form(data, images);
  }
}

module.exports = FormReader;