UNPKG

nfv

Version:

[Updated to use the ndv package instead of dv] FormVision is a Node.js library for extracting data from scanned forms

155 lines (146 loc) 5.18 kB
// Generated by CoffeeScript 1.12.7
//
// Checkbox matching: fills in the form-data entry of every schema field of
// type 'checkbox', first from short recognized words found near the field,
// then from detected marks for the fields no word was accepted for.
var distance, estimateTransform, findClosest, findClosestMark,
  findClosestShortWord, isShortWord, matchByMark, matchByWords, ref, unpack,
  validate, withoutPath;

ref = require('./schema');
unpack = ref.unpack;
validate = ref.validate;

// Not referenced in this module; the require is kept so that module loading
// behaves exactly as before.
estimateTransform = require('./estimate_transform').estimateTransform;

distance = require('./math').distance;

/**
 * Returns the index of the item whose `box` is closest to `box`, considering
 * only items strictly closer than `maxDistance`.
 *
 * @param {Array} items - Objects carrying a `box` property.
 * @param {Object} box - Reference box handed to `distance` as second argument.
 * @param {number} maxDistance - Exclusive upper bound on the accepted distance.
 * @param {Function} [accept] - Optional predicate; items for which it returns
 *   a falsy value are skipped without measuring their distance.
 * @returns {number} Index into `items`, or -1 when nothing qualifies.
 */
findClosest = function(items, box, maxDistance, accept) {
  var closestIndex, dist, index, len, minDistance;
  minDistance = maxDistance;
  closestIndex = -1;
  for (index = 0, len = items.length; index < len; index++) {
    if (accept && !accept(items[index])) {
      continue;
    }
    dist = distance(items[index].box, box);
    // Strict comparison: on equal distances the earliest item wins.
    if (dist < minDistance) {
      minDistance = dist;
      closestIndex = index;
    }
  }
  return closestIndex;
};

/**
 * Tells whether a word's text is shorter than three characters.
 *
 * @param {Object} word - Object with a `text` string.
 * @returns {boolean}
 */
isShortWord = function(word) {
  return word.text.length < 3;
};

/**
 * Returns a copy of `paths` with every entry equal to `ownPath` removed.
 *
 * @param {Array} paths - Field paths sharing one word or mark.
 * @param {*} ownPath - Path of the field the list is computed for.
 * @returns {Array} The other fields' paths.
 */
withoutPath = function(paths, ownPath) {
  return paths.filter(function(path) {
    return path !== ownPath;
  });
};

/**
 * Finds the closest word with fewer than three characters of text.
 *
 * @param {Array} words - Objects with `text` and `box`.
 * @param {Object} box - Reference box.
 * @param {number} maxDistance - Exclusive upper bound on the distance.
 * @returns {number} Index into `words`, or -1.
 */
findClosestShortWord = function(words, box, maxDistance) {
  return findClosest(words, box, maxDistance, isShortWord);
};

/**
 * Assigns short words to fields and writes the result into the form data.
 *
 * For each field the closest short word is searched around
 * `schemaToData(field.box)` and, if none is found, around
 * `schemaToPage(field.box)`; both searches are bounded by the width of the
 * data box. A word is assigned only when `validate(field, text, false)` is
 * truthy. Each assigned field's data (obtained via `unpack`) receives
 * `value`, `confidence`, `box` and `conflicts`, the latter listing the paths
 * of the other fields that were assigned the same word.
 *
 * @param {Object} formData - Passed to `unpack` together with the field path.
 * @param {Array} fields - Fields with `box` and `path`.
 * @param {Array} words - Objects with `text`, `confidence` and `box`.
 * @param {Function} schemaToPage - Maps a field box to another box.
 * @param {Function} schemaToData - Maps a field box to a box with a `width`.
 * @returns {Array} The fields a word was assigned to, in input order.
 */
matchByWords = function(formData, fields, words, schemaToPage, schemaToData) {
  var closeIndex, dataBox, field, fieldData, i, len, matchedFields, matches,
    word, wordUsage;
  matchedFields = [];
  matches = [];
  wordUsage = [];

  // Pass 1: pick a word per field and record which fields share each word.
  for (i = 0, len = fields.length; i < len; i++) {
    field = fields[i];
    dataBox = schemaToData(field.box);
    closeIndex = findClosestShortWord(words, dataBox, dataBox.width);
    if (closeIndex === -1) {
      closeIndex = findClosestShortWord(words, schemaToPage(field.box), dataBox.width);
    }
    if (closeIndex === -1) {
      continue;
    }
    if (validate(field, words[closeIndex].text, false)) {
      if (wordUsage[closeIndex] == null) {
        wordUsage[closeIndex] = [];
      }
      wordUsage[closeIndex].push(field.path);
      matchedFields.push(field);
      matches.push(closeIndex);
    }
  }

  // Pass 2: conflicts can only be listed once every field has been assigned.
  for (i = 0, len = matchedFields.length; i < len; i++) {
    field = matchedFields[i];
    word = words[matches[i]];
    fieldData = unpack(formData, field.path);
    fieldData.value = word.text;
    fieldData.confidence = word.confidence;
    fieldData.box = word.box;
    fieldData.conflicts = withoutPath(wordUsage[matches[i]], field.path);
  }
  return matchedFields;
};

/**
 * Finds the closest mark.
 *
 * @param {Array} marks - Objects with a `box`.
 * @param {Object} box - Reference box.
 * @param {number} maxDistance - Exclusive upper bound on the distance.
 * @returns {number} Index into `marks`, or -1.
 */
findClosestMark = function(marks, box, maxDistance) {
  return findClosest(marks, box, maxDistance);
};

/**
 * Assigns detected marks to fields and writes the result into the form data.
 *
 * With `pageBox = schemaToPage(field.box)`, a field takes the closest mark's
 * `checked`, `confidence` and `box` when that mark lies within 0.95 page-box
 * widths. Otherwise the field gets `value: false` and `box: pageBox`, with
 * confidence 100 when no mark lies within 3 widths, or a confidence that
 * grows with the distance of the closest mark relative to 3 widths.
 * `conflicts` lists the paths of the other fields that took the same mark.
 *
 * @param {Object} formData - Passed to `unpack` together with the field path.
 * @param {Array} fields - Fields with `box` and `path`.
 * @param {Array} marks - Objects with `box`, `checked` and `confidence`.
 * @param {Function} schemaToPage - Maps a field box to a box with a `width`.
 * @returns {undefined}
 */
matchByMark = function(formData, fields, marks, schemaToPage) {
  var closeIndex, farDistance, field, fieldData, i, len, mark, markDistance,
    markUsage, match, matches, nearDistance, pageBox;
  matches = {};
  markUsage = [];

  // Pass 1: decide a match per field path and record which fields share marks.
  for (i = 0, len = fields.length; i < len; i++) {
    field = fields[i];
    pageBox = schemaToPage(field.box);
    nearDistance = pageBox.width * 0.95;
    farDistance = pageBox.width * 3.00;
    closeIndex = findClosestMark(marks, pageBox, farDistance);
    if (closeIndex === -1) {
      matches[field.path] = {
        index: -1,
        value: false,
        confidence: 100,
        box: pageBox
      };
      continue;
    }
    mark = marks[closeIndex];
    // Measured once and reused; assumes `distance` is a pure function.
    markDistance = distance(mark.box, pageBox);
    if (markDistance > nearDistance) {
      matches[field.path] = {
        index: -1,
        value: false,
        confidence: Math.round((markDistance / farDistance) * 100),
        box: pageBox
      };
    } else {
      matches[field.path] = {
        index: closeIndex,
        value: mark.checked,
        confidence: mark.confidence,
        box: mark.box
      };
      if (markUsage[closeIndex] == null) {
        markUsage[closeIndex] = [];
      }
      markUsage[closeIndex].push(field.path);
    }
  }

  // Pass 2: write the matches, now that mark sharing is fully known.
  for (i = 0, len = fields.length; i < len; i++) {
    field = fields[i];
    match = matches[field.path];
    fieldData = unpack(formData, field.path);
    fieldData.value = match.value;
    fieldData.confidence = match.confidence;
    fieldData.box = match.box;
    if (match.index === -1) {
      fieldData.conflicts = [];
    } else {
      fieldData.conflicts = withoutPath(markUsage[match.index], field.path);
    }
  }
};

/**
 * Fills in the form data of every schema field whose `type` is 'checkbox'.
 *
 * Fields are first matched against short words; the fields that did not get
 * a word are then matched against marks.
 *
 * @param {Object} formData - Form data that is updated in place via `unpack`.
 * @param {Object} formSchema - Object with a `fields` array.
 * @param {Array} marks - Detected marks (`box`, `checked`, `confidence`).
 * @param {Array} words - Recognized words (`text`, `confidence`, `box`).
 * @param {Function} schemaToPage - Maps a field box to a box with a `width`.
 * @param {Function} schemaToData - Maps a field box to a box with a `width`.
 * @returns {undefined}
 */
module.exports.matchCheckboxes = function(formData, formSchema, marks, words, schemaToPage, schemaToData) {
  var assignedFields, checkboxFields, remainingFields;
  checkboxFields = formSchema.fields.filter(function(field) {
    return field.type === 'checkbox';
  });
  assignedFields = matchByWords(formData, checkboxFields, words, schemaToPage, schemaToData);
  remainingFields = checkboxFields.filter(function(field) {
    return assignedFields.indexOf(field) < 0;
  });
  matchByMark(formData, remainingFields, marks, schemaToPage);
};