nfv
Version:
[Updated to use the ndv package instead of dv] FormVision is a Node.js library for extracting data from scanned forms
155 lines (146 loc) • 5.18 kB
JavaScript
// Generated by CoffeeScript 1.12.7
var distance, estimateTransform, findClosestMark, findClosestShortWord, matchByMark, matchByWords, ref, unpack, validate,
indexOf = [].indexOf || function(item) { for (var i = 0, l = this.length; i < l; i++) { if (i in this && this[i] === item) return i; } return -1; };
ref = require('./schema'), unpack = ref.unpack, validate = ref.validate;
estimateTransform = require('./estimate_transform').estimateTransform;
distance = require('./math').distance;
/**
 * Locates the short word (text shorter than three characters) nearest to a box.
 *
 * @param {Array} words - candidates; each entry is read for `text` and `box`.
 * @param {Object} box - reference box, passed to `distance` together with
 *   each candidate's own box.
 * @param {number} maxDistance - exclusive limit: a word qualifies only when
 *   its distance is strictly smaller than this value.
 * @returns {number} index into `words` of the nearest qualifying word, or -1
 *   when none qualifies. On equal distances the earliest word wins.
 */
findClosestShortWord = function(words, box, maxDistance) {
  var bestIndex = -1;
  var bestDistance = maxDistance;
  var total = words.length;
  var position, candidate, gap;

  for (position = 0; position < total; position += 1) {
    candidate = words[position];
    // Only words shorter than three characters take part in the search.
    if (candidate.text.length < 3) {
      gap = distance(candidate.box, box);
      if (gap < bestDistance) {
        bestDistance = gap;
        bestIndex = position;
      }
    }
  }

  return bestIndex;
};
/**
 * Assigns field values from short words found close to each field.
 *
 * For every field the nearest short word is looked up first around the box
 * produced by `schemaToData`, and, if that finds nothing, around the box
 * produced by `schemaToPage`. Both lookups use the data box's width as the
 * distance limit. A field is accepted only when `validate` approves the
 * word's text. Accepted fields then have their `value`, `confidence`, `box`
 * and `conflicts` written on the object returned by `unpack`.
 *
 * @param {Object} formData - container handed to `unpack` with a field path.
 * @param {Array} fields - fields to try; each is read for `box` and `path`.
 * @param {Array} words - words read for `text`, `confidence` and `box`.
 * @param {Function} schemaToPage - maps a field box to the fallback search box.
 * @param {Function} schemaToData - maps a field box to the primary search box;
 *   the result's `width` is used as the search limit.
 * @returns {Array} the accepted fields, in the order they were encountered.
 */
matchByWords = function(formData, fields, words, schemaToPage, schemaToData) {
  var accepted = [];   // { field, wordIndex } pairs, in encounter order
  var claimedBy = [];  // word index -> paths of all fields that took that word
  var total = fields.length;
  var cursor, candidate, dataBox, wordIndex;

  for (cursor = 0; cursor < total; cursor += 1) {
    candidate = fields[cursor];
    dataBox = schemaToData(candidate.box);

    wordIndex = findClosestShortWord(words, dataBox, dataBox.width);
    if (wordIndex === -1) {
      // Second attempt around the page box, same distance limit.
      wordIndex = findClosestShortWord(words, schemaToPage(candidate.box), dataBox.width);
    }

    if (wordIndex !== -1 && validate(candidate, words[wordIndex].text, false)) {
      if (claimedBy[wordIndex] == null) {
        claimedBy[wordIndex] = [];
      }
      claimedBy[wordIndex].push(candidate.path);
      accepted.push({ field: candidate, wordIndex: wordIndex });
    }
  }

  // Written in a second pass so `conflicts` sees every field that claimed
  // the same word, including ones encountered later.
  accepted.forEach(function(entry) {
    var target = unpack(formData, entry.field.path);
    var hit = words[entry.wordIndex];
    target.value = hit.text;
    target.confidence = hit.confidence;
    target.box = hit.box;
    target.conflicts = claimedBy[entry.wordIndex].filter(function(path) {
      return path !== entry.field.path;
    });
  });

  return accepted.map(function(entry) {
    return entry.field;
  });
};
/**
 * Locates the mark nearest to a box.
 *
 * @param {Array} marks - candidates; each entry is read for `box`.
 * @param {Object} box - reference box, passed to `distance` together with
 *   each mark's own box.
 * @param {number} maxDistance - exclusive limit: a mark qualifies only when
 *   its distance is strictly smaller than this value.
 * @returns {number} index into `marks` of the nearest qualifying mark, or -1
 *   when none qualifies. On equal distances the earliest mark wins.
 */
findClosestMark = function(marks, box, maxDistance) {
  var bestIndex = -1;
  var bestDistance = maxDistance;
  var total = marks.length;
  var position, gap;

  for (position = 0; position < total; position += 1) {
    gap = distance(marks[position].box, box);
    if (gap < bestDistance) {
      bestDistance = gap;
      bestIndex = position;
    }
  }

  return bestIndex;
};
/**
 * Assigns field values from the mark detected closest to each field.
 *
 * Each field's box is mapped through `schemaToPage`, and the nearest mark is
 * searched within three times the mapped box's width:
 *   - no mark in range: value `false`, confidence 100, box = mapped box;
 *   - nearest mark farther than 0.95 of the width: value `false`, confidence
 *     = distance as a rounded percentage of the search range, box = mapped box;
 *   - otherwise: the mark's `checked`, `confidence` and `box` are used.
 * The results, plus a `conflicts` list of other field paths that took the
 * same mark, are written on the object returned by `unpack`.
 *
 * @param {Object} formData - container handed to `unpack` with a field path.
 * @param {Array} fields - fields to resolve; each is read for `box` and `path`.
 * @param {Array} marks - marks read for `box`, `checked` and `confidence`.
 * @param {Function} schemaToPage - maps a field box to the search box; the
 *   result's `width` scales both distance thresholds.
 * @returns {undefined}
 */
matchByMark = function(formData, fields, marks, schemaToPage) {
  var outcomes = {};  // field path -> { index, value, confidence, box }
  var claimedBy = []; // mark index -> paths of all fields that took that mark
  var total = fields.length;
  var cursor, current, region, nearLimit, farLimit, markIndex, mark, gap, outcome, target;

  // Record for a field that ends up with no mark assigned to it.
  var unmarked = function(pageBox, confidence) {
    return { index: -1, value: false, confidence: confidence, box: pageBox };
  };

  // Paths other than `ownPath` that took the mark at `index`.
  var rivals = function(index, ownPath) {
    return claimedBy[index].filter(function(path) {
      return path !== ownPath;
    });
  };

  for (cursor = 0; cursor < total; cursor += 1) {
    current = fields[cursor];
    region = schemaToPage(current.box);
    nearLimit = region.width * 0.95;
    farLimit = region.width * 3.00;
    markIndex = findClosestMark(marks, region, farLimit);

    if (markIndex === -1) {
      outcomes[current.path] = unmarked(region, 100);
      continue;
    }

    mark = marks[markIndex];
    gap = distance(mark.box, region);
    if (gap > nearLimit) {
      // In search range but beyond the near limit: leave the field unmarked
      // and report the distance as a percentage of the search range.
      outcomes[current.path] = unmarked(region, Math.round((gap / farLimit) * 100));
      continue;
    }

    outcomes[current.path] = {
      index: markIndex,
      value: mark.checked,
      confidence: mark.confidence,
      box: mark.box
    };
    if (claimedBy[markIndex] == null) {
      claimedBy[markIndex] = [];
    }
    claimedBy[markIndex].push(current.path);
  }

  // Written in a second pass so `conflicts` sees every field that claimed
  // the same mark, including ones encountered later.
  total = fields.length;
  for (cursor = 0; cursor < total; cursor += 1) {
    current = fields[cursor];
    outcome = outcomes[current.path];
    target = unpack(formData, current.path);
    target.value = outcome.value;
    target.confidence = outcome.confidence;
    target.box = outcome.box;
    target.conflicts = outcome.index === -1 ? [] : rivals(outcome.index, current.path);
  }
};
/**
 * Fills in every checkbox field of a form.
 *
 * Fields of type 'checkbox' are first offered to `matchByWords`; whichever
 * fields it does not return are then resolved by `matchByMark`.
 *
 * @param {Object} formData - container the matchers write field results into.
 * @param {Object} formSchema - schema whose `fields` array is filtered by
 *   `type === 'checkbox'`.
 * @param {Array} marks - marks forwarded to `matchByMark`.
 * @param {Array} words - words forwarded to `matchByWords`.
 * @param {Function} schemaToPage - box mapping forwarded to both matchers.
 * @param {Function} schemaToData - box mapping forwarded to `matchByWords`.
 * @returns {undefined}
 */
module.exports.matchCheckboxes = function(formData, formSchema, marks, words, schemaToPage, schemaToData) {
  var checkboxes = formSchema.fields.filter(function(entry) {
    return entry.type === 'checkbox';
  });

  var resolved = matchByWords(formData, checkboxes, words, schemaToPage, schemaToData);

  // Whatever the word pass did not claim goes on to the mark pass.
  var unresolved = checkboxes.filter(function(entry) {
    return indexOf.call(resolved, entry) < 0;
  });

  matchByMark(formData, unresolved, marks, schemaToPage);
};