nfv
Version:
[Updated to use the ndv package instead of dv] FormVision is a Node.js library for extracting data from scanned forms
145 lines (141 loc) • 4.38 kB
JavaScript
// Generated by CoffeeScript 1.12.7
// Hoisted declarations for the two helper functions that are assigned as
// function expressions further down in this file.
var avgMedian3, matchText;
/**
 * Looks up `word` in `words` by exact `text` equality.
 *
 * @param {{text: *}} word - Item whose `text` is searched for.
 * @param {Array<{text: *}>} words - Candidate items to search.
 * @returns {Object|undefined} The matching candidate when exactly one
 *   candidate has the same `text`; `undefined` when none or several match.
 */
matchText = function(word, words) {
  var hitCount = 0;
  var onlyHit;
  var idx;
  var candidate;
  var total = words.length;
  // Every candidate is visited: an ambiguous (repeated) text must yield no match.
  for (idx = 0; idx < total; idx += 1) {
    candidate = words[idx];
    if (candidate.text === word.text) {
      hitCount += 1;
      onlyHit = candidate;
    }
  }
  return hitCount === 1 ? onlyHit : undefined;
};
/**
 * Averages the values around the median.
 *
 * With more than three values, the values are ordered numerically and the
 * three entries centred on index floor(length / 2) are averaged. With three
 * or fewer values, all of them are averaged.
 *
 * @param {number[]} values - Numbers to summarise; the array is not modified.
 * @returns {number|undefined} The average, or `undefined` for an empty array.
 */
avgMedian3 = function(values) {
  var picked;
  var ordered;
  var middle;
  var total;
  var k;
  if (values.length > 3) {
    // Sort a copy with a numeric comparator: the default sort compares the
    // string forms (so 1000 would sort before 20) and would reorder the
    // caller's array in place.
    ordered = values.slice().sort(function(a, b) {
      return a - b;
    });
    middle = Math.floor(ordered.length / 2);
    picked = ordered.slice(middle - 1, middle + 2);
  } else {
    picked = values;
  }
  if (picked.length === 0) {
    return undefined;
  }
  total = 0;
  for (k = 0; k < picked.length; k++) {
    total += picked[k];
  }
  return total / picked.length;
};
/**
 * Smallest absolute difference between two angles in radians, in [0, PI].
 *
 * Math.atan2 results wrap at +/-PI, so two almost identical directions on
 * either side of that seam differ by roughly 2*PI when subtracted directly.
 */
function angularDistance(a, b) {
  var fullTurn = 2 * Math.PI;
  var diff = Math.abs(a - b) % fullTurn;
  return diff > Math.PI ? fullTurn - diff : diff;
}

/**
 * Estimates scale and offset along one axis from the collected pair transforms.
 *
 * Only pairs separated by more than 20 units on this axis (measured in the
 * B items) are used; of those, the 7 with the largest separation feed the
 * median-based averages for scale and offset.
 *
 * @param {Array<{distance: number[], scale: number[], offset: number[]}>} transforms
 * @param {number} axis - 0 for x, 1 for y.
 * @param {number} fallbackScale - Scale used when no usable pair exists.
 * @returns {{scale: number, offset: number}}
 */
function estimateAxis(transforms, axis, fallbackScale) {
  var candidates = transforms.filter(function(t) {
    // A pair whose A items coincide on this axis has a divide-by-zero scale
    // (Infinity/NaN); it carries no information and must not be averaged.
    return t.distance[axis] > 20 && isFinite(t.scale[axis]);
  }).sort(function(a, b) {
    return a.distance[axis] - b.distance[axis];
  }).slice(-7);
  var scale = avgMedian3(candidates.map(function(t) {
    return t.scale[axis];
  }));
  var offset = avgMedian3(candidates.map(function(t) {
    return t.offset[axis];
  }));
  if (scale == null || offset == null) {
    // No usable pair on this axis: use the same defaults as the global
    // fallback instead of producing NaN coordinates.
    return { scale: fallbackScale, offset: 0 };
  }
  return { scale: scale, offset: offset };
}

/**
 * Estimates an axis-aligned scale + offset transform that maps box
 * coordinates of `itemsA` onto the coordinates of the matching `itemsB`.
 *
 * Every item of A is matched against B with `findMatch`. Each pair of matched
 * items contributes a candidate transform when the pair is far enough apart
 * in B (|dx| + |dy| > 1000) and the direction between the two items differs
 * by less than 0.02 rad between A and B.
 *
 * @param {Array<{box: {x: number, y: number}}>} itemsA - Reference items.
 * @param {Array<{box: {x: number, y: number}}>} itemsB - Items to align to.
 * @param {number} fallbackScale - Scale applied on both axes (with zero
 *   offset) when there are not enough candidate transforms.
 * @param {number} [requiredMatchCount=7] - The estimate is used only when the
 *   number of candidate transforms exceeds this value.
 * @param {function(Object, Array): (Object|undefined)} [findMatch=matchText]
 *   Returns the item of B corresponding to an item of A, or a falsy value.
 * @returns {function({x: number, y: number, width: number, height: number}):
 *   {x: number, y: number, width: number, height: number}} Function that
 *   transforms a box from A coordinates to B coordinates.
 */
module.exports.estimateTransform = function(itemsA, itemsB, fallbackScale, requiredMatchCount, findMatch) {
  var matches, transforms, i, j, count;
  var itemA1, itemA2, itemB1, itemB2;
  var deltaAX, deltaAY, deltaBX, deltaBY, distX, distY;
  var pairScaleX, pairScaleY, axisX, axisY;

  if (requiredMatchCount == null) {
    requiredMatchCount = 7;
  }
  if (findMatch == null) {
    findMatch = matchText;
  }

  count = itemsA.length;
  matches = new Array(count);
  for (i = 0; i < count; i++) {
    matches[i] = findMatch(itemsA[i], itemsB);
  }

  transforms = [];
  for (i = 0; i < count; i++) {
    itemB1 = matches[i];
    if (!itemB1) {
      continue;
    }
    itemA1 = itemsA[i];
    for (j = i + 1; j < count; j++) {
      itemB2 = matches[j];
      if (!itemB2) {
        continue;
      }
      itemA2 = itemsA[j];
      deltaAX = itemA1.box.x - itemA2.box.x;
      deltaAY = itemA1.box.y - itemA2.box.y;
      deltaBX = itemB1.box.x - itemB2.box.x;
      deltaBY = itemB1.box.y - itemB2.box.y;
      distX = Math.abs(deltaBX);
      distY = Math.abs(deltaBY);
      // Conditions are kept in positive form so that NaN inputs reject the pair.
      if (distX + distY > 1000 &&
          angularDistance(Math.atan2(deltaBX, deltaBY), Math.atan2(deltaAX, deltaAY)) < 0.02) {
        pairScaleX = distX / Math.abs(deltaAX);
        pairScaleY = distY / Math.abs(deltaAY);
        transforms.push({
          distance: [distX, distY],
          scale: [pairScaleX, pairScaleY],
          offset: [itemB1.box.x - itemA1.box.x * pairScaleX, itemB1.box.y - itemA1.box.y * pairScaleY]
        });
      }
    }
  }

  if (transforms.length > requiredMatchCount) {
    axisX = estimateAxis(transforms, 0, fallbackScale);
    axisY = estimateAxis(transforms, 1, fallbackScale);
  } else {
    axisX = { scale: fallbackScale, offset: 0 };
    axisY = { scale: fallbackScale, offset: 0 };
  }

  return function(box) {
    return {
      x: box.x * axisX.scale + axisX.offset,
      y: box.y * axisY.scale + axisY.offset,
      width: box.width * axisX.scale,
      height: box.height * axisY.scale
    };
  };
};