// fv
// Version: (unspecified)
// FormVision is a Node.js library for extracting data from scanned forms.
// 158 lines (152 loc) • 5.41 kB
// JavaScript
// Generated by CoffeeScript 2.3.1
var avgMedian3, matchText;
// Default matcher: looks for the one entry of `words` whose `text` equals `word.text`.
// Returns that entry when exactly one entry matches; returns undefined when there is
// no match or when the match is ambiguous (two or more entries share the text).
matchText = function(word, words) {
  let firstHit;
  let hitCount = 0;
  const total = words.length;
  for (let pos = 0; pos < total; pos += 1) {
    const entry = words[pos];
    if (entry.text !== word.text) {
      continue;
    }
    // Remember only the first hit; the count decides whether it is unique.
    if (hitCount === 0) {
      firstHit = entry;
    }
    hitCount += 1;
  }
  return hitCount === 1 ? firstHit : undefined;
};
// Returns the average of the median and its two neighbours.
// E.g. for `values = [1, 2, 2, 2.3, 4]`, returns (2 + 2 + 2.3) / 3 = 2.1.
// For an even number of elements the upper of the two middle elements is used as the median.
// For one to three elements, all values are averaged.
// If `values` is empty, returns undefined.
// `values` is ordered numerically on a copy; the caller's array is not modified.
avgMedian3 = function(values) {
  let sample = values;
  if (values.length > 3) {
    // Sort a copy with a numeric comparator. The default sort compares string
    // representations, which places 10 before 2 and mis-orders negative numbers.
    const sorted = Array.from(values).sort(function(a, b) {
      return a - b;
    });
    const median = Math.floor(sorted.length / 2);
    sample = sorted.slice(median - 1, median + 2);
  }
  const count = sample.length;
  if (count === 0) {
    return undefined;
  }
  let sum = 0;
  for (let k = 0; k < count; k += 1) {
    sum += sample[k];
  }
  return sum / count;
};
// Estimates an affine transformation (independent scale and offset per axis, no rotation)
// from `itemsA` to `itemsB`. The function assumes that both sets have (almost) the same
// orientation and skew.
// `itemsA` and `itemsB` must be of the form [{box: {x, y}}].
// `fallbackScale` (with offset 0) is used for both axes unless more than `requiredMatchCount`
// pair transforms were found. It is also used for a single axis when no pair yields a usable
// estimate for that axis, so the result never contains undefined/NaN factors because of
// missing data.
// `findMatch` is a function of the form (itemFromSetA, itemsB) -> itemFromSetB. May return
// undefined if no match exists.
// Returns a function mapping a box {x, y, width, height} from the coordinate system of A to
// the coordinate system of B.
module.exports.estimateTransform = function(itemsA, itemsB, fallbackScale, requiredMatchCount = 7, findMatch = matchText) {
  // A pair is only considered if its B items are further apart than this (|dx| + |dy|).
  const MIN_PAIR_DISTANCE = 1000;
  // Maximum tolerated difference (radians) between the pair's direction in A and in B.
  const MAX_ANGLE_DEVIATION = 0.02;
  // A pair contributes to an axis only if its B items are further apart than this on that axis.
  const MIN_AXIS_DISTANCE = 20;
  // Number of most distant pairs per axis that are aggregated.
  const SAMPLE_SIZE = 7;

  // For every element of itemsA, run findMatch once and cache the result.
  const matches = Array.from(itemsA, function(item) {
    return findMatch(item, itemsB);
  });

  const transforms = [];
  // Iterate over all pairs of items in itemsA iff both have a correspondence in itemsB.
  for (let i = 0; i < itemsA.length; i += 1) {
    const itemB1 = matches[i];
    if (!itemB1) {
      continue;
    }
    const itemA1 = itemsA[i];
    for (let j = i + 1; j < itemsA.length; j += 1) {
      const itemB2 = matches[j];
      if (!itemB2) {
        continue;
      }
      const itemA2 = itemsA[j];

      const deltaAX = itemA1.box.x - itemA2.box.x;
      const deltaAY = itemA1.box.y - itemA2.box.y;
      const deltaBX = itemB1.box.x - itemB2.box.x;
      const deltaBY = itemB1.box.y - itemB2.box.y;
      const distX = Math.abs(deltaBX);
      const distY = Math.abs(deltaBY);

      // Written as a negated `>` so that NaN distances are rejected as well.
      if (!(distX + distY > MIN_PAIR_DISTANCE)) {
        continue;
      }

      // Assume that A and B are not too close to each other and have roughly the same
      // rotation. This means we can detect false positives quite safely by comparing the
      // direction of the pair in both sets.
      const angle = Math.atan2(deltaBX, deltaBY);
      const expectedAngle = Math.atan2(deltaAX, deltaAY);
      let deviation = Math.abs(angle - expectedAngle);
      // atan2 yields values in [-PI, PI]; two almost identical directions on either side of
      // that boundary differ numerically by about 2 * PI, so fold the difference back.
      if (deviation > Math.PI) {
        deviation = 2 * Math.PI - deviation;
      }
      if (!(deviation < MAX_ANGLE_DEVIATION)) {
        continue;
      }

      // Compute the transformation implied by this pair. A zero difference in A produces a
      // non-finite scale here; such entries are discarded per axis during aggregation.
      const pairScaleX = distX / Math.abs(deltaAX);
      const pairScaleY = distY / Math.abs(deltaAY);
      transforms.push({
        distance: [distX, distY],
        scale: [pairScaleX, pairScaleY],
        offset: [itemB1.box.x - itemA1.box.x * pairScaleX, itemB1.box.y - itemA1.box.y * pairScaleY]
      });
    }
  }

  // Aggregates the pair transforms for one axis (0 = x, 1 = y): keeps the pairs that are far
  // enough apart on that axis and have finite values, takes the SAMPLE_SIZE most distant
  // ones and averages around their median. Both results are undefined if no pair qualifies.
  const estimateAxis = function(axis) {
    const sample = transforms.filter(function(t) {
      return t.distance[axis] > MIN_AXIS_DISTANCE &&
        Number.isFinite(t.scale[axis]) &&
        Number.isFinite(t.offset[axis]);
    }).sort(function(a, b) {
      return a.distance[axis] - b.distance[axis];
    }).slice(-SAMPLE_SIZE);
    return {
      scale: avgMedian3(sample.map(function(t) {
        return t.scale[axis];
      })),
      offset: avgMedian3(sample.map(function(t) {
        return t.offset[axis];
      }))
    };
  };

  // Start from the fallback and replace it per axis where a stable estimate exists.
  let scaleX = fallbackScale;
  let scaleY = fallbackScale;
  let offsetX = 0;
  let offsetY = 0;
  if (transforms.length > requiredMatchCount) {
    const estimateX = estimateAxis(0);
    if (estimateX.scale !== undefined) {
      scaleX = estimateX.scale;
      offsetX = estimateX.offset;
    }
    const estimateY = estimateAxis(1);
    if (estimateY.scale !== undefined) {
      scaleY = estimateY.scale;
      offsetY = estimateY.offset;
    }
  }

  return function(box) {
    return {
      x: box.x * scaleX + offsetX,
      y: box.y * scaleY + offsetY,
      width: box.width * scaleX,
      height: box.height * scaleY
    };
  };
};