UNPKG

nfv

Version:

[Updated to use the ndv package instead of dv] FormVision is a Node.js library for extracting data from scanned forms

145 lines (141 loc) 4.38 kB
// Originally generated by CoffeeScript 1.12.7.

// A pair of matched items is used only when |dx| + |dy| between their
// itemsB boxes exceeds this value.
var MIN_PAIR_DISTANCE = 1000;
// Largest accepted difference (radians) between the direction of a pair in
// itemsA and the direction of the same pair in itemsB.
var MAX_ANGLE_DIFFERENCE = 0.02;
// A pair contributes to an axis only when its itemsB boxes are further apart
// than this along that axis.
var MIN_AXIS_DISTANCE = 20;
// Number of most widely separated pairs sampled per axis.
var AXIS_SAMPLE_SIZE = 7;

/**
 * Default matcher: find the one item in `words` with the same text as `word`.
 *
 * @param {{text: string}} word - Item to look up.
 * @param {Array<{text: string}>} words - Candidate items.
 * @returns {Object|undefined} The matching item when exactly one candidate
 *   has `text === word.text`; `undefined` when there is none or more than one.
 */
function matchText(word, words) {
  var sameText = words.filter(function(candidate) {
    return candidate.text === word.text;
  });
  if (sameText.length === 1) {
    return sameText[0];
  }
}

/**
 * Average of the three values around the median (or of all values when there
 * are three or fewer).
 *
 * @param {number[]} values - Numbers to summarise; not modified.
 * @returns {number|undefined} The average, or `undefined` for an empty array.
 */
function avgMedian3(values) {
  var sample = values;
  var sorted;
  var median;
  var sum;
  var k;

  if (values.length > 3) {
    // Sort a copy with a numeric comparator: the default sort compares the
    // string forms, which mis-orders e.g. 10 vs 9 and negative numbers.
    sorted = values.slice().sort(function(a, b) {
      return a - b;
    });
    median = Math.floor(sorted.length / 2);
    sample = sorted.slice(median - 1, median + 2);
  }

  if (sample.length === 0) {
    return undefined;
  }

  sum = 0;
  for (k = 0; k < sample.length; k++) {
    sum += sample[k];
  }
  return sum / sample.length;
}

/**
 * Combine the per-pair estimates for one axis into a single scale and offset.
 *
 * Only pairs separated by more than MIN_AXIS_DISTANCE along the axis and with
 * a finite scale are considered; of those, the AXIS_SAMPLE_SIZE most widely
 * separated pairs are summarised with avgMedian3.
 *
 * @param {Array<{distance: number[], scale: number[], offset: number[]}>} transforms
 *   Per-pair estimates.
 * @param {number} axis - 0 for x, 1 for y.
 * @param {number} fallbackScale - Scale used (with offset 0) when no pair is
 *   usable for this axis.
 * @returns {{scale: number, offset: number}} Estimate for the axis.
 */
function estimateAxis(transforms, axis, fallbackScale) {
  var usable = transforms.filter(function(t) {
    // A zero separation in itemsA yields an Infinity/NaN scale; skip those.
    return t.distance[axis] > MIN_AXIS_DISTANCE && isFinite(t.scale[axis]);
  }).sort(function(a, b) {
    return a.distance[axis] - b.distance[axis];
  }).slice(-AXIS_SAMPLE_SIZE);

  if (usable.length === 0) {
    // Nothing to estimate from on this axis; avoid returning undefined/NaN.
    return { scale: fallbackScale, offset: 0 };
  }

  return {
    scale: avgMedian3(usable.map(function(t) {
      return t.scale[axis];
    })),
    offset: avgMedian3(usable.map(function(t) {
      return t.offset[axis];
    }))
  };
}

/**
 * Estimate a per-axis scale and offset that maps boxes from the coordinate
 * space of `itemsA` to that of `itemsB`.
 *
 * Every item of `itemsA` is matched against `itemsB` with `findMatch`. Each
 * pair of matched items that is far enough apart and keeps (almost) the same
 * direction in both spaces yields one scale/offset estimate. When more than
 * `requiredMatchCount` such estimates exist they are combined per axis;
 * otherwise `fallbackScale` with a zero offset is used.
 *
 * @param {Array<{box: {x: number, y: number}}>} itemsA - Source items.
 * @param {Array<Object>} itemsB - Target items; matches returned by
 *   `findMatch` must have a `box` with `x` and `y`.
 * @param {number} fallbackScale - Scale for both axes when estimation is not
 *   possible.
 * @param {number} [requiredMatchCount=7] - Estimation is used only when the
 *   number of accepted pairs is strictly greater than this value.
 * @param {function(Object, Array<Object>): (Object|undefined)} [findMatch=matchText]
 *   Returns the item of `itemsB` corresponding to an item of `itemsA`, or a
 *   falsy value when there is none.
 * @returns {function({x: number, y: number, width: number, height: number}):
 *   {x: number, y: number, width: number, height: number}} Function mapping a
 *   box from `itemsA` coordinates to `itemsB` coordinates.
 */
module.exports.estimateTransform = function(itemsA, itemsB, fallbackScale, requiredMatchCount, findMatch) {
  var matches;
  var transforms;
  var i;
  var j;
  var itemA1;
  var itemA2;
  var itemB1;
  var itemB2;
  var deltaAX;
  var deltaAY;
  var deltaBX;
  var deltaBY;
  var distX;
  var distY;
  var angle;
  var expectedAngle;
  var pairScaleX;
  var pairScaleY;
  var xAxis;
  var yAxis;

  if (requiredMatchCount == null) {
    requiredMatchCount = 7;
  }
  if (findMatch == null) {
    findMatch = matchText;
  }

  matches = itemsA.map(function(item) {
    return findMatch(item, itemsB);
  });

  transforms = [];
  for (i = 0; i < itemsA.length; i++) {
    itemB1 = matches[i];
    if (!itemB1) {
      continue;
    }
    itemA1 = itemsA[i];

    for (j = i + 1; j < itemsA.length; j++) {
      itemB2 = matches[j];
      if (!itemB2) {
        continue;
      }
      itemA2 = itemsA[j];

      deltaAX = itemA1.box.x - itemA2.box.x;
      deltaAY = itemA1.box.y - itemA2.box.y;
      deltaBX = itemB1.box.x - itemB2.box.x;
      deltaBY = itemB1.box.y - itemB2.box.y;
      distX = Math.abs(deltaBX);
      distY = Math.abs(deltaBY);
      angle = Math.atan2(deltaBX, deltaBY);
      expectedAngle = Math.atan2(deltaAX, deltaAY);

      if (distX + distY > MIN_PAIR_DISTANCE &&
          Math.abs(angle - expectedAngle) < MAX_ANGLE_DIFFERENCE) {
        // May be Infinity/NaN when the pair has no separation on an axis in
        // itemsA; estimateAxis discards such values.
        pairScaleX = distX / Math.abs(deltaAX);
        pairScaleY = distY / Math.abs(deltaAY);
        transforms.push({
          distance: [distX, distY],
          scale: [pairScaleX, pairScaleY],
          offset: [
            itemB1.box.x - itemA1.box.x * pairScaleX,
            itemB1.box.y - itemA1.box.y * pairScaleY
          ]
        });
      }
    }
  }

  if (transforms.length > requiredMatchCount) {
    xAxis = estimateAxis(transforms, 0, fallbackScale);
    yAxis = estimateAxis(transforms, 1, fallbackScale);
  } else {
    xAxis = { scale: fallbackScale, offset: 0 };
    yAxis = { scale: fallbackScale, offset: 0 };
  }

  return function(box) {
    return {
      x: box.x * xAxis.scale + xAxis.offset,
      y: box.y * yAxis.scale + yAxis.offset,
      width: box.width * xAxis.scale,
      height: box.height * yAxis.scale
    };
  };
};