nfv
Version:
[Updated to use the ndv package instead of dv] FormVision is a Node.js library for extracting data from scanned forms
109 lines (97 loc) • 4.01 kB
JavaScript
// Generated by CoffeeScript 1.12.7
// Hoisted declarations for every module-level name that is assigned below.
var LOWER_CERTAIN_SCORE, LOWER_UNCERTAIN_SCORE, QUIETZONE_WIDTH, UPPER_CERTAIN_SCORE, UPPER_UNCERTAIN_SCORE, binarize, detectCandidates, dv, scoreCandidate, scoreToCheckState;
// Image library; findCheckboxes uses it to copy the input via `new dv.Image(image)`.
dv = require('ndv');
// Local filter that findCheckboxes applies to the input image before detection.
binarize = require('./filters/binarize');
// Margin that scoreCandidate adds on every side of a candidate box before
// cropping it (presumably pixels -- confirm against the image library's units).
QUIETZONE_WIDTH = 10;
// Score thresholds consumed by scoreToCheckState:
//  - a score strictly between LOWER_UNCERTAIN_SCORE and UPPER_UNCERTAIN_SCORE
//    is reported as checked;
//  - confidence is at its maximum for LOWER_CERTAIN_SCORE <= score <
//    UPPER_CERTAIN_SCORE and changes linearly inside the two "uncertain" bands
//    on either side of that range;
//  - a score at or above UPPER_UNCERTAIN_SCORE gets the minimum confidence.
LOWER_UNCERTAIN_SCORE = 0.02;
LOWER_CERTAIN_SCORE = 0.10;
UPPER_CERTAIN_SCORE = 1.5;
UPPER_UNCERTAIN_SCORE = 4;
/**
 * Collects the bounding boxes of a binarized image whose size and aspect
 * ratio make them checkbox candidates.
 *
 * The image is first processed with `dilate(3, 3)` and split with
 * `connectedComponents(8)`. Each resulting box is handled as follows:
 *  - kept as-is when its width/height ratio lies strictly between 0.5 and 5
 *    and its width lies strictly between 10 and a third of the image width;
 *  - otherwise, when it is wider than a third of the image, that region of the
 *    ORIGINAL binarized image is cropped, processed with `erode(3, 3)` and
 *    `dilate(5, 5)`, and split again; nested boxes are kept when their ratio
 *    lies strictly between 0.5 and 2 and their width strictly between 20 and
 *    a third of the image width;
 *  - otherwise dropped.
 *
 * @param {Object} binarizedImage - Image object exposing `width`, `dilate`,
 *   `crop` and (through the chained results) `erode` and
 *   `connectedComponents`. Presumably an ndv image -- confirm with the caller.
 * @returns {Array<Object>} Accepted boxes (objects with `x`, `y`, `width`,
 *   `height`) in discovery order. Nested boxes are the objects returned by
 *   `connectedComponents`, with `x`/`y` shifted in place by the enclosing
 *   box's offset.
 */
detectCandidates = function(binarizedImage) {
  var found = [];
  // Open-interval test. Written with positive comparisons only so that a NaN
  // value (e.g. a 0/0 aspect ratio) is rejected.
  var isBetween = function(value, low, high) {
    return low < value && value < high;
  };
  var components = binarizedImage.dilate(3, 3).connectedComponents(8);
  var componentCount = components.length;
  var pos, component, region, cleaned, nested, nestedCount, nestedPos, inner;

  for (pos = 0; pos < componentCount; pos++) {
    component = components[pos];

    if (isBetween(component.width / component.height, 0.5, 5) &&
        isBetween(component.width, 10, binarizedImage.width / 3)) {
      found.push(component);
      continue;
    }
    if (!(component.width > binarizedImage.width / 3)) {
      continue;
    }

    // Oversized component: look for boxes inside it.
    region = binarizedImage.crop(component);
    cleaned = region.erode(3, 3).dilate(5, 5);
    nested = cleaned.connectedComponents(8);
    nestedCount = nested.length;
    for (nestedPos = 0; nestedPos < nestedCount; nestedPos++) {
      inner = nested[nestedPos];
      if (isBetween(inner.width / inner.height, 0.5, 2) &&
          isBetween(inner.width, 20, binarizedImage.width / 3)) {
        // Nested coordinates are relative to the crop; move them back into
        // the coordinate system of the full image.
        inner.x += component.x;
        inner.y += component.y;
        found.push(inner);
      }
    }
  }
  return found;
};
/**
 * Computes a numeric "fill" score for one candidate box.
 *
 * Steps, in order:
 *  1. Grow the box by QUIETZONE_WIDTH on every side and crop that region from
 *     the binarized image.
 *  2. Apply `dilate(31, 31)` then `erode(31, 31)` to the crop and take
 *     `distanceFunction(8)` of the result.
 *  3. Combine that with `invert().toGray()` of the crop via `and`.
 *  4. Sum `histogram[bin] * bin` over every enumerable key of the combined
 *     image's histogram.
 *
 * NOTE(review): the grown box can have negative `x`/`y` for candidates close
 * to the image border; how `crop` treats that is not visible here -- confirm.
 *
 * @param {Object} binarizedImage - Image object exposing `crop`.
 * @param {{x: number, y: number, width: number, height: number}} candidate -
 *   Box to score; it is not modified.
 * @returns {number} The weighted histogram sum (0 for an empty histogram).
 */
scoreCandidate = function(binarizedImage, candidate) {
  var margin = QUIETZONE_WIDTH;
  var paddedBox = {
    x: candidate.x - margin,
    y: candidate.y - margin,
    width: candidate.width + margin * 2,
    height: candidate.height + margin * 2
  };

  var cropped = binarizedImage.crop(paddedBox);
  var closed = cropped.dilate(31, 31).erode(31, 31);
  var distances = closed.distanceFunction(8);
  var mask = cropped.invert().toGray();
  var weighted = distances.and(mask);

  var histogram = weighted.histogram();
  var total = 0;
  var bin;
  // `bin` is the histogram key (a string when enumerated); the multiplication
  // coerces it to a number.
  for (bin in histogram) {
    total += histogram[bin] * bin;
  }
  return total;
};
/**
 * Translates a candidate score into a checked flag and a confidence value.
 *
 * checked: true when LOWER_UNCERTAIN_SCORE < score < UPPER_UNCERTAIN_SCORE.
 *
 * confidence (before scaling), by score range:
 *  - below LOWER_UNCERTAIN_SCORE: linear, 1 at score 0 down to 0 at
 *    LOWER_UNCERTAIN_SCORE;
 *  - LOWER_UNCERTAIN_SCORE up to LOWER_CERTAIN_SCORE: linear from 0 to 1;
 *  - LOWER_CERTAIN_SCORE up to UPPER_CERTAIN_SCORE: 1;
 *  - UPPER_CERTAIN_SCORE up to UPPER_UNCERTAIN_SCORE: linear from 1 to 0;
 *  - anything else (including NaN): 0.
 * The value is then scaled with `Math.round(confidence * 92 + 5)`, i.e. 5..97
 * for scores >= 0. The ramps are not clamped, so a negative score would give
 * a larger number.
 *
 * @param {number} score - Score as produced by scoreCandidate.
 * @returns {Array} Two-element array `[checked, confidence]`.
 */
scoreToCheckState = function(score) {
  // Linear map that yields 0 at `zeroAt` and 1 at `oneAt` (no clamping).
  var ramp = function(x, zeroAt, oneAt) {
    return (x - zeroAt) / (oneAt - zeroAt);
  };

  var isChecked = LOWER_UNCERTAIN_SCORE < score && score < UPPER_UNCERTAIN_SCORE;

  // Each branch is only reached when every earlier upper bound was not met,
  // so the lower bound of each range is implied by the ladder.
  var certainty;
  if (score < LOWER_UNCERTAIN_SCORE) {
    certainty = ramp(score, LOWER_UNCERTAIN_SCORE, 0);
  } else if (score < LOWER_CERTAIN_SCORE) {
    certainty = ramp(score, LOWER_UNCERTAIN_SCORE, LOWER_CERTAIN_SCORE);
  } else if (score < UPPER_CERTAIN_SCORE) {
    certainty = 1.0;
  } else if (score < UPPER_UNCERTAIN_SCORE) {
    certainty = ramp(score, UPPER_UNCERTAIN_SCORE, UPPER_CERTAIN_SCORE);
  } else {
    certainty = 0.0;
  }

  return [isChecked, Math.round(certainty * 92 + 5)];
};
/**
 * Finds checkbox marks in an image and returns them together with a copy of
 * the image in which every reported mark has been cleared.
 *
 * The input is copied with `new dv.Image(image)` and binarized with
 * `binarize(image)`. Every box from detectCandidates is scored with
 * scoreCandidate and classified with scoreToCheckState; boxes whose
 * confidence is greater than 0 are reported and erased from the copy with
 * `clearBox`.
 *
 * NOTE(review): scoreToCheckState in this file scales confidence with
 * `Math.round(c * 92 + 5)`, so the `> 0` filter looks like it never rejects a
 * candidate -- confirm whether that is intended.
 *
 * @param {Object} image - Source image accepted by `dv.Image` and `binarize`.
 * @returns {Array} Two-element array `[marks, clearedImage]`, where each mark
 *   is `{box, checked, confidence}` and `clearedImage` is the modified copy.
 */
module.exports.findCheckboxes = function(image) {
  var detected = [];
  var remaining = new dv.Image(image);
  var blackAndWhite = binarize(image);
  var boxes = detectCandidates(blackAndWhite);
  var boxCount = boxes.length;
  var n, currentBox, state;

  for (n = 0; n < boxCount; n++) {
    currentBox = boxes[n];
    // state is [checked, confidence].
    state = scoreToCheckState(scoreCandidate(blackAndWhite, currentBox));
    if (!(state[1] > 0)) {
      continue;
    }
    detected.push({
      box: currentBox,
      checked: state[0],
      confidence: state[1]
    });
    remaining.clearBox(currentBox);
  }

  return [detected, remaining];
};