UNPKG

nfv

Version:

[Updated to use the ndv package instead of dv] FormVision is a Node.js library for extracting data from scanned forms

109 lines (97 loc) 4.01 kB
// Originally generated by CoffeeScript 1.12.7; reformatted by hand.

const dv = require('ndv');
const binarize = require('./filters/binarize');

// Margin added on every side of a candidate box before it is scored
// (same units as the box coordinates).
const QUIETZONE_WIDTH = 10;

// Score thresholds consumed by scoreToCheckState. Between the two "certain"
// values the confidence is maximal; between an "uncertain" value and its
// neighbouring "certain" value the confidence is interpolated linearly.
const LOWER_UNCERTAIN_SCORE = 0.02;
const LOWER_CERTAIN_SCORE = 0.10;
const UPPER_CERTAIN_SCORE = 1.5;
const UPPER_UNCERTAIN_SCORE = 4;

/**
 * Strict (exclusive) range test: low < value < high.
 *
 * @param {number} value
 * @param {number} low
 * @param {number} high
 * @returns {boolean}
 */
function isStrictlyBetween(value, low, high) {
  return low < value && value < high;
}

/**
 * Unclamped linear interpolation factor: 0 when x equals zeroAt, 1 when x
 * equals oneAt. Values outside that span yield results outside [0, 1].
 *
 * @param {number} x
 * @param {number} zeroAt
 * @param {number} oneAt
 * @returns {number}
 */
function lerp(x, zeroAt, oneAt) {
  return (x - zeroAt) / (oneAt - zeroAt);
}

/**
 * Collects the boxes in a binarized image that could be checkboxes.
 *
 * Components of the 3x3-dilated image are accepted directly when their
 * width/height ratio lies in (0.5, 5) and their width lies in
 * (10, imageWidth / 3). Components wider than imageWidth / 3 are cropped out
 * of the un-dilated image, run through erode(3, 3).dilate(5, 5), and searched
 * again with tighter limits (ratio in (0.5, 2), width in (20, imageWidth / 3)).
 * NOTE(review): presumably the second pass splits boxes that were merged with
 * neighbouring content — confirm against the ndv documentation.
 *
 * @param {object} binarizedImage Image object as returned by `binarize`.
 * @returns {Array<object>} Candidate boxes (x, y, width, height) in the
 *   coordinate system of `binarizedImage`.
 */
function detectCandidates(binarizedImage) {
  const widthLimit = binarizedImage.width / 3;
  const candidates = [];

  const components = binarizedImage.dilate(3, 3).connectedComponents(8);
  for (const component of components) {
    const aspect = component.width / component.height;

    if (
      isStrictlyBetween(aspect, 0.5, 5) &&
      isStrictlyBetween(component.width, 10, widthLimit)
    ) {
      candidates.push(component);
      continue;
    }

    if (!(component.width > widthLimit)) {
      continue;
    }

    // Oversized component: look for plausible boxes inside it.
    const nested = binarizedImage
      .crop(component)
      .erode(3, 3)
      .dilate(5, 5)
      .connectedComponents(8);

    for (const inner of nested) {
      const innerAspect = inner.width / inner.height;
      if (
        isStrictlyBetween(innerAspect, 0.5, 2) &&
        isStrictlyBetween(inner.width, 20, widthLimit)
      ) {
        // Nested boxes are relative to the crop; shift them back into the
        // coordinate system of the full image.
        inner.x += component.x;
        inner.y += component.y;
        candidates.push(inner);
      }
    }
  }

  return candidates;
}

/**
 * Computes a fill score for one candidate box.
 *
 * The box is enlarged by QUIETZONE_WIDTH on every side and cropped from the
 * binarized image. The crop is closed with dilate(31, 31).erode(31, 31), turned
 * into a distance image, and AND-ed with the inverted, gray-converted crop.
 * The score is the sum over the resulting histogram of (entry * key).
 *
 * @param {object} binarizedImage Image object as returned by `binarize`.
 * @param {{x: number, y: number, width: number, height: number}} candidate
 * @returns {number} The histogram-weighted score.
 */
function scoreCandidate(binarizedImage, candidate) {
  const paddedBox = {
    x: candidate.x - QUIETZONE_WIDTH,
    y: candidate.y - QUIETZONE_WIDTH,
    width: candidate.width + QUIETZONE_WIDTH * 2,
    height: candidate.height + QUIETZONE_WIDTH * 2,
  };

  const cropped = binarizedImage.crop(paddedBox);
  const closed = cropped.dilate(31, 31).erode(31, 31);
  const distances = closed.distanceFunction(8);
  const weighted = distances.and(cropped.invert().toGray());
  const histogram = weighted.histogram();

  // for...in is kept deliberately so the keys are enumerated exactly as
  // before, whatever container histogram() returns. Keys are strings and are
  // coerced to numbers by the multiplication.
  let score = 0;
  for (const level in histogram) {
    score += histogram[level] * level;
  }
  return score;
}

/**
 * Translates a raw score into a [checked, confidence] pair.
 *
 * `checked` is true when the score lies strictly between
 * LOWER_UNCERTAIN_SCORE and UPPER_UNCERTAIN_SCORE. The raw certainty is
 * piecewise linear over the four thresholds and is then rescaled with
 * Math.round(certainty * 92 + 5), so a certainty of 0 maps to 5 and a
 * certainty of 1 maps to 97.
 *
 * @param {number} score Value produced by scoreCandidate.
 * @returns {[boolean, number]} `[checked, confidence]`.
 */
function scoreToCheckState(score) {
  const checked = isStrictlyBetween(
    score,
    LOWER_UNCERTAIN_SCORE,
    UPPER_UNCERTAIN_SCORE
  );

  let certainty;
  if (score < LOWER_UNCERTAIN_SCORE) {
    // Below the lower threshold: certainty rises as the score approaches 0.
    certainty = lerp(score, LOWER_UNCERTAIN_SCORE, 0);
  } else if (score < LOWER_CERTAIN_SCORE) {
    certainty = lerp(score, LOWER_UNCERTAIN_SCORE, LOWER_CERTAIN_SCORE);
  } else if (score < UPPER_CERTAIN_SCORE) {
    certainty = 1.0;
  } else if (score < UPPER_UNCERTAIN_SCORE) {
    certainty = lerp(score, UPPER_UNCERTAIN_SCORE, UPPER_CERTAIN_SCORE);
  } else {
    // At or above the upper threshold (or not comparable, e.g. NaN).
    certainty = 0.0;
  }

  return [checked, Math.round(certainty * 92 + 5)];
}

/**
 * Finds checkbox candidates in an image and reports their check state.
 *
 * The input is copied with `new dv.Image(image)` and binarized with
 * `binarize`. Every detected candidate is scored; candidates whose confidence
 * is greater than 0 are recorded and cleared from the copy via `clearBox`.
 *
 * @param {object} image Source image accepted by `dv.Image` and `binarize`.
 * @returns {[Array<{box: object, checked: boolean, confidence: number}>, object]}
 *   `[marks, clearedImage]`: the recorded marks and the copy of the image with
 *   the recorded boxes cleared.
 */
module.exports.findCheckboxes = function (image) {
  const marks = [];
  const clearedImage = new dv.Image(image);
  const binarizedImage = binarize(image);

  for (const box of detectCandidates(binarizedImage)) {
    const score = scoreCandidate(binarizedImage, box);
    const [checked, confidence] = scoreToCheckState(score);
    if (confidence > 0) {
      marks.push({ box: box, checked: checked, confidence: confidence });
      clearedImage.clearBox(box);
    }
  }

  return [marks, clearedImage];
};