fv
Version:
FormVision is a Node.js library for extracting data from scanned forms.
50 lines (47 loc) • 1.66 kB
JavaScript
// Generated by CoffeeScript 2.3.1
var dv;
dv = require('dv');
// Darken very light ink to enhance readability.
module.exports = function(image) {
var curve, darkValueMask, deltaV, desaturatedMask, h, hsv, i, j, k, len, protectionMask, rect, ref, s, v, vOrig, x;
// Transform to HSV.
hsv = image.toColor().toHSV();
// Select desatured (grayish) and dark pixels.
desaturatedMask = hsv.inRange(0, 0, 0, 239, 0.1 * 255, 255);
darkValueMask = hsv.inRange(0, 0, 0, 239, 255, 0.5 * 255).invert();
// Split HSV channels.
h = hsv.toGray(1, 0, 0);
s = hsv.toGray(0, 1, 0);
v = hsv.toGray(0, 0, 1);
vOrig = new dv.Image(v);
// Compute "fuzzy value protection mask" and apply linear spline.
curve = (function() {
var results = [];
for (var i = 0; i <= 255; i++){ results.push(i); }
return results;
}).apply(this);
for (x = i = 0; i <= 199; x = ++i) {
curve[x] = 0;
}
for (x = j = 0; j <= 54; x = ++j) {
curve[x + 200] = x / 54 * 220;
}
protectionMask = desaturatedMask.and(darkValueMask.erode(5, 5));
v.applyCurve(curve, protectionMask);
// Extract changes to value channel.
deltaV = vOrig.subtract(v);
// Filter undesired changes (blobs and noise).
deltaV = deltaV.invert();
ref = deltaV.threshold(254).dilate(5, 5).connectedComponents(8);
for (k = 0, len = ref.length; k < len; k++) {
rect = ref[k];
if (rect.height > 100 || rect.width < 5 || rect.height < 5) {
deltaV.clearBox(rect);
}
}
deltaV = deltaV.invert();
// Apply filtered changes to unmodified value channel.
v = vOrig.subtract(deltaV);
// Merge HSV channels.
return new dv.Image(h, s, v).toRGB();
};