UNPKG

fv

Version:

FormVision is a Node.js library for extracting data from scanned forms.

172 lines (162 loc) 6.21 kB
// Originally generated by CoffeeScript 2.3.1.
const dv = require('dv');

const {
  length,
  distanceVector,
  boxDistanceVector,
  intersectBox,
  boundingBox,
} = require('./math');

/**
 * Compiles a mask with lines that have a certain length.
 *
 * @param {dv.Image} image - Source image; it is converted with `toGray()`
 *   before `lineSegments` is called on it.
 * @param {number} minLineLength - Segments whose
 *   `length(distanceVector(p1, p2))` is below this value are dropped.
 * @returns {dv.Image} A new image with the same width and height as `image`
 *   (created with depth 8) onto which every remaining segment was drawn.
 */
const detectLineMask = (image, minLineLength) => {
  const lineMask = new dv.Image(image.width, image.height, 8);
  const longLines = image
    .toGray()
    .lineSegments(0, 0, false)
    .filter((line) => length(distanceVector(line.p1, line.p2)) >= minLineLength);
  for (const line of longLines) {
    // NOTE(review): 7 is presumably the stroke width in pixels - confirm
    // against the dv `drawLine` documentation.
    lineMask.drawLine(line.p1, line.p2, 7, 'set');
  }
  return lineMask;
};

/**
 * Groups items into regions: two items end up in the same region when they
 * are connected through a chain of pairs for which `predicate` holds.
 *
 * @param {Array} items - Items to group.
 * @param {function(*, *): boolean} predicate - Called as
 *   `predicate(items[i], items[j])` with `i < j`; a truthy result merges the
 *   regions of both items.
 * @returns {number[]} One region id per item (same order as `items`). Ids
 *   are indices into `items`; items sharing an id belong to one region.
 */
const mergeRegions = (items, predicate) => {
  // Initialize regions with unique indices.
  const regions = Array.from({ length: items.length }, (_, index) => index);

  // Merge regions until predicate can no longer be applied.
  let done = false;
  while (!done) {
    done = true;

    // Merge regions (non-transitive). Pairs are visited by index instead of
    // slicing `items` on every outer iteration.
    for (let i = 0; i < items.length; i++) {
      for (let j = i + 1; j < items.length; j++) {
        if (regions[j] !== regions[i] && predicate(items[i], items[j])) {
          const region = Math.min(regions[j], regions[i]);
          regions[i] = region;
          regions[j] = region;
          done = false;
        }
      }
    }

    // Propagate merges (transitive): follow each id until it points at an
    // entry that is its own id. The bound is exclusive; the generated code
    // iterated one slot past the end of `regions`.
    for (let i = 0; i < regions.length; i++) {
      while (regions[regions[i]] !== regions[i]) {
        regions[i] = regions[regions[i]];
      }
    }
  }
  return regions;
};

/**
 * Builds the predicate used to decide whether two boxes belong to the same
 * text block.
 *
 * @param {number} fontWidth - Expected glyph width; boxes on one line are
 *   joined when `boxDistanceVector(boxA, boxB).x` is below three times this.
 * @param {number} fontHeight - Expected glyph height; two boxes count as
 *   being on one line when their bottom edges differ by less than half of it.
 * @returns {function(Object, Object): boolean} Predicate over two boxes with
 *   `x`, `y`, `width`, `height` that is truthy when they are on the same line
 *   and close enough, or when `intersectBox` reports an intersection.
 */
const isSameBlock = (fontWidth, fontHeight) => (boxA, boxB) => {
  const bottomA = boxA.y + boxA.height;
  const bottomB = boxB.y + boxB.height;
  const delta = boxDistanceVector(boxA, boxB);
  const sameLine =
    Math.abs(bottomA - bottomB) < fontHeight / 2 && delta.x < fontWidth * 3;
  return sameLine || intersectBox(boxA, boxB);
};

/**
 * Finds groups of letter-sized boxes that are likely to contain text.
 *
 * @param {dv.Image} binarizedImage - Image that is dilated and then split
 *   into connected components.
 * @param {number} [fontWidth=20] - Expected glyph width.
 * @param {number} [fontHeight=30] - Expected glyph height.
 * @returns {Array<Array<Object>>} One array of boxes per merged region.
 */
const detectCandidates = (binarizedImage, fontWidth = 20, fontHeight = 30) => {
  // Keep boxes wider than half a glyph and between half and six glyphs tall.
  const hasLetterSize = (box) =>
    fontWidth / 2 < box.width &&
    fontHeight / 2 < box.height &&
    box.height < fontHeight * 6;

  // Smear text a bit to extract letter boxes.
  const smearWidth = (1 * fontWidth) + fontWidth % 2;
  const smearHeight = (0.25 * fontHeight) + fontHeight % 2;
  const boxes = binarizedImage
    .dilate(smearWidth, smearHeight)
    .connectedComponents(8)
    .filter(hasLetterSize);

  // Merge letter boxes to text regions.
  const regions = mergeRegions(boxes, isSameBlock(fontWidth, fontHeight));
  const boxesByRegion = {};
  regions.forEach((region, boxIndex) => {
    if (boxesByRegion[region] == null) {
      boxesByRegion[region] = [];
    }
    boxesByRegion[region].push(boxes[boxIndex]);
  });

  // A plain object is kept so that groups come out in the same key order as
  // the original `for...in` enumeration.
  return Object.keys(boxesByRegion).map((region) => boxesByRegion[region]);
};

/**
 * Clone area of an image from boxes.
 *
 * @param {dv.Image} image - Image to copy pixels from.
 * @param {Array<Object>} boxes - Boxes (`x`, `y`, `width`, `height`) to copy.
 * @returns {Array} `[cloneImage, cloneBox]`: a new image sized to
 *   `boundingBox(boxes)` with each box drawn at its offset inside that
 *   bounding box, and the bounding box itself.
 */
const cloneUsingRegion = (image, boxes) => {
  // NOTE(review): every box is cropped and drawn 25 px wider than reported;
  // presumably padding for the right edge of the glyphs - confirm.
  const extraWidth = 25;
  const cloneBox = boundingBox(boxes);
  const cloneImage = new dv.Image(cloneBox.width, cloneBox.height, image.depth);
  cloneImage.clearBox({
    x: 0,
    y: 0,
    width: cloneBox.width,
    height: cloneBox.height,
  });
  for (const box of boxes) {
    cloneImage.drawImage(
      image.crop(box.x, box.y, box.width + extraWidth, box.height),
      {
        x: box.x - cloneBox.x,
        y: box.y - cloneBox.y,
        width: box.width + extraWidth,
        height: box.height,
      }
    );
  }
  return [cloneImage, cloneBox];
};

/**
 * Runs word recognition on every candidate region.
 *
 * @param {Array<Array<Object>>} candidates - Groups of boxes, one per region.
 * @param {dv.Image} image - Image the regions are cut from.
 * @param {Object} tesseract - Recognizer; its `image` and `pageSegMode`
 *   properties are overwritten for every candidate before `findWords()` is
 *   called.
 * @returns {Array<Object>} Recognized words whose box is wider and taller
 *   than 5. Each word's `box` is shifted by the region's bounding-box offset
 *   and a copy of the originating boxes is stored in `word.candidate`.
 */
const findWords = (candidates, image, tesseract) => {
  let words = [];
  for (const candidateBoxes of candidates) {
    // Crop and recognize.
    const [cloneImage, cloneBox] = cloneUsingRegion(image, candidateBoxes);
    tesseract.image = cloneImage;
    tesseract.pageSegMode = cloneBox.height < 60 ? 'single_line' : 'single_block';
    const localWords = tesseract.findWords();
    for (const word of localWords) {
      // Transform back.
      word.box.x += cloneBox.x;
      word.box.y += cloneBox.y;
      // Store candidate.
      word.candidate = candidateBoxes.slice(0);
    }
    words = words.concat(localWords);
  }

  // Filter words with tiny boxes.
  return words.filter((word) => word.box.width > 5 && word.box.height > 5);
};

/**
 * Use given *Tesseract* instance to find all text grouped as words along with
 * confidence and boxes.
 *
 * @param {dv.Image} image - Image to analyze; a copy is made for the cleared
 *   result.
 * @param {Object} tesseract - Tesseract instance; its `image` and
 *   `pageSegMode` properties are modified while searching.
 * @returns {Array} `[words, clearedImage]`: the recognized words and a copy
 *   of `image` on which the boxes of sufficiently long or confident words
 *   were cleared.
 */
module.exports.findText = function(image, tesseract) {
  const clearedImage = new dv.Image(image);

  // Remove long lines.
  const lineMask = detectLineMask(image, 45);
  const textImage = image.toGray().add(lineMask.toGray());

  // Find words using a simple Otsu thresholding.
  tesseract.image = textImage;
  const candidates = detectCandidates(
    textImage.otsuAdaptiveThreshold(128, 128, 0, 0, 0).image
  );
  const words = findWords(candidates, image, tesseract);

  // Remove words from image, but safeguard against removing 'noise' that may
  // be a checkmark.
  for (const word of words) {
    if (word.text.length >= 6 || (word.text.length >= 3 && word.confidence >= 30)) {
      clearedImage.clearBox(word.box);
    }
  }
  return [words, clearedImage];
};