nfv
Version:
[Updated to use the ndv package instead of dv] FormVision is a Node.js library for extracting data from scanned forms
110 lines (104 loc) • 3.28 kB
text/coffeescript
#!/usr/bin/env coffee
#
# FormVision Command-Line Interface
#
args = require('minimist')(process.argv[2..], boolean: true)
glob = require 'glob'
fs = require 'fs'
dv = require 'ndv'
fv = require __dirname + '/../src/index'
# Write the command-line usage summary (supported options and image
# formats) to stdout as a single newline-joined message.
printHelp = ->
  usageLines = [
    'Usage: cli [OPTION]... FILES'
    'Process FILES using FormVision.'
    ''
    ' --lang= language to use for Tesseract'
    ' --schema= FormVision Schema (module.exports)'
    ' --deskew apply deskew filter'
    ' --darken-ink apply darken-ink filter'
    ' --filter-background apply filter-background filter'
    ' --remove-red apply remove-red filter'
    ' --help display help text'
    ''
    'Supported image formats are PNG and JPEG.'
  ]
  console.log usageLines.join('\n')
# Process one image file: decode it, apply the filters selected on the
# command line, run the form reader, and print the outcome.
#
# formReader - reader instance; its `image` property is assigned and
#              `find()` is called on it.
# filename   - path of the image; the extension selects the decoder and
#              the log image is written next to it.
# data       - raw file contents handed to `dv.Image`.
#
# With --schema the recognized data is matched against the schema and the
# resulting form data is printed as JSON. Without it, a log image named
# `<name>-log.<ext>` is written and a short summary is printed.
processImage = (formReader, filename, data) ->
  console.log 'Processing: ' + filename

  # Skip images produced by an earlier run. Log images are written below as
  # `<name>-log.<ext>`; the legacy `.log.` form is still recognized as well.
  if /(\.log\..*|-log\.[^.]+)$/i.test filename
    console.log ' Skipping log image'
    return

  # Pick the decoder from the file extension.
  if /\.png$/i.test filename
    image = new dv.Image 'png', data
  else if /\.(jpg|jpeg)$/i.test filename
    image = new dv.Image 'jpg', data
  else
    console.warn ' Invalid format'
    return

  # Apply filters, always in this fixed order regardless of flag order.
  if args['deskew']
    console.log ' Deskewing'
    image = fv.filters.deskew image
  if args['remove-red']
    console.log ' Removing Red'
    image = fv.filters.removeRed image
  if args['filter-background']
    console.log ' Filtering Background'
    # NOTE(review): 25 and 35 are passed through to filterBackground
    # unchanged; their meaning is defined by that filter, not here.
    image = fv.filters.filterBackground image, 25, 35
  if args['darken-ink']
    console.log ' Darkening Ink'
    image = fv.filters.darkenInk image

  # Require schema: *.json files are parsed, anything else is loaded as a
  # module. A bare `--schema` flag (boolean true) is ignored.
  schemaPath = args['schema']
  if schemaPath and typeof(schemaPath) is 'string'
    if /\.json$/.test schemaPath
      formSchema = JSON.parse(fs.readFileSync(schemaPath).toString())
    else
      formSchema = require schemaPath

  # Read form.
  formReader.image = image
  result = formReader.find()
  if formSchema?
    result.match formSchema, (err, formData) =>
      if err?
        console.error err.message
        return
      console.log JSON.stringify(formData, null, 2)
  else
    # No schema: dump a log image next to the input and summarize findings.
    logFilename = filename.replace(/\.([^\.]+)$/, '-log.$1')
    fs.writeFileSync logFilename, result.toImage().toBuffer('png')
    object = result.toObject()
    barcodeLabels = object.barcodes.map((barcode) -> barcode.type + ': ' + barcode.data)
    recognizedText = object.text.map((entry) -> entry.text).join(' ')
    console.log ' Log: ' + logFilename
    console.log ' Barcodes: ' + barcodeLabels
    console.log ' Checkboxes: ' + object.checkboxes.length + ' in total'
    console.log ' Text: ', recognizedText
  # Blank line separates the output of consecutive files.
  console.log ''
# Command-line driver: handle --help, create the form reader, then process
# every file argument. An argument that does not exist as a file is treated
# as a glob pattern and each match is processed.

# Help text.
if args.help
  printHelp()
  process.exit(0)

# Initialize reader.
lang = args['lang']
if lang?
  console.log 'Language: ' + lang
formReader = new fv.FormReader lang

# Process images.
filenames = args._
if filenames.length is 0
  console.warn 'Missing file arguments'
  printHelp()
  process.exit(-1)

for filename in filenames
  do (filename) ->
    fs.readFile filename, (err, data) ->
      if err? and err.code is 'ENOENT'
        # Not an existing file: try the argument as a glob pattern.
        glob filename, null, (globErr, matches) ->
          if globErr?
            console.warn 'Cannot glob pattern: ' + filename + ' ' + globErr
            process.exit(-2)
          if matches.length is 0
            # Report patterns that expand to nothing instead of ignoring them.
            console.warn 'No files match: ' + filename
            return
          for matchedFile in matches
            do (matchedFile) ->
              fs.readFile matchedFile, (readErr, matchedData) ->
                if readErr?
                  # Skip unreadable matches (e.g. directories) rather than
                  # handing undefined data to processImage.
                  console.error 'Cannot read file: ' + matchedFile
                  return
                processImage formReader, matchedFile, matchedData
      else if err?
        console.error 'Cannot read file: ' + filename
        process.exit(-3)
      else
        processImage formReader, filename, data