unfluff

Version:

A web page content extractor

53 lines (42 loc) • 1.14 kB

text/coffeescript

# Command-line entry point for unfluff.
#
# Takes HTML from the file named on the command line, or from stdin when no
# file name is given, runs it through the extractor, and writes the result to
# stdout as a JSON string.
#
# We use optimist for parsing the CLI arguments
fs = require('fs')
extractor = require('./unfluff')

# Option table handed to optimist; it also drives the generated help text.
cliOptions =
  version:
    alias: 'v'
    describe: 'Show version information'
    boolean: true
  help:
    alias: 'h'
    describe: 'Show this. See: https://github.com/ageitgey/node-unfluff'
    boolean: true
  lang:
    describe: 'Override language auto-detection. Valid values are en, es, fr, etc.'

argvParser = require('optimist')
  .usage('unfluff [OPTIONS] [FILE_NAME]')
  .options(cliOptions)

argv = argvParser.argv

# --version: print the version recorded in package.json and stop.
if argv.version
  {version} = require('../package.json')
  process.stdout.write "#{version}\n"
  process.exit 0

# --help: show the usage text and stop.
if argv.help
  argvParser.showHelp()
  process.exit 0

# Stays undefined unless --lang was given with a truthy value.
language = argv.lang or undefined

# First positional argument, if any, is the input file name.
file = argv._.shift()
html = ""

# Runs the extractor over the given markup and writes the JSON to stdout.
emitResult = (markup) ->
  process.stdout.write JSON.stringify(extractor(markup, language))

if file
  html = fs.readFileSync(file).toString()
  emitResult html
else
  # No file name: accumulate stdin until it ends, then extract.
  process.stdin.setEncoding 'utf8'

  process.stdin.on 'readable', ->
    piece = process.stdin.read()
    html += piece if piece isnt null

  process.stdin.on 'end', ->
    emitResult html