UNPKG

textract

Version:

Extracting text from files of various types, including html, pdf, doc, docx, xls, xlsx, csv, pptx, png, jpg, gif, rtf, text/*, and various Open Office formats.

github.com/dbashford/textract

dbashford/textract

27 lines (23 loc) • 558 B

JavaScript

var fs = require( 'fs' ) , marked = require( 'marked' ) , htmlExtract = require( './html' ) ; function extractText( filePath, options, cb ) { fs.readFile( filePath, function( error, data ) { if ( error ) { cb( error, null ); return; } marked( data.toString(), function( err, content ) { if ( err ) { cb( err, null ); } else { htmlExtract.extractFromText( content, options, cb ); } }); }); } module.exports = { types: ['text/x-markdown', 'text/markdown'], extract: extractText };