textract
Version:
Extracts text from files of various types, including html, pdf, doc, docx, xls, xlsx, csv, pptx, png, jpg, gif, rtf, text/*, and various OpenOffice formats.
27 lines (23 loc) • 558 B
JavaScript
var fs = require( 'fs' )
, marked = require( 'marked' )
, htmlExtract = require( './html' )
;
/**
 * Reads the file at `filePath`, runs its contents through `marked`, and
 * forwards the rendered output to `htmlExtract.extractFromText`.
 *
 * @param {string} filePath - Path of the file to read.
 * @param {Object} options - Passed through untouched to
 *   `htmlExtract.extractFromText`.
 * @param {Function} cb - Node-style callback. Invoked as `cb( error, null )`
 *   when reading or rendering fails; otherwise it is handed to
 *   `htmlExtract.extractFromText`, which is responsible for calling it.
 */
function extractText( filePath, options, cb ) {
  // Final stage: hand the rendered markup to the HTML extractor.
  function onRendered( renderError, rendered ) {
    if ( renderError ) {
      cb( renderError, null );
      return;
    }
    htmlExtract.extractFromText( rendered, options, cb );
  }

  // Middle stage: convert the raw file contents to a string and render them.
  function onFileRead( readError, contents ) {
    if ( readError ) {
      cb( readError, null );
      return;
    }
    marked( contents.toString(), onRendered );
  }

  fs.readFile( filePath, onFileRead );
}
module.exports = {
types: ['text/x-markdown', 'text/markdown'],
extract: extractText
};