UNPKG

textract

Version:

Extracts text from files of various types, including html, pdf, doc, docx, xls, xlsx, csv, pptx, png, jpg, gif, rtf, text/*, and various OpenOffice formats.

github.com/dbashford/textract

dbashford/textract

36 lines (30 loc) • 948 B

JavaScript

var path = require( 'path' ) , J = require( 'j' ) ; function extractText( filePath, options, cb ) { var CSVs, wb, result, error; try { wb = J.readFile( filePath ); CSVs = J.utils.to_csv( wb ); } catch ( err ) { error = new Error( 'Could not extract ' + path.basename( filePath ) + ', ' + err ); cb( error, null ); return; } result = ''; Object.keys( CSVs ).forEach( function( key ) { result += CSVs[key]; }); cb( null, result ); } module.exports = { types: ['application/vnd.ms-excel', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', 'application/vnd.ms-excel.sheet.binary.macroEnabled.12', 'application/vnd.ms-excel.sheet.macroEnabled.12', 'application/vnd.oasis.opendocument.spreadsheet', 'application/vnd.openxmlformats-officedocument.spreadsheetml.template', 'application/vnd.oasis.opendocument.spreadsheet-template' ], extract: extractText };