UNPKG

@nosferatu500/textract

Version:

Extracts text from files of various types, including html, pdf, doc, docx, xls, xlsx, csv, pptx, png, jpg, gif, rtf, text/*, and various OpenOffice formats.

48 lines (43 loc) 1.4 kB
const { exec } = require("child_process"); const path = require("path"); const util = require("../util"); function extractText(filePath, options, cb) { const execOptions = util.createExecOptions("dxf", options); const escapedPath = filePath.replace(/\s/g, "\\ "); exec(`drawingtotext ${escapedPath}`, execOptions, function (error, stdout, stderr) { if (stderr !== "") { error = new Error(`error extracting DXF text ${path.basename(filePath)}: ${stderr}`); cb(error, null); return; } cb(null, stdout); }); } function testForBinary(options, cb) { exec("drawingtotext notalegalfile", function (error, stdout, stderr) { let msg; const errorRegex = /I couldn't make sense of your input/; if (stderr && errorRegex.test(stderr)) { cb(true); } else { msg = "INFO: 'drawingtotext' does not appear to be installed, " + "so textract will be unable to extract DXFs."; cb(false, msg); } }); } module.exports = { types: [ "application/dxf", "application/x-autocad", "application/x-dxf", "drawing/x-dxf", "image/vnd.dxf", "image/x-autocad", "image/x-dxf", "zz-application/zz-winassoc-dxf", ], extract: extractText, test: testForBinary, };