UNPKG

carbone

Version:

Fast, Simple and Powerful report generator. Injects JSON and produces PDF, DOCX, XLSX, ODT, PPTX, ODS, ...!

671 lines (613 loc) 24.1 kB
var path = require('path'); var fs = require('fs'); var os = require('os'); var helper = require('./helper'); var spawn = require('child_process').spawn; var params = require('./params'); var debug = require('debug')('carbone:converter'); var which = require('which'); /* Factories object */ var conversionFactory = {}; /* An active factory is a factory which is starting (but not started completely), running or stopping (but not stopped completely) */ var activeFactories = []; /* Every conversion is placed in this job queue */ var jobQueue = []; /* If true, a factory will restart automatically */ var isAutoRestartActive = true; var isLibreOfficeFound = false; var converterOptions = { /* Python path */ pythonExecPath : 'python', /* Libre Office executable path */ sofficeExecPath : 'soffice', /* Delay before killing the other process (either LibreOffice or Python) when one of them died */ delayBeforeKill : 500 }; /* get the total memory available on the system (unit: MB) */ const totalMemoryAvailableMB = os.totalmem() / 1024 / 1024; var pythonErrors = { 1 : 'Global error', 100 : 'Existing office server not found', 400 : 'Could not open document', 401 : 'Could not convert document' }; var converter = { /** * Initialize the converter. * @param {Object} options : same options as carbone's options * @param {function} callback(factory): called when all factories are ready. if startFactory is true, the first parameter will contain the object descriptor of all factories */ init : function (options, callback) { if (typeof(options) === 'function') { callback = options; } else { for (var attr in options) { if (params[attr]!== undefined) { params[attr] = options[attr]; } else { throw Error('Undefined options :' + attr); } } } // restart Factory automatically if it crashes. isAutoRestartActive = true; // if we must start all factory now if (params.startFactory === true) { // and if the maximum of factories is not reached if (activeFactories.length < params.factories) { var _nbFactoriesStarting=0; for (var i = 0; i < params.factories; i++) { _nbFactoriesStarting++; addConversionFactory(function () { // here all factories are ready _nbFactoriesStarting--; if (_nbFactoriesStarting === 0 && callback) { callback(conversionFactory); } }); } } } else { // else, start LibreOffice when needed if (callback) { callback(); } } }, /** * Kill all LibreOffice + Python threads * When this method is called, we must call init() to re-initialize the converter * * @param {function} callback : when everything is off */ exit : function (callback) { isAutoRestartActive = false; jobQueue = []; for (var i in conversionFactory) { var _factory = conversionFactory[i]; // if a factory is running if (_factory && (_factory.pythonThread !== null || _factory.officeThread !== null)) { clearTimeout(_factory.timeoutId); _factory.exitCallback = factoryExitFn; // kill Python thread first. if (_factory.pythonThread !== null) { _factory.pythonThread.kill('SIGKILL'); } if (_factory.officeThread !== null) { _factory.officeThread.kill('SIGKILL'); helper.rmDirRecursive(_factory.userCachePath); } } } // if all factories are already off if (activeFactories.length === 0) { factoryExitFn(); } function factoryExitFn () { if (activeFactories.length === 0) { conversionFactory = {}; debug('exit!'); if (callback !== undefined) { callback(); } } } }, /** * Convert a document * * @param {string} inputFile : absolute path to the source document * @param {string} outputType : destination type of format.js (ex. writer_pdf_Export for PDF) * @param {string} formatOptions : options string passed to convert * @param {string} outputFile : outputFile to generate * @param {function} callback(err, outputFile) */ convertFile : function (inputFile, outputType, formatOptions, outputFile, callback) { if (isLibreOfficeFound === false) { return callback('Cannot find LibreOffice. Document conversion cannot be used'); } var _job = { inputFilePath : inputFile, outputFilePath : outputFile, outputFormat : outputType, formatOptions : formatOptions || '', callback : callback, nbAttempt : 0, error : null }; jobQueue.push(_job); executeQueue(); }, /** * Do we need to restart LibreOffice? * * Temporal fix for memory leaks of LibreOffice 6+ * * @param {Objecct} params * @param {Integer} availableMemory system available memory * @param {Integer} nbReports nb reborts computed by one factory * @return {Boolean} true if LibreOffice must be restarted, false otherwise */ shouldTheFactoryBeRestarted : function (params, availableMemory, nbReports) { const _percentageFactoryMemoryLoaded = nbReports * params.factoryMemoryFileSize * 100 / availableMemory; if (_percentageFactoryMemoryLoaded < params.factoryMemoryThreshold || params.factoryMemoryThreshold === 0) { return false; } return true; } }; /** ***************************************************************************************************************/ /* Private methods */ /** ***************************************************************************************************************/ /** * Add a LibreOffice + Python factory (= 2 threads) * * WARNING: the callback must be used only by converter.init() * * @param {function} callback : function() called when the factory is ready to convert documents. */ function addConversionFactory (callback) { debug('ask to add a conversion factory'); // find a free factory var _prevFactory = {}; var _startListenerID = -1; for (var i = 0; i < params.factories; i++) { _prevFactory = conversionFactory[i]; if (_prevFactory === undefined) { _startListenerID = i; break; } else if (_prevFactory.pythonThread === null && _prevFactory.officeThread === null) { _startListenerID = i; break; } } // maximum of factories reached if (_startListenerID === -1) { if (callback) { callback(); } return; } var _uniqueName = helper.getUID(); // generate a unique path to a fake user profile. We cannot start multiple instances of LibreOffice if it uses the same user cache var _userCachePath = path.join(params.tempPath, '_office_' + _uniqueName); if (_prevFactory) { // re-use previous directory if possible (faster restart) if (_prevFactory.userCachePath !== undefined) { _userCachePath = _prevFactory.userCachePath; } // If soffice crashes as soon as it was started, the callback of the previous starting process must be passed to the new started factory // On Linux, it happens when LibreOffice creates its directory for the first time (oosplash seems to hide this) if (_prevFactory.readyCallback) { callback = _prevFactory.readyCallback; } } // generate a URL in LibreOffice's format so that it's portable across OSes: // see: https://wiki.openoffice.org/wiki/URL_Basics var _userCacheURL = convertToURL(_userCachePath); // generate a unique pipe name var _pipeName = params.pipeNamePrefix + '_' +_uniqueName; var _connectionString = 'pipe,name=' + _pipeName + ';urp;StarOffice.ComponentContext'; var _officeParams = ['--headless', '--invisible', '--nocrashreport', '--nodefault', '--nologo', '--nofirststartwizard', '--norestore', '--quickstart', '--nolockcheck', '--accept='+_connectionString, '-env:UserInstallation='+_userCacheURL ]; // save unique name activeFactories.push(_pipeName); var _officeThread = spawn(converterOptions.sofficeExecPath, _officeParams); _officeThread.on('close', generateOnExitCallback(_startListenerID, false, _pipeName)); debug('office thread started with PID ' + _officeThread.pid); var _pythonThread = spawn(converterOptions.pythonExecPath, [params.pythonPath, '--pipe', _pipeName]); debug('python thread started with PID ' + _pythonThread.pid); _pythonThread.on('close', generateOnExitCallback(_startListenerID, true, _pipeName)); _pythonThread.stdout.on('data', generateOnDataCallback(_startListenerID)); _pythonThread.stderr.on('data', function (err) { debug('python stderr :', err.toString()); }); if (_officeThread !== null && _pythonThread !== null) { var _factory = { mode : 'pipe', pipeName : _pipeName, userCachePath : _userCachePath, pid : _officeThread.pid, officeThread : _officeThread, pythonThread : _pythonThread, isReady : false, isConverting : false, readyCallback : callback, nbrReports : 0, timeoutId : null }; conversionFactory[_startListenerID] = _factory; } else { throw new Error('Carbone: Cannot start LibreOffice or Python Thread'); } } /** * Kill one LibreOffice factory * * @param {Object} factory */ function killFactory (factory) { if (factory.isReady === false) { return; } factory.isReady = false; factory.isConverting = false; factory.nbrReports = 0; clearTimeout(factory.timeoutId); if (factory.officeThread !== null) { factory.officeThread.kill('SIGKILL'); } else if (factory.pythonThread !== null) { factory.pythonThread.kill('SIGKILL'); } } /** * Generate a callback which is used to handle thread error and exit * @param {Integer} factoryID factoryID * @param {Boolean} isPythonProcess true if the callback is used by the Python thread, false if it used by the Office Thread * @param {String} factoryUniqueName factory unique name (equals pipeName) * @return {Function} function(error) */ function generateOnExitCallback (factoryID, isPythonProcess, factoryUniqueName) { return function (error) { var _processName = ''; var _otherThreadToKill = null; // get factory object var _factory = conversionFactory[factoryID]; if (!_factory) { throw new Error('Carbone: Process crashed but the factory is unknown!'); } // the factory cannot receive jobs anymore _factory.isReady = false; _factory.isConverting = false; clearTimeout(_factory.timeoutId); // if the Python process died... if (isPythonProcess === true) { _processName = 'Python'; _factory.pythonThread = null; _otherThreadToKill = _factory.officeThread; } else { _processName = 'Office'; _factory.officeThread = null; _otherThreadToKill = _factory.pythonThread; } debug('process '+_processName+' (PID ' +_factory.pid+ ') of factory '+factoryID+' died ' + error); // if both processes Python and Office are off... if (_factory.pythonThread === null && _factory.officeThread === null) { debug('factory '+factoryID+' is completely off'); // remove factory from activeFactories to avoid infinite loop activeFactories.splice(activeFactories.indexOf(factoryUniqueName), 1); whenFactoryIsCompletelyOff(_factory); } else { _otherThreadToKill.kill('SIGKILL'); // Fixes #12 // SIGKILL to make sure everything is off // // Be careful, LibreOffice has two threads oosplash (parent) -> soffice (child) if launched with "soffice". // On Linux, we decided to launch the child process directly to simplify the thread management. // Otherwise, only oosplash is killed if SIGKILL is sent. In that case: // - The child "soffice" is still alive and stdin, stdout and stderr of are not closed automatically. // - The event "spawn.close" is received only if stdin, stdout and stderr are closed. So carbone hangs indefinitely :( // When killing the oospash parent process, we should close stdin, stdout and stderr and kill the child thread soffice ourself (like "pkill soffice") // // Also, we could use SIGTERM. In that case, oosplash (parent) sends the signal to its child... but this signal is not powerful enough to // guarantee a shutdown. If LibreOffice hangs, we could wait forever. // // It is easier to only launch directly soffice.bin directly on Linux (see below) } }; } /** * Manage factory restart ot shutdown when a factory is completly off * @param {Object} factory factory description */ function whenFactoryIsCompletelyOff (factory) { // if Carbone is not shutting down if (isAutoRestartActive === true) { if (factory.currentJob) { // if there is an error while converting a document, let's try another time factory.currentJob.error = new Error('Could not convert the document'); } onCurrentJobEnd(factory); // avoid restarting too early setTimeout(addConversionFactory, 50); } // else if Carbone is shutting down and there is an exitCallback else { // TODO delete async // delete office files synchronously (we do not care because Carbone is shutting down) when office is dead helper.rmDirRecursive(factory.userCachePath); if (factory.exitCallback) { factory.exitCallback(); factory.exitCallback = null; } } } /** * Generate a callback which handle communication with the Python thread * @param {Integer} factoryID factoryID * @return {Function} function(data) */ function generateOnDataCallback (factoryID) { return function (data) { var _factory = conversionFactory[factoryID]; data = data.toString(); // Ready to receive document conversion if (data === '204') { debug('factory '+factoryID+' ready'); _factory.isReady = true; if (_factory.readyCallback) { _factory.readyCallback(); // void readyCallback to avoid calling it twice when the factory object is re-used. _factory.readyCallback = null; } return executeQueue(); } // Document converted with or without errors if (_factory.currentJob) { _factory.currentJob.error = (pythonErrors[data] !== undefined) ? new Error(pythonErrors[data]) : null; } onCurrentJobEnd(_factory); }; } /** * Called when the job is finished * * @param {Object} factory factory object */ function onCurrentJobEnd (factory) { var _job = factory.currentJob; factory.currentJob = null; factory.isConverting = false; clearTimeout(factory.timeoutId); if (_job && _job.callback instanceof Function) { // save the number of report converted to check the memory level of the LO process // if it reach a threshold, the LO process is killed if (converter.shouldTheFactoryBeRestarted(params, totalMemoryAvailableMB, ++factory.nbrReports) === true) { killFactory(factory); } _job.callback(_job.error, _job.outputFilePath); } executeQueue(); } /** * Execute the queue of conversion. * It will auto executes itself until the queue is empty */ function executeQueue () { if (jobQueue.length===0) { return; } // if there is no active factories, start them if (activeFactories.length < params.factories) { addConversionFactory(); return; } for (var i in conversionFactory) { if (jobQueue.length > 0) { var _factory = conversionFactory[i]; if (_factory.isReady === true && _factory.isConverting === false) { var _job = jobQueue.shift(); sendToFactory(_factory, _job); } } } } /** * Send the document to the Factory * * @param {object} factory : LibreOffice + Python factory to send to * @param {object} job : job description (file to convert, callback to call when finished, ...) */ function sendToFactory (factory, job) { factory.isConverting = true; factory.currentJob = job; factory.pythonThread.stdin.write('--format="'+job.outputFormat+'" --input="'+job.inputFilePath+'" --output="'+job.outputFilePath+'" --formatOptions="'+job.formatOptions+'"\n'); // keep the number of attempts to convert this file job.nbAttempt++; // Timeout to kill long conversions if (params.converterFactoryTimeout > 0) { clearTimeout(factory.timeoutId); // by security factory.timeoutId = setTimeout(function () { job.nbAttempt = params.attempts; // do not retry job.error = new Error('Document conversion timeout reached ('+params.converterFactoryTimeout+' ms)'); killFactory(factory); onCurrentJobEnd(factory); }, params.converterFactoryTimeout); } } /** * Error for path * * @param {[type]} message [description] */ function PathError (message) { this.name = 'PathError'; this.code = 'PathError'; this.message = message || 'Failed to convert path'; if (typeof Error.captureStackTrace === 'function') { Error.captureStackTrace(this, PathError); } } PathError.prototype = new Error(); /** * Convert an absolute path to an absolute URL understood by LibreOffice and * OpenOffice. This is necessary because LO/OO use a cross-platform path format * that does not match paths understood natively by OSes. * If the input is already a URL, it is returned as-is. * * @param {string} inputPath - An absolute path to convert to a URL. * @returns {string} A string suitable for use with LibreOffice as an absolute file path URL. */ function convertToURL (inputPath) { // Guard clause: if it already looks like a URL, keep it that way. if (inputPath.slice(0, 8) === 'file:///') { return inputPath; } if (!path.isAbsolute(inputPath)) { throw new PathError('Paths to convert must be absolute'); } // Split into parts so that we can join into a URL: var _normalizedPath = path.normalize(inputPath); // (Use both delimiters blindly - we're aiming for maximum compatibility) var _pathComponents = _normalizedPath.split(/[\\/]/); // Make sure there is no leading empty element, since we always add a leading "/" anyway. if (_pathComponents[0] === '') { _pathComponents.shift(); } var outputURL = 'file:///' + _pathComponents.join('/'); return outputURL; } /** * Detect If LibreOffice and python are available at startup */ function detectLibreOffice (additionalPaths) { function _findBundledPython (sofficePath, pythonName) { if (!sofficePath) { return null; } // Try finding a Python binary shipped alongside the soffice binary, // either in its actual directory, or - if it's a symbolic link - // in the directory it points to. var _sofficeActualDirectory; var _symlinkDestination; try { _symlinkDestination = path.resolve(path.dirname(sofficePath), fs.readlinkSync(sofficePath)); // Assume symbolic link, will throw in case it's not: _sofficeActualDirectory = path.dirname(_symlinkDestination); } catch (errorToIgnore) { // Not a symlink. _sofficeActualDirectory = path.dirname(sofficePath); } // Check for the Python binary in the actual soffice path: try { return which.sync(pythonName, { path : _sofficeActualDirectory }); } catch (errorToIgnore) { // No bundled Python found. return null; } } function _findBinaries (paths, pythonName, sofficeName) { var _whichPython; var _whichSoffice; // Look for the soffice binary - first in the well-known paths, then in // the system PATH. On Linux, this prioritizes "upstream" (TDF) packages // over distro-provided ones from the OS' repository. _whichSoffice = which.sync(sofficeName, { path : paths.join(':'), nothrow : true }) || which.sync(sofficeName, { nothrow : true }) || null; // Check for a Python binary bundled with soffice, fall back to system-wide: // This is a bit more complex, since we deal with some corner cases. // 1. Hopefully use the python from the original soffice package, same dir // (this might fail on Mac if python is not in MacOS/, but in Resources/). // 1a. Corner case: on Linux, if soffice was in /usr/bin/soffice and NOT // a symlink, then we would hit /usr/bin/python, which is probably python2. // This is why we try with python3 first, to defend against this. // 2. Try finding it in any of the well-known paths - this might result in // using Python from *another install* of LibreOffice, but it should be ok. // This is only attempted if the paths exist on this system to avoid // a fallback to system PATH that "which" does when passed an empty string. // 3. Fall back to system python (hopefully named python3). _whichPython = _findBundledPython(_whichSoffice, 'python3') || _findBundledPython(_whichSoffice, 'python') || (paths.length > 0 && which.sync('python3', { path : paths.join(':'), nothrow : true })) || (paths.length > 0 && which.sync('python', { path : paths.join(':'), nothrow : true })) || which.sync('python3', { nothrow : true }) || which.sync('python', { nothrow : true }) || null; return { soffice : _whichSoffice, python : _whichPython }; } function _listProgramDirectories (basePath, pattern) { try { return fs.readdirSync(basePath).filter(function _isLibreOfficeDirectory (dirname) { return pattern.test(dirname); }).map(function _buildFullProgramPath (dirname) { return path.join(basePath, dirname, 'program'); }); } catch (errorToIgnore) { return []; } } var _pathsToCheck = additionalPaths || []; // overridable file names to look for in the checked paths: var _pythonName = 'python'; var _sofficeName = 'soffice'; var _linuxDirnamePattern = /^libreoffice\d+\.\d+$/; var _windowsDirnamePattern = /^LibreOffice( \d+(?:\.\d+)*?)?$/i; if (process.platform === 'darwin') { _pathsToCheck = _pathsToCheck.concat([ // It is better to use the python bundled with LibreOffice: '/Applications/LibreOffice.app/Contents/MacOS', '/Applications/LibreOffice.app/Contents/Resources' ]); } else if (process.platform === 'linux') { // on Linux, avoid oosplash parent process to simplify SIGKILL propagation. Launch directly soffice.bin. // Fixes #12 _sofficeName = 'soffice.bin'; // The Document Foundation packages (.debs, at least) install to /opt, // into a directory named after the contained LibreOffice version. // Add any existing directories that match this to the list. _pathsToCheck = _pathsToCheck.concat(_listProgramDirectories('/opt', _linuxDirnamePattern)); } else if (process.platform === 'win32') { _pathsToCheck = _pathsToCheck .concat(_listProgramDirectories('C:\\Program Files', _windowsDirnamePattern)) .concat(_listProgramDirectories('C:\\Program Files (x86)', _windowsDirnamePattern)); _pythonName = 'python.exe'; } else { debug('your platform "%s" is not supported yet', process.platform); } // Common logic for all OSes: perform the search and save results as options: var _foundPaths = _findBinaries(_pathsToCheck, _pythonName, _sofficeName); if (_foundPaths.soffice) { debug('LibreOffice found: soffice at %s, python at %s', _foundPaths.soffice, _foundPaths.python); isLibreOfficeFound = true; converterOptions.pythonExecPath = _foundPaths.python; converterOptions.sofficeExecPath = _foundPaths.soffice; } if (isLibreOfficeFound === false) { debug('cannot find LibreOffice. Document conversion cannot be used'); } } detectLibreOffice(); ['SIGINT', 'SIGHUP', 'SIGQUIT'].forEach(function (signal) { process.on(signal, function () { converter.exit(); }); }); process.on('exit', function () { converter.exit(); }); module.exports = converter;