UNPKG

file2md

Version:

A TypeScript library for converting various document types (PDF, DOCX, XLSX, PPTX, HWP, HWPX) into Markdown with image and layout preservation

169 lines 6.48 kB
import { spawn } from 'child_process';
import path from 'path';
import fs from 'fs/promises';
import { tmpdir } from 'os';
import { randomBytes } from 'crypto';
import { EventEmitter } from 'events';
import { LibreOfficeDetector } from './libreoffice-detector.js';

/**
 * How long to wait after SIGTERM before escalating to SIGKILL when a
 * conversion has timed out and the child process has not exited yet.
 */
const FORCE_KILL_DELAY_MS = 5000;

/**
 * Converts a document buffer to PDF by running a LibreOffice (`soffice`)
 * child process in headless mode.
 *
 * Emits `'progress'` events of the shape `{ stage, percentage, message }`
 * with `stage` being `'starting'`, `'converting'` or `'completed'`.
 */
export class LibreOfficeConverter extends EventEmitter {
    detector;

    constructor() {
        super();
        this.detector = LibreOfficeDetector.getInstance();
    }

    /**
     * Convert PPTX to PDF with enhanced options.
     *
     * The input is written to `input.pptx` inside a freshly created temporary
     * directory, LibreOffice is asked to write its output into that same
     * directory, and the directory is removed again whether or not the
     * conversion succeeded.
     *
     * @param {Buffer|Uint8Array|string} inputBuffer - Document contents, passed to `fs.writeFile`.
     * @param {object} [options]
     * @param {string} [options.quality='maximum'] - One of `'maximum'`, `'high'`, `'medium'`, `'low'`;
     *   any other value selects the plain export filter (see `getPdfFilter`).
     * @param {number} [options.timeout=30000] - Milliseconds before the child process is terminated.
     * @param {string[]} [options.additionalArgs=[]] - Extra command-line arguments, inserted before the input path.
     * @returns {Promise<Buffer>} Contents of the generated PDF.
     * @throws {Error} If LibreOffice is not installed, exits with a non-zero code, times out,
     *   or produces no PDF file.
     */
    async convertToPdf(inputBuffer, options = {}) {
        // `?? {}` keeps an explicit `null` from blowing up the destructuring.
        const { quality = 'maximum', timeout = 30000, additionalArgs = [] } = options ?? {};

        // Check LibreOffice installation
        const info = await this.detector.checkLibreOfficeInstallation();
        if (!info.installed || !info.path) {
            throw new Error('LibreOffice is not installed');
        }

        // A random per-call directory keeps parallel conversions from seeing each other's files.
        const tempId = randomBytes(8).toString('hex');
        const tempDir = path.join(tmpdir(), `libreoffice-${tempId}`);
        const inputPath = path.join(tempDir, 'input.pptx');
        const outputDir = tempDir;

        try {
            // Create temp directory and save input file
            await fs.mkdir(tempDir, { recursive: true });
            await fs.writeFile(inputPath, inputBuffer);

            this.emit('progress', {
                stage: 'starting',
                percentage: 0,
                message: 'Initializing LibreOffice conversion...'
            });

            // The input path MUST stay the last argument: executeConversion derives
            // the directory it scans for the PDF from it.
            const args = [
                '--headless',
                '--invisible',
                '--nodefault',
                '--nolockcheck',
                '--nologo',
                '--norestore',
                '--convert-to',
                this.getPdfFilter(quality),
                '--outdir',
                outputDir,
                ...additionalArgs,
                inputPath
            ];

            const outputPath = await this.executeConversion(info.path, args, timeout);
            const pdfBuffer = await fs.readFile(outputPath);

            this.emit('progress', {
                stage: 'completed',
                percentage: 100,
                message: 'Conversion completed successfully'
            });
            return pdfBuffer;
        }
        finally {
            // Best-effort cleanup: a failure to remove the temp directory must not
            // mask the conversion result or the original error.
            try {
                await fs.rm(tempDir, { recursive: true, force: true });
            }
            catch {
                // Ignore cleanup errors
            }
        }
    }

    /**
     * Get the `--convert-to` filter string for a quality setting.
     *
     * NOTE(review): every variant names the `writer_pdf_Export` filter although
     * convertToPdf saves its input as `input.pptx`; LibreOffice also ships an
     * `impress_pdf_Export` filter for presentations — confirm this is intended.
     *
     * @param {string} quality - `'maximum'`, `'high'`, `'medium'` or `'low'`.
     * @returns {string} Filter specification; the option-less filter for unknown values.
     */
    getPdfFilter(quality) {
        switch (quality) {
            case 'maximum':
                return 'pdf:writer_pdf_Export:{"MaxImageResolution":{"type":"long","value":"300"},"UseTaggedPDF":{"type":"boolean","value":"true"},"SelectPdfVersion":{"type":"long","value":"0"},"Quality":{"type":"long","value":"100"}}';
            case 'high':
                return 'pdf:writer_pdf_Export:{"MaxImageResolution":{"type":"long","value":"200"},"Quality":{"type":"long","value":"90"}}';
            case 'medium':
                return 'pdf:writer_pdf_Export:{"MaxImageResolution":{"type":"long","value":"150"},"Quality":{"type":"long","value":"80"}}';
            case 'low':
                return 'pdf:writer_pdf_Export:{"MaxImageResolution":{"type":"long","value":"100"},"Quality":{"type":"long","value":"70"}}';
            default:
                return 'pdf:writer_pdf_Export';
        }
    }

    /**
     * Execute LibreOffice conversion with timeout and progress tracking.
     *
     * While the child runs, a `'converting'` progress event is emitted once per
     * second. The returned promise settles exactly once: with the path of the
     * first `.pdf` file found next to the input file, or with an error.
     *
     * @param {string} sofficePath - Executable to spawn.
     * @param {string[]} args - Command-line arguments; the last entry must be the input file path.
     * @param {number} timeout - Milliseconds after which the child is sent SIGTERM and the promise rejects.
     * @returns {Promise<string>} Absolute path of the produced PDF.
     */
    executeConversion(sofficePath, args, timeout) {
        return new Promise((resolve, reject) => {
            // Named `child` so the Node global `process` is not shadowed.
            const child = spawn(sofficePath, args);
            let stderr = '';
            let settled = false;
            let forceKillHandle = null;

            // Resolve/reject only once, even if 'error' and 'close' both fire.
            const settle = (callback, value) => {
                if (settled) {
                    return;
                }
                settled = true;
                callback(value);
            };

            // Track progress
            const progressInterval = setInterval(() => {
                if (!settled) {
                    this.emit('progress', {
                        stage: 'converting',
                        percentage: 50,
                        message: 'Converting document...'
                    });
                }
            }, 1000);

            // Set up timeout
            const timeoutHandle = setTimeout(() => {
                // Stop reporting progress right away instead of waiting for 'close',
                // which may never come if the child ignores SIGTERM.
                clearInterval(progressInterval);
                child.kill('SIGTERM');
                forceKillHandle = setTimeout(() => {
                    child.kill('SIGKILL');
                }, FORCE_KILL_DELAY_MS);
                settle(reject, new Error(`Conversion timed out after ${timeout}ms`));
            }, timeout);

            const stopTimers = () => {
                clearTimeout(timeoutHandle);
                clearInterval(progressInterval);
            };

            // stdout is piped by default; drain it so a chatty child cannot block
            // on a full pipe buffer. Its content is not needed.
            child.stdout.resume();
            child.stderr.on('data', (data) => {
                stderr += data.toString();
            });

            child.on('close', async (code) => {
                stopTimers();
                clearTimeout(forceKillHandle);
                if (settled) {
                    return; // Already rejected (timeout or spawn error)
                }
                if (code !== 0) {
                    settle(reject, new Error(`LibreOffice exited with code ${code}: ${stderr}`));
                    return;
                }
                // Find the output file
                try {
                    // Output was requested in the same directory as the input file,
                    // which is the last argument.
                    const outputDir = path.dirname(args[args.length - 1]);
                    const files = await fs.readdir(outputDir);
                    const pdfFile = files.find((file) => file.endsWith('.pdf'));
                    if (!pdfFile) {
                        settle(reject, new Error('No PDF output file found'));
                        return;
                    }
                    settle(resolve, path.join(outputDir, pdfFile));
                }
                catch (error) {
                    settle(reject, error);
                }
            });

            child.on('error', (error) => {
                stopTimers();
                settle(reject, error);
            });
        });
    }

    /**
     * Convert with progress callback.
     *
     * Subscribes `onProgress` to this instance's `'progress'` event for the
     * duration of the call and unsubscribes it afterwards, also on failure.
     *
     * @param {Buffer|Uint8Array|string} inputBuffer - Document contents, forwarded to `convertToPdf`.
     * @param {object} [options] - Same options as `convertToPdf`.
     * @param {(progress: {stage: string, percentage: number, message: string}) => void} [onProgress]
     * @returns {Promise<Buffer>} Contents of the generated PDF.
     */
    async convertWithProgress(inputBuffer, options = {}, onProgress) {
        if (onProgress) {
            this.on('progress', onProgress);
        }
        try {
            return await this.convertToPdf(inputBuffer, options);
        }
        finally {
            if (onProgress) {
                this.removeListener('progress', onProgress);
            }
        }
    }
}
//# sourceMappingURL=libreoffice-converter.js.map