UNPKG

pdfetch

Version:

Node.js application to download KB articles from an arbitrary ServiceNow instance in PDF format.

327 lines (294 loc) 10.5 kB
const fs = require("fs"); const fsp = require("fs").promises; const path = require("path"); const glob = require("glob"); const archiver = require("archiver"); const { PDFDocument } = require("pdf-lib"); /** * Removes content of a specified folder without deleting the folder itself. * * @param {string} folderPath * The path to the folder whose contents should be removed. * * @param {string[]} [patterns=[]] * An array of strings representing file or folder patterns to match for deletion. * Supports wildcards (* for any characters, ? for one character). Empty strings * will be ignored. If the array is null or empty, all contents will be deleted. * * @param {Function} [monitoringFn=null] * Optional function to receive real-time monitoring information. * Expected signature/arguments structure is: * onMonitoringInfo ({type:"info|warn|error", message:"<any>"[, data : {}]}); * * @returns {Promise<void>} - A promise that resolves when the folder contents have been * removed. */ async function removeFolderContents( folderPath, patterns = [], monitoringFn = null ) { const $m = monitoringFn || function () {}; try { const files = await fsp.readdir(folderPath); // Prepare a set of files to delete based on patterns const filesToDelete = new Set(); // If patterns are provided, match files against patterns if (patterns && patterns.length > 0) { for (const pattern of patterns.filter(Boolean)) { const matches = glob.sync(pattern, { cwd: folderPath }); for (const match of matches) { filesToDelete.add(match); } } } else { // If no patterns are provided, delete everything files.forEach((file) => filesToDelete.add(file)); } // Delete matched files and folders for (const file of filesToDelete) { const filePath = path.join(folderPath, file); const stat = await fsp.lstat(filePath); if (stat.isDirectory()) { await fsp.rm(filePath, { recursive: true, force: true }); } else { await fsp.unlink(filePath); } $m({ type: "debug", message: `Deleted: "${filePath}"`, }); } $m({ type: "debug", message: `Done clearing (matching) content of folder "${folderPath}".`, }); } catch (error) { $m({ type: "error", message: `Error clearing folder "${folderPath}". Details: ${error.message}`, data: { error }, }); } } /** * Function to archive ServiceNow articles as PDF files into a ZIP archive. * * @param {String} workDir * The absolute path to the working directory where PDF files are located. * * @param {String[]} articleNumbers * An array of strings representing KB article numbers. These will be resolved to PDF files in the format "<workDir>/PDFs/<articleNumber>.pdf". * * @param {String} archiveName * The name of the resulting ZIP archive. It will be saved at "<workDir>/<archiveName>.zip". * * @param {Function} [monitoringFn=null] * Optional function to receive real-time monitoring information. * Expected signature/arguments structure is: onMonitoringInfo ({type:"info|warn|error", message:"<any>"[, data : {}]}); * * @returns {Promise<void>} * A promise that resolves when the archive has been created. */ async function archiveArticles( workDir, articleNumbers, archiveName, monitoringFn = null ) { const $m = monitoringFn || function () {}; return new Promise((resolve, reject) => { const outputPath = path.join(workDir, `${archiveName}.zip`); const pdfDir = path.join(workDir, "PDFs"); const output = fs.createWriteStream(outputPath); const archive = archiver("zip", { zlib: { level: 9 } }); output.on("close", () => { $m({ type: "info", message: `Archive "${outputPath}" has been finalized. Total bytes: ${archive.pointer()}`, }); resolve(); }); archive.on("warning", (err) => { if (err.code === "ENOENT") { $m({ type: "warn", message: `Warning: ${err.message}` }); } else { $m({ type: "error", message: `Error: ${err.message}`, data: { err } }); reject(err); } }); archive.on("error", (err) => { $m({ type: "error", message: `Error: ${err.message}`, data: { err } }); reject(err); }); archive.pipe(output); // Append files incrementally articleNumbers.forEach((articleNumber) => { const filePath = path.join(pdfDir, `${articleNumber}.pdf`); archive.file(filePath, { name: `${articleNumber}.pdf` }); $m({ type: "info", message: `Adding file "${articleNumber}.pdf" to archive...`, }); }); archive.finalize(); }); } /** * Function to merge ServiceNow articles exported as PDF files into a single PDF file. * * @param {String} workDir * The absolute path to the working directory where PDF files are located. * * @param {String[]} articleNumbers * An array of strings representing KB article numbers. These will be resolved to * PDF files in the format "<workDir>/PDFs/<articleNumber>.pdf". * * @param {String} mergedFileName * The name of the resulting merged PDF file. It will be saved at * "<workDir>/<mergedFileName>.pdf". * * @param {Function} [monitoringFn=null] * Optional function to receive real-time monitoring information. * Expected signature/arguments structure is: onMonitoringInfo * ({type:"info|warn|error", message:"<any>"[, data : {}]}); * * @returns {Promise<void>} * A promise that resolves when the merged PDF has been created. */ async function mergeArticles( workDir, articleNumbers, mergedFileName, monitoringFn = null ) { const $m = monitoringFn || function () {}; try { const mergedPdf = await PDFDocument.create(); const pdfDir = path.join(workDir, "PDFs"); const outputPath = path.join(workDir, `${mergedFileName}.pdf`); let addedFiles = 0; for (const articleNumber of articleNumbers) { const filePath = path.join(pdfDir, `${articleNumber}.pdf`); if (!fs.existsSync(filePath)) { $m({ type: "warn", message: `Skipped missing file: ${filePath}.` }); continue; } $m({ type: "info", message: `Adding file "${articleNumber}.pdf" to merged PDF...`, }); const pdfBytes = await fs.promises.readFile(filePath); const pdf = await PDFDocument.load(pdfBytes); const copiedPages = await mergedPdf.copyPages(pdf, pdf.getPageIndices()); copiedPages.forEach((page) => mergedPdf.addPage(page)); addedFiles++; } if (addedFiles > 0) { const mergedPdfBytes = await mergedPdf.save(); await fs.promises.writeFile(outputPath, mergedPdfBytes); $m({ type: "info", message: `Merged ${addedFiles} PDF(s) to file "${outputPath}"`, }); } else { $m({ type: "warn", message: `Found no actual PDFs to merge. Check your "operation_mode".`, }); } } catch (error) { $m({ type: "error", message: `Error merging PDFs. Details: ${error.message}`, data: { error }, }); } } /** * Ensures a specific folder structure exists and populates it with files based on templates. * @param {String} homeDir * An absolute path to the base directory. * * @param {Object} bluePrint * A blueprint object describing the structure and content, such as: * { * content: [ * { type: "folder", path: "/path/to/my/folder" }, * { * type: "file", * path: "/path/to/my/folder/my_file.txt", * template: "Hello world! My name is {{firstName}} {{lastName}}.", * data: { firstName: "John", lastName: "Doe" }, * }, * { type: "folder", path: "/path/to/my/other/folder" }, * ], * } * The `content` property of this Object is mandatory, and must resemble the * above example. Other free-form information can as well be stored in the * Object, to aid in the process. * * @param {Function} [monitoringFn=null] * Optional function to receive real-time monitoring information. * Expected signature/arguments structure is: onMonitoringInfo * ({type:"info|warn|error", message:"<any>"[, data : {}]}); */ function ensureSetup(homeDir, bluePrint, monitoringFn = null) { const $m = monitoringFn || function () {}; try { // Sort content by path alphabetically bluePrint.content.sort((a, b) => a.path.localeCompare(b.path)); // Ensure all directories and files exist for (const item of bluePrint.content) { const itemPath = path.join(homeDir, item.path); if (item.type === "folder") { // Create folder if it doesn't exist if (!fs.existsSync(itemPath)) { fs.mkdirSync(itemPath, { recursive: true }); $m({ type: "info", message: `Created folder: ${itemPath}` }); } } else if (item.type === "file") { // Ensure the parent directory exists const dir = path.dirname(itemPath); if (!fs.existsSync(dir)) { fs.mkdirSync(dir, { recursive: true }); $m({ type: "info", message: `Created parent directory for file: ${dir}`, }); } // Create and populate the file based on the template and data const content = populateTemplate(item.template, item.data); fs.writeFileSync(itemPath, content, "utf8"); $m({ type: "info", message: `Created file: ${itemPath}` }); } } } catch (error) { $m({ type: "error", message: `Error in ensureSetup. Details: ${error.message}`, data: { error }, }); } } /** * Populates a template with data. * @param {String} template - The template string with placeholders. * @param {Object} data - The data object with key-value pairs for placeholders. * @return {String} The populated template. */ function populateTemplate(template, data) { return template.replace(/{{(.*?)}}/g, (_, key) => { if (key in data) { return data[key]; } else { return `{{${key}}}`; } }); } module.exports = { removeFolderContents, archiveArticles, mergeArticles, ensureSetup, };