UNPKG

pdfetch

Version:

Node.js application to download KB articles from an arbitrary ServiceNow instance in PDF format.

369 lines (344 loc) 12.2 kB
const puppeteer = require("puppeteer"); const fs = require("fs").promises; const path = require("path"); const axios = require("axios"); const { removeFolderContents } = require("./fs_tools"); /** * Function to retrieve a list of currently active KB articles from a ServiceNow instance. * * @param {String} instanceName * Name of ServiceNow instance to connect to, e.g., "acme" in "acme.service-now.com". * * @param {String} userName * Name of a local ITIL user to log in with. * * @param {String} userPass * Password to use when logging in. * * @param {String} targetFile * Absolute path to a JSON file where resulting list of articles is to be stored. * IMPORTANT: it is assumed that the user or process executing the pdFetch application * has the needed credentials for writing to that file. * * @param {String} [filter=null] * Optional ServiceNow encoded query to filter the results. * * @param {Function} [monitoringFn=null] * Optional function to receive real-time monitoring information. * * @returns {Promise<Array<Object>>} * Promise resolving to an array of objects, each containing the KB * article details. Each Object in the returned Array has these keys: * `sys_id`, `number`, `version`, `short_description`, * `kb_knowledge_base`, `sys_domain`, `sys_updated_on`, * `sys_updated_on_msecs`. * The `version`, `kb_knowledge_base` and `sys_domain` are Objects * with extended information. */ async function getArticlesList( instanceName, userName, userPass, targetFile, filter = null, monitoringFn = null ) { // Ensure defaults for optional arguments. filter = (filter || "").trim(); const $m = monitoringFn || function () {}; // Prepare the call to the Table API o the target ServiceNow instance. const url = `https://${instanceName}.service-now.com/api/now/table/kb_knowledge`; const auth = { username: userName, password: userPass, }; const query = `workflow_state=published${filter ? "^" + filter : ""}`; const params = { sysparm_query: query, sysparm_display_value: true, sysparm_fields: "sys_id,number,short_description,version,sys_updated_on,sys_domain,kb_knowledge_base", }; // Call the API. $m({ type: "info", message: `Listing articles from "${instanceName}.service-now" that match query: "${query}"...`, }); try { const response = await axios.get(url, { auth, params }); $m({ type: "debug", message: `Got response from "${instanceName}.service-now".`, }); // Loop through received articles and pack them as an Array of Objects. Refine raw data as needed. const articles = response.data.result.map((article) => { $m({ type: "info", message: `Processing article "${article.number} v${article.version.display_value} - ${article.short_description}"...`, }); const updatedAt = new Date(article.sys_updated_on).getTime(); return { sys_id: article.sys_id, number: article.number, version: article.version, short_description: article.short_description, kb_knowledge_base: article.kb_knowledge_base, sys_domain: article.sys_domain, sys_updated_on: article.sys_updated_on, sys_updated_on_msecs: updatedAt, }; }); $m({ type: "info", message: `All done. Processed ${articles.length} article(s).`, }); try { await fs.writeFile(targetFile, JSON.stringify(articles, null, "\t")); $m({ type: "debug", message: `Successfully wrote file "${targetFile}".`, }); } catch (err) { $m({ type: "error", message: `Error writing file "${targetFile}".`, }); } return articles; } catch (error) { // Handle errors in calling the API, if any. $m({ type: "error", message: `Error listing articles from "${instanceName}.service-now". Details: ${error.message}`, data: { error }, }); } } /** * Low-level function that logs onto a ServiceNow instance, navigates to given KB articles * within that instance, and downloads them as PDFs. * * IMPORTANT: this is a low-level function. It is expected that the data it receives as * arguments has been validated outside it. * * @param {String} instanceName * Name of ServiceNow instance to connect to, e.g., "acme" in "acme.service-now.com". * * @param {String} userName * Name of a local itil user to log in with. * * @param {String} userPass * Password to use when logging it. * * @param {String[]} articleNumbers * Array of strings representing ServiceNow Knowledge article numbers, e.g., * ['KB0010279', 'KB0010280', 'KB0010281']. IMPORTANT: these are assumed as valid * and accessible by the given `userName`. * * @param {String} targetDir * Absolute path to a folder where downloaded PDFs are to be placed. IMPORTANT: it is * assumed that the user or process executing the pdFetch application has the needed * credentials for writing to that folder. * * @param {Boolean} resetTarget * Optional, default `false`. Whether to delete all the files within the `targetDir` * folder before writing new ones. * * @param {Function} monitoringFn * Optional, default `null`. Function to be sent monitoring information to, in * real-time, as KB articles are being fetched. Expected signature/arguments structure is: * onMonitoringInfo ({type:"info|warn|error", message:"<any>"[, data : {}]}); */ async function fetchArticles( instanceName, userName, userPass, articleNumbers, targetDir, resetTarget = false, monitoringFn = null ) { const $m = monitoringFn || function () {}; $m({ type: "debug", message: "Opening headless browser..." }); const browser = await puppeteer.launch({ headless: "shell" }); const page = await browser.newPage(); $m({ type: "info", message: "Logging in..." }); await page.goto(`https://${instanceName}.service-now.com/login.do`, { waitUntil: "networkidle2", }); await page.type("#user_name", userName); await page.type("#user_password", userPass); await page.click("#sysverb_login"); await page.waitForNavigation({ waitUntil: "networkidle2" }); if (resetTarget) { removeFolderContents(targetDir, $m); } for (const articleNumber of articleNumbers) { try { $m({ type: "info", message: `Downloading KB article "${articleNumber}"...`, }); await page.goto( `https://${instanceName}.service-now.com/kb_view.do?sysparm_article=${articleNumber}&sysparm_media=print`, { waitUntil: "networkidle2" } ); const pdfPath = path.join(targetDir, `${articleNumber}.pdf`); // PAGE ADJUSTMENTS // 1. Ensure all `<details>` HTML tags within the page about to be printed will stay open, so that their // disclosed content will be printed as well. const numDetailsTags = await page.evaluate(() => { const allDetailsTags = document.querySelectorAll("details"); allDetailsTags.forEach((detail) => { detail.open = true; }); return allDetailsTags.length; }); if (numDetailsTags) { $m({ type: "debug", message: `Found and expanded ${numDetailsTags} "<details>" HTML tag(s) for print.`, }); } // 2. Remove unwanted elements on the page. These are standard ServiceNow element in the KB pages, // and we expect to find them in whatever instances we connect to. await page.evaluate(() => { const selectorsToRemove = [ "#versionNumber", "#articleStarRatingGroup", ".kb-article-view-count", ".snc-article-header-author", ]; for (const selector of selectorsToRemove) { const element = document.querySelector(selector); if (element) { element.remove(); } } }); await page.pdf({ path: pdfPath, format: "A4", printBackground: true }); $m({ type: "debug", message: `Done. Successfully downloaded "${articleNumber}".`, }); } catch (error) { $m({ type: "error", message: `Failed to download ${articleNumber}: ${error.message}`, data: { error }, }); } } await browser.close(); $m({ type: "info", message: `All done. Fetched ${articleNumbers.length} article(s).`, }); } /** * Function to determine changes in the list of ServiceNow KB articles between * two points in time. * * @param {String} currListFile * Absolute path to the current list JSON file. * * @param {String} olderListFile * Absolute path to the older list JSON file. * * @param {String} targetChangesFile * Absolute path to the target JSON file where changes will be written. * * @param {Function} [monitoringFn=null] * Optional function to receive real-time monitoring information. * * @returns {Object} The content written to the `targetChangesFile`. This will * be an Object resembling to the following: * { * "last_updated_on": "2024/07/09 14:52:03", * "changes": { * "removed": ["KB001234", "KB004567"], * "added": ["KB8888", "KB9999"], * "updated": ["KB74125", "KB89652"] * } * } */ async function getKBChanges( currListFile, olderListFile, targetChangesFile, monitoringFn = null ) { const $m = monitoringFn || function () {}; try { // Load current and older lists of articles. $m({ type: "info", message: `Loading current articles from "${currListFile}"...`, }); const currListData = await fs.readFile(currListFile, "utf-8"); const currList = JSON.parse(currListData); $m({ type: "info", message: `Loading older articles from "${olderListFile}"...`, }); const olderListData = await fs.readFile(olderListFile, "utf-8"); const olderList = JSON.parse(olderListData); // Create sets for easy lookup const currArticles = new Map( currList.map((article) => [article.sys_id, article]) ); const olderArticles = new Map( olderList.map((article) => [article.sys_id, article]) ); // Detect changes const changes = { added: [], updated: [], removed: [], }; // Find added and updated articles currArticles.forEach((article, sys_id) => { if (!olderArticles.has(sys_id)) { changes.added.push(article.number); $m({ type: "info", message: `Article "${article.number}" added.` }); } else if ( article.version.display_value !== olderArticles.get(sys_id).version.display_value ) { changes.updated.push(article.number); $m({ type: "info", message: `Article "${article.number}" updated from version "${ olderArticles.get(sys_id).version.display_value }" to "${article.version.display_value}".`, }); } }); // Find removed articles olderArticles.forEach((article, sys_id) => { if (!currArticles.has(sys_id)) { changes.removed.push(article.number); $m({ type: "info", message: `Article "${article.number}" removed.` }); } }); // Add timestamp and write changes to the target file const changesFileContent = { last_updated_on: new Date().toISOString(), changes, }; await fs.writeFile( targetChangesFile, JSON.stringify(changesFileContent, null, 2) ); $m({ type: "info", message: `Changes written to "${targetChangesFile}".` }); return changesFileContent; } catch (err) { $m({ type: "error", message: `Error processing KB changes. Details: ${err.message}`, data: { err }, }); } return null; } module.exports = { getArticlesList, fetchArticles, getKBChanges };