@alauda/doom
Version:
Doctor Doom making docs.
138 lines (137 loc) • 5.43 kB
JavaScript
import fs from 'node:fs/promises';
import path from 'node:path';
import process from 'node:process';
import { asPDFName, PDFArray, PDFDict, PDFDocument, PDFObject, PDFString, } from 'pdf-lib';
import PDFMerger from 'pdf-merger-js';
import { mergePDFs } from '../../merge-pdfs/index.js';
import { convertPathToPosix } from './convertPathToPosix.js';
import { getUrlLink } from './getUrlLink.js';
/**
 * Turn a single outline-item dictionary into a plain outline node.
 *
 * @param doc - document handed through to `extractOutlines` for the nested items
 * @param dict - outline-item dictionary; its `Title` entry must expose `decodeText()`
 * @returns `{ title, dest, children }` where `dest` is the raw `Dest` entry
 *          (left unresolved, `undefined` when absent) and `children` are the
 *          nodes extracted from beneath this item
 */
const extractOutline = (doc, dict) => {
    const title = dict.get(asPDFName('Title')).decodeText();
    const dest = dict.get(asPDFName('Dest'));
    // The item dictionary itself acts as the parent of its own sub-items.
    const children = extractOutlines(doc, dict);
    return { title, dest, children };
};
/**
 * Collect the outline items directly beneath an outline (or outline-item)
 * dictionary by following its `First` entry and then each item's `Next` entry.
 *
 * @param doc - document whose `context` resolves the dictionary references
 * @param dictObj - the parent dictionary, or a reference to it
 * @returns the extracted nodes in sibling order; empty when there is no `First`
 */
const extractOutlines = (doc, dictObj) => {
    const parent = doc.context.lookup(dictObj, PDFDict);
    let current = doc.context.lookupMaybe(parent.get(asPDFName('First')), PDFDict);
    if (!current) {
        return [];
    }
    const collected = [];
    for (;;) {
        collected.push(extractOutline(doc, current));
        const nextRef = current.get(asPDFName('Next'));
        if (!nextRef) {
            break;
        }
        current = doc.context.lookup(nextRef, PDFDict);
    }
    return collected;
};
/**
 * Find the outline node that a URL fragment points at.
 *
 * Each node is checked first, then its direct children; the first hit wins and
 * ends the search.
 *
 * @param nodes - outline nodes of one page, each with `destination` and `children`
 * @param hash - fragment to look for
 * @returns the matching node or child, or `undefined` when nothing matches
 */
const findOutlineNodeByHash = (nodes, hash) => {
    for (const node of nodes) {
        if (node.destination === hash) {
            return node;
        }
        const child = node.children.find(({ destination }) => destination === hash);
        if (child) {
            return child;
        }
    }
    return undefined;
};
/**
 * Rewrite external `http(s)` link annotations so they jump to the matching
 * outline destination inside the PDF instead of opening the URL.
 *
 * Based on @see https://github.com/Hopding/pdf-lib/issues/867#issuecomment-827570106
 *
 * @param pdfData - PDF data to load
 * @param pdfOutlines - `[link, outlineNodes]` pairs; each outline node has a
 *        `destination` and `children`. The flattened nodes are paired with the
 *        document's top-level outline entries by position, and each node's
 *        children with that entry's children.
 * @returns `pdfData` unchanged when the document has no `Outlines` entry,
 *          otherwise the result of saving the modified document
 */
export const replaceLinksWithOutline = async (pdfData, pdfOutlines) => {
    const pdfDoc = await PDFDocument.load(pdfData);
    const outlinesObj = pdfDoc.catalog.get(asPDFName('Outlines'));
    if (!outlinesObj) {
        return pdfData;
    }
    const outlineMap = new WeakMap();
    const outlineNodes = pdfOutlines.flatMap(([, nodes]) => nodes);
    const outlineNodeMap = new Map(pdfOutlines);
    const outlines = extractOutlines(pdfDoc, outlinesObj);
    for (const [index, outlineNode] of outlineNodes.entries()) {
        const outline = outlines[index];
        if (!outline) {
            // Fewer outline entries in the PDF than outline nodes: leave the
            // node unmapped so links to it are simply kept as they are.
            continue;
        }
        outlineMap.set(outlineNode, outline);
        for (const [childIndex, outlineChild] of outlineNode.children.entries()) {
            const childOutline = outline.children[childIndex];
            if (childOutline) {
                outlineMap.set(outlineChild, childOutline);
            }
        }
    }
    for (const page of pdfDoc.getPages()) {
        for (const annot of page.node.Annots()?.asArray() || []) {
            const dict = pdfDoc.context.lookupMaybe(annot, PDFDict);
            const aRecord = dict?.get(asPDFName('A'));
            const action = pdfDoc.context.lookupMaybe(aRecord, PDFDict);
            // Get the original link and strip the surrounding parentheses.
            const uri = action?.get(asPDFName('URI'))?.toString().slice(1, -1);
            if (!dict || !aRecord || !action || !uri) {
                continue;
            }
            // Decode backslash-octal escapes back into their characters.
            const url = uri.replace(/\\(\d{2,3})/g, (_, code) => String.fromCharCode(Number.parseInt(code, 8)));
            if (!/^https?:\/\//.test(url)) {
                continue;
            }
            const { link, hash } = getUrlLink(url);
            let matched = outlineNodeMap.get(link);
            if (!matched && link.endsWith('/')) {
                matched = outlineNodeMap.get(`${link}index.html`);
            }
            if (!matched?.length) {
                continue;
            }
            // Fall back to the page's first node when there is no fragment or
            // the fragment matches nothing.
            const matchedOutlineNode = (hash && findOutlineNodeByHash(matched, hash)) || matched[0];
            const matchedOutline = outlineMap.get(matchedOutlineNode);
            if (!matchedOutline?.dest) {
                // Without a destination the GoTo action would be a dead link;
                // keep the original external link instead.
                continue;
            }
            dict.set(asPDFName('A'), pdfDoc.context.obj({
                S: 'GoTo',
                D: matchedOutline.dest,
            }));
        }
    }
    return pdfDoc.save();
};
/**
 * Merge the per-page PDFs into a single output file.
 *
 * - No pages: throws before touching the file system.
 * - One page: the page file is moved to the output location.
 * - Several pages: the files are merged; when `pdfOutlines` is non-empty the
 *   merge goes through `mergePDFs` and links are then rewritten with
 *   `replaceLinksWithOutline`, otherwise `PDFMerger` is used.
 *
 * @param pages - page entries; each one provides `pagePath`, the page's PDF file
 * @param outFile - Output file name, joined onto the output directory
 * @param outDir - Output directory; created recursively when non-empty
 * @param pdfOutlines - `[link, outlineNodes]` pairs used to rewrite links;
 *        defaults to an empty list (no link rewriting)
 * @returns relativePath - Output path relative to the current working directory
 * @throws {Error} when `pages` is empty
 */
export async function mergePDF(pages, outFile, outDir, pdfOutlines = []) {
    if (pages.length === 0) {
        throw new Error('The website has no pages, please check whether the export path is set correctly');
    }
    const saveDirPath = path.resolve(outDir);
    if (outDir) {
        await fs.mkdir(saveDirPath, { recursive: true });
    }
    const saveFilePath = path.join(saveDirPath, outFile);
    if (pages.length === 1) {
        const [{ pagePath }] = pages;
        try {
            await fs.rename(pagePath, saveFilePath);
        }
        catch (err) {
            // rename cannot cross file systems; copy and remove the source instead.
            if (err?.code !== 'EXDEV') {
                throw err;
            }
            await fs.copyFile(pagePath, saveFilePath);
            await fs.unlink(pagePath);
        }
    }
    else {
        let pdfData;
        if (pdfOutlines.length > 0) {
            const relativePagePaths = pages.map(({ pagePath }) => convertPathToPosix(path.relative(process.cwd(), pagePath)));
            pdfData = await mergePDFs(relativePagePaths);
            pdfData = await replaceLinksWithOutline(pdfData, pdfOutlines);
        }
        else {
            const merger = new PDFMerger();
            // Pages are added one at a time, in the given order.
            for (const { pagePath } of pages) {
                await merger.add(pagePath);
            }
            pdfData = await merger.saveAsBuffer();
        }
        await fs.writeFile(saveFilePath, pdfData);
    }
    return path.relative(process.cwd(), saveFilePath);
}