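/**
 * html-to-document-adapter-pdf
 *
 * PDF adapter for html-to-document-core — converts a DocumentElement tree into a .pdf.
 * In the browser the tree is rendered to HTML and passed to the html2pdf.js library;
 * in Node it is converted to DOCX first and then to PDF via libreoffice-convert.
 */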
import { toHtml, } from 'html-to-document-core';
import { DocxAdapter } from 'html-to-document-adapter-docx';
/**
 * Runtime check (and TS type guard in the original source) that recognises an
 * html2pdf builder object: a truthy value exposing callable `set`, `from`
 * and `outputPdf` members.
 *
 * @param {unknown} obj - Candidate value to inspect.
 * @returns {boolean} `true` when all three builder methods are functions.
 */
function isHtml2PdfBuilder(obj) {
  if (!obj) {
    return false;
  }
  const requiredMethods = ['set', 'from', 'outputPdf'];
  return requiredMethods.every((method) => typeof obj[method] === 'function');
}
// -----------------------------
/**
 * Adapter that turns a DocumentElement tree into a PDF.
 *
 * Two pathways exist, selected at call time by the presence of `window`:
 *  - Browser: the tree is rendered to HTML with `toHtml` and handed to html2pdf.js.
 *  - Node: the tree is converted to DOCX by `DocxAdapter`, then to PDF through
 *    `libreoffice-convert`.
 */
export class PDFAdapter {
  docxAdapter;
  _defaultStyles = {};

  /**
   * @param {object} dependencies - Forwarded unchanged to `DocxAdapter`; its
   *   optional `defaultStyles` property is shallow-copied for HTML rendering.
   */
  constructor(dependencies) {
    this.docxAdapter = new DocxAdapter(dependencies);
    this._defaultStyles = { ...(dependencies.defaultStyles ?? {}) };
  }

  /**
   * Converts the element tree to a PDF.
   *
   * @param {Array} elements - DocumentElement tree to convert.
   * @returns {Promise<*>} Whatever the selected pathway yields: the html2pdf
   *   `outputPdf('blob')` result in the browser, the libreoffice-convert result in Node.
   * @throws {Error} "PDF conversion failed: …" with the underlying error as `cause`.
   */
  async convert(elements) {
    try {
      if (typeof window !== 'undefined') {
        // Browser: render to HTML and feed it straight to html2pdf.
        // The HTML is only built on this path; the Node path never uses it.
        const htmlString = toHtml(elements, this._defaultStyles);
        return await this.convertHtmlInBrowser(htmlString);
      }
      // Node: fall back to the DOCX ➜ PDF pathway.
      const docxResult = await this.docxAdapter.convert(elements);
      return await this.convertInNode(docxResult);
    } catch (error) {
      throw new Error(
        `PDF conversion failed: ${error instanceof Error ? error.message : String(error)}`,
        { cause: error },
      );
    }
  }

  /**
   * Converts a DOCX payload to PDF using `libreoffice-convert`.
   *
   * Both `libreoffice-convert` and `util` are imported dynamically so they are
   * only loaded when this Node pathway actually runs.
   *
   * @param {*} docxBuffer - DOCX data as produced by `DocxAdapter.convert`.
   * @returns {Promise<*>} The value produced by the promisified `convert` call.
   * @throws {Error} "LibreOffice conversion failed: …" with the underlying error as `cause`.
   */
  async convertInNode(docxBuffer) {
    try {
      const { convert } = await import('libreoffice-convert');
      const { promisify } = await import('util');
      const convertAsync = promisify(convert);
      return await convertAsync(docxBuffer, '.pdf', undefined);
    } catch (error) {
      throw new Error(
        `LibreOffice conversion failed: ${error instanceof Error ? error.message : String(error)}`,
        { cause: error },
      );
    }
  }

  /**
   * Determines an image's height as a number, trying in order:
   *  1. the integer parsed from its `height` attribute,
   *  2. the integer following a `height:` declaration in its inline `style`,
   *  3. the `naturalHeight` of a probe `Image` loaded from its `src`.
   * Falls back to 100 when there is no `src`, the probe fails to load, or the
   * probe reports a natural height of 0.
   *
   * @param {{ getAttribute(name: string): (string|null) }} img - The image element.
   * @returns {Promise<number>} The resolved height.
   */
  async getImageHeight(img) {
    const FALLBACK_IMAGE_HEIGHT = 100;

    const attrHeight = Number.parseInt(img.getAttribute('height') ?? '', 10);
    if (!Number.isNaN(attrHeight)) {
      return attrHeight;
    }

    const styleAttr = img.getAttribute('style');
    if (styleAttr) {
      // Anchor on the start of a declaration so that `max-height`,
      // `min-height` and `line-height` are not mistaken for `height`.
      const match = /(?:^|;)\s*height\s*:\s*(\d+)/i.exec(styleAttr);
      if (match) {
        return Number.parseInt(match[1], 10);
      }
    }

    const src = img.getAttribute('src');
    if (!src) {
      // Nothing to load: skip the probe instead of assigning an empty src.
      return FALLBACK_IMAGE_HEIGHT;
    }

    return await new Promise((resolve) => {
      const probe = new Image();
      probe.onload = () => resolve(probe.naturalHeight || FALLBACK_IMAGE_HEIGHT);
      probe.onerror = () => resolve(FALLBACK_IMAGE_HEIGHT);
      probe.src = src;
    });
  }

  /**
   * Inserts a <div class="html2pdf__page-break"> before a block only when its
   * estimated height would overflow the space remaining on the current page.
   *
   * The calculation is intentionally approximate – image heights come from
   * `getImageHeight` and text is assumed to wrap at 80 characters per line with
   * a constant line height. Only the direct children of the container are
   * considered (the body, or its single child when the body has exactly one).
   *
   * No break is inserted before a block that already sits at the top of a page
   * (the first block, or one directly after an existing page break): a break
   * there could not create more room and would only add a page break ahead of
   * a block that is already first on its page.
   *
   * @param {string} html - HTML markup to process.
   * @returns {Promise<string>} The body's inner HTML with page breaks inserted.
   */
  async insertPageBreaks(html) {
    const PAGE_HEIGHT = 9 * 96; // letter page minus 1in margins -> px
    const LINE_HEIGHT = 16; // rough text line height in px
    const CHARS_PER_LINE = 80; // assumed characters per wrapped text line
    const IMAGE_PADDING = 20; // extra padding for images
    const ELEMENT_MARGIN = 36; // approximate top/bottom margin per block
    const FALLBACK_IMAGE_HEIGHT = 100; // used when an image was not measured
    const PAGE_BREAK_CLASS = 'html2pdf__page-break';

    const doc = new DOMParser().parseFromString(html, 'text/html');

    // Pre-measure all images in parallel.
    const imgs = Array.from(doc.querySelectorAll('img'));
    const heights = await Promise.all(imgs.map((img) => this.getImageHeight(img)));
    const imgHeights = new Map(
      imgs.map((img, idx) => [img, heights[idx] + IMAGE_PADDING]),
    );

    const estimateHeight = (element) => {
      if (element.tagName.toLowerCase() === 'img') {
        return (imgHeights.get(element) || FALLBACK_IMAGE_HEIGHT) + ELEMENT_MARGIN;
      }
      const text = element.textContent ?? '';
      const lines = Math.ceil(text.trim().length / CHARS_PER_LINE) || 1;
      let height = lines * LINE_HEIGHT;
      for (const img of element.querySelectorAll('img')) {
        height += imgHeights.get(img) || FALLBACK_IMAGE_HEIGHT;
      }
      return height + ELEMENT_MARGIN;
    };

    const container = doc.body.children.length === 1
      ? doc.body.firstElementChild
      : doc.body;

    const breakBefore = [];
    let remaining = PAGE_HEIGHT;
    let atPageTop = true; // nothing has been placed on the current page yet

    for (const el of Array.from(container.children)) {
      if (el.classList.contains(PAGE_BREAK_CLASS)) {
        remaining = PAGE_HEIGHT;
        atPageTop = true;
        continue;
      }
      const elHeight = estimateHeight(el);
      if (elHeight >= remaining && !atPageTop) {
        breakBefore.push(el);
        remaining = PAGE_HEIGHT;
      }
      remaining -= elHeight;
      if (remaining <= 0) {
        // The block spilled past the page; approximate by starting the count over.
        remaining = PAGE_HEIGHT;
      }
      atPageTop = false;
    }

    for (const target of breakBefore) {
      const pageBreak = doc.createElement('div');
      pageBreak.className = PAGE_BREAK_CLASS;
      target.parentNode?.insertBefore(pageBreak, target);
    }
    return doc.body.innerHTML;
  }

  /**
   * Renders an HTML string to PDF in the browser using html2pdf.js.
   *
   * The module is imported dynamically; its default export (or the module
   * itself) may be either a factory function or an already-built builder.
   *
   * @param {string} html - HTML markup to render.
   * @returns {Promise<*>} The result of the builder's `outputPdf('blob')` call.
   * @throws {Error} "Browser PDF conversion failed: …" with the underlying error as `cause`.
   */
  async convertHtmlInBrowser(html) {
    try {
      // Pre-process the HTML so blocks that would overflow start on a new page.
      const processedHtml = await this.insertPageBreaks(html);

      const html2pdfModule = await import('html2pdf.js');
      const maybeExport = html2pdfModule.default ?? html2pdfModule;
      let builder;
      if (typeof maybeExport === 'function') {
        builder = maybeExport();
      } else if (isHtml2PdfBuilder(maybeExport)) {
        builder = maybeExport;
      } else {
        throw new Error('html2pdf module did not export a callable factory or builder object');
      }

      const opt = {
        margin: 1,
        filename: 'document.pdf',
        html2canvas: {
          scale: 2,
          useCORS: true, // allow cross‑origin images
          allowTaint: false, // keep canvas clean when CORS succeeds
          imageTimeout: 15000, // wait up to 15 s for images
        },
        jsPDF: { unit: 'in', format: 'letter', orientation: 'portrait' },
      };

      return await builder
        .set(opt)
        .from(processedHtml)
        .outputPdf('blob');
    } catch (error) {
      throw new Error(
        `Browser PDF conversion failed: ${error instanceof Error ? error.message : String(error)}`,
        { cause: error },
      );
    }
  }
}