@brngdsn/docly
Version:
A tool to convert Markdown files into PDFs, merge multiple PDFs together, and extract pages from PDFs.
430 lines (404 loc) • 11.9 kB
JavaScript
import { readFile } from 'fs/promises';
import { marked } from 'marked';
import hljs from 'highlight.js';
import puppeteer from 'puppeteer';
import path from 'path';
import { pathToFileURL } from 'url';
import { writeFile, unlink } from 'fs/promises';
import { tmpdir } from 'os';
// Global marked configuration: fenced code blocks are highlighted through
// highlight.js, and marked's email mangling and heading ids are switched off.
marked.setOptions({
  mangle: false,
  headerIds: false,
  highlight(code, lang) {
    // Languages highlight.js does not recognise (or a missing language tag)
    // are rendered as 'plaintext'.
    const language = hljs.getLanguage(lang) ? lang : 'plaintext';
    const { value } = hljs.highlight(code, { language });
    return value;
  }
});
/**
 * Build a marked renderer that turns `cover:front` / `cover:back` images into
 * full-page cover blocks and wraps other captioned images in a <figure>.
 *
 * @returns {Object} A marked.Renderer instance with a customised `image` method.
 */
function createCoverAwareRenderer() {
  const renderer = new marked.Renderer();
  const originalImage = renderer.image.bind(renderer);
  renderer.image = function (href, title, text) {
    // An alt text of "cover:front" or "cover:back" (case-insensitive) marks a cover image.
    if (text && text.toLowerCase().startsWith('cover:')) {
      const coverType = text.toLowerCase().replace('cover:', '').trim();
      if (coverType === 'front' || coverType === 'back') {
        return `<div class="cover-page cover-${coverType}"><img src="${href}" alt="${text}" title="${title || ''}"></div>`;
      }
    }
    // Regular image: delegate to the stock renderer.
    const imgTag = originalImage(href, title, text);
    // Non-blank alt text doubles as a visible caption.
    if (text && text.trim()) {
      return `<figure class="image-with-caption">${imgTag}<figcaption>${text}</figcaption></figure>`;
    }
    return imgTag;
  };
  return renderer;
}

/**
 * Wrap every <ul>/<ol> element in a `.list-wrapper` div.
 *
 * Opening tags are matched with or without attributes (for example
 * `<ol start="3">`), so each `</div>` appended after a closing tag always has
 * a matching opening wrapper.
 *
 * @param {string} html - HTML produced by marked.
 * @returns {string} The HTML with list elements wrapped.
 */
function wrapListsInMarginDivs(html) {
  return html
    .replace(/<(ul|ol)(\s[^>]*)?>/g, '<div class="list-wrapper"><$1$2>')
    .replace(/<\/(ul|ol)>/g, '</$1></div>');
}

/**
 * Build the HTML document used when the Markdown contains only cover images.
 *
 * @param {string} baseHref - file:// URL of the Markdown file's directory.
 * @param {string[]} coverPages - Cover page HTML fragments, in document order.
 * @returns {string} A complete HTML document.
 */
function buildCoverOnlyHtml(baseHref, coverPages) {
  return `<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<base href="${baseHref}">
<title>Cover</title>
<style>
@page {
margin: 0;
size: A4;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
html, body {
width: 210mm;
height: 297mm;
margin: 0;
padding: 0;
overflow: hidden;
}
.cover-page {
width: 210mm;
height: 297mm;
margin: 0;
padding: 0;
position: relative;
overflow: hidden;
display: block;
}
.cover-page img {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
object-fit: cover;
margin: 0;
padding: 0;
display: block;
}
</style>
</head>
<body>
${coverPages.join('')}
</body>
</html>`;
}

/**
 * Build the HTML document for a regular document: front covers, then the main
 * content inside a `.container`, then back covers.
 *
 * @param {string} baseHref - file:// URL of the Markdown file's directory.
 * @param {string[]} frontCovers - Front cover HTML fragments.
 * @param {string} mainContent - Rendered document body with cover blocks removed.
 * @param {string[]} backCovers - Back cover HTML fragments.
 * @returns {string} A complete HTML document with inline styles.
 */
function buildDocumentHtml(baseHref, frontCovers, mainContent, backCovers) {
  return `
<html>
<head>
<meta charset="UTF-8">
<base href="${baseHref}">
<title>Document</title>
<style>
/* PDF page margins */
@page {
margin: 0.5in;
}
/* Special page setup for cover pages - no margins */
@page cover {
margin: 0;
}
body {
font-family: Arial, sans-serif;
margin: 0;
padding: 0;
line-height: 1.6;
min-height: 0;
}
.container {
margin: 0;
padding: 0;
page-break-before: auto;
}
/* Cover page styles */
.cover-page {
page: cover;
width: 100vw;
height: 100vh;
margin: 0;
padding: 0;
display: block;
position: relative;
overflow: hidden;
page-break-inside: avoid;
}
/* Only add page break when there's a container after */
.cover-page + .container {
page-break-before: always;
}
.cover-page img {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
object-fit: cover;
margin: 0;
padding: 0;
display: block;
}
h1, h2, h3, h4, h5, h6 {
color: #333;
margin-top: 1.2em;
margin-bottom: 0.6em;
}
p {
margin: 0.6em 0;
}
/* List styles with proper indentation */
ul, ol {
margin: 0.6em 0;
padding-left: 2em;
}
li {
margin: 0.3em 0;
}
/* List wrapper for symmetric margins */
.list-wrapper {
margin: 0 2em;
}
pre {
background: #f4f4f4;
padding: 10px;
white-space: pre-wrap;
overflow-wrap: break-word;
margin: 1em 0;
border-radius: 4px;
}
code {
background: #f4f4f4;
padding: 2px 4px;
}
blockquote {
border-left: 4px solid #ccc;
padding-left: 10px;
color: #666;
margin: 1em 0;
}
table {
border-collapse: collapse;
width: 100%;
}
th, td {
border: 1px solid #ddd;
padding: 8px;
}
th {
background-color: #f2f2f2;
}
/* Image styles */
img {
max-width: 100%;
height: auto;
display: block;
margin: 1em auto;
}
/* Inline images */
p img {
display: inline;
margin: 0;
}
/* Figure and caption styles */
figure.image-with-caption {
margin: 1.5em 0;
text-align: center;
max-width: 100%;
padding: 0;
display: block;
}
figure.image-with-caption img {
margin: 0 auto 0.5em;
max-width: 100%;
height: auto;
display: block;
}
figure.image-with-caption figcaption {
font-size: 0.9em;
color: #666;
font-style: italic;
margin-top: 0.5em;
padding: 0 1em;
}
/* Highlight.js default theme */
.hljs {
display: block;
overflow-x: auto;
padding: 0.5em;
background: #f0f0f0;
color: #444;
}
.hljs-comment,
.hljs-quote {
color: #998;
font-style: italic;
}
.hljs-keyword,
.hljs-selector-tag,
.hljs-subst {
color: #333;
font-weight: bold;
}
.hljs-number,
.hljs-literal,
.hljs-variable,
.hljs-template-variable,
.hljs-tag .hljs-attr {
color: #008080;
}
.hljs-string,
.hljs-doctag {
color: #d14;
}
.hljs-title,
.hljs-section,
.hljs-selector-id {
color: #900;
font-weight: bold;
}
.hljs-type,
.hljs-class .hljs-title {
color: #458;
font-weight: bold;
}
.hljs-tag,
.hljs-name,
.hljs-attribute {
color: #000080;
}
.hljs-regexp,
.hljs-link {
color: #009926;
}
.hljs-symbol,
.hljs-bullet {
color: #990073;
}
.hljs-built_in,
.hljs-builtin-name {
color: #0086b3;
}
.hljs-meta {
color: #999;
font-weight: bold;
}
.hljs-deletion {
background: #fdd;
}
.hljs-addition {
background: #dfd;
}
</style>
</head>
<body>
${frontCovers.join('')}${mainContent ? `<div class="container">${mainContent}</div>` : ''}${backCovers.join('')}
</body>
</html>`;
}

/**
 * Write an HTML string to a temporary file, load it in headless Chromium via
 * Puppeteer and print it to PDF.
 *
 * The temporary file is created inside `workDir` and loaded through a file://
 * URL. Both the temporary file and the browser are released in a `finally`
 * block, so a failure at any step (write, launch, navigation, PDF generation)
 * does not leave a browser process or temp file behind.
 *
 * @param {Object} options
 * @param {string} options.html - Complete HTML document to render.
 * @param {string} options.workDir - Directory in which the temporary HTML file is created.
 * @param {Object} options.pdfOptions - Options passed unchanged to `page.pdf()`.
 * @param {Object} [options.viewport] - If given, passed to `page.setViewport()` before navigation.
 * @param {number} [options.settleMs=0] - Extra delay in milliseconds after navigation, before printing.
 * @returns {Promise<void>}
 */
async function renderHtmlToPdf({ html, workDir, pdfOptions, viewport = null, settleMs = 0 }) {
  // Timestamp alone can collide when two conversions start in the same
  // millisecond; pid + random suffix keep concurrent runs apart.
  const uniqueSuffix = `${Date.now()}-${process.pid}-${Math.random().toString(36).slice(2)}`;
  const tempHtmlPath = path.join(workDir, `.temp-${uniqueSuffix}.html`);
  let browser;
  try {
    await writeFile(tempHtmlPath, html);
    browser = await puppeteer.launch({
      headless: 'new',
      args: ['--allow-file-access-from-files'] // Allow local file access
    });
    const page = await browser.newPage();
    if (viewport) {
      await page.setViewport(viewport);
    }
    await page.goto(pathToFileURL(tempHtmlPath).href, {
      waitUntil: 'networkidle0'
    });
    if (settleMs > 0) {
      // page.waitForTimeout() is deprecated and was removed from Puppeteer;
      // a plain timer gives the same pause on every version.
      await new Promise((resolve) => setTimeout(resolve, settleMs));
    }
    await page.pdf(pdfOptions);
  } finally {
    // Best-effort cleanup: the temp file may never have been written.
    await unlink(tempHtmlPath).catch(() => {});
    if (browser) {
      await browser.close();
    }
  }
}

/**
 * Convert a Markdown file to a PDF.
 *
 * Images whose alt text is `cover:front` or `cover:back` are rendered as
 * full-page covers; front covers are placed before the main content and back
 * covers after it. A document that contains only cover images is rendered
 * with a dedicated margin-less A4 layout. The HTML is written to a temporary
 * file next to the Markdown file and removed again after rendering.
 *
 * @param {Object} options - Options for conversion.
 * @param {string} options.markdownPath - Path to the Markdown input file.
 * @param {string} options.pdfPath - Path to output the generated PDF.
 * @returns {Promise<Object>} Resolves with `{ success: true, pdfPath }`.
 * @throws {Error} "Conversion failed: <reason>" if any step fails; the
 *   underlying error is attached as `cause`.
 */
export async function convertMarkdownToPdf({ markdownPath, pdfPath }) {
  try {
    // Resolve once and use the absolute path for both reading and the base directory.
    const absoluteMarkdownPath = path.resolve(markdownPath);
    const markdownDir = path.dirname(absoluteMarkdownPath);
    const markdownContent = await readFile(absoluteMarkdownPath, 'utf8');

    // Convert Markdown to HTML (code highlighting comes from the global marked options).
    const htmlContent = marked.parse(markdownContent, {
      renderer: createCoverAwareRenderer()
    });
    const processedHtml = wrapListsInMarginDivs(htmlContent);

    // Pull the cover blocks emitted by the custom renderer out of the body.
    const coverPageRegex = /<div class="cover-page[^>]*><img[^>]*><\/div>/g;
    const coverPages = processedHtml.match(coverPageRegex) || [];
    const mainContent = processedHtml.replace(coverPageRegex, '');
    const frontCovers = coverPages.filter((page) => page.includes('cover-front'));
    const backCovers = coverPages.filter((page) => page.includes('cover-back'));
    const isCoverOnly = coverPages.length > 0 && mainContent.trim() === '';

    // Base URL for the document: the Markdown file's directory, with a
    // trailing separator so the URL denotes the directory itself.
    const baseHref = pathToFileURL(markdownDir + path.sep).href;

    if (isCoverOnly) {
      await renderHtmlToPdf({
        html: buildCoverOnlyHtml(baseHref, coverPages),
        workDir: markdownDir,
        // A4 at 96 DPI
        viewport: { width: 794, height: 1123, deviceScaleFactor: 1 },
        // Short pause after network idle before printing the cover images.
        settleMs: 500,
        pdfOptions: {
          path: pdfPath,
          width: '210mm',
          height: '297mm',
          printBackground: true,
          margin: { top: 0, bottom: 0, left: 0, right: 0 }
        }
      });
      return { success: true, pdfPath };
    }

    await renderHtmlToPdf({
      html: buildDocumentHtml(baseHref, frontCovers, mainContent, backCovers),
      workDir: markdownDir,
      pdfOptions: {
        path: pdfPath,
        format: 'A4',
        printBackground: true,
        margin: { top: '0', bottom: '0', left: '0', right: '0' }
      }
    });
    return { success: true, pdfPath };
  } catch (error) {
    // Keep the original error (stack, code) reachable through `cause`.
    throw new Error(`Conversion failed: ${error.message}`, { cause: error });
  }
}