pdf-transform
Version:
Transforms PDF files into PNG or HTML files
148 lines (136 loc) • 5.55 kB
JavaScript
const Canvas = require("canvas");
const assert = require("assert").strict;
const fs = require("fs");
const pdfjsLib = require("./pdfjs-dist/es5/build/pdf.js");
/**
 * Canvas factory passed to PDF.js as `canvasFactory` when rendering pages.
 * It hands out `{ canvas, context }` pairs created through the `canvas`
 * package and offers hooks to resize or release them.
 */
function NodeCanvasFactory() {}

NodeCanvasFactory.prototype = {
  /**
   * Allocates a canvas of the given size together with its 2D context.
   *
   * @param {number} width - Canvas width; must be greater than 0.
   * @param {number} height - Canvas height; must be greater than 0.
   * @returns {{canvas: object, context: object}} The new canvas and its context.
   * @throws {AssertionError} If either dimension is not positive.
   */
  create: function NodeCanvasFactory_create(width, height) {
    const hasValidSize = width > 0 && height > 0;
    assert(hasValidSize, "Invalid canvas size");
    const surface = Canvas.createCanvas(width, height);
    const drawingContext = surface.getContext("2d");
    return { canvas: surface, context: drawingContext };
  },

  /**
   * Resizes the canvas held by an existing `{ canvas, context }` pair.
   *
   * @param {{canvas: object, context: object}} canvasAndContext - Pair from `create`.
   * @param {number} width - New width; must be greater than 0.
   * @param {number} height - New height; must be greater than 0.
   * @throws {AssertionError} If the pair has no canvas or a dimension is not positive.
   */
  reset: function NodeCanvasFactory_reset(canvasAndContext, width, height) {
    const surface = canvasAndContext.canvas;
    assert(surface, "Canvas is not specified");
    const hasValidSize = width > 0 && height > 0;
    assert(hasValidSize, "Invalid canvas size");
    surface.width = width;
    surface.height = height;
  },

  /**
   * Shrinks the canvas to 0x0 and clears both references on the pair
   * (presumably so the backing store can be released promptly).
   *
   * @param {{canvas: object, context: object}} canvasAndContext - Pair from `create`.
   * @throws {AssertionError} If the pair has no canvas.
   */
  destroy: function NodeCanvasFactory_destroy(canvasAndContext) {
    const surface = canvasAndContext.canvas;
    assert(surface, "Canvas is not specified");
    surface.width = 0;
    surface.height = 0;
    canvasAndContext.canvas = null;
    canvasAndContext.context = null;
  },
};
/**
 * Creates `directory` when it does not exist yet.
 *
 * @param {string} directory - Directory path to create if missing.
 */
const ensureDirectory = (directory) => {
  if (!fs.existsSync(directory)) {
    fs.mkdirSync(directory);
  }
};

/**
 * Promise adapter around the callback-based `fs.writeFile`.
 *
 * @param {string} filePath - Destination path.
 * @param {Buffer|string} data - Content to write.
 * @returns {Promise<void>} Resolves once written; rejects with the write error.
 */
const writeFileAsync = (filePath, data) =>
  new Promise((resolve, reject) => {
    fs.writeFile(filePath, data, (error) => {
      if (error) {
        reject(error);
      } else {
        resolve();
      }
    });
  });

/**
 * Builds the HTML document that embeds one page image as a base64 data URI.
 *
 * @param {string} base64Png - Base64-encoded image data.
 * @returns {string} The HTML document text.
 */
const buildPageHtml = (base64Png) => {
  const htmlBodyData = `<img src="data:image/png;base64, ${base64Png}" style="width:50vw; height:auto" />`;
  return `
<html>
<body style="margin: 0 !important; display: flex; flex-direction: column; width: 100%; height: auto; align-items: center; background-color: #e6e6e6;">
${htmlBodyData}
</body>
</html>
`;
};

/**
 * Renders a PDF.js page onto a Node canvas at 100% scale and returns the
 * canvas content as an image buffer. The canvas is released through the
 * factory's `destroy` whether rendering succeeds or fails.
 *
 * @param {object} page - Page object obtained from `pdfDocument.getPage`.
 * @returns {Promise<Buffer>} Resolves with the rendered image buffer.
 */
const renderPageToImage = (page) => {
  const viewport = page.getViewport({ scale: 1.0 });
  const canvasFactory = new NodeCanvasFactory();
  const canvasAndContext = canvasFactory.create(
    viewport.width,
    viewport.height
  );
  const renderTask = page.render({
    canvasContext: canvasAndContext.context,
    viewport: viewport,
    canvasFactory: canvasFactory,
  });
  return renderTask.promise.then(
    () => {
      // Take the buffer BEFORE destroying: destroy() nulls the canvas reference.
      const image = canvasAndContext.canvas.toBuffer();
      canvasFactory.destroy(canvasAndContext);
      return image;
    },
    (error) => {
      canvasFactory.destroy(canvasAndContext);
      throw error;
    }
  );
};

/**
 * Writes the HTML wrapper for one page into `./html-files`.
 * A write failure is logged and does not reject.
 *
 * @param {number} pageNumber - 1-based page number used in the file name.
 * @param {Buffer} image - Rendered page image.
 * @returns {Promise<void>} Resolves after the write attempt has finished.
 */
const writePageHtml = (pageNumber, image) => {
  ensureDirectory("./html-files");
  // Encode the in-memory buffer directly instead of re-reading the PNG from disk.
  const html = buildPageHtml(image.toString("base64"));
  return writeFileAsync(`./html-files/sample_${pageNumber}.html`, html).then(
    () => {
      console.log(
        `HTML file created successfully and saved as sample_${pageNumber}.html in html-files folder.`
      );
    },
    (error) => {
      console.error(`Error in writng HTML file: ${error}`);
    }
  );
};

/**
 * Converts a single page: renders it, writes `./png-outputs/output_<n>.png`
 * and, when `options.convertTo === "html"`, also writes the HTML wrapper.
 * Every failure is logged, so the returned promise never rejects and one bad
 * page cannot abort the others.
 *
 * @param {object} pdfDocument - Loaded PDF.js document.
 * @param {number} pageNumber - 1-based page number.
 * @param {object} options - The options object given to `convert`.
 * @returns {Promise<void>} Resolves when this page has been processed.
 */
const convertPage = (pdfDocument, pageNumber, options) =>
  pdfDocument
    .getPage(pageNumber)
    .then(renderPageToImage)
    .then((image) => {
      // TODO: define directory structure if necessary -> pdf file name or timestamp
      ensureDirectory("./png-outputs");
      return writeFileAsync(`./png-outputs/output_${pageNumber}.png`, image).then(
        () => {
          console.log(
            `Finished converting ${pageNumber} page of PDF file to a PNG image.`
          );
          if (options.convertTo === "html") {
            return writePageHtml(pageNumber, image);
          }
          return undefined;
        },
        (error) => {
          console.error(`Error: ${error}`);
        }
      );
    })
    .catch((error) => {
      // Rendering/setup failures used to surface as unhandled rejections.
      console.error(`Error converting page ${pageNumber}: ${error}`);
    });

/**
 * Converts every page of a PDF file into a PNG image under `./png-outputs`
 * and, optionally, into a per-page HTML file under `./html-files`.
 *
 * @param {object} options - Conversion options.
 * @param {string} options.fileName - Path of the PDF file; read synchronously.
 * @param {string} [options.convertTo] - When equal to "html", an HTML file
 *   embedding each page image is written in addition to the PNG.
 * @returns {Promise<void>} Resolves once all pages have been processed.
 *   Asynchronous failures are logged rather than rejected.
 * @throws {Error} Synchronously, if the PDF file cannot be read.
 */
const convert = (options) => {
  const pdfFileName = options.fileName;
  // Read the PDF file into a typed array so PDF.js can load it.
  const rawData = new Uint8Array(fs.readFileSync(pdfFileName));
  // Load the PDF file.
  const loadingTask = pdfjsLib.getDocument(rawData);
  return loadingTask.promise
    .then((pdfDocument) => {
      console.log("PDF document loaded.", pdfDocument.numPages);
      // Start the conversion of every page; pages are numbered from 1.
      const pageTasks = [];
      for (let i = 1; i <= pdfDocument.numPages; i++) {
        pageTasks.push(convertPage(pdfDocument, i, options));
      }
      // TODO: optionally compile all pages into a single HTML file
      // (e.g. ./compiled-html-files/sample.html).
      return Promise.all(pageTasks);
    })
    .catch((reason) => {
      console.log(reason);
    })
    .then(() => undefined);
};
// Public API of this module: only the `convert` entry point is exported.
module.exports = { convert: convert };