pdf2json
Version:
PDF file parser that converts PDF binaries to JSON and text, powered by porting a fork of PDF.JS to Node.js
629 lines (529 loc) • 13.9 kB
JavaScript
import nodeUtil from "util";
import PDFLine from "./pdfline.js";
import PDFFill from "./pdffill.js";
import PDFFont from "./pdffont.js";
// alias some functions to make (compiled) code shorter
// eslint-disable-next-line @typescript-eslint/no-unused-vars
const { round: mr, sin: ms, cos: mc, abs, sqrt } = Math;
// precompute "00" to "FF"
const dec2hex = [];
for (let i = 0; i < 16; i++) {
for (let j = 0; j < 16; j++) {
dec2hex[i * 16 + j] = i.toString(16) + j.toString(16);
}
}
function createMatrixIdentity() {
return [
[1, 0, 0],
[0, 1, 0],
[0, 0, 1],
];
}
function matrixMultiply(m1, m2) {
const result = createMatrixIdentity();
for (let x = 0; x < 3; x++) {
for (let y = 0; y < 3; y++) {
let sum = 0;
for (let z = 0; z < 3; z++) {
sum += m1[x][z] * m2[z][y];
}
result[x][y] = sum;
}
}
return result;
}
function copyState(o1, o2) {
o2.fillStyle = o1.fillStyle;
o2.lineCap = o1.lineCap;
o2.lineJoin = o1.lineJoin;
o2.lineWidth = o1.lineWidth;
o2.miterLimit = o1.miterLimit;
o2.shadowBlur = o1.shadowBlur;
o2.shadowColor = o1.shadowColor;
o2.shadowOffsetX = o1.shadowOffsetX;
o2.shadowOffsetY = o1.shadowOffsetY;
o2.strokeStyle = o1.strokeStyle;
o2.globalAlpha = o1.globalAlpha;
o2.arcScaleX_ = o1.arcScaleX_;
o2.arcScaleY_ = o1.arcScaleY_;
o2.lineScale_ = o1.lineScale_;
o2.dashArray = o1.dashArray;
}
function processStyle(styleString) {
let str,
alpha = 1;
styleString = String(styleString);
if (styleString.substring(0, 3) === "rgb") {
const start = styleString.indexOf("(", 3);
const end = styleString.indexOf(")", start + 1);
const guts = styleString.substring(start + 1, end).split(",");
str = "#";
for (let i = 0; i < 3; i++) {
str += dec2hex[Number(guts[i])];
}
if (guts.length === 4 && styleString.substring(3, 4) === "a") {
alpha = guts[3];
}
} else {
str = styleString;
}
return { color: str, alpha };
}
// eslint-disable-next-line @typescript-eslint/no-unused-vars
function processLineCap(lineCap) {
switch (lineCap) {
case "butt":
return "flat";
case "round":
return "round";
case "square":
default:
return "square";
}
}
// Helper function that takes the already fixed cordinates.
function bezierCurveToHelper(self, cp1, cp2, p) {
self.currentPath_.push({
type: "bezierCurveTo",
cp1x: cp1.x,
cp1y: cp1.y,
cp2x: cp2.x,
cp2y: cp2.y,
x: p.x,
y: p.y,
});
self.currentX_ = p.x;
self.currentY_ = p.y;
}
function matrixIsFinite(m) {
for (let j = 0; j < 3; j++) {
for (let k = 0; k < 2; k++) {
if (!isFinite(m[j][k]) || isNaN(m[j][k])) {
return false;
}
}
}
return true;
}
function setM(ctx, m, updateLineScale) {
if (!matrixIsFinite(m)) {
return;
}
ctx.m_ = m;
if (updateLineScale) {
// Get the line scale.
// Determinant of this.m_ means how much the area is enlarged by the
// transformation. So its square root can be used as a scale factor
// for width.
const det = m[0][0] * m[1][1] - m[0][1] * m[1][0];
ctx.lineScale_ = sqrt(abs(det));
}
}
// eslint-disable-next-line @typescript-eslint/naming-convention
class CanvasPattern_ {
constructor() {}
}
// Gradient / Pattern Stubs
// eslint-disable-next-line @typescript-eslint/naming-convention
class CanvasGradient_ {
constructor(aType) {
this.type_ = aType;
this.x0_ = 0;
this.y0_ = 0;
this.r0_ = 0;
this.x1_ = 0;
this.y1_ = 0;
this.r1_ = 0;
this.colors_ = [];
}
addColorStop(aOffset, aColor) {
aColor = processStyle(aColor);
this.colors_.push({
offset: aOffset,
color: aColor.color,
alpha: aColor.alpha,
});
}
}
/**
* This class implements CanvasRenderingContext2D interface as described by
* the WHATWG.
* @param {HTMLElement} surfaceElement The element that the 2D context should
* be associated with
*/
// eslint-disable-next-line @typescript-eslint/naming-convention
export default class CanvasRenderingContext2D_ {
constructor(canvasTarget, scaledWidth, scaledHeight) {
this.m_ = createMatrixIdentity();
this.mStack_ = [];
this.aStack_ = [];
this.currentPath_ = [];
// Canvas context properties
this.strokeStyle = "#000";
this.fillStyle = "#000";
this.lineWidth = 1;
this.lineJoin = "miter";
this.lineCap = "butt";
this.dashArray = [];
this.miterLimit = 1;
this.globalAlpha = 1;
if (!("HLines" in canvasTarget) || !Array.isArray(canvasTarget.HLines))
canvasTarget.HLines = [];
if (!("VLines" in canvasTarget) || !Array.isArray(canvasTarget.VLines))
canvasTarget.VLines = [];
if (!("Fills" in canvasTarget) || !Array.isArray(canvasTarget.Fills))
canvasTarget.Fills = [];
if (!("Texts" in canvasTarget) || !Array.isArray(canvasTarget.Texts))
canvasTarget.Texts = [];
this.canvas = canvasTarget;
this.width = scaledWidth;
this.height = scaledHeight;
this.arcScaleX_ = 1;
this.arcScaleY_ = 1;
this.lineScale_ = 1;
this.currentFont = null;
}
//private helper methods
#drawPDFLine(p1, p2, lineWidth, color) {
const dashedLine = Array.isArray(this.dashArray) && this.dashArray.length > 1;
const pL = new PDFLine(p1.x, p1.y, p2.x, p2.y, lineWidth, color, dashedLine);
pL.processLine(this.canvas);
}
#drawPDFFill(cp, min, max, color) {
const width = max.x - min.x;
const height = max.y - min.y;
const pF = new PDFFill(cp.x, cp.y, width, height, color);
pF.processFill(this.canvas);
}
#needRemoveRect(x, y, w, h) {
const retVal = Math.abs(w - Math.abs(h)) < 1 && w < 13;
if (retVal) {
nodeUtil.p2jinfo(`Skipped: tiny rect: w=${w}, h=${h}`);
}
return retVal;
}
getContext(ctxType) {
return ctxType === "2d" ? this : null;
}
setLineDash(lineDash) {
this.dashArray = lineDash;
}
getLineDash() {
return this.dashArray;
}
fillText(text, x, y, maxWidth, fontSize) {
if (!text || (!text.length === 1 && text.trim().length < 1)) return;
const p = this.getCoords_(x, y);
const a = processStyle(this.fillStyle || this.strokeStyle);
const color = a ? a.color : "#000000";
this.currentFont.processText(
p,
text,
maxWidth,
color,
fontSize,
this.canvas,
this.m_
);
}
strokeText(text, x, y, maxWidth) {
//MQZ. 10/23/2012, yeah, no hollow text for now
this.fillText(text, x, y, maxWidth);
}
measureText(text) {
nodeUtil.p2jwarn("to be implemented: contextPrototype.measureText - ", text);
const chars = text.length || 1;
return { width: chars * (this.currentFont.spaceWidth || 5) };
}
setFont(fontObj) {
if (!!this.currentFont && typeof this.currentFont.clean === "function") {
this.currentFont.clean();
this.currentFont = null;
}
this.currentFont = new PDFFont(fontObj);
}
clearRect() {
nodeUtil.p2jwarn("to be implemented: contextPrototype.clearRect");
}
beginPath() {
// TODO: Branch current matrix so that save/restore has no effect
// as per safari docs.
this.currentPath_ = [];
}
moveTo(aX, aY) {
const p = this.getCoords_(aX, aY);
this.currentPath_.push({ type: "moveTo", x: p.x, y: p.y });
this.currentX_ = p.x;
this.currentY_ = p.y;
}
lineTo(aX, aY) {
const p = this.getCoords_(aX, aY);
this.currentPath_.push({ type: "lineTo", x: p.x, y: p.y });
this.currentX_ = p.x;
this.currentY_ = p.y;
}
bezierCurveTo(aCP1x, aCP1y, aCP2x, aCP2y, aX, aY) {
const p = this.getCoords_(aX, aY);
const cp1 = this.getCoords_(aCP1x, aCP1y);
const cp2 = this.getCoords_(aCP2x, aCP2y);
bezierCurveToHelper(this, cp1, cp2, p);
}
quadraticCurveTo(aCPx, aCPy, aX, aY) {
// the following is lifted almost directly from
// http://developer.mozilla.org/en/docs/Canvas_tutorial:Drawing_shapes
const cp = this.getCoords_(aCPx, aCPy);
const p = this.getCoords_(aX, aY);
const cp1 = {
x: this.currentX_ + (2.0 / 3.0) * (cp.x - this.currentX_),
y: this.currentY_ + (2.0 / 3.0) * (cp.y - this.currentY_),
};
const cp2 = {
x: cp1.x + (p.x - this.currentX_) / 3.0,
y: cp1.y + (p.y - this.currentY_) / 3.0,
};
bezierCurveToHelper(this, cp1, cp2, p);
}
arc(aX, aY, aRadius, aStartAngle, aEndAngle, aClockwise) {
const arcType = aClockwise ? "at" : "wa";
let xStart = aX + mc(aStartAngle) * aRadius;
const yStart = aY + ms(aStartAngle) * aRadius;
const xEnd = aX + mc(aEndAngle) * aRadius;
const yEnd = aY + ms(aEndAngle) * aRadius;
// IE won't render arches drawn counter clockwise if xStart == xEnd.
if (xStart === xEnd && !aClockwise) {
xStart += 0.125; // Offset xStart by 1/80 of a pixel. Use something
// that can be represented in binary
}
const p = this.getCoords_(aX, aY);
const pStart = this.getCoords_(xStart, yStart);
const pEnd = this.getCoords_(xEnd, yEnd);
this.currentPath_.push({
type: arcType,
x: p.x,
y: p.y,
radius: aRadius,
xStart: pStart.x,
yStart: pStart.y,
xEnd: pEnd.x,
yEnd: pEnd.y,
});
}
rect(aX, aY, aWidth, aHeight) {
if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) {
return; //try to remove the rectangle behind radio buttons and checkboxes
}
this.moveTo(aX, aY);
this.lineTo(aX + aWidth, aY);
this.lineTo(aX + aWidth, aY + aHeight);
this.lineTo(aX, aY + aHeight);
this.closePath();
}
strokeRect(aX, aY, aWidth, aHeight) {
if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) {
return; //try to remove the rectangle behind radio buttons and checkboxes
}
const oldPath = this.currentPath_;
this.beginPath();
this.moveTo(aX, aY);
this.lineTo(aX + aWidth, aY);
this.lineTo(aX + aWidth, aY + aHeight);
this.lineTo(aX, aY + aHeight);
this.closePath();
this.stroke();
this.currentPath_ = oldPath;
}
fillRect(aX, aY, aWidth, aHeight) {
if (this.#needRemoveRect(aX, aY, aWidth, aHeight)) {
return; //try to remove the rectangle behind radio buttons and checkboxes
}
const oldPath = this.currentPath_;
this.beginPath();
this.moveTo(aX, aY);
this.lineTo(aX + aWidth, aY);
this.lineTo(aX + aWidth, aY + aHeight);
this.lineTo(aX, aY + aHeight);
this.closePath();
this.fill();
this.currentPath_ = oldPath;
}
createLinearGradient(aX0, aY0, aX1, aY1) {
const gradient = new CanvasGradient_("gradient");
gradient.x0_ = aX0;
gradient.y0_ = aY0;
gradient.x1_ = aX1;
gradient.y1_ = aY1;
return gradient;
}
createRadialGradient(aX0, aY0, aR0, aX1, aY1, aR1) {
const gradient = new CanvasGradient_("gradientradial");
gradient.x0_ = aX0;
gradient.y0_ = aY0;
gradient.r0_ = aR0;
gradient.x1_ = aX1;
gradient.y1_ = aY1;
gradient.r1_ = aR1;
return gradient;
}
// eslint-disable-next-line @typescript-eslint/naming-convention
drawImage(image, var_args) {
//MQZ. no image drawing support for now
}
getImageData(x, y, w, h) {
//MQZ. returns empty data buffer for now
return {
width: w,
height: h,
data: new Uint8Array(w * h * 4),
};
}
stroke(aFill) {
if (this.currentPath_.length < 2) {
return;
}
const a = processStyle(aFill ? this.fillStyle : this.strokeStyle);
const { color } = a;
// let opacity = a.alpha * this.globalAlpha;
const lineWidth = this.lineScale_ * this.lineWidth;
const min = { x: null, y: null };
const max = { x: null, y: null };
for (let i = 0; i < this.currentPath_.length; i++) {
let p = this.currentPath_[i];
switch (p.type) {
case "moveTo":
break;
case "lineTo":
if (!aFill) {
//lines
if (i > 0) {
this.#drawPDFLine(this.currentPath_[i - 1], p, lineWidth, color);
}
}
break;
case "close":
if (!aFill) {
//lines
if (i > 0) {
this.#drawPDFLine(
this.currentPath_[i - 1],
this.currentPath_[0],
lineWidth,
color
);
}
}
p = null;
break;
case "bezierCurveTo":
break;
case "at":
case "wa":
break;
}
// Figure out dimensions so we can set fills' coordinates correctly
if (aFill && p) {
if (min.x === null || p.x < min.x) {
min.x = p.x;
}
if (max.x === null || p.x > max.x) {
max.x = p.x;
}
if (min.y === null || p.y < min.y) {
min.y = p.y;
}
if (max.y === null || p.y > max.y) {
max.y = p.y;
}
}
}
if (aFill) {
//fill
this.#drawPDFFill(min, min, max, color);
}
}
fill() {
this.stroke(true);
}
closePath() {
this.currentPath_.push({ type: "close" });
}
/**
* @private
*/
// eslint-disable-next-line @typescript-eslint/naming-convention
getCoords_(aX, aY) {
const m = this.m_;
return {
x: aX * m[0][0] + aY * m[1][0] + m[2][0],
y: aX * m[0][1] + aY * m[1][1] + m[2][1],
};
}
save() {
const o = {};
copyState(this, o);
this.aStack_.push(o);
this.mStack_.push(this.m_);
this.m_ = matrixMultiply(createMatrixIdentity(), this.m_);
}
restore() {
copyState(this.aStack_.pop(), this);
this.m_ = this.mStack_.pop();
}
translate(aX, aY) {
const m1 = [
[1, 0, 0],
[0, 1, 0],
[aX, aY, 1],
];
setM(this, matrixMultiply(m1, this.m_), false);
}
rotate(aRot) {
const c = mc(aRot);
const s = ms(aRot);
const m1 = [
[c, s, 0],
[-s, c, 0],
[0, 0, 1],
];
setM(this, matrixMultiply(m1, this.m_), false);
}
scale(aX, aY) {
this.arcScaleX_ *= aX;
this.arcScaleY_ *= aY;
const m1 = [
[aX, 0, 0],
[0, aY, 0],
[0, 0, 1],
];
setM(this, matrixMultiply(m1, this.m_), true);
}
transform(m11, m12, m21, m22, dx, dy) {
const m1 = [
[m11, m12, 0],
[m21, m22, 0],
[dx, dy, 1],
];
setM(this, matrixMultiply(m1, this.m_), true);
}
setTransform(m11, m12, m21, m22, dx, dy) {
const m = [
[m11, m12, 0],
[m21, m22, 0],
[dx, dy, 1],
];
setM(this, m, true);
}
/******** STUBS ********/
clip() {
// TODO: Implement
}
arcTo() {
// TODO: Implement
}
createPattern() {
return new CanvasPattern_();
}
}
// replacing HTML5 canvas with PDFCanvas (in-memory canvas)
export function createScratchCanvas(width, height) {
return new CanvasRenderingContext2D_({}, width, height);
}