/*
 * pdf2json
 * Version: (unspecified)
 * PDF file parser that converts PDF binaries to JSON and text, powered by porting a fork of PDF.JS to Node.js.
 * Original listing: 299 lines (258 loc), 9 kB, JavaScript.
 */
import nodeUtil from "util";
import PDFUnit from "./pdfunit.js";
// Bits of the attribute mask (the `AM` property) that PDFField writes on every form element.
// kFBANotOverridable is set when bit 0x1 of the PDF field flags is set; getAllFieldsTypes()
// reports fields carrying it as `calc: true`.
const kFBANotOverridable = 0x00000400; // indicates the field is read only by the user
// kFBARequired is set when bit 0x2 of the PDF field flags is set.
const kFBARequired = 0x00000010; // indicates the field is required
// Lower bound applied to a field's height (in viewport units, before PDFUnit.toFormY conversion);
// text fields are only inset when they are more than 2 units taller than this.
const kMinHeight = 20;
/**
 * Converts one PDF form annotation (a "Widget") into the JSON shape the parser
 * emits: text inputs, drop downs, link buttons and signatures are appended to
 * `Fields`; check boxes and radio buttons are appended to `Boxsets`.
 *
 * Typical use: filter annotations with `PDFField.isFormElement(field)`, then
 * `new PDFField(field, viewport, Fields, Boxsets).processField()`.
 */
export default class PDFField {
    // Next tab index (`TI`) to hand out; shared by all instances and never reset by this class.
    static tabIndex = 0;

    /**
     * Tells whether a widget's field type can be converted.
     *
     * Side effect: a generic button (`'Btn'`) is re-labelled in place as
     * `'Rd'` (radio), `'Btn'` (push button) or `'Cb'` (check box) according to
     * its field flags. The refined labels `'Rd'` and `'Cb'` are themselves
     * accepted, so calling this more than once on the same field is safe.
     *
     * @param {object} field - annotation data; reads `fieldType` and `fieldFlags`.
     * @returns {boolean} true when `processField()` knows how to handle the field.
     */
    static isWidgetSupported(field) {
        switch (field.fieldType) {
            case 'Btn':
                if (field.fieldFlags & 0x8000) { // 32768
                    field.fieldType = 'Rd'; //radio button
                }
                else if (field.fieldFlags & 0x10000) { // 65536
                    field.fieldType = 'Btn'; //push button
                }
                else {
                    field.fieldType = 'Cb'; //checkbox
                }
                return true;
            case 'Tx': //text input
            case 'Ch': //drop down
            case 'Sig': //signature
            case 'Rd': //button already classified as radio by an earlier call
            case 'Cb': //button already classified as checkbox by an earlier call
                return true;
            default:
                nodeUtil.p2jwarn(`Unsupported: field.fieldType of ${field.fieldType}`);
                return false;
        }
    }

    /**
     * Tells whether an annotation is a convertible form element. Only the
     * `'Widget'` subtype is considered; anything else is logged and rejected.
     *
     * @param {object} field - annotation data; reads `subtype` (and, for widgets, `fieldType`/`fieldFlags`).
     * @returns {boolean}
     */
    static isFormElement(field) {
        if (field.subtype === 'Widget') {
            return PDFField.isWidgetSupported(field);
        }
        nodeUtil.p2jwarn(`Unsupported: field.type of ${field.subtype}`);
        return false;
    }

    /**
     * @param {object} field - annotation data of one widget.
     * @param {object} viewport - must provide `convertToViewportRectangle(rect)`.
     * @param {Array} Fields - output list receiving non-box form elements.
     * @param {Array} Boxsets - output list receiving check box / radio groups.
     */
    constructor(field, viewport, Fields, Boxsets) {
        this.field = field;
        this.viewport = viewport;
        this.Fields = Fields;
        this.Boxsets = Boxsets;
    }

    // Normalize rectangle rect=[x1, y1, x2, y2] so that (x1,y1) < (x2,y2)
    // For coordinate systems whose origin lies in the bottom-left, this
    // means normalization to (BL,TR) ordering. For systems with origin in the
    // top-left, this means (TL,BR) ordering.
    // Returns a copy; the input rectangle is left untouched.
    static #normalizeRect(rect) {
        const r = rect.slice(0); // clone rect
        if (rect[0] > rect[2]) {
            r[0] = rect[2];
            r[2] = rect[0];
        }
        if (rect[1] > rect[3]) {
            r[1] = rect[3];
            r[3] = rect[1];
        }
        return r;
    }

    /**
     * Computes the element's position and size in form units.
     *
     * Text fields more than 2 units taller than kMinHeight are inset by 2 at
     * the top; every type other than text and drop down is moved up by 3;
     * the height is never smaller than kMinHeight.
     *
     * @returns {{x: number, y: number, w: number, h: number}}
     */
    #getFieldPosition(field) {
        const fieldRect = this.viewport.convertToViewportRectangle(field.rect);
        const rect = PDFField.#normalizeRect(fieldRect);
        let height = rect[3] - rect[1];

        if (field.fieldType === 'Tx') {
            if (height > kMinHeight + 2) {
                rect[1] += 2;
                height -= 2;
            }
        }
        else if (field.fieldType !== 'Ch') { //checkbox, radio button, and link button
            rect[1] -= 3;
        }

        height = Math.max(height, kMinHeight);

        return {
            x: PDFUnit.toFormX(rect[0]),
            y: PDFUnit.toFormY(rect[1]),
            w: PDFUnit.toFormX(rect[2] - rect[0]),
            h: PDFUnit.toFormY(height)
        };
    }

    /**
     * Builds the properties shared by every element: `id`, `TI` (tab index),
     * `AM` (attribute mask), optional `TU`/`TM` texts, and the position.
     */
    #getFieldBaseData(field) {
        let attributeMask = 0;
        //PDF Spec p.676 TABLE 8.70 Field flags common to all field types
        if (field.fieldFlags & 0x00000001) {
            attributeMask |= kFBANotOverridable;
        }
        if (field.fieldFlags & 0x00000002) {
            attributeMask |= kFBARequired;
        }

        const anData = {
            id: { Id: field.fullName, EN: 0 },
            TI: field.TI,
            AM: attributeMask
        };

        //PDF Spec p.675: add TU (AlternativeText) fields to provide accessibility info
        // Texts of length 0 or 1 are deliberately ignored.
        if (field.alternativeText && field.alternativeText.length > 1) {
            anData.TU = field.alternativeText;
        }
        if (field.alternativeID && field.alternativeID.length > 1) {
            anData.TM = field.alternativeID;
        }

        return Object.assign(anData, this.#getFieldPosition(field));
    }

    /**
     * Shared skeleton of every emitted element. Key order is
     * `style`, `T`, then `extra`, then the base data, so serialized output
     * keeps a stable property order.
     */
    #createFieldData(field, T, extra = {}) {
        return { style: 48, T, ...extra, ...this.#getFieldBaseData(field) };
    }

    // Text input: appended to Fields, with optional mask (MV) and value (V).
    #addAlpha(field) {
        const anData = this.#createFieldData(field, {
            Name: field.TName || "alpha",
            TypeInfo: {}
        });
        if (field.MV) { //field attributes: arbitrary mask value
            anData.MV = field.MV;
        }
        if (field.fieldValue) {
            anData.V = field.fieldValue; //read-only field value, like "self-prepared"
        }
        this.Fields.push(anData);
    }

    // Check box: appended to Boxsets as its own single-box set (without a group id).
    #addCheckBox(box) {
        const anData = this.#createFieldData(box, { Name: "box", TypeInfo: {} });
        if (box.fieldValue) {
            anData.checked = box.fieldValue !== 'Off';
        }
        this.Boxsets.push({ boxes: [anData] });
    }

    // Radio button: the box id is the button's own value; buttons sharing a
    // fullName are collected in one boxset whose id is that fullName.
    #addRadioButton(box) {
        const anData = this.#createFieldData(box, { Name: "box", TypeInfo: {} });
        anData.id.Id = box.value;
        if ('checked' in box) {
            anData.checked = box.checked;
        }

        const rdGroup = this.Boxsets.find(
            boxset => ('id' in boxset) && ('Id' in boxset.id) && (boxset.id.Id === box.fullName)
        );
        if (rdGroup && ('boxes' in rdGroup)) {
            rdGroup.boxes.push(anData);
        }
        else {
            this.Boxsets.push({ boxes: [anData], id: { Id: box.fullName, EN: 0 } });
        }
    }

    // Push button: appended to Fields as a "link" carrying the field's FL value.
    #addLinkButton(field) {
        const anData = this.#createFieldData(
            field,
            { Name: "link" },
            { FL: { form: { Id: field.FL } } }
        );
        this.Fields.push(anData);
    }

    // Drop down: appended to Fields with PL.D (display texts) and PL.V (values).
    #addSelect(field) {
        const anData = this.#createFieldData(field, { Name: "alpha", TypeInfo: {} });
        anData.w -= 0.5; //adjust combobox width
        anData.PL = { V: [], D: [] };

        // A choice field may come without an option list; emit empty lists instead of throwing.
        const options = Array.isArray(field.value) ? field.value : [];
        for (const option of options) {
            // An option is either a [display, value] pair or a single entry used for both.
            if (Array.isArray(option)) {
                anData.PL.D.push(option[0]);
                anData.PL.V.push(option[1]);
            }
            else {
                anData.PL.D.push(option);
                anData.PL.V.push(option);
            }
        }

        // add field value to the object
        if (field.fieldValue) {
            anData.V = field.fieldValue;
        }
        this.Fields.push(anData);
    }

    // Signature: appended to Fields; the non-empty entries of field.Sig are copied over.
    #addSignature(field) {
        const anData = this.#createFieldData(field, { Name: "signature", TypeInfo: {} });
        if (field.Sig) {
            anData.Sig = {};
            if (field.Sig.Name) anData.Sig.Name = field.Sig.Name;
            if (field.Sig.M) anData.Sig.M = PDFUnit.dateToIso8601(field.Sig.M);
            if (field.Sig.Location) anData.Sig.Location = field.Sig.Location;
            if (field.Sig.Reason) anData.Sig.Reason = field.Sig.Reason;
            if (field.Sig.ContactInfo) anData.Sig.ContactInfo = field.Sig.ContactInfo;
        }
        this.Fields.push(anData);
    }

    // public instance methods

    /**
     * Assigns the next tab index to the field, converts it according to its
     * (already classified) `fieldType` and appends the result to `Fields` or
     * `Boxsets`. Unknown types are ignored. The instance releases its
     * references afterwards — even when conversion throws — so it can be used
     * only once.
     */
    processField() {
        try {
            this.field.TI = PDFField.tabIndex++;
            switch (this.field.fieldType) {
                case 'Tx': this.#addAlpha(this.field); break;
                case 'Cb': this.#addCheckBox(this.field); break;
                case 'Rd': this.#addRadioButton(this.field); break;
                case 'Btn': this.#addLinkButton(this.field); break;
                case 'Ch': this.#addSelect(this.field); break;
                case 'Sig': this.#addSignature(this.field); break;
            }
        }
        finally {
            this.clean();
        }
    }

    // Drops the references received by the constructor.
    clean() {
        delete this.field;
        delete this.viewport;
        delete this.Fields;
        delete this.Boxsets;
    }

    /**
     * Static public method to generate the fieldsType list based on parser result.
     *
     * A boxset that carries a group `id` is a radio group (one entry per
     * button, `id` = group id, `value` = button id) regardless of how many
     * buttons it holds; a boxset without one holds check boxes. Empty boxsets
     * and pages lacking `Boxsets`/`Fields` contribute nothing.
     *
     * @param {{Pages: Array<{Boxsets?: Array, Fields?: Array}>}} data - parser output.
     * @returns {Array<{id: *, type: string, calc: boolean, value: *}>}
     *          `calc` is true when the element's AM has the kFBANotOverridable bit.
     */
    static getAllFieldsTypes(data) {
        const isFieldReadOnly = field => (field.AM & kFBANotOverridable) !== 0;
        const getFieldBase = field => ({
            id: field.id.Id,
            type: field.T.Name,
            calc: isFieldReadOnly(field),
            value: field.V || ""
        });

        const retVal = [];
        for (const page of data.Pages ?? []) {
            for (const boxset of page.Boxsets ?? []) {
                const boxes = boxset.boxes ?? [];
                if (boxset.id != null) { //radio button group
                    for (const box of boxes) {
                        retVal.push({ id: boxset.id.Id, type: "radio", calc: isFieldReadOnly(box), value: box.id.Id });
                    }
                }
                else { //checkbox
                    for (const box of boxes) {
                        retVal.push(getFieldBase(box));
                    }
                }
            }
            for (const field of page.Fields ?? []) {
                retVal.push(getFieldBase(field));
            }
        }
        return retVal;
    }
}