pdf3json
Version:
A PDF file parser that converts PDF binaries to text-based JSON, powered by a fork of PDF.js ported to Node.js
290 lines (243 loc) • 8.71 kB
JavaScript
var nodeUtil = require("util"),
_ = require("underscore"),
PDFUnit = require('./pdfunit.js');
/**
 * PDFField converts one interactive form annotation ("widget") produced by the
 * PDF parser into the plain JSON records collected in a page's `Fields` and
 * `Boxsets` arrays.
 *
 * Typical flow, as far as this file shows:
 *   1. PDFField.isFormElement(field) decides whether the annotation is supported
 *      (and normalizes button widgets into 'Rd' / 'Cb' / 'Btn').
 *   2. new PDFField(field, viewport, Fields, Boxsets).processField() appends the
 *      converted record to `Fields` or `Boxsets`.
 *   3. PDFField.getAllFieldsTypes(data) summarizes all converted records.
 */
var PDFField = (function PDFFieldClosure() {
    'use strict';

    // private static
    var _nextId = 1;
    var _name = 'PDFField';
    // Shared by every instance and never reset in this file, so tab indexes keep
    // increasing across all fields processed during the life of the process.
    var _tabIndex = 0;

    var kFBANotOverridable = 0x00000400; // indicates the field is read only by the user
    var kFBARequired = 0x00000010; // indicates the field is required
    var kMinHeight = 20; // smallest height (viewport units) reported for any field

    /**
     * @constructor
     * @param {Object} field - parsed widget annotation (reads rect, fieldType, fieldFlags, fullName, ...)
     * @param {Object} viewport - must provide convertToViewportRectangle(rect)
     * @param {Array} Fields - output array for text, choice and push-button records
     * @param {Array} Boxsets - output array for checkbox / radio-button groups
     */
    var cls = function (field, viewport, Fields, Boxsets) {
        // private
        var _id = _nextId++;

        // public (every instance will have their own copy of these methods, needs to be lightweight)
        this.get_id = function () { return _id; };
        this.get_name = function () { return _name + _id; };

        this.field = field;
        this.viewport = viewport;
        this.Fields = Fields;
        this.Boxsets = Boxsets;
    };

    // Normalize rectangle rect=[x1, y1, x2, y2] so that (x1,y1) < (x2,y2)
    // For coordinate systems whose origin lies in the bottom-left, this
    // means normalization to (BL,TR) ordering. For systems with origin in the
    // top-left, this means (TL,BR) ordering.
    // Returns a new array; the input is not modified.
    var _normalizeRect = function (rect) {
        var r = rect.slice(0); // clone rect
        if (rect[0] > rect[2]) {
            r[0] = rect[2];
            r[2] = rect[0];
        }
        if (rect[1] > rect[3]) {
            r[1] = rect[3];
            r[3] = rect[1];
        }
        return r;
    };

    // public static

    /**
     * @returns {string} the name the next created instance will get; does not consume the id.
     */
    cls.get_nextId = function () {
        return _name + _nextId;
    };

    /**
     * Tells whether a widget's field type can be converted, and normalizes
     * button widgets in place: field.fieldType 'Btn' becomes 'Rd' (radio
     * button), stays 'Btn' (push button) or becomes 'Cb' (checkbox) depending
     * on field.fieldFlags.
     *
     * The already-normalized values 'Rd' and 'Cb' are accepted as well, so
     * calling this more than once on the same field gives the same answer
     * instead of reporting the field as unsupported on the second call.
     *
     * @param {Object} field - widget annotation; fieldType may be rewritten
     * @returns {boolean} true when processField() has a handler for the type
     */
    cls.isWidgetSupported = function (field) {
        switch (field.fieldType) {
            case 'Tx': //text input
            case 'Ch': //drop down
            case 'Rd': //radio button, normalized by an earlier call
            case 'Cb': //checkbox, normalized by an earlier call
                return true;
            case 'Btn':
                if (field.fieldFlags & 32768) {
                    field.fieldType = 'Rd'; //radio button
                }
                else if (!(field.fieldFlags & 65536)) {
                    field.fieldType = 'Cb'; //checkbox
                }
                // otherwise: push button, fieldType stays 'Btn'
                return true;
            default:
                nodeUtil.p2jwarn("Unsupported: field.fieldType of " + field.fieldType);
                return false;
        }
    };

    /**
     * Tells whether an annotation is a supported form element. Only the
     * 'Widget' subtype is considered; see isWidgetSupported for the side
     * effect on field.fieldType.
     *
     * @param {Object} field - annotation with `subtype` and `fieldType`
     * @returns {boolean}
     */
    cls.isFormElement = function (field) {
        if (field.subtype === 'Widget') {
            return cls.isWidgetSupported(field);
        }
        nodeUtil.p2jwarn("Unsupported: field.type of " + field.subtype);
        return false;
    };

    // Computes {x, y, w, h} in form units for a field. Must be invoked with
    // `this` bound to a PDFField instance (reads this.viewport).
    var _getFieldPosition = function (field) {
        var rect = _normalizeRect(this.viewport.convertToViewportRectangle(field.rect));
        var height = rect[3] - rect[1];

        // The offsets below are fixed layout adjustments; their rationale is
        // not visible in this file.
        if (field.fieldType === 'Tx') {
            if (height > kMinHeight + 2) {
                rect[1] += 2;
                height -= 2;
            }
        }
        else if (field.fieldType !== 'Ch') { //checkbox, radio button, and link button
            rect[1] -= 3;
        }

        // enforce the minimum height
        height = (height >= kMinHeight) ? height : kMinHeight;

        return {
            x: PDFUnit.toFormX(rect[0]),
            y: PDFUnit.toFormY(rect[1]),
            w: PDFUnit.toFormX(rect[2] - rect[0]),
            h: PDFUnit.toFormY(height)
        };
    };

    // Builds the properties shared by every record: id, tab index (TI),
    // attribute mask (AM) and position. Must be invoked with `this` bound to a
    // PDFField instance.
    var _getFieldBaseData = function (field) {
        var attributeMask = 0;

        //PDF Spec p.676 TABLE 8.70 Field flags common to all field types
        if (field.fieldFlags & 0x00000001) {
            attributeMask |= kFBANotOverridable;
        }
        if (field.fieldFlags & 0x00000002) {
            attributeMask |= kFBARequired;
        }

        return _.extend({
            id: { Id: field.fullName, EN: 0},
            TI: field.TI,
            AM: attributeMask
        }, _getFieldPosition.call(this, field));
    };

    // Creates a record with the template used by text, checkbox, radio and
    // choice fields ({style, T: {Name, TypeInfo}}) followed by the base data.
    // Must be invoked with `this` bound to a PDFField instance.
    var _buildFieldData = function (field, typeName) {
        return _.extend({
            style: 48,
            T: {
                Name: typeName,
                TypeInfo: {}
            }
        }, _getFieldBaseData.call(this, field));
    };

    // Text input ('Tx'): appended to this.Fields.
    var _addAlpha = function (field) {
        var anData = _buildFieldData.call(this, field, field.TName || "alpha");

        //PDF Spec p.675: add TU (AlternativeText) fields to provide accessibility info
        if (field.alternativeText && field.alternativeText.length > 1) {
            anData.TU = field.alternativeText;
        }
        if (field.MV) { //field attributes: arbitrary mask value
            anData.MV = field.MV;
        }
        if (field.fieldValue) {
            anData.V = field.fieldValue; //read-only field value, like "self-prepared"
        }

        this.Fields.push(anData);
    };

    // Checkbox ('Cb'): appended to this.Boxsets as a single-box set without an id.
    var _addCheckBox = function (box) {
        var anData = _buildFieldData.call(this, box, "box");
        this.Boxsets.push({boxes:[anData]});
    };

    // Radio button ('Rd'): the button's own id is its value; buttons sharing
    // the same fullName are collected into one boxset whose id is that fullName.
    var _addRadioButton = function (box) {
        var anData = _buildFieldData.call(this, box, "box");
        anData.id.Id = box.value;
        if (_.has(box, 'checked')) {
            anData.checked = box.checked;
        }

        var rdGroup = _.find(this.Boxsets, function (boxset) {
            return _.has(boxset, 'id') && _.has(boxset.id, 'Id') && (boxset.id.Id === box.fullName);
        });

        if ((!!rdGroup) && (_.has(rdGroup, 'boxes'))) {
            rdGroup.boxes.push(anData);
        }
        else {
            this.Boxsets.push({boxes:[anData], id: { Id: box.fullName, EN: 0}});
        }
    };

    // Push button ('Btn'): appended to this.Fields with the link target taken from field.FL.
    var _addLinkButton = function (field) {
        var anData = _.extend({
            style: 48,
            T: {
                Name: "link"
            },
            FL: {
                form: {Id: field.FL}
            }
        }, _getFieldBaseData.call(this, field));

        this.Fields.push(anData);
    };

    // Choice field ('Ch'): appended to this.Fields with a pick list PL, where
    // PL.D holds display strings and PL.V the matching values.
    var _addSelect = function (field) {
        var anData = _buildFieldData.call(this, field, "alpha");

        anData.w -= 0.5; //adjust combobox width
        anData.PL = {V: [], D: []};

        _.each(field.value, function (option) {
            if (Array.isArray(option)) {
                anData.PL.D.push(option[0]);
                anData.PL.V.push(option[1]);
            }
            else {
                // A PDF option may be a plain string instead of a two-element
                // array; indexing it would pick out single characters, so the
                // string is used as both display text and value.
                anData.PL.D.push(option);
                anData.PL.V.push(option);
            }
        });

        this.Fields.push(anData);
    };

    // public (every instance will share the same method, but has no access to private fields defined in constructor)

    /**
     * Assigns the next tab index to the field, converts it according to its
     * (normalized) fieldType and then releases the instance's references via
     * clean(). Field types without a handler add nothing.
     */
    cls.prototype.processField = function () {
        this.field.TI = _tabIndex++;

        switch (this.field.fieldType) {
            case 'Tx': _addAlpha.call(this, this.field); break;
            case 'Cb': _addCheckBox.call(this, this.field); break;
            case 'Rd': _addRadioButton.call(this, this.field); break;
            case 'Btn': _addLinkButton.call(this, this.field); break;
            case 'Ch': _addSelect.call(this, this.field); break;
        }

        this.clean();
    };

    /**
     * Removes the per-instance accessors and references so the instance no
     * longer holds on to the field, viewport or output arrays.
     */
    cls.prototype.clean = function () {
        delete this.get_id;
        delete this.get_name;
        delete this.field;
        delete this.viewport;
        delete this.Fields;
        delete this.Boxsets;
    };

    /**
     * static public method to generate fieldsType object based on parser result
     *
     * @param {Object} data - parser result with a `Pages` list; each page may carry `Boxsets` and `Fields`
     * @returns {Array} one {id, type, calc, value} entry per radio button, checkbox and field;
     *                  `calc` is true when the record's AM has the read-only bit set
     */
    cls.getAllFieldsTypes = function (data) {
        function isFieldReadOnly(field) {
            return (field.AM & kFBANotOverridable) ? true : false;
        }

        function getFieldBase(field) {
            return {id: field.id.Id, type: field.T.Name, calc: isFieldReadOnly(field), value: field.V || ""};
        }

        var retVal = [];

        _.each(data.Pages, function (page) {
            _.each(page.Boxsets, function (boxsets) {
                // NOTE(review): a boxset is treated as a radio group only when it
                // holds more than one box, so a radio group with a single button
                // on the page is reported like a checkbox.
                if (boxsets.boxes.length > 1) { //radio button
                    _.each(boxsets.boxes, function (box) {
                        retVal.push({id: boxsets.id.Id, type: "radio", calc: isFieldReadOnly(box), value: box.id.Id});
                    });
                }
                else { //checkbox
                    retVal.push(getFieldBase(boxsets.boxes[0]));
                }
            });

            _.each(page.Fields, function (field) {
                retVal.push(getFieldBase(field));
            });
        });

        return retVal;
    };

    return cls;
})();
module.exports = PDFField;