// pdf-lib
// Version:
// Library for creating and modifying PDF files in JavaScript
// 95 lines (94 loc) • 4.92 kB
// JavaScript
/**
 * Interop helper for default imports of CommonJS modules.
 *
 * Reuses an `__importDefault` already present on `this` when there is one.
 * Otherwise: a truthy value flagged with `__esModule` is returned untouched,
 * and anything else is wrapped as `{ "default": value }`.
 */
var __importDefault = (this && this.__importDefault) || function (loaded) {
    if (loaded && loaded.__esModule) {
        return loaded;
    }
    return { "default": loaded };
};
Object.defineProperty(exports, "__esModule", { value: true });
var pdf_objects_1 = require("../pdf-objects");
var pdf_structures_1 = require("../pdf-structures");
var utils_1 = require("../../utils");
var parseArray_1 = __importDefault(require("./parseArray"));
var parseBool_1 = __importDefault(require("./parseBool"));
var parseDict_1 = __importDefault(require("./parseDict"));
var parseHexString_1 = __importDefault(require("./parseHexString"));
var parseIndirectRef_1 = __importDefault(require("./parseIndirectRef"));
var parseName_1 = __importDefault(require("./parseName"));
var parseNull_1 = __importDefault(require("./parseNull"));
var parseNumber_1 = __importDefault(require("./parseNumber"));
var parseString_1 = __importDefault(require("./parseString"));
/**
 * Parses the header of a PDF Object Stream.
 *
 * Accepts a PDFDictionary and an array of bytes as input. The PDFDictionary
 * should be a PDF Object Stream dictionary, and the array of bytes should be
 * the Object Stream's content.
 *
 * The content starts with pairs of integers. Each pair describes one object
 * within the Object Stream - its object number and its byte offset within the
 * stream, respectively. The number of pairs to read is taken from the "N"
 * entry of the dictionary (`dict.get('N').number`).
 *
 * @param dict  The Object Stream dictionary.
 * @param input The bytes of the Object Stream's content.
 * @returns An array of `{ objNum, byteOffset }` objects, one per integer
 *          pair, in the order the pairs appear in the input.
 * @throws Error if the input holds fewer than "N" well-formed integer pairs.
 */
var parseObjData = function (dict, input) {
    // Extract the value of the "N" entry from the dict
    var numObjects = dict.get('N').number;
    // Regex representing a pair of integers. The whitespace between the two
    // integers is mandatory: if it were optional, a lone multi-digit integer
    // such as "11" would backtrack into the bogus pair (1, 1).
    var objDatumRegex = /^[\0\t\n\f\r ]*(\d+)[\0\t\n\f\r ]+(\d+)[\0\t\n\f\r ]*/;
    // Matches any character that is neither a digit nor an EOL/space character.
    // Built once here instead of once per inspected byte.
    var nonNumericRegex = /[^\0\t\n\f\r \d]/;
    // Find the first non-numeric character (not including EOLs and spaces) in
    // the input bytes
    var firstNonNumIdx = utils_1.arrayFindIndexOf(input, function (charByte) {
        return nonNumericRegex.test(String.fromCharCode(charByte));
    });
    // Convert the input bytes to a string, up to the first non-numeric character
    var objDatumsStr = utils_1.arrayToString(input, 0, firstNonNumIdx);
    // Repeatedly apply the integer pair regex to the front of the remaining
    // string to build up the array of parsed integer pairs
    var objData = [];
    var remaining = objDatumsStr;
    for (var i = 0; i < numObjects; i += 1) {
        var match = remaining.match(objDatumRegex);
        if (!match) {
            // String.prototype.match returns null when nothing matches; fail with
            // a descriptive message instead of a TypeError from indexing null.
            throw new Error('Failed to parse Object Stream header: expected ' + numObjects +
                ' integer pairs but found only ' + i);
        }
        objData.push({ objNum: Number(match[1]), byteOffset: Number(match[2]) });
        // Drop the consumed pair (and its surrounding whitespace)
        remaining = remaining.substring(match[0].length);
    }
    return objData;
};
/**
 * Parses the contents of a PDF Object Stream.
 *
 * Accepts a PDFDictionary and an array of bytes as input. The PDFDictionary
 * should be a PDF Object Stream dictionary, and the array of bytes should be
 * the Object Stream's content. *The array of bytes is expected to have been
 * decoded (based on the "Filter"s in the dictionary) prior to being passed to
 * this function.*
 *
 * The integer pairs at the start of the input are parsed first. Each object
 * is then parsed from the input bytes, starting at the "First" entry of the
 * dictionary plus that object's byte offset.
 *
 * @param dict          The Object Stream dictionary.
 * @param input         The decoded bytes of the Object Stream's content.
 * @param index         Passed through to the dictionary and array parsers.
 * @param parseHandlers Optional parse handlers (defaults to `{}`). Its
 *                      "onParseObjectStream" handler, when present, is called
 *                      with the resulting PDFObjectStream.
 * @returns The PDFObjectStream built from `dict` and the parsed objects.
 */
var parseObjectStream = function (dict, input, index, parseHandlers) {
    if (parseHandlers === void 0) { parseHandlers = {}; }
    // Tries each object parser in turn on the given bytes and returns the
    // first element of the first truthy result. When every parser comes back
    // falsy, control reaches utils_1.error(...) (expected to throw - confirm
    // against its definition).
    function parseObjectAt(bytes) {
        var result = parseDict_1.default(bytes, index, parseHandlers) ||
            parseArray_1.default(bytes, index, parseHandlers) ||
            parseName_1.default(bytes, parseHandlers) ||
            parseString_1.default(bytes, parseHandlers) ||
            parseIndirectRef_1.default(bytes, parseHandlers) ||
            parseNumber_1.default(bytes, parseHandlers) ||
            parseHexString_1.default(bytes, parseHandlers) ||
            parseBool_1.default(bytes, parseHandlers) ||
            parseNull_1.default(bytes, parseHandlers) ||
            utils_1.error('Failed to parse object in Object Stream');
        return result[0];
    }
    // Integer pairs (object number, byte offset) from the start of the input
    var entries = parseObjData(dict, input);
    // The "First" entry: offset that each pair's byte offset is added to
    var objectsStart = dict.get('First').number;
    // Turn every pair into a PDFIndirectObject with generation number 0
    var indirectObjects = [];
    for (var k = 0; k < entries.length; k += 1) {
        var number = entries[k].objNum;
        var offset = entries[k].byteOffset;
        var parsed = parseObjectAt(input.subarray(objectsStart + offset));
        indirectObjects.push(pdf_objects_1.PDFIndirectObject.of(parsed).setReferenceNumbers(number, 0));
    }
    var objectStream = pdf_structures_1.PDFObjectStream.from(dict, indirectObjects);
    // Nothing to notify when no handler was registered
    if (!parseHandlers.onParseObjectStream) {
        return objectStream;
    }
    parseHandlers.onParseObjectStream(objectStream);
    return objectStream;
};
// Expose parseObjectStream as this CommonJS module's default export.
exports.default = parseObjectStream;
// Stray empty statement; it has no effect.
;