UNPKG

pdf-lib

Version:

Library for creating and modifying PDF files in JavaScript

95 lines (94 loc) 4.92 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
var pdf_objects_1 = require("../pdf-objects");
var pdf_structures_1 = require("../pdf-structures");
var utils_1 = require("../../utils");
var parseArray_1 = __importDefault(require("./parseArray"));
var parseBool_1 = __importDefault(require("./parseBool"));
var parseDict_1 = __importDefault(require("./parseDict"));
var parseHexString_1 = __importDefault(require("./parseHexString"));
var parseIndirectRef_1 = __importDefault(require("./parseIndirectRef"));
var parseName_1 = __importDefault(require("./parseName"));
var parseNull_1 = __importDefault(require("./parseNull"));
var parseNumber_1 = __importDefault(require("./parseNumber"));
var parseString_1 = __importDefault(require("./parseString"));
/**
 * Accepts a PDFDictionary and an array of bytes as input. The PDFDictionary
 * should be a PDF Object Stream dictionary, and the array of bytes should be
 * the Object Stream's content.
 *
 * Attempts to parse the pairs of integers at the start of the input bytes. Each
 * pair describes one object within the Object Stream - its object number and
 * byte offset within the stream, respectively.
 *
 * Returns an array of `{ objNum, byteOffset }` objects, one per parsed integer
 * pair. The number of pairs read is taken from the dictionary's "N" entry.
 *
 * Throws an Error if fewer than "N" integer pairs can be read from the start
 * of the input bytes.
 */
var parseObjData = function (dict, input) {
    // Extract the value of the "N" entry from the dict
    var numObjects = dict.get('N').number;
    // Regex representing a pair of integers
    var objDatumRegex = /^[\0\t\n\f\r ]*(\d+)[\0\t\n\f\r ]*(\d+)[\0\t\n\f\r ]*/;
    // Matches any character that is neither a digit nor one of the whitespace
    // characters allowed between the integers. Built once, not once per byte.
    var nonNumericRegex = /[^\0\t\n\f\r \d]/;
    // Find the first non-numeric character (not including EOLs and spaces) in
    // the input bytes
    var firstNonNumIdx = utils_1.arrayFindIndexOf(input, function (charByte) {
        return nonNumericRegex.test(String.fromCharCode(charByte));
    });
    // Convert the input bytes to a string, up to the first non-numeric character
    var objDatumsStr = utils_1.arrayToString(input, 0, firstNonNumIdx);
    // Repeatedly apply the integer pair regex to the input string to build up
    // an array of the parsed integer pairs
    var objData = [];
    var remaining = objDatumsStr;
    for (var i = 0; i < numObjects; i += 1) {
        var match = remaining.match(objDatumRegex);
        // A malformed stream may declare more objects in "N" than it has
        // integer pairs; fail with a clear message instead of indexing `null`.
        if (!match) {
            throw new Error('Failed to parse object data in Object Stream: expected ' +
                numObjects + ' integer pairs but found only ' + i);
        }
        objData.push({ objNum: Number(match[1]), byteOffset: Number(match[2]) });
        // Drop the consumed pair so the next iteration matches the following one
        remaining = remaining.substring(match[0].length);
    }
    return objData;
};
/**
 * Accepts a PDFDictionary and an array of bytes as input. The PDFDictionary
 * should be a PDF Object Stream dictionary, and the array of bytes should be
 * the Object Stream's content. *The array of bytes is expected to have been
 * decoded (based on the "Filter"s in the dictionary) prior to being passed to
 * this function.*
 *
 * After parsing the integer pairs from the start of the input bytes, the
 * objects themselves will be parsed from the remaining input bytes. Each
 * object is located at the dictionary's "First" offset plus the object's own
 * byte offset, and is wrapped in a PDFIndirectObject with generation number 0.
 *
 * The `index` argument is forwarded unchanged to the dictionary and array
 * parsers. `parseHandlers` defaults to an empty object.
 *
 * A PDFObjectStream will be returned, representing the objects parsed
 * from the Object Stream. The "onParseObjectStream" parse handler will also be
 * called with the parsed PDFObjectStream object, if it is provided.
 *
 * If none of the object parsers accepts the bytes at an object's offset,
 * `utils.error` is invoked with a failure message (presumably it throws -
 * confirm against "../../utils").
 */
var parseObjectStream = function (dict, input, index, parseHandlers) {
    if (parseHandlers === void 0) { parseHandlers = {}; }
    // Parse the pairs of integers from start of input bytes
    var objData = parseObjData(dict, input);
    // Extract the value of the "First" entry in the dict
    var First = dict.get('First');
    var firstObjOffset = First.number;
    // Map each pair of integers to a PDFIndirectObject
    var indirectObjects = objData.map(function (_a) {
        var objNum = _a.objNum, byteOffset = _a.byteOffset;
        var subarray = input.subarray(firstObjOffset + byteOffset);
        // Try each parser in turn; the first truthy result wins and its first
        // element is taken as the parsed object.
        var pdfObject = (parseDict_1.default(subarray, index, parseHandlers) ||
            parseArray_1.default(subarray, index, parseHandlers) ||
            parseName_1.default(subarray, parseHandlers) ||
            parseString_1.default(subarray, parseHandlers) ||
            parseIndirectRef_1.default(subarray, parseHandlers) ||
            parseNumber_1.default(subarray, parseHandlers) ||
            parseHexString_1.default(subarray, parseHandlers) ||
            parseBool_1.default(subarray, parseHandlers) ||
            parseNull_1.default(subarray, parseHandlers) ||
            utils_1.error('Failed to parse object in Object Stream'))[0];
        return pdf_objects_1.PDFIndirectObject.of(pdfObject).setReferenceNumbers(objNum, 0);
    });
    var objectStream = pdf_structures_1.PDFObjectStream.from(dict, indirectObjects);
    // Call the parse handler
    if (parseHandlers.onParseObjectStream) {
        parseHandlers.onParseObjectStream(objectStream);
    }
    return objectStream;
};
exports.default = parseObjectStream;