UNPKG

pdf-lib

Version:

Library for creating and modifying PDF files in JavaScript

82 lines (81 loc) 3.61 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; } Object.defineProperty(exports, "__esModule", { value: true }); /* eslint-disable no-constant-condition */ var utils_1 = require("../../utils"); var parseHeader_1 = __importDefault(require("./parseHeader")); var parseIndirectObj_1 = __importDefault(require("./parseIndirectObj")); var parseLinearization_1 = __importDefault(require("./parseLinearization")); var parseTrailer_1 = require("./parseTrailer"); var parseXRefTable_1 = __importDefault(require("./parseXRefTable")); /** * Accepts an array of bytes as input. Parses indirect objects from the input bytes * until an xref table or trailer is found. The "onParseIndirectObj" parse * handler is called with each indirect object that is parsed. * * Returns a subarray of the input bytes with the bytes making up the parsed * indirect objects removed. */ var parseBodySection = function (input, index, parseHandlers) { var remainder = input; while (true) { var result = parseIndirectObj_1.default(remainder, index, parseHandlers); if (!result) break; remainder = result[1]; } return remainder; }; /** * Accepts an array of bytes as input. Checks to see if the first characters in the * input make up an xref table followed by a trailer, or just a trailer. The * "onParseXRefTable" and "onParseTrailer" parseHandlers will be called with the * parsed objects. * * Returns a subarray of the input bytes with the bytes making up the parsed * objects removed. */ var parseFooterSection = function (input, index, parseHandlers) { var remainder = input; // Try to parse the XRef table (some PDFs omit the XRef table) var parsedXRef = parseXRefTable_1.default(input, parseHandlers); if (parsedXRef) remainder = parsedXRef[1]; // Try to parse the trailer with and without dictionary, because some // malformatted documents are missing the dictionary. var parsedTrailer = parseTrailer_1.parseTrailer(remainder, index, parseHandlers) || parseTrailer_1.parseTrailerWithoutDict(remainder, index, parseHandlers); if (!parsedTrailer) return undefined; remainder = parsedTrailer[1]; return remainder; }; /** * Accepts an array of bytes comprising a PDF document as input. Parses all the * objects in the file in a sequential fashion, beginning with the header and * ending with the last trailer. * * The XRef tables/streams in the input are not used to locate and parse objects * as needed. Rather, the whole document is parsed and stored in memory at once. */ var parseDocument = function (input, index, parseHandlers) { var cleaned = input; // Parse the document header var remainder; _a = parseHeader_1.default(cleaned, parseHandlers) || utils_1.error('PDF is missing a header'), remainder = _a[1]; // If document is linearized, we'll need to parse the linearization // dictionary and First-Page XRef table/stream next... var linearizationMatch = parseLinearization_1.default(remainder, index, parseHandlers); if (linearizationMatch) remainder = linearizationMatch[1]; // Parse each body of the document and its corresponding footer. // (if document does not have update sections, loop will only occur once) while (remainder) { remainder = parseBodySection(remainder, index, parseHandlers); remainder = parseFooterSection(remainder, index, parseHandlers); } var _a; }; exports.default = parseDocument;