pdf-lib
Version:
Library for creating and modifying PDF files in JavaScript
82 lines (81 loc) • 3.61 kB
JavaScript
// Interop helper for default imports: reuses an already-installed
// `this.__importDefault` when one exists, otherwise falls back to the local
// implementation below.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Modules flagged with `__esModule` are returned untouched.
    if (mod && mod.__esModule) {
        return mod;
    }
    // Anything else is wrapped so that it can be read through `.default`.
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
/* eslint-disable no-constant-condition */
var utils_1 = require("../../utils");
var parseHeader_1 = __importDefault(require("./parseHeader"));
var parseIndirectObj_1 = __importDefault(require("./parseIndirectObj"));
var parseLinearization_1 = __importDefault(require("./parseLinearization"));
var parseTrailer_1 = require("./parseTrailer");
var parseXRefTable_1 = __importDefault(require("./parseXRefTable"));
/**
 * Consumes consecutive indirect objects from the front of `input`.
 *
 * The indirect-object parser is invoked over and over, each time on whatever
 * the previous invocation left behind (element `[1]` of its result), and is
 * handed `index` and `parseHandlers` unchanged. Parsing stops as soon as the
 * parser returns a falsy result, which is expected to happen once an xref
 * table or trailer is reached. The "onParseIndirectObj" handler is expected
 * to be called by the parser for every object it parses.
 *
 * Returns the part of the input that was not consumed; when nothing could be
 * parsed at all, that is `input` itself.
 */
var parseBodySection = function (input, index, parseHandlers) {
    var unparsed = input;
    for (var match = parseIndirectObj_1.default(unparsed, index, parseHandlers); match; match = parseIndirectObj_1.default(unparsed, index, parseHandlers)) {
        // Continue from the bytes following the object that was just parsed.
        unparsed = match[1];
    }
    return unparsed;
};
/**
 * Parses the footer found at the start of `input`: an optional xref table
 * followed by a trailer. The "onParseXRefTable" and "onParseTrailer"
 * parseHandlers are expected to be called with the parsed objects.
 *
 * Returns the bytes remaining after the trailer (element `[1]` of the trailer
 * parser's result), or `undefined` when no trailer could be parsed.
 */
var parseFooterSection = function (input, index, parseHandlers) {
    // The XRef table is optional (some PDFs omit it), so a failed match simply
    // means the trailer is looked for at the very start of the input.
    var xrefMatch = parseXRefTable_1.default(input, parseHandlers);
    var afterXRef = xrefMatch ? xrefMatch[1] : input;
    // Some malformatted documents are missing the trailer dictionary, so the
    // dictionary-less parser is only tried when the regular one fails.
    var trailerMatch = parseTrailer_1.parseTrailer(afterXRef, index, parseHandlers);
    if (!trailerMatch) {
        trailerMatch = parseTrailer_1.parseTrailerWithoutDict(afterXRef, index, parseHandlers);
    }
    return trailerMatch ? trailerMatch[1] : undefined;
};
/**
 * Parses a complete PDF document, supplied as an array of bytes, from front
 * to back: first the header, then an optional linearization section, then
 * every body section together with the footer that follows it.
 *
 * The XRef tables/streams in the input are not used to locate and parse
 * objects as needed. Rather, the whole document is parsed sequentially, and
 * results are reported through `index` and `parseHandlers`, which are handed
 * to the individual parsers. Nothing is returned.
 *
 * When no header can be parsed, `error('PDF is missing a header')` from the
 * utils module is invoked.
 */
var parseDocument = function (input, index, parseHandlers) {
    // Parse the document header; everything after it is still unparsed.
    var headerMatch = parseHeader_1.default(input, parseHandlers) || utils_1.error('PDF is missing a header');
    var unparsed = headerMatch[1];
    // A linearized document carries a linearization dictionary and a
    // First-Page XRef table/stream right after the header; skip past them.
    var linearization = parseLinearization_1.default(unparsed, index, parseHandlers);
    if (linearization) {
        unparsed = linearization[1];
    }
    // Each pass handles one body and its footer. A document without update
    // sections needs a single pass; the loop ends once the footer parser
    // yields a falsy remainder.
    while (unparsed) {
        var afterBody = parseBodySection(unparsed, index, parseHandlers);
        unparsed = parseFooterSection(afterBody, index, parseHandlers);
    }
};
exports.default = parseDocument;
;