UNPKG

pdf-lib

Version:

Library for creating and modifying PDF files in JavaScript

89 lines (88 loc) 3.94 kB
import { PDFName, PDFRawStream } from '../pdf-objects';
import {
  arrayIndexOf,
  arrayIndexOneOf,
  arrayToString,
  error,
  trimArrayAndRemoveComments,
} from '../../utils';
import decodeStream from './encoding/decodeStream';
import parseObjectStream from './parseObjectStream';

/**
 * Tries to read a PDF stream from the start of `input`.
 *
 * The input is first passed through `trimArrayAndRemoveComments`. If the result
 * does not begin with "stream\n" or "stream\r\n", `undefined` is returned.
 *
 * Otherwise the bytes between the opening keyword and the first match of
 * "\nendstream", "\rendstream" or "endstream" are taken as the stream content,
 * and a two-element array is returned:
 *   [0] a subarray holding the stream content bytes
 *   [1] a subarray of the trimmed input starting right after the matched
 *       end-of-stream marker
 *
 * `error('Invalid Stream!')` is invoked when no end marker is found, or when the
 * text between the end marker and the following "endobj" is not exactly
 * "endstream" once trimmed.
 * NOTE(review): `error` is presumably a helper that throws - confirm in utils.
 *
 * @param input Bytes to inspect.
 * @param dict Stream dictionary. Accepted for signature parity; not read here.
 * @param parseHandlers Parse handlers. Accepted for signature parity; not read here.
 * @returns `[contents, remaining]`, or `undefined` if `input` does not start a stream.
 */
function parseStream(input, dict, parseHandlers) {
  var bytes = trimArrayAndRemoveComments(input);

  // The "stream" keyword must be followed by either LF or CRLF; the content
  // begins immediately after that line ending.
  var contentStart;
  if (arrayToString(bytes, 0, 7) === 'stream\n') {
    contentStart = 7;
  } else if (arrayToString(bytes, 0, 8) === 'stream\r\n') {
    contentStart = 8;
  } else {
    return undefined;
  }

  /*
  TODO: Make this more efficient by using the "Length" entry of the stream
  dictionary to jump to the end of the stream, instead of traversing each byte.
  */
  // Find where the stream content stops, and which marker variant matched.
  var endMatch = arrayIndexOneOf(bytes, [
    '\nendstream',
    '\rendstream',
    'endstream',
  ]);
  if (!endMatch) error('Invalid Stream!');
  var contentEnd = endMatch[0];
  var matchedMarker = endMatch[1];

  /*
  TODO: See if it makes sense to .slice() the stream contents, even though this
  would require more memory space.
  */
  var contents = bytes.subarray(contentStart, contentEnd);

  // Everything from the end marker up to "endobj" must be just the "endstream"
  // keyword (ignoring surrounding whitespace).
  var endobjIdx = arrayIndexOf(bytes, 'endobj', contentEnd);
  var closingText = arrayToString(bytes, contentEnd, endobjIdx).trim();
  if (closingText !== 'endstream') {
    error('Invalid Stream!');
  }

  var remaining = bytes.subarray(contentEnd + matchedMarker.length);
  return [contents, remaining];
}

/**
 * Tries to parse a PDF stream (or object stream) from the start of `input`.
 *
 * Returns `undefined` when `input` does not begin with a stream. Otherwise
 * returns a two-element array:
 *   [0] the result of `parseObjectStream` when the dictionary's "Type" entry is
 *       the name "ObjStm", otherwise a `PDFRawStream` built from the dictionary
 *       and the undecoded content bytes
 *   [1] a subarray of the input with the bytes of the parsed stream removed
 *
 * For object streams, `error(...)` is invoked unless the dictionary's "Filter"
 * entry is the name "FlateDecode"; the content is then run through
 * `decodeStream` before being parsed.
 *
 * Handlers (each optional):
 *   - `parseHandlers.onParseObjectStream(objectStream)` is called for object streams.
 *   - `parseHandlers.onParseStream(stream)` is called for every other stream.
 *
 * @param input Bytes to parse.
 * @param dict Stream dictionary that preceded the stream bytes.
 * @param index Forwarded unchanged to `parseObjectStream`.
 * @param parseHandlers Optional handlers object; defaults to `{}`.
 * @returns `[stream, remaining]`, or `undefined` if `input` does not start a stream.
 */
export default (function (input, dict, index, parseHandlers) {
  if (parseHandlers === undefined) {
    parseHandlers = {};
  }

  // Split the input into the stream's content bytes and whatever follows it.
  var parsed = parseStream(input, dict, parseHandlers);
  if (!parsed) {
    return undefined;
  }
  var contents = parsed[0];
  var remaining = parsed[1];

  var isObjectStream = dict.getMaybe('Type') === PDFName.from('ObjStm');

  // Ordinary stream: wrap the raw bytes without decoding or parsing them.
  if (!isObjectStream) {
    var stream = PDFRawStream.from(dict, contents);
    if (parseHandlers.onParseStream) {
      parseHandlers.onParseStream(stream);
    }
    return [stream, remaining];
  }

  // Object stream: decode it and parse the indirect objects it contains.
  if (dict.getMaybe('Filter') !== PDFName.from('FlateDecode')) {
    error("Cannot decode \"" + dict.get('Filter') + "\" Object Streams");
  }
  var decoded = decodeStream(dict, contents);
  var objectStream = parseObjectStream(dict, decoded, index, parseHandlers);
  if (parseHandlers.onParseObjectStream) {
    parseHandlers.onParseObjectStream(objectStream);
  }
  return [objectStream, remaining];
});