pdf-lib
Version:
Library for creating and modifying PDF files in JavaScript
89 lines (88 loc) • 3.94 kB
JavaScript
import { PDFName, PDFRawStream } from '../pdf-objects';
import { arrayIndexOf, arrayIndexOneOf, arrayToString, error, trimArrayAndRemoveComments, } from '../../utils';
import decodeStream from './encoding/decodeStream';
import parseObjectStream from './parseObjectStream';
/**
 * Attempts to read the body of a PDF stream from the front of `input`.
 *
 * The input is first passed through `trimArrayAndRemoveComments`. If the
 * result does not begin with the `stream` keyword followed by LF or CRLF,
 * `undefined` is returned.
 *
 * Otherwise a two-element array is returned:
 *   [0] a subarray holding the bytes between the `stream` line and the first
 *       end-of-stream marker that is found, and
 *   [1] a subarray of the trimmed input that starts immediately after that
 *       marker.
 *
 * `error('Invalid Stream!')` is called when no end-of-stream marker is found,
 * or when the text from the marker up to the following `endobj` is, once
 * trimmed, anything other than `endstream`.
 *
 * NOTE: the `dict` and `parseHandlers` arguments are accepted but are not
 * read by this function.
 */
function parseStream(input, dict, parseHandlers) {
  const bytes = trimArrayAndRemoveComments(input);

  // The "stream" keyword must be terminated by LF or CRLF; anything else
  // means these bytes are not the start of a stream.
  let bodyStart;
  if (arrayToString(bytes, 0, 7) === 'stream\n') {
    bodyStart = 7;
  } else if (arrayToString(bytes, 0, 8) === 'stream\r\n') {
    bodyStart = 8;
  } else {
    return undefined;
  }

  /*
    TODO: Make this more efficient by using the "Length" entry of the stream
    dictionary to jump to the end of the stream, instead of scanning the bytes.
  */
  // Find the end-of-stream marker, with or without a preceding LF / CR
  const endMarker = arrayIndexOneOf(bytes, [
    '\nendstream',
    '\rendstream',
    'endstream',
  ]);
  if (!endMarker) {
    error('Invalid Stream!');
  }
  const markerIdx = endMarker[0];
  const markerText = endMarker[1];

  /*
    TODO: See if it makes sense to .slice() the stream contents, even though
    this would require more memory space.
  */
  // The stream body is a view over the bytes, not a copy
  const contents = bytes.subarray(bodyStart, markerIdx);

  // Only "endstream" (plus surrounding whitespace) may precede "endobj"
  const endobjIdx = arrayIndexOf(bytes, 'endobj', markerIdx);
  const trailer = arrayToString(bytes, markerIdx, endobjIdx).trim();
  if (trailer !== 'endstream') {
    error('Invalid Stream!');
  }

  const remaining = bytes.subarray(markerIdx + markerText.length);
  return [contents, remaining];
}
/**
 * Parses a PDF stream from the front of `input`, using `dict` as the stream's
 * dictionary.
 *
 * The raw stream bytes are extracted with `parseStream`. If that yields
 * nothing (the input does not start with a stream), `undefined` is returned.
 *
 * Otherwise a two-element array is returned:
 *   [0] the parsed stream object, and
 *   [1] the bytes of the input that remain after the stream.
 *
 * Two kinds of stream object are produced:
 *   - When the dictionary's "Type" entry is the name "ObjStm", the contents
 *     are decoded with `decodeStream` and parsed with `parseObjectStream`.
 *     The result is passed to the "onParseObjectStream" parse handler, if one
 *     is set. `error(...)` is called if the dictionary's "Filter" entry is not
 *     the name "FlateDecode".
 *   - In every other case the undecoded contents are wrapped with
 *     `PDFRawStream.from` and passed to the "onParseStream" parse handler, if
 *     one is set.
 *
 * `parseHandlers` defaults to an empty object when it is `undefined`.
 */
export default (function (input, dict, index, parseHandlers) {
  const handlers = parseHandlers === undefined ? {} : parseHandlers;

  // Split the input into the stream's content bytes and whatever follows it
  const parsed = parseStream(input, dict, handlers);
  if (!parsed) {
    return undefined;
  }
  const contents = parsed[0];
  const remaining = parsed[1];

  const isObjectStream = dict.getMaybe('Type') === PDFName.from('ObjStm');

  // Ordinary streams are returned as-is, without parsing the content bytes
  if (!isObjectStream) {
    const rawStream = PDFRawStream.from(dict, contents);
    if (handlers.onParseStream) {
      handlers.onParseStream(rawStream);
    }
    return [rawStream, remaining];
  }

  // Object Streams are only supported with the FlateDecode filter
  if (dict.getMaybe('Filter') !== PDFName.from('FlateDecode')) {
    error('Cannot decode "' + dict.get('Filter') + '" Object Streams');
  }

  // Decode the Object Stream and parse the indirect objects it contains
  const decodedBytes = decodeStream(dict, contents);
  const objectStream = parseObjectStream(dict, decodedBytes, index, handlers);
  if (handlers.onParseObjectStream) {
    handlers.onParseObjectStream(objectStream);
  }
  return [objectStream, remaining];
});