pdf-lib
Version:
Library for creating and modifying PDF files in JavaScript
90 lines (89 loc) • 4.4 kB
JavaScript
import { PDFIndirectObject } from '../pdf-objects';
import { PDFObjectStream } from '../pdf-structures';
import { arrayFindIndexOf, arrayToString, error } from '../../utils';
import parseArray from './parseArray';
import parseBool from './parseBool';
import parseDict from './parseDict';
import parseHexString from './parseHexString';
import parseIndirectRef from './parseIndirectRef';
import parseName from './parseName';
import parseNull from './parseNull';
import parseNumber from './parseNumber';
import parseString from './parseString';
/**
 * Parses the table of integer pairs found at the start of an Object Stream's
 * content bytes.
 *
 * Each pair describes one object within the Object Stream: its object number
 * followed by its byte offset, in that order.
 *
 * @param dict  The Object Stream's dictionary. Its "N" entry is read (as
 *              `dict.get('N').number`) to determine how many pairs to parse.
 * @param input The Object Stream's content, as an array of bytes.
 * @returns An array of `{ objNum, byteOffset }` objects, one per parsed pair,
 *          in the order the pairs appear in the input.
 *
 * If fewer than "N" pairs can be parsed from the input, the failure is reported
 * through the `error` helper instead of surfacing as a TypeError caused by
 * indexing a `null` regex match.
 */
var parseObjData = function (dict, input) {
  // Number of integer pairs the dictionary claims the stream contains
  var numObjects = dict.get('N').number;

  // A pair of integers, each optionally surrounded by whitespace
  var objDatumRegex = /^[\0\t\n\f\r ]*(\d+)[\0\t\n\f\r ]*(\d+)[\0\t\n\f\r ]*/;

  // Anything that is neither whitespace nor a digit. Created once here rather
  // than once per byte inside the predicate below.
  var nonNumericRegex = /[^\0\t\n\f\r \d]/;

  // Locate the first byte that cannot belong to the integer pair table
  var firstNonNumIdx = arrayFindIndexOf(input, function (charByte) {
    return nonNumericRegex.test(String.fromCharCode(charByte));
  });

  // Only the leading (purely numeric) portion of the input is stringified
  var remaining = arrayToString(input, 0, firstNonNumIdx);

  // Peel one integer pair at a time off the front of the string
  var objData = [];
  for (var i = 0; i < numObjects; i += 1) {
    var match = remaining.match(objDatumRegex);
    if (!match) {
      // `match()` yields null when no further pair is available, i.e. the
      // stream holds fewer pairs than its "N" entry promised.
      error('Failed to parse object number/offset pair ' + (i + 1) + ' of ' +
        numObjects + ' in Object Stream');
    }
    objData.push({ objNum: Number(match[1]), byteOffset: Number(match[2]) });
    remaining = remaining.substring(match[0].length);
  }
  return objData;
};
/**
 * Parses the objects stored inside a PDF Object Stream.
 *
 * Accepts a PDFDictionary and an array of bytes as input. The PDFDictionary
 * should be a PDF Object Stream dictionary, and the array of bytes should be
 * the Object Stream's content. *The array of bytes is expected to have been
 * decoded (based on the "Filter"s in the dictionary) prior to being passed to
 * this function.*
 *
 * The integer pairs at the start of the input bytes are parsed first; each
 * object is then parsed from the bytes beginning at the dictionary's "First"
 * value plus that object's byte offset.
 *
 * Returns a PDFObjectStream built from the dictionary and the parsed objects.
 * The "onParseObjectStream" parse handler, if provided, is called with that
 * same PDFObjectStream before it is returned.
 */
var parseObjectStream = function (dict, input, index, parseHandlers) {
  if (parseHandlers === void 0) { parseHandlers = {}; }

  // Runs the object parsers against `bytes` in a fixed order and yields the
  // first element of the first truthy result. Indirect references are tried
  // before plain numbers.
  var parseObjectAt = function (bytes) {
    var attempts = [
      function () { return parseDict(bytes, index, parseHandlers); },
      function () { return parseArray(bytes, index, parseHandlers); },
      function () { return parseName(bytes, parseHandlers); },
      function () { return parseString(bytes, parseHandlers); },
      function () { return parseIndirectRef(bytes, parseHandlers); },
      function () { return parseNumber(bytes, parseHandlers); },
      function () { return parseHexString(bytes, parseHandlers); },
      function () { return parseBool(bytes, parseHandlers); },
      function () { return parseNull(bytes, parseHandlers); },
    ];
    var parsed;
    for (var k = 0; k < attempts.length; k += 1) {
      parsed = attempts[k]();
      if (parsed) {
        break;
      }
    }
    return (parsed || error('Failed to parse object in Object Stream'))[0];
  };

  // The integer pair table comes first, then the "First" entry of the dict
  var objData = parseObjData(dict, input);
  var firstObjOffset = dict.get('First').number;

  // Turn every (object number, byte offset) pair into a PDFIndirectObject
  var indirectObjects = [];
  for (var i = 0; i < objData.length; i += 1) {
    var entry = objData[i];
    var objectBytes = input.subarray(firstObjOffset + entry.byteOffset);
    var pdfObject = parseObjectAt(objectBytes);
    indirectObjects.push(
      PDFIndirectObject.of(pdfObject).setReferenceNumbers(entry.objNum, 0)
    );
  }

  var objectStream = PDFObjectStream.from(dict, indirectObjects);

  // Notify the caller-supplied handler, when there is one
  if (parseHandlers.onParseObjectStream) {
    parseHandlers.onParseObjectStream(objectStream);
  }
  return objectStream;
};
export default parseObjectStream;