UNPKG

pdf-lib

Version:

Library for creating and modifying PDF files in JavaScript

90 lines (89 loc) 4.4 kB
import { PDFIndirectObject } from '../pdf-objects'; import { PDFObjectStream } from '../pdf-structures'; import { arrayFindIndexOf, arrayToString, error } from '../../utils'; import parseArray from './parseArray'; import parseBool from './parseBool'; import parseDict from './parseDict'; import parseHexString from './parseHexString'; import parseIndirectRef from './parseIndirectRef'; import parseName from './parseName'; import parseNull from './parseNull'; import parseNumber from './parseNumber'; import parseString from './parseString'; /** * Accepts a PDFDictionary and an array of bytes as input. The PDFDictionary should * be a PDF Object Stream dictionary, and the array of bytes should be the Object Stream's content. * * Attempts to parse the pairs of integers at the start of the input bytes. Each * pair describes one object within the Object Stream - its object number and byte * offset within the stream, respectively. * * Returns an array of objects representing the parsed integer pairs. */ var parseObjData = function (dict, input) { // Extract the value of the "N" entry from the dict var numObjects = dict.get('N').number; // Regex representing a pair of integers var objDatumRegex = /^[\0\t\n\f\r ]*(\d+)[\0\t\n\f\r ]*(\d+)[\0\t\n\f\r ]*/; // Find the first non-numeric character (not including EOLs and spaces) in the // input bytes var firstNonNumIdx = arrayFindIndexOf(input, function (charByte) { return !!String.fromCharCode(charByte).match(/[^\0\t\n\f\r \d]/); }); // Convert the input bytes to a string, up to the first non-numeric character var objDatumsStr = arrayToString(input, 0, firstNonNumIdx); // Repeatedly apply the integer pair regex to the input string to build up an // array of the parsed integer pairs var objData = []; var i = 0; var remaining = objDatumsStr; while (i < numObjects) { var _a = remaining.match(objDatumRegex), fullmatch = _a[0], objNum = _a[1], byteOffset = _a[2]; objData.push({ objNum: Number(objNum), byteOffset: Number(byteOffset) }); remaining = remaining.substring(fullmatch.length); i += 1; } return objData; }; /** * Accepts an a PDFDictionary and an array of bytes as input. The PDFDictionary * should be a PDF Object Stream dictionary, and the array of bytes should be the Object Stream's * content. *The array of bytes is expected to have been decoded (based on the * "Filter"s in the dictionary) prior to being passed to this function.* * * After parsing the integer pairs from the start of the input bytes, the objects * themselves will be parsed from the remaining input bytes. * * A PDFObjectStream will be returned, representing the objects parsed * from the Object Stream. The "onParseObjectStream" parse handler will also be * called with the parsed PDFObjectStream object. */ var parseObjectStream = function (dict, input, index, parseHandlers) { if (parseHandlers === void 0) { parseHandlers = {}; } // Parse the pairs of integers from start of input bytes var objData = parseObjData(dict, input); // Extract the value of the "First" entry in the dict var First = dict.get('First'); var firstObjOffset = First.number; // Map each pair of integers to a PDFIndirectObject var indirectObjects = objData.map(function (_a) { var objNum = _a.objNum, byteOffset = _a.byteOffset; var subarray = input.subarray(firstObjOffset + byteOffset); var pdfObject = (parseDict(subarray, index, parseHandlers) || parseArray(subarray, index, parseHandlers) || parseName(subarray, parseHandlers) || parseString(subarray, parseHandlers) || parseIndirectRef(subarray, parseHandlers) || parseNumber(subarray, parseHandlers) || parseHexString(subarray, parseHandlers) || parseBool(subarray, parseHandlers) || parseNull(subarray, parseHandlers) || error('Failed to parse object in Object Stream'))[0]; return PDFIndirectObject.of(pdfObject).setReferenceNumbers(objNum, 0); }); var objectStream = PDFObjectStream.from(dict, indirectObjects); // Call the parse handler if (parseHandlers.onParseObjectStream) { parseHandlers.onParseObjectStream(objectStream); } return objectStream; }; export default parseObjectStream;