UNPKG

pdf-lib

Version:

Library for creating and modifying PDF files in JavaScript

89 lines (88 loc) 3.52 kB
import { PDFXRef } from '../pdf-structures';
import { arrayToString, trimArray } from '../../utils';

/**
 * Matches a single cross reference entry at the start of a string:
 * a 10-digit offset, a 5-digit generation number, and an `n`/`f` flag.
 */
const ENTRY_REGEX = /^(\d{10}) (\d{5}) ([nf])/;

/**
 * Matches a single cross reference subsection at the start of a string:
 * a "<first object number> <object count>" header followed by one or more
 * entries. Capture groups: (1) first object number, (2) object count,
 * (3) the raw text of all entries in the subsection.
 */
const SUBSECTION_REGEX = /^(\d+) (\d+)([\n\r ]*(?:\d{10} \d{5} [nf][\n\r ]*)+)/;

/**
 * Matches the `xref` keyword followed by the body of the table. Capture
 * group (1) is the body: digits, whitespace, and the `f`/`n` entry flags.
 */
const XREF_TABLE_REGEX = /^xref[\n\r ]*([\d\n\r fn]+)/;

/** Matches any single character that may appear inside an xref table. */
const XREF_TABLE_CHAR_REGEX = /^[xref \n\r\dfn]/;

/**
 * Accepts a string as input. Repeatedly applies a regex to the (trimmed) input
 * that matches against entries of PDF Cross Reference Table subsections.
 *
 * If the whole input consists of entries, an array of `PDFXRef.Entry` is
 * returned (empty when the trimmed input is empty).
 *
 * If any part of the input fails to match, `undefined` is returned.
 *
 * @param {string} input Text containing zero or more whitespace-separated entries.
 * @returns {Array|undefined} The parsed entries, or `undefined` on a mismatch.
 */
function parseEntries(input) {
  const entries = [];
  let remainder = input.trim();

  while (remainder.length > 0) {
    const result = remainder.match(ENTRY_REGEX);
    if (!result) return undefined;

    const [fullMatch, offset, genNum, inUseFlag] = result;
    entries.push(
      PDFXRef.Entry.create()
        .setOffset(Number(offset))
        .setGenerationNum(Number(genNum))
        // 'n' marks an entry as in use; the only other accepted flag is 'f'.
        .setIsInUse(inUseFlag === 'n')
    );

    // Drop the consumed entry plus any whitespace separating it from the next.
    remainder = remainder.substring(fullMatch.length).trim();
  }

  return entries;
}

/**
 * Accepts a string as input. Repeatedly applies a regex to the (trimmed) input
 * that matches against subsections of PDF Cross Reference Tables.
 *
 * If the whole input consists of subsections, an array of `PDFXRef.Subsection`
 * is returned (empty when the trimmed input is empty).
 *
 * If any part of the input fails to match, `undefined` is returned.
 *
 * @param {string} input Text containing zero or more subsections.
 * @returns {Array|undefined} The parsed subsections, or `undefined` on a mismatch.
 */
function parseSubsections(input) {
  const subsections = [];
  let remainder = input.trim();

  while (remainder.length > 0) {
    const result = remainder.match(SUBSECTION_REGEX);
    if (!result) return undefined;

    // result[2] (the declared object count) is matched but not used: the
    // subsection is built from the entries that were actually parsed.
    const [fullMatch, firstObjNum, , entriesStr] = result;

    const entries = parseEntries(entriesStr);
    if (!entries) return undefined;

    subsections.push(
      PDFXRef.Subsection.from(entries).setFirstObjNum(Number(firstObjNum))
    );
    remainder = remainder.substring(fullMatch.length).trim();
  }

  return subsections;
}

/**
 * Accepts an array of bytes as input. Checks to see if the first characters in
 * the trimmed input make up a PDF Cross Reference Table.
 *
 * If so, returns a tuple containing (1) an object representing the parsed PDF
 * Cross Reference Table and (2) a subarray of the trimmed input with the
 * characters making up the parsed cross reference table removed. The
 * "onParseXRefTable" parse handler, when provided, is also called with the
 * Table object.
 *
 * If not, `undefined` is returned.
 *
 * @param {Uint8Array} input Bytes to parse (must support `subarray` once
 *   passed through `trimArray`).
 * @param {{ onParseXRefTable?: Function }} [parseHandlers] Optional handlers.
 * @returns {Array|undefined} `[xRefTable, remainingBytes]`, or `undefined`.
 */
function parseXRefTable(input, { onParseXRefTable } = {}) {
  const trimmed = trimArray(input);

  // Search for the first character that isn't part of an xref table. The
  // explicit bounds check stops the scan at the end of the array instead of
  // relying on how out-of-range reads happen to be converted to a character.
  let idx = 0;
  while (
    idx < trimmed.length &&
    XREF_TABLE_CHAR_REGEX.test(String.fromCharCode(trimmed[idx]))
  ) {
    idx += 1;
  }

  // Try to match the regex up to that character to see if we've got an xref table
  const result = arrayToString(trimmed, 0, idx).match(XREF_TABLE_REGEX);
  if (!result) return undefined;

  // Parse the subsections of the xref table
  const [fullMatch, contents] = result;
  const subsections = parseSubsections(contents);
  if (!subsections) return undefined;

  const xRefTable = PDFXRef.Table.from(subsections);
  if (onParseXRefTable) onParseXRefTable(xRefTable);

  // The string was built from the leading bytes of `trimmed`, so the match
  // length is used directly as the byte offset of the remainder.
  return [xRefTable, trimmed.subarray(fullMatch.length)];
}

export default parseXRefTable;