pdf-lib
Version:
Library for creating and modifying PDF files in JavaScript
89 lines (88 loc) • 3.52 kB
JavaScript
import { PDFXRef } from '../pdf-structures';
import { arrayToString, trimArray } from '../../utils';
/**
 * Parses the entries of a single PDF Cross Reference Table subsection.
 *
 * The input is trimmed and then consumed from the front, one entry at a time.
 * An entry is a 10-digit offset, a 5-digit generation number, and a usage flag
 * ("n" marks the entry as in use, "f" does not), separated by single spaces.
 * Whitespace between consecutive entries is discarded.
 *
 * @param {string} input Text containing zero or more entries.
 * @returns {Array|undefined} One PDFXRef.Entry per entry, in input order (an
 *   empty array when the input is blank), or undefined if the remaining text
 *   ever fails to start with a well-formed entry.
 */
const parseEntries = (input) => {
  const entryRegex = /^(\d{10}) (\d{5}) (n|f)/;
  const parsed = [];

  for (let rest = input.trim(); rest.length > 0; ) {
    const match = entryRegex.exec(rest);
    if (match === null) {
      return undefined;
    }

    const [matchedText, offset, genNum, usageFlag] = match;
    const entry = PDFXRef.Entry.create()
      .setOffset(Number(offset))
      .setGenerationNum(Number(genNum))
      .setIsInUse(usageFlag === 'n');
    parsed.push(entry);

    // Drop the entry just read, plus whatever whitespace follows it.
    rest = rest.slice(matchedText.length).trim();
  }

  return parsed;
};
/**
 * Parses the subsections of a PDF Cross Reference Table.
 *
 * The input is trimmed and then consumed from the front, one subsection at a
 * time. A subsection is a header of two space-separated integers (the first
 * object number and an object count) followed by one or more entries. The
 * entries are delegated to parseEntries. The object count from the header is
 * matched but not compared against the number of entries actually present.
 *
 * @param {string} input Text containing zero or more subsections.
 * @returns {Array|undefined} One PDFXRef.Subsection per subsection, in input
 *   order (an empty array when the input is blank), or undefined if the
 *   remaining text ever fails to start with a well-formed subsection or its
 *   entries cannot be parsed.
 */
const parseSubsections = (input) => {
  const sectionsRegex = /^(\d+) (\d+)((\n|\r| )*(\d{10} \d{5} (n|f)(\n|\r| )*)+)/;
  const subsections = [];

  for (let rest = input.trim(); rest.length > 0; ) {
    const match = sectionsRegex.exec(rest);
    if (match === null) {
      return undefined;
    }

    // match[2] (the declared object count) is intentionally skipped.
    const [matchedText, firstObjNum, , entriesText] = match;

    const entries = parseEntries(entriesText);
    if (!entries) {
      return undefined;
    }

    const subsection = PDFXRef.Subsection.from(entries).setFirstObjNum(
      Number(firstObjNum),
    );
    subsections.push(subsection);

    // Drop the subsection just read, plus whatever whitespace follows it.
    rest = rest.slice(matchedText.length).trim();
  }

  return subsections;
};
// Character codes that can appear inside an xref table: the letters of the
// "xref" keyword, the entry flags "f" and "n", digits, and the whitespace
// separators (space, line feed, carriage return).
const XREF_TABLE_CHAR_CODES = new Set(
  Array.from('xrefn \n\r0123456789', (char) => char.charCodeAt(0)),
);

/**
 * Attempts to parse a PDF Cross Reference Table from the start of the trimmed
 * input.
 *
 * The leading run of bytes that could belong to an xref table is located
 * first, and only that run is converted to a string and matched, so the rest
 * of the input is never decoded.
 *
 * @param {Uint8Array} input Bytes to parse; they are passed through trimArray
 *   before matching.
 * @param {Object} [parseHandlers] Optional parse handlers.
 * @param {Function} [parseHandlers.onParseXRefTable] Called with the parsed
 *   table when parsing succeeds.
 * @returns {Array|undefined} On success, a tuple of (1) the PDFXRef.Table
 *   built from the parsed subsections and (2) a subarray of the trimmed input
 *   with the bytes of the matched table removed. Returns undefined when the
 *   trimmed input does not start with an xref table or its subsections cannot
 *   be parsed.
 */
const parseXRefTable = (input, { onParseXRefTable } = {}) => {
  const trimmed = trimArray(input);

  // Find the first byte that cannot be part of an xref table. The explicit
  // bounds check stops the scan at the end of the input.
  let end = 0;
  while (end < trimmed.length && XREF_TABLE_CHAR_CODES.has(trimmed[end])) {
    end += 1;
  }

  // Match only the candidate run. Inside a character class "|" is a literal
  // pipe, not alternation, so the classes list just the allowed characters.
  const xRefTableRegex = /^xref[\n\r ]*([\d\n\r fn]+)/;
  const match = xRefTableRegex.exec(arrayToString(trimmed, 0, end));
  if (match === null) {
    return undefined;
  }

  const [matchedText, contents] = match;
  const subsections = parseSubsections(contents);
  if (!subsections) {
    return undefined;
  }

  const xRefTable = PDFXRef.Table.from(subsections);
  if (onParseXRefTable) {
    onParseXRefTable(xRefTable);
  }

  // Each matched character corresponds to one input byte, so the match length
  // is also the number of bytes consumed.
  return [xRefTable, trimmed.subarray(matchedText.length)];
};
export default parseXRefTable;