UNPKG

@cantoo/pdf-lib

Version:

Create and modify PDF files with JavaScript

1,425 lines (1,319 loc) 58.3 kB
import { parse as parseHtml, HTMLElement, NodeType, } from 'node-html-better-parser'; import Embeddable from './Embeddable'; import { EncryptedPDFError, FontkitNotRegisteredError, ForeignPageError, RemovePageFromEmptyDocumentError, } from './errors'; import PDFEmbeddedPage from './PDFEmbeddedPage'; import PDFFont from './PDFFont'; import PDFImage from './PDFImage'; import PDFPage from './PDFPage'; import PDFForm from './form/PDFForm'; import { PageSizes } from './sizes'; import { StandardFonts } from './StandardFonts'; import { CustomFontEmbedder, CustomFontSubsetEmbedder, JpegEmbedder, PageBoundingBox, PageEmbeddingMismatchedContextError, PDFArray, PDFCatalog, PDFContext, PDFDict, decodePDFRawStream, PDFStream, PDFRawStream, PDFHexString, PDFName, PDFObjectCopier, PDFPageEmbedder, PDFPageLeaf, PDFPageTree, PDFParser, PDFStreamWriter, PDFString, PDFWriter, PngEmbedder, StandardFontEmbedder, UnexpectedObjectTypeError, } from '../core'; import { ParseSpeeds, AttachmentOptions, SaveOptions, Base64SaveOptions, LoadOptions, CreateOptions, EmbedFontOptions, SetTitleOptions, } from './PDFDocumentOptions'; import PDFObject from '../core/objects/PDFObject'; import PDFRef from '../core/objects/PDFRef'; import { Fontkit } from '../types/fontkit'; import { TransformationMatrix } from '../types/matrix'; import { assertIs, assertIsOneOfOrUndefined, assertOrUndefined, assertRange, Cache, canBeConvertedToUint8Array, encodeToBase64, isStandardFont, pluckIndices, range, toUint8Array, } from '../utils'; import FileEmbedder, { AFRelationship } from '../core/embedders/FileEmbedder'; import PDFEmbeddedFile from './PDFEmbeddedFile'; import PDFJavaScript from './PDFJavaScript'; import JavaScriptEmbedder from '../core/embedders/JavaScriptEmbedder'; import { CipherTransformFactory } from '../core/crypto'; import PDFSvg from './PDFSvg'; import PDFSecurity, { SecurityOptions } from '../core/security/PDFSecurity'; export type BasePDFAttachment = { name: string; data: Uint8Array; mimeType: 
string | undefined; afRelationship: AFRelationship | undefined; description: string | undefined; creationDate: Date | undefined; modificationDate: Date | undefined; }; export type SavedPDFAttachment = BasePDFAttachment & { embeddedFileDict: PDFDict; specRef: PDFRef; }; export type UnsavedPDFAttachment = BasePDFAttachment & { pdfEmbeddedFile: PDFEmbeddedFile; }; export type PDFAttachment = UnsavedPDFAttachment | SavedPDFAttachment; /** * Represents a PDF document. */ export default class PDFDocument { /** * Load an existing [[PDFDocument]]. The input data can be provided in * multiple formats: * * | Type | Contents | * | ------------- | ------------------------------------------------------ | * | `string` | A base64 encoded string (or data URI) containing a PDF | * | `Uint8Array` | The raw bytes of a PDF | * | `ArrayBuffer` | The raw bytes of a PDF | * * For example: * ```js * import { PDFDocument } from 'pdf-lib' * * // pdf=string * const base64 = * 'JVBERi0xLjcKJYGBgYEKCjUgMCBvYmoKPDwKL0ZpbHRlciAvRmxhdGVEZWNvZGUKL0xlbm' + * 'd0aCAxMDQKPj4Kc3RyZWFtCniccwrhMlAAwaJ0Ln2P1Jyy1JLM5ERdc0MjCwUjE4WQNC4Q' + * '6cNlCFZkqGCqYGSqEJLLZWNuYGZiZmbkYuZsZmlmZGRgZmluDCQNzc3NTM2NzdzMXMxMjQ' + * 'ztFEKyuEK0uFxDuAAOERdVCmVuZHN0cmVhbQplbmRvYmoKCjYgMCBvYmoKPDwKL0ZpbHRl' + * 'ciAvRmxhdGVEZWNvZGUKL1R5cGUgL09ialN0bQovTiA0Ci9GaXJzdCAyMAovTGVuZ3RoID' + * 'IxNQo+PgpzdHJlYW0KeJxVj9GqwjAMhu/zFHkBzTo3nCCCiiKIHPEICuJF3cKoSCu2E8/b' + * '20wPIr1p8v9/8kVhgilmGfawX2CGaVrgcAi0/bsy0lrX7IGWpvJ4iJYEN3gEmrrGBlQwGs' + * 'HHO9VBX1wNrxAqMX87RBD5xpJuddqwd82tjAHxzV1U5LPgy52DKXWnr1Lheg+j/c/pzGVr' + * 'iqV0VlwZPXGPCJjElw/ybkwUmeoWgxesDXGhHJC/D/iikp1Av80ptKU0FdBEe25pPihAM1' + * 'u6ytgaaWfs2Hrz35CJT1+EWmAKZW5kc3RyZWFtCmVuZG9iagoKNyAwIG9iago8PAovU2l6' + * 'ZSA4Ci9Sb290IDIgMCBSCi9GaWx0ZXIgL0ZsYXRlRGVjb2RlCi9UeXBlIC9YUmVmCi9MZW' + * '5ndGggMzgKL1cgWyAxIDIgMiBdCi9JbmRleCBbIDAgOCBdCj4+CnN0cmVhbQp4nBXEwREA' + * 'EBAEsCwz3vrvRmOOyyOoGhZdutHN2MT55fIAVocD+AplbmRzdHJlYW0KZW5kb2JqCgpzdG' + * 'FydHhyZWYKNTEwCiUlRU9G' * * const 
dataUri = 'data:application/pdf;base64,' + base64
   *
   * const pdfDoc1 = await PDFDocument.load(base64)
   * const pdfDoc2 = await PDFDocument.load(dataUri)
   *
   * // pdf=Uint8Array
   * import fs from 'fs'
   * const uint8Array = fs.readFileSync('with_update_sections.pdf')
   * const pdfDoc3 = await PDFDocument.load(uint8Array)
   *
   * // pdf=ArrayBuffer
   * const url = 'https://pdf-lib.js.org/assets/with_update_sections.pdf'
   * const arrayBuffer = await fetch(url).then(res => res.arrayBuffer())
   * const pdfDoc4 = await PDFDocument.load(arrayBuffer)
   *
   * ```
   *
   * @param pdf The input data containing a PDF document.
   * @param options The options to be used when loading the document.
   * @returns Resolves with a document loaded from the input.
   */
  static async load(
    pdf: string | Uint8Array | ArrayBuffer,
    options: LoadOptions = {},
  ) {
    const {
      ignoreEncryption = false,
      parseSpeed = ParseSpeeds.Slow,
      throwOnInvalidObject = false,
      warnOnInvalidObjects = false,
      updateMetadata = true,
      capNumbers = false,
      password,
    } = options;

    assertIs(pdf, 'pdf', ['string', Uint8Array, ArrayBuffer]);
    assertIs(ignoreEncryption, 'ignoreEncryption', ['boolean']);
    assertIs(parseSpeed, 'parseSpeed', ['number']);
    assertIs(throwOnInvalidObject, 'throwOnInvalidObject', ['boolean']);
    assertIs(warnOnInvalidObjects, 'warnOnInvalidObjects', ['boolean']);
    assertIs(password, 'password', ['string', 'undefined']);

    const bytes = toUint8Array(pdf);

    // First pass: parse without a cipher. The arguments are positional and
    // must follow the same order as the decrypting call below, otherwise
    // `capNumbers` is consumed as `warnOnInvalidObjects`.
    const context = await PDFParser.forBytesWithOptions(
      bytes,
      parseSpeed,
      throwOnInvalidObject,
      warnOnInvalidObjects,
      capNumbers,
    ).parseDocument();

    if (
      !!context.lookup(context.trailerInfo.Encrypt) &&
      password !== undefined
    ) {
      // Decrypt: the document is parsed a second time, now with a cipher
      // factory built from the trailer's /Encrypt dictionary, the first
      // entry of the trailer's /ID array, and the supplied password.
      const fileIds = context.lookup(context.trailerInfo.ID, PDFArray);
      const encryptDict = context.lookup(context.trailerInfo.Encrypt, PDFDict);
      const decryptedContext = await PDFParser.forBytesWithOptions(
        bytes,
        parseSpeed,
        throwOnInvalidObject,
        warnOnInvalidObjects,
        capNumbers,
        new CipherTransformFactory(
          encryptDict,
          (fileIds.get(0) as PDFHexString).asBytes(),
          password,
        ),
      ).parseDocument();
      // `ignoreEncryption` is forced to `true` for the decrypted context.
      return new PDFDocument(decryptedContext, true, updateMetadata);
    } else {
      return new PDFDocument(context, ignoreEncryption, updateMetadata);
    }
  }

  /**
   * Create a new [[PDFDocument]].
   * @param options The options to be used when creating the document.
   * @returns Resolves with the newly created document.
   */
  static async create(options: CreateOptions = {}) {
    const { updateMetadata = true } = options;

    const context = PDFContext.create();
    const pageTree = PDFPageTree.withContext(context);
    const pageTreeRef = context.register(pageTree);
    const catalog = PDFCatalog.withContextAndPages(context, pageTreeRef);
    context.trailerInfo.Root = context.register(catalog);

    return new PDFDocument(context, false, updateMetadata);
  }

  /** The low-level context of this document. */
  readonly context: PDFContext;

  /** The catalog of this document. */
  readonly catalog: PDFCatalog;

  /** Whether or not this document is encrypted. */
  readonly isEncrypted: boolean;

  /** The default word breaks used in PDFPage.drawText */
  defaultWordBreaks: string[] = [' '];

  private fontkit?: Fontkit;
  private pageCount: number | undefined;
  private readonly pageCache: Cache<PDFPage[]>;
  private readonly pageMap: Map<PDFPageLeaf, PDFPage>;
  private readonly formCache: Cache<PDFForm>;
  private readonly fonts: PDFFont[];
  private readonly images: PDFImage[];
  private readonly embeddedPages: PDFEmbeddedPage[];
  private readonly embeddedFiles: PDFEmbeddedFile[];
  private readonly javaScripts: PDFJavaScript[];

  /**
   * Wrap a parsed or freshly created context in a document.
   *
   * @param context The low-level context holding the document's objects.
   * @param ignoreEncryption When `false`, an `EncryptedPDFError` is thrown if
   *                         the trailer still references an /Encrypt entry.
   * @param updateMetadata When `true`, `updateInfoDict()` is called.
   */
  private constructor(
    context: PDFContext,
    ignoreEncryption: boolean,
    updateMetadata: boolean,
  ) {
    assertIs(context, 'context', [[PDFContext, 'PDFContext']]);
    assertIs(ignoreEncryption, 'ignoreEncryption', ['boolean']);

    this.context = context;
    this.catalog = context.lookup(context.trailerInfo.Root) as PDFCatalog;

    // A context that reports itself as decrypted no longer needs its
    // /Encrypt trailer entry, so the reference is dropped here.
    if (!!context.lookup(context.trailerInfo.Encrypt) && context.isDecrypted) {
      // context.delete(context.trailerInfo.Encrypt);
      delete context.trailerInfo.Encrypt;
    }
    this.isEncrypted = !!context.lookup(context.trailerInfo.Encrypt);

    this.pageCache = Cache.populatedBy(this.computePages);
    this.pageMap = new Map();
    this.formCache = Cache.populatedBy(this.getOrCreateForm);
    this.fonts = [];
    this.images = [];
    this.embeddedPages = [];
    this.embeddedFiles = [];
    this.javaScripts = [];

    if (!ignoreEncryption && this.isEncrypted) throw new EncryptedPDFError();

    if (updateMetadata) this.updateInfoDict();
  }

  /**
   * Register a fontkit instance. This must be done before custom fonts can
   * be embedded. See [here](https://github.com/Hopding/pdf-lib/tree/master#fontkit-installation)
   * for instructions on how to install and register a fontkit instance.
   *
   * > You do **not** need to call this method to embed standard fonts.
   *
   * For example:
   * ```js
   * import { PDFDocument } from 'pdf-lib'
   * import fontkit from '@pdf-lib/fontkit'
   *
   * const pdfDoc = await PDFDocument.create()
   * pdfDoc.registerFontkit(fontkit)
   * ```
   *
   * @param fontkit The fontkit instance to be registered.
   */
  registerFontkit(fontkit: Fontkit): void {
    this.fontkit = fontkit;
  }

  /**
   * Get the [[PDFForm]] containing all interactive fields for this document.
   * For example:
   * ```js
   * const form = pdfDoc.getForm()
   * const fields = form.getFields()
   * fields.forEach(field => {
   *   const type = field.constructor.name
   *   const name = field.getName()
   *   console.log(`${type}: ${name}`)
   * })
   * ```
   * @returns The form for this document.
   */
  getForm(): PDFForm {
    const form = this.formCache.access();
    if (form.hasXFA()) {
      console.warn(
        'Removing XFA form data as pdf-lib does not support reading or writing XFA',
      );
      form.deleteXFA();
    }
    return form;
  }

  /**
   * Get this document's title metadata. The title appears in the
   * "Document Properties" section of most PDF readers. For example:
   * ```js
   * const title = pdfDoc.getTitle()
   * ```
   * @returns A string containing the title of this document, if it has one.
*/
  getTitle(): string | undefined {
    const rawTitle = this.getInfoDict().lookup(PDFName.Title);
    if (rawTitle) {
      assertIsLiteralOrHexString(rawTitle);
      return rawTitle.decodeText();
    }
    return undefined;
  }

  /**
   * Read the author entry of this document's metadata. Most PDF readers show
   * it in their "Document Properties" section. For example:
   * ```js
   * const author = pdfDoc.getAuthor()
   * ```
   * @returns A string containing the author of this document, if it has one.
   */
  getAuthor(): string | undefined {
    const rawAuthor = this.getInfoDict().lookup(PDFName.Author);
    if (rawAuthor) {
      assertIsLiteralOrHexString(rawAuthor);
      return rawAuthor.decodeText();
    }
    return undefined;
  }

  /**
   * Read the subject entry of this document's metadata. Most PDF readers show
   * it in their "Document Properties" section. For example:
   * ```js
   * const subject = pdfDoc.getSubject()
   * ```
   * @returns A string containing the subject of this document, if it has one.
   */
  getSubject(): string | undefined {
    const rawSubject = this.getInfoDict().lookup(PDFName.Subject);
    if (rawSubject) {
      assertIsLiteralOrHexString(rawSubject);
      return rawSubject.decodeText();
    }
    return undefined;
  }

  /**
   * Read the keywords entry of this document's metadata. Most PDF readers
   * show it in their "Document Properties" section. For example:
   * ```js
   * const keywords = pdfDoc.getKeywords()
   * ```
   * @returns A string containing the keywords of this document, if it has any.
   */
  getKeywords(): string | undefined {
    const rawKeywords = this.getInfoDict().lookup(PDFName.Keywords);
    if (rawKeywords) {
      assertIsLiteralOrHexString(rawKeywords);
      return rawKeywords.decodeText();
    }
    return undefined;
  }

  /**
   * Get this document's creator metadata. The creator appears in the
   * "Document Properties" section of most PDF readers. For example:
   * ```js
   * const creator = pdfDoc.getCreator()
   * ```
   * @returns A string containing the creator of this document, if it has one.
*/
  getCreator(): string | undefined {
    const rawCreator = this.getInfoDict().lookup(PDFName.Creator);
    if (rawCreator) {
      assertIsLiteralOrHexString(rawCreator);
      return rawCreator.decodeText();
    }
    return undefined;
  }

  /**
   * Read the producer entry of this document's metadata. Most PDF readers
   * show it in their "Document Properties" section. For example:
   * ```js
   * const producer = pdfDoc.getProducer()
   * ```
   * @returns A string containing the producer of this document, if it has one.
   */
  getProducer(): string | undefined {
    const rawProducer = this.getInfoDict().lookup(PDFName.Producer);
    if (rawProducer) {
      assertIsLiteralOrHexString(rawProducer);
      return rawProducer.decodeText();
    }
    return undefined;
  }

  /**
   * Read this document's language, which is stored under the catalog's
   * `Lang` key. Some PDF readers show it in their "Document Properties"
   * section. For example:
   * ```js
   * const language = pdfDoc.getLanguage()
   * ```
   * @returns A string containing the RFC 3066 _Language-Tag_ of this document,
   *          if it has one.
   */
  getLanguage(): string | undefined {
    const rawLanguage = this.catalog.get(PDFName.of('Lang'));
    if (rawLanguage) {
      assertIsLiteralOrHexString(rawLanguage);
      return rawLanguage.decodeText();
    }
    return undefined;
  }

  /**
   * Read the creation date entry of this document's metadata. Most PDF
   * readers show it in their "Document Properties" section. For example:
   * ```js
   * const creationDate = pdfDoc.getCreationDate()
   * ```
   * @returns A Date containing the creation date of this document,
   *          if it has one.
   */
  getCreationDate(): Date | undefined {
    const rawCreationDate = this.getInfoDict().lookup(PDFName.CreationDate);
    if (rawCreationDate) {
      assertIsLiteralOrHexString(rawCreationDate);
      return rawCreationDate.decodeDate();
    }
    return undefined;
  }

  /**
   * Get this document's modification date metadata. The modification date
   * appears in the "Document Properties" section of most PDF readers.
   * For example:
   * ```js
   * const modification = pdfDoc.getModificationDate()
   * ```
   * @returns A Date containing the modification date of this document,
   * if it has one.
*/
  getModificationDate(): Date | undefined {
    const rawModDate = this.getInfoDict().lookup(PDFName.ModDate);
    if (rawModDate) {
      assertIsLiteralOrHexString(rawModDate);
      return rawModDate.decodeDate();
    }
    return undefined;
  }

  /**
   * Write the title entry of this document's metadata. Most PDF readers show
   * it in their "Document Properties" section. For example:
   * ```js
   * pdfDoc.setTitle('🥚 The Life of an Egg 🍳')
   * ```
   *
   * To display the title in the window's title bar, set the
   * `showInWindowTitleBar` option to `true` (works for _most_ PDF readers).
   * For example:
   * ```js
   * pdfDoc.setTitle('🥚 The Life of an Egg 🍳', { showInWindowTitleBar: true })
   * ```
   *
   * @param title The title of this document.
   * @param options The options to be used when setting the title.
   */
  setTitle(title: string, options?: SetTitleOptions): void {
    assertIs(title, 'title', ['string']);
    const infoDict = this.getInfoDict();
    infoDict.set(PDFName.of('Title'), PDFHexString.fromText(title));

    // Ask readers to show the title instead of the file name when requested.
    if (options?.showInWindowTitleBar) {
      this.catalog.getOrCreateViewerPreferences().setDisplayDocTitle(true);
    }
  }

  /**
   * Write the author entry of this document's metadata. Most PDF readers show
   * it in their "Document Properties" section. For example:
   * ```js
   * pdfDoc.setAuthor('Humpty Dumpty')
   * ```
   * @param author The author of this document.
   */
  setAuthor(author: string): void {
    assertIs(author, 'author', ['string']);
    const infoDict = this.getInfoDict();
    infoDict.set(PDFName.of('Author'), PDFHexString.fromText(author));
  }

  /**
   * Set this document's subject metadata. The subject will appear in the
   * "Document Properties" section of most PDF readers. For example:
   * ```js
   * pdfDoc.setSubject('📘 An Epic Tale of Woe 📖')
   * ```
   * @param subject The subject of this document.
*/
  setSubject(subject: string): void {
    // The label passed to `assertIs` must name this parameter so that a type
    // error points at `subject` (it previously said 'author').
    assertIs(subject, 'subject', ['string']);
    const key = PDFName.of('Subject');
    this.getInfoDict().set(key, PDFHexString.fromText(subject));
  }

  /**
   * Set this document's keyword metadata. These keywords will appear in the
   * "Document Properties" section of most PDF readers. For example:
   * ```js
   * pdfDoc.setKeywords(['eggs', 'wall', 'fall', 'king', 'horses', 'men'])
   * ```
   * The keywords are stored as a single string, joined with spaces.
   * @param keywords An array of keywords associated with this document.
   */
  setKeywords(keywords: string[]): void {
    assertIs(keywords, 'keywords', [Array]);
    const key = PDFName.of('Keywords');
    this.getInfoDict().set(key, PDFHexString.fromText(keywords.join(' ')));
  }

  /**
   * Set this document's creator metadata. The creator will appear in the
   * "Document Properties" section of most PDF readers. For example:
   * ```js
   * pdfDoc.setCreator('PDF App 9000 🤖')
   * ```
   * @param creator The creator of this document.
   */
  setCreator(creator: string): void {
    assertIs(creator, 'creator', ['string']);
    const key = PDFName.of('Creator');
    this.getInfoDict().set(key, PDFHexString.fromText(creator));
  }

  /**
   * Set this document's producer metadata. The producer will appear in the
   * "Document Properties" section of most PDF readers. For example:
   * ```js
   * pdfDoc.setProducer('PDF App 9000 🤖')
   * ```
   * @param producer The producer of this document.
   */
  setProducer(producer: string): void {
    // The label must name this parameter (it previously said 'creator').
    assertIs(producer, 'producer', ['string']);
    const key = PDFName.of('Producer');
    this.getInfoDict().set(key, PDFHexString.fromText(producer));
  }

  /**
   * Set this document's language metadata. The language will appear in the
   * "Document Properties" section of some PDF readers. For example:
   * ```js
   * pdfDoc.setLanguage('en-us')
   * ```
   *
   * @param language An RFC 3066 _Language-Tag_ denoting the language of this
   * document, or an empty string if the language is unknown.
*/
  setLanguage(language: string): void {
    assertIs(language, 'language', ['string']);
    this.catalog.set(PDFName.of('Lang'), PDFString.of(language));
  }

  /**
   * Write the creation date entry of this document's metadata. Most PDF
   * readers show it in their "Document Properties" section. For example:
   * ```js
   * pdfDoc.setCreationDate(new Date())
   * ```
   * @param creationDate The date this document was created.
   */
  setCreationDate(creationDate: Date): void {
    assertIs(creationDate, 'creationDate', [[Date, 'Date']]);
    const infoDict = this.getInfoDict();
    infoDict.set(PDFName.of('CreationDate'), PDFString.fromDate(creationDate));
  }

  /**
   * Write the modification date entry of this document's metadata. Most PDF
   * readers show it in their "Document Properties" section. For example:
   * ```js
   * pdfDoc.setModificationDate(new Date())
   * ```
   * @param modificationDate The date this document was last modified.
   */
  setModificationDate(modificationDate: Date): void {
    assertIs(modificationDate, 'modificationDate', [[Date, 'Date']]);
    const infoDict = this.getInfoDict();
    infoDict.set(PDFName.of('ModDate'), PDFString.fromDate(modificationDate));
  }

  /**
   * Report how many pages this document contains. The count is computed from
   * `getPages()` on first use and stored on the instance. For example:
   * ```js
   * const totalPages = pdfDoc.getPageCount()
   * ```
   * @returns The number of pages in this document.
   */
  getPageCount(): number {
    if (this.pageCount !== undefined) return this.pageCount;
    this.pageCount = this.getPages().length;
    return this.pageCount;
  }

  /**
   * Get an array of all the pages contained in this document. The pages are
   * stored in the array in the same order that they are rendered in the
   * document. For example:
   * ```js
   * const pages = pdfDoc.getPages()
   * pages[0]   // The first page of the document
   * pages[2]   // The third page of the document
   * pages[197] // The 198th page of the document
   * ```
   * @returns An array of all the pages contained in this document.
*/
  getPages(): PDFPage[] {
    return this.pageCache.access();
  }

  /**
   * Get the page rendered at a particular `index` of the document. For example:
   * ```js
   * pdfDoc.getPage(0)   // The first page of the document
   * pdfDoc.getPage(2)   // The third page of the document
   * pdfDoc.getPage(197) // The 198th page of the document
   * ```
   * @param index The zero-based index of the page; it is range-checked
   *              against the current page list.
   * @returns The [[PDFPage]] rendered at the given `index` of the document.
   */
  getPage(index: number): PDFPage {
    const pages = this.getPages();
    assertRange(index, 'index', 0, pages.length - 1);
    return pages[index];
  }

  /**
   * Get an array of indices for all the pages contained in this document. The
   * array will contain a range of integers from
   * `0..pdfDoc.getPageCount() - 1`. For example:
   * ```js
   * const pdfDoc = await PDFDocument.create()
   * pdfDoc.addPage()
   * pdfDoc.addPage()
   * pdfDoc.addPage()
   *
   * const indices = pdfDoc.getPageIndices()
   * indices // => [0, 1, 2]
   * ```
   * @returns An array of indices for all pages contained in this document.
   */
  getPageIndices(): number[] {
    return range(0, this.getPageCount());
  }

  /**
   * Remove the page at a given index from this document. For example:
   * ```js
   * pdfDoc.removePage(0)   // Remove the first page of the document
   * pdfDoc.removePage(2)   // Remove the third page of the document
   * pdfDoc.removePage(197) // Remove the 198th page of the document
   * ```
   * Once a page has been removed, it will no longer be rendered at that index
   * in the document.
   * @param index The index of the page to be removed.
   * @throws RemovePageFromEmptyDocumentError if the document has no pages.
   */
  removePage(index: number): void {
    const pageCount = this.getPageCount();
    if (pageCount === 0) throw new RemovePageFromEmptyDocumentError();
    assertRange(index, 'index', 0, pageCount - 1);
    this.catalog.removeLeafNode(index);
    // The page tree just changed, so the cached page list must be rebuilt on
    // the next `getPages()` call - the same thing `insertPage` does.
    this.pageCache.invalidate();
    this.pageCount = pageCount - 1;
  }

  /**
   * Add a page to the end of this document.
This method accepts three
   * different value types for the `page` parameter:
   *
   * | Type               | Behavior                                                                            |
   * | ------------------ | ----------------------------------------------------------------------------------- |
   * | `undefined`        | Create a new page and add it to the end of this document                            |
   * | `[number, number]` | Create a new page with the given dimensions and add it to the end of this document  |
   * | `PDFPage`          | Add the existing page to the end of this document                                   |
   *
   * For example:
   * ```js
   * // page=undefined
   * const newPage = pdfDoc.addPage()
   *
   * // page=[number, number]
   * import { PageSizes } from 'pdf-lib'
   * const newPage1 = pdfDoc.addPage(PageSizes.A7)
   * const newPage2 = pdfDoc.addPage(PageSizes.Letter)
   * const newPage3 = pdfDoc.addPage([500, 750])
   *
   * // page=PDFPage
   * const pdfDoc1 = await PDFDocument.create()
   * const pdfDoc2 = await PDFDocument.load(...)
   * const [existingPage] = await pdfDoc1.copyPages(pdfDoc2, [0])
   * pdfDoc1.addPage(existingPage)
   * ```
   *
   * @param page Optionally, the desired dimensions or existing page.
   * @returns The newly created (or existing) page.
   */
  addPage(page?: PDFPage | [number, number]): PDFPage {
    assertIs(page, 'page', ['undefined', [PDFPage, 'PDFPage'], Array]);
    // Appending is inserting at the index one past the current last page.
    const endIndex = this.getPageCount();
    return this.insertPage(endIndex, page);
  }

  /**
   * Insert a page at a given index within this document.
This method accepts
   * three different value types for the `page` parameter:
   *
   * | Type               | Behavior                                                                       |
   * | ------------------ | ------------------------------------------------------------------------------ |
   * | `undefined`        | Create a new page and insert it into this document                             |
   * | `[number, number]` | Create a new page with the given dimensions and insert it into this document   |
   * | `PDFPage`          | Insert the existing page into this document                                    |
   *
   * For example:
   * ```js
   * // page=undefined
   * const newPage = pdfDoc.insertPage(2)
   *
   * // page=[number, number]
   * import { PageSizes } from 'pdf-lib'
   * const newPage1 = pdfDoc.insertPage(2, PageSizes.A7)
   * const newPage2 = pdfDoc.insertPage(0, PageSizes.Letter)
   * const newPage3 = pdfDoc.insertPage(198, [500, 750])
   *
   * // page=PDFPage
   * const pdfDoc1 = await PDFDocument.create()
   * const pdfDoc2 = await PDFDocument.load(...)
   * const [existingPage] = await pdfDoc1.copyPages(pdfDoc2, [0])
   * pdfDoc1.insertPage(0, existingPage)
   * ```
   *
   * @param index The index at which the page should be inserted (zero-based).
   * @param page Optionally, the desired dimensions or existing page.
   * @returns The newly created (or existing) page.
   */
  insertPage(index: number, page?: PDFPage | [number, number]): PDFPage {
    const countBefore = this.getPageCount();
    assertRange(index, 'index', 0, countBefore);
    assertIs(page, 'page', ['undefined', [PDFPage, 'PDFPage'], Array]);

    let target: PDFPage;
    if (page instanceof PDFPage) {
      // An existing page may only be inserted into the document it belongs to.
      if (page.doc !== this) throw new ForeignPageError();
      target = page;
    } else {
      // No page given, or only dimensions: build a fresh page (A4 by default).
      const dims = Array.isArray(page) ? page : PageSizes.A4;
      target = PDFPage.create(this);
      target.setSize(...dims);
    }

    const parentRef = this.catalog.insertLeafNode(target.ref, index);
    target.node.setParent(parentRef);

    this.pageMap.set(target.node, target);
    this.pageCache.invalidate();
    this.pageCount = countBefore + 1;

    return target;
  }

  /**
   * Copy pages from a source document into this document. Allows pages to be
   * copied between different [[PDFDocument]] instances.
For example:
   * ```js
   * const pdfDoc = await PDFDocument.create()
   * const srcDoc = await PDFDocument.load(...)
   *
   * const copiedPages = await pdfDoc.copyPages(srcDoc, [0, 3, 89])
   * const [firstPage, fourthPage, ninetiethPage] = copiedPages;
   *
   * pdfDoc.addPage(fourthPage)
   * pdfDoc.insertPage(0, ninetiethPage)
   * pdfDoc.addPage(firstPage)
   * ```
   *
   * The returned pages are registered in this document's context but are not
   * yet part of its page tree; use `addPage` or `insertPage` to place them.
   *
   * @param srcDoc The document from which pages should be copied.
   * @param indices The indices of the pages that should be copied.
   * @returns Resolves with an array of pages copied into this document.
   */
  async copyPages(srcDoc: PDFDocument, indices: number[]): Promise<PDFPage[]> {
    assertIs(srcDoc, 'srcDoc', [[PDFDocument, 'PDFDocument']]);
    assertIs(indices, 'indices', [Array]);
    // Flush the source document before its objects are copied.
    await srcDoc.flush();
    const copier = PDFObjectCopier.for(srcDoc.context, this.context);
    const srcPages = srcDoc.getPages();
    // Copy each page inside its own promise (no threads are involved). The
    // async wrapper turns a failed copy (e.g. an index with no page) into a
    // rejection; each copy is registered in this context once it resolves.
    const copiedPages = indices
      .map((i) => srcPages[i])
      .map(async (page) => copier.copy(page.node))
      .map((p) =>
        p.then((copy) => PDFPage.of(copy, this.context.register(copy), this)),
      );
    return Promise.all(copiedPages);
  }

  /**
   * Get a copy of this document.
   *
   * For example:
   * ```js
   * const srcDoc = await PDFDocument.load(...)
   * const pdfDoc = await srcDoc.copy()
   * ```
   *
   * > **NOTE:** This method won't copy all information over to the new
   * > document (acroforms, outlines, etc...).
   *
   * @returns Resolves with a copy of this document.
*/
  async copy(): Promise<PDFDocument> {
    const pdfCopy = await PDFDocument.create();

    // Pages first: copy every page of this document, then append them in order.
    const contentPages = await pdfCopy.copyPages(this, this.getPageIndices());
    contentPages.forEach((contentPage) => {
      pdfCopy.addPage(contentPage);
    });

    // Then carry over each metadata entry that is actually present.
    const author = this.getAuthor();
    if (author !== undefined) pdfCopy.setAuthor(author);

    const creationDate = this.getCreationDate();
    if (creationDate !== undefined) pdfCopy.setCreationDate(creationDate);

    const creator = this.getCreator();
    if (creator !== undefined) pdfCopy.setCreator(creator);

    const language = this.getLanguage();
    if (language !== undefined) pdfCopy.setLanguage(language);

    const modificationDate = this.getModificationDate();
    if (modificationDate !== undefined) {
      pdfCopy.setModificationDate(modificationDate);
    }

    const producer = this.getProducer();
    if (producer !== undefined) pdfCopy.setProducer(producer);

    const subject = this.getSubject();
    if (subject !== undefined) pdfCopy.setSubject(subject);

    const title = this.getTitle();
    if (title !== undefined) pdfCopy.setTitle(title);

    pdfCopy.defaultWordBreaks = this.defaultWordBreaks;

    return pdfCopy;
  }

  /**
   * Add JavaScript to this document. The supplied `script` is executed when the
   * document is opened. The `script` can be used to perform some operation
   * when the document is opened (e.g. logging to the console), or it can be
   * used to define a function that can be referenced later in a JavaScript
   * action. For example:
   * ```js
   * // Show "Hello World!" in the console when the PDF is opened
   * pdfDoc.addJavaScript(
   *   'main',
   *   'console.show(); console.println("Hello World!");'
   * );
   *
   * // Define a function named "foo" that can be called in JavaScript Actions
   * pdfDoc.addJavaScript(
   *   'foo',
   *   'function foo() { return "foo"; }'
   * );
   * ```
   * See the [JavaScript for Acrobat API Reference](https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/js_api_reference.pdf)
   * for details.
   * @param name The name of the script. Must be unique per document.
   * @param script The JavaScript to execute.
*/
  addJavaScript(name: string, script: string) {
    assertIs(name, 'name', ['string']);
    assertIs(script, 'script', ['string']);

    // Wrap the script in an embedder, reserve a ref for it, and queue it on
    // this document's list of scripts.
    const scriptEmbedder = JavaScriptEmbedder.for(script, name);
    const scriptRef = this.context.nextRef();
    this.javaScripts.push(PDFJavaScript.of(scriptRef, this, scriptEmbedder));
  }

  /**
   * Add an attachment to this document. Attachments are visible in the
   * "Attachments" panel of Adobe Acrobat and some other PDF readers. Any
   * type of file can be added as an attachment. This includes, but is not
   * limited to, `.png`, `.jpg`, `.pdf`, `.csv`, `.docx`, and `.xlsx` files.
   *
   * The input data can be provided in multiple formats:
   *
   * | Type          | Contents                                                       |
   * | ------------- | -------------------------------------------------------------- |
   * | `string`      | A base64 encoded string (or data URI) containing an attachment |
   * | `Uint8Array`  | The raw bytes of an attachment                                 |
   * | `ArrayBuffer` | The raw bytes of an attachment                                 |
   *
   * For example:
   * ```js
   * // attachment=string
   * await pdfDoc.attach('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBD...', 'cat_riding_unicorn.jpg', {
   *   mimeType: 'image/jpeg',
   *   description: 'Cool cat riding a unicorn! 🦄🐈🕶️',
   *   creationDate: new Date('2019/12/01'),
   *   modificationDate: new Date('2020/04/19'),
   * })
   * await pdfDoc.attach('...', 'cat_riding_unicorn.jpg', {
   *   mimeType: 'image/jpeg',
   *   description: 'Cool cat riding a unicorn! 🦄🐈🕶️',
   *   creationDate: new Date('2019/12/01'),
   *   modificationDate: new Date('2020/04/19'),
   * })
   *
   * // attachment=Uint8Array
   * import fs from 'fs'
   * const uint8Array = fs.readFileSync('cat_riding_unicorn.jpg')
   * await pdfDoc.attach(uint8Array, 'cat_riding_unicorn.jpg', {
   *   mimeType: 'image/jpeg',
   *   description: 'Cool cat riding a unicorn! 
🦄🐈🕶️',
   *   creationDate: new Date('2019/12/01'),
   *   modificationDate: new Date('2020/04/19'),
   * })
   *
   * // attachment=ArrayBuffer
   * const url = 'https://pdf-lib.js.org/assets/cat_riding_unicorn.jpg'
   * const arrayBuffer = await fetch(url).then(res => res.arrayBuffer())
   * await pdfDoc.attach(arrayBuffer, 'cat_riding_unicorn.jpg', {
   *   mimeType: 'image/jpeg',
   *   description: 'Cool cat riding a unicorn! 🦄🐈🕶️',
   *   creationDate: new Date('2019/12/01'),
   *   modificationDate: new Date('2020/04/19'),
   * })
   * ```
   *
   * @param attachment The input data containing the file to be attached.
   * @param name The name of the file to be attached.
   * @param options Optional metadata for the attachment (`mimeType`,
   *                `description`, `creationDate`, `modificationDate`,
   *                `afRelationship`).
   * @returns Resolves when the attachment is complete.
   */
  async attach(
    attachment: string | Uint8Array | ArrayBuffer,
    name: string,
    options: AttachmentOptions = {},
  ): Promise<void> {
    assertIs(attachment, 'attachment', ['string', Uint8Array, ArrayBuffer]);
    assertIs(name, 'name', ['string']);
    assertOrUndefined(options.mimeType, 'mimeType', ['string']);
    assertOrUndefined(options.description, 'description', ['string']);
    assertOrUndefined(options.creationDate, 'options.creationDate', [Date]);
    assertOrUndefined(options.modificationDate, 'options.modificationDate', [
      Date,
    ]);
    assertIsOneOfOrUndefined(
      options.afRelationship,
      'options.afRelationship',
      AFRelationship,
    );

    const bytes = toUint8Array(attachment);
    const embedder = FileEmbedder.for(bytes, name, options);

    // The file is only queued here, under a freshly reserved ref.
    const ref = this.context.nextRef();
    const embeddedFile = PDFEmbeddedFile.of(ref, this, embedder);
    this.embeddedFiles.push(embeddedFile);
  }

  /**
   * Collect the (name, file specification) pairs stored in the catalog under
   * `Names` -> `EmbeddedFiles` -> `Names`. Only that flat array is read.
   *
   * @returns One entry per pair, or an empty array when any level of that
   *          path is absent.
   */
  private getRawAttachments() {
    if (!this.catalog.has(PDFName.of('Names'))) return [];
    const Names = this.catalog.lookup(PDFName.of('Names'), PDFDict);

    if (!Names.has(PDFName.of('EmbeddedFiles'))) return [];
    const EmbeddedFiles = Names.lookup(PDFName.of('EmbeddedFiles'), PDFDict);

    if (!EmbeddedFiles.has(PDFName.of('Names'))) return [];
    const EFNames = EmbeddedFiles.lookup(PDFName.of('Names'), PDFArray);

    const rawAttachments = [];
    // Entries alternate: file name at `idx`, file specification at `idx + 1`.
    for (let idx = 0, len = EFNames.size(); idx < len; idx += 2) {
      const fileName = EFNames.lookup(idx) as PDFHexString | PDFString;
      const fileSpec = EFNames.lookup(idx + 1, PDFDict);
      rawAttachments.push({
        fileName,
        fileSpec,
        specRef: EFNames.get(idx + 1) as PDFRef,
      });
    }

    return rawAttachments;
  }

  /**
   * Decode the attachments that already exist in this document's catalog.
   * Entries whose file specification has no `EF` dictionary, or whose `EF/F`
   * entry is not a stream, are skipped.
   *
   * @returns The decoded attachments, each with its `specRef`.
   */
  private getSavedAttachments(): SavedPDFAttachment[] {
    const rawAttachments = this.getRawAttachments();
    return rawAttachments.flatMap(({ fileName, fileSpec, specRef }) => {
      const efDict = fileSpec.lookup(PDFName.of('EF'));
      if (!(efDict instanceof PDFDict)) return [];

      const stream = efDict.lookup(PDFName.of('F'));
      if (!(stream instanceof PDFStream)) return [];

      const afr = fileSpec.lookup(PDFName.of('AFRelationship'));
      const afRelationship =
        afr instanceof PDFName
          ? afr.toString().slice(1) // Remove leading slash
          : afr instanceof PDFString
            ? afr.decodeText()
            : undefined;

      const embeddedFileDict = stream.dict;

      const subtype = embeddedFileDict.lookup(PDFName.of('Subtype'));
      const mimeType =
        subtype instanceof PDFName
          ? subtype.toString().slice(1) // Remove leading slash
          : subtype instanceof PDFString
            ? subtype.decodeText()
            : undefined;

      // `Params` is optional, so use the non-throwing lookup: a missing
      // entry must leave the dates undefined instead of failing the listing.
      const paramsDict = embeddedFileDict.lookupMaybe(
        PDFName.of('Params'),
        PDFDict,
      );

      let creationDate: Date | undefined;
      let modificationDate: Date | undefined;

      if (paramsDict instanceof PDFDict) {
        const creationDateRaw = paramsDict.lookup(PDFName.of('CreationDate'));
        const modDateRaw = paramsDict.lookup(PDFName.of('ModDate'));

        if (creationDateRaw instanceof PDFString) {
          creationDate = creationDateRaw.decodeDate();
        }

        if (modDateRaw instanceof PDFString) {
          modificationDate = modDateRaw.decodeDate();
        }
      }

      // The description may be stored as a hex string or a literal string.
      const descRaw = fileSpec.lookup(PDFName.of('Desc'));
      let description: string | undefined;
      if (descRaw instanceof PDFHexString || descRaw instanceof PDFString) {
        description = descRaw.decodeText();
      }

      return [
        {
          name: fileName.decodeText(),
          data: decodePDFRawStream(stream as PDFRawStream).decode(),
          // Expand `#xx` hex escapes in the subtype name (e.g. `#2F` -> `/`).
          mimeType: mimeType?.replace(/#([0-9A-Fa-f]{2})/g, (_, hex) =>
            String.fromCharCode(parseInt(hex, 16)),
          ),
          afRelationship: afRelationship as AFRelationship,
          description,
          creationDate,
          modificationDate,
          // NOTE(review): this is the file specification's `EF` dictionary,
          // not the stream dictionary held in the local `embeddedFileDict` -
          // confirm which one callers expect.
          embeddedFileDict: efDict,
          specRef,
        },
      ];
    });
  }

  /**
   * Describe the attachments that were queued through `attach` and are not
   * reported as already embedded.
   *
   * @returns One entry per pending attachment, each carrying its
   *          `pdfEmbeddedFile`.
   */
  private getUnsavedAttachments(): UnsavedPDFAttachment[] {
    const attachments = this.embeddedFiles.flatMap((file) => {
      if (file.getAlreadyEmbedded()) return [];
      const embedder = file.getEmbedder();

      return {
        name: embedder.fileName,
        data: embedder.getFileData(),
        description: embedder.options.description,
        mimeType: embedder.options.mimeType,
        afRelationship: embedder.options.afRelationship,
        creationDate: embedder.options.creationDate,
        modificationDate: embedder.options.modificationDate,
        pdfEmbeddedFile: file,
      };
    });

    return attachments;
  }

  /**
   * Get all attachments that are embedded in this document.
*
   * @returns Array of attachments with name and data
   */
  getAttachments(): PDFAttachment[] {
    const savedAttachments = this.getSavedAttachments();
    const unsavedAttachments = this.getUnsavedAttachments();
    return [...savedAttachments, ...unsavedAttachments];
  }

  /**
   * Remove every attachment whose name equals `name`.
   *
   * Attachments that are only queued (they carry a `pdfEmbeddedFile`) are
   * dropped from `this.embeddedFiles`. Attachments already present in the
   * catalog are removed from the `EmbeddedFiles` name array and from the
   * catalog's attached-files array, and their file specification and
   * embedded stream are deleted from the context.
   *
   * @param name The name of the attachment(s) to remove.
   */
  detach(name: string): void {
    const attachedFiles = this.getAttachments();
    attachedFiles.forEach((file) => {
      if (file.name !== name) return;
      // the file wasn't embedded into context yet
      if ('pdfEmbeddedFile' in file) {
        const i = this.embeddedFiles.findIndex(
          (f) => file.pdfEmbeddedFile === f,
        );
        // `findIndex` signals "not found" with -1; splicing at -1 would
        // remove the last queued file instead of nothing.
        if (i !== -1) this.embeddedFiles.splice(i, 1);
      } else {
        // remove references from catalog
        const namesArr = this.catalog
          .Names()
          ?.lookup(PDFName.of('EmbeddedFiles'), PDFDict)
          .lookup(PDFName.of('Names'), PDFArray);
        const iNames = namesArr?.indexOf(file.specRef);
        if (iNames !== undefined && iNames > 0) {
          // attachment spec ref
          namesArr?.remove(iNames);
          // attachment name
          namesArr?.remove(iNames - 1);
        }
        // AF-Tag for PDF-A3 compliance
        const AF = this.catalog.AttachedFiles();
        const afIndex = AF?.indexOf(file.specRef);
        if (afIndex !== undefined) AF?.remove(afIndex);

        // remove references from context
        const streamRef = this.context
          .lookupMaybe(file.specRef, PDFDict)
          ?.lookupMaybe(PDFName.of('EF'), PDFDict)
          ?.get(PDFName.of('F')) as PDFRef | undefined;
        if (streamRef) this.context.delete(streamRef);
        this.context.delete(file.specRef);
      }
    });
  }

  /**
   * Embed a font into this document.
The input data can be provided in multiple
   * formats:
   *
   * | Type            | Contents                                                |
   * | --------------- | ------------------------------------------------------- |
   * | `StandardFonts` | One of the standard 14 fonts                            |
   * | `string`        | A base64 encoded string (or data URI) containing a font |
   * | `Uint8Array`    | The raw bytes of a font                                 |
   * | `ArrayBuffer`   | The raw bytes of a font                                 |
   *
   * For example:
   * ```js
   * // font=StandardFonts
   * import { StandardFonts } from 'pdf-lib'
   * const font1 = await pdfDoc.embedFont(StandardFonts.Helvetica)
   *
   * // font=string
   * const font2 = await pdfDoc.embedFont('AAEAAAAVAQAABABQRFNJRx/upe...')
   * const font3 = await pdfDoc.embedFont('data:font/opentype;base64,AAEAAA...')
   *
   * // font=Uint8Array
   * import fs from 'fs'
   * const font4 = await pdfDoc.embedFont(fs.readFileSync('Ubuntu-R.ttf'))
   *
   * // font=ArrayBuffer
   * const url = 'https://pdf-lib.js.org/assets/ubuntu/Ubuntu-R.ttf'
   * const ubuntuBytes = await fetch(url).then(res => res.arrayBuffer())
   * const font5 = await pdfDoc.embedFont(ubuntuBytes)
   * ```
   * See also: [[registerFontkit]]
   * @param font The input data for a font.
   * @param options The options to be used when embedding the font
   *                (`subset` defaults to `false`; `customName` and `features`
   *                are forwarded to the embedder).
   * @returns Resolves with the embedded font.
   */
  async embedFont(
    font: StandardFonts | string | Uint8Array | ArrayBuffer,
    options: EmbedFontOptions = {},
  ): Promise<PDFFont> {
    const { subset = false, customName, features } = options;

    assertIs(font, 'font', ['string', Uint8Array, ArrayBuffer]);
    assertIs(subset, 'subset', ['boolean']);

    let embedder: CustomFontEmbedder | StandardFontEmbedder;
    if (isStandardFont(font)) {
      embedder = StandardFontEmbedder.for(font, customName);
    } else if (canBeConvertedToUint8Array(font)) {
      // Custom fonts: decode the bytes first, then require fontkit.
      const fontBytes = toUint8Array(font);
      const fontkit = this.assertFontkit();
      if (subset) {
        embedder = await CustomFontSubsetEmbedder.for(
          fontkit,
          fontBytes,
          customName,
          features,
        );
      } else {
        embedder = await CustomFontEmbedder.for(
          fontkit,
          fontBytes,
          customName,
          features,
        );
      }
    } else {
      throw new TypeError(
        '`font` must be one of `StandardFonts | string | Uint8Array | ArrayBuffer`',
      );
    }

    const fontRef = this.context.nextRef();
    const embeddedFont = PDFFont.of(fontRef, this, embedder);
    this.fonts.push(embeddedFont);
    return embeddedFont;
  }

  /**
   * Embed a standard font into this document. Unlike [[embedFont]], this
   * method is synchronous.
   *
   * For example:
   * ```js
   * import { StandardFonts } from 'pdf-lib'
   * const helveticaFont = pdfDoc.embedStandardFont(StandardFonts.Helvetica)
   * ```
   * @param font The standard font to be embedded.
   * @param customName The name to be used when embedding the font.
   * @returns The embedded font.
   */
  embedStandardFont(font: StandardFonts, customName?: string): PDFFont {
    assertIs(font, 'font', ['string']);
    if (!isStandardFont(font)) {
      throw new TypeError('`font` must be one of type `StandardFonts`');
    }

    const standardEmbedder = StandardFontEmbedder.for(font, customName);
    const fontRef = this.context.nextRef();
    const embeddedFont = PDFFont.of(fontRef, this, standardEmbedder);
    this.fonts.push(embeddedFont);
    return embeddedFont;
  }

  /**
   * Embed a JPEG image into this document.
The input data can be provided in
   * multiple formats:
   *
   * | Type          | Contents                                                      |
   * | ------------- | ------------------------------------------------------------- |
   * | `string`      | A base64 encoded string (or data URI) containing a JPEG image |
   * | `Uint8Array`  | The raw bytes of a JPEG image                                 |
   * | `ArrayBuffer` | The raw bytes of a JPEG image                                 |
   *
   * For example:
   * ```js
   * // jpg=string
   * const image1 = await pdfDoc.embedJpg('/9j/4AAQSkZJRgABAQAAAQABAAD/2wBD...')
   * const image2 = await pdfDoc.embedJpg('data:image/jpeg;base64,/9j/4AAQ...')
   *
   * // jpg=Uint8Array
   * import fs from 'fs'
   * const uint8Array = fs.readFileSync('cat_riding_unicorn.jpg')
   * const image3 = await pdfDoc.embedJpg(uint8Array)
   *
   * // jpg=ArrayBuffer
   * const url = 'https://pdf-lib.js.org/assets/cat_riding_unicorn.jpg'
   * const arrayBuffer = await fetch(url).then(res => res.arrayBuffer())
   * const image4 = await pdfDoc.embedJpg(arrayBuffer)
   * ```
   *
   * @param jpg The input data for a JPEG image.
   * @returns Resolves with the embedded image.
   */
  async embedJpg(jpg: string | Uint8Array | ArrayBuffer): Promise<PDFImage> {
    assertIs(jpg, 'jpg', ['string', Uint8Array, ArrayBuffer]);

    const jpegEmbedder = await JpegEmbedder.for(toUint8Array(jpg));

    // Allocate the reference only once the embedder was created successfully.
    const imageRef = this.context.nextRef();
    const embeddedImage = PDFImage.of(imageRef, this, jpegEmbedder);
    this.images.push(embeddedImage);
    return embeddedImage;
  }

  /**
   * Embed a PNG image into this document.
The input data can be provided in
   * multiple formats:
   *
   * | Type          | Contents                                                     |
   * | ------------- | ------------------------------------------------------------ |
   * | `string`      | A base64 encoded string (or data URI) containing a PNG image |
   * | `Uint8Array`  | The raw bytes of a PNG image                                 |
   * | `ArrayBuffer` | The raw bytes of a PNG image                                 |
   *
   * For example:
   * ```js
   * // png=string
   * const image1 = await pdfDoc.embedPng('iVBORw0KGgoAAAANSUhEUgAAAlgAAAF3...')
   * const image2 = await pdfDoc.embedPng('data:image/png;base64,iVBORw0KGg...')
   *
   * // png=Uint8Array
   * import fs from 'fs'
   * const uint8Array = fs.readFileSync('small_mario.png')
   * const image3 = await pdfDoc.embedPng(uint8Array)
   *
   * // png=ArrayBuffer
   * const url = 'https://pdf-lib.js.org/assets/small_mario.png'
   * const arrayBuffer = await fetch(url).then(res => res.arrayBuffer())
   * const image4 = await pdfDoc.embedPng(arrayBuffer)
   * ```
   *
   * @param png The input data for a PNG image.
   * @returns Resolves with the embedded image.
   */
  async embedPng(png: string | Uint8Array | ArrayBuffer): Promise<PDFImage> {
    assertIs(png, 'png', ['string', Uint8Array, ArrayBuffer]);
    const bytes = toUint8Array(png);
    const embedder = await PngEmbedder.for(bytes);
    const ref = this.context.nextRef();
    const pdfImage = PDFImage.of(ref, this, embedder);
    this.images.push(pdfImage);
    return pdfImage;
  }

  /**
   * Wrap an SVG document in a [[PDFSvg]], embedding the images it references.
   *
   * The markup is parsed and every `<image>` element is collected. The `href`
   * (or, failing that, `xlink:href`) attribute of each is embedded with
   * [[embedPng]] when it ends in `.png` (optionally followed by a query
   * string) or starts with `data:image/png;base64`, and with [[embedJpg]]
   * otherwise. Each distinct href is embedded exactly once.
   *
   * NOTE(review): hrefs are handed to `embedPng`/`embedJpg` unchanged, and
   * those methods document base64 / data URI strings as input; nothing here
   * fetches remote URLs.
   *
   * @param svg The SVG markup. A falsy value yields a `PDFSvg` with no images.
   * @returns Resolves with the `PDFSvg` and its href-to-image dictionary.
   */
  async embedSvg(svg: string): Promise<PDFSvg> {
    if (!svg) return new PDFSvg(svg);
    const parsedSvg = parseHtml(svg);

    // Depth-first search for <image> elements; text/comment nodes are skipped.
    const findImages = (element: HTMLElement): HTMLElement[] => {
      if (element.tagName === 'image') return [element];
      return element.childNodes.flatMap((child) =>
        child.nodeType === NodeType.ELEMENT_NODE ? findImages(child) : [],
      );
    };

    // De-duplicate up front: checking the dictionary inside the concurrent
    // callbacks would let the same href be embedded more than once, because
    // the entry is only written after the embed call has been awaited.
    const hrefs = new Set<string>();
    for (const image of findImages(parsedSvg)) {
      const href = image.attributes.href ?? image.attributes['xlink:href'];
      if (href) hrefs.add(href);
    }

    const pngHref = /\.png(\?|$)|^data:image\/png;base64/im;
    const imagesDict = {} as Record<string, PDFImage>;

    await Promise.all(
      Array.from(hrefs).map(async (href) => {
        imagesDict[href] = pngHref.test(href)
          ? await this.embedPng(href)
          : await this.embedJpg(href);
      }),
    );

    return new PDFSvg(svg, imagesDict);
  }

  /**
   * Embed one or more PDF pages into this document.
   *
   * For example:
   * ```js
   * const pdfDoc = await PDFDocument.create()
   *
   * const sourcePdfUrl = 'https://pdf-lib.js.org/assets/with_large_page_count.pdf'
   * const sourcePdf = await fetch(sourcePdfUrl).then((res) => res.arrayBuffer())
   *
   * // Embed page 74 of `sourcePdf` into `pdfDoc`
   * const [embeddedPage] = await pdfDoc.embedPdf(sourcePdf, [73])
   * ```
   *
   * See [[PDFDocument.load]] for examples of the allowed input data formats.
   *
   * @param pdf The input data containing a PDF document.
   * @param indices The indices of the pages that should be embedded
   *                (defaults to `[0]`, i.e. the first page).
   * @returns Resolves with an array of the embedded pages.
   */
  async embedPdf(
    pdf: string | Uint8Array | ArrayBuffer | PDFDocument,
    indices: number[] = [0],
  ): Promise<PDFEmbeddedPage[]> {
    assertIs(pdf, 'pdf', [
      'string',
      Uint8Array,
      ArrayBuffer,
      [PDFDocument, 'PDFDocument'],
    ]);
    assertIs(indices, 'indices', [Array]);

    // An already-loaded document is used as-is; anything else is parsed first.
    const srcDoc =
      pdf instanceof PDFDocument ? pdf : await PDFDocument.load(pdf);
    const srcPages = pluckIndices(srcDoc.getPages(), indices);

    return this.embedPages(srcPages);
  }

  /**
   * Embed a single PDF page into this document.
* * For example: * ```js * const pdfDoc = await PDFDocument.create() * * const sourcePdfUrl = 'https://pdf-lib.js.org/assets/with_large_page_count.pdf' * const sourceBuffer = await fetch(sourcePdfUrl).then((res) => res.arrayBuffer()) * const sourcePdfDoc = await PDFDocument.load(sourceBuffer) * const sourcePdfPage = sourcePdfDoc.getPages()[73] * * const embeddedPage = await pdfDoc.embedPage( * sourcePdfPage, * * // Clip a section of the source page so that we only embed part of it * { left: 100, right: 450, bottom: 330, top: 570 }, * * // Translate all drawings of the embedded page by (10, 200) units * [1, 0, 0, 1, 10, 200], * ) * ``` * * @param page The page to be embedded. * @param boundingBox * Optionally, an area of the source page that should be embedded * (defaults to entire page). * @param transformationMatrix * Optiona