UNPKG

pymupdf-node

Version:
221 lines (168 loc) 6.73 kB
# pymupdf-node

**PyMuPDF** for **Node.js**

## Getting Started

### Install node modules

`npm install pymupdf-node`

### Trying

Import into your source code with:

```
import * as PyMuPDFNode from "pymupdf-node";
```

Load the PyMuPDF wheel into the `pymupdf` module with:

```
const pymupdf = await PyMuPDFNode.loadPyMuPDF("node_modules/pymupdf-node/pymupdf/pymupdf-1.26.0-cp312-abi3-pyodide_2024_0_wasm32.whl");
```

If you also need **PyMuPDF4LLM**, load its wheel into the `pymupdf4LLM` module with:

```
const pymupdf4LLM = await PyMuPDFNode.loadPyMuPDF4LLM("node_modules/pymupdf-node/pymupdf/pymupdf4llm-0.0.24-py3-none-any.whl");
```

#### Open a document:

```
let doc = pymupdf.open("test.pdf");
```

#### Copy a page:

```
doc.copy_page(0);
```

#### Get document as markdown:

```
const md = pymupdf4LLM.to_markdown(doc, {
  page_chunks: false,
  write_images: false,
  ignore_images: true,
  image_path: "",
  extract_words: false,
  show_progress: false,
});
console.log(md);
```

## And much more!

**PyMuPDF Node** is PyMuPDF for JavaScript environments: use the PyMuPDF Python API as if it were JavaScript. See the [PyMuPDF API documentation](https://pymupdf.readthedocs.io) for details.

### Full example:

```
import * as PyMuPDFNode from "pymupdf-node";
import * as fs from "fs";

const pymupdf = await PyMuPDFNode.loadPyMuPDF("node_modules/pymupdf-node/pymupdf/pymupdf-1.26.0-cp312-abi3-pyodide_2024_0_wasm32.whl");
const pymupdf4LLM = await PyMuPDFNode.loadPyMuPDF4LLM("node_modules/pymupdf-node/pymupdf/pymupdf4llm-0.0.24-py3-none-any.whl");

function logSection(title) {
  console.log(`\n===== ${title} =====`);
}

function printMarkdown(doc, label = "Document Snapshot") {
  logSection(`📄 Print Markdown: ${label}`);
  const identifyHeaders = new pymupdf4LLM.IdentifyHeaders(doc);
  const tocHeaders = new pymupdf4LLM.TocHeaders(doc);
  const md = pymupdf4LLM.to_markdown(doc, {
    page_chunks: false,
    write_images: false,
    ignore_images: true,
    image_path: "",
    extract_words: false,
    show_progress: false,
  });
  console.log(md);
}

function openFreshDoc() {
  return pymupdf.open("test.pdf");
}

let doc = openFreshDoc();
logSection("1. Initial Page Count");
console.log("Page count:", doc.page_count);
printMarkdown(doc, "Initial PDF");

doc = openFreshDoc();
logSection("2. Copy Page");
doc.copy_page(0);
console.log("Copied page 0 to the end");
printMarkdown(doc, "After copy_page(0)");

doc = openFreshDoc();
logSection("3. Add New Page (end)");
doc.new_page();
console.log("Added blank page at the end");
printMarkdown(doc, "After new_page()");

doc = openFreshDoc();
logSection("4. Add New Page (index 1, custom size)");
doc.new_page(1, 400, 500);
console.log("Inserted blank page at index 1 (400x500)");
printMarkdown(doc, "After new_page(1, 400, 500)");

doc = openFreshDoc();
logSection("5. Insert Page with Text");
const count = doc.insert_page({ pno: 0, text: "Inserted Page Content" });
console.log(`Inserted text page at index 0 (lines inserted: ${count})`);
printMarkdown(doc, "After insert_page()");

doc = openFreshDoc();
logSection("6. Delete Last Page");
doc.delete_page(doc.page_count - 1);
console.log("Deleted last page");
printMarkdown(doc, "After delete_page()");

doc = openFreshDoc();
logSection("7. Delete Pages by Array");
doc.delete_pages([1, 2]);
console.log("Deleted pages at index 1 and 2");
printMarkdown(doc, "After delete_pages([1, 2])");

doc = openFreshDoc();
logSection("8. Delete Page Range 0–1");
doc.delete_pages(0, 1);
console.log("Deleted pages from index 0 to 1");
printMarkdown(doc, "After delete_pages(0, 1)");

doc = openFreshDoc();
logSection("9. Set & Get Page Labels");
doc.set_page_labels([{ startpage: 0, prefix: "L-", style: "D", firstpagenum: 1 }]);
const labels = doc.get_page_labels();
console.log("Page labels:", labels);
const match = doc.get_page_numbers("L-1");
console.log("Page numbers with label 'L-1':", match);

doc = openFreshDoc();
logSection("10. Bake Document");
doc.bake(true, true);
console.log("Baked document (annotations + widgets)");

doc = openFreshDoc();
logSection("11. Scrub Metadata");
doc.scrub({ metadata: true, javascript: true });
console.log("Scrubbed metadata and javascript");

doc = openFreshDoc();
const doc2 = openFreshDoc();
logSection("12. Insert PDF Page from Another Doc");
doc.insert_pdf(doc2, { from_page: 0, to_page: 0 });
console.log("Inserted first page of another doc");
printMarkdown(doc, "After insert_pdf()");

doc = openFreshDoc();
logSection("13. Embed File");
const buffer = fs.readFileSync("test.pdf");
const xref = doc.embfile_add("sample", buffer.buffer, "test.pdf", "test.pdf", "Embedded sample PDF");
console.log("Embedded file XREF:", xref);

doc = openFreshDoc();
logSection("14. Add annot, link, text, etc");
const page = doc.load_page(0);
page.add_caret_annot([10, 10]);
page.add_text_annot([10, 30], 'This is a text annotation.');
page.add_freetext_annot([50, 30, 300, 80], 'This is a free_text annotation.', { fontsize: 14, richtext: true, border_color: [0, 0, 0], border_width: 2 });
page.add_file_annot([10, 70], fs.readFileSync('./test.pdf').buffer, 'pymupdf4node.pdf');
page.add_ink_annot([ [[10, 90], [30, 110], [50, 90]], ]);
page.add_line_annot([10, 110], [30, 130]);
page.add_rect_annot([10, 150, 30, 170]);
page.add_circle_annot([10, 170, 30, 190]);
page.add_polyline_annot([[10, 190], [30, 210], [50, 190]]);
page.add_polyline_annot([[10, 210], [30, 230], [50, 210]]);
page.add_underline_annot({ quads: [[10, 230, 50, 240]] });
page.add_strikeout_annot({ quads: [[10, 250, 50, 260]] });
page.add_squiggly_annot({ quads: [[10, 270, 50, 280]] });
page.add_highlight_annot({ quads: [[10, 290, 50, 300]] });
page.add_stamp_annot([10, 310, 50, 350], 3);
page.add_redact_annot([10, 370, 50, 410], { text: 'pymupdf4node redact', fill: [0, 0, 0], text_color: [1, 1, 1] });
page.apply_redactions();
page.add_redact_annot([10, 430, 50, 470], { cross_out: false });
page.insert_link({ from: [10, 490, 50, 530], kind: 2, uri: 'https://pymupdf.readthedocs.io/en/latest/vars.html#linkdest-kinds' });
page.insert_text([10, 550], 'Inserted Text');
page.delete_annot(page.add_freetext_annot([310, 30, 570, 80], 'This is a free_text annotation.', { fontsize: 14, richtext: true, border_color: [0, 0, 0], border_width: 2 }));
doc.load_page(1).set_rotation(180);
doc.load_page(2).set_cropbox([10, 10, 100, 100]);
await doc.save('./pdf/output.pdf');
console.log("Please check the PDF located at the \"tests/pdf/output.pdf\" location.");

logSection('15. Find tables in page');
const results = page.find_tables();
results.tables.forEach(table => console.log(table.to_markdown()));

logSection('16. Get XML metadata');
const metadata = doc.get_xml_metadata()
console.log("XML metadata=", metadata);
```