UNPKG

pdf-data-parser

Version:

Parse, search and stream PDF tabular data using Node.js with Mozilla's PDF.js library.

75 lines (62 loc) 1.61 kB
/* Any copyright is dedicated to the Public Domain.
 * http://creativecommons.org/publicdomain/zero/1.0/ */

/**
 * pdf-junction/pdfDataReader
 */

import PdfDataParser from "./PdfDataParser.js";
import { Readable } from 'node:stream';

/**
 * Object-mode Readable stream that wraps a PdfDataParser and pushes every
 * row the parser emits to the stream consumer.
 */
export default class PdfDataReader extends Readable {

  /**
   * @param {Object} options
   * @param {String|URL} options.url
   * @param {String|ArrayBuffer} options.data
   *
   * The options object is handed to PdfDataParser unchanged; see
   * PdfDataParser for all other options.
   */
  constructor(options) {
    const streamOptions = {
      objectMode: true,
      highWaterMark: 16,
      autoDestroy: false
    };
    super(streamOptions);

    this.options = options || {};
    // Created in _construct(), which the stream runs before the first _read().
    this.parser = null;
  }

  /**
   * Create the parser and wire its events to this stream.
   *
   * @param {Function} callback invoked with no argument once the parser is
   *   wired up, or with the error if creating the parser throws.
   */
  _construct(callback) {
    let parser;
    try {
      parser = this.parser = new PdfDataParser(this.options);
    } catch (err) {
      // Report construction failures through the stream instead of leaving
      // the callback uncalled.
      callback(err);
      return;
    }

    parser.on('data', (row) => {
      // If push() returns false stop reading from source.
      if (row && !this.push(row)) {
        parser.pause();
      }
    });

    parser.on('end', () => {
      this.push(null);
    });

    parser.on('error', (err) => {
      // Surface parser failures as a stream 'error' rather than only logging
      // them, so consumers can react and the stream does not stall.
      this.destroy(err);
    });

    callback();
  }

  /**
   * Fetch data from the underlying resource: start the parser on the first
   * call, resume it on later calls.
   *
   * @param {Number} size requested read size; ignored.
   */
  async _read(size) {
    try {
      if (!this.parser.started) {
        // NOTE(review): parse() is assumed to possibly return a promise -
        // confirm against PdfDataParser. Awaiting a non-promise is harmless,
        // and awaiting routes an asynchronous rejection into the catch below.
        await this.parser.parse();
      } else {
        this.parser.resume();
      }
    } catch (err) {
      // Fail the stream with the error instead of silently ending it.
      this.destroy(err);
    }
  }
}