pdf-data-parser
Version:
Parse, search and stream PDF tabular data using Node.js with Mozilla's PDF.js library.
75 lines (62 loc) • 1.61 kB
JavaScript
/* Any copyright is dedicated to the Public Domain.
* http://creativecommons.org/publicdomain/zero/1.0/
*/
/**
* pdf-junction/pdfDataReader
*/
import PdfDataParser from "./PdfDataParser.js";
import { Readable } from 'node:stream';
export default class PdfDataReader extends Readable {
  /**
   * Readable object-mode stream that emits the rows produced by a PdfDataParser.
   *
   * @param {Object} [options] stored as-is and handed to the PdfDataParser
   *   constructor; see PdfDataParser for all supported options
   * @param {String|URL} [options.url]
   * @param {String|ArrayBuffer} [options.data]
   */
  constructor(options) {
    super({
      objectMode: true,
      highWaterMark: 16,
      autoDestroy: false
    });
    this.options = options || {};
    // The parser is created lazily in _construct().
    this.parser = null;
  }

  /**
   * Create the parser and wire its events to this stream.
   *
   * Deliberately not `async`: completion is reported through `callback` only,
   * so a failure while creating the parser reaches the stream as an error
   * instead of becoming an unhandled promise rejection.
   *
   * @param {Function} callback invoked once setup is done, with an Error if setup failed
   */
  _construct(callback) {
    try {
      const parser = new PdfDataParser(this.options);
      this.parser = parser;

      parser.on('data', (row) => {
        // If push() returns false stop reading from source.
        if (row && !this.push(row)) {
          parser.pause();
        }
      });

      parser.on('end', () => {
        this.push(null);
      });

      parser.on('error', (err) => {
        // Surface parser failures to consumers; merely logging them would
        // leave the stream open forever with no 'end' and no 'error'.
        this.destroy(err);
      });
    } catch (err) {
      callback(err);
      return;
    }
    callback();
  }

  /**
   * Fetch data from the underlying resource: start the parser on the first
   * call and resume it on later calls.
   *
   * @param {Number} size requested amount of data; ignored
   */
  _read(size) {
    const parser = this.parser;
    try {
      const pending = parser.started ? parser.resume() : parser.parse();
      // parse()/resume() may return a promise; a rejection must not float.
      if (pending && typeof pending.catch === 'function') {
        pending.catch((err) => {
          this.destroy(err);
        });
      }
    } catch (err) {
      // Report the failure instead of signalling a clean end-of-stream.
      this.destroy(err);
    }
  }
};