UNPKG

ld-jsonstream

Version:

Simple and secure newline delimited JSON stream parser

206 lines (173 loc) 6.34 kB
/**
 * Copyright (c) 2014, 2015, 2016, 2019 Tim Kuijsten
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/* jshint -W116 */

var util = require('util');
var Transform = require('stream').Transform;

// Byte values used while scanning the internal buffer.
var LF = 0x0a;
var CR = 0x0d;
var SPACE = 0x20;
var TAB = 0x09;

/**
 * Tell whether a buffer holds nothing but JSON whitespace (space, tab, LF, CR).
 *
 * @param {Buffer} buf bytes to inspect
 * @return {Boolean} true if every byte is whitespace (also true for an empty buffer)
 */
function isBlank(buf) {
  for (var i = 0; i < buf.length; i++) {
    var b = buf[i];
    if (b !== SPACE && b !== TAB && b !== LF && b !== CR) {
      return false;
    }
  }
  return true;
}

/**
 * Decide whether a JSON.parse failure means "the text stopped too early" (so more
 * lines may complete the document) rather than "the text is malformed".
 *
 * Older V8 versions report every truncation as "Unexpected end of JSON input".
 * Newer versions use more specific messages that end in "in JSON at position N";
 * for a truncation N equals the length of the text, whereas for a genuinely
 * unexpected token N points inside the text.
 *
 * @param {Error} err error thrown by JSON.parse
 * @param {String} text the exact string that was handed to JSON.parse
 * @return {Boolean} true if the input looks truncated
 */
function isIncompleteJSON(err, text) {
  if (!(err instanceof SyntaxError)) {
    return false;
  }
  if (err.message === 'Unexpected end of JSON input') {
    return true;
  }
  var match = /\bin JSON at position (\d+)/.exec(err.message);
  return match !== null && Number(match[1]) >= text.length;
}

/**
 * LDJSONStream
 *
 * Parse a binary stream of stringified new line separated JSON objects and
 * write to output. Implements a Transform stream.
 *
 * Features:
 * * simple
 * * dependency free
 * * easy to audit
 * * LDJSON support
 * * NDJSON support
 *
 * Documents may be terminated by LF, CR or CRLF. A document may span multiple
 * lines: when a line on its own is an incomplete JSON text, parsing is retried
 * with the following line(s) appended.
 *
 * @param {Object} [opts] object containing optional parameters. The object is
 *   not modified.
 *
 * opts:
 *   maxDocLength {Number, default 16777216} maximum JSON document size in bytes
 *   maxDocs {Number, default infinite} maximum number of documents to receive
 *   maxBytes {Number, default infinite} maximum number of bytes to receive
 *   readableObjectMode {Boolean, default false} Sets objectMode for the readable side of
 *     the stream. Note: the writable side of the stream can never be in object mode. If
 *     you have such a case, you don't need this module.
 *   objectMode {Boolean, default false} alias for readableObjectMode
 *
 * @throws {TypeError} if opts or one of its members has the wrong type
 * @throws {Error} if opts.writableObjectMode is set
 *
 * @event "data" {Object} emits one object at a time
 * @event "end" emitted once all input has been processed
 */
function LDJSONStream(opts) {
  if (opts == null) {
    opts = {};
  }

  if (typeof opts !== 'object') {
    throw new TypeError('opts must be an object');
  }
  if (opts.maxDocLength != null && typeof opts.maxDocLength !== 'number') {
    throw new TypeError('opts.maxDocLength must be a number');
  }
  if (opts.maxDocs != null && typeof opts.maxDocs !== 'number') {
    throw new TypeError('opts.maxDocs must be a number');
  }
  if (opts.maxBytes != null && typeof opts.maxBytes !== 'number') {
    throw new TypeError('opts.maxBytes must be a number');
  }
  if (opts.writableObjectMode) {
    throw new Error('writableObjectMode is not supported, line delimited JSON is required as input');
  }
  if (opts.objectMode != null && typeof opts.objectMode !== 'boolean') {
    throw new TypeError('opts.objectMode must be a boolean');
  }
  if (opts.readableObjectMode != null && typeof opts.readableObjectMode !== 'boolean') {
    throw new TypeError('opts.readableObjectMode must be a boolean');
  }

  // Work on a shallow copy so the caller's object is left untouched.
  var options = Object.assign({}, opts);

  // objectMode would also switch the writable side to object mode, so translate
  // it to readableObjectMode before handing the options to Transform.
  if (options.objectMode) {
    options.readableObjectMode = options.objectMode;
    delete options.objectMode;
  }

  Transform.call(this, options);

  this._maxDocLength = options.maxDocLength || 16777216;
  this._maxBytes = options.maxBytes;
  this._maxDocs = options.maxDocs;
  this._objectMode = options.readableObjectMode;

  this.bytesRead = 0;
  this.docsRead = 0;

  this._docptr = 0;

  // initialize internal buffer
  this._reset();
}

util.inherits(LDJSONStream, Transform);
module.exports = LDJSONStream;

/**
 * Discard the internal buffer and rewind the scan pointer.
 */
LDJSONStream.prototype._reset = function _reset() {
  this.buffer = Buffer.alloc(0);
  this._docptr = 0;
};

/**
 * Extract and push as many complete documents as the internal buffer holds.
 *
 * this._docptr is the offset up to which the buffer has already been scanned
 * for the document currently being assembled; it is kept between calls so a
 * partially received document is not rescanned from the start.
 *
 * @param {Function} cb called exactly once: without arguments when more input
 *   is needed or the document limit is reached, or with an Error when a
 *   document is too long or is not valid JSON (the buffer is discarded then)
 */
LDJSONStream.prototype._parseDocs = function _parseDocs(cb) {
  for (;;) {
    if (this._maxDocs && this.docsRead >= this._maxDocs) {
      cb();
      return;
    }

    // move pointer just past the next line terminator (LF, CR or CRLF)
    var found = false;
    var terminatorLength = 0;
    while (!found && this._docptr < this.buffer.length) {
      var byte = this.buffer[this._docptr];
      this._docptr++;
      if (byte === LF) {
        found = true;
        terminatorLength = 1;
      } else if (byte === CR) {
        found = true;
        terminatorLength = 1;
        // a CR directly followed by a LF is one two-byte terminator
        if (this.buffer[this._docptr] === LF) {
          this._docptr++;
          terminatorLength = 2;
        }
      }
    }

    // enforce max doc length, not counting the terminator (if any)
    if (this._docptr - terminatorLength > this._maxDocLength) {
      // discard buffer
      this._reset();
      cb(new Error('document exceeds configured maximum length'));
      return;
    }

    if (!found) {
      // wait for more chunks
      cb();
      return;
    }

    // since a newline is found, try to read and parse it as JSON
    var rawdoc = this.buffer.slice(0, this._docptr);
    var text = rawdoc.toString();
    var obj;

    try {
      obj = JSON.parse(text);
    } catch (err) {
      // support multi-line JSON
      if (isIncompleteJSON(err, text)) {
        // look for next newline
        continue;
      }
      this._reset();
      cb(err);
      return;
    }

    // shift document from internal buffer and rewind the scan pointer
    this.buffer = this.buffer.slice(this._docptr);
    this._docptr = 0;

    // push the raw or parsed doc out to the reader
    if (this._objectMode) {
      this.push(obj);
    } else {
      this.push(rawdoc);
    }

    this.docsRead++;

    // check if there might be any new document that can be parsed
    if (!this.buffer.length) {
      cb();
      return;
    }
  }
};

/**
 * Transform implementation: account for the received bytes, append the chunk
 * to the internal buffer and parse whatever documents are complete.
 *
 * @param {Buffer} chunk newly received bytes
 * @param {String} encoding unused, input is always handled as bytes
 * @param {Function} cb called with an Error if maxBytes is exceeded or parsing fails
 */
LDJSONStream.prototype._transform = function _transform(chunk, encoding, cb) {
  this.bytesRead += chunk.length;

  if (this._maxBytes && this.bytesRead > this._maxBytes) {
    this._reset();
    cb(new Error('more than maxBytes received'));
    return;
  }

  var newLength = this.buffer.length + chunk.length;
  this.buffer = Buffer.concat([this.buffer, chunk], newLength);

  this._parseDocs(cb);
};

/**
 * Transform implementation: parse any final object that does not end with a
 * newline. Leftover input that consists of whitespace only (e.g. a trailing
 * blank line) is ignored.
 *
 * @param {Function} cb called with an Error if the leftover input is not valid JSON
 */
LDJSONStream.prototype._flush = function _flush(cb) {
  if (!this.buffer.length) {
    cb();
    return;
  }

  if (this._maxDocs && this.docsRead >= this._maxDocs) {
    cb();
    return;
  }

  // nothing but whitespace left: there is no final document
  if (isBlank(this.buffer)) {
    this._reset();
    cb();
    return;
  }

  var obj;

  try {
    obj = JSON.parse(this.buffer.toString());
  } catch (err) {
    this._reset();
    cb(err);
    return;
  }

  // push the raw or parsed doc out to the reader
  if (this._objectMode) {
    this.push(obj);
  } else {
    this.push(this.buffer);
  }

  this.docsRead++;
  this._reset();
  cb();
};