UNPKG

@idio/dicer

Version:

[fork] A Very Fast Streaming Multipart Parser For Node.JS Written In ES6 And Optimised With JavaScript Compiler.

214 lines (187 loc) 6.38 kB
/* Based heavily on the Streaming Boyer-Moore-Horspool C++ implementation by Hongli Lai at: https://github.com/FooBarWidget/boyer-moore-horspool */ import { EventEmitter } from 'events' function jsmemcmp(buf1, pos1, buf2, pos2, num) { for (var i = 0; i < num; ++i, ++pos1, ++pos2) if (buf1[pos1] !== buf2[pos2]) return false return true } export default class SBMH extends EventEmitter { constructor(needle) { super() if (typeof needle === 'string') needle = new Buffer(needle) var i, j, needle_len = needle.length this.maxMatches = Infinity this.matches = 0 this._occ = new Array(256) this._lookbehind_size = 0 this._needle = needle this._bufpos = 0 this._lookbehind = new Buffer(needle_len) // Initialize occurrence table. for (j = 0; j < 256; ++j) this._occ[j] = needle_len // Populate occurrence table with analysis of the needle, // ignoring last letter. if (needle_len >= 1) { for (i = 0; i < needle_len - 1; ++i) this._occ[needle[i]] = needle_len - 1 - i } } reset() { this._lookbehind_size = 0 this.matches = 0 this._bufpos = 0 } push(chunk, pos = 0) { var r, chlen if (!Buffer.isBuffer(chunk)) chunk = new Buffer(chunk, 'binary') chlen = chunk.length this._bufpos = pos while (r !== chlen && this.matches < this.maxMatches) r = this._sbmh_feed(chunk) return r } _sbmh_feed(data) { var len = data.length, needle = this._needle, needle_len = needle.length // Positive: points to a position in `data` // pos == 3 points to data[3] // Negative: points to a position in the lookbehind buffer // pos == -2 points to lookbehind[lookbehind_size - 2] var pos = -this._lookbehind_size, last_needle_char = needle[needle_len - 1], occ = this._occ, lookbehind = this._lookbehind, ch if (pos < 0) { // Lookbehind buffer is not empty. Perform Boyer-Moore-Horspool // search with character lookup code that considers both the // lookbehind buffer and the current round's haystack data. // // Loop until // there is a match. // or until // we've moved past the position that requires the // lookbehind buffer. In this case we switch to the // optimized loop. // or until // the character to look at lies outside the haystack. while (pos < 0 && pos <= len - needle_len) { ch = this._sbmh_lookup_char(data, pos + needle_len - 1) if (ch === last_needle_char && this._sbmh_memcmp(data, pos, needle_len - 1)) { this._lookbehind_size = 0 ++this.matches if (pos > -this._lookbehind_size) this.emit('info', true, lookbehind, 0, this._lookbehind_size + pos) else this.emit('info', true) return (this._bufpos = pos + needle_len) } else pos += occ[ch] } // No match. if (pos < 0) { // There's too few data for Boyer-Moore-Horspool to run, // so let's use a different algorithm to skip as much as // we can. // Forward pos until // the trailing part of lookbehind + data // looks like the beginning of the needle // or until // pos == 0 while (pos < 0 && !this._sbmh_memcmp(data, pos, len - pos)) pos++ } if (pos >= 0) { // Discard lookbehind buffer. this.emit('info', false, lookbehind, 0, this._lookbehind_size) this._lookbehind_size = 0 } else { // Cut off part of the lookbehind buffer that has // been processed and append the entire haystack // into it. var bytesToCutOff = this._lookbehind_size + pos if (bytesToCutOff > 0) { // The cut off data is guaranteed not to contain the needle. this.emit('info', false, lookbehind, 0, bytesToCutOff) } lookbehind.copy(lookbehind, 0, bytesToCutOff, this._lookbehind_size - bytesToCutOff) this._lookbehind_size -= bytesToCutOff data.copy(lookbehind, this._lookbehind_size) this._lookbehind_size += len this._bufpos = len return len } } if (pos >= 0) pos += this._bufpos // Lookbehind buffer is now empty. Perform Boyer-Moore-Horspool // search with optimized character lookup code that only considers // the current round's haystack data. while (pos <= len - needle_len) { ch = data[pos + needle_len - 1] if (ch === last_needle_char && data[pos] === needle[0] && jsmemcmp(needle, 0, data, pos, needle_len - 1)) { ++this.matches if (pos > 0) this.emit('info', true, data, this._bufpos, pos) else this.emit('info', true) return (this._bufpos = pos + needle_len) } else pos += occ[ch] } // There was no match. If there's trailing haystack data that we cannot // match yet using the Boyer-Moore-Horspool algorithm (because the trailing // data is less than the needle size) then match using a modified // algorithm that starts matching from the beginning instead of the end. // Whatever trailing data is left after running this algorithm is added to // the lookbehind buffer. if (pos < len) { while (pos < len && (data[pos] !== needle[0] || !jsmemcmp(data, pos, needle, 0, len - pos))) { ++pos } if (pos < len) { data.copy(lookbehind, 0, pos, pos + (len - pos)) this._lookbehind_size = len - pos } } // Everything until pos is guaranteed not to contain needle data. if (pos > 0) this.emit('info', false, data, this._bufpos, pos < len ? pos : len) this._bufpos = len return len } _sbmh_lookup_char(data, pos) { if (pos < 0) { /** @suppress {checkTypes} */ const res = this._lookbehind[this._lookbehind_size + pos] return res } else return data[pos] } _sbmh_memcmp(data, pos, len) { var i = 0 while (i < len) { if (this._sbmh_lookup_char(data, pos + i) === this._needle[i]) ++i else return false } return true } } /** * @license MIT streamsearch by Brian White * https://github.com/mscdex/streamsearch */