UNPKG

@animetosho/parpar

Version:

High performance multi-threaded PAR2 creation library

240 lines (209 loc) 7.55 kB
"use strict";
var fs = require('fs');
var ThrottleQueue = require('./throttlequeue');
// Buffer.allocUnsafe is missing on very old Node versions; fall back to the
// (deprecated) Buffer constructor there
var allocBuffer = (Buffer.allocUnsafe || Buffer);

/**
 * Result object delivered to the read callback for one completed read.
 *
 * Holds two references on the underlying pool buffer: one released by
 * hashed() (hashing side) and one by release() (consumer side). Only when
 * BOTH have been called is the buffer handed back to the parent reader's
 * pool via parent.processed().
 *
 * @param {object} file   internal open-file entry ({fd, info, pos, hashQueue})
 * @param {Buffer} buffer pool buffer the data was read into
 * @param {number} len    number of valid bytes at the start of buffer
 * @param {number} pos    file offset this read started at
 * @param {FileSeqReader} parent owning reader (receives hashed/processed signals)
 */
function FileReaderData(file, buffer, len, pos, parent) {
	this._readerFile = file;
	this.file = file.info; // caller-supplied file info object
	this.buffer = buffer.slice(0, len); // view over the valid portion only
	this._readerBuffer = buffer; // full pool buffer, needed for returning to pool
	this.pos = pos;
	this._parent = parent;
	this._refs = 2; // one ref for hashing, one for the consumer's release()
}
FileReaderData.prototype = {
	// slice-aligned offsets within .buffer (set by _doRead when requireChunk
	// is active), else null
	chunks: null,
	// signal that hashing of this piece is done; decrements the file's hash
	// queue and drops one buffer reference
	hashed: function() {
		this._parent.hashed(this._readerFile);
		if(--this._refs == 0) this._parent.processed(this._readerBuffer);
	},
	// NOTE: doesn't necessarily release, as it requires hashing to have completed
	release: function() {
		if(--this._refs == 0) this._parent.processed(this._readerBuffer);
	}
};

/**
 * Sequential reader over a list of files, using a bounded pool of read
 * buffers and bounding the number of outstanding (unhashed) reads per file.
 *
 * @param {Array} files        objects with at least {name, size}; empty files are skipped
 * @param {number} readSize    maximum bytes per read request
 * @param {number} readBuffers maximum number of pool buffers to allocate
 * @param {object} [throttleQ] optional throttle queue; defaults to no throttling
 */
function FileSeqReader(files, readSize, readBuffers, throttleQ) {
	this.fileQueue = files.filter(function(file) {
		return file.size > 0;
	});
	this.buf = [];
	this.readSize = readSize;
	this.maxBufs = readBuffers;
	this.openFiles = [];
	this.throttleQ = throttleQ || (new ThrottleQueue.NoThrottle());
}
FileSeqReader.prototype = {
	// number of queued hash requests per file; maybe scale this based on readSize?
	// 3x4MB seems too small in tests (switches frequently on HDD), where 4x4MB is
	// much better, and 5x4MB never switches on HDD
	maxQueuePerFile: 5,
	buf: null,        // pool of free buffers
	bufCount: 0,      // total buffers in circulation (free + in use)
	maxBufs: 0,
	readSize: 0,
	openFiles: null,  // entries: {fd, info, pos, hashQueue}
	fileQueue: null,  // files not yet opened
	cb: null,         // cb(err) or cb(null, FileReaderData) per read
	finishCb: null,   // called once everything is read, hashed and released
	_isReading: false,
	throttleQ: null,
	
	// when doing sequential read with chunker, caller requires the first chunkLen
	// bytes of every slice, so ensure that this always arrives as one piece
	reqSliceLen: 0,
	reqChunkLen: 0,
	requireChunk: function(sliceLen, chunkLen) {
		if(chunkLen > this.readSize) throw new Error('Required chunk length cannot exceed maximum read length');
		this.reqSliceLen = sliceLen;
		this.reqChunkLen = chunkLen;
	},
	
	// use external buffers instead of allocating new
	setBuffers: function(bufs) {
		this.buf = bufs;
		this.bufCount = bufs.length;
	},
	
	// start reading; readCb is invoked per piece (or error), finishCb on completion
	run: function(readCb, finishCb) {
		this.cb = readCb;
		this.finishCb = finishCb;
		this.readNext();
	},
	
	// fetch a free buffer from the pool, allocating if under the limit;
	// returns null if none available
	_getBuf: function() {
		while(this.buf.length) {
			var buf = this.buf.pop();
			if(buf.length >= this.readSize) return buf;
			// else, buffer too small - discard
			this.bufCount--;
		}
		if(this.bufCount < this.maxBufs) {
			// allocate new buffer, since we're below the limit
			this.bufCount++;
			return allocBuffer(this.readSize);
		}
		return null; // no available buffers
	},
	
	// determine appropriate read length, based on file's current position;
	// returns [length] or, when requireChunk is active, [length, chunkOffsets]
	_readSize: function(pos, size) {
		if(!this.reqSliceLen) return [this.readSize];
		
		// we need to size our reads so that the required chunk fully lands in a buffer
		var nextSlicePos = Math.ceil(pos / this.reqSliceLen) * this.reqSliceLen;
		if(nextSlicePos >= size) // will never read next slice
			return [this.readSize];
		var chunks = [];
		var readSize = nextSlicePos - pos;
		var maxSize = size - pos; // bytes remaining in file
		while(readSize < this.readSize) {
			if(Math.min(maxSize, readSize + this.reqChunkLen) > this.readSize)
				// can't read any more as we'd get a partial chunk
				break;
			chunks.push(readSize); // slice boundary within this read
			readSize += this.reqSliceLen;
			if(readSize >= maxSize) break; // at or past EOF
		}
		return [Math.min(this.readSize, readSize), chunks];
	},
	
	// issue one read on the given open file into buffer, deliver the result
	// via this.cb, then chain into readNext()
	_doRead: function(file, buffer) {
		var self = this;
		var readSize = this._readSize(file.pos, file.info.size);
		this.throttleQ.pass(readSize[0], function(cancelled, readDone) {
			if(cancelled) return; // this should never happen because we only read once at a time
			fs.read(file.fd, buffer, 0, readSize[0], null, function(err, bytesRead) {
				readDone();
				if(err) return self.cb(err);
				
				// file position/EOF tracking
				var newPos = file.pos + bytesRead;
				if(newPos > file.info.size)
					return self.cb(new Error('Read past expected end of file - latest position (' + newPos + ') exceeds size (' + file.info.size + ')'));
				var eof = (newPos == file.info.size);
				if(!eof && bytesRead != readSize[0])
					return self.cb(new Error("Read failure - expected " + readSize[0] + " bytes, got " + bytesRead + " bytes instead."));
				
				// increase hashing count and wait for other end to signal when done
				var ret = new FileReaderData(file, buffer, bytesRead, file.pos, self);
				if(readSize[1]) ret.chunks = readSize[1];
				file.hashQueue++;
				file.pos += bytesRead;
				self.cb(null, ret);
				
				if(eof) {
					// remove from openFiles
					for(var i=0; i<self.openFiles.length; i++)
						if(self.openFiles[i].fd == file.fd) {
							self.openFiles.splice(i, 1);
							break;
						}
					fs.close(file.fd, function(err) {
						// NOTE(review): on close failure the error is reported but
						// readNext() is not re-entered on this path
						if(err) self.cb(err);
						else self.readNext();
					});
				} else self.readNext();
			});
		});
	},
	
	// scheduling core: pick the best file to read from next and issue the read;
	// parks (_isReading=false) when no buffer is free, and fires finishCb once
	// all files are done and every buffer has been returned to the pool
	readNext: function() {
		var buffer = this._getBuf();
		if(!buffer) {
			// all buffers used - need to wait for some to be released to proceed
			this._isReading = false;
			return;
		}
		this._isReading = true;
		
		// try reading off currently active file
		var front = this.openFiles[0];
		if(front && front.hashQueue < this.maxQueuePerFile)
			return this._doRead(front, buffer);
		
		// otherwise, find the file with the shortest hash queue
		var shortestQueue = this.maxQueuePerFile;
		var shortestIndex = 0;
		for(var fileI=1; fileI<this.openFiles.length; fileI++) {
			var candidate = this.openFiles[fileI];
			if(candidate.hashQueue < shortestQueue) {
				shortestQueue = candidate.hashQueue;
				shortestIndex = fileI;
			}
		}
		// if the shortest queue is empty, use that file
		if(shortestIndex > 0 && shortestQueue == 0) { // (shortestQueue*2 <= this.maxQueuePerFile) -- if we prefer to reuse files
			// move this file to front of open file queue
			// this ensures that this file will be preferred over the others, to
			// preserve sequential reading behaviour as much as possible
			this.openFiles.unshift(this.openFiles.splice(shortestIndex, 1)[0]);
			return this._doRead(this.openFiles[0], buffer);
		}
		
		// can't fulfill request from existing open files, try a new file
		if(this.fileQueue.length) {
			var self = this;
			var file = this.fileQueue.shift();
			fs.open(file.name, 'r', function(err, fd) {
				if(err) {
					// FIX: return the buffer to the pool and clear the reading flag
					// before reporting the error; previously the buffer was leaked
					// here, which left _isReading stuck and made the completion
					// condition below (all buffers returned) unreachable
					self.buf.push(buffer);
					self._isReading = false;
					return self.cb(err);
				}
				// create new file entry; we put this at the end of the queue because
				// if a hash completes during the open, we want to prioritize existing files
				self.openFiles.push({
					fd: fd,
					info: file,
					pos: 0,
					hashQueue: 0
				});
				// put buffer back and retry
				self.buf.push(buffer);
				self.readNext();
			});
			return;
		} else if(shortestIndex > 0) {
			// if no unopened files available, prefer the best open file
			this.openFiles.unshift(this.openFiles.splice(shortestIndex, 1)[0]);
			return this._doRead(this.openFiles[0], buffer);
		}
		
		// otherwise, we've exhausted all files we can read from
		// can't proceed, return buffer to pool
		this.buf.push(buffer);
		this._isReading = false;
		if(this.openFiles.length == 0 && this.buf.length == this.bufCount) {
			// completed processing all files (no open files, no files in queue
			// and all buffers returned to pool)
			this.finishCb();
		}
		// TODO: else perhaps consider pushing more to queues, since we have the
		// buffers (or maybe that's just a pointless idea since we can't process
		// any faster either)
	},
	
	// callback from FileReaderData: hashing of one piece finished
	hashed: function(file) {
		file.hashQueue--;
	},
	// callback from FileReaderData: buffer fully released - return it to the
	// pool and wake the reader if it was parked waiting for a buffer
	processed: function(buffer) {
		this.buf.push(buffer);
		if(!this._isReading) this.readNext();
	}
};

module.exports = FileSeqReader;