UNPKG

line-chomper

Version:

Chomps utf-8 based byte stream into lines.

334 lines (284 loc) 8.4 kB
var libFs = require("fs"),
    Stream = require("stream"),
    StringDecoder = require('string_decoder').StringDecoder,
    Buffer = require("buffer").Buffer;

var libTools = require("./tools"),
    libVars = require("./vars");

exports.DEFAULT_OPTIONS = libVars.DEFAULT_OPTIONS;
exports.DEFAULTS = libVars.DEFAULTS;

/**
 * Opens a file read stream positioned according to the byte/line offsets in `options`.
 *
 * The created stream is tagged with `owned = true` so that the chomping code knows it
 * may close the stream and that the read position was moved one byte back (see parseOffsets).
 *
 * @param {string} fileName Path handed to `fs.createReadStream`.
 * @param {Object} options Chomp options. Mutated: `initialDataOffset` and
 *        `initialLineOffset` may be set here.
 * @param {Function} callback Called exactly once, with `(err)` on the stream's first "error"
 *        or with `(null, stream)` on "open", whichever fires first.
 */
function doOpenReadStream(fileName, options, callback) {
    var streamOptions = {};
    parseOffsets(options, streamOptions);

    var stream = libFs.createReadStream(fileName, streamOptions),
        calledBack = false;

    stream.once("error", doCallBack);
    // "open" passes the file descriptor as its argument; bind a leading null so that
    // doCallBack sees "no error".
    stream.once("open", doCallBack.bind(null, null));
    stream.owned = true;

    // Guards against calling back twice (e.g. "open" followed by a later "error").
    function doCallBack(err) {
        if (!calledBack) {
            calledBack = true;
            if (err) {
                callback(err);
            } else {
                callback(null, stream);
            }
        }
    }

    /**
     * Translates `fromByte` / `fromLine` + `lineOffsets` into a stream start position.
     *
     * `lineOffsets` is scanned in order for the last record whose `line` is strictly below
     * `fromLine`; the scan stops at the first record at or past `fromLine`.
     * NOTE(review): this assumes `lineOffsets` is sorted by ascending `line`, as produced
     * by mapLineOffsets - confirm for externally supplied maps.
     */
    function parseOffsets(options, streamOptions) {
        if (options.fromByte !== undefined) {
            streamOptions.start = options.initialDataOffset = options.fromByte;
        }
        if (options.fromLine && options.lineOffsets) {
            var offsetRec = null,
                currentRec = null,
                lineOffsetsLength = options.lineOffsets.length,
                index = 0;
            while (index < lineOffsetsLength) {
                currentRec = options.lineOffsets[index];
                if (currentRec.line >= options.fromLine) {
                    break;
                }
                index++;
                offsetRec = currentRec;
            }
            if (offsetRec && offsetRec.line < options.fromLine) {
                streamOptions.start = options.initialDataOffset = offsetRec.offset;
                options.initialLineOffset = offsetRec.line;
            }
        }
        if (streamOptions.start) {
            // Eat out the preceding byte to test if the stream is aligned with line ends
            streamOptions.start--;
        }
    }
}

/**
 * Consumes a readable stream, splitting its UTF-8 content into lines.
 *
 * @param {Stream} stream Source stream emitting "data", "end", "close" and "error".
 * @param {Object} options Chomp options as prepared by `chomp`.
 * @param {Function} callback Called once with `(err)` if a stream error was recorded,
 *        otherwise with `(null, lines)` when lines are collected, or with
 *        `(null, processedLineCount)` (total minus skipped) when they are not.
 */
function doChompStream(stream, options, callback) {
    var lines = undefined,
        // Text after the last terminator of the previous chunk; prepended to the next chunk.
        trailingChunk = null,
        // Byte position (in the source) of the start of the chunk about to be processed.
        dataOffset = options.initialDataOffset || 0,
        error = null,
        decoder = new StringDecoder("utf-8"),
        // Detected lazily from the data: "\r\n", "\r" or "\n".
        lineTerminator = null,
        callbackThisValue = options.callbackThisValue || stream,
        calledBack = false,
        // True when an owned stream was started in the middle of a line.
        notAligned = false,
        stats = {
            total: options.initialLineOffset || 0,
            skipped: options.initialLineOffset || 0
        };

    // Collect lines when explicitly requested, or when returnLines is null and there is
    // no lineCallback to receive them.
    if (options.returnLines || (options.returnLines === null && !options.lineCallback)) {
        lines = [];
    }

    stream.on("data", streamOnData);
    stream.on("error", streamOnError);
    stream.on("end", streamOnEnd);
    stream.on("close", streamOnClose);

    function streamOnData(data) {
        if (options.initialDataOffset > 0 && dataOffset === options.initialDataOffset && stream.owned) {
            // First chunk of data. Starting from offset. Our own stream. We know we moved a byte back before
            // Repackage the data without the first byte
            // (slice() returns a view over the same bytes; this avoids the deprecated
            // `new Buffer(size)` constructor and the extra copy.)
            var savedBuffer = data;
            data = savedBuffer.slice(1);
            var firstByte = savedBuffer[0];
            // 13 = "\r", 10 = "\n": any other preceding byte means we started mid-line.
            if (firstByte !== 13 && firstByte !== 10) {
                // processDecoded now knows to skip the first line
                notAligned = true;
            }
        }
        var decoded = decoder.write(data);
        processDecoded(decoded);
        // NOTE(review): advances by raw byte count, while processDecoded measures the decoded
        // text; bytes of a multi-byte character held back by the decoder across a chunk
        // boundary may shift reported offsets - confirm.
        dataOffset += data.length;
    }

    function streamOnError(err) {
        error = err;
        // NOTE: I'm aware error can be swallowed here. ¯\(°_o)/¯
    }

    function streamOnEnd() {
        tryCallBack(false);
        stream.removeListener("error", streamOnError);
    }

    function streamOnClose() {
        tryCallBack(false);
        stream.removeListener("error", streamOnError);
    }

    /**
     * Finishes processing and invokes `callback` at most once.
     *
     * @param {boolean} interrupt True when stopping early (range end reached or the line
     *        callback returned false): pending text is not flushed and an owned stream
     *        is closed.
     */
    function tryCallBack(interrupt) {
        if (calledBack) {
            return;
        }
        calledBack = true;
        if (!interrupt) {
            // Flush whatever the decoder and trailingChunk still hold as the final line.
            processDecoded(decoder.end(), true);
        }
        stream.removeListener("data", streamOnData);
        stream.removeListener("end", streamOnEnd);
        stream.removeListener("close", streamOnClose);
        if (interrupt && stream.owned) {
            stream.close();
        }
        if (error) {
            callback(error);
        } else {
            if (lines) {
                callback(null, lines);
            } else {
                callback(null, stats.total - stats.skipped);
            }
        }
    }

    /**
     * Picks the terminator that occurs first in `raw`; on equal positions the candidate
     * tested earlier wins, so "\r\n" beats "\r". Returns null when none is found.
     *
     * NOTE(review): a chunk whose first terminator is a "\r" at its very end (with the
     * matching "\n" arriving in the next chunk) appears to be detected as "\r" - confirm.
     */
    function tryDetermineLineTerminator(raw) {
        var res = null,
            index = null;
        testFor("\r\n");
        testFor("\r");
        testFor("\n", /\n([^\r]|$)/); // make sure we don't cut halfway into the double '\r\n'
        return res;

        function testFor(candidate, regexp) {
            var candidateIndex = raw.search(regexp || candidate);
            if (candidateIndex >= 0 && (index === null || candidateIndex < index)) {
                index = candidateIndex;
                res = candidate;
            }
        }
    }

    /**
     * Splits decoded text into lines, applies the byte/line range filters and dispatches
     * each accepted line to `options.lineCallback` and/or the `lines` array.
     *
     * @param {string} decoded Text decoded from the current chunk.
     * @param {boolean} [eof] True only for the final flush; the last fragment is then
     *        treated as a line instead of being kept as trailingChunk.
     */
    function processDecoded(decoded, eof) {
        eof = eof === true;
        var trailingChunkLength = 0;
        if (trailingChunk !== null) {
            decoded = trailingChunk + (decoded || "");
            trailingChunkLength = Buffer.byteLength(trailingChunk, "utf-8");
        }
        if (lineTerminator === null) {
            lineTerminator = tryDetermineLineTerminator(decoded);
        }
        if (eof && !decoded && !options.keepLastEmptyLine) {
            return;
        }
        var parsedLines = lineTerminator ? decoded.split(lineTerminator) : [decoded];
        if (!eof) {
            // The last fragment may be an incomplete line; hold it back for the next chunk.
            trailingChunk = parsedLines[parsedLines.length - 1];
            parsedLines.length--;
        }
        var chunkOffset = 0;
        for (var i = 0; i < parsedLines.length; i++) {
            var parsedLine = parsedLines[i],
                lineSizeInBytes = Buffer.byteLength(parsedLine || "", "utf-8") + (lineTerminator ? lineTerminator.length : 0),
                resLine = parsedLine,
                // trailingChunk bytes were already counted in dataOffset, hence the subtraction.
                resOffset = dataOffset + chunkOffset - trailingChunkLength;
            chunkOffset += lineSizeInBytes;
            if (options.trim) {
                resLine = resLine.trim();
            }
            if (notAligned) {
                // Discard the parsed line, so we would align from the next line onward
                stats.unalignedChunk = parsedLine;
                notAligned = false;
                continue;
            }
            // When toByte is undefined this comparison is false, so there is no byte limit.
            if (resOffset >= options.toByte) {
                return tryCallBack(true);
            }
            stats.total++;
            if (options.fromByte > resOffset || (options.fromLine >= stats.total)) {
                stats.skipped++;
                continue;
            }
            if (options.lineCallback) {
                var lineCallbackRetValue = options.lineCallback.call(callbackThisValue, resLine, resOffset, lineSizeInBytes);
                if (lineCallbackRetValue === false) {
                    // Callback asked to stop.
                    return tryCallBack(true);
                } else if (lineCallbackRetValue !== undefined && lineCallbackRetValue !== true) {
                    // Any other value replaces the line; null drops it from `lines` below.
                    resLine = lineCallbackRetValue;
                }
            }
            if (lines && resLine !== null) {
                if (options.returnDetails) {
                    lines.push({ line: resLine, offset: resOffset, sizeInBytes: lineSizeInBytes });
                } else {
                    lines.push(resLine);
                }
            }
            if (options.toLine && stats.total >= options.toLine) {
                return tryCallBack(true);
            }
        }
    }
}

/**
 * Splits a file or stream into lines.
 *
 * Accepted call forms:
 *   chomp(source, callback)
 *   chomp(source, lineCallback, callback)
 *   chomp(source, options, callback)
 *
 * @param {string|Stream} source File path or readable stream.
 * @param {Object|Function} [userOptions] Options merged with `DEFAULT_OPTIONS` via
 *        `libTools.shallowCopy`, or a function used as `lineCallback`.
 *        `byteCount` / `lineCount` are converted to `toByte` / `toLine` relative to
 *        `fromByte` / `fromLine` (which default to 0 when only an end or count is given).
 * @param {Function} callback Receives `(err)` or `(null, result)`; see doChompStream.
 *        Receives an Error when `source` is neither a string nor a Stream.
 */
function chomp(source, userOptions, callback) {
    if (arguments.length < 3) {
        callback = userOptions;
        userOptions = {};
    } else if (libTools.isFunction(userOptions)) {
        userOptions = { lineCallback: userOptions };
    }

    var options = libTools.shallowCopy(userOptions, libTools.shallowCopy(libVars.DEFAULT_OPTIONS));

    if (!options.fromByte && (options.toByte || options.byteCount)) {
        options.fromByte = 0;
    }
    if (options.byteCount) {
        options.toByte = options.fromByte + options.byteCount;
    }
    if (!options.fromLine && (options.toLine || options.lineCount)) {
        options.fromLine = 0;
    }
    if (options.lineCount) {
        options.toLine = options.fromLine + options.lineCount;
    }

    if (source instanceof Stream) {
        return doChompStream(source, options, callback);
    }
    if (libTools.isString(source)) {
        return doOpenReadStream(source, options, function (err, stream) {
            if (err) {
                return callback(err);
            }
            return doChompStream(stream, options, callback);
        });
    }
    return callback(new Error("Unknown source format. Please provide either file path or ReadStream"));
}
exports.chomp = chomp;

/**
 * Builds a sparse map of line numbers to byte offsets, suitable for `options.lineOffsets`.
 *
 * Accepted call forms:
 *   mapLineOffsets(source, callback)
 *   mapLineOffsets(source, lineCallback, callback)
 *   mapLineOffsets(source, resolution, callback)
 *   mapLineOffsets(source, resolution, lineCallback, callback)
 *
 * @param {string|Stream} source File path or readable stream, passed to `chomp`.
 * @param {number} [resolution] Minimum byte distance between recorded entries; falls back
 *        to `DEFAULTS.mapOffsetsResolution` when omitted or falsy.
 * @param {Function} [lineCallback] Invoked for every line with `(line, offset, sizeInBytes)`.
 * @param {Function} callback Receives `(err)` or `(null, results, lineCount)`, where
 *        `results` is an array of `{ line, offset }` records.
 */
function mapLineOffsets(source, resolution, lineCallback, callback) {
    if (libTools.isFunction(resolution)) {
        callback = lineCallback;
        lineCallback = resolution;
        resolution = null;
    }
    if (callback === undefined) {
        callback = lineCallback;
        lineCallback = null;
    }
    resolution = resolution || libVars.DEFAULTS.mapOffsetsResolution;

    var currentSectionFrom = resolution,
        lineCount = 0,
        results = [];

    return chomp(source, localLineCallback, completedCallback);

    function localLineCallback(line, offset, sizeInBytes) {
        if (offset > currentSectionFrom) {
            results.push({ line: lineCount, offset: offset });
            // Advance by at least one full line so a very long line is not recorded twice.
            currentSectionFrom += Math.max(resolution, sizeInBytes);
        }
        lineCount++;
        if (lineCallback) {
            lineCallback(line, offset, sizeInBytes);
        }
    }

    function completedCallback(err) {
        if (err) {
            return callback(err);
        }
        return callback(null, results, lineCount);
    }
}
exports.mapLineOffsets = mapLineOffsets;