hoard

A Node.js library for storing time series data on disk, similar to RRD (round-robin database).

521 lines (440 loc) 22 kB
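Usage sketch (not part of the package source): the calls below are inferred from the functions exported at the end of this file (create, update, updateMany, info, fetch). The file name, archive layout and xFilesFactor are illustrative assumptions; archives are given as [secondsPerPoint, points] pairs and timestamps are Unix seconds.

hoard = require 'hoard'  # assumes the package's main module exposes the exports below

# Hypothetical retention: 60-second points for one day, 5-minute points for one week
archives = [[60, 1440], [300, 2016]]

hoard.create 'metrics.hoard', archives, 0.5, (err) ->
    throw err if err
    now = Math.round(Date.now() / 1000)
    hoard.update 'metrics.hoard', 42.0, now, (err) ->
        throw err if err
        # fetch calls back with timeInfo = [fromInterval, toInterval, step] and an array of values
        hoard.fetch 'metrics.hoard', now - 3600, now, (err, timeInfo, values) ->
            throw err if err
            console.log timeInfo, values

The full source follows.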
fs = require 'fs'
Buffer = require('buffer').Buffer
Binary = require 'binary'
underscore = _ = require '../lib/underscore'
async = require '../lib/async'
pack = require('../lib/jspack').jspack
path = require 'path'
Put = require 'put'
assert = require 'assert'  # used by the sanity check in updateManyArchive below

# Monkey patch since modulo operator is broken in JS (negative operands)
Number.prototype.mod = (n) -> ((@ % n) + n) % n

longFormat = "!L"
longSize = pack.CalcLength(longFormat)
floatFormat = "!f"
floatSize = pack.CalcLength(floatFormat)
timestampFormat = "!L"
timestampSize = pack.CalcLength(timestampFormat)
valueFormat = "!d"
valueSize = pack.CalcLength(valueFormat)
pointFormat = "!Ld"
pointSize = pack.CalcLength(pointFormat)
metadataFormat = "!2LfL"
metadataSize = pack.CalcLength(metadataFormat)
archiveInfoFormat = "!3L"
archiveInfoSize = pack.CalcLength(archiveInfoFormat)

unixTime = -> parseInt(new Date().getTime() / 1000)

create = (filename, archives, xFilesFactor, cb) ->
    # FIXME: Check parameters
    # FIXME: Check that values are correctly formatted
    archives.sort (a, b) -> a[0] - b[0]

    if path.existsSync(filename)
        cb new Error('File ' + filename + ' already exists')
        return

    oldest = (a[0] * a[1] for a in archives).sort().reverse()[0]

    encodeFloat = (value) ->
        # Dirty hack: use 'buffer_ieee754' from node 0.5.x
        # as no libraries had a working IEEE754 encoder
        buffer = new Buffer(4)
        require('buffer_ieee754').writeIEEE754(buffer, value, 0, 'big', 23, 4)
        buffer

    # File metadata: last update, max retention, xFilesFactor, archive count
    buffer = Put()
        .word32be(unixTime())   # last update
        .word32be(oldest)       # max retention
        .put(encodeFloat(xFilesFactor))
        .word32be(archives.length)

    headerSize = metadataSize + (archiveInfoSize * archives.length)
    archiveOffset = headerSize

    # One archive info record per archive: offset, secondsPerPoint, points
    for archive in archives
        secondsPerPoint = archive[0]
        points = archive[1]
        buffer.word32be(archiveOffset)
        buffer.word32be(secondsPerPoint)
        buffer.word32be(points)
        archiveOffset += (points * pointSize)

    # Pad archive data itself with zeroes
    buffer.pad(archiveOffset - headerSize)

    # FIXME: Check file lock?
    # FIXME: fsync this?
    fs.writeFile filename, buffer.buffer(), 'binary', cb

propagate = (fd, timestamp, xff, higher, lower, cb) ->
    lowerIntervalStart = timestamp - timestamp.mod(lower.secondsPerPoint)
    lowerIntervalEnd = lowerIntervalStart + lower.secondsPerPoint

    packedPoint = new Buffer(pointSize)
    fs.read fd, packedPoint, 0, pointSize, higher.offset, (err, written, buffer) ->
        cb(err) if err
        [higherBaseInterval, higherBaseValue] = pack.Unpack(pointFormat, packedPoint)

        if higherBaseInterval == 0
            higherFirstOffset = higher.offset
        else
            timeDistance = lowerIntervalStart - higherBaseInterval
            pointDistance = timeDistance / higher.secondsPerPoint
            byteDistance = pointDistance * pointSize
            higherFirstOffset = higher.offset + byteDistance.mod(higher.size)

        higherPoints = lower.secondsPerPoint / higher.secondsPerPoint
        higherSize = higherPoints * pointSize
        relativeFirstOffset = higherFirstOffset - higher.offset
        relativeLastOffset = (relativeFirstOffset + higherSize).mod(higher.size)
        higherLastOffset = relativeLastOffset + higher.offset

        if higherFirstOffset < higherLastOffset
            # We don't wrap the archive
            seriesSize = higherLastOffset - higherFirstOffset
            seriesString = new Buffer(seriesSize)
            fs.read fd, seriesString, 0, seriesSize, higherFirstOffset, (err, written, buffer) ->
                parseSeries(seriesString)
        else
            # We do wrap the archive
            higherEnd = higher.offset + higher.size
            firstSeriesSize = higherEnd - higherFirstOffset
            secondSeriesSize = higherLastOffset - higher.offset
            seriesString = new Buffer(firstSeriesSize + secondSeriesSize)
            fs.read fd, seriesString, 0, firstSeriesSize, higherFirstOffset, (err, written, buffer) ->
                cb(err) if err
                if secondSeriesSize > 0
                    fs.read fd, seriesString, firstSeriesSize, secondSeriesSize, higher.offset, (err, written, buffer) ->
                        cb(err) if err
                        parseSeries(seriesString)
                else
                    ret = new Buffer(firstSeriesSize)
                    seriesString.copy(ret, 0, 0, firstSeriesSize)
                    parseSeries(ret)

        parseSeries = (seriesString) ->
            # Now we unpack the series data we just read
            [byteOrder, pointTypes] = [pointFormat[0], pointFormat.slice(1)]
            points = seriesString.length / pointSize
            seriesFormat = byteOrder + (pointTypes for f in [0...points]).join("")
            unpackedSeries = pack.Unpack(seriesFormat, seriesString, 0)

            # And finally we construct a list of values
            neighborValues = (null for f in [0...points])
            currentInterval = lowerIntervalStart
            step = higher.secondsPerPoint

            for i in [0...unpackedSeries.length] by 2
                pointTime = unpackedSeries[i]
                if pointTime == currentInterval
                    neighborValues[i / 2] = unpackedSeries[i + 1]
                currentInterval += step

            # Compute the aggregate value to propagate from neighborValues, if we have enough known points
            knownValues = (v for v in neighborValues when v isnt null)
            if knownValues.length == 0
                cb null, false
                return

            sum = (list) ->
                s = 0
                for x in list
                    s += x
                s

            knownPercent = knownValues.length / neighborValues.length
            if knownPercent >= xff
                # We have enough data to propagate a value!
                aggregateValue = sum(knownValues) / knownValues.length # TODO: Another CF besides average?
                myPackedPoint = pack.Pack(pointFormat, [lowerIntervalStart, aggregateValue])
                packedPoint = new Buffer(pointSize)
                fs.read fd, packedPoint, 0, pointSize, lower.offset, (err) ->
                    [lowerBaseInterval, lowerBaseValue] = pack.Unpack(pointFormat, packedPoint)

                    if lowerBaseInterval == 0
                        # First propagated update to this lower archive
                        offset = lower.offset
                    else
                        # Not our first propagated update to this lower archive
                        timeDistance = lowerIntervalStart - lowerBaseInterval
                        pointDistance = timeDistance / lower.secondsPerPoint
                        byteDistance = pointDistance * pointSize
                        offset = lower.offset + byteDistance.mod(lower.size)

                    mypp = new Buffer(myPackedPoint)
                    fs.write fd, mypp, 0, pointSize, offset, (err) ->
                        cb(null, true)
            else
                cb(null, false)

update = (filename, value, timestamp, cb) ->
    # FIXME: Check file lock?
    # FIXME: Don't use info(), re-use fd between internal functions
    info filename, (err, header) ->
        cb(err) if err
        now = unixTime()
        diff = now - timestamp
        if not (diff < header.maxRetention and diff >= 0)
            cb(new Error('Timestamp not covered by any archives in this database.'))
            return

        # Find the highest-precision archive that covers timestamp
        for i in [0...header.archives.length]
            archive = header.archives[i]
            continue if archive.retention < diff
            # We'll pass on the update to these lower precision archives later
            lowerArchives = header.archives.slice(i + 1)
            break

        fs.open filename, 'r+', (err, fd) ->
            cb(err) if err
            # First we update the highest-precision archive
            myInterval = timestamp - timestamp.mod(archive.secondsPerPoint)
            myPackedPoint = new Buffer(pack.Pack(pointFormat, [myInterval, value]))

            packedPoint = new Buffer(pointSize)
            fs.read fd, packedPoint, 0, pointSize, archive.offset, (err, bytesRead, buffer) ->
                cb(err) if err
                [baseInterval, baseValue] = pack.Unpack(pointFormat, packedPoint)

                if baseInterval == 0
                    # This file's first update
                    fs.write fd, myPackedPoint, 0, pointSize, archive.offset, (err, written, buffer) ->
                        cb(err) if err
                        [baseInterval, baseValue] = [myInterval, value]
                        propagateLowerArchives()
                else
                    # File has been updated before
                    timeDistance = myInterval - baseInterval
                    pointDistance = timeDistance / archive.secondsPerPoint
                    byteDistance = pointDistance * pointSize
                    myOffset = archive.offset + byteDistance.mod(archive.size)
                    fs.write fd, myPackedPoint, 0, pointSize, myOffset, (err, written, buffer) ->
                        cb(err) if err
                        propagateLowerArchives()

            propagateLowerArchives = ->
                # Propagate the update to lower-precision archives
                #higher = archive
                #for lower in lowerArchives:
                #    if not __propagate(fd, myInterval, header.xFilesFactor, higher, lower):
                #        break
                #    higher = lower
                #__changeLastUpdate(fh)

                # FIXME: Also fsync here?
                fs.close fd, cb

    return

updateMany = (filename, points, cb) ->
    points.sort((a, b) -> a[0] - b[0]).reverse()
    # FIXME: Check lock
    info filename, (err, header) ->
        cb err if err
        fs.open filename, 'r+', (err, fd) ->
            now = unixTime()
            archives = header.archives
            currentArchiveIndex = 0
            currentArchive = header.archives[currentArchiveIndex]
            currentPoints = []
            updateArchiveCalls = []

            for point in points
                age = now - point[0]

                while currentArchive.retention < age # We can't fit any more points in this archive
                    if currentPoints.length > 0
                        # Commit all the points we've found that it can fit
                        currentPoints.reverse() # Put points in chronological order
                        do (header, currentArchive, currentPoints) ->
                            f = (cb) ->
                                updateManyArchive fd, header, currentArchive, currentPoints, cb
                            updateArchiveCalls.push(f)
                        currentPoints = []

                    if currentArchiveIndex < (archives.length - 1)
                        currentArchiveIndex++
                        currentArchive = archives[currentArchiveIndex]
                    else
                        # Last archive
                        currentArchive = null
                        break

                if not currentArchive
                    break # Drop remaining points that don't fit in the database

                currentPoints.push(point)

            async.series updateArchiveCalls, (err, results) ->
                throw err if err
                if currentArchive and currentPoints.length > 0
                    # Don't forget to commit after we've checked all the archives
                    currentPoints.reverse()
                    updateManyArchive fd, header, currentArchive, currentPoints, (err) ->
                        throw err if err
                        fs.close fd, cb
                else
                    fs.close fd, cb

            # FIXME: touch last update
            # FIXME: fsync here?
            # FIXME: close fd
            #fh.close()
            #cb(null)

updateManyArchive = (fd, header, archive, points, cb) ->
    step = archive.secondsPerPoint

    alignedPoints = []
    for p in points
        [timestamp, value] = p
        alignedPoints.push([timestamp - timestamp.mod(step), value])

    # Create a packed string for each contiguous sequence of points
    packedStrings = []
    previousInterval = null
    currentString = []
    for ap in alignedPoints
        [interval, value] = ap
        if !previousInterval or (interval == previousInterval + step)
            currentString = currentString.concat(pack.Pack(pointFormat, [interval, value]))
            previousInterval = interval
        else
            numberOfPoints = currentString.length / pointSize
            startInterval = previousInterval - (step * (numberOfPoints - 1))
            packedStrings.push([startInterval, new Buffer(currentString)])
            currentString = pack.Pack(pointFormat, [interval, value])
            previousInterval = interval

    if currentString.length > 0
        numberOfPoints = currentString.length / pointSize
        startInterval = previousInterval - (step * (numberOfPoints - 1))
        packedStrings.push([startInterval, new Buffer(currentString, 'binary')])

    # Read base point and determine where our writes will start
    packedBasePoint = new Buffer(pointSize)
    fs.read fd, packedBasePoint, 0, pointSize, archive.offset, (err) ->
        cb err if err
        [baseInterval, baseValue] = pack.Unpack(pointFormat, packedBasePoint)
        if baseInterval == 0
            # This file's first update
            # Use our first string as the base, so we start at the start
            baseInterval = packedStrings[0][0]

        # Write all of our packed strings in locations determined by the baseInterval
        writePackedString = (ps, callback) ->
            [interval, packedString] = ps
            timeDistance = interval - baseInterval
            pointDistance = timeDistance / step
            byteDistance = pointDistance * pointSize
            myOffset = archive.offset + byteDistance.mod(archive.size)
            archiveEnd = archive.offset + archive.size
            bytesBeyond = (myOffset + packedString.length) - archiveEnd

            if bytesBeyond > 0
                fs.write fd, packedString, 0, packedString.length - bytesBeyond, myOffset, (err) ->
                    cb err if err
                    assert.equal archiveEnd, myOffset + packedString.length - bytesBeyond
                    #assert fh.tell() == archiveEnd, "archiveEnd=%d fh.tell=%d bytesBeyond=%d len(packedString)=%d" % (archiveEnd, fh.tell(), bytesBeyond, len(packedString))
                    # Safe because it can't exceed the archive (retention checking logic above)
                    fs.write fd, packedString, packedString.length - bytesBeyond, bytesBeyond, archive.offset, (err) ->
                        cb err if err
                        callback()
            else
                fs.write fd, packedString, 0, packedString.length, myOffset, (err) ->
                    callback()

        async.forEachSeries packedStrings, writePackedString, (err) ->
            throw err if err
            propagateLowerArchives()

        propagateLowerArchives = ->
            # Now we propagate the updates to lower-precision archives
            higher = archive
            lowerArchives = (arc for arc in header.archives when arc.secondsPerPoint > archive.secondsPerPoint)

            if lowerArchives.length > 0
                # Collect a list of propagation calls to make
                # This is easier than doing async looping
                propagateCalls = []
                for lower in lowerArchives
                    fit = (i) -> i - i.mod(lower.secondsPerPoint)
                    lowerIntervals = (fit(p[0]) for p in alignedPoints)
                    uniqueLowerIntervals = _.uniq(lowerIntervals)
                    for interval in uniqueLowerIntervals
                        propagateCalls.push {interval: interval, header: header, higher: higher, lower: lower}
                    higher = lower

                callPropagate = (args, callback) ->
                    propagate fd, args.interval, args.header.xFilesFactor, args.higher, args.lower, (err, result) ->
                        cb err if err
                        callback err, result

                async.forEachSeries propagateCalls, callPropagate, (err, result) ->
                    throw err if err
                    cb null
            else
                cb null

info = (path, cb) ->
    # FIXME: Close this stream?
    # FIXME: Signal errors to callback?
    # FIXME: Stream parsing with node-binary dies; looks like an upstream issue (see their GitHub).
    #        Using fs.readFile() instead of a read stream for now.
    fs.readFile path, (err, data) ->
        cb err if err
        archives = []
        metadata = {}

        Binary.parse(data)
            .word32bu('lastUpdate')
            .word32bu('maxRetention')
            .buffer('xff', 4) # Must decode separately since node-binary can't handle floats
            .word32bu('archiveCount')
            .tap (vars) ->
                metadata = vars
                metadata.xff = pack.Unpack('!f', vars.xff, 0)[0]
                @flush()
                for index in [0...metadata.archiveCount]
                    @word32bu('offset').word32bu('secondsPerPoint').word32bu('points')
                    @tap (archive) ->
                        @flush()
                        archive.retention = archive.secondsPerPoint * archive.points
                        archive.size = archive.points * pointSize
                        archives.push(archive)
            .tap ->
                cb null,
                    maxRetention: metadata.maxRetention
                    xFilesFactor: metadata.xff
                    archives: archives
    return

fetch = (path, from, to, cb) ->
    info path, (err, header) ->
        now = unixTime()
        oldestTime = now - header.maxRetention

        from = oldestTime if from < oldestTime
        throw new Error('Invalid time interval') unless from < to
        to = now if to > now or to < from

        diff = now - from
        fd = null

        # Find the closest archive to look in that will contain our interval
        for archive in header.archives
            break if archive.retention >= diff

        fromInterval = parseInt(from - from.mod(archive.secondsPerPoint)) + archive.secondsPerPoint
        toInterval = parseInt(to - to.mod(archive.secondsPerPoint)) + archive.secondsPerPoint

        file = fs.createReadStream(path)
        Binary.stream(file)
            .skip(archive.offset)
            .word32bu('baseInterval')
            .word32bu('baseValue')
            .tap (vars) ->
                if vars.baseInterval == 0
                    # Nothing has been written to this hoard
                    step = archive.secondsPerPoint
                    points = (toInterval - fromInterval) / step
                    timeInfo = [fromInterval, toInterval, step]
                    values = (null for n in [0...points])
                    cb(null, timeInfo, values)
                else
                    # We have data in this hoard, let's read it
                    getOffset = (interval) ->
                        timeDistance = interval - vars.baseInterval
                        pointDistance = timeDistance / archive.secondsPerPoint
                        byteDistance = pointDistance * pointSize
                        a = archive.offset + byteDistance.mod(archive.size)
                        a

                    fromOffset = getOffset(fromInterval)
                    toOffset = getOffset(toInterval)

                    fs.open path, 'r', (err, fd) ->
                        if err then throw err
                        if fromOffset < toOffset
                            # We don't wrap around the archive, one read is enough
                            size = toOffset - fromOffset
                            seriesBuffer = new Buffer(size)
                            fs.read fd, seriesBuffer, 0, size, fromOffset, (err, num) ->
                                cb(err) if err
                                fs.close fd, (err) ->
                                    cb(err) if err
                                    unpack(seriesBuffer) # We have read it, go unpack!
                        else
                            # We wrap around the archive, we need two reads
                            archiveEnd = archive.offset + archive.size
                            size1 = archiveEnd - fromOffset
                            size2 = toOffset - archive.offset
                            seriesBuffer = new Buffer(size1 + size2)
                            fs.read fd, seriesBuffer, 0, size1, fromOffset, (err, num) ->
                                cb(err) if err
                                fs.read fd, seriesBuffer, size1, size2, archive.offset, (err, num) ->
                                    cb(err) if err
                                    unpack(seriesBuffer) # We have read it, go unpack!
                                    fs.close(fd)

        unpack = (seriesData) ->
            # Optimize this?
            numPoints = seriesData.length / pointSize
            seriesFormat = "!" + ('Ld' for f in [0...numPoints]).join("")
            unpackedSeries = pack.Unpack(seriesFormat, seriesData)

            # Use buffer/pre-allocate?
            valueList = (null for f in [0...numPoints])
            currentInterval = fromInterval
            step = archive.secondsPerPoint

            for i in [0...unpackedSeries.length] by 2
                pointTime = unpackedSeries[i]
                if pointTime == currentInterval
                    pointValue = unpackedSeries[i + 1]
                    valueList[i / 2] = pointValue
                currentInterval += step

            timeInfo = [fromInterval, toInterval, step]
            cb(null, timeInfo, valueList)
    return

exports.create = create
exports.update = update
exports.updateMany = updateMany
exports.info = info
exports.fetch = fetch