UNPKG

pull-git-pack

Version:
551 lines (502 loc) 14.5 kB
// pull-git-pack: streaming git packfile encoder/decoder built on pull-streams.
//
// Exports:
//   decode(opts, repo, onEnd)            -> through-stream of pack objects
//   decodeWithIndex(repo, readPack, readIdx) -> reader of objects in pack order
//   decodeObject(opts, repo, cb)         -> sink parsing a single pack object
//   encode(opts, numObjects)             -> through-stream producing pack bytes

var buffered = require('pull-buffered')
var pull = require('pull-stream')
var toPull = require('stream-to-pull-stream')
var pako = require('pako')
var createHash = require('./lib/util').createHash
var cat = require('pull-cat')
var zlib = require('zlib')
var packidx = require('pull-git-packidx-parser')
var pullLooper = require('pull-looper')

exports.decode = decodePack
exports.decodeWithIndex = decodePackWithIndex
exports.decodeObject = decodePackObject
exports.encode = encodePack

// Version written into the pack header by encodePack.
// decodePack accepts versions 2 and 3.
var PACK_VERSION = 2

// Type names indexed by the 3-bit type code of an object header.
var objectTypes = [
  'none', 'commit', 'tree', 'blob',
  'tag', 'unused', 'ofs-delta', 'ref-delta'
]

// Inverse mapping, used when encoding object headers.
var objectTypeNums = {
  commit: 1,
  tree: 2,
  blob: 3,
  tag: 4,
  'ofs-delta': 6,
  'ref-delta': 7
}

// Adapt a result callback into an end/error continuation:
// a falsy error is reported as `true` (clean pull-stream end).
function error(cb) {
  return function (err) {
    cb(err || true)
  }
}

// Inflate a zlib stream one input byte at a time.
// `read` must be a pull-stream source of single-byte buffers; the returned
// source emits inflated chunks and ends when the zlib stream ends, leaving
// any bytes after the compressed data unconsumed by the inflater.
function inflateBytes(read) {
  var inflate = new pako.Inflate()
  var ended, dataOut

  inflate.onData = function (data) {
    dataOut = Buffer.from(data)
  }

  // pako detects the end of the deflate stream itself; a non-zero status
  // carries the zlib error message.
  inflate.onEnd = function (status) {
    ended = (status === 0) ? true : new Error(inflate.strm.msg)
  }

  return function (abort, cb) {
    if (ended) return cb(ended)
    read(abort, function next(end, data) {
      if (end === true) {
        end = null
        data = []
      }
      if (ended = end) return cb(end)
      if (data.length > 1) return cb(new Error('got more than one byte'))
      dataOut = null
      inflate.push(data, end === true)
      if (dataOut)
        cb(null, dataOut)
      else if (ended)
        cb(ended)
      else
        // let the stack unwind before pulling the next byte
        setImmediate(function () {
          read(null, next)
        })
    })
  }
}

// Deflate a pull-stream of buffers. Output chunks are queued as pako
// produces them and replayed to the reader in order.
function deflate(read) {
  var def = new pako.Deflate()
  var queue = [] // [err|null, data] pairs ready to deliver
  var ended

  def.onData = function (data) {
    queue.push([null, Buffer.from(data)])
  }

  def.onEnd = function (status) {
    queue.push([(status === 0) ? true : new Error(def.strm.msg)])
  }

  return function readOut(abort, cb) {
    if (ended)
      cb(ended)
    else if (queue.length)
      cb.apply(this, queue.shift())
    else
      read(abort, function next(end, data) {
        if (end === true) def.push([], true) // flush and finish
        else if (end) return cb(end)
        else def.push(data)
        setImmediate(function () {
          readOut(null, cb)
        })
      })
  }
}

// Decode a packfile. Returns a pull-stream source of objects
// { type, length, offset, read } — each object's `read` streams the inflated
// content and must be drained before the next object is requested.
//   opts:  { verbosity, onHeader } (all optional)
//   repo:  used to resolve ref-delta base objects
//   onEnd: called with null on success or an Error on failure
// Partially applied (read undefined) it returns a through-stream.
function decodePack(opts, repo, onEnd, read) {
  if (read === undefined) return decodePack.bind(this, opts, repo, onEnd)
  onEnd = onEnd || function (err) { if (err) throw err }
  opts = opts || {}

  var ended
  var inObject = false  // an object's content stream is still being drained
  var numObjects = -1   // -1 until the pack header has been read
  var offset = 0, objectOffset
  var checksum = createHash('sha1')
  var b = buffered(read)
  // header/object reads advance `offset` and are folded into the checksum
  var readByte = pull(b.chunks(1), track, checksum)
  var readWord = pull(b.chunks(4), track, checksum)
  var readHash = pull(b.chunks(20), track, checksum)
  var readChecksum = b.chunks(20) // the trailer itself is not checksummed
  var expectChecksum = true
  var _cb // reader waiting for the current object to finish

  // Through-stream that advances `offset` by the size of each chunk.
  function track(read) {
    return function (end, cb) {
      read(end, function (end, data) {
        if (data) offset += data.length // guard: data is undefined at end
        cb(end, data)
      })
    }
  }

  function readHeader(cb) {
    readWord(null, function (end, header) {
      if (ended = end) return cb(end)
      // fix: original called header.equals(header, ...), comparing the
      // buffer to itself, so the magic-number check could never fail
      if (!header.equals(Buffer.from('PACK')))
        read(new Error('Invalid packfile header'), error(cb))
      else
        readVersion(cb)
    })
  }

  function readVersion(cb) {
    readWord(null, function (end, word) {
      if (ended = end) return cb(end)
      var version = word.readUInt32BE()
      if (version < 2 || version > 3)
        read(new Error('Invalid packfile version ' + version), error(cb))
      else
        readNumObjects(cb)
    })
  }

  function readNumObjects(cb) {
    readWord(null, function (end, word) {
      if (ended = end) return cb(end)
      numObjects = word.readUInt32BE()
      if (opts.verbosity >= 2)
        console.error(numObjects + ' objects')
      if (opts.onHeader)
        opts.onHeader(numObjects)
      readObject(null, cb)
    })
  }

  // Read one object header and hand the (lazily inflated) object to cb.
  function getObject(cb) {
    inObject = true
    objectOffset = offset
    readTypedVarInt(readByte, function (end, type, length) {
      if (opts.verbosity >= 2)
        console.error('read object header', end, type, length)
      numObjects--
      if (end === true && expectChecksum)
        onEnd(new Error('Missing checksum'))
      if (ended = end) return cb(end)
      // TODO: verify that the inflated data is the correct length
      if (type == 'ref-delta')
        getObjectFromRefDelta(length, gotObject)
      else
        gotObject(null, {
          type: type,
          length: length,
          read: inflateBytes(readByte)
        })
    })

    function gotObject(err, obj) {
      if (err) return cb(err)
      // pass through the object but detect when its content stream ends
      cb(null, {
        type: obj.type,
        length: obj.length,
        offset: objectOffset,
        read: pull(
          obj.read,
          pull.through(null, function () {
            inObject = false
            if (_cb) {
              var waiting = _cb
              _cb = null // fix: clear so a later end cannot re-fire it
              readObject(null, waiting)
            }
          })
        )
      })
    }
  }

  // TODO: test with ref-delta objects in pack
  // Resolve a ref-delta object: read the 20-byte base hash, then the inflated
  // delta (source length, target length, instructions) and patch the base.
  function getObjectFromRefDelta(length, cb) {
    readHash(null, function (end, sourceHash) {
      if (end) return cb(end)
      sourceHash = sourceHash.toString('hex')
      var b = buffered(inflateBytes(readByte))
      var readInflatedByte = b.chunks(1)
      readVarInt(readInflatedByte, function (err, expectedSourceLength) {
        if (err) return cb(err)
        readVarInt(readInflatedByte, function (err, expectedTargetLength) {
          if (err) return cb(err)
          if (opts.verbosity >= 3)
            console.error('getting object', sourceHash)
          getRepoObject(repo, sourceHash, function (err, sourceObject) {
            if (opts.verbosity >= 3)
              console.error('got object', sourceHash, sourceObject, err)
            if (err) return cb(err)
            if (sourceObject.length != expectedSourceLength)
              cb(new Error('Incorrect source object size in ref delta'))
            else
              patchObject(opts, b, length, sourceObject,
                expectedTargetLength, cb)
          })
        })
      })
    })
  }

  // Verify the 20-byte SHA-1 trailer against the running checksum.
  function readTrailer(cb) {
    // digest now: the trailer itself is not part of the checksum
    var expected = checksum.digest()
    readChecksum(null, function (end, value) {
      cb(true)
      if (end) {
        // fix: original fell through and crashed on value.equals(undefined)
        if (end === true && expectChecksum)
          onEnd(new Error('Missing checksum'))
        else if (end !== true)
          onEnd(end)
        return
      }
      if (!value.equals(expected)) {
        onEnd(new Error('Checksum mismatch: ' +
          expected.hexSlice() + ' != ' + value.hexSlice()))
      } else {
        if (opts.verbosity >= 3)
          console.error('checksum ok', expected.hexSlice())
        onEnd(null)
      }
    })
  }

  // Top-level reader returned to the caller.
  function readObject(abort, cb) {
    if (ended) cb(ended)
    else if (inObject) _cb = cb // defer until the current object is drained
    else if (abort) read(abort, function (err) { cb(ended = err || abort) })
    else if (numObjects < 0) readHeader(cb)
    else if (numObjects > 0) getObject(cb)
    else if (expectChecksum) readTrailer(cb)
  }

  return readObject
}

// Decode a packfile using its .idx index: objects are read in pack order
// using offsets from the index, so each object's extent in the pack is known.
function decodePackWithIndex(repo, readPack, readIdx) {
  var b = buffered(readPack)
  var ended, offsets, i = 0
  return function readObject(end, cb) {
    if (end) return readPack(end, function (err) {
      readIdx(err, function (err) {
        cb(err === true ? null : err)
      })
    })
    if (ended) return cb(ended)
    if (!offsets) return pull(readIdx, packidx(function (err, idx) {
      if (ended = err) return cb(err)
      // sort the offset objects in the order in which they are in the pack
      offsets = idx.objects.slice().sort(function (a, b) {
        return a.offset - b.offset
      })
      // skip the pack header
      pull(b.take(12), pull.drain(null, function (err) {
        if (err) return cb(err)
        readObject(null, cb)
      }))
    }))
    var offset = offsets[i++]
    if (!offset) return cb(ended = true)
    pull(
      offset.next
        ? b.take(offset.next.offset - offset.offset)
        : b.passthrough,
      decodePackObject({}, repo, cb)
      // TODO: block on parallel read
    )
  }
}

// Read a little-endian base-128 varint (as used for delta sizes).
function readVarInt(readByte, cb) {
  var value = 0, shift = 0
  var ended // fix: was an undeclared (implicit global) variable
  readByte(null, function gotByte(end, buf) {
    if (ended = end) return cb(end)
    var byte = buf[0]
    value += (byte & 0x7f) << shift
    shift += 7
    if (byte & 0x80)
      readByte(null, gotByte)
    else
      cb(null, value)
  })
}

// Read an object header: 3-bit type plus base-128 length.
// cb(end, typeName, length)
function readTypedVarInt(readByte, cb) {
  var type, value, shift
  var ended // fix: was an undeclared (implicit global) variable
  readByte(null, function (end, buf) {
    if (ended = end) return cb(end)
    var firstByte = buf[0]
    type = objectTypes[(firstByte >> 4) & 7]
    value = firstByte & 15
    shift = 4
    checkByte(firstByte)
  })

  function checkByte(byte) {
    if (byte & 0x80)
      readByte(null, gotByte)
    else
      cb(null, type, value)
  }

  function gotByte(end, buf) {
    if (ended = end) return cb(end)
    var byte = buf[0]
    value += (byte & 0x7f) << shift
    shift += 7
    checkByte(byte)
  }
}

// Look up an object in the repo, preferring getObjectFromAny when available.
function getRepoObject(repo, id, cb) {
  // TODO: abstract this better
  ;(repo.getObjectFromAny || repo.getObject).call(repo, id, cb)
}

// Apply git delta instructions read from `deltaB` to `srcObject`, producing
// a patched object { type, length, read } of length `targetLength`.
function patchObject(opts, deltaB, deltaLength, srcObject, targetLength, cb) {
  var readByte = deltaB.chunks(1)
  var srcBuf
  var ended

  if (opts.verbosity >= 2)
    console.error('patching', srcObject.type, targetLength)

  // buffer the whole base object so copy instructions can slice it
  pull(
    srcObject.read,
    pull.collect(function (err, bufs) {
      if (err) return cb(err) // fix: error was silently ignored
      srcBuf = Buffer.concat(bufs, srcObject.length)
      cb(null, {
        type: srcObject.type,
        length: targetLength,
        read: pullLooper(read)
      })
    })
  )

  // One delta instruction per pull: copy from source, or insert literals.
  function read(abort, cb) {
    if (ended) return cb(ended)
    readByte(null, function (end, dBuf) {
      if (ended = end) return cb(end)
      var cmd = dBuf[0]
      if (cmd & 0x80)
        // copy: offset/size bytes follow, selected by cmd's low bits
        readOffsetSize(cmd, deltaB, function (err, offset, size) {
          if (err) return earlyEnd(err)
          var buf = srcBuf.slice(offset, offset + size)
          cb(end, buf)
        })
      else if (cmd)
        // insert `cmd` bytes from delta
        deltaB.chunks(cmd)(null, cb)
      else
        cb(new Error("unexpected delta opcode 0"))
    })

    function earlyEnd(err) {
      cb(err === true ? new Error('stream ended early') : err)
    }
  }
}

// Decode a copy instruction's offset and size: each set bit in cmd's low
// seven bits selects one little-endian byte of the offset (bits 0-3) or
// the size (bits 4-6). A decoded size of 0 means 0x10000.
function readOffsetSize(cmd, b, readCb) {
  var readByte = b.chunks(1)
  var offset = 0, size = 0

  function addByte(bit, outPos, cb) {
    if (cmd & (1 << bit))
      readByte(null, function (err, buf) {
        if (err) readCb(err)
        else cb(buf[0] << (outPos << 3))
      })
    else
      cb(0)
  }

  addByte(0, 0, function (val) {
    offset = val
    addByte(1, 1, function (val) {
      offset |= val
      addByte(2, 2, function (val) {
        offset |= val
        addByte(3, 3, function (val) {
          offset |= val
          addSize()
        })
      })
    })
  })

  function addSize() {
    addByte(4, 0, function (val) {
      size = val
      addByte(5, 1, function (val) {
        size |= val
        addByte(6, 2, function (val) {
          size |= val
          readCb(null, offset, size || 0x10000)
        })
      })
    })
  }
}

// Encode an object header (3-bit type + base-128 length) and pass it to cb.
function encodeTypedVarInt(typeStr, length, cb) {
  var type = objectTypeNums[typeStr]
  if (!type)
    return cb(new Error("Bad object type " + typeStr))

  var vals = []
  var b = (type << 4) | (length & 15)
  for (length >>= 4; length; length >>= 7) {
    vals.push(b | 0x80) // high bit: more length bytes follow
    b = length & 0x7f
  }
  vals.push(b)
  cb(null, Buffer.from(vals))
}

// Encode a packfile: header, then each object (header + deflated content),
// then the SHA-1 checksum of everything before it.
// `readObject` is a source of { type, length, read } objects.
// Partially applied (readObject undefined) it returns a through-stream.
function encodePack(opts, numObjects, readObject) {
  if (numObjects === undefined)
    numObjects = opts, opts = null
  if (readObject === undefined)
    return encodePack.bind(this, opts, numObjects)

  var header = Buffer.alloc(12)
  header.write('PACK')
  header.writeUInt32BE(PACK_VERSION, 4)
  header.writeUInt32BE(numObjects, 8)
  var checksum = createHash('sha1')
  var readData // deflated content of the object currently being written

  return cat([
    checksum(cat([
      pull.once(header),
      encodeObject
    ])),
    checksum.readDigest
  ])

  // Emit the current object's deflated data; when it ends, pull the next
  // object and emit its encoded header.
  function encodeObject(abort, cb) {
    if (readData)
      readData(abort, function (end, data) {
        if (end === true)
          readObject(abort, nextObject)
        else
          cb(end, data)
      })
    else
      readObject(abort, nextObject)

    function nextObject(end, object) {
      if (end) return cb(end)
      readData = deflate(object.read)
      encodeTypedVarInt(object.type, object.length, cb)
    }
  }
}

// Decode a single object from a pack stream. cb receives an error or
// { type, length, read } where `read` streams the inflated content.
// Partially applied (read undefined) it returns a sink.
function decodePackObject(opts, repo, cb, read) {
  if (read === undefined)
    return decodePackObject.bind(this, opts, repo, cb)
  opts = opts || {}
  var b = buffered(read)
  var readByte = b.chunks(1)
  var readHash = b.chunks(20)

  readTypedVarInt(readByte, function (end, type, length) {
    if (opts.verbosity >= 2)
      console.error('object', end || type, length)
    if (end === true) cb(new Error('Missing object type'))
    else if (end) cb(end)
    else if (type == 'ref-delta') getObjectFromRefDelta(length, cb)
    else cb(null, {
      type: type,
      length: length,
      read: pull(
        b.passthrough,
        toPull(zlib.createInflate())
      )
    })
  })

  // TODO: test with ref-delta objects in pack
  function getObjectFromRefDelta(length, cb) {
    readHash(null, function (end, sourceHash) {
      if (end) return cb(end)
      sourceHash = sourceHash.toString('hex')
      // re-buffer the inflated remainder so the delta can be read bytewise
      b = pull(
        b.passthrough,
        toPull(zlib.createInflate()),
        buffered
      )
      var readInflatedByte = b.chunks(1)
      readVarInt(readInflatedByte, function (err, expectedSourceLength) {
        if (err) return cb(err)
        readVarInt(readInflatedByte, function (err, expectedTargetLength) {
          if (err) return cb(err)
          if (opts.verbosity >= 3)
            console.error('getting object', sourceHash)
          getRepoObject(repo, sourceHash, function (err, sourceObject) {
            if (opts.verbosity >= 3)
              console.error('got object', sourceHash, sourceObject, err)
            if (err) return cb(err)
            if (sourceObject.length != expectedSourceLength)
              cb(new Error('Incorrect source object size in ref delta'))
            else
              patchObject(opts, b, length, sourceObject,
                expectedTargetLength, cb)
          })
        })
      })
    })
  }
}