protobuf-lite
Version:
protocol buffers, much simplified
435 lines (364 loc) • 15 kB
JavaScript
/**
* protobuf-lite -- protocol buffers, simplified
*
* 2017-12-06 - AR.
*/
// https://developers.google.com/protocol-buffers/docs/encoding (wire protocol, very terse)
// https://developers.google.com/protocol-buffers/docs/proto (long!)
/**
- varint encoding: 1xxx xxxx 0xxx xxxx little-e 14-bit value
- signed varint: zig-zag varint encoded: lsb is sign, other bits are absolute value (eg 6 = 1100, -6 = 1011)
- 64-bit: always 8 bytes, decodes as 64 little-e bits, parser interprets bits (eg double, fixint64)
- 32-bit: always 4 bytes, decodes as 32 little-e bits, parser interprets bits (eg float)
- the wire protocol encodes only bits; a field's actual type can only be determined from its .proto type definition file
**/
;
var util = require('util');
var qutf8 = require('q-utf8');
var fp = require('ieee-float');
// Public API of the module.
// pack() and unpack() call _pack/_unpack through this object, so the
// underscore entries are exported alongside them.
var protobuf = {};
protobuf.pack = pack;
protobuf.unpack = unpack;
protobuf._pack = _pack;
protobuf._unpack = _unpack;
// common aliases
protobuf.encode = pack;
protobuf.decode = unpack;
module.exports = protobuf;
/*
* protobuf wire protocol encoder and decoder
*/
/**
 * Encode an array of values into a protobuf-encoded Buffer.
 *
 * The bytes are first gathered into a plain array by protobuf._pack
 * (looked up on the exported object at call time), then copied into a Buffer.
 *
 * @param {string} format  conversion specifiers, one character per field (see convMap)
 * @param {Array} data     the values to encode, one per specifier
 * @returns {Buffer} the encoded bytes
 */
function pack( format, data ) {
    var bytes = [];
    var cursor = { p: 0, fieldnum: 0 };
    protobuf._pack(format, data, bytes, cursor);
    return new Buffer(bytes);
}
/**
 * Decode protobuf-encoded bytes into an array of values.
 *
 * Delegates to protobuf._unpack (looked up on the exported object at call
 * time), starting the read cursor at offset 0.
 *
 * @param {string} format  conversion specifiers, one character per field (see convMap)
 * @param {Buffer|Array} data  the encoded bytes
 * @returns {Array} the decoded field values
 */
function unpack( format, data ) {
return protobuf._unpack(format, data, {p:0});
}
// Conversion table: format character => { wt, enc, dec }.
//   wt  is the wire type number written into the field key by encodeType
//   enc is called as enc(value, buf, pos) and appends the encoded value at pos.p
//   dec is called as dec(buf, pos) and returns the value read from pos.p
// NOTE: compilePack / compileUnpack refer to enc/dec by function name when
// that name is longer than 5 characters, and through convMap["x"].enc/.dec
// otherwise, so the inline bool functions must stay anonymous (or short-named).
var convMap = {
'i': { wt: 0, enc: encodeVarint, dec: decodeVarint }, // signed int, zig-zag style varint
'I': { wt: 0, enc: encodeUVarint, dec: decodeUVarint }, // unsigned int varint
'j': { wt: 0, enc: encodeVarint32, dec: decodeVarint32 }, // int32 varint (shares the 64-bit code)
'k': { wt: 0, enc: encodeVarint64, dec: decodeVarint64 }, // int64 varint (negatives stored as 64-bit twos complement)
'b': { wt: 0, // bool, stored as a single byte 0 or 1
enc: function(v, buf, pos) { buf[pos.p++] = v ? 1 : 0 },
dec: function(buf, pos) { return buf[pos.p++] ? true : false } },
'd': { wt: 1, enc: encodeDouble, dec: decodeDouble }, // double
'q': { wt: 1, enc: encodeInt64, dec: decodeInt64 }, // int64
'P': { wt: 1, enc: encodeUInt64, dec: decodeUInt64 }, // uint64
'a': { wt: 2, enc: encodeString, dec: decodeString }, // string
'Z': { wt: 2, enc: encodeBinary, dec: decodeBinary }, // binary
'f': { wt: 5, enc: encodeFloat, dec: decodeFloat }, // float
'l': { wt: 5, enc: encodeInt32, dec: decodeInt32 }, // int32
'V': { wt: 5, enc: encodeUInt32, dec: decodeUInt32 }, // uint32
// enum? (else int32)
};
// when a message is serialized [fields] should be written sequentially by field number

// Cache of compiled packers, keyed by format string.
// Created without a prototype so that a format such as "constructor" or
// "toString" cannot match an inherited Object.prototype member and be
// called as if it were a compiled packer.
var _packers = Object.create(null);

/**
 * Encode `data` according to `format`, appending the bytes to `buf` at pos.p.
 * The packer for each format is compiled once and then reused.
 *
 * @param {string} format     conversion specifiers, one character per field
 * @param {Array} data        the values to encode
 * @param {Array|Buffer} buf  destination for the encoded bytes
 * @param {{p: number}} pos   write cursor, advanced past the written bytes
 * @returns {Array|Buffer} whatever the compiled packer returns (it returns buf)
 * @throws {Error} from compilePack when the format has an unknown specifier
 */
function _pack( format, data, buf, pos ) {
    var packer = _packers[format];
    if (!packer) packer = _packers[format] = compilePack(format);
    return packer(format, data, buf, pos);
}
/**
 * Build a function that encodes one specific format.
 *
 * The function source is assembled line by line and compiled with a direct
 * eval, so the generated code can call encodeType, convMap and the named
 * encoders from this file's scope. Encoders whose function name is longer
 * than 5 characters are called by name; the others are reached through
 * convMap["x"].enc. Only characters that are keys of convMap are ever spliced
 * into the generated source.
 *
 * @param {string} format  conversion specifiers, one character per field
 * @returns {Function} packer(format, data, buf, pos) that returns buf and throws
 *   if data.length differs from the number of specifiers
 * @throws {Error} if the format contains a character that is not in convMap
 */
function compilePack( format ) {
    var fieldCount = format.length;
    var lines = [];

    lines.push('function _pack(format, data, buf, pos) {');
    lines.push(util.format(' if (data.length !== %d) throw new Error("expected %d data items, got " + data.length);', fieldCount, fieldCount));

    for (var idx = 0; idx < fieldCount; idx++) {
        var spec = format[idx];
        var entry = convMap[spec];
        if (!entry) throw new Error(spec + ': unknown conversion specifier at offset ' + idx);

        var encoderRef = 'convMap["' + spec + '"].enc';
        if (entry.enc.name.length > 5) encoderRef = entry.enc.name;

        // field numbers are 1-based: the key first, then the value
        lines.push(util.format(' encodeType(%d, %d, buf, pos);', idx + 1, entry.wt));
        lines.push(util.format(' %s(data[%d], buf, pos);', encoderRef, idx));
    }

    lines.push(' return buf;', '}');
    return eval('true && ' + lines.join('\n'));
}
// NOTE: the fields are normally decoded according to the .proto type spec
// Each wire type can be decoded as various different types.

// Cache of compiled unpackers, keyed by format string.
// Created without a prototype so that a format such as "constructor" or
// "toString" cannot match an inherited Object.prototype member and be
// called as if it were a compiled unpacker.
var _unpackers = Object.create(null);

/**
 * Decode the bytes in `buf`, starting at pos.p, according to `format`.
 * The unpacker for each format is compiled once and then reused.
 *
 * @param {string} format     conversion specifiers, one character per field
 * @param {Array|Buffer} buf  the encoded bytes
 * @param {{p: number}} pos   read cursor, advanced past the consumed bytes
 * @returns {Array} the decoded field values
 * @throws {Error} from compileUnpack when the format has an unknown specifier
 */
function _unpack( format, buf, pos ) {
    var unpacker = _unpackers[format];
    if (!unpacker) unpacker = _unpackers[format] = compileUnpack(format);
    return unpacker(format, buf, pos);
}
// build a function to decode this format
function compileUnpack( format ) {
var decSrc = [
util.format('function _unpack(format, buf, pos) {'),
util.format(' var data = new Array(%d);', format.length),
util.format(' var key, fieldnum, wiretype, conv;'),
// key = decodeUVarint(buf, pos);
// wiretype = key & 7;
// fieldnum = key >>> 3;
];
for (var fi = 0; fi < format.length; fi++) {
var fmt = format[fi];
if (!convMap[fmt]) throw new Error(fmt + ': unknown pack conversion at offset ' + fi);
decSrc.push(util.format(' key = decodeUVarint(buf, pos);'));
var unpackFuncName = convMap[fmt].dec.name.length > 5 ? convMap[fmt].dec.name : 'convMap["' + fmt + '"].dec';
decSrc.push(util.format(' data[(key >>> 3) - 1] = %s(buf, pos);', unpackFuncName));
}
decSrc.push(util.format(' if (data.length !== %d) throw new Error("expected %d data items, got " + data.length);', format.length, format.length));
decSrc.push(' return data;');
decSrc.push('}');
return eval('true && ' + decSrc.join('\n'));
}
/**
 * Write a field key: the field number in the upper bits and the wire type
 * in the low 3 bits, stored as an unsigned varint.
 *
 * @param {number} fieldnum  1-based field number
 * @param {number} wiretype  wire type number (the `wt` of a convMap entry)
 * @param {Array|Buffer} buf destination bytes
 * @param {{p: number}} pos  write cursor, advanced past the key
 */
function encodeType( fieldnum, wiretype, buf, pos ) {
    var key = fieldnum * 8 + wiretype;
    encodeUVarint(key, buf, pos);
}
/**
 * Append `n` as an unsigned varint: 7 bits per byte, least significant group
 * first, with the high bit set on every byte except the last.
 *
 * The value is scaled down by multiplying with 1/128 rather than shifting,
 * so values wider than 32 bits are handled; the fraction this leaves behind
 * is discarded by the bitwise masks.
 *
 * TODO: negative n is not supported; e.g. -1 is stored as the single byte 127.
 *
 * @param {number} n         non-negative value to encode
 * @param {Array|Buffer} buf destination bytes
 * @param {{p: number}} pos  write cursor, advanced past the written bytes
 */
function encodeUVarint( n, buf, pos ) {
    for (; n >= 128; n *= 0.0078125) {
        buf[pos.p++] = (n & 0x7f) | 0x80;
    }
    buf[pos.p++] = n & 0x7f;
}
/**
 * Append `n` as a signed varint with the sign kept in the lowest bit.
 *
 * Negative numbers are stored as their ones complement (-n - 1) plus a sign
 * bit, e.g. -2 111110 => 00001.1 and -6 111010 => 00101.1, while 6 => 00110.0.
 * The first byte carries the sign bit and the low 6 value bits; any
 * remaining bits follow as an unsigned varint.
 *
 * @param {number} n         value to encode
 * @param {Array|Buffer} buf destination bytes
 * @param {{p: number}} pos  write cursor, advanced past the written bytes
 */
function encodeVarint( n, buf, pos ) {
    var signBit = 0;
    if (n < 0) {
        signBit = 1;
        n = -n - 1;
    }

    var hasMore = n >= 64;
    var first = ((n & 0x3f) << 1) | signBit;
    if (hasMore) first |= 0x80;

    buf[pos.p++] = first;
    if (hasMore) encodeUVarint(n / 64, buf, pos);
}
// Append an int32 as a varint.
// The spec says negative int32 values are always encoded as 10 bytes, so just use the Varint64 code.
// This breaks protocol-buffers compat (which stores only 32 bits and leaves overlongs as positive).
// "If you use int32 or int64 as the type for a negative number, the resulting varint
// is always ten bytes long -- it is, effectively, treated like a very large unsigned
// integer." (encoding doc, "More Value Types").
function encodeVarint32( n, buf, pos ) {
encodeVarint64(n, buf, pos);
}
/**
 * Append the 64-bit twos complement value of `n` as a varint.
 *
 * Non-negative values are stored as an ordinary unsigned varint. Negative
 * values are stored as 64 unsigned bits (always 10 bytes) that decode back
 * to a signed 64-bit int. The negative case works on the bitwise inverse of
 * the ones complement (-n - 1), split into two 32-bit halves, so that every
 * intermediate stays a non-negative integer and fractions such as -1/2 are
 * not truncated to 0.
 *
 * @param {number} n         value to encode
 * @param {Array|Buffer} buf destination bytes
 * @param {{p: number}} pos  write cursor, advanced past the written bytes
 */
function encodeVarint64( n, buf, pos ) {
    if (n >= 0) return encodeUVarint(n, buf, pos);

    n = -n - 1;
    // (0xFFFFFFFF ^ x) inverts the low 32 bits of x
    var lo = (0xFFFFFFFF ^ n) >>> 0;
    var hi = (0xFFFFFFFF ^ (n / 0x100000000)) >>> 0;

    // emit 7 bits at a time, shifting bits from the high word into the low word
    while (hi > 0) {
        buf[pos.p++] = 0x80 | (lo & 0x7f);
        lo = ((hi & 0x7f) << (32 - 7)) | (lo >>> 7);
        hi = hi >>> 7;
    }

    // the high word is exhausted and lo is a 32-bit value, so plain 32-bit
    // shifts finish the job
    while (lo >= 128) {
        buf[pos.p++] = 0x80 | (lo & 0x7f);
        lo >>>= 7;
    }
    buf[pos.p++] = lo & 0x7f;
}
// Scratch 8-byte Buffer.
// NOTE(review): nothing in the visible code reads or writes tmpbuf -- confirm before removing.
var tmpbuf = new Buffer(8);
/**
 * Append `v` as a 4-byte little-endian float using ieee-float.
 *
 * @param {number} v         value to encode
 * @param {Array|Buffer} buf destination bytes
 * @param {{p: number}} pos  write cursor, advanced by 4
 */
function encodeFloat( v, buf, pos ) {
    var end = pos.p += 4;
    fp.writeFloatLE(buf, v, end - 4);
}
/**
 * Append `v` as an 8-byte little-endian double using ieee-float.
 *
 * @param {number} v         value to encode
 * @param {Array|Buffer} buf destination bytes
 * @param {{p: number}} pos  write cursor, advanced by 8
 */
function encodeDouble( v, buf, pos ) {
    var end = pos.p += 8;
    fp.writeDoubleLE(buf, v, end - 8);
}
/**
 * Append the low 32 bits of `v` as 4 little-endian bytes (twos complement).
 *
 * @param {number} v         value to encode
 * @param {Array|Buffer} buf destination bytes
 * @param {{p: number}} pos  write cursor, advanced by 4
 */
function encodeInt32( v, buf, pos ) {
    for (var shift = 0; shift < 32; shift += 8) {
        buf[pos.p++] = (v >> shift) & 0xff;
    }
}
// Store an unsigned 32-bit integer as 4 little-endian bytes.
// Delegates to encodeInt32, which writes the low 32 bits of v byte by byte.
function encodeUInt32( v, buf, pos ) {
return encodeInt32(v, buf, pos);
}
/**
 * Append `v` as 8 little-endian bytes holding a twos complement 64-bit integer.
 *
 * Always writes exactly 8 bytes, as decodeInt64 expects. Negative values are
 * negated in two 32-bit halves (invert both words, add one, carry into the
 * high word) because bitwise operators only see 32 bits. A fractional part
 * is dropped (truncated toward zero).
 *
 * @param {number} v         value to encode
 * @param {Array|Buffer} buf destination bytes
 * @param {{p: number}} pos  write cursor, advanced by 8
 */
function encodeInt64( v, buf, pos ) {
    var lo, hi;
    if (v < 0) {
        var magnitude = Math.floor(-v);
        lo = magnitude % 0x100000000;
        hi = Math.floor(magnitude / 0x100000000);
        // -x == ~x + 1; the +1 carries into the high word only when the low word is 0
        hi = (lo === 0) ? -hi : ~hi;
        lo = -lo;
    } else {
        lo = v & 0xffffffff;
        hi = v / 0x100000000;
    }
    encodeUInt32(lo, buf, pos);
    encodeUInt32(hi, buf, pos);
}
// Store an unsigned 64-bit integer in the little-endian 64-bit wire format.
// Delegates to encodeInt64 with the same arguments.
function encodeUInt64( v, buf, pos ) {
return encodeInt64(v, buf, pos);
}
/**
 * Append `str` as a length-delimited utf8 string: the byte length as an
 * unsigned varint, followed by the utf8 bytes.
 *
 * A Buffer destination uses the native Buffer methods; any other
 * destination (e.g. a plain array) is written with q-utf8.
 *
 * @param {string} str       text to encode
 * @param {Array|Buffer} buf destination bytes
 * @param {{p: number}} pos  write cursor, advanced past the written bytes
 */
function encodeString( str, buf, pos ) {
    if (!Buffer.isBuffer(buf)) {
        encodeUVarint(qutf8.utf8_byteLength(str, 0, str.length), buf, pos);
        // utf8_encode returns the offset just past the last byte written
        pos.p = qutf8.utf8_encode(str, 0, str.length, buf, pos.p);
        return;
    }

    encodeUVarint(Buffer.byteLength(str), buf, pos);
    // buf.write returns the number of bytes written
    pos.p += buf.write(str, pos.p);
}
/**
 * Append `bytes` as a length-delimited byte string: the length as an
 * unsigned varint, followed by the bytes themselves.
 *
 * @param {Array|Buffer} bytes  the bytes to store
 * @param {Array|Buffer} buf    destination bytes
 * @param {{p: number}} pos     write cursor, advanced past the written bytes
 */
function encodeBinary( bytes, buf, pos ) {
    encodeUVarint(bytes.length, buf, pos);
    var idx = 0;
    while (idx < bytes.length) {
        buf[pos.p] = bytes[idx];
        pos.p++;
        idx++;
    }
}
/**
 * Read an unsigned varint starting at pos.p.
 *
 * Each byte contributes its low 7 bits, least significant group first; a set
 * high bit means another byte follows. The value is accumulated with
 * multiplication instead of shifts, so results wider than 32 bits are kept.
 *
 * @param {Array|Buffer} buf the encoded bytes
 * @param {{p: number}} pos  read cursor, advanced past the varint
 * @returns {number} the decoded value
 */
function decodeUVarint( buf, pos ) {
    var octet = buf[pos.p++];
    // single-byte values are by far the most common: return them directly
    if ((octet & 0x80) === 0) return octet;

    var total = octet & 0x7f;
    var weight = 128;
    for (;;) {
        octet = buf[pos.p++];
        total += (octet & 0x7f) * weight;
        weight *= 128;
        if (!(octet & 0x80)) return total;
    }
}
/**
 * Read a signed varint written by encodeVarint.
 *
 * The first byte holds the sign in bit 0 and the low 6 value bits in bits
 * 1..6; if its high bit is set, the remaining bits follow as an unsigned
 * varint scaled by 64. A set sign bit maps the value v to -v - 1, so a
 * single byte decodes as 0..63 or -1..-64.
 *
 * @param {Array|Buffer} buf the encoded bytes
 * @param {{p: number}} pos  read cursor, advanced past the varint
 * @returns {number} the decoded value
 */
function decodeVarint( buf, pos ) {
    var first = buf[pos.p++];
    var magnitude = (first & 0x7e) >>> 1;
    if (first & 0x80) {
        // multi-byte value
        magnitude += 64 * decodeUVarint(buf, pos);
    }
    return (first & 1) ? -magnitude - 1 : magnitude;
}
// Read an int32 varint.
// The spec says negative int32 values are always 10 bytes, so decode as Varint64.
function decodeVarint32( buf, pos ) {
return decodeVarint64(buf, pos);
}
// Powers of two used to split a 64-bit varint into a 21-bit low part
// and a 43-bit high part.
var _2e21 = Math.pow(2, 21);
var _2e42 = Math.pow(2, 42);
var _2e43 = Math.pow(2, 43);

/**
 * Read a varint holding a 64-bit twos complement value.
 *
 * To recover e.g. -1 as a negative number the arithmetic must not overflow
 * 53 bits: decoding all 64 bits with decodeUVarint would round ffff...ffff
 * up to +2^64. So the value is gathered in two positive parts, the low
 * 21 bits (the first three bytes) and the high 43 bits, and only then
 * assembled. A high part with its top bit set denotes a negative number.
 *
 * @param {Array|Buffer} buf the encoded bytes
 * @param {{p: number}} pos  read cursor, advanced past the varint
 * @returns {number} the decoded value, or NaN if the varint holds more than 64 bits
 */
function decodeVarint64( buf, pos ) {
    // low 21 bits: up to three bytes of 7 bits each
    var low = 0;
    for (var shift = 0; shift < 21; shift += 7) {
        var octet = buf[pos.p++];
        low += (octet & 0x7f) << shift;
        if (!(octet & 0x80)) return low;
    }

    // high 43 bits
    var high = decodeUVarint(buf, pos);
    if (high >= _2e43) return NaN;
    // bit 42 of the high part is the sign bit of the 64-bit value
    if (high >= _2e42) high -= _2e43;
    return high * _2e21 + low;
}
// Scratch 8-byte Buffer; this redeclares a name already declared with `var` earlier in the file.
// NOTE(review): nothing in the visible code reads or writes tmpbuf -- confirm before removing.
var tmpbuf = new Buffer(8);
/**
 * Read a 4-byte little-endian float at pos.p using ieee-float.
 *
 * @param {Array|Buffer} buf the encoded bytes
 * @param {{p: number}} pos  read cursor, advanced by 4
 * @returns {number} the decoded value
 */
function decodeFloat( buf, pos ) {
    var end = pos.p += 4;
    return fp.readFloatLE(buf, end - 4);
}
/**
 * Read an 8-byte little-endian double at pos.p using ieee-float.
 *
 * @param {Array|Buffer} buf the encoded bytes
 * @param {{p: number}} pos  read cursor, advanced by 8
 * @returns {number} the decoded value
 */
function decodeDouble( buf, pos) {
    var end = pos.p += 8;
    return fp.readDoubleLE(buf, end - 8);
}
/**
 * Read 4 little-endian bytes at pos.p as a signed 32-bit integer.
 * The top byte is shifted with a signed shift, which makes the sum negative
 * when its high bit is set.
 *
 * @param {Array|Buffer} buf the encoded bytes
 * @param {{p: number}} pos  read cursor, advanced by 4
 * @returns {number} the decoded value
 */
function decodeInt32( buf, pos ) {
    var at = pos.p;
    pos.p = at + 4;
    var b0 = buf[at];
    var b1 = buf[at + 1];
    var b2 = buf[at + 2];
    var b3 = buf[at + 3];
    return b0 + (b1 << 8) + (b2 << 16) + (b3 << 24);
}
/**
 * Read 4 little-endian bytes at pos.p as an unsigned 32-bit integer.
 * The top byte is converted back to unsigned after shifting, so the sum
 * stays non-negative.
 *
 * @param {Array|Buffer} buf the encoded bytes
 * @param {{p: number}} pos  read cursor, advanced by 4
 * @returns {number} the decoded value
 */
function decodeUInt32( buf, pos ) {
    var at = pos.p;
    pos.p = at + 4;
    var b0 = buf[at];
    var b1 = buf[at + 1];
    var b2 = buf[at + 2];
    var b3 = buf[at + 3];
    return b0 + (b1 << 8) + (b2 << 16) + ((b3 << 24) >>> 0);
}
/**
 * Read 8 little-endian bytes at pos.p as a signed 64-bit integer.
 *
 * The low word is read unsigned and the high word signed, so the sum comes
 * out negative exactly when the top bit is set. The result is a double:
 * values beyond 53 bits lose precision.
 *
 * @param {Array|Buffer} buf the encoded bytes
 * @param {{p: number}} pos  read cursor, advanced by 8
 * @returns {number} the decoded value
 */
function decodeInt64( buf, pos ) {
    // operands evaluate left to right: low word first, then high word
    return decodeUInt32(buf, pos) + decodeInt32(buf, pos) * 0x100000000;
}
/**
 * Read 8 little-endian bytes at pos.p as an unsigned 64-bit integer.
 *
 * Both 32-bit words are read unsigned and combined. The result is a double:
 * values beyond 53 bits lose precision.
 *
 * @param {Array|Buffer} buf the encoded bytes
 * @param {{p: number}} pos  read cursor, advanced by 8
 * @returns {number} the decoded value
 */
function decodeUInt64( buf, pos ) {
    // operands evaluate left to right: low word first, then high word
    return decodeUInt32(buf, pos) + decodeUInt32(buf, pos) * 0x100000000;
}
/**
 * Read a length-delimited utf8 string: an unsigned varint byte count
 * followed by that many bytes of text.
 *
 * A Buffer source is decoded with its own toString (fast); any other source
 * (e.g. a plain array) is decoded with q-utf8.
 *
 * @param {Array|Buffer} buf the encoded bytes
 * @param {{p: number}} pos  read cursor, advanced past the string
 * @returns {string} the decoded text
 */
function decodeString( buf, pos ) {
    var byteCount = decodeUVarint(buf, pos);
    var start = pos.p;
    var fromBuffer = Buffer.isBuffer(buf);
    var end = pos.p += byteCount;
    return fromBuffer
        ? buf.toString(undefined, start, end)
        : qutf8.utf8_decode(buf, start, end);
}
/**
 * Read a length-delimited byte string: an unsigned varint byte count
 * followed by that many bytes, copied into a new Buffer.
 *
 * @param {Array|Buffer} buf the encoded bytes
 * @param {{p: number}} pos  read cursor, advanced past the bytes
 * @returns {Buffer} a copy of the stored bytes
 */
function decodeBinary( buf, pos ) {
    var count = decodeUVarint(buf, pos);
    var out = new Buffer(count);
    var idx = 0;
    while (idx < count) {
        out[idx++] = buf[pos.p++];
    }
    return out;
}
/** quicktest:
var assert = require('assert');
var qtimeit = require('qtimeit');
assert.equal(decodeUVarint(new Buffer([0x9E, 0xA7, 0x05]), {p:0}), 86942);
assert.equal(decodeUVarint(new Buffer([0x8E, 0x02]), {p:0}), 270);
assert.equal(decodeUVarint(new Buffer([0xAC, 0x02]), {p:0}), 300);
var buf = new Buffer([0x82, 0x81, 0x01]); // 1.000001.0 1.0000001 0.0000001 = 1 + 64 + 128*64 = 8257
var val = decodeVarint(buf, {p: 0});
console.log("AR:", val, buf);
// encode/decode unsigned varint
for (var i = 0; i < 100100; i++) {
var buf = [];
encodeUVarint(i, buf, {p:0})
var n = decodeUVarint(buf, {p:0});
assert.equal(i, n);
}
// encode/decode signed varint
for (var i = -100100; i < 100100; i++) {
var buf = [];
encodeVarint(i, buf, {p:0})
var n = decodeVarint(buf, {p:0});
assert.equal(i, n);
}
var str = "foo\u1234bar";
var buf = [];
encodeString(str, buf, {p:0});
console.log("AR:", buf);
assert.equal(decodeString(buf, {p:0}), str);
var qtimeit = require('qtimeit');
// first test
var data = [512, 513, 514, 515, "hello, world! now is the time for the quick brown fox to jump over the lazy dog."];
var data = [512, 513, 514, 515, "hello, world"];
var format = "IIII";
var format = "IIIIa";
// bson test (ish):
var data = [ "ABC", 1, "DEFGHI\xff", 12345.67e-1, null ];
var format = "aiadb";
// protobufjs test
var format = "aVlqilbbbbbbbdf";
var data = [ "Lorem ipsum dolor sit amet.", 9000, 20161110, 151234 * 0x100000000 + 1051, 1, -42, 1, 0, 0, 1, 0, 0, 1, 204.8, 0.25 ];
var jsonString = JSON.stringify(data);
var jsonBuf = new Buffer(JSON.stringify(data));
var packBuf = pack(format, data);
var packArray = _pack(format, data, new Array(), {p:0});
//console.log("AR: pack/unpack buf", data, packBuf, _unpack(format, packBuf, {p:0}));
//console.log("AR: pack/unpack array", data, new Buffer(packArray), _unpack(format, packArray, {p:0}));
var tmpBuf = new Buffer(1000);
var floatBuf = new Buffer(4); floatBuf.writeFloatLE(1234.5);
assert.equal(decodeFloat(floatBuf, {p:0}), floatBuf.readFloatLE());
var doubleBuf = new Buffer(8); doubleBuf.writeDoubleLE(1234.5e-200);
var doubleBuf = new Buffer(8); doubleBuf.writeDoubleLE(1234.5);
assert.equal(decodeDouble(doubleBuf, {p:0}), doubleBuf.readDoubleLE());
if (1) {
var qtimeit = require('qtimeit');
var x;
qtimeit.bench.timeGoal = .20;
qtimeit.bench.visualize = true;
qtimeit.bench({
'pack': function() { x = pack(format, data) },
// 620 k/s into new Buffer(), 2000 k/s into array -> then new Buffer(). (small struct, 1k buf)
// '_pack': function() { x = _pack(format, data, tmpBuf, {p:0}) },
// 900 k/s
'_pack arr': function() { x = _pack(format, data, new Array(), {p:0}) },
// 900 k/s to buffer, 2000 k/s to array
'_unpack': function() { x = _unpack(format, packBuf, {p:0}) },
// 3100 k/s
'unpack': function() { x = unpack(format, packBuf) },
// 3450 k/s
// 1600 k/s js, 600 k/s Buffer.read
'jsonBuf': function() { x = JSON.parse(jsonBuf) },
// 1760 k/s
'jsonString': function() { x = JSON.parse(jsonString) },
// 3100 k/s
'pack2': function() { x = pack(format, data) },
});
console.log(x, unpack(format, x));
}
/**/