/*
 * idx-data
 * Version:
 * An idx binary data format loader.
 * 182 lines • 6.33 kB
 * JavaScript
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
var tslib_1 = require("tslib");
var fs = require("fs");
var utils_1 = require("./utils");
/**
 * Maps a typed array instance to the type name used throughout this module.
 *
 * @param b A Float32Array, Int32Array or Uint8Array (subclasses included).
 * @returns 'float32', 'int32' or 'uint8'.
 * @throws Whatever `utils_1.assertNever` produces when `b` is none of the above.
 */
var getBufferTypeString = function (b) {
    // Checked in order; the first constructor that `b` is an instance of wins.
    var knownTypes = [
        [Float32Array, 'float32'],
        [Int32Array, 'int32'],
        [Uint8Array, 'uint8'],
    ];
    for (var k = 0; k < knownTypes.length; k += 1) {
        if (b instanceof knownTypes[k][0]) {
            return knownTypes[k][1];
        }
    }
    throw utils_1.assertNever(b, 'Unexpected buffer received');
};
/**
 * Maps a type name to the numeric code stored in the header's data-type byte.
 *
 * @param b 'float32', 'int32' or 'uint8'.
 * @returns 13 for 'float32', 12 for 'int32', 8 for 'uint8'.
 * @throws Whatever `utils_1.assertNever` produces for any other value.
 */
var getBufferEncoding = function (b) {
    switch (b) {
        case 'float32':
            return 13;
        case 'int32':
            return 12;
        case 'uint8':
            return 8;
        default:
            throw utils_1.assertNever(b, 'Unexpected buffer type string received');
    }
};
/**
 * Maps a header data-type code to the typed array constructor that holds it.
 *
 * @param b Numeric type code: 13, 12 or 8.
 * @returns Float32Array for 13, Int32Array for 12, Uint8Array for 8.
 * @throws Error when the code is not one of the supported values.
 */
var getBufferType = function (b) {
    switch (b) {
        case 13:
            return Float32Array;
        case 12:
            return Int32Array;
        case 8:
            return Uint8Array;
        default:
            throw new Error('Unexpected buffer type received');
    }
};
/**
 * Returns how many bytes one element of the given type code occupies.
 *
 * @param b Numeric type code: 13, 12 or 8.
 * @returns 4 for codes 13 and 12, 1 for code 8.
 * @throws Error when the code is not one of the supported values.
 */
var getBufferSizeOffset = function (b) {
    var isFourByteType = b === 13 || b === 12;
    if (isFourByteType) {
        return 4;
    }
    if (b === 8) {
        return 1;
    }
    throw new Error('Unexpected buffer type received');
};
/**
 * Serialises the elements `data[start] .. data[end - 1]` into a new Buffer,
 * big endian for the multi-byte types.
 *
 * @param start Index of the first element to include.
 * @param end Index one past the last element; clamped to `data.length`.
 * @param bufferType 'float32', 'int32' or 'uint8'.
 * @param data The typed array the elements are read from.
 * @returns A Buffer holding only the requested elements, beginning at byte 0.
 *          Empty when the clamped range contains no elements.
 */
var buildBatch = function (start, end, bufferType, data) {
    var enc = getBufferEncoding(bufferType);
    var sizeOffset = getBufferSizeOffset(enc);
    // safeguard to make sure we don't request too much data
    var last = end >= data.length ? data.length : end;
    var count = last > start ? last - start : 0;
    var buf = Buffer.alloc(count * sizeOffset, 0);
    for (var i = start; i < last; ++i) {
        // Offsets are relative to the batch, not to the whole data array:
        // the buffer only has room for `count` elements.
        var offset = (i - start) * sizeOffset;
        if (bufferType === 'float32') {
            buf.writeFloatBE(data[i], offset);
        }
        else if (bufferType === 'int32') {
            buf.writeInt32BE(data[i], offset);
        }
        else if (bufferType === 'uint8') {
            buf.writeUInt8(data[i], offset);
        }
    }
    return buf;
};
/**
 * Writes an idx header followed by the raw element bytes to a writable stream.
 *
 * Header layout: two zero bytes, one data-type byte, one byte holding the
 * number of dimensions, then one big-endian unsigned 32-bit size per dimension.
 * The elements follow in batches of 1024 to reduce memory consumption.
 *
 * @param data A TypedArray containing the data to be written
 * @param shape The shape of the matrix, each element specifying the size of the corresponding dimension
 * @param stream A writable stream that the data (including the header) will be written to.
 */
function writeToStream(data, shape, stream) {
    var ELEMENTS_PER_BATCH = 1024;
    var typeName = getBufferTypeString(data);
    var rank = shape.length;
    // Buffer.alloc zero-fills, so the two leading zero bytes are already in place.
    var header = Buffer.alloc(4 + 4 * rank, 0);
    header.writeUInt8(getBufferEncoding(typeName), 2);
    header.writeUInt8(rank, 3);
    for (var axis = 0; axis < rank; axis += 1) {
        header.writeUInt32BE(shape[axis], 4 * (axis + 1));
    }
    stream.write(header);
    // Walk the data by batch start index; the final batch may be short.
    for (var first = 0; first < data.length; first += ELEMENTS_PER_BATCH) {
        stream.write(buildBatch(first, first + ELEMENTS_PER_BATCH, typeName, data));
    }
}
exports.writeToStream = writeToStream;
/**
 * Writes the TypedArray data to file in the IDX format.
 *
 * Waits for `utils_1.createFolder(file)` before opening the file (presumably
 * this creates any missing parent folders; its implementation is not visible here).
 *
 * @param data A TypedArray containing the data to be written
 * @param shape The shape of the matrix, each element specifying the size of the corresponding dimension
 * @param file The filepath that the data will be written to
 * @returns A promise that resolves once the file stream has closed, and rejects
 *          if the stream reports an error or the data cannot be serialised.
 */
function saveBits(data, shape, file) {
    return tslib_1.__awaiter(this, void 0, void 0, function () {
        return tslib_1.__generator(this, function (_a) {
            switch (_a.label) {
                case 0: return [4 /*yield*/, utils_1.createFolder(file)];
                case 1:
                    _a.sent();
                    return [2 /*return*/, new Promise(function (resolve, reject) {
                            var stream = fs.createWriteStream(file, "binary");
                            // Listeners go on before any write so a failure (bad path,
                            // full disk, ...) rejects instead of being an unhandled
                            // 'error' event that leaves the promise pending.
                            stream.on('error', reject);
                            stream.on('close', function () { return resolve(); });
                            try {
                                writeToStream(data, shape, stream);
                            }
                            catch (err) {
                                // Serialisation failed: report it and release the file handle.
                                reject(err);
                                stream.destroy();
                                return;
                            }
                            // end() flushes the queued writes before the stream closes.
                            stream.end();
                        })];
            }
        });
    });
}
exports.saveBits = saveBits;
/**
 * Listens to the stream and decodes the incoming idx bytes into a TypedArray.
 *
 * Chunks may be split anywhere: bytes that do not yet form a complete header
 * or a complete element are kept and prepended to the next chunk.
 *
 * @param stream The readable stream the idx bytes arrive on
 * @returns A promise for `{ shape, type, data }`. It rejects with
 *          'No data found!' when the stream ends before a complete header was
 *          seen, with the stream's own error if it emits 'error', or with the
 *          decoding error (for example an unsupported type code).
 */
function readFromStream(stream) {
    var data;
    var type;
    var sizeOffset;
    var shape = [];
    var idx = 0;
    // Bytes received but not yet decoded (partial header or partial element).
    var pending = Buffer.alloc(0);
    var failed = false;
    var consume = function (chunk) {
        var buf = pending.length > 0 ? Buffer.concat([pending, chunk]) : chunk;
        var start = 0;
        // until the header has been parsed, wait for all of its bytes
        if (!data) {
            // byte 3 holds the dimension count, which fixes the header length
            var headerSize = buf.length >= 4 ? 4 + 4 * buf.readUInt8(3) : 4;
            if (buf.length < headerSize) {
                pending = buf;
                return;
            }
            type = buf.readUInt8(2);
            var dims = buf.readUInt8(3);
            var Buf = getBufferType(type);
            sizeOffset = getBufferSizeOffset(type);
            var dimSizes = [];
            var expectedData = 1;
            for (var d = 0; d < dims; ++d) {
                var dimSize = buf.readUInt32BE(4 + 4 * d);
                dimSizes.push(dimSize);
                expectedData *= dimSize;
            }
            data = new Buf(expectedData);
            shape = dimSizes;
            start = headerSize;
        }
        // decode only whole elements; a trailing partial one waits for more bytes
        var count = Math.floor((buf.length - start) / sizeOffset);
        for (var i = 0; i < count; i++) {
            var at = start + i * sizeOffset;
            if (type === 13) {
                data[idx++] = buf.readFloatBE(at);
            }
            else if (type === 12) {
                data[idx++] = buf.readInt32BE(at);
            }
            else if (type === 8) {
                data[idx++] = buf.readUInt8(at);
            }
        }
        pending = buf.subarray(start + count * sizeOffset);
    };
    return new Promise(function (resolve, reject) {
        stream.on('readable', function () {
            var chunk;
            try {
                // drain everything currently buffered by the stream
                while ((chunk = stream.read()) !== null) {
                    if (!failed) {
                        consume(chunk);
                    }
                }
            }
            catch (err) {
                // Surface decoding problems through the promise rather than
                // throwing out of the event handler.
                failed = true;
                reject(err);
            }
        });
        stream.on('error', reject);
        stream.on('end', function () {
            if (!data) {
                return reject(new Error('No data found!'));
            }
            var tensor = {
                shape: shape,
                type: getBufferTypeString(data),
                data: data,
            };
            resolve(tensor);
        });
    });
}
exports.readFromStream = readFromStream;
/**
 * Reads an IDX formatted binary file into a TypedArray.
 *
 * @param file Filepath that will be read from
 * @returns Whatever `readFromStream` returns for a read stream opened on `file`.
 */
function loadBits(file) {
    return readFromStream(fs.createReadStream(file));
}
exports.loadBits = loadBits;
//# sourceMappingURL=index.js.map