UNPKG

idx-data

Version:

An idx binary data format loader.

182 lines 6.33 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
var tslib_1 = require("tslib");
var fs = require("fs");
var utils_1 = require("./utils");

/**
 * Maps a TypedArray instance to the type name used throughout this module.
 *
 * @param b A Float32Array, Int32Array or Uint8Array
 * @returns 'float32', 'int32' or 'uint8'
 * @throws Whatever utils_1.assertNever produces for any other value
 */
var getBufferTypeString = function (b) {
    if (b instanceof Float32Array)
        return 'float32';
    if (b instanceof Int32Array)
        return 'int32';
    if (b instanceof Uint8Array)
        return 'uint8';
    throw utils_1.assertNever(b, 'Unexpected buffer received');
};

/**
 * Maps a type name to the numeric type code stored in the third header byte.
 *
 * @param b 'float32', 'int32' or 'uint8'
 * @returns 13, 12 or 8 respectively
 * @throws Whatever utils_1.assertNever produces for any other value
 */
var getBufferEncoding = function (b) {
    if (b === 'float32')
        return 13;
    if (b === 'int32')
        return 12;
    if (b === 'uint8')
        return 8;
    throw utils_1.assertNever(b, 'Unexpected buffer type string received');
};

/**
 * Maps a numeric type code to the TypedArray constructor used to hold the data.
 *
 * @param b Type code as read from the header (13, 12 or 8)
 * @returns Float32Array, Int32Array or Uint8Array
 * @throws Error if the code is not one of the supported values
 */
var getBufferType = function (b) {
    if (b === 13)
        return Float32Array;
    if (b === 12)
        return Int32Array;
    if (b === 8)
        return Uint8Array;
    throw new Error('Unexpected buffer type received');
};

/**
 * Returns the number of bytes one element of the given type code occupies.
 *
 * @param b Type code (13, 12 or 8)
 * @returns 4 for codes 13 and 12, 1 for code 8
 * @throws Error if the code is not one of the supported values
 */
var getBufferSizeOffset = function (b) {
    if (b === 13)
        return 4;
    if (b === 12)
        return 4;
    if (b === 8)
        return 1;
    throw new Error('Unexpected buffer type received');
};

/**
 * Serialises data[start, end) into a freshly allocated big endian Buffer.
 *
 * @param start Index of the first element to serialise
 * @param end Index one past the last element; clamped to data.length
 * @param bufferType 'float32', 'int32' or 'uint8'
 * @param data The TypedArray being serialised
 * @returns A Buffer holding exactly the selected elements
 */
var buildBatch = function (start, end, bufferType, data) {
    var enc = getBufferEncoding(bufferType);
    var sizeOffset = getBufferSizeOffset(enc);
    // safeguard to make sure we don't request too much data
    var stop = Math.min(end, data.length);
    var count = Math.max(0, stop - start);
    var buf = Buffer.alloc(count * sizeOffset, 0);
    for (var i = start; i < stop; ++i) {
        // The buffer only holds this batch, so the byte offset is relative to
        // `start`. Using the absolute index writes past the end of the buffer
        // for every batch after the first one.
        var offset = (i - start) * sizeOffset;
        if (bufferType === 'float32')
            buf.writeFloatBE(data[i], offset);
        else if (bufferType === 'int32')
            buf.writeInt32BE(data[i], offset);
        else if (bufferType === 'uint8')
            buf.writeUInt8(data[i], offset);
    }
    return buf;
};

/**
 * @param data A TypedArray containing the data to be written
 * @param shape The shape of the matrix, each element specifying the size of the corresponding dimension
 * @param stream A writable stream that the data (including the header) will be written to.
 *
 * Writes raw bytes in big endian mode to the writable stream, starting with the idx header.
 * Writing occurs in batches to reduce memory consumption.
 * The return value of stream.write is not inspected and the stream is not ended here.
 */
function writeToStream(data, shape, stream) {
    var bufferType = getBufferTypeString(data);
    var headerSize = 4 + shape.length * 4;
    var header = Buffer.alloc(headerSize, 0);
    // first two bytes are always 0
    header.writeUInt16BE(0, 0);
    // next byte shows the data type
    header.writeUInt8(getBufferEncoding(bufferType), 2);
    // 4th byte shows the number of dimensions
    header.writeUInt8(shape.length, 3);
    // remainder of header should encode the size of each dim
    for (var d = 0; d < shape.length; ++d) {
        header.writeUInt32BE(shape[d], 4 + 4 * d);
    }
    // go ahead and write the header
    stream.write(header);
    // write the data in batches of at most `batchSize` elements
    var batchSize = 1024;
    for (var start = 0; start < data.length; start += batchSize) {
        stream.write(buildBatch(start, start + batchSize, bufferType, data));
    }
}
exports.writeToStream = writeToStream;

/**
 * @param data A TypedArray containing the data to be written
 * @param shape The shape of the matrix, each element specifying the size of the corresponding dimension
 * @param file The filepath that the data will be written to
 * @returns A promise that resolves once the write stream has emitted 'close'
 *          and rejects if creating the folder, serialising or writing fails.
 *
 * Writes the TypedArray data to file in the IDX format.
 * Calls utils_1.createFolder(file) before opening the file.
 */
function saveBits(data, shape, file) {
    return tslib_1.__awaiter(this, void 0, void 0, function () {
        var stream;
        return tslib_1.__generator(this, function (_a) {
            switch (_a.label) {
                case 0: return [4 /*yield*/, utils_1.createFolder(file)];
                case 1:
                    _a.sent();
                    stream = fs.createWriteStream(file, "binary");
                    return [2 /*return*/, new Promise(function (resolve, reject) {
                            // Attach listeners before writing so no event can be missed.
                            stream.on('error', reject);
                            stream.on('close', resolve);
                            try {
                                writeToStream(data, shape, stream);
                                // end() flushes the queued writes before the file is closed
                                stream.end();
                            }
                            catch (err) {
                                // do not leave the file descriptor open when serialising fails
                                stream.destroy();
                                reject(err);
                            }
                        })];
            }
        });
    });
}
exports.saveBits = saveBits;

/**
 * @param stream The stream where the data will be sent
 * @returns A promise resolving to { shape, type, data } once the stream ends.
 *          It rejects with 'No data found!' when the stream ends before a
 *          complete header was received, with the stream's own error when it
 *          emits 'error', and with the parsing error when the header holds an
 *          unsupported type code.
 *
 * Listens to the stream and saves incoming data to a TypedArray.
 * Chunks may split the header or a multi-byte value at any position; bytes that
 * cannot be decoded yet are carried over to the next chunk. Trailing bytes that
 * never form a whole value are ignored.
 */
function readFromStream(stream) {
    var data;
    var type;
    var sizeOffset;
    var shape = [];
    var idx = 0;
    // bytes received but not decoded yet (incomplete header or partial value)
    var pending = null;
    var failed = false;
    var consume = function (chunk) {
        var buf = pending ? Buffer.concat([pending, chunk]) : chunk;
        pending = null;
        var start = 0;
        // until the header is complete, keep buffering
        if (!data) {
            if (buf.length < 4) {
                pending = buf;
                return;
            }
            var dims = buf.readUInt8(3);
            var headerSize = 4 + 4 * dims;
            if (buf.length < headerSize) {
                pending = buf;
                return;
            }
            type = buf.readUInt8(2);
            var Buf = getBufferType(type);
            sizeOffset = getBufferSizeOffset(type);
            var expectedData = 1;
            for (var d = 0; d < dims; ++d) {
                var dimSize = buf.readUInt32BE(4 + 4 * d);
                shape.push(dimSize);
                expectedData *= dimSize;
            }
            data = new Buf(expectedData);
            start = headerSize;
        }
        // only decode whole values; the remainder waits for the next chunk
        var count = Math.floor((buf.length - start) / sizeOffset);
        for (var i = 0; i < count; i++) {
            var offset = start + i * sizeOffset;
            if (type === 13)
                data[idx++] = buf.readFloatBE(offset);
            else if (type === 12)
                data[idx++] = buf.readInt32BE(offset);
            else if (type === 8)
                data[idx++] = buf.readUInt8(offset);
        }
        var consumed = start + count * sizeOffset;
        if (consumed < buf.length)
            pending = buf.slice(consumed);
    };
    return new Promise(function (resolve, reject) {
        stream.on('readable', function () {
            if (failed)
                return;
            try {
                var chunk = stream.read();
                // drain everything that is currently buffered
                while (chunk !== null) {
                    consume(chunk);
                    chunk = stream.read();
                }
            }
            catch (err) {
                failed = true;
                if (typeof stream.destroy === 'function')
                    stream.destroy();
                reject(err);
            }
        });
        stream.on('error', reject);
        stream.on('end', function () {
            if (!data)
                return reject(new Error('No data found!'));
            var tensor = {
                shape: shape,
                type: getBufferTypeString(data),
                data: data,
            };
            resolve(tensor);
        });
    });
}
exports.readFromStream = readFromStream;

/**
 *
 * @param file Filepath that will be read from
 * @returns The promise returned by readFromStream for a read stream on `file`
 *
 * Reads from an IDX formatted binary file into a TypedArray.
 */
function loadBits(file) {
    var stream = fs.createReadStream(file);
    return readFromStream(stream);
}
exports.loadBits = loadBits;
//# sourceMappingURL=index.js.map