app-package-builder / blockMap.js

The idea is very simple: at runtime we don't need to process or understand the archive format. We just need to know the file data ranges: where each file's data begins and where it ends.
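For illustration, a minimal sketch (a hypothetical helper, not part of this package) of what knowing the data ranges buys at runtime: given dataStart/dataEnd from the block map, a downloader can read exactly those bytes out of the archive without parsing the 7z format at all.

const fs = require("fs");

// Hypothetical: read one file's compressed data using only its
// [dataStart, dataEnd) range from the block map - no 7z parsing needed.
function readFileDataRange(fd, dataStart, dataEnd) {
  const buffer = Buffer.allocUnsafe(dataEnd - dataStart);
  // the last argument positions the read at dataStart within the archive file
  fs.readSync(fd, buffer, 0, buffer.length, dataStart);
  return buffer;
}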

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.computeBlockMap = exports.createPackageFileInfo = exports.createDifferentialPackage = undefined; var _bluebirdLst; function _load_bluebirdLst() { return _bluebirdLst = require("bluebird-lst"); } var _bluebirdLst2; function _load_bluebirdLst2() { return _bluebirdLst2 = _interopRequireDefault(require("bluebird-lst")); } /* Approach like AppX block map, but with one difference - block not compressed individually, instead, the whole file is compressed using LZMA compression. See (Package File in the developer readme) about compression. So, delta will be not ideal (because compressed data can change not only actually changed block in the file, but others, and we don't set even dict size and default 64M is used), but full package size will be still relative small and will save initial download time/costs. */ let createDifferentialPackage = exports.createDifferentialPackage = (() => { var _ref = (0, (_bluebirdLst || _load_bluebirdLst()).coroutine)(function* (archiveFile) { const fd = yield (0, (_fsExtraP || _load_fsExtraP()).open)(archiveFile, "a+"); try { // compute block map using compressed file data const sevenZFile = new (_SevenZFile || _load_SevenZFile()).SevenZFile(fd); yield sevenZFile.read(); const blockMap = yield computeBlockMap(sevenZFile); const blockMapFileData = yield serializeBlockMap(blockMap, archiveFile); yield (0, (_fsExtraP || _load_fsExtraP()).write)(fd, blockMapFileData, 0, blockMapFileData.length); yield (0, (_fsExtraP || _load_fsExtraP()).close)(fd); const result = yield createFileInfo(blockMapFileData, archiveFile, (yield (0, (_fsExtraP || _load_fsExtraP()).stat)(archiveFile)).size); result.headerSize = sevenZFile.archive.headerSize; return result; } catch (e) { yield (0, (_fsExtraP || _load_fsExtraP()).close)(fd); throw e; } }); return function createDifferentialPackage(_x) { return _ref.apply(this, arguments); }; })(); let serializeBlockMap = (() => { var _ref2 = (0, (_bluebirdLst || _load_bluebirdLst()).coroutine)(function* (blockMap, file) { // lzma doesn't make a lof of sense (151 KB lzma vs 156 KB deflate) for small text file where most of the data are unique strings (encoded checksums) // protobuf size — BlockMap size: 153104, compressed: 151256 So, it means that it doesn't make sense - better to use deflate instead of complicating (another runtime dependency (google-protobuf), proto files and so on) // size encoding in a form where next value is a relative to previous doesn't make sense (zero savings in tests), since in our case next size can be less than previous (so, int will be negative and `-` symbol will be added) // sha2556 secure hash is not required, md5 collision-resistance is good for our purpose, secure hash algorithm not required, in any case sha512 checksum is checked for the whole file. And size of matched block is checked in addition to. 
const blockMapDataString = JSON.stringify(blockMap); const blockMapFileData = yield gzip(blockMapDataString, { level: 9, chunkSize: 1024 * 1024 }); if (process.env.DEBUG_BLOCKMAP) { const buffer = Buffer.from(blockMapDataString); yield (0, (_fsExtraP || _load_fsExtraP()).writeFile)(`${file}.blockMap.json`, buffer); console.log(`BlockMap size: ${buffer.length}, compressed: ${blockMapFileData.length}`); } return blockMapFileData; }); return function serializeBlockMap(_x2, _x3) { return _ref2.apply(this, arguments); }; })(); let createFileInfo = (() => { var _ref3 = (0, (_bluebirdLst || _load_bluebirdLst()).coroutine)(function* (blockMapData, file, fileSize) { return { path: file, size: fileSize, blockMapSize: blockMapData.length, blockMapData, sha512: yield (0, (_builderUtil || _load_builderUtil()).hashFile)(file) }; }); return function createFileInfo(_x4, _x5, _x6) { return _ref3.apply(this, arguments); }; })(); let createPackageFileInfo = exports.createPackageFileInfo = (() => { var _ref4 = (0, (_bluebirdLst || _load_bluebirdLst()).coroutine)(function* (file, blockMapSize) { return { path: file, size: (yield (0, (_fsExtraP || _load_fsExtraP()).stat)(file)).size, blockMapSize, sha512: yield (0, (_builderUtil || _load_builderUtil()).hashFile)(file) }; }); return function createPackageFileInfo(_x7, _x8) { return _ref4.apply(this, arguments); }; })(); let computeBlockMap = exports.computeBlockMap = (() => { var _ref5 = (0, (_bluebirdLst || _load_bluebirdLst()).coroutine)(function* (sevenZFile) { const archive = sevenZFile.archive; const builder = new BlockMapBuilder(archive); const files = []; for (const file of archive.files) { if (!file.isDirectory) { builder.buildFile(file); // do not add empty files if (file.dataStart !== file.dataEnd) { files.push(file); } } } return yield doComputeBlockMap(files, sevenZFile.fd); }); return function computeBlockMap(_x9) { return _ref5.apply(this, arguments); }; })(); let doComputeBlockMap = (() => { var _ref6 = (0, (_bluebirdLst || _load_bluebirdLst()).coroutine)(function* (files, fd) { // just to be sure that file data really doesn't have gap and grouped one by one for (let i = 0; i < files.length - 1; i++) { if (files[i].dataEnd !== files[i + 1].dataStart) { throw new Error("Must be no gap"); } } const stats = []; const blocks = yield (_bluebirdLst2 || _load_bluebirdLst2()).default.map(files, (() => { var _ref7 = (0, (_bluebirdLst || _load_bluebirdLst()).coroutine)(function* (file) { const chunker = new (_ContentDefinedChunker || _load_ContentDefinedChunker()).ContentDefinedChunker(); const blocks = yield chunker.computeChunks(fd, file.dataStart, file.dataEnd, file.name); if (process.env.DEBUG_BLOCKMAP) { stats.push(getStat(blocks.sizes, file.name)); } return Object.assign({ name: file.name.replace(/\\/g, "/"), offset: file.dataStart }, blocks); }); return function (_x12) { return _ref7.apply(this, arguments); }; })(), { concurrency: 2 }); if (process.env.DEBUG_BLOCKMAP) { let duplicate = 0; let savedSize = 0; // noinspection JSMismatchedCollectionQueryUpdate const checksums = []; // noinspection JSMismatchedCollectionQueryUpdate const sizes = []; const index = new Map(); for (const file of blocks) { for (let i = 0; i < file.checksums.length; i++) { const checksum = file.checksums[i]; const size = file.sizes[i]; if (index.has(checksum)) { duplicate++; savedSize += size; } else { index.set(checksum, checksums.length); checksums.push(checksum); sizes.push(size); } } } console.log(stats.join("\n")); console.log(`duplicates: ${duplicate}, saved: 
${savedSize}`); } return { version: "2", files: blocks }; }); return function doComputeBlockMap(_x10, _x11) { return _ref6.apply(this, arguments); }; })(); var _builderUtil; function _load_builderUtil() { return _builderUtil = require("builder-util"); } var _blockMapApi; function _load_blockMapApi() { return _blockMapApi = require("builder-util-runtime/out/blockMapApi"); } var _fsExtraP; function _load_fsExtraP() { return _fsExtraP = require("fs-extra-p"); } var _ContentDefinedChunker; function _load_ContentDefinedChunker() { return _ContentDefinedChunker = require("./ContentDefinedChunker"); } var _SevenZFile; function _load_SevenZFile() { return _SevenZFile = require("./SevenZFile"); } function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } const gzip = (_bluebirdLst2 || _load_bluebirdLst2()).default.promisify(require("zlib").gzip); class BlockMapBuilder { constructor(archive) { this.archive = archive; this.currentFolderIndex = -1; } // noinspection BadExpressionStatementJS buildFile(file) { const archive = this.archive; const folderIndex = file.blockIndex; if (folderIndex < 0) { // empty file file.dataStart = 0; file.dataEnd = 0; return; } if (folderIndex === this.currentFolderIndex) { throw new Error("Solid not supported"); } this.currentFolderIndex = folderIndex; const folder = archive.folders[folderIndex]; const firstPackStreamIndex = folder.firstPackedStreamIndex; const folderOffset = (_blockMapApi || _load_blockMapApi()).SIGNATURE_HEADER_SIZE + archive.packPosition + archive.streamMap.packStreamOffsets[firstPackStreamIndex]; let size = 0; for (let i = 0; i < folder.packedStreams.length; i++) { size += archive.packedSizes[firstPackStreamIndex + i]; } file.dataStart = folderOffset; file.dataEnd = folderOffset + size; // console.log(`${file.name} ${size}, ${folder.totalInputStreams}`) } } function getStat(sizes, name) { const sortedSizes = sizes.slice().sort((a, b) => a - b); const middle = Math.floor(sortedSizes.length / 2); const isEven = sortedSizes.length % 2 === 0; const median = isEven ? (sortedSizes[middle] + sortedSizes[middle - 1]) / 2 : sortedSizes[middle]; return `${sizes.length} chunks generated for ${name} (min: ${sortedSizes[0]}, max: ${sortedSizes[sortedSizes.length - 1]}, median: ${median})`; } //# sourceMappingURL=blockMap.js.map
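A minimal usage sketch for the build side. The require path is an assumption (this page doesn't show the package entry point); the returned shape follows createFileInfo above.

const { createDifferentialPackage } = require("app-package-builder/out/blockMap"); // path is an assumption

async function buildPackage(archiveFile) {
  // appends the gzipped block map to the 7z archive and returns its metadata
  const info = await createDifferentialPackage(archiveFile);
  // info: { path, size, blockMapSize, blockMapData, sha512, headerSize }
  console.log(`${info.path}: ${info.size} bytes, block map: ${info.blockMapSize} bytes`);
  return info;
}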
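On the consuming side, a sketch under the assumption (consistent with the "a+" append in createDifferentialPackage and the blockMapSize recorded by createFileInfo) that the gzipped block map occupies the last blockMapSize bytes of the package file. Gunzipping and parsing it yields the { version: "2", files: [{ name, offset, checksums, sizes }, ...] } structure produced by doComputeBlockMap.

const fs = require("fs");
const zlib = require("zlib");

// Hypothetical reader: recover the block map appended to the package.
// blockMapSize is assumed to be known out of band (see createPackageFileInfo).
function readBlockMap(packageFile, blockMapSize) {
  const fd = fs.openSync(packageFile, "r");
  try {
    const fileSize = fs.fstatSync(fd).size;
    const buffer = Buffer.allocUnsafe(blockMapSize);
    // the block map is the tail of the file
    fs.readSync(fd, buffer, 0, blockMapSize, fileSize - blockMapSize);
    return JSON.parse(zlib.gunzipSync(buffer).toString());
  } finally {
    fs.closeSync(fd);
  }
}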