UNPKG

jsroot
import { isStr, isObject } from './core.mjs';
import { R__unzip } from './io.mjs';
import { TDrawSelector, treeDraw } from './tree.mjs';

const LITTLE_ENDIAN = true;

class RBufferReader {

   constructor(buffer) {
      if (buffer instanceof ArrayBuffer) {
         this.buffer = buffer;
         this.byteOffset = 0;
         this.byteLength = buffer.byteLength;
      } else if (ArrayBuffer.isView(buffer)) {
         this.buffer = buffer.buffer;
         this.byteOffset = buffer.byteOffset;
         this.byteLength = buffer.byteLength;
      } else
         throw new TypeError('Invalid buffer type');
      this.view = new DataView(this.buffer);
      // important - offset should start from actual place in the buffer
      this.offset = this.byteOffset;
   }

   // Move to a specific position in the buffer
   seek(position) {
      if (typeof position === 'bigint') {
         if (position > BigInt(Number.MAX_SAFE_INTEGER))
            throw new Error(`Offset too large to seek safely: ${position}`);
         this.offset = Number(position);
      } else
         this.offset = position;
   }

   // Read unsigned 8-bit integer (1 byte)
   readU8() {
      const val = this.view.getUint8(this.offset);
      this.offset += 1;
      return val;
   }

   // Read unsigned 16-bit integer (2 bytes)
   readU16() {
      const val = this.view.getUint16(this.offset, LITTLE_ENDIAN);
      this.offset += 2;
      return val;
   }

   // Read unsigned 32-bit integer (4 bytes)
   readU32() {
      const val = this.view.getUint32(this.offset, LITTLE_ENDIAN);
      this.offset += 4;
      return val;
   }

   // Read signed 8-bit integer (1 byte)
   readS8() {
      const val = this.view.getInt8(this.offset);
      this.offset += 1;
      return val;
   }

   // Read signed 16-bit integer (2 bytes)
   readS16() {
      const val = this.view.getInt16(this.offset, LITTLE_ENDIAN);
      this.offset += 2;
      return val;
   }

   // Read signed 32-bit integer (4 bytes)
   readS32() {
      const val = this.view.getInt32(this.offset, LITTLE_ENDIAN);
      this.offset += 4;
      return val;
   }

   // Read 32-bit float (4 bytes)
   readF32() {
      const val = this.view.getFloat32(this.offset, LITTLE_ENDIAN);
      this.offset += 4;
      return val;
   }

   // Read 64-bit float (8 bytes)
   readF64() {
      const val = this.view.getFloat64(this.offset, LITTLE_ENDIAN);
      this.offset += 8;
      return val;
   }

   // Read a string with 32-bit length prefix
   readString() {
      const length = this.readU32();
      let str = '';
      for (let i = 0; i < length; i++)
         str += String.fromCharCode(this.readU8());
      return str;
   }

   // Read unsigned 64-bit integer (8 bytes)
   readU64() {
      const val = this.view.getBigUint64(this.offset, LITTLE_ENDIAN);
      this.offset += 8;
      return val;
   }

   // Read signed 64-bit integer (8 bytes)
   readS64() {
      const val = this.view.getBigInt64(this.offset, LITTLE_ENDIAN);
      this.offset += 8;
      return val;
   }

} // class RBufferReader
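// Usage sketch (illustrative only, not part of the module API). RBufferReader
// walks a little-endian buffer sequentially; the byte values below are made up.
//
//    const bytes = new Uint8Array([3, 0, 0, 0, 0x61, 0x62, 0x63]);
//    const r = new RBufferReader(bytes.buffer);
//    r.readString(); // 'abc' - 32-bit length prefix (3), then 3 characters
//    r.seek(4);
//    r.readU8();     // 0x61 - seek() moves to an absolute byte position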
const ENTupleColumnType = {
   kBit: 0x00,
   kByte: 0x01,
   kChar: 0x02,
   kInt8: 0x03,
   kUInt8: 0x04,
   kInt16: 0x05,
   kUInt16: 0x06,
   kInt32: 0x07,
   kUInt32: 0x08,
   kInt64: 0x09,
   kUInt64: 0x0A,
   kReal16: 0x0B,
   kReal32: 0x0C,
   kReal64: 0x0D,
   kIndex32: 0x0E,
   kIndex64: 0x0F,
   kSwitch: 0x10,
   kSplitInt16: 0x11,
   kSplitUInt16: 0x12,
   kSplitInt32: 0x13,
   kSplitUInt32: 0x14,
   kSplitInt64: 0x15,
   kSplitUInt64: 0x16,
   kSplitReal16: 0x17,
   kSplitReal32: 0x18,
   kSplitReal64: 0x19,
   kSplitIndex32: 0x1A,
   kSplitIndex64: 0x1B,
   kReal32Trunc: 0x1C,
   kReal32Quant: 0x1D
};

/** @summary Rearrange bytes from split format back to normal (row-wise) format for decoding */
function reconstructUnsplitBuffer(blob, columnDescriptor) {
   const { coltype } = columnDescriptor;
   if (coltype === ENTupleColumnType.kSplitUInt16 || coltype === ENTupleColumnType.kSplitUInt32 ||
       coltype === ENTupleColumnType.kSplitUInt64 || coltype === ENTupleColumnType.kSplitReal16 ||
       coltype === ENTupleColumnType.kSplitReal32 || coltype === ENTupleColumnType.kSplitReal64 ||
       coltype === ENTupleColumnType.kSplitIndex32 || coltype === ENTupleColumnType.kSplitIndex64 ||
       coltype === ENTupleColumnType.kSplitInt16 || coltype === ENTupleColumnType.kSplitInt32 ||
       coltype === ENTupleColumnType.kSplitInt64) {
      // Determine element byte size based on column type
      let byteSize;
      switch (coltype) {
         case ENTupleColumnType.kSplitReal64:
         case ENTupleColumnType.kSplitInt64:
         case ENTupleColumnType.kSplitUInt64:
         case ENTupleColumnType.kSplitIndex64:
            byteSize = 8; break;
         case ENTupleColumnType.kSplitReal32:
         case ENTupleColumnType.kSplitInt32:
         case ENTupleColumnType.kSplitIndex32:
         case ENTupleColumnType.kSplitUInt32:
            byteSize = 4; break;
         case ENTupleColumnType.kSplitInt16:
         case ENTupleColumnType.kSplitUInt16:
         case ENTupleColumnType.kSplitReal16:
            byteSize = 2; break;
         default:
            throw new Error(`Unsupported split coltype: ${coltype} (0x${coltype.toString(16).padStart(2, '0')})`);
      }

      const splitView = new DataView(blob.buffer, blob.byteOffset, blob.byteLength),
            count = blob.byteLength / byteSize,
            outBuffer = new ArrayBuffer(blob.byteLength),
            outBytes = new Uint8Array(outBuffer);

      for (let i = 0; i < count; ++i) {
         for (let b = 0; b < byteSize; ++b) {
            const splitIndex = b * count + i, // byte b of value i in the split (column-major) layout
                  byte = splitView.getUint8(splitIndex),
                  writeIndex = i * byteSize + b; // byte b of value i in the row-wise layout
            outBytes[writeIndex] = byte;
         }
      }

      // Return the rearranged blob and the remapped (non-split) coltype
      const newBlob = outBuffer;
      let newColtype;
      switch (coltype) {
         case ENTupleColumnType.kSplitUInt16: newColtype = ENTupleColumnType.kUInt16; break;
         case ENTupleColumnType.kSplitUInt32: newColtype = ENTupleColumnType.kUInt32; break;
         case ENTupleColumnType.kSplitUInt64: newColtype = ENTupleColumnType.kUInt64; break;
         case ENTupleColumnType.kSplitIndex32: newColtype = ENTupleColumnType.kIndex32; break;
         case ENTupleColumnType.kSplitIndex64: newColtype = ENTupleColumnType.kIndex64; break;
         case ENTupleColumnType.kSplitReal16: newColtype = ENTupleColumnType.kReal16; break;
         case ENTupleColumnType.kSplitReal32: newColtype = ENTupleColumnType.kReal32; break;
         case ENTupleColumnType.kSplitReal64: newColtype = ENTupleColumnType.kReal64; break;
         case ENTupleColumnType.kSplitInt16: newColtype = ENTupleColumnType.kInt16; break;
         case ENTupleColumnType.kSplitInt32: newColtype = ENTupleColumnType.kInt32; break;
         case ENTupleColumnType.kSplitInt64: newColtype = ENTupleColumnType.kInt64; break;
         default:
            throw new Error(`Unsupported split coltype for reassembly: ${coltype}`);
      }
      return { blob: newBlob, coltype: newColtype };
   }

   // Not a split type - return original blob and coltype unchanged
   return { blob, coltype };
}

/** @summary Decode a reconstructed index buffer (32- or 64-bit deltas to absolute indices) */
function DecodeDeltaIndex(blob, coltype) {
   let deltas, result;
   if (coltype === ENTupleColumnType.kIndex32) {
      deltas = new Int32Array(blob.buffer || blob, blob.byteOffset || 0, blob.byteLength / 4);
      result = new Int32Array(deltas.length);
   } else if (coltype === ENTupleColumnType.kIndex64) {
      deltas = new BigInt64Array(blob.buffer || blob, blob.byteOffset || 0, blob.byteLength / 8);
      result = new BigInt64Array(deltas.length);
   } else
      throw new Error(`DecodeDeltaIndex: unsupported column type ${coltype}`);

   // Accumulate deltas into absolute index values
   if (deltas.length > 0)
      result[0] = deltas[0];
   for (let i = 1; i < deltas.length; ++i)
      result[i] = result[i - 1] + deltas[i];
   return { blob: result, coltype };
}
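// Worked example (hypothetical data): a kSplitIndex32 page stores the bytes of
// all values column-wise, and the values themselves as deltas. For the two
// offsets [3, 5] the page holds the deltas [3, 2] in byte-split order:
//
//    03 02 00 00 00 00 00 00   (byte 0 of each value, then byte 1, ...)
//
// reconstructUnsplitBuffer() restores the row-wise layout [3, 2], after which
// DecodeDeltaIndex() accumulates the deltas back into [3, 5].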
/** @summary Decode a reconstructed signed integer buffer using ZigZag encoding */
function decodeZigzag(blob, coltype) {
   let zigzag, result;
   if (coltype === ENTupleColumnType.kInt16) {
      zigzag = new Uint16Array(blob.buffer || blob, blob.byteOffset || 0, blob.byteLength / 2);
      result = new Int16Array(zigzag.length);
   } else if (coltype === ENTupleColumnType.kInt32) {
      zigzag = new Uint32Array(blob.buffer || blob, blob.byteOffset || 0, blob.byteLength / 4);
      result = new Int32Array(zigzag.length);
   } else if (coltype === ENTupleColumnType.kInt64) {
      zigzag = new BigUint64Array(blob.buffer || blob, blob.byteOffset || 0, blob.byteLength / 8);
      result = new BigInt64Array(zigzag.length);
   } else
      throw new Error(`decodeZigzag: unsupported column type ${coltype}`);

   for (let i = 0; i < zigzag.length; ++i) {
      // ZigZag decode: (x >>> 1) ^ -(x & 1)
      // BigInt has no unsigned right shift, so the 64-bit case must use BigInt operators
      const x = zigzag[i];
      result[i] = (typeof x === 'bigint') ? ((x >> 1n) ^ -(x & 1n)) : ((x >>> 1) ^ (-(x & 1)));
   }
   return { blob: result, coltype };
}

// Envelope Types
// TODO: Define usage logic for envelope types in future
// const kEnvelopeTypeHeader = 0x01,
//       kEnvelopeTypeFooter = 0x02,
//       kEnvelopeTypePageList = 0x03,

// Field Flags
const kFlagRepetitiveField = 0x01,
      kFlagProjectedField = 0x02,
      kFlagHasTypeChecksum = 0x04,
      // Column Flags
      kFlagDeferredColumn = 0x01,
      kFlagHasValueRange = 0x02;


class RNTupleDescriptorBuilder {

   deserializeHeader(header_blob) {
      if (!header_blob)
         return;

      const reader = new RBufferReader(header_blob),
            payloadStart = reader.offset,
            // Read the envelope metadata
            { envelopeLength } = this._readEnvelopeMetadata(reader),
            // The envelope checksum occupies the last 8 bytes of the envelope
            checksumPos = payloadStart + envelopeLength - 8,
            currentPos = reader.offset;

      // Seek to the end of the envelope to read the checksum, then return
      reader.seek(checksumPos);
      this.headerEnvelopeChecksum = reader.readU64();
      reader.seek(currentPos);

      // Read feature flags list (may span multiple 64-bit words)
      this._readFeatureFlags(reader);

      // Read metadata strings
      this.name = reader.readString();
      this.description = reader.readString();
      this.writer = reader.readString();

      // 4 list frames inside the header envelope
      this._readSchemaDescription(reader);
   }

   deserializeFooter(footer_blob) {
      if (!footer_blob)
         return;

      const reader = new RBufferReader(footer_blob);

      // Read the envelope metadata
      this._readEnvelopeMetadata(reader);

      // Feature flags (list of 64-bit words)
      this._readFeatureFlags(reader);

      // Header checksum (64-bit xxhash3)
      const headerChecksumFromFooter = reader.readU64();
      if (headerChecksumFromFooter !== this.headerEnvelopeChecksum)
         throw new Error('RNTuple corrupted: header checksum does not match footer checksum.');

      const schemaExtensionSize = reader.readS64();
      if (schemaExtensionSize < 0)
         throw new Error('Schema extension frame is not a record frame, which is unexpected.');

      // Schema extension record frame (4 list frames inside)
      this._readSchemaDescription(reader);

      // Cluster Group record frame
      this._readClusterGroups(reader);
   }

   _readEnvelopeMetadata(reader) {
      const typeAndLength = reader.readU64(),
            // The lower 16 bits hold the envelope type ID, the upper 48 bits the envelope length
            envelopeType = Number(typeAndLength & 0xFFFFn),
            envelopeLength = Number((typeAndLength >> 16n) & 0xFFFFFFFFFFFFn);
      return { envelopeType, envelopeLength };
   }

   _readSchemaDescription(reader) {
      // Read new descriptor arrays from the input
      const newFields = this._readFieldDescriptors(reader),
            newColumns = this._readColumnDescriptors(reader),
            newAliases = this._readAliasColumn(reader),
            newExtra = this._readExtraTypeInformation(reader);

      // Merge the new arrays into the existing ones
      this.fieldDescriptors = (this.fieldDescriptors || []).concat(newFields);
      this.columnDescriptors = (this.columnDescriptors || []).concat(newColumns);
      this.aliasColumns = (this.aliasColumns || []).concat(newAliases);
      this.extraTypeInfo = (this.extraTypeInfo || []).concat(newExtra);
   }
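   // Example (hypothetical value): for typeAndLength = 0x0000000000500001n,
   // _readEnvelopeMetadata() yields envelopeType = 0x01 and
   // envelopeLength = 0x50 = 80 bytes.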
   _readFeatureFlags(reader) {
      this.featureFlags = [];
      while (true) {
         const val = reader.readU64();
         this.featureFlags.push(val);
         if ((val & 0x8000000000000000n) === 0n)
            break; // MSB not set: end of list
      }

      // verify all feature flags are zero
      if (this.featureFlags.some(v => v !== 0n))
         throw new Error('Unexpected non-zero feature flags: ' + this.featureFlags);
   }

   _readFieldDescriptors(reader) {
      const startOffset = BigInt(reader.offset),
            fieldListSize = reader.readS64(), // signed 64-bit
            fieldListIsList = fieldListSize < 0;
      if (!fieldListIsList)
         throw new Error('Field list frame is not a list frame, which is required.');

      // List frame: list of field record frames
      const fieldListCount = reader.readU32(), // number of field entries
            fieldDescriptors = [];

      for (let i = 0; i < fieldListCount; ++i) {
         const recordStart = BigInt(reader.offset),
               fieldRecordSize = reader.readS64(),
               fieldVersion = reader.readU32(),
               typeVersion = reader.readU32(),
               parentFieldId = reader.readU32(),
               structRole = reader.readU16(),
               flags = reader.readU16(),
               fieldName = reader.readString(),
               typeName = reader.readString(),
               typeAlias = reader.readString(),
               description = reader.readString();

         let arraySize = null, sourceFieldId = null, checksum = null;
         if (flags & kFlagRepetitiveField)
            arraySize = reader.readU64();
         if (flags & kFlagProjectedField)
            sourceFieldId = reader.readU32();
         if (flags & kFlagHasTypeChecksum)
            checksum = reader.readU32();

         fieldDescriptors.push({
            fieldVersion, typeVersion, parentFieldId, structRole, flags,
            fieldName, typeName, typeAlias, description,
            arraySize, sourceFieldId, checksum
         });

         // Jump to the end of the record frame, skipping any unknown trailing data
         reader.seek(Number(recordStart + fieldRecordSize));
      }
      // Jump to the end of the list frame (its size is stored negated)
      reader.seek(Number(startOffset - fieldListSize));
      return fieldDescriptors;
   }

   _readColumnDescriptors(reader) {
      const startOffset = BigInt(reader.offset),
            columnListSize = reader.readS64(),
            columnListIsList = columnListSize < 0;
      if (!columnListIsList)
         throw new Error('Column list frame is not a list frame, which is required.');

      const columnListCount = reader.readU32(), // number of column entries
            columnDescriptors = [];

      for (let i = 0; i < columnListCount; ++i) {
         const recordStart = BigInt(reader.offset),
               columnRecordSize = reader.readS64(),
               coltype = reader.readU16(),
               bitsOnStorage = reader.readU16(),
               fieldId = reader.readU32(),
               flags = reader.readU16(),
               representationIndex = reader.readU16();

         let firstElementIndex = null, minValue = null, maxValue = null;
         if (flags & kFlagDeferredColumn)
            firstElementIndex = reader.readU64();
         if (flags & kFlagHasValueRange) {
            minValue = reader.readF64();
            maxValue = reader.readF64();
         }

         const column = {
            coltype, bitsOnStorage, fieldId, flags,
            representationIndex, firstElementIndex, minValue, maxValue,
            index: i
         };
         column.isDeferred = function() {
            // kFlagDeferredColumn is a module-level constant, not a static class member
            return (this.flags & kFlagDeferredColumn) !== 0;
         };
         column.isSuppressed = function() {
            return this.firstElementIndex !== null && this.firstElementIndex < 0;
         };

         columnDescriptors.push(column);
         reader.seek(Number(recordStart + columnRecordSize));
      }
      reader.seek(Number(startOffset - columnListSize));
      return columnDescriptors;
   }
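   // Frame convention used by the readers above and below: every frame starts
   // with a signed 64-bit size. A negative size marks a list frame (the
   // absolute value is the frame length, followed by a 32-bit item count);
   // a positive size marks a record frame. Seeking to recordStart + recordSize
   // skips any trailing members appended by a newer writer.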
   _readAliasColumn(reader) {
      const startOffset = BigInt(reader.offset),
            aliasColumnListSize = reader.readS64(),
            aliasListIsList = aliasColumnListSize < 0;
      if (!aliasListIsList)
         throw new Error('Alias column list frame is not a list frame, which is required.');

      const aliasColumnCount = reader.readU32(), // number of alias column entries
            aliasColumns = [];

      for (let i = 0; i < aliasColumnCount; ++i) {
         const recordStart = BigInt(reader.offset),
               aliasColumnRecordSize = reader.readS64(),
               physicalColumnId = reader.readU32(),
               fieldId = reader.readU32();

         aliasColumns.push({ physicalColumnId, fieldId });
         reader.seek(Number(recordStart + aliasColumnRecordSize));
      }
      reader.seek(Number(startOffset - aliasColumnListSize));
      return aliasColumns;
   }

   _readExtraTypeInformation(reader) {
      const startOffset = BigInt(reader.offset),
            extraTypeInfoListSize = reader.readS64(),
            isList = extraTypeInfoListSize < 0;
      if (!isList)
         throw new Error('Extra type info frame is not a list frame, which is required.');

      const entryCount = reader.readU32(),
            extraTypeInfo = [];

      for (let i = 0; i < entryCount; ++i) {
         const recordStart = BigInt(reader.offset),
               extraTypeInfoRecordSize = reader.readS64(),
               contentId = reader.readU32(),
               typeVersion = reader.readU32();

         extraTypeInfo.push({ contentId, typeVersion });
         reader.seek(Number(recordStart + extraTypeInfoRecordSize));
      }
      reader.seek(Number(startOffset - extraTypeInfoListSize));
      return extraTypeInfo;
   }

   _readClusterGroups(reader) {
      const startOffset = BigInt(reader.offset),
            clusterGroupListSize = reader.readS64(),
            isList = clusterGroupListSize < 0;
      if (!isList)
         throw new Error('Cluster group frame is not a list frame');

      const groupCount = reader.readU32(),
            clusterGroups = [];

      for (let i = 0; i < groupCount; ++i) {
         const recordStart = BigInt(reader.offset),
               clusterRecordSize = reader.readS64(),
               minEntry = reader.readU64(),
               entrySpan = reader.readU64(),
               numClusters = reader.readU32(),
               pageListLength = reader.readU64(),
               // Locator tells where the page list envelope is stored in the file
               pageListLocator = this._readLocator(reader),
               group = { minEntry, entrySpan, numClusters, pageListLocator, pageListLength };

         clusterGroups.push(group);
         reader.seek(Number(recordStart + clusterRecordSize));
      }
      reader.seek(Number(startOffset - clusterGroupListSize));
      this.clusterGroups = clusterGroups;
   }

   _readLocator(reader) {
      const sizeAndType = reader.readU32(); // 4 bytes: size + T bit
      if ((sizeAndType | 0) < 0) // `| 0` reinterprets the value as signed 32-bit; negative means T=1
         throw new Error('Non-standard locators (T=1) not supported yet');
      const size = sizeAndType,
            offset = reader.readU64(); // 8 bytes: offset
      return { size, offset };
   }

   deserializePageList(page_list_blob) {
      if (!page_list_blob)
         throw new Error('deserializePageList: received an invalid or empty page list blob');

      const reader = new RBufferReader(page_list_blob);
      this._readEnvelopeMetadata(reader);

      // Page list checksum (64-bit xxhash3)
      const pageListHeaderChecksum = reader.readU64();
      if (pageListHeaderChecksum !== this.headerEnvelopeChecksum)
         throw new Error('RNTuple corrupted: header checksum does not match Page List Header checksum.');

      // Read cluster summaries list frame
      const listStartOffset = BigInt(reader.offset),
            clusterSummaryListSize = reader.readS64();
      if (clusterSummaryListSize >= 0)
         throw new Error('Expected a list frame for cluster summaries');

      const clusterSummaryCount = reader.readU32(),
            clusterSummaries = [];

      for (let i = 0; i < clusterSummaryCount; ++i) {
         const recordStart = BigInt(reader.offset),
               clusterSummaryRecordSize = reader.readS64(),
               firstEntry = reader.readU64(),
               combined = reader.readU64(),
               flags = combined >> 56n; // top 8 bits are flags
         if (flags & 0x01n)
            throw new Error('Cluster summary uses unsupported sharded flag (0x01)');

         const numEntries = Number(combined & 0x00FFFFFFFFFFFFFFn); // lower 56 bits
         clusterSummaries.push({ firstEntry, numEntries, flags });
         reader.seek(Number(recordStart + clusterSummaryRecordSize));
      }
      reader.seek(Number(listStartOffset - clusterSummaryListSize));
      this.clusterSummaries = clusterSummaries;

      this._readNestedFrames(reader);

      /* const checksumPagelist = */ reader.readU64();
   }
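   // Example (hypothetical value): for combined = 0x00000000000003E8n the top
   // 8 bits (flags) are 0 and the lower 56 bits give numEntries = 1000.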
   _readNestedFrames(reader) {
      const clusterPageLocations = [],
            numListClusters = reader.readS64();
      if (numListClusters >= 0)
         throw new Error('Expected list frame for clusters');

      const numRecordCluster = reader.readU32();
      for (let i = 0; i < numRecordCluster; ++i) {
         const outerListSize = reader.readS64();
         if (outerListSize >= 0)
            throw new Error('Expected outer list frame for columns');

         const numColumns = reader.readU32(),
               columns = [];

         for (let c = 0; c < numColumns; ++c) {
            const innerListSize = reader.readS64();
            if (innerListSize >= 0)
               throw new Error('Expected inner list frame for pages');

            const numPages = reader.readU32(),
                  pages = [];

            for (let p = 0; p < numPages; ++p) {
               const numElementsWithBit = reader.readS32(),
                     hasChecksum = numElementsWithBit < 0, // sign bit flags a trailing page checksum
                     numElements = BigInt(Math.abs(numElementsWithBit)),
                     locator = this._readLocator(reader);
               pages.push({ numElements, hasChecksum, locator });
            }

            const elementOffset = reader.readS64(),
                  isSuppressed = elementOffset < 0;
            let compression = null;
            if (!isSuppressed)
               compression = reader.readU32();
            columns.push({ pages, elementOffset, isSuppressed, compression });
         }
         clusterPageLocations.push(columns);
      }
      this.pageLocations = clusterPageLocations;
   }
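   // Resulting layout built by _readNestedFrames():
   //    this.pageLocations[clusterIndex][columnIndex] = {
   //       pages: [{ numElements, hasChecksum, locator: { size, offset } }, ...],
   //       elementOffset, isSuppressed, compression
   //    }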
   // Example of deserializing page content
   deserializePage(blob, columnDescriptor, pageInfo) {
      // Rearrange split buffers once; the call also remaps split coltypes to their plain equivalents
      const originalColtype = columnDescriptor.coltype,
            { blob: unsplitBlob, coltype } = reconstructUnsplitBuffer(blob, columnDescriptor);
      let processedBlob = unsplitBlob;

      // Handle split index types
      if (originalColtype === ENTupleColumnType.kSplitIndex32 || originalColtype === ENTupleColumnType.kSplitIndex64) {
         const { blob: decodedArray } = DecodeDeltaIndex(processedBlob, coltype);
         processedBlob = decodedArray;
      }

      // Handle split signed integer types
      if (originalColtype === ENTupleColumnType.kSplitInt16 || originalColtype === ENTupleColumnType.kSplitInt32 ||
          originalColtype === ENTupleColumnType.kSplitInt64) {
         const { blob: decodedArray } = decodeZigzag(processedBlob, coltype);
         processedBlob = decodedArray;
      }

      const reader = new RBufferReader(processedBlob),
            values = [],
            // Use numElements from the pageInfo parameter
            numValues = Number(pageInfo.numElements),
            // Helper for all simple types
            extractValues = readFunc => {
               for (let i = 0; i < numValues; ++i)
                  values.push(readFunc());
            };

      switch (coltype) {
         case ENTupleColumnType.kBit: {
            let bitCount = 0;
            const totalBitsInBuffer = processedBlob.byteLength * 8;
            if (totalBitsInBuffer < numValues)
               throw new Error(`kBit: Not enough bits in buffer (${totalBitsInBuffer}) for numValues (${numValues})`);
            for (let byteIndex = 0; byteIndex < processedBlob.byteLength; ++byteIndex) {
               const byte = reader.readU8();
               // Extract 8 bits from this byte, least significant bit first
               for (let bitPos = 0; bitPos < 8 && bitCount < numValues; ++bitPos, ++bitCount) {
                  const bitValue = (byte >>> bitPos) & 1,
                        boolValue = bitValue === 1;
                  values.push(boolValue);
               }
            }
            break;
         }
         case ENTupleColumnType.kReal64:
            extractValues(reader.readF64.bind(reader));
            break;
         case ENTupleColumnType.kReal32:
            extractValues(reader.readF32.bind(reader));
            break;
         case ENTupleColumnType.kInt64:
            extractValues(reader.readS64.bind(reader));
            break;
         case ENTupleColumnType.kUInt64:
            extractValues(reader.readU64.bind(reader));
            break;
         case ENTupleColumnType.kInt32:
            extractValues(reader.readS32.bind(reader));
            break;
         case ENTupleColumnType.kUInt32:
            extractValues(reader.readU32.bind(reader));
            break;
         case ENTupleColumnType.kInt16:
            extractValues(reader.readS16.bind(reader));
            break;
         case ENTupleColumnType.kUInt16:
            extractValues(reader.readU16.bind(reader));
            break;
         case ENTupleColumnType.kInt8:
            extractValues(reader.readS8.bind(reader));
            break;
         case ENTupleColumnType.kUInt8:
         case ENTupleColumnType.kByte:
            extractValues(reader.readU8.bind(reader));
            break;
         case ENTupleColumnType.kChar:
            extractValues(() => String.fromCharCode(reader.readS8()));
            break;
         case ENTupleColumnType.kIndex32:
            extractValues(reader.readS32.bind(reader));
            break;
         case ENTupleColumnType.kIndex64:
            extractValues(reader.readS64.bind(reader));
            break;
         default:
            throw new Error(`Unsupported column type: ${columnDescriptor.coltype}`);
      }

      return values;
   }

} // class RNTupleDescriptorBuilder


/** @summary Very preliminary function to read header/footer from RNTuple
  * @private */
async function readHeaderFooter(tuple) {
   // if already read - return immediately, makes it possible to call several times
   if (tuple?.builder)
      return true;

   if (!tuple.$file)
      return false;

   // request header and footer buffers from the file
   return tuple.$file.readBuffer([tuple.fSeekHeader, tuple.fNBytesHeader, tuple.fSeekFooter, tuple.fNBytesFooter]).then(blobs => {
      if (blobs?.length !== 2)
         return false;

      // Handle both compressed and uncompressed cases
      const processBlob = (blob, uncompressedSize) => {
         // If uncompressedSize matches blob size, it's uncompressed
         if (blob.byteLength === uncompressedSize)
            return Promise.resolve(blob);
         return R__unzip(blob, uncompressedSize);
      };

      return Promise.all([
         processBlob(blobs[0], tuple.fLenHeader),
         processBlob(blobs[1], tuple.fLenFooter)
      ]).then(unzip_blobs => {
         const [header_blob, footer_blob] = unzip_blobs;
         if (!header_blob || !footer_blob)
            return false;

         tuple.builder = new RNTupleDescriptorBuilder();
         tuple.builder.deserializeHeader(header_blob);
         tuple.builder.deserializeFooter(footer_blob);

         // Build fieldToColumns mapping
         tuple.fieldToColumns = {};
         for (const colDesc of tuple.builder.columnDescriptors) {
            const fieldDesc = tuple.builder.fieldDescriptors[colDesc.fieldId],
                  fieldName = fieldDesc.fieldName;
            if (!tuple.fieldToColumns[fieldName])
               tuple.fieldToColumns[fieldName] = [];
            tuple.fieldToColumns[fieldName].push(colDesc);
         }

         // Deserialize Page List
         const group = tuple.builder.clusterGroups?.[0];
         if (!group || !group.pageListLocator)
            throw new Error('No valid cluster group or page list locator found');

         const offset = Number(group.pageListLocator.offset),
               size = Number(group.pageListLocator.size),
               uncompressedSize = Number(group.pageListLength);

         return tuple.$file.readBuffer([offset, size]).then(page_list_blob => {
            if (!(page_list_blob instanceof DataView))
               throw new Error(`Expected DataView from readBuffer, got ${Object.prototype.toString.call(page_list_blob)}`);

            // Check if page list data is uncompressed
            if (page_list_blob.byteLength === uncompressedSize) {
               // Data is uncompressed, use directly
               tuple.builder.deserializePageList(page_list_blob);
               return true;
            }

            // Attempt to decompress the page list
            return R__unzip(page_list_blob, uncompressedSize).then(unzipped_blob => {
               if (!(unzipped_blob instanceof DataView))
                  throw new Error(`Unzipped page list is not a DataView, got ${Object.prototype.toString.call(unzipped_blob)}`);
               tuple.builder.deserializePageList(unzipped_blob);
               return true;
            });
         });
      });
   }).catch(err => {
      console.error('Error during readHeaderFooter execution:', err);
      throw err;
   });
}
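// Usage sketch (assumes `tuple` is an RNTuple object read from a ROOT file
// with JSROOT, e.g. via file.readObject('myNTuple') - the name is hypothetical):
//
//    readHeaderFooter(tuple).then(ok => {
//       if (ok)
//          console.log('fields:', tuple.builder.fieldDescriptors.map(f => f.fieldName));
//    });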
function readEntry(rntuple, fieldName, entryIndex) {
   const builder = rntuple.builder,
         field = builder.fieldDescriptors.find(f => f.fieldName === fieldName),
         fieldData = rntuple._clusterData[fieldName];

   if (!field)
      throw new Error(`No descriptor for field ${fieldName}`);
   if (!fieldData)
      throw new Error(`No data for field ${fieldName}`);

   // Detect and decode string fields stored as [offsets, payload]
   if (Array.isArray(fieldData) && fieldData.length === 2) {
      const [offsets, payload] = fieldData,
            start = entryIndex === 0 ? 0 : Number(offsets[entryIndex - 1]),
            end = Number(offsets[entryIndex]),
            decoded = payload.slice(start, end).join(''); // Convert to string
      return decoded;
   }

   // Fallback: primitive type (e.g. int, float)
   return fieldData[0][entryIndex];
}

/** @summary Return field name for specified branch index
  * @desc The API allows using either a field name string or the field object itself in the selector */
function getSelectorFieldName(selector, i) {
   const br = selector.getBranch(i);
   return isStr(br) ? br : br?.fieldName;
}
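// Example (hypothetical data): a std::string field is stored as two columns,
// cumulative offsets plus a character payload. With offsets = [3, 6] and
// payload = ['f','o','o','b','a','r']:
//    readEntry(rntuple, 'name', 0) -> 'foo'  (payload[0..3))
//    readEntry(rntuple, 'name', 1) -> 'bar'  (payload[3..6))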
// Read and process the next data cluster from the RNTuple
function readNextCluster(rntuple, selector) {
   const builder = rntuple.builder;

   // Add validation
   if (!builder.clusterSummaries || builder.clusterSummaries.length === 0)
      throw new Error('No cluster summaries available - possibly incomplete file reading');

   const clusterIndex = selector.currentCluster,
         clusterSummary = builder.clusterSummaries[clusterIndex],
         // Gather all pages for this cluster from selected fields only
         pages = [],
         // Collect only selected field names from selector
         selectedFields = [];

   for (let i = 0; i < selector.numBranches(); ++i)
      selectedFields.push(getSelectorFieldName(selector, i));

   // For each selected field, collect its columns' pages
   for (const fieldName of selectedFields) {
      const columns = rntuple.fieldToColumns[fieldName];
      if (!columns)
         throw new Error(`Selected field '${fieldName}' not found in RNTuple`);

      for (const colDesc of columns) {
         const colEntry = builder.pageLocations[clusterIndex]?.[colDesc.index];
         // When the data is missing or broken
         if (!colEntry || !colEntry.pages)
            throw new Error(`No pages for column ${colDesc.index} in cluster ${clusterIndex}`);
         for (const page of colEntry.pages)
            pages.push({ page, colDesc, fieldName });
      }
   }

   selector.currentCluster++;

   // Early exit if no pages to read (i.e., no selected fields matched)
   if (pages.length === 0) {
      selector.Terminate(false);
      return Promise.resolve();
   }

   // Build flat array of [offset, size, offset, size, ...] to read pages
   const dataToRead = pages.flatMap(p => [Number(p.page.locator.offset), Number(p.page.locator.size)]);

   return rntuple.$file.readBuffer(dataToRead).then(blobsRaw => {
      const blobs = Array.isArray(blobsRaw) ? blobsRaw : [blobsRaw],
            unzipPromises = blobs.map((blob, idx) => {
               const { page, colDesc } = pages[idx],
                     colEntry = builder.pageLocations[clusterIndex][colDesc.index], // Access column entry
                     numElements = Number(page.numElements),
                     elementSize = colDesc.bitsOnStorage / 8;

               // Check if data is compressed
               if (colEntry.compression === 0)
                  return Promise.resolve(blob); // Uncompressed: use blob directly

               const expectedSize = numElements * elementSize;

               // Special handling for boolean fields
               if (colDesc.coltype === ENTupleColumnType.kBit) {
                  const expectedBoolSize = Math.ceil(numElements / 8);
                  if (blob.byteLength === expectedBoolSize)
                     return Promise.resolve(blob);
                  // Try decompression but catch errors for boolean fields
                  return R__unzip(blob, expectedBoolSize).catch(err => {
                     throw new Error(`Failed to unzip boolean page ${idx}: ${err.message}`);
                  });
               }

               // If the blob is already the expected size, treat as uncompressed
               if (blob.byteLength === expectedSize)
                  return Promise.resolve(blob);

               // Try decompression
               return R__unzip(blob, expectedSize).then(result => {
                  if (!result)
                     return blob; // Fallback to original blob
                  return result;
               }).catch(err => {
                  throw new Error(`Failed to unzip page ${idx}: ${err.message}`);
               });
            });

      return Promise.all(unzipPromises).then(unzipBlobs => {
         rntuple._clusterData = {}; // store deserialized data per field

         for (let i = 0; i < unzipBlobs.length; ++i) {
            const blob = unzipBlobs[i];
            // Ensure blob is a DataView
            if (!(blob instanceof DataView))
               throw new Error(`Invalid blob type for page ${i}: ${Object.prototype.toString.call(blob)}`);

            const { page, colDesc } = pages[i],
                  field = builder.fieldDescriptors[colDesc.fieldId],
                  values = builder.deserializePage(blob, colDesc, page);

            // Support multiple representations (e.g., string fields with offsets + payload)
            if (!rntuple._clusterData[field.fieldName])
               rntuple._clusterData[field.fieldName] = [];

            // Split string fields into offset and payload components
            if (field.typeName === 'std::string') {
               if (colDesc.coltype === ENTupleColumnType.kIndex64 || colDesc.coltype === ENTupleColumnType.kIndex32 ||
                   colDesc.coltype === ENTupleColumnType.kSplitIndex64 || colDesc.coltype === ENTupleColumnType.kSplitIndex32)
                  rntuple._clusterData[field.fieldName][0] = values; // Offsets
               else if (colDesc.coltype === ENTupleColumnType.kChar)
                  rntuple._clusterData[field.fieldName][1] = values; // Payload
               else
                  throw new Error(`Unsupported column type for string field: ${colDesc.coltype}`);
            } else
               rntuple._clusterData[field.fieldName][0] = values;
         }

         // Ensure string fields have an ending offset for proper reconstruction of the last entry
         for (const fieldName of selectedFields) {
            const field = builder.fieldDescriptors.find(f => f.fieldName === fieldName),
                  colData = rntuple._clusterData[fieldName];
            if (field.typeName === 'std::string') {
               if (!Array.isArray(colData) || colData.length !== 2)
                  throw new Error(`String field '${fieldName}' must have 2 columns`);
               if (colData[0].length !== builder.clusterSummaries[clusterIndex].numEntries)
                  throw new Error(`Malformed string field '${fieldName}': missing final offset`);
            }
         }

         const numEntries = clusterSummary.numEntries;
         for (let i = 0; i < numEntries; ++i) {
            for (let b = 0; b < selector.numBranches(); ++b) {
               const fieldName = getSelectorFieldName(selector, b),
                     tgtName = selector.nameOfBranch(b),
                     values = rntuple._clusterData[fieldName];
               if (!values)
                  throw new Error(`Missing values for selected field: ${fieldName}`);
               selector.tgtobj[tgtName] = readEntry(rntuple, fieldName, i);
            }
            selector.Process();
         }

         selector.Terminate(true);
      });
   });
}
// TODO: args can later be used to filter fields, limit entries, etc.
function rntupleProcess(rntuple, selector, args) {
   return readHeaderFooter(rntuple).then(() => {
      selector.Begin();
      selector.currentCluster = 0;
      return readNextCluster(rntuple, selector, args);
   }).then(() => selector);
}


class TDrawSelectorTuple extends TDrawSelector {

   /** @summary Return total number of entries
     * @desc TODO: check implementation details ! */
   getNumEntries(tuple) {
      let cnt = 0;
      tuple?.builder?.clusterSummaries.forEach(summary => { cnt += summary.numEntries; });
      return cnt;
   }

   /** @summary Search for a field in the tuple
     * @desc TODO: can become more complex when the name includes extra parts referencing a member or collection size */
   findBranch(tuple, name) {
      return tuple.builder?.fieldDescriptors.find(field => field.fieldName === name);
   }

   /** @summary Returns true if the field can be used as an array */
   isArrayBranch(/* tuple, br */) { return false; }

} // class TDrawSelectorTuple


/** @summary Implementation of drawing for RNTuple
  * @param {object|string} args - different settings or simply the draw expression
  * @param {string} args.expr - draw expression
  * @param {string} [args.cut=undefined] - cut expression (also can be part of 'expr' after '::')
  * @param {string} [args.drawopt=undefined] - draw options for the result histogram
  * @param {number} [args.firstentry=0] - first entry to process
  * @param {number} [args.numentries=undefined] - number of entries to process, all by default
  * @param {Array} [args.elist=undefined] - array of entry ids to process, all by default
  * @param {boolean} [args.staged] - staged processing: first apply the cut to select entries, then perform drawing for the selected entries
  * @param {object} [args.branch=undefined] - branch object for direct drawing
  * @param {function} [args.progress=undefined] - function called during histogram accumulation with obj argument
  * @return {Promise} with the produced object */
async function rntupleDraw(rntuple, args) {
   if (isStr(args))
      args = { expr: args };
   else if (!isObject(args))
      args = {};

   args.SelectorClass = TDrawSelectorTuple;
   args.processFunction = rntupleProcess;

   return readHeaderFooter(rntuple).then(res_header_footer => {
      return res_header_footer ? treeDraw(rntuple, args) : null;
   });
}

/** @summary Create hierarchy of ROOT::RNTuple object
  * @desc Used by hierarchy painter to explore sub-elements
  * @private */
async function tupleHierarchy(tuple_node, tuple) {
   tuple_node._childs = [];
   // tuple_node._tuple = tuple; // set reference, will be used later by RNTuple::Draw

   return readHeaderFooter(tuple).then(res => {
      if (!res)
         return res;

      tuple.builder?.fieldDescriptors.forEach(field => {
         const item = {
            _name: field.fieldName,
            _typename: 'ROOT::RNTupleField', // pseudo class name, used in draw.mjs
            _kind: 'ROOT::RNTupleField',
            _title: `Field of type ${field.typeName}`,
            $tuple: tuple, // reference on tuple, needed for drawing
            $field: field
         };
         item._obj = item;
         tuple_node._childs.push(item);
      });

      return true;
   });
}

export { tupleHierarchy, readHeaderFooter, RBufferReader, rntupleProcess, readEntry, rntupleDraw };
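// End-to-end usage sketch (file URL and tuple name are hypothetical; assumes
// JSROOT's openFile() from io.mjs and a file containing an RNTuple):
//
//    import { openFile } from './io.mjs';
//    import { rntupleDraw } from './rntuple.mjs';
//
//    const file = await openFile('https://example.org/ntuple.root');
//    const tuple = await file.readObject('Events');
//    const hist = await rntupleDraw(tuple, { expr: 'pt' });
//    // `hist` can then be rendered with JSROOT's draw(dom, hist)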