@adguard/agtree
Version:
Tool set for working with adblock filter lists
232 lines (229 loc) • 8.32 kB
JavaScript
/*
* AGTree v3.4.3 (build date: Thu, 11 Dec 2025 13:43:19 GMT)
* (c) 2025 Adguard Software Ltd.
* Released under the MIT license
* https://github.com/AdguardTeam/tsurlfilter/tree/master/packages/agtree#readme
*/
import { BINARY_SCHEMA_VERSION } from './binary-schema-version.js';
import { ByteBuffer } from './byte-buffer.js';
import { isChromium } from './is-chromium.js';
import { encodeIntoPolyfill } from './text-encoder-polyfill.js';
/* eslint-disable no-plusplus */
/* eslint-disable no-bitwise */
/**
* @file Output byte buffer for writing binary data.
*/
/**
* Output byte buffer for writing binary data.
*
* @note Internally, this class extends {@link ByteBuffer}, just providing a convenient API for writing data.
*/
class OutputByteBuffer extends ByteBuffer {
    /**
     * Current offset in the buffer for writing.
     */
    offset;

    /**
     * Size of the shared buffer for encoding strings in bytes.
     * This is a divisor of ByteBuffer.CHUNK_SIZE and experience shows that this value works optimally.
     * This is sufficient for most strings that occur in filter lists (we checked average string length in popular
     * filter lists).
     */
    static ENCODER_BUFFER_SIZE = 8192;

    /**
     * Length threshold for using a shared buffer for encoding strings.
     * This temp buffer is needed because we write the short strings in it
     * (so there is no need to constantly allocate a new buffer).
     * The reason for dividing ENCODER_BUFFER_SIZE by 4 is to ensure that the encoded string fits in the buffer,
     * if we also take into account the worst possible case (each character is encoded with 4 bytes).
     * Derived from ENCODER_BUFFER_SIZE so the two values cannot drift apart.
     */
    static SHORT_STRING_THRESHOLD = OutputByteBuffer.ENCODER_BUFFER_SIZE / 4; // 2048

    /**
     * Represents the maximum value that can be written as a 'storage optimized' unsigned integer.
     * 0x1FFFFFFF is the largest 29-bit value. Values are written 7 bits per byte, with the most significant
     * bit of every byte except the last one used as a "more bytes follow" flag, so the largest accepted value
     * occupies 5 bytes.
     */
    static MAX_OPTIMIZED_UINT = 0x1FFFFFFF;

    /**
     * Shared buffer for encoding strings.
     */
    sharedBuffer;

    /**
     * Shared native encoder for encoding strings.
     */
    sharedNativeEncoder;

    /**
     * Flag indicating if the current environment is Chromium.
     * This is used for performance optimizations, because Chromium's TextEncoder/TextDecoder has a relatively
     * large marshalling overhead for small strings.
     */
    isChromium;

    /**
     * Constructs a new OutputByteBuffer instance.
     * The binary schema version is written to the first 4 bytes, so writing starts at offset 4.
     */
    // TODO: add chunks as a parameter, if ever needed
    constructor() {
        super();
        this.sharedBuffer = new Uint8Array(OutputByteBuffer.ENCODER_BUFFER_SIZE);
        this.sharedNativeEncoder = new TextEncoder();
        this.isChromium = isChromium();
        // write the schema version at the beginning of the buffer
        this.writeUint32ToIndex(BINARY_SCHEMA_VERSION, 0);
        this.offset = 4; // schema version is already written
    }

    /**
     * Writes a 8-bit unsigned integer to the buffer and advances the offset by 1.
     *
     * @param value Value to write.
     * @returns Number of bytes written to the buffer.
     */
    writeUint8(value) {
        this.writeByte(this.offset++, value);
        return 1;
    }

    /**
     * Writes a 16-bit unsigned integer to the buffer (most significant byte first)
     * and advances the offset by 2.
     *
     * @param value Value to write.
     * @returns Number of bytes written to the buffer.
     */
    writeUint16(value) {
        this.writeByte(this.offset++, value >> 8);
        this.writeByte(this.offset++, value);
        return 2;
    }

    /**
     * Writes a 32-bit unsigned integer to the buffer at a specific index (most significant byte first).
     * The current write offset is not changed.
     *
     * @param value Value to write.
     * @param index Index to write the value to.
     * @returns Number of bytes written to the buffer.
     */
    writeUint32ToIndex(value, index) {
        this.writeByte(index, value >> 24);
        this.writeByte(index + 1, value >> 16);
        this.writeByte(index + 2, value >> 8);
        this.writeByte(index + 3, value);
        return 4;
    }

    /**
     * Writes a 32-bit unsigned integer to the buffer at the current offset and advances the offset by 4.
     *
     * @param value Value to write.
     * @returns Number of bytes written to the buffer.
     */
    writeUint32(value) {
        this.writeUint32ToIndex(value, this.offset);
        this.offset += 4;
        return 4;
    }

    /**
     * Writes a 32-bit signed integer to the buffer.
     * The value is reinterpreted as an unsigned 32-bit integer before writing.
     *
     * @param value Value to write.
     * @returns Number of bytes written to the buffer.
     */
    writeInt32(value) {
        // `>>> 0` maps every falsy input (0, NaN, undefined, ...) to 0, so no extra guard is needed
        return this.writeUint32(value >>> 0);
    }

    /**
     * Writes a Uint8Array to the byte buffer, starting at the current offset and
     * spilling over into the following chunks when needed.
     *
     * @param buffer Buffer to write.
     * @note This method does NOT advance the offset; the caller is responsible for that.
     */
    writeBuffer(buffer) {
        const { length } = buffer;
        this.ensureCapacity(this.offset + length);
        // The shift and mask below hard-code a chunk size of 2^15 (32768) bytes,
        // so they must stay in sync with ByteBuffer.CHUNK_SIZE.
        let chunkIndex = this.offset >>> 0x000F;
        let chunkOffset = this.offset & 0x7FFF;
        let remainingBytes = length;
        while (remainingBytes) {
            const leftInChunk = ByteBuffer.CHUNK_SIZE - chunkOffset;
            const bytesToWrite = Math.min(remainingBytes, leftInChunk);
            const sourceStart = length - remainingBytes;
            this.chunks[chunkIndex].set(buffer.subarray(sourceStart, sourceStart + bytesToWrite), chunkOffset);
            remainingBytes -= bytesToWrite;
            // every chunk after the first one is written from its beginning
            chunkIndex += 1;
            chunkOffset = 0;
        }
    }

    /**
     * Writes a string to the buffer as an 'optimized' unsigned integer holding the encoded byte length,
     * followed by the encoded bytes.
     *
     * @param value Value to write.
     * @returns Number of bytes written to the buffer.
     * @throws If the encoded length cannot be written as an 'optimized' unsigned integer.
     */
    writeString(value) {
        const start = this.offset;
        let encoded;
        if (value.length <= OutputByteBuffer.SHORT_STRING_THRESHOLD) {
            // Short strings are encoded into the shared buffer to avoid allocating a new one per string
            const { written } = this.isChromium
                ? encodeIntoPolyfill(value, this.sharedBuffer)
                : this.sharedNativeEncoder.encodeInto(value, this.sharedBuffer);
            encoded = this.sharedBuffer.subarray(0, written ?? 0);
        } else {
            // TODO: Optimize for long strings, if needed. Not a common case for our use case
            encoded = this.sharedNativeEncoder.encode(value);
        }
        const bytesWritten = encoded.length;
        this.writeOptimizedUint(bytesWritten);
        this.writeBuffer(encoded);
        // writeBuffer does not move the offset, so advance it here
        this.offset += bytesWritten;
        return this.offset - start;
    }

    /**
     * Writes chunks to the storage.
     *
     * @param storage Storage to write the chunks to.
     * @param key Key to write the chunks to.
     * @note For performance reasons, chunks are passed by reference and not copied.
     * @throws If the storage write operation throws.
     */
    async writeChunksToStorage(storage, key) {
        await storage.set(key, this.chunks);
    }

    /**
     * Writes an 'optimized' unsigned integer to the buffer.
     * 'Optimized' means smaller storage usage for smaller numbers.
     * The value is written in 7-bit groups, least significant group first.
     * Except for the last byte, each byte's most significant bit is a flag indicating if there are more bytes.
     *
     * @param value Value to write.
     * @returns Number of bytes written to the buffer.
     * @throws If the value exceeds the 29-bit limit or is negative.
     * @throws If the value is not an integer (e.g. NaN, undefined or a fractional number).
     */
    writeOptimizedUint(value) {
        if (value < 0 || value > OutputByteBuffer.MAX_OPTIMIZED_UINT) {
            throw new Error('Value exceeds 29-bit limit');
        }
        // NaN / undefined / fractional numbers pass the range check above, but would be silently
        // written as 0 or as a truncated value, so reject them before touching the buffer
        if (!Number.isInteger(value)) {
            throw new Error('Value must be an integer');
        }
        let remainingValue = value;
        const startOffset = this.offset;
        while (remainingValue >= 0x80) {
            const byteValue = remainingValue & 0x7F;
            remainingValue >>>= 7;
            this.writeByte(this.offset++, byteValue | 0x80);
        }
        // last byte: continuation flag is not set
        this.writeByte(this.offset++, remainingValue);
        return this.offset - startOffset;
    }

    /**
     * Gets the current offset in the buffer for writing.
     *
     * @returns Current offset in the buffer for writing.
     */
    get currentOffset() {
        return this.offset;
    }

    /**
     * Gets the chunks of the buffer.
     *
     * @returns Chunks of the buffer (the same array reference, not a copy).
     */
    getChunks() {
        return this.chunks;
    }
}
export { OutputByteBuffer };