pickleparser
Version:
A parser for the Python pickle format, implemented in pure JavaScript
159 lines (139 loc) • 5.12 kB
text/typescript
/** Text encodings that can be requested from `IReader.string`. */
type Encoding = 'ascii' | 'utf-8';
/**
 * Contract for a sequential binary reader.
 *
 * The per-member notes below describe what `BufferReader` (the implementation
 * in this file) does; the interface itself only fixes the signatures.
 */
export interface IReader {
/** Reads one unsigned 8-bit value and advances by one byte. */
byte(): number;
/** Returns the next `length` bytes and advances past them. */
bytes(length: number): Uint8Array | Int8Array | Uint8ClampedArray;
/** Reads an unsigned 16-bit integer (little-endian in `BufferReader`). */
uint16(): number;
/** Reads a signed 32-bit integer (little-endian in `BufferReader`). */
int32(): number;
/** Reads an unsigned 32-bit integer (little-endian in `BufferReader`). */
uint32(): number;
/**
 * Reads an unsigned 64-bit integer (little-endian in `BufferReader`) as a
 * `number`; values above `Number.MAX_SAFE_INTEGER` may lose precision.
 */
uint64(): number;
/** Reads a 64-bit float (big-endian in `BufferReader`). */
float64(): number;
/** Advances the read position by `offset` bytes. */
skip(offset: number): void;
/** Decodes the next `size` bytes as text using `encoding`. */
string(size: number, encoding: Encoding): string;
/**
 * Reads text up to the next line feed (0x0a). `BufferReader` consumes the
 * line feed and returns the text without it.
 */
line(): string;
/** Reports whether unread bytes remain. */
hasNext(): boolean;
}
/**
 * Cursor-based reader over an in-memory byte buffer.
 *
 * Every read method decodes a value at the current position and then moves
 * the cursor past it. Integer readers are little-endian; `float64` is
 * big-endian. A read that would run past the end of the buffer throws and
 * leaves the cursor where it was, so the reader stays in a consistent state.
 */
export class BufferReader implements IReader {
  private readonly _buffer: Uint8Array | Int8Array | Uint8ClampedArray;
  private readonly _dataView: DataView;
  private readonly _utf8Decoder = new TextDecoder('utf-8');
  // Note: per the WHATWG Encoding Standard the 'ascii' label resolves to
  // windows-1252, so bytes >= 0x80 are decoded rather than rejected.
  private readonly _asciiDecoder = new TextDecoder('ascii');
  private _position: number;

  /**
   * @param buffer Bytes to read from. The reader works on a view of this
   *   array (honouring its `byteOffset`/`byteLength`); nothing is copied.
   */
  constructor(buffer: Uint8Array | Int8Array | Uint8ClampedArray) {
    this._buffer = buffer;
    this._dataView = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
    this._position = 0;
  }

  /** Total number of bytes in the underlying buffer. */
  get length(): number {
    return this._buffer.byteLength;
  }

  /** Index of the next byte to be read. */
  get position(): number {
    return this._position;
  }

  /**
   * Reads one unsigned byte.
   * @throws Error if no byte is left.
   */
  byte(): number {
    const start = this._position;
    this.skip(1);
    return this._dataView.getUint8(start);
  }

  /**
   * Returns a view (not a copy) of the next `length` bytes.
   * @throws Error if fewer than `length` bytes are left.
   */
  bytes(length: number): Uint8Array | Int8Array | Uint8ClampedArray {
    const start = this._position;
    this.skip(length);
    return this._buffer.subarray(start, this._position);
  }

  /**
   * Reads a little-endian unsigned 16-bit integer.
   * @throws Error if fewer than 2 bytes are left.
   */
  uint16(): number {
    const start = this._position;
    this.skip(2);
    return this._dataView.getUint16(start, true);
  }

  /**
   * Reads a little-endian signed 32-bit integer.
   * @throws Error if fewer than 4 bytes are left.
   */
  int32(): number {
    const start = this._position;
    this.skip(4);
    return this._dataView.getInt32(start, true);
  }

  /**
   * Reads a little-endian unsigned 32-bit integer.
   * @throws Error if fewer than 4 bytes are left.
   */
  uint32(): number {
    const start = this._position;
    this.skip(4);
    return this._dataView.getUint32(start, true);
  }

  /**
   * Reads a little-endian unsigned 64-bit integer as a `number`.
   *
   * Values above `Number.MAX_SAFE_INTEGER` cannot be represented exactly; a
   * warning is logged and the nearest representable value is returned.
   *
   * Examples (input bytes => result):
   *   ff 00 00 00 00 00 00 00 => 255
   *   ff ff 00 00 00 00 00 00 => 65535
   *   ff ff ff ff 00 00 00 00 => 4294967295
   *   00 00 00 00 01 00 00 00 => 4294967296
   *   00 00 00 00 00 01 00 00 => 1099511627776
   *   00 00 00 00 00 00 01 00 => 281474976710656
   *   ff ff ff ff ff ff 1f 00 => 9007199254740991 (maximum precision)
   *
   * @throws Error if fewer than 8 bytes are left.
   */
  uint64(): number {
    const start = this._position;
    this.skip(8);
    // DataView has no number-returning 64-bit getter, so read two 32-bit
    // halves and recombine them arithmetically (bitwise ops would truncate).
    const low = this._dataView.getUint32(start, true);
    const high = this._dataView.getUint32(start + 4, true);
    const number = low + 2 ** 32 * high;
    if (!Number.isSafeInteger(number)) {
      console.warn(number, 'exceeds MAX_SAFE_INTEGER. Precision may be lost');
    }
    return number;
  }

  /**
   * Reads a big-endian 64-bit float.
   * @throws Error if fewer than 8 bytes are left.
   */
  float64(): number {
    const start = this._position;
    this.skip(8);
    return this._dataView.getFloat64(start, false);
  }

  /**
   * Moves the cursor by `offset` bytes.
   *
   * The new position is validated before it is committed, so a failed call
   * leaves the cursor unchanged.
   *
   * @throws Error if the move would pass the end of the buffer or land
   *   before its start.
   */
  skip(offset: number): void {
    const target = this._position + offset;
    if (target > this._buffer.length) {
      throw new Error(
        'Expected ' +
          (target - this._buffer.length) +
          ' more bytes. The file might be corrupted. Unexpected end of file.',
      );
    }
    if (target < 0) {
      throw new Error('Cannot move ' + -target + ' bytes before the start of the buffer.');
    }
    this._position = target;
  }

  /**
   * Decodes the next `size` bytes as text.
   * @param size Number of bytes to consume.
   * @param encoding `'utf-8'` selects the UTF-8 decoder; anything else uses
   *   the `'ascii'` decoder.
   * @throws Error if fewer than `size` bytes are left.
   */
  string(size: number, encoding: Encoding): string {
    const data = this.bytes(size);
    return encoding === 'utf-8' ? this._utf8Decoder.decode(data) : this._asciiDecoder.decode(data);
  }

  /**
   * Reads text up to the next line feed (0x0a), consumes the line feed, and
   * returns the text without it, decoded with the `'ascii'` decoder.
   * @throws Error if no line feed exists at or after the current position.
   */
  line(): string {
    const index = this._buffer.indexOf(0x0a /** LF(\\n) */, this._position);
    if (index === -1) {
      throw new Error('Could not find end of line.');
    }
    const text = this.string(index - this._position, 'ascii');
    // Step over the line feed itself.
    this.skip(1);
    return text;
  }

  /** Returns `true` while at least one unread byte remains. */
  hasNext(): boolean {
    return this.position < this.length;
  }
}
/**
 * Decodes up to eight little-endian bytes as an unsigned integer `number`.
 *
 * Shorter inputs are zero-extended to eight bytes before decoding. The
 * decoding itself is delegated to `BufferReader.uint64`.
 *
 * @param data Little-endian bytes, at most 8 of them.
 * @returns The decoded value.
 * @throws Error if `data` is longer than 8 bytes.
 */
export function readUint64(data: Uint8Array | Int8Array | Uint8ClampedArray) {
  if (data.length > 8) {
    throw new Error('Value too large to unpickling');
  }
  // A fresh Uint8Array is zero-filled, so copying `data` to the front pads
  // the high-order bytes with zeros.
  const padded = new Uint8Array(8);
  padded.set(data);
  return new BufferReader(padded).uint64();
}
/**
 * Decodes little-endian bytes of any length as an unsigned `bigint`.
 *
 * The input is zero-extended to a whole number of 32-bit words, and the
 * words are then combined into a single value.
 *
 * @param data Little-endian bytes; an empty input yields zero.
 * @returns The decoded unsigned value.
 */
export function readUint64WithBigInt(data: Uint8Array | Int8Array | Uint8ClampedArray) {
  const wordCount = Math.ceil(data.length / 4);
  // Zero-filled on creation, so the bytes after `data` act as padding.
  const padded = new Uint8Array(wordCount * 4);
  padded.set(data);
  const words = new DataView(padded.buffer);
  const wordBits = BigInt(32);
  let value = BigInt(0);
  // Fold from the most significant word down: shift what we have so far up
  // by one word and drop the next word into the low 32 bits.
  for (let wordIndex = wordCount - 1; wordIndex >= 0; wordIndex--) {
    value = (value << wordBits) | BigInt(words.getUint32(wordIndex * 4, true));
  }
  return value;
}