pickleparser
Version:
A parser for the Python pickle format, implemented in pure JavaScript.
653 lines (644 loc) • 25.4 kB
JavaScript
(function (global, factory) {
typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
typeof define === 'function' && define.amd ? define(['exports'], factory) :
(global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.pickleparser = {}));
})(this, (function (exports) { 'use strict';
/**
 * Forward-only cursor over a byte buffer.
 *
 * Every read method captures the current position, advances the cursor via
 * `skip()` (which performs the bounds check) and then decodes from the
 * captured position.
 */
class BufferReader {
    /**
     * @param {Uint8Array} buffer Typed array view to read from. Its
     *     `buffer`, `byteOffset` and `byteLength` are used to build the
     *     backing DataView, so sub-views of a larger ArrayBuffer work.
     */
    constructor(buffer) {
        this._utf8Decoder = new TextDecoder('utf-8');
        this._asciiDecoder = new TextDecoder('ascii');
        this._buffer = buffer;
        this._dataView = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
        this._position = 0;
    }
    /** Total number of bytes available in the underlying view. */
    get length() {
        return this._buffer.byteLength;
    }
    /** Offset of the next byte to be read. */
    get position() {
        return this._position;
    }
    /** Reads one unsigned byte. */
    byte() {
        const position = this._position;
        this.skip(1);
        return this._dataView.getUint8(position);
    }
    /**
     * Reads `length` bytes.
     * @returns a `subarray` view sharing memory with the source buffer.
     */
    bytes(length) {
        const position = this._position;
        this.skip(length);
        return this._buffer.subarray(position, this._position);
    }
    /** Reads a little-endian unsigned 16-bit integer. */
    uint16() {
        const position = this._position;
        this.skip(2);
        return this._dataView.getUint16(position, true);
    }
    /** Reads a little-endian signed 32-bit integer. */
    int32() {
        const position = this._position;
        this.skip(4);
        return this._dataView.getInt32(position, true);
    }
    /** Reads a little-endian unsigned 32-bit integer. */
    uint32() {
        const position = this._position;
        this.skip(4);
        return this._dataView.getUint32(position, true);
    }
    /**
     * Reads a little-endian unsigned 64-bit integer as a Number.
     * Values beyond Number.MAX_SAFE_INTEGER are still returned, but a
     * warning is logged because precision may be lost.
     */
    uint64() {
        const position = this._position;
        this.skip(8);
        const left = this._dataView.getUint32(position, true);
        const right = this._dataView.getUint32(position + 4, true);
        const number = left + 2 ** 32 * right;
        if (!Number.isSafeInteger(number)) {
            console.warn(number, 'exceeds MAX_SAFE_INTEGER. Precision may be lost');
        }
        return number;
    }
    /** Reads a big-endian 64-bit float. */
    float64() {
        const position = this._position;
        this.skip(8);
        return this._dataView.getFloat64(position, false);
    }
    /**
     * Advances the cursor by `offset` bytes.
     *
     * The bound is validated BEFORE the cursor moves, so a failed read
     * leaves `position` untouched instead of pointing past the end.
     *
     * @throws {Error} when fewer than `offset` bytes remain.
     */
    skip(offset) {
        const target = this._position + offset;
        const limit = this.length;
        if (target > limit) {
            throw new Error(`Expected ${target - limit} more bytes. The file might be corrupted. Unexpected end of file.`);
        }
        this._position = target;
    }
    /**
     * Reads `size` bytes and decodes them as text.
     * @param {number} size Number of bytes to consume.
     * @param {string} encoding `'utf-8'` selects the UTF-8 decoder; any
     *     other value selects the decoder created with the 'ascii' label.
     */
    string(size, encoding) {
        const data = this.bytes(size);
        return encoding === 'utf-8' ? this._utf8Decoder.decode(data) : this._asciiDecoder.decode(data);
    }
    /**
     * Reads up to (not including) the next `\n` byte and consumes the
     * newline itself.
     * @throws {Error} when no newline exists at or after the cursor.
     */
    line() {
        const index = this._buffer.indexOf(0x0a, this._position);
        if (index === -1) {
            throw new Error('Could not find end of line.');
        }
        const size = index - this._position;
        const text = this.string(size, 'ascii');
        this.skip(1);
        return text;
    }
    /** True while at least one unread byte remains. */
    hasNext() {
        return this.position < this.length;
    }
}
/**
 * Decodes up to eight little-endian bytes as an unsigned integer Number.
 *
 * The input is zero-padded on the high side to a full 8 bytes and then
 * read through BufferReader#uint64.
 *
 * @param data Byte sequence of length 0..8.
 * @returns {number} The decoded unsigned value.
 * @throws {Error} when more than 8 bytes are supplied.
 */
function readUint64(data) {
    const MAX_BYTES = 8;
    if (data.length > MAX_BYTES) {
        throw new Error('Value too large to unpickling');
    }
    // A fresh Uint8Array is zero-filled, which supplies the padding.
    const padded = new Uint8Array(new ArrayBuffer(MAX_BYTES));
    padded.set(data);
    return new BufferReader(padded).uint64();
}
/**
 * Decodes an arbitrary-length little-endian byte sequence as an unsigned
 * BigInt.
 *
 * The bytes are zero-padded up to a multiple of four and consumed as
 * 32-bit little-endian words.
 *
 * @param data Byte sequence of any length (may be empty).
 * @returns {bigint} The decoded unsigned value; `0n` for empty input.
 */
function readUint64WithBigInt(data) {
    const WORD_BYTES = 4;
    const wordCount = Math.ceil(data.length / WORD_BYTES);
    const padded = new Uint8Array(wordCount * WORD_BYTES);
    padded.set(data);
    const words = new DataView(padded.buffer, 0, padded.byteLength);
    const shift = BigInt(32);
    let result = BigInt(0);
    // Fold from the most significant word down to the least significant.
    for (let wordIndex = wordCount - 1; wordIndex >= 0; wordIndex--) {
        result = (result << shift) | BigInt(words.getUint32(wordIndex * WORD_BYTES, true));
    }
    return result;
}
/**
 * Set provider that represents unpickled sets as plain arrays
 * (insertion order kept, duplicates not removed).
 */
const ArraySetProvider = {
    /** Returns a new empty array. */
    create() {
        return [];
    },
    /** Returns a new array holding a copy of the given items. */
    createWithItems(value) {
        return Array.from(value);
    },
    /** Appends `value` to the array-backed set. */
    addMethod(set, value) {
        set.push(value);
    },
};
/** Set provider that represents unpickled sets as native `Set` objects. */
const StdandardSetProvider = {
    /** Returns a new empty Set. */
    create() {
        return new Set();
    },
    /** Returns a new Set populated from the given iterable. */
    createWithItems(value) {
        return new Set(value);
    },
    /** Adds `value` to the Set. */
    addMethod(set, value) {
        set.add(value);
    },
};
/**
 * Selects the set provider for the configured representation.
 *
 * @param {string} type `'Set'` for native Sets, `'array'` for arrays.
 * @throws {Error} for any other value.
 */
const SetProviderFactory = (type) => {
    if (type === 'Set') {
        return StdandardSetProvider;
    }
    if (type === 'array') {
        return ArraySetProvider;
    }
    throw new Error(`Unknown unpickling type [${type}] of Set.`);
};
/**
 * Dictionary provider that represents unpickled dicts as plain objects.
 * Keys are coerced to property keys by normal property assignment.
 */
const ObjectDictionaryProvider = {
    /** Returns a new empty plain object. */
    create: () => ({}),
    /**
     * Stores `value` under `key` on `dict`.
     *
     * A key that stringifies to `__proto__` is written as an own data
     * property: plain assignment would hit the inherited `__proto__`
     * setter, silently dropping the entry and (for object values)
     * replacing the dictionary's prototype with data from the stream.
     */
    setMethod: function (dict, key, value) {
        if (String(key) === '__proto__') {
            Object.defineProperty(dict, '__proto__', {
                value,
                writable: true,
                enumerable: true,
                configurable: true,
            });
            return;
        }
        dict[key] = value;
    },
};
/** Dictionary provider that represents unpickled dicts as native `Map`s. */
const StdandardDictionaryProvider = {
    /** Returns a new empty Map. */
    create() {
        return new Map();
    },
    /** Stores `value` under `key` in the Map. */
    setMethod(dict, key, value) {
        dict.set(key, value);
    },
};
/**
 * Selects the dictionary provider for the configured representation.
 *
 * @param {string} type `'Map'` for native Maps, `'object'` for plain objects.
 * @throws {Error} for any other value.
 */
const DictionaryProviderFactory = (type) => {
    if (type === 'Map') {
        return StdandardDictionaryProvider;
    }
    if (type === 'object') {
        return ObjectDictionaryProvider;
    }
    throw new Error(`Unknown unpickling type [${type}] of Dictionary.`);
};
/**
 * Builds a placeholder constructor standing in for the Python global
 * `module.name`.
 *
 * - Invoked with `new`, the instance receives the constructor arguments
 *   as a hidden, read-only `args` property.
 * - Invoked as a plain function, a fresh inner constructor is created,
 *   tagged with the same `__module__`/`__name__`, and an instance of that
 *   inner constructor (also carrying `args`) is returned.
 *
 * Instances created with `new` additionally inherit `__setnewargs_ex__`,
 * which records its arguments as a hidden, read-only `kwargs` property.
 *
 * @param {string} module Python module name stored as `__module__`.
 * @param {string} name Python attribute name stored as `__name__`.
 * @returns {Function} The placeholder constructor.
 */
function createPObject(module, name) {
    // Attaches a non-enumerable, non-configurable, non-writable property.
    const defineHidden = (target, key, value) => {
        Object.defineProperty(target, key, {
            value: value,
            enumerable: false,
            configurable: false,
            writable: false,
        });
    };
    // Stamps the Python identity onto a prototype.
    const tagPrototype = (proto) => {
        proto.__module__ = module;
        proto.__name__ = name;
    };
    const PObject = function (...args) {
        if (new.target) {
            defineHidden(this, 'args', args);
            return;
        }
        // Plain call: build a one-off constructor and instantiate it.
        const PFunction = function (...args) {
            defineHidden(this, 'args', args);
        };
        tagPrototype(PFunction.prototype);
        return Reflect.construct(PFunction, args);
    };
    tagPrototype(PObject.prototype);
    PObject.prototype.__setnewargs_ex__ = function (...kwargs) {
        defineHidden(this, 'kwargs', kwargs);
    };
    return PObject;
}
/**
 * Baseline parser options; caller-supplied options are spread over these.
 *
 * - `nameResolver` maps every global to a placeholder built by
 *   `createPObject`.
 * - `persistentResolver` and `extensionResolver` reject every id/code.
 * - Sets default to arrays and dictionaries to plain objects.
 */
const DefualtOptions = {
    nameResolver: {
        resolve(module, name) {
            return createPObject(module, name);
        },
    },
    persistentResolver: {
        resolve(pid) {
            throw new Error(`Unregistered persistent id: \`${pid}\`.`);
        },
    },
    extensionResolver: {
        resolve(extCode) {
            // The code is reported in hexadecimal.
            const hexCode = extCode.toString(16);
            throw new Error(`Unregistered extension code: \`${hexCode}\`.`);
        },
    },
    unpicklingTypeOfSet: 'array',
    unpicklingTypeOfDictionary: 'object',
};
/**
 * Interpreter for Python pickle streams (protocol versions up to 5).
 *
 * The stream is executed as a stack machine: `stack` holds the values
 * being built, `metastack` holds the stacks suspended by MARK, and `memo`
 * holds values saved by the PUT/MEMOIZE family for later GET lookups.
 *
 * NOTE: REDUCE calls, and INST/OBJ/NEWOBJ construct, whatever the
 * configured resolvers return, so only register callables you trust when
 * parsing data from an untrusted source.
 */
class Parser {
    /**
     * @param {object} [options] Overrides spread over `DefualtOptions`
     *     (`nameResolver`, `persistentResolver`, `extensionResolver`,
     *     `unpicklingTypeOfSet`, `unpicklingTypeOfDictionary`). An optional
     *     `buffers` iterator supplies out-of-band buffers for NEXT_BUFFER.
     * @throws {Error} when a set/dictionary type is not recognised.
     */
    constructor(options) {
        this._options = { ...DefualtOptions, ...options };
        this._nameResolver = this._options.nameResolver;
        this._persistentResolver = this._options.persistentResolver;
        this._extensionResolver = this._options.extensionResolver;
        this._setProvider = SetProviderFactory(this._options.unpicklingTypeOfSet);
        this._dictionaryProvider = DictionaryProviderFactory(this._options.unpicklingTypeOfDictionary);
        this._buffers = options?.buffers;
    }
    /**
     * Parses a complete pickle held in a byte buffer.
     * @param {Uint8Array} buffer Pickle bytes.
     * @returns The value left on top of the stack at STOP.
     */
    parse(buffer) {
        const reader = new BufferReader(buffer);
        return this.read(reader);
    }
    /**
     * Executes opcodes from `reader` until STOP.
     * @param reader Object exposing the BufferReader read methods.
     * @returns The value on top of the stack when STOP is reached.
     * @throws {Error} on an unsupported protocol/opcode, a malformed
     *     STRING argument, missing out-of-band buffers, or when the input
     *     ends before STOP.
     */
    read(reader) {
        let stack = [];
        const metastack = [];
        const memo = new Map();
        while (reader.hasNext()) {
            const opcode = reader.byte();
            switch (opcode) {
                // ---- framing / protocol ----
                case 128: { // PROTO
                    const version = reader.byte();
                    if (version > 5) {
                        throw new Error(`Unsupported protocol version '${version}'.`);
                    }
                    break;
                }
                case 46: // STOP
                    return stack.pop();
                case 149: // FRAME: the 8-byte frame length is not needed here
                    reader.skip(8);
                    break;
                // ---- stack manipulation ----
                case 40: // MARK
                    metastack.push(stack);
                    stack = [];
                    break;
                case 49: // POP_MARK
                    stack = metastack.pop();
                    break;
                case 48: // POP
                    stack.pop();
                    break;
                case 50: // DUP
                    stack.push(stack[stack.length - 1]);
                    break;
                // ---- memo ----
                case 112: { // PUT
                    const index = parseInt(reader.line(), 10);
                    memo.set(index, stack[stack.length - 1]);
                    break;
                }
                case 113: // BINPUT
                    memo.set(reader.byte(), stack[stack.length - 1]);
                    break;
                case 114: // LONG_BINPUT
                    memo.set(reader.uint32(), stack[stack.length - 1]);
                    break;
                case 148: // MEMOIZE: the key is the current memo size
                    memo.set(memo.size, stack[stack.length - 1]);
                    break;
                case 103: { // GET
                    const index = parseInt(reader.line(), 10);
                    stack.push(memo.get(index));
                    break;
                }
                case 104: // BINGET
                    stack.push(memo.get(reader.byte()));
                    break;
                case 106: // LONG_BINGET
                    stack.push(memo.get(reader.uint32()));
                    break;
                // ---- scalars ----
                case 78: // NONE
                    stack.push(null);
                    break;
                case 136: // NEWTRUE
                    stack.push(true);
                    break;
                case 137: // NEWFALSE
                    stack.push(false);
                    break;
                case 73: { // INT ('01' and '00' encode booleans)
                    const value = reader.line();
                    if (value == '01') {
                        stack.push(true);
                    }
                    else if (value == '00') {
                        stack.push(false);
                    }
                    else {
                        stack.push(parseInt(value, 10));
                    }
                    break;
                }
                case 76: // LONG
                    stack.push(parseInt(reader.line(), 10));
                    break;
                case 70: // FLOAT
                    stack.push(parseFloat(reader.line()));
                    break;
                case 83: { // STRING: argument must be wrapped in matching quotes
                    const data = reader.line();
                    const isQuoted = data.length >= 2 && data[0] == data.slice(-1) && ['"', "'"].includes(data[0]);
                    if (!isQuoted) {
                        throw new Error('Insecure string pickle.');
                    }
                    stack.push(data.slice(1, -1));
                    break;
                }
                case 86: // UNICODE
                    stack.push(reader.line());
                    break;
                case 74: // BININT
                    stack.push(reader.int32());
                    break;
                case 75: // BININT1
                    stack.push(reader.byte());
                    break;
                case 77: // BININT2
                    stack.push(reader.uint16());
                    break;
                case 138: { // LONG1: little-endian two's-complement integer
                    const length = reader.byte();
                    const data = reader.bytes(length);
                    // A set top bit in the last byte marks a negative value.
                    // Lengths above 8 fall through to readUint64, which rejects them.
                    const isNegative = length > 0 && length <= 8 && (data[length - 1] & 0x80) !== 0;
                    let number;
                    if (isNegative) {
                        number = Number(BigInt.asIntN(length * 8, readUint64WithBigInt(data)));
                        if (!Number.isSafeInteger(number)) {
                            console.warn(number, 'exceeds MAX_SAFE_INTEGER. Precision may be lost');
                        }
                    }
                    else {
                        number = readUint64(data);
                    }
                    stack.push(number);
                    break;
                }
                case 139: { // LONG4: little-endian two's-complement integer
                    const length = reader.uint32();
                    const data = reader.bytes(length);
                    // Reinterpret the unsigned value with the sign bit of its own width.
                    const number = BigInt.asIntN(data.length * 8, readUint64WithBigInt(data));
                    stack.push(number);
                    break;
                }
                case 71: // BINFLOAT
                    stack.push(reader.float64());
                    break;
                // ---- bytes / strings ----
                case 66: // BINBYTES: the 4-byte length is unsigned
                    stack.push(reader.bytes(reader.uint32()));
                    break;
                case 67: // SHORT_BINBYTES
                    stack.push(reader.bytes(reader.byte()));
                    break;
                case 142: // BINBYTES8
                    stack.push(reader.bytes(reader.uint64()));
                    break;
                case 84: // BINSTRING
                    stack.push(reader.string(reader.uint32(), 'ascii'));
                    break;
                case 85: // SHORT_BINSTRING
                    stack.push(reader.string(reader.byte(), 'ascii'));
                    break;
                case 88: // BINUNICODE
                    stack.push(reader.string(reader.uint32(), 'utf-8'));
                    break;
                case 140: // SHORT_BINUNICODE
                    stack.push(reader.string(reader.byte(), 'utf-8'));
                    break;
                case 141: // BINUNICODE8
                    stack.push(reader.string(reader.uint64(), 'utf-8'));
                    break;
                // ---- tuples (represented as arrays) ----
                case 41: // EMPTY_TUPLE
                    stack.push([]);
                    break;
                case 116: { // TUPLE
                    const items = stack;
                    stack = metastack.pop();
                    stack.push(items);
                    break;
                }
                case 133: { // TUPLE1
                    stack.push([stack.pop()]);
                    break;
                }
                case 134: { // TUPLE2
                    const b = stack.pop();
                    const a = stack.pop();
                    stack.push([a, b]);
                    break;
                }
                case 135: { // TUPLE3
                    const c = stack.pop();
                    const b = stack.pop();
                    const a = stack.pop();
                    stack.push([a, b, c]);
                    break;
                }
                // ---- lists ----
                case 93: // EMPTY_LIST
                    stack.push([]);
                    break;
                case 108: { // LIST
                    const items = stack;
                    stack = metastack.pop();
                    stack.push(items);
                    break;
                }
                case 97: { // APPEND
                    const append = stack.pop();
                    stack[stack.length - 1].push(append);
                    break;
                }
                case 101: { // APPENDS
                    const appends = stack;
                    stack = metastack.pop();
                    const list = stack[stack.length - 1];
                    // Push one by one: spreading a very large batch into push()
                    // can exceed the engine's argument-count limit.
                    for (const item of appends) {
                        list.push(item);
                    }
                    break;
                }
                // ---- dictionaries ----
                case 125: { // EMPTY_DICT
                    const dict = this._dictionaryProvider.create();
                    stack.push(dict);
                    break;
                }
                case 100: { // DICT: items alternate key, value
                    const items = stack;
                    stack = metastack.pop();
                    const dict = this._dictionaryProvider.create();
                    // Go through the provider so Map-backed dictionaries get real
                    // entries rather than ad-hoc properties on the Map object.
                    for (let i = 0; i < items.length; i += 2) {
                        this._dictionaryProvider.setMethod(dict, items[i], items[i + 1]);
                    }
                    stack.push(dict);
                    break;
                }
                case 115: { // SETITEM
                    const value = stack.pop();
                    const key = stack.pop();
                    const dict = stack[stack.length - 1];
                    this._dictionaryProvider.setMethod(dict, key, value);
                    break;
                }
                case 117: { // SETITEMS
                    const items = stack;
                    stack = metastack.pop();
                    const dict = stack[stack.length - 1];
                    for (let pos = 0; pos < items.length; pos += 2) {
                        this._dictionaryProvider.setMethod(dict, items[pos], items[pos + 1]);
                    }
                    break;
                }
                // ---- sets ----
                case 143: { // EMPTY_SET
                    const set = this._setProvider.create();
                    stack.push(set);
                    break;
                }
                case 145: { // FROZENSET
                    const items = stack;
                    stack = metastack.pop();
                    const set = this._setProvider.createWithItems(items);
                    stack.push(set);
                    break;
                }
                case 144: { // ADDITEMS
                    const items = stack;
                    stack = metastack.pop();
                    const set = stack[stack.length - 1];
                    for (let i = 0; i < items.length; i++) {
                        this._setProvider.addMethod(set, items[i]);
                    }
                    break;
                }
                // ---- extension registry ----
                case 130: { // EXT1
                    const extCode = reader.byte();
                    const cls = this._extensionResolver.resolve(extCode);
                    stack.push(cls);
                    break;
                }
                case 131: { // EXT2
                    const extCode = reader.uint16();
                    const cls = this._extensionResolver.resolve(extCode);
                    stack.push(cls);
                    break;
                }
                case 132: { // EXT4
                    const extCode = reader.uint32();
                    const cls = this._extensionResolver.resolve(extCode);
                    stack.push(cls);
                    break;
                }
                // ---- globals and object construction ----
                case 99: { // GLOBAL
                    const module = reader.line();
                    const name = reader.line();
                    const cls = this._nameResolver.resolve(module, name);
                    stack.push(cls);
                    break;
                }
                case 147: { // STACK_GLOBAL
                    const name = stack.pop();
                    const module = stack.pop();
                    const cls = this._nameResolver.resolve(module, name);
                    stack.push(cls);
                    break;
                }
                case 105: { // INST
                    const module = reader.line();
                    const name = reader.line();
                    const args = stack;
                    stack = metastack.pop();
                    const cls = this._nameResolver.resolve(module, name);
                    const obj = Reflect.construct(cls, args);
                    stack.push(obj);
                    break;
                }
                case 111: { // OBJ: slice is [class, arg1, arg2, ...]
                    const args = stack;
                    // The class is the FIRST item after the mark, not the last.
                    const cls = args.shift();
                    stack = metastack.pop();
                    const obj = Reflect.construct(cls, args);
                    stack.push(obj);
                    break;
                }
                case 129: { // NEWOBJ
                    const args = stack.pop();
                    const cls = stack.pop();
                    const obj = Reflect.construct(cls, args);
                    stack.push(obj);
                    break;
                }
                case 146: { // NEWOBJ_EX
                    const kwargs = stack.pop();
                    const args = stack.pop();
                    const cls = stack.pop();
                    const obj = Reflect.construct(cls, args);
                    if (obj.__setnewargs_ex__) {
                        obj.__setnewargs_ex__(kwargs);
                    }
                    stack.push(obj);
                    break;
                }
                case 80: { // PERSID
                    const pid = reader.line();
                    const cls = this._persistentResolver.resolve(pid);
                    stack.push(cls);
                    break;
                }
                case 81: { // BINPERSID
                    const pid = stack.pop();
                    const cls = this._persistentResolver.resolve(pid);
                    stack.push(cls);
                    break;
                }
                case 82: { // REDUCE: calls the resolved callable with the argument tuple
                    const args = stack.pop();
                    const func = stack.pop();
                    stack.push(func(...args));
                    break;
                }
                case 98: { // BUILD: apply state to the object below it
                    const state = stack.pop();
                    const obj = stack[stack.length - 1];
                    // A custom __setstate__ takes precedence over the default copy.
                    if (obj.__setstate__) {
                        obj.__setstate__(state);
                        break;
                    }
                    if (obj instanceof Map) {
                        if (state instanceof Map && state.has('__dict__')) {
                            for (const [key, value] of state.get('__dict__')) {
                                obj.set(key, value);
                            }
                            break;
                        }
                        if (state.__dict__) {
                            for (const key in state.__dict__) {
                                obj.set(key, state.__dict__[key]);
                            }
                            break;
                        }
                    }
                    else {
                        if (state instanceof Map) {
                            Object.assign(obj, Object.fromEntries(state));
                        }
                        else {
                            Object.assign(obj, state);
                        }
                    }
                    break;
                }
                // ---- protocol 5 buffers ----
                case 150: // BYTEARRAY8
                    stack.push(reader.bytes(reader.uint64()));
                    break;
                case 151: { // NEXT_BUFFER
                    if (this._buffers == null) {
                        throw new Error('pickle stream refers to out-of-band data but no *buffers* argument was given');
                    }
                    const next = this._buffers.next();
                    if (next.done) {
                        throw new Error('not enough out-of-band buffers');
                    }
                    stack.push(next.value);
                    break;
                }
                case 152: // READONLY_BUFFER: value is left unchanged
                    stack.push(stack.pop());
                    break;
                default:
                    throw new Error(`Unsupported opcode '${opcode}'.`);
            }
        }
        // The input ran out before a STOP opcode was seen.
        throw new Error('Unexpected end of file.');
    }
}
/**
 * Name resolver backed by an explicit registry of `module.name` entries.
 * Lookups that miss are delegated to `onMissingName`, which subclasses may
 * override.
 */
class NameRegistry {
    constructor() {
        this._registry = new Map();
    }
    /**
     * Registers `func` for the given Python global.
     * @returns {NameRegistry} this registry, to allow chaining.
     * @throws {Error} when the same module/name pair is registered twice.
     */
    register(module, name, func) {
        const key = NameRegistry.getFullyQualifiedName(module, name);
        const alreadyKnown = this._registry.has(key);
        if (alreadyKnown) {
            throw new Error(`'${key}' is already registered.`);
        }
        this._registry.set(key, func);
        return this;
    }
    /**
     * Returns the registered entry, or the `onMissingName` fallback when
     * nothing (or a nullish value) is stored for the pair.
     */
    resolve(module, name) {
        const key = NameRegistry.getFullyQualifiedName(module, name);
        const entry = this._registry.get(key);
        if (entry !== null && entry !== undefined) {
            return entry;
        }
        return this.onMissingName(module, name);
    }
    /** Fallback for unregistered names: a placeholder from `createPObject`. */
    onMissingName(module, name) {
        return createPObject(module, name);
    }
    /** Builds the registry key by joining module and name with a dot. */
    static getFullyQualifiedName(module, name) {
        const separator = '.';
        return module + separator + name;
    }
}
// Public API of the bundle.
Object.assign(exports, {
    BufferReader: BufferReader,
    NameRegistry: NameRegistry,
    Parser: Parser,
});
}));
//# sourceMappingURL=index.js.map