@sapientpro/json-stream
Version:
A JSON stream parser
301 lines (300 loc) • 10.9 kB
JavaScript
import { Readable, Writable } from 'node:stream';
import { firstValueFrom, Subject } from "rxjs";
// Wildcard path segment: an observer registered under `Any` is offered every
// parsed value regardless of the key or array index found at that depth.
export const Any = Symbol('Any');
// Wildcard path segment: an observer registered under `Rest` is notified for
// every parsed value located one or more segments below that point, whatever
// the remaining path is.
export const Rest = Symbol('Rest');
/**
 * Writable stream that incrementally parses the JSON text written to it.
 *
 * While parsing, every completed value is dispatched to the observers
 * registered for its path (see `observe`), and the characters of a string
 * value are forwarded to a `Readable` registered for its exact path (see
 * `stream`). When the root value is complete the stream emits a `'value'`
 * event carrying it; a parse failure is emitted as an `'error'` event.
 */
export class JsonStream extends Writable {
  /**
   * Root of the observer tree. Each node has `children` (keyed by path
   * segment, `Any` or `Rest`) and optionally an `observer` (rxjs Subject)
   * and a `stream` (Readable).
   */
  #observers = { children: {} };

  /**
   * @param {string} [start=''] Marker text. When non-empty, parsing begins
   *   right after the first occurrence of this text (and any whitespace that
   *   follows it); when empty, parsing begins at the first character.
   * @param {boolean} [collectJson=false] When true, already-consumed input is
   *   kept in the internal buffer instead of being trimmed away.
   */
  constructor(start = '', collectJson = false) {
    // Consumed input is dropped from the buffer once `pos` passes this offset.
    const TRIM_THRESHOLD = 512;
    // Group 1: the number literal. Group 4: one following character proving
    // the literal is complete (anything that could not extend it).
    const NUMBER_PREFIX = /^(-?\d+(\.\d+)?([eE][+-]?\d+)?)([^.eE])?/;
    const SIMPLE_ESCAPES = new Map([
      ['t', '\t'],
      ['r', '\r'],
      ['n', '\n'],
      ['b', '\b'],
      ['f', '\f'],
    ]);

    let buffer = '';
    let pos = 0;
    // Resolved by the parser when it has used up the buffered input (or has
    // finished); `write`/`final` wait on it before invoking their callback.
    let consumed = Promise.withResolvers();
    // Resolved by `write` when new input has been appended to the buffer.
    let arrived = Promise.withResolvers();

    const syntaxError = () => new SyntaxError('Json syntax error at ' + pos);

    /**
     * Waits until the character at `pos + shift` is buffered.
     * `callback`, when given, runs each time the parser is about to wait.
     * NOTE(review): only `write` resolves `arrived`, so once the stream has
     * ended this wait never finishes (truncated input stalls the parser).
     */
    const next = async (shift = 0, callback) => {
      while (pos + shift >= buffer.length) {
        callback?.();
        const { resolve } = consumed;
        consumed = Promise.withResolvers();
        resolve();
        await arrived.promise;
      }
      // Drop the already-parsed prefix so the buffer does not grow unbounded.
      if (!collectJson && pos > TRIM_THRESHOLD) {
        buffer = buffer.substring(pos);
        pos = 0;
      }
      return this.writable || pos < buffer.length;
    };

    /** Skips input up to and including the `start` marker, if one was given. */
    const waitStart = async () => {
      const length = start?.length ?? 0;
      if (!length) {
        return;
      }
      while (await next(length)) {
        const startPos = buffer.indexOf(start, pos);
        if (startPos >= 0) {
          pos = startPos + start.length;
          await skipSpaces();
          buffer = buffer.substring(pos);
          pos = 0;
          break;
        }
        // Keep the last `length - 1` characters: the marker may straddle
        // the boundary between this chunk and the next one.
        pos = buffer.length - length + 1;
      }
    };

    /** Advances `pos` past whitespace. */
    const skipSpaces = async () => {
      while (await next()) {
        if (buffer.at(pos).trim() !== '') {
          break;
        }
        ++pos;
      }
    };

    /** Consumes the keyword `text` at `pos` and returns `result`. */
    const parseLiteral = async (text, result) => {
      await next(text.length - 1);
      if (buffer.substring(pos, pos + text.length) !== text) {
        throw syntaxError();
      }
      pos += text.length;
      return result;
    };

    /**
     * Parses one JSON value starting at `pos`, notifies the observers
     * registered for `path`, and returns the value.
     * @throws {SyntaxError} on malformed input.
     */
    const parse = async (path = []) => {
      await skipSpaces();
      let value;
      switch (buffer.at(pos)) {
        case '{': {
          pos++;
          value = {};
          while (await next()) {
            await skipSpaces();
            if (buffer.at(pos) === '}') {
              ++pos;
              break;
            }
            const name = await parseString();
            await skipSpaces();
            if (buffer.at(pos) !== ':') {
              throw syntaxError();
            }
            ++pos;
            const member = await parse([...path, name]);
            if (name === '__proto__') {
              // Plain assignment would replace the object's prototype;
              // define an own property instead, as JSON.parse does.
              Object.defineProperty(value, name, {
                value: member,
                writable: true,
                enumerable: true,
                configurable: true,
              });
            } else {
              value[name] = member;
            }
            await skipSpaces();
            if (buffer.at(pos) === ',') {
              ++pos;
            }
          }
          break;
        }
        case '[': {
          ++pos;
          let index = 0;
          value = [];
          while (await next()) {
            await skipSpaces();
            if (buffer.at(pos) === ']') {
              ++pos;
              break;
            }
            value.push(await parse([...path, index]));
            ++index;
            await skipSpaces();
            if (buffer.at(pos) === ',') {
              ++pos;
            }
          }
          break;
        }
        case '"':
          value = await parseString(path);
          break;
        case 't':
          value = await parseLiteral('true', true);
          break;
        case 'f':
          value = await parseLiteral('false', false);
          break;
        case 'n':
          value = await parseLiteral('null', null);
          break;
        default:
          // Number. A literal that reaches the end of the buffered input may
          // continue in the next chunk, so wait for one more character and
          // match again until a terminating character is seen.
          for (;;) {
            const str = buffer.substring(pos);
            const match = str.match(NUMBER_PREFIX);
            const moreMayCome = this.writable || this.writableLength > 0;
            if (moreMayCome && match && match[4] === undefined) {
              await next(str.length);
              continue;
            }
            if (!match) {
              throw syntaxError();
            }
            value = Number(match[1]);
            pos += match[1].length;
            break;
          }
      }
      pushValue(this.#observers, path, value);
      return value;
    };

    /**
     * Parses a string whose opening quote is at `pos` and returns its
     * decoded content. When `path` is given and a Readable is registered for
     * exactly that path, decoded text is pushed to it as it becomes
     * available and the Readable is ended when the string closes.
     */
    const parseString = async (path) => {
      let value = '';
      let chunk = '';
      ++pos; // opening quote
      const stream = path && this.#resolveDesc(path, false)?.stream;
      // Hands the text decoded so far to the result and to the Readable.
      const flush = () => {
        value += chunk;
        stream?.push(chunk);
        chunk = '';
      };
      loop: while (await next(0, flush)) {
        const char = buffer.at(pos);
        switch (char) {
          case '"':
            ++pos;
            break loop;
          case '\\': {
            ++pos;
            await next();
            const escaped = buffer.at(pos);
            if (escaped === 'u') {
              await next(4);
              chunk += String.fromCharCode(
                Number.parseInt(buffer.substring(pos + 1, pos + 5), 16),
              );
              pos += 5;
            } else {
              // Unknown escapes (including \" \\ \/) yield the character itself.
              chunk += SIMPLE_ESCAPES.get(escaped) ?? escaped;
              ++pos;
            }
            break;
          }
          default:
            chunk += char;
            ++pos;
            break;
        }
      }
      if (chunk) {
        flush();
      }
      stream?.push(null);
      return value;
    };

    /**
     * Delivers `{ path, value }` to every observer whose registered path
     * matches `path`, honouring the `Any` and `Rest` wildcards. `path` is
     * walked by index and never modified, so each branch sees the same
     * remaining segments.
     */
    const pushValue = (observers, path, value, originalPath = [...path], depth = 0) => {
      if (depth >= path.length) {
        observers.observer?.next({
          path: originalPath,
          value,
        });
        return;
      }
      const { children } = observers;
      const key = path[depth];
      // Own-property check: keys such as "constructor" must not resolve to
      // members inherited from Object.prototype.
      if (Object.hasOwn(children, key)) {
        pushValue(children[key], path, value, originalPath, depth + 1);
      }
      if (children[Any]) {
        pushValue(children[Any], path, value, originalPath, depth + 1);
      }
      if (children[Rest]) {
        // `Rest` swallows whatever is left of the path.
        pushValue(children[Rest], path, value, originalPath, path.length);
      }
    };

    /** Ends the Readable and completes the Subject of `observer` and all descendants. */
    const cleanup = (observer) => {
      observer.stream?.push(null);
      observer.observer?.complete();
      // Reflect.ownKeys also yields the symbol keys `Any` and `Rest`.
      for (const key of Reflect.ownKeys(observer.children)) {
        cleanup(observer.children[key]);
      }
    };

    super({
      defaultEncoding: 'utf-8',
      construct(callback) {
        // Start the parser; it runs concurrently with the writes feeding it.
        waitStart()
          .then(() => parse())
          .then((value) => {
            this.emit('value', value);
            consumed.resolve();
          })
          .catch((e) => {
            this.emit('error', e);
          });
        callback();
      },
      async write(chunk, encoding, callback) {
        buffer += chunk.toString('utf-8');
        // Acknowledge the write only after the parser has used this input.
        consumed.promise.then(() => {
          callback();
        });
        const { resolve } = arrived;
        arrived = Promise.withResolvers();
        resolve();
      },
      final(callback) {
        consumed.promise.then(() => {
          callback();
        }, callback);
      },
      destroy(error, callback) {
        cleanup(this.#observers);
        callback(error);
      },
    });
  }

  /**
   * Finds the observer-tree node for `path`.
   * @param {string|Array<string|number|symbol>} path Dot-separated string or
   *   array of segments.
   * @param {boolean} [create=true] Create missing nodes along the way.
   * @returns {object|null} The node, or `null` when it is missing and
   *   `create` is false.
   */
  #resolveDesc(path, create = true) {
    if (typeof path === 'string') {
      path = path.split('.');
    }
    let observer = this.#observers;
    for (const key of path) {
      if (Object.hasOwn(observer.children, key)) {
        observer = observer.children[key];
      } else if (create) {
        observer = observer.children[key] = { children: {} };
      } else {
        return null;
      }
    }
    return observer;
  }

  /**
   * Returns the Subject that emits `{ path, value }` for every parsed value
   * matching `path`; the same Subject is returned for repeated calls with
   * the same path. The default empty path observes the root value.
   */
  observe(path = []) {
    return (this.#resolveDesc(path).observer ??= new Subject());
  }

  /**
   * Creates a Readable that receives the decoded text of the string value
   * found at exactly `path`, piece by piece as input arrives.
   * @throws {Error} when a stream was already created for `path`.
   */
  stream(path) {
    const desc = this.#resolveDesc(path);
    if (desc.stream) {
      throw new Error('Stream already exists');
    }
    return (desc.stream = new Readable({
      encoding: 'utf-8',
      read() {
        // Data is pushed by the parser; nothing to pull.
      },
    }));
  }

  /** Resolves with the first value emitted for `path` (see `observe`). */
  async value(path = []) {
    const { value } = await firstValueFrom(this.observe(path));
    return value;
  }
}