json24
Version:
FuzzyJSONParser is a robust JSON parser designed to handle and recover data from JSON strings with extraneous text and incomplete structures. It can parse JSON strings that include redundant pre/post content and recover from missing closing characters like ", ], and }.
268 lines (267 loc) • 10.1 kB
JavaScript
"use strict";
/**
 * Shallow-merge helper in the shape emitted by the TypeScript compiler.
 * Reuses an `__assign` already present on `this` when there is one; otherwise,
 * on its first call it rebinds itself to `Object.assign` (or to a manual
 * own-property copy when `Object.assign` is unavailable) and forwards the call.
 */
var __assign = (this && this.__assign) || function () {
    // Manual fallback: copy own enumerable properties of every source onto the target.
    var copyOwnProperties = function (target) {
        var sourceCount = arguments.length;
        for (var position = 1; position < sourceCount; position += 1) {
            var source = arguments[position];
            for (var key in source) {
                if (Object.prototype.hasOwnProperty.call(source, key)) {
                    target[key] = source[key];
                }
            }
        }
        return target;
    };
    // Self-replace so later calls go straight to the chosen implementation.
    __assign = Object.assign || copyOwnProperties;
    return __assign.apply(this, arguments);
};
// Flag the CommonJS exports object with `__esModule` (presumably so ES-module
// interop helpers in importing code recognise it as transpiled output — confirm
// against the build tooling).
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the export slot up front; it is overwritten with the class once the
// class has been defined further down in this file.
exports.FuzzyJsonParser = void 0;
/**
 * A robust JSON parser designed to handle and recover data from JSON strings
 * with extraneous text and incomplete structures. Ideal for parsing JSON strings that include
 * redundant pre/post content or are missing closing characters like \", ], and }.
 *
 * Strategy: try the native `JSON.parse` first; if that fails, cut the input
 * into brace/bracket-delimited candidate segments and run a lenient
 * recursive-descent parser over each one until a segment yields a value that
 * carries all required keys.
 */
/**
 * @typedef {Object} ParseOptions
 * @property {string} [appendStrOnEnd] - Marker appended to a string value whose closing quote is missing.
 * @property {boolean} [hasExplicitUndefined] - When true, object keys whose value is `undefined` are kept.
 */
var FuzzyJsonParser = /** @class */ (function () {
    /** Number of leading input characters echoed in the fallback log line. */
    var LOG_PREVIEW_LENGTH = 38;
    /** Single-character escape sequences (the `\uXXXX` form is handled separately). */
    var ESCAPE_CHARS = {
        '"': '"',
        '\\': '\\',
        '/': '/',
        b: '\b',
        f: '\f',
        n: '\n',
        r: '\r',
        t: '\t'
    };
    /**
     * Assigns `value` as an OWN property of `target`.
     * A plain `target[key] = value` with the key "__proto__" would replace the
     * object's prototype instead of storing data, so that key is defined explicitly.
     */
    function setOwnProperty(target, key, value) {
        if (key === '__proto__') {
            Object.defineProperty(target, key, {
                value: value,
                writable: true,
                enumerable: true,
                configurable: true
            });
        }
        else {
            target[key] = value;
        }
    }
    /**
     * @param {ParseOptions} [options] - Instance-wide defaults.
     *   `appendStrOnEnd` defaults to '...', `hasExplicitUndefined` to false.
     */
    function FuzzyJsonParser(options) {
        var opts = options === undefined ? {} : options;
        this.index = 0;
        this.str = "";
        this.appendStrOnEnd = opts.appendStrOnEnd === undefined ? '...' : opts.appendStrOnEnd;
        this.hasExplicitUndefined = opts.hasExplicitUndefined === undefined ? false : opts.hasExplicitUndefined;
    }
    /**
     * Parses a JSON string while handling extraneous and incomplete structures.
     *
     * When the input is already valid JSON the native result is returned as-is
     * and `requiredKeys` is NOT consulted; the key filter only selects among the
     * candidate segments of the recovery path. Entering the recovery path writes
     * one `console.log` line with a short preview of the input.
     *
     * @param {string} jsonStr - The JSON string to parse.
     * @param {string[]} requiredKeys - An optional array of keys that must be present in the JSON string.
     * @param {ParseOptions} [options] - Optional settings to override the default parser options.
     * @returns {any} The parsed JSON object or `null` if parsing fails.
     */
    FuzzyJsonParser.prototype.parse = function (jsonStr, requiredKeys, options) {
        try {
            return JSON.parse(jsonStr);
        }
        catch (error) {
            // Recovery works on text only; anything else simply failed to parse.
            if (typeof jsonStr !== 'string')
                return null;
            console.log('Classic JSON parse error', jsonStr.slice(0, LOG_PREVIEW_LENGTH) + '...');
            var overrides = options || {};
            this.currParseSettings = {
                appendStrOnEnd: overrides.appendStrOnEnd !== undefined
                    ? overrides.appendStrOnEnd
                    : this.appendStrOnEnd,
                hasExplicitUndefined: overrides.hasExplicitUndefined !== undefined
                    ? overrides.hasExplicitUndefined
                    : this.hasExplicitUndefined
            };
            var jsonSegments = this.extractJsonLikeSegments(jsonStr);
            return this.findAndParseValidJsonSegment(jsonSegments, requiredKeys);
        }
    };
    /**
     * Splits `input` into top-level `{...}` / `[...]` candidates.
     * Brackets inside double-quoted strings of a candidate are ignored so that a
     * value such as "}" does not end the segment early. A candidate still open
     * when the input ends is returned as a final, incomplete segment.
     *
     * @param {string} input - Text possibly containing JSON.
     * @returns {string[]} Candidate segments in order of appearance.
     */
    FuzzyJsonParser.prototype.extractJsonLikeSegments = function (input) {
        var segments = [];
        var stack = [];
        var jsonStart = -1;
        var inString = false;
        for (var i = 0; i < input.length; i++) {
            var char = input[i];
            if (inString) {
                if (char === '\\')
                    i++; // the escaped character can never close the string
                else if (char === '"')
                    inString = false;
                continue;
            }
            if (char === '"') {
                // Quotes in the surrounding prose are irrelevant; only track
                // strings once a candidate segment has been opened.
                if (jsonStart !== -1)
                    inString = true;
                continue;
            }
            if (char === '{' || char === '[') {
                if (jsonStart === -1)
                    jsonStart = i;
                stack.push(char);
            }
            else if ((char === '}' && stack[stack.length - 1] === '{') || (char === ']' && stack[stack.length - 1] === '[')) {
                stack.pop();
                if (stack.length === 0) {
                    segments.push(input.substring(jsonStart, i + 1));
                    jsonStart = -1;
                }
            }
        }
        // Push incomplete segment
        if (jsonStart !== -1)
            segments.push(input.substring(jsonStart));
        return segments;
    };
    /**
     * Returns the first segment that parses to a truthy value containing every
     * required key, or `null` when no segment qualifies.
     */
    FuzzyJsonParser.prototype.findAndParseValidJsonSegment = function (segments, requiredKeys) {
        for (var i = 0; i < segments.length; i++) {
            var obj = this.parseBase(segments[i]);
            if (obj && this.containsRequiredKeys(obj, requiredKeys))
                return obj;
        }
        return null;
    };
    /**
     * Checks that `obj` has every key in `requiredKeys` as an OWN property, so
     * inherited names such as "toString" do not count as present.
     * An absent or empty key list always passes.
     */
    FuzzyJsonParser.prototype.containsRequiredKeys = function (obj, requiredKeys) {
        if (!requiredKeys || requiredKeys.length === 0)
            return true;
        if (typeof obj !== 'object' || obj === null)
            return false;
        return requiredKeys.every(function (key) {
            return Object.prototype.hasOwnProperty.call(obj, key);
        });
    };
    /** Parses one segment: native `JSON.parse` first, lenient parser on failure. */
    FuzzyJsonParser.prototype.parseBase = function (jsonStr) {
        try {
            return JSON.parse(jsonStr);
        }
        catch (error) {
            this.resetParser(jsonStr);
            return this.parseInternal();
        }
    };
    /** Points the lenient parser at a new input and rewinds the cursor. */
    FuzzyJsonParser.prototype.resetParser = function (jsonStr) {
        this.index = 0;
        this.str = jsonStr;
    };
    /** Runs the lenient parser; logs and returns `null` if it throws. */
    FuzzyJsonParser.prototype.parseInternal = function () {
        try {
            return this.parseValue();
        }
        catch (error) {
            console.error("Error parsing JSON:", error);
            return null;
        }
    };
    /**
     * Parses the value at the cursor, dispatching on its first character.
     * Returns `undefined` WITHOUT moving the cursor when the character starts no
     * known value (including end of input); callers must guarantee progress.
     */
    FuzzyJsonParser.prototype.parseValue = function () {
        this.skipWhitespace();
        var char = this.str[this.index];
        if (char === '{')
            return this.parseObject();
        if (char === '[')
            return this.parseArray();
        if (char === '"')
            return this.parseString();
        if (char === '-' || this.isDigit(char))
            return this.parseNumber();
        if (char === 't' || char === 'f')
            return this.parseBoolean();
        if (char === 'n')
            return this.parseNull();
        if (char === 'u')
            return this.parseUndefined();
        return undefined;
    };
    /**
     * Parses an object starting at `{`. A missing closing `}` is tolerated: the
     * members read so far are returned when the input ends.
     */
    FuzzyJsonParser.prototype.parseObject = function () {
        this.index++; // consume '{'
        var obj = {};
        var keepUndefined = Boolean(this.currParseSettings && this.currParseSettings.hasExplicitUndefined);
        while (this.index < this.str.length) {
            this.skipWhitespace();
            if (this.str[this.index] === '}') {
                this.index++;
                return obj;
            }
            var key = this.parseString();
            this.skipWhitespace();
            // The separator is consumed without being validated (lenient by design).
            this.index++;
            this.skipWhitespace();
            var valueStart = this.index;
            var value = this.parseValue();
            if (value === undefined && this.index === valueStart) {
                // Unrecognised token: resynchronise at the next member separator
                // or closing brace instead of misreading the token as a key.
                while (this.index < this.str.length && this.str[this.index] !== ',' && this.str[this.index] !== '}')
                    this.index++;
            }
            if (value !== undefined || keepUndefined)
                setOwnProperty(obj, key, value);
            // Separator handling runs even for skipped members so the cursor is
            // positioned on the next key.
            this.skipWhitespace();
            if (this.str[this.index] === ',')
                this.index++;
            else if (this.str[this.index] === '}') {
                this.index++;
                return obj;
            }
            else if (this.index >= this.str.length)
                return obj;
        }
        return obj;
    };
    /**
     * Parses an array starting at `[`. A missing closing `]` is tolerated.
     * Characters that start no valid value are skipped one at a time, which
     * guarantees the loop always makes progress.
     */
    FuzzyJsonParser.prototype.parseArray = function () {
        this.index++; // consume '['
        var arr = [];
        while (this.index < this.str.length) {
            this.skipWhitespace();
            if (this.str[this.index] === ']') {
                this.index++;
                return arr;
            }
            var elementStart = this.index;
            var element = this.parseValue();
            if (this.index === elementStart) {
                // Nothing was consumed (garbage character or end of input):
                // step over it and do not record a phantom element.
                this.index++;
            }
            else {
                arr.push(element);
            }
            this.skipWhitespace();
            if (this.str[this.index] === ',')
                this.index++;
            else if (this.str[this.index] === ']') {
                this.index++;
                return arr;
            }
            else if (this.index >= this.str.length)
                return arr;
        }
        return arr;
    };
    /**
     * Parses a double-quoted string; the cursor is assumed to be on the opening
     * quote. Decodes the standard escapes including `\uXXXX`; an unknown escape
     * yields the escaped character itself. If the input ends before the closing
     * quote, the configured `appendStrOnEnd` marker is appended to the result.
     */
    FuzzyJsonParser.prototype.parseString = function () {
        var result = '';
        this.index++; // consume opening quote
        while (this.index < this.str.length) {
            var char = this.str[this.index];
            if (char === '"') {
                this.index++;
                return result;
            }
            if (char === '\\') {
                this.index++;
                if (this.index >= this.str.length)
                    break; // input ended right after the backslash
                var escaped = this.str[this.index];
                if (escaped === 'u') {
                    var hex = this.str.slice(this.index + 1, this.index + 5);
                    if (/^[0-9a-fA-F]{4}$/.test(hex)) {
                        result += String.fromCharCode(parseInt(hex, 16));
                        this.index += 4;
                    }
                    else if (hex.length < 4 && /^[0-9a-fA-F]*$/.test(hex)) {
                        // Escape cut off by the end of the input: drop the fragment.
                        this.index = this.str.length;
                        break;
                    }
                    else {
                        result += 'u';
                    }
                }
                else if (Object.prototype.hasOwnProperty.call(ESCAPE_CHARS, escaped)) {
                    result += ESCAPE_CHARS[escaped];
                }
                else {
                    result += escaped;
                }
            }
            else {
                result += char;
            }
            this.index++;
        }
        // Unclosed string: a per-call setting wins over the instance default, and
        // an explicit empty string is respected.
        var settings = this.currParseSettings;
        var suffix = settings && settings.appendStrOnEnd != null ? settings.appendStrOnEnd : this.appendStrOnEnd;
        return result + suffix;
    };
    /**
     * Parses a number: optional '-', digits, optional fraction, optional exponent.
     * The consumed text is converted with `Number`, so a cut-off form such as a
     * lone "-" or "1e" produces `NaN`.
     */
    FuzzyJsonParser.prototype.parseNumber = function () {
        var start = this.index;
        if (this.str[this.index] === '-')
            this.index++;
        while (this.isDigit(this.str[this.index]))
            this.index++;
        if (this.str[this.index] === '.') {
            this.index++;
            while (this.isDigit(this.str[this.index]))
                this.index++;
        }
        if (this.str[this.index] === 'e' || this.str[this.index] === 'E') {
            this.index++;
            if (this.str[this.index] === '-' || this.str[this.index] === '+')
                this.index++;
            while (this.isDigit(this.str[this.index]))
                this.index++;
        }
        return Number(this.str.slice(start, this.index));
    };
    /** Parses `true` or `false`; throws on anything else. */
    FuzzyJsonParser.prototype.parseBoolean = function () {
        if (this.str.startsWith('true', this.index)) {
            this.index += 4;
            return true;
        }
        else if (this.str.startsWith('false', this.index)) {
            this.index += 5;
            return false;
        }
        throw new Error('Unexpected token in JSON');
    };
    /** Parses `null`; throws on anything else. */
    FuzzyJsonParser.prototype.parseNull = function () {
        if (this.str.startsWith('null', this.index)) {
            this.index += 4;
            return null;
        }
        throw new Error('Unexpected token in JSON');
    };
    /** Parses the non-standard literal `undefined`; throws on anything else. */
    FuzzyJsonParser.prototype.parseUndefined = function () {
        if (this.str.startsWith('undefined', this.index)) {
            this.index += 9;
            return undefined;
        }
        throw new Error('Unexpected token in JSON');
    };
    /** Advances the cursor past any whitespace. */
    FuzzyJsonParser.prototype.skipWhitespace = function () {
        while (/\s/.test(this.str[this.index])) {
            this.index++;
        }
    };
    /** True when `char` is a single ASCII digit; false for `undefined` (end of input). */
    FuzzyJsonParser.prototype.isDigit = function (char) {
        return char >= '0' && char <= '9';
    };
    return FuzzyJsonParser;
}());
// Publish the parser class as the module's named CommonJS export.
exports.FuzzyJsonParser = FuzzyJsonParser;