// dirty-json
// Version: (unspecified)
// A permissive parser for dirty JSON data that does its best.
// 685 lines (543 loc) • 23.6 kB
// JavaScript
// < begin copyright >
// Copyright Ryan Marcus 2018
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// < end copyright >
;
const assert = require("assert");
const dJSON = require("../dirty-json");
const fs = require("fs");
/**
 * Checks that dirty-json (with its fallback disabled) parses well-formed JSON
 * to the same value as the built-in JSON.parse.
 *
 * @param {string} json - Text handed to both parsers.
 * @param {function} done - Completion callback; invoked with the (undefined)
 *     return value of jeq once the comparison has passed. A mismatch or a
 *     parse error is thrown before `done` is reached.
 */
function compareResults(json, done) {
    // Evaluate the permissive parser first, then the reference parser.
    const actual = dJSON.parse(json, {"fallback": false});
    const expected = JSON.parse(json);
    const outcome = jeq(actual, expected);
    done(outcome);
}
/**
 * Checks that dirty-json repairs malformed JSON into the expected value.
 *
 * First confirms that `invalid` really is rejected by the strict JSON.parse
 * (otherwise the test would prove nothing), then compares the dirty-json
 * result for `invalid` against JSON.parse of `valid`.
 *
 * @param {string} invalid - Malformed JSON text to repair.
 * @param {string} valid - Well-formed JSON describing the expected value.
 * @param {function} done - Completion callback. Receives an Error when
 *     `invalid` turned out to be valid JSON; otherwise it is invoked with the
 *     (undefined) return value of jeq once the comparison has passed.
 * @param {Object} [config] - Extra parser options, merged over
 *     `{ fallback: false }`.
 */
function compareResultsToValid(invalid, valid, done, config) {
    // Spreading undefined/null adds nothing, so a missing config is harmless.
    const testConfig = { "fallback": false, ...config };

    // Only the strict parse lives inside the try block: `done` must never be
    // called from within it, or an exception raised by `done` would be caught
    // and mistaken for "the input was invalid".
    let rejectedByStrictParser = false;
    try {
        JSON.parse(invalid);
    } catch (e) {
        rejectedByStrictParser = true;
    }

    if (!rejectedByStrictParser) {
        // Report the problem as an Error instance rather than a bare string.
        done(new Error("json was valid!"));
        return;
    }

    const result = jeq(dJSON.parse(invalid, testConfig), JSON.parse(valid));
    done(result);
}
/**
 * Asserts that two values have identical JSON serializations.
 *
 * The comparison is done on the JSON.stringify output, so it is sensitive to
 * property order and ignores anything JSON.stringify drops.
 *
 * @param {*} obj1 - Actual value.
 * @param {*} obj2 - Expected value.
 * @returns {undefined} Nothing on success.
 * @throws {assert.AssertionError} When the serializations differ.
 */
function jeq(obj1, obj2) {
    // strictEqual replaces the legacy, loosely-comparing assert.equal.
    assert.strictEqual(JSON.stringify(obj1), JSON.stringify(obj2));
}
describe("parser", function () {
// Suite: every input below is well-formed JSON, so compareResults expects the
// permissive parser (run with its fallback disabled) to agree with JSON.parse.
// A describe() callback only registers tests, so it declares no `done`.
describe("parse() on valid JSON", function () {
    it('should handle an empty object', function (done) {
        compareResults("{}", done);
    });
    it('should handle an empty list', function (done) {
        compareResults("[]", done);
    });
    it('should handle an single-item list', function (done) {
        compareResults("[4]", done);
    });
    it('should handle an single-item object', function (done) {
        compareResults("{ \"test\": 4 }", done);
    });
    it('should handle a list of numbers', function (done) {
        compareResults("[3, 4, -2, 5.5, 0.5, 0.32]", done);
    });
    it('should handle a list of numbers and strings', function (done) {
        compareResults("[3, 4, -2, \"5.5\", 0.5, 0.32]", done);
    });
    it('should handle a list of numbers, strings, and booleans', function (done) {
        compareResults("[3, 4, -2, \"5.5\", 0.5, false]", done);
    });
    it('should handle a list of numbers, strings, and booleans', function (done) {
        compareResults("[3, 4, -2, \"5.5\", 0.5, false, true, false]", done);
    });
    it('should handle a list of numbers, strings, and booleans', function (done) {
        compareResults('["some text", 4, "some more text", "text"]', done);
    });
    it('should handle a list of numbers, strings, and booleans', function (done) {
        compareResults('["[],4,5", "false", ","]', done);
    });
    it('should handle an object with mixed values', function (done) {
        compareResults('{ "test": 56, "test2": "hello!", "test3": false }', done);
    });
    it('should handle an object with list values', function (done) {
        compareResults('{ "test": [3, "str", false, 0.5], "test2": [1, 2, "str2"] }', done);
    });
    it('should handle embedded objects', function (done) {
        compareResults('{ "test": { "test": [1, 2, 3] } }', done);
    });
    it('should handle embedded lists', function (done) {
        compareResults('[1, 2, [3, 4], 5]', done);
    });
    it('should handle embedded lists when the first item is a list', function (done) {
        compareResults('[[1, false], 2, [3, 4], 5]', done);
    });
    it('should handle a nested list', function(done) {
        compareResults('{ "rows": [["this", "is", "failing"]] }', done);
    });
    it('should handle a nested list', function(done) {
        compareResults('{ "rows": [[], ["this", "is", "failing"]] }', done);
    });
    it('should handle a nested list', function(done) {
        compareResults('{ "rows": [["this", "is", "failing"], []] }', done);
    });
    it('should handle objects embedded in lists', function (done) {
        compareResults('[2, {"test": "str"}]', done);
    });
    it('should handle objects embedded in lists', function (done) {
        compareResults('[{"test": "str"}, 2, [3, {"test2": "str2"}], 5]', done);
    });
    it('should handle a complex JSON structure', function (done) {
        compareResults('[{"test": "str"}, [2, false, 0.4], [3, {"test2": ["str2", 6]}], 5]', done);
    });
    it('should handle a complex JSON structure', function (done) {
        compareResults('[{"test": "str"}, [2, false, ",", 0.4, "[val]"], [3, {"test2": ["str2", 6]}], 5]', done);
    });
    it('should handle nulls in lists', function (done) {
        compareResults("[null]", done);
    });
    it('should handle nulls in objects', function (done) {
        compareResults("{ \"test\": null}", done);
    });
    it('should handle nulls in objects and lists', function (done) {
        compareResults("{ \"test\": null, \"test2\": [4, null] }", done);
    });
    it('should handle arbitrary whitespace', function (done) {
        // Same document as the previous test, but with runs of spaces and
        // newlines between tokens so that whitespace handling is exercised.
        compareResults("{   \"test\":    null,\n\n  \"test2\":\n    [4,   null]   }", done);
    });
    it('should handle a list key with a single object value', function (done) {
        compareResults('{"key": [{"a":"b"}]}', done);
    });
    it('should handle multiple list keys with a single object value', function (done) {
        compareResults('{"key": [{"a":"b"}], "key2": [{"a": "b"}]}', done);
    });
    it('should handle a list key with a single object value', function (done) {
        compareResults('{"key": [{"a":2}]}', done);
    });
    it('should handle multiple list keys with a single object value', function (done) {
        compareResults('{"key": [{"a":2}], "key2": [{"a": 5.0}]}', done);
    });
    it('should handle a string key with a single object value', function (done) {
        compareResults('{"key": ["test"]}', done);
    });
    it('should handle escaped double-quotes', function (done) {
        compareResults('["this\\"is", "a test"]', done);
    });
    it('should handle curly braces in a quoted string', function (done) {
        compareResults('{"action": "with curly \\"${blahblah}\\""}', done);
    });
    it('should handle raw string values', function (done) {
        compareResults('"test"', done);
    });
    it('should differentiate strings and special values', function (done) {
        compareResults('["false", false, "true", true, "null", null]', done);
    });
    describe("should pass the NST / Minefield test cases", function() {
        // One test is registered per *.json fixture. The relative path means
        // the fixtures are looked up from the process working directory.
        fs.readdirSync("test/nst").forEach(f => {
            if (!f.endsWith(".json"))
                return;
            it(`should parse ${f} correctly`, function(done) {
                // The encoding must be a top-level option (or a plain string);
                // nested under an "options" key it is ignored and a Buffer is
                // returned instead of text.
                const str = fs.readFileSync("test/nst/" + f, { "encoding": "utf8" });
                compareResults(str, done);
            });
        });
    });
});
// Suite: malformed inputs that the permissive parser is expected to repair.
describe("parse() on invalid JSON", function () {
    // Registers one test asserting that the malformed `invalid` text is
    // repaired into the same value that the well-formed `valid` text parses to.
    const repairs = (title, invalid, valid) => {
        it(title, function (done) {
            compareResultsToValid(invalid, valid, done);
        });
    };

    repairs('should handle non-quoted object keys',
        '{test: 5}',
        '{"test": 5}');
    repairs('should handle single-quoted object keys',
        '{\'test\': 5}',
        '{"test": 5}');
    repairs('should handle single-quoted object values',
        '{\'test\': \'5\'}',
        '{"test": "5"}');
    repairs('should handle quotes-in-quotes (list)',
        '["some "quoted" text"]',
        '["some \\"quoted\\" text"]');
    repairs('should handle quotes-in-quotes (list)',
        '[3, "some "quoted" text", 2]',
        '[3, "some \\"quoted\\" text", 2]');
    repairs('should handle quotes-in-quotes (object)',
        '{"test": "some "quoted" text"}',
        '{"test": "some \\"quoted\\" text"}');
    repairs('should handle quotes-in-quotes (object)',
        '{"test0": false, "test": "some "quoted" text", "test1": 5}',
        '{"test0": false, "test": "some \\"quoted\\" text", "test1": 5}');
    repairs('should handle non-quoted string values',
        '{"this": that}',
        '{"this": "that"}');
    repairs('should handle non-quoted string values',
        '{"this": that, "another": "maybe"}',
        '{"this": "that", "another": "maybe"}');
    repairs('should handle non-quoted string values',
        '{"this": "that", "another": maybe}',
        '{"this": "that", "another": "maybe"}');
    repairs('should handle non-quoted string values',
        '{"this": that, "another": maybe}',
        '{"this": "that", "another": "maybe"}');
    repairs('should handle non-quoted string values in lists',
        '["this", that]',
        '["this", "that"]');
    repairs('should handle non-quoted string values in lists',
        '[this, that]',
        '["this", "that"]');
    repairs('should handle non-quoted string values in lists with trailing comma',
        '[this, that,]',
        '["this", "that"]');
    repairs('should handle non-quoted string values in lists',
        '[a, b]',
        '["a", "b"]');
    repairs('should handle non-quoted string values in lists with trailing comma',
        '[a,b,c,]',
        '["a", "b", "c"]');
    repairs('should handle non-quoted string values in nested lists with trailing comma',
        '[a,[b,d,],[c,],]',
        '["a", ["b", "d"], ["c"]]');
    repairs('should handle non-quoted singletons in nested lists with trailing comma',
        '[a,[b,],[c,],]',
        '["a", ["b"], ["c"]]');
    repairs('should handle object singletons in lists with trailing comma',
        '[{"a": 1},]',
        '[{"a": 1}]');
    repairs('should handle string singletons in lists with trailing comma',
        '["a",]',
        '["a"]');
    repairs('should handle integer singletons in lists with trailing comma',
        '[5,]',
        '[5]');
    repairs('should handle float singletons in lists with trailing comma',
        '[0.05,]',
        '[0.05]');
    repairs('should handle non-quoted string values in lists',
        '["this", that, "another", maybe]',
        '["this", "that", "another", "maybe"]');
    repairs('should handle raw strings before values in a map',
        '{"this": h"that"}',
        '{"this": "hthat"}');
    repairs('should handle raw strings before values in a list',
        '[1, "this", hex"test"]',
        '[1, "this", "hextest"]');
    repairs('should handle raw strings before values in a map',
        '{"this": hex3}',
        '{"this": "hex3"}');
    repairs('should handle raw strings before values in a list',
        '[1, "this", hex3]',
        '[1, "this", "hex3"]');
    repairs('should handle mixed quotes/single quotes with curly braces',
        '{"action": \'this has ${} in it\'}',
        '{"action": "this has ${} in it"}');
    repairs('should handle a decimal with no trailing digits',
        '{ "test": 5. }',
        '{"test": 5.0 }');
    repairs('should handle a decimal with no trailing digits',
        '["test", 5.]',
        '["test", 5.0 ]');
    repairs('should handle a decimal with no leading digits',
        '{ "test": .5 }',
        '{"test": 0.5 }');
    repairs('should handle multiple commas',
        '{"ss":[["Thu","7:00","Final",,"BAL","19","ATL","20",,,"56808",,"PRE4","2015"],["Thu","7:00","Final",,"NO","10","GB","38",,,"56809",,"PRE4","2015"]]}',
        '{"ss":[["Thu","7:00","Final","BAL","19","ATL","20","56808","PRE4","2015"],["Thu","7:00","Final","NO","10","GB","38","56809","PRE4","2015"]]}');
    repairs('should handle multiple commas',
        '{"test": [1,,,,,,,,,2]}',
        '{"test": [1,2]}');
    repairs('should handle dashes in unquoted strings',
        '{"test": cool-cat}',
        '{"test": "cool-cat"}');

    describe("with special characters", function () {
        it('should handle all kinds of escaped characters', function(done) {
            const parsed = dJSON.parse('" \\\\ \\"\\0!"');
            // Expected characters, position by position.
            const expectedChars = [' ', '\\', ' ', '"', '\0', '!'];
            expectedChars.forEach((ch, idx) => {
                assert.equal(parsed[idx], ch);
            });
            done();
        });
        repairs("should handle strange symbols",
            '{te!st: ug&*sd}',
            '{"te!st": "ug&*sd"}');
        repairs("should handle strange symbols",
            '{te!st: [ug&*s,d]}',
            '{"te!st": ["ug&*s", "d"]}');
        repairs("should handle strange symbols",
            '{te!st: [ug&*s,d,",,,"]}',
            '{"te!st": ["ug&*s", "d", ",,,"]}');
        repairs("should handle strange symbols",
            '{te!st: [ug&*s,d,",,,", {test: aga()in}]}',
            '{"te!st": ["ug&*s", "d", ",,,", {"test": "aga()in"}]}');
        repairs("should handle strange symbols mixed with numerics",
            '{te!st: [1,ug&*s,d,",,,", {test: aga()in}]}',
            '{"te!st": [1, "ug&*s", "d", ",,,", {"test": "aga()in"}]}');
        repairs("should handle strange symbols mixed with numerics",
            '{te!st: [1,.5,",",true,ug&*s,d,",,,", {test: aga()in}]}',
            '{"te!st": [1, 0.5, ",", true, "ug&*s", "d", ",,,", {"test": "aga()in"}]}');
    });

    describe("with new lines", function() {
        // A string value containing a raw newline.
        const multiline = 'a ' + "\n" + 'string';
        // A string value containing raw newlines and unescaped inner quotes.
        const misquoted = 'this\n"quote"\ntext';

        it('should handle a newline in a string in object', function(done) {
            const parsed = dJSON.parse('{ "test0": "' + multiline + '" }');
            assert.equal(parsed.test0, multiline);
            done();
        });
        it('should handle a newline in a string in a list', function(done) {
            const parsed = dJSON.parse('["' + multiline + '"]');
            assert.equal(parsed[0], multiline);
            done();
        });
        it('should handle newline in misquoted string in object', function(done) {
            const parsed = dJSON.parse('{ "test0": "' + misquoted + '"}');
            assert.equal(parsed.test0, misquoted);
            done();
        });
        it('should handle newline in misquoted string in object', function(done) {
            const parsed = dJSON.parse('{ "test1": false, "test0": "' + misquoted + '", test2: 5.5}');
            assert.equal(parsed.test0, misquoted);
            assert.equal(parsed.test1, false);
            assert.equal(parsed.test2, 5.5);
            done();
        });
        it('should handle newline in misquoted string in list', function(done) {
            const parsed = dJSON.parse('["' + misquoted + '"]');
            assert.equal(parsed[0], misquoted);
            done();
        });
        it('should handle newline in misquoted string in list', function(done) {
            const parsed = dJSON.parse('[5, 6, "' + misquoted + '", "test"]');
            assert.equal(parsed[2], misquoted);
            assert.equal(parsed[0], 5);
            assert.equal(parsed[1], 6);
            assert.equal(parsed[3], "test");
            done();
        });
    });

    describe("with embedded HTML", function() {
        // Each case wraps unescaped markup in a one-element list and expects
        // the markup back, unchanged, as that single element.
        [
            ['should handle an embedded DIV tag',
                '<div class="class">some text</div>'],
            ['should handle an embedded span tag',
                '<span class="class">some text</span>'],
            ['should handle an embedded span tag in a div tag',
                '<div class="divclass"><span class="class">some text</span></div>'],
        ].forEach(([title, html]) => {
            it(title, function(done) {
                const parsed = dJSON.parse('["' + html + '"]');
                assert.equal(parsed[0], html);
                assert.equal(parsed.length, 1);
                done();
            });
        });
    });

    it('should handle plain strings', done => {
        const parsed = dJSON.parse("this is a test");
        assert.equal("this is a test", parsed);
        done();
    });
});
// Suite: regression tests, one (or more) per reported ticket/issue.
describe("tickets", () => {
    // Registers a regression test: `invalid` must be repaired into the value
    // that `valid` parses to; the optional `config` is forwarded to
    // compareResultsToValid as extra parser options.
    const regression = (title, invalid, valid, config) => {
        it(title, done => {
            compareResultsToValid(invalid, valid, done, config);
        });
    };

    regression("should handle ticket #10",
        '{\n "some": [a,b,c,],\n "b": a\n}',
        '{"some": ["a", "b", "c"], "b": "a"}');
    regression("should handle ticket #14",
        '{ "key": "<div class="cool-css">some text</div>" }',
        '{ "key": "<div class=\\"cool-css\\">some text</div>"}');
    regression("should handle ticket #16",
        ' { "key": "<div class="coolCSS>text</div>" }',
        ' { "key": "<div class=\\"coolCSS>text</div>\\" }" }');
    regression("should handle ticket #16",
        ' { "key": "test"',
        ' { "key": "test" } ');
    regression("should handle ticket #16",
        ' { "key": test',
        ' { "key": "test" } ');
    regression("should handle ticket #16",
        ' [ "key", test',
        ' ["key", "test"]');
    regression("should handle ticket #15 (keep whitespace in quotes)",
        '{"claim": ""this is a test" of whitespace"}',
        '{"claim": "\\"this is a test\\" of whitespace"}');
    regression("should handle ticket #17 (unmatched single quotes, i.e. contractions)",
        '{"key": "this "isn\'t valid"",\n "other": true}',
        '{ "key": "this \'isn\'t valid\\"", "other": true }');
    regression("should handle ticket #19 (option for duplicate keys)",
        '{"key": 1, "key": 2, \'key\': [1, 2, 3]}',
        '{ "key": { "value": { "value": 1, "next": 2 }, "next": [ 1, 2, 3 ] } }',
        {"duplicateKeys": true});

    describe("should handle ticket #21 (trailing comma)", () => {
        regression("in objects",
            '{"key": 1, "test": 2,}',
            '{"key": 1, "test": 2 }');
        regression("in objects with newlines",
            '{"key": 1,\n"test": 2,\n}',
            '{"key": 1, "test": 2 }');
        regression("in lists",
            '["val1", 1,]',
            '["val1", 1]');
    });

    regression("should handle issue #27 (extra whitespace)",
        'id: \"test\"\nlang: \"en\"\nresult {\n source: \"agent\"\n}',
        '{"id":"test","lang":"en","result": { "source":"agent"}}');
    regression("should handle issue #28 (non-quoted key and value)",
        "{type: String, value: 'something'}",
        '{"type": "String", "value": "something"}');

    describe("should handle issue #30 (missing values)", () => {
        regression("as the last key of an object",
            '{"key": }',
            '{"key": null}');
        regression("as an internal key of an object",
            '{"key1": "test", "key": , "key2": "test"}',
            '{"key1": "test", "key": null, "key2": "test"}');
    });
});
// Suite: inputs that are beyond repair must make dJSON.parse throw.
// The tests are synchronous, so they take no `done` callback: assert.throws
// fails the test when the call returns normally, and keeps the completion
// signalling out of any try/catch that could swallow it.
describe("should throw exceptions for JSON that is too malformed to deal with", () => {
    it('should throw on }}', () => {
        assert.throws(() => dJSON.parse('\n\n\n\n\n }}'));
    });
    it('should throw on ]:"test"', () => {
        assert.throws(() => dJSON.parse(']:"test"'));
    });
});
});