dirty-json
Version:
A permissive parser for dirty JSON data that does its best
282 lines (239 loc) • 9.6 kB
JavaScript
// < begin copyright >
// Copyright Ryan Marcus 2018
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
//
// < end copyright >
;
var assert = require("assert");
var lexer = require("../lexer");
// terminals
const LEX_KV = 0;
const LEX_KVLIST = 1;
const LEX_VLIST = 2;
const LEX_BOOLEAN = 3;
const LEX_COVALUE = 4;
const LEX_CVALUE = 5;
const LEX_FLOAT = 6;
const LEX_INT = 7;
const LEX_KEY = 8;
const LEX_LIST = 9;
const LEX_OBJ = 10;
const LEX_QUOTE = 11;
const LEX_RB = 12;
const LEX_RCB = 13;
const LEX_TOKEN = 14;
const LEX_VALUE = 15;
// non-terminals
const LEX_COLON = -1;
const LEX_COMMA = -2;
const LEX_LCB = -3;
const LEX_LB = -4;
const LEX_DOT = -5;
describe("lexer", function () {
describe("getAllTokens()", function () {
it('should handle an empty string', function (done) {
const res = lexer.getAllTokens("");
assert.equal(res.length, 0);
done();
});
it('should handle quoted strings', function (done) {
const res = lexer.getAllTokens('"this is a test"');
assert.equal(res.length, 1);
assert.equal(res[0].value, "this is a test");
done();
});
it('should handle integers', function (done) {
const res = lexer.getAllTokens('5600');
assert.equal(res.length, 1);
assert.equal(res[0].type, LEX_INT);
assert.equal(res[0].value, 5600);
done();
});
it('should handle negative integers', function (done) {
const res = lexer.getAllTokens('-5600');
assert.equal(res.length, 1);
assert.equal(res[0].type, LEX_INT);
assert.equal(res[0].value, -5600);
done();
});
it('should handle floats', function (done) {
const res = lexer.getAllTokens('5600.5');
assert.equal(res.length, 1);
assert.equal(res[0].type, LEX_FLOAT);
assert.equal(res[0].value, 5600.5);
done();
});
it('should handle negative floats', function (done) {
const res = lexer.getAllTokens('-5600.5');
assert.equal(res.length, 1);
assert.equal(res[0].type, LEX_FLOAT);
assert.equal(res[0].value, -5600.5);
done();
});
it('should handle floats without leading digits', function (done) {
const res = lexer.getAllTokens('.5');
assert.equal(res.length, 1);
assert.equal(res[0].type, LEX_FLOAT);
assert.equal(res[0].value, 0.5);
done();
});
it('should handle negative floats without leading digits', function (done) {
const res = lexer.getAllTokens('-.5');
assert.equal(res.length, 1);
assert.equal(res[0].type, LEX_FLOAT);
assert.equal(res[0].value, -0.5);
done();
});
it('should handle special characters', function (done) {
const res = lexer.getAllTokens('{}[],:');
assert.equal(res.length, 6);
assert.equal(res[0].type, LEX_LCB);
assert.equal(res[1].type, LEX_RCB);
assert.equal(res[2].type, LEX_LB);
assert.equal(res[3].type, LEX_RB);
assert.equal(res[4].type, LEX_COMMA);
assert.equal(res[5].type, LEX_COLON);
done();
});
it('should handle quoted special characters', function (done) {
const res = lexer.getAllTokens('{}[],:"{}[],:"');
assert.equal(res.length, 7);
assert.equal(res[0].type, LEX_LCB);
assert.equal(res[1].type, LEX_RCB);
assert.equal(res[2].type, LEX_LB);
assert.equal(res[3].type, LEX_RB);
assert.equal(res[4].type, LEX_COMMA);
assert.equal(res[5].type, LEX_COLON);
assert.equal(res[6].type, LEX_QUOTE);
assert.equal(res[6].value, "{}[],:");
done();
});
it('should handle quoted numbers', function (done) {
const res = lexer.getAllTokens('"576 450.5"');
assert.equal(res.length, 1);
assert.equal(res[0].type, LEX_QUOTE);
assert.equal(res[0].value, "576 450.5");
done();
});
it('should handle unmatched quotes on the right', function (done) {
const res = lexer.getAllTokens('"test" again"');
assert.equal(res.length, 7);
assert.equal(res[0].type, LEX_QUOTE);
assert.equal(res[0].value, "test");
assert.equal(res[1].type, LEX_TOKEN);
assert.equal(res[1].value, "a");
assert.equal(res[2].type, LEX_TOKEN);
assert.equal(res[2].value, "g");
assert.equal(res[3].type, LEX_TOKEN);
assert.equal(res[3].value, "a");
assert.equal(res[4].type, LEX_TOKEN);
assert.equal(res[4].value, "i");
assert.equal(res[5].type, LEX_TOKEN);
assert.equal(res[5].value, "n");
assert.equal(res[6].type, LEX_QUOTE);
assert.equal(res[6].value, "");
done();
});
it('should handle unmatched quotes on the left', function (done) {
const res = lexer.getAllTokens('"test" "again');
assert.equal(res.length, 2);
assert.equal(res[0].type, LEX_QUOTE);
assert.equal(res[0].value, "test");
assert.equal(res[1].type, LEX_QUOTE);
assert.equal(res[1].value, "again");
done();
});
});
it('should handle totally unmatched quotes on the left', function (done) {
const res = lexer.getAllTokens('"test again');
assert.equal(res.length, 1);
assert.equal(res[0].type, LEX_QUOTE);
assert.equal(res[0].value, "test again");
done();
});
it('should handle totally unmatched quotes on the right', function (done) {
const res = lexer.getAllTokens('t"');
assert.equal(res.length, 2);
assert.equal(res[0].type, LEX_TOKEN);
assert.equal(res[0].value, "t");
assert.equal(res[1].type, LEX_QUOTE);
assert.equal(res[1].value, "");
done();
});
it('should handle single quoted strings', function (done) {
const res = lexer.getAllTokens("'this' 'is'");
assert.equal(res.length, 2);
assert.equal(res[0].type, LEX_QUOTE);
assert.equal(res[0].value, "this");
assert.equal(res[1].type, LEX_QUOTE);
assert.equal(res[1].value, "is");
done();
});
it('should handle embedded quotes', function (done) {
const res = lexer.getAllTokens('"this is "a" test"');
assert.equal(res.length, 3);
assert.equal(res[0].type, LEX_QUOTE);
assert.equal(res[0].value, "this is ");
assert.equal(res[1].type, LEX_TOKEN);
assert.equal(res[1].value, "a");
assert.equal(res[2].type, LEX_QUOTE);
assert.equal(res[2].value, " test");
done();
});
it('should parse integers as integers', function (done) {
const res = lexer.getAllTokens('[4]');
assert.equal(res.length, 3);
assert.equal(res[0].type, LEX_LB);
assert.equal(res[1].type, LEX_INT);
assert.equal(res[1].value, 4);
assert.equal(res[2].type, LEX_RB);
done();
});
it('should parse floats as floats', function (done) {
const res = lexer.getAllTokens('[4.0]');
assert.equal(res.length, 3);
assert.equal(res[0].type, LEX_LB);
assert.equal(res[1].type, LEX_FLOAT);
assert.equal(res[1].value, 4.0);
assert.equal(res[2].type, LEX_RB);
done();
});
it('should handle newlines in quotes', done => {
const res = lexer.getAllTokens('{ "test0": "a '+"\n"+'string" }');
assert.equal(res.length, 5);
assert.equal(res[0].type, LEX_LCB);
assert.equal(res[1].type, LEX_QUOTE);
assert.equal(res[1].value, "test0");
assert.equal(res[2].type, LEX_COLON);
assert.equal(res[3].type, LEX_QUOTE);
assert.equal(res[3].value, "a \nstring");
assert.equal(res[4].type, LEX_RCB);
done();
});
it('should handle escaped double quotes', done => {
const res = lexer.getAllTokens('"this is\\" a test"');
assert.equal(res.length, 1);
assert.equal(res[0].type, LEX_QUOTE);
assert.equal(res[0].value, 'this is\" a test');
done();
});
it('should handle escaped single quotes', done => {
const res = lexer.getAllTokens("'this is\\' a test");
assert.equal(res.length, 1);
assert.equal(res[0].type, LEX_QUOTE);
assert.equal(res[0].value, 'this is\' a test');
done();
});
});