UNPKG

@desertnet/html-parser

Version:

HTML parser and non-strict validator

509 lines (423 loc) 23.1 kB
import chai, {expect} from 'chai'
import sinon from 'sinon'
import sinonChai from 'sinon-chai'
import chaiThings from 'chai-things'
import {Token as ScannerToken} from '@desertnet/scanner'

import Instr from '../lib/Instr'
import Op from '../lib/Op'
import Compiler from '../lib/Compiler'

chai.use(sinonChai)
chai.use(chaiThings)

/**
 * Custom chai assertion: `expect(ops).toMatchCodeDescription("A B C")`.
 *
 * Asserts that the subject is an Array whose members are all `Op` instances,
 * then compares the ops joined with single spaces (each op is stringified by
 * `Array.prototype.join`) against the expected description.
 *
 * @param {string} [expectedCodeDesc=''] Space-separated description of the
 *   expected ops; omitted/undefined means "no ops".
 */
chai.Assertion.addMethod('toMatchCodeDescription', function (expectedCodeDesc = '') {
  new chai.Assertion(this._obj).to.be.instanceof(Array)
  new chai.Assertion(this._obj).to.all.be.instanceof(Op)

  const actualCodeDesc = this._obj.join(" ")

  this.assert(
    actualCodeDesc === expectedCodeDesc,
    'expected #{act} to be #{exp}',
    'expected #{act} to not be #{exp}',
    expectedCodeDesc,
    actualCodeDesc,
    true
  )
})

describe("Compiler", function () {
  let compiler;
  let token;

  // A fresh Compiler per test means sinon spies/stubs installed on it never
  // leak into the next test, so no explicit restore is needed.
  beforeEach(function () {
    compiler = new Compiler();
    token = makeTok("text", "foo");
    resetMakeTok();
  })

  afterEach(function () {
    resetMakeTok();
  })

  describe("#generateCodeForTokenInDialect", function () {
    it("should throw an error when passed an unexpected dialect", function () {
      expect(function () {
        compiler.generateCodeForTokenInDialect(token, "bar");
      }).to.throw();
    })

    // One entry per dialect. `double` records whether the original test spied
    // on the real generator or stubbed it out; that choice is preserved as-is.
    // NOTE(review): presumably the stubbed generators do not accept the
    // generic "text" token used here -- confirm against lib/Compiler.
    const dispatchCases = [
      {dialect: "content", method: "generateCodeForContentToken", double: "spy"},
      {dialect: "comment", method: "generateCodeForCommentToken", double: "spy"},
      {dialect: "entity", method: "generateCodeForEntityToken", double: "stub"},
      {dialect: "tag", method: "generateCodeForTagToken", double: "stub"},
      {dialect: "attribute", method: "generateCodeForAttributeToken", double: "stub"},
      {dialect: "attributeValue", method: "generateCodeForAttributeValueToken", double: "spy"},
      {dialect: "attributeValueQuoted", method: "generateCodeForAttributeValueQuotedToken", double: "spy"},
      {dialect: "closeTag", method: "generateCodeForCloseTagToken", double: "stub"},
      {dialect: "rawtext", method: "generateCodeForRawtextToken", double: "spy"},
    ];

    dispatchCases.forEach(function ({dialect, method, double}) {
      it(`should call ${method} when passed a token in the ${dialect} dialect`, function () {
        sinon[double](compiler, method);
        compiler.generateCodeForTokenInDialect(token, dialect);
        expect(compiler[method]).to.have.been.calledWith(token);
      })
    })
  })

  describe("#generateCodeForContentToken", function () {
    it("should return correct code for a text token", function () {
      const code = compiler.generateCodeForContentToken(makeTok("text", "foo"));
      expect(code).toMatchCodeDescription("PUSH_NODE:TEXT ADD_TOKEN:text POP_NODE");
    })

    it("should return correct code for an error token", function () {
      const code = compiler.generateCodeForContentToken(makeTok("error", "foo"));
      expect(code).toMatchCodeDescription("PUSH_NODE:TEXT ADD_TOKEN:error POP_NODE");
    })

    it("should return correct code for a commentStart token", function () {
      const code = compiler.generateCodeForContentToken(makeTok("commentStart", "<!--"));
      expect(code).toMatchCodeDescription("PUSH_NODE:COMMENT ADD_TOKEN:commentStart");
    })

    it("should call pushDialect when passed a commentStart token", function () {
      sinon.spy(compiler, 'pushDialect');
      compiler.generateCodeForContentToken(makeTok("commentStart", "<!--"));
      expect(compiler.pushDialect).to.have.been.calledWith("comment");
    })

    it("should return correct code for an entityStart token", function () {
      const code = compiler.generateCodeForContentToken(makeTok("entityStart", "&"));
      expect(code).toMatchCodeDescription("PUSH_NODE:ENTITY ADD_TOKEN:entityStart");
    })

    it("should call pushDialect when passed an entityStart token", function () {
      sinon.spy(compiler, 'pushDialect');
      compiler.generateCodeForContentToken(makeTok("entityStart", "&"));
      expect(compiler.pushDialect).to.have.been.calledWith("entity");
    })

    it("should return correct code for a tagStart token", function () {
      const code = compiler.generateCodeForContentToken(makeTok("tagStart", "<foo"));
      expect(code).toMatchCodeDescription("PUSH_NODE:TAG ADD_TOKEN:tagStart");
    })

    it("should call pushDialect when passed a tagStart token", function () {
      sinon.spy(compiler, 'pushDialect');
      compiler.generateCodeForContentToken(makeTok("tagStart", "<foo"));
      expect(compiler.pushDialect).to.have.been.calledWith("tag");
    })

    it("should return the correct code for a closeTagStart token", function () {
      const code = compiler.generateCodeForContentToken(makeTok("closeTagStart", "</foo"));
      expect(code).toMatchCodeDescription("PUSH_NODE:CLOSETAG ADD_TOKEN:closeTagStart");
    })

    it("should call pushDialect when passed a closeTagStart token", function () {
      sinon.spy(compiler, 'pushDialect');
      compiler.generateCodeForContentToken(makeTok("closeTagStart", "</foo"));
      expect(compiler.pushDialect).to.have.been.calledWith("closeTag");
    })
  })

  describe("#generateCodeForCommentToken", function () {
    it("should return correct code for a text token", function () {
      const code = compiler.generateCodeForCommentToken(makeTok("text", "foo"));
      expect(code).toMatchCodeDescription("ADD_TOKEN:text");
    })

    it("should return correct code for a dash token", function () {
      const code = compiler.generateCodeForCommentToken(makeTok("dash", "foo"));
      expect(code).toMatchCodeDescription("ADD_TOKEN:dash");
    })

    it("should return correct code for a commentEnd token", function () {
      const code = compiler.generateCodeForCommentToken(makeTok("commentEnd", "-->"));
      expect(code).toMatchCodeDescription("ADD_TOKEN:commentEnd POP_NODE");
    })

    it("should call popDialect when passed a commentEnd token", function () {
      sinon.spy(compiler, 'popDialect');
      compiler.generateCodeForCommentToken(makeTok("commentEnd", "-->"));
      expect(compiler.popDialect).to.have.been.calledWith();
    })
  })

  describe("#generateCodeForEntityToken", function () {
    "hex dec named".split(" ").forEach(function (tokenType) {
      it(`should return correct code for a ${tokenType} token`, function () {
        const code = compiler.generateCodeForEntityToken(makeTok(tokenType, "foo"));
        expect(code).toMatchCodeDescription(`ADD_TOKEN:${tokenType}`);
      })
    })

    "entityEnd error".split(" ").forEach(function (tokenType) {
      it(`should return correct code for an ${tokenType} token`, function () {
        const code = compiler.generateCodeForEntityToken(makeTok(tokenType, "foo"));
        expect(code).toMatchCodeDescription(`ADD_TOKEN:${tokenType} POP_NODE`);
      })

      it(`should call popDialect when passed an ${tokenType} token`, function () {
        sinon.spy(compiler, 'popDialect');
        compiler.generateCodeForEntityToken(makeTok(tokenType, "foo"));
        expect(compiler.popDialect).to.have.been.calledWith();
      })
    })
  })

  describe("#generateCodeForTagToken", function () {
    it("should return correct code for a tagEnd token", function () {
      const code = compiler.generateCodeForTagToken(makeTok("tagEnd", ">"));
      expect(code).toMatchCodeDescription("ADD_TOKEN:tagEnd POP_NODE");
    })

    it("should call popDialect when passed a tagEnd token", function () {
      sinon.spy(compiler, 'popDialect');
      compiler.generateCodeForTagToken(makeTok("tagEnd", ">"));
      expect(compiler.popDialect).to.have.been.calledWith();
    })

    it("should return correct code for a whitespace token", function () {
      const code = compiler.generateCodeForTagToken(makeTok("whitespace", " "));
      expect(code).toMatchCodeDescription("ADD_TOKEN:whitespace");
    })

    it("should return correct code for an attributeStart token", function () {
      const code = compiler.generateCodeForTagToken(makeTok("attributeStart", "foo"));
      expect(code).toMatchCodeDescription("PUSH_NODE:ATTRIBUTE ADD_TOKEN:attributeStart");
    })

    it("should call pushDialect when passed an attributeStart token", function () {
      sinon.spy(compiler, 'pushDialect');
      compiler.generateCodeForTagToken(makeTok("attributeStart", "foo"));
      expect(compiler.pushDialect).to.have.been.calledWith("attribute");
    })

    it("should set its scanner's dialect to attribute after being passed an attributeStart token", function () {
      compiler.generateCodeForTagToken(makeTok("attributeStart", "foo"));
      expect(compiler.currentDialect()).to.be.equal("attribute");
    })

    it("should return correct code for a selfClose token", function () {
      const code = compiler.generateCodeForTagToken(makeTok("selfClose", "/"));
      expect(code).toMatchCodeDescription("ADD_TOKEN:selfClose");
    })

    it("should return correct code for an error token", function () {
      const code = compiler.generateCodeForTagToken(makeTok("error", "'"));
      expect(code).toMatchCodeDescription("PUSH_NODE:ATTRIBUTE ADD_TOKEN:error");
    })
  })

  describe("#generateCodeForAttributeToken", function () {
    it("should return the correct code for whitespace token", function () {
      const code = compiler.generateCodeForAttributeToken(makeTok("whitespace", " "));
      expect(code).toMatchCodeDescription("ADD_TOKEN:whitespace");
    })

    it("should return the correct code for tagEnd token", function () {
      const code = compiler.generateCodeForAttributeToken(makeTok("tagEnd", ">"));
      expect(code).toMatchCodeDescription("POP_NODE ADD_TOKEN:tagEnd POP_NODE");
    })

    it("should call popDialect twice when passed a tagEnd token", function () {
      sinon.spy(compiler, 'popDialect');
      compiler.generateCodeForAttributeToken(makeTok("tagEnd", ">"));
      expect(compiler.popDialect).to.have.been.calledTwice;
    })

    it("should return the correct code for a selfClose token", function () {
      const code = compiler.generateCodeForAttributeToken(makeTok("selfClose", "/"));
      expect(code).toMatchCodeDescription("POP_NODE ADD_TOKEN:selfClose");
    })

    it("should call popDialect when passed a selfClose token", function () {
      sinon.spy(compiler, 'popDialect');
      compiler.generateCodeForAttributeToken(makeTok("selfClose", "/"));
      expect(compiler.popDialect).to.have.been.calledWith();
    })

    "attributeValueStart attributeValueQuotedStart".split(" ").forEach(function (tokenType) {
      // The dialect these tests expect is the token type minus its "Start" suffix.
      const dialect = tokenType.replace(/Start$/, "");

      it(`should return the correct code for an ${tokenType} token`, function () {
        const code = compiler.generateCodeForAttributeToken(makeTok(tokenType, "="));
        expect(code).toMatchCodeDescription(`ADD_TOKEN:${tokenType}`);
      })

      it(`should call popDialect and pushDialect when passed an ${tokenType} token`, function () {
        sinon.spy(compiler, 'popDialect');
        sinon.spy(compiler, 'pushDialect');
        compiler.generateCodeForAttributeToken(makeTok(tokenType, "="));
        expect(compiler.popDialect).to.have.been.calledWith();
        expect(compiler.pushDialect).to.have.been.calledWith(dialect);
      })

      it(`should set its scanner's dialect to ${dialect} after being passed an ${tokenType} token`, function () {
        compiler.generateCodeForAttributeToken(makeTok(tokenType, "="));
        expect(compiler.currentDialect()).to.be.equal(dialect);
      })
    })

    it("should set the expected attribute end token to dquo when passed an attributeValueQuotedStart with a double quote", function () {
      compiler.generateCodeForAttributeToken(makeTok("attributeValueQuotedStart", '="'));
      expect(compiler.expectedAttributeValueEndTokenType()).to.be.equal("dquo");
    })

    it("should set the expected attribute end token to squo when passed an attributeValueQuotedStart with a single quote", function () {
      compiler.generateCodeForAttributeToken(makeTok("attributeValueQuotedStart", "='"));
      expect(compiler.expectedAttributeValueEndTokenType()).to.be.equal("squo");
    })

    "attributeStart error".split(" ").forEach(function (tokenType) {
      it(`should return the correct code for an ${tokenType} token`, function () {
        const code = compiler.generateCodeForAttributeToken(makeTok(tokenType, "foo"));
        expect(code).toMatchCodeDescription(`POP_NODE PUSH_NODE:ATTRIBUTE ADD_TOKEN:${tokenType}`);
      })
    })
  })

  describe("#generateCodeForAttributeValueToken", function () {
    it("should return the correct code for a whitespace token", function () {
      const code = compiler.generateCodeForAttributeValueToken(makeTok("whitespace", " "));
      expect(code).toMatchCodeDescription("POP_NODE ADD_TOKEN:whitespace");
    })

    it("should call popDialect when passed a whitespace token", function () {
      sinon.spy(compiler, 'popDialect');
      compiler.generateCodeForAttributeValueToken(makeTok("whitespace", " "));
      expect(compiler.popDialect).to.have.been.calledWith();
    })

    it("should return the correct code for an entityStart token", function () {
      const code = compiler.generateCodeForAttributeValueToken(makeTok("entityStart", "&foo"));
      expect(code).toMatchCodeDescription("PUSH_NODE:ENTITY ADD_TOKEN:entityStart");
    })

    it("should push the entity dialect when passed an entityStart token", function () {
      sinon.spy(compiler, 'pushDialect');
      compiler.generateCodeForAttributeValueToken(makeTok("entityStart", "&foo"));
      expect(compiler.pushDialect).to.have.been.calledWith("entity");
    })

    it("should return the correct code for a tagEnd token", function () {
      const code = compiler.generateCodeForAttributeValueToken(makeTok("tagEnd", ">"));
      expect(code).toMatchCodeDescription("POP_NODE ADD_TOKEN:tagEnd POP_NODE");
    })

    it("should call popDialect twice when passed a tagEnd token", function () {
      sinon.spy(compiler, 'popDialect');
      compiler.generateCodeForAttributeValueToken(makeTok("tagEnd", ">"));
      expect(compiler.popDialect).to.have.been.calledTwice;
    })

    "text error".split(" ").forEach(function (tokenType) {
      it(`should return the correct code for a ${tokenType} token`, function () {
        const code = compiler.generateCodeForAttributeValueToken(makeTok(tokenType, "foo"));
        expect(code).toMatchCodeDescription(`ADD_TOKEN:${tokenType}`);
      })
    })
  })

  describe("#generateCodeForAttributeValueQuotedToken", function () {
    it("should return the correct code for a dquo token when expecting dquo as the attribute value end token type", function () {
      compiler.setExpectedAttributeValueEndTokenType("dquo");
      const code = compiler.generateCodeForAttributeValueQuotedToken(makeTok("dquo", '"'));
      expect(code).toMatchCodeDescription("ADD_TOKEN:dquo POP_NODE");
    })

    it("should call popDialect when passed a dquo token when expecting dquo as the attribute value end token type", function () {
      compiler.setExpectedAttributeValueEndTokenType("dquo");
      sinon.spy(compiler, 'popDialect');
      compiler.generateCodeForAttributeValueQuotedToken(makeTok("dquo", '"'));
      expect(compiler.popDialect).to.have.been.calledWith();
    })

    it("should return the correct code for a dquo token when expecting squo as the attribute value end token type", function () {
      compiler.setExpectedAttributeValueEndTokenType("squo");
      const code = compiler.generateCodeForAttributeValueQuotedToken(makeTok("dquo", '"'));
      expect(code).toMatchCodeDescription("ADD_TOKEN:text");
    })

    it("should return the correct code for a squo token when expecting squo as the attribute value end token type", function () {
      compiler.setExpectedAttributeValueEndTokenType("squo");
      const code = compiler.generateCodeForAttributeValueQuotedToken(makeTok("squo", "'"));
      expect(code).toMatchCodeDescription("ADD_TOKEN:squo POP_NODE");
    })

    it("should call popDialect when passed a squo token when expecting squo as the attribute value end token type", function () {
      compiler.setExpectedAttributeValueEndTokenType("squo");
      sinon.spy(compiler, 'popDialect');
      compiler.generateCodeForAttributeValueQuotedToken(makeTok("squo", "'"));
      expect(compiler.popDialect).to.have.been.calledWith();
    })

    it("should return the correct code for a squo token when expecting dquo as the attribute value end token type", function () {
      compiler.setExpectedAttributeValueEndTokenType("dquo");
      const code = compiler.generateCodeForAttributeValueQuotedToken(makeTok("squo", "'"));
      expect(code).toMatchCodeDescription("ADD_TOKEN:text");
    })

    it("should return the correct code for an entityStart token", function () {
      const code = compiler.generateCodeForAttributeValueQuotedToken(makeTok("entityStart", "&foo"));
      expect(code).toMatchCodeDescription("PUSH_NODE:ENTITY ADD_TOKEN:entityStart");
    })

    it("should call pushDialect when passed an entityStart token", function () {
      sinon.spy(compiler, 'pushDialect');
      compiler.generateCodeForAttributeValueQuotedToken(makeTok("entityStart", "&foo"));
      expect(compiler.pushDialect).to.have.been.calledWith("entity");
    })

    "text error".split(" ").forEach(function (tokenType) {
      it(`should return the correct code for a ${tokenType} token`, function () {
        const code = compiler.generateCodeForAttributeValueQuotedToken(makeTok(tokenType, "foo"));
        expect(code).toMatchCodeDescription(`ADD_TOKEN:${tokenType}`);
      })
    })
  })

  describe("#generateCodeForCloseTagToken", function () {
    it("should return the correct code for a whitespace token", function () {
      const code = compiler.generateCodeForCloseTagToken(makeTok("whitespace", " "));
      expect(code).toMatchCodeDescription("ADD_TOKEN:whitespace");
    })

    it("should return the correct code for an error token", function () {
      const code = compiler.generateCodeForCloseTagToken(makeTok("error", "foo"));
      expect(code).toMatchCodeDescription("ADD_TOKEN:error");
    })

    it("should call popDialect when passed a tagEnd token", function () {
      sinon.spy(compiler, 'popDialect');
      compiler.generateCodeForCloseTagToken(makeTok("tagEnd", ">"));
      expect(compiler.popDialect).to.have.been.calledWith();
    })

    it("should return the correct code for a tagEnd token", function () {
      const code = compiler.generateCodeForCloseTagToken(makeTok("tagEnd", ">"));
      expect(code).toMatchCodeDescription("ADD_TOKEN:tagEnd POP_NODE");
    })
  })

  describe("#generateCodeForRawtextToken", function () {
    it("should return the correct code for a closeTag that matches the opening tag", function () {
      compiler.setExpectedRawtextClosingTagName("script");
      const code = compiler.generateCodeForRawtextToken(makeTok("closeTag", "</script>"));
      expect(code).toMatchCodeDescription("PUSH_NODE:CLOSETAG ADD_TOKEN:closeTag POP_NODE");
    })

    it("should call popDialect when passed a closeTag that matches the openening tag", function () {
      sinon.spy(compiler, 'popDialect');
      compiler.setExpectedRawtextClosingTagName("script");
      compiler.generateCodeForRawtextToken(makeTok("closeTag", "</script>"));
      expect(compiler.popDialect).to.have.been.calledWith();
    })

    it("should return the correct code for a closeTag token that does not match the opening tag", function () {
      compiler.setExpectedRawtextClosingTagName("script");
      const code = compiler.generateCodeForRawtextToken(makeTok("closeTag", "</div>"));
      expect(code).toMatchCodeDescription("PUSH_NODE:TEXT ADD_TOKEN:closeTag POP_NODE");
    })

    "text lt".split(" ").forEach(function (tokenType) {
      it(`should return the correct code for an ${tokenType} token`, function () {
        const code = compiler.generateCodeForRawtextToken(makeTok(tokenType, "foo"));
        expect(code).toMatchCodeDescription(`PUSH_NODE:TEXT ADD_TOKEN:${tokenType} POP_NODE`);
      })
    })
  })

  describe("#expectedRawtextClosingTagName", function () {
    it("should always return the lowercase name", function () {
      compiler.setExpectedRawtextClosingTagName("FOO");
      expect(compiler.expectedRawtextClosingTagName()).to.be.equal("foo");
    })
  })

  // ------------------------------------------------------------
  // Helper functions for testing opsForTokenInDialect functions.
  // ------------------------------------------------------------

  // Shared offset handed to every token built by makeTok. Nothing in this
  // file advances it; resetMakeTok() puts it back to 0 around each test.
  // Declared after the tests, which is safe because the hooks and `it`
  // callbacks only run once this describe callback has finished.
  let makeTokOffset = 0;

  /**
   * Build a scanner token for feeding to the compiler under test.
   *
   * @param {string} type Token type name (e.g. "text", "tagEnd").
   * @param {string} value Source text of the token.
   * @returns {ScannerToken}
   */
  function makeTok (type, value) {
    // NOTE(review): the trailing arguments are presumably index, line and
    // column -- confirm against the @desertnet/scanner Token constructor.
    return new ScannerToken(type, value, makeTokOffset, 1, makeTokOffset);
  }

  /** Reset the offset used by makeTok back to zero. */
  function resetMakeTok () {
    makeTokOffset = 0;
  }

  /**
   * Build a list of Op instances from compact definitions. Each definition is
   * either a bare instruction or an `[instruction, operand]` pair; for a pair
   * the operand is attached as the node of a PUSH_NODE op or the token of an
   * ADD_TOKEN op.
   *
   * Currently not called by any test in this file.
   *
   * @param {...*} opDefs
   * @returns {Op[]}
   */
  function makeOps (...opDefs) {
    return opDefs.map(opDef => {
      // Decide by shape rather than by `opDef[0] || opDef`: indexing a bare
      // string instruction would yield its first character, and a falsy
      // instruction inside a pair would pass the whole array to Op.
      const isPair = Array.isArray(opDef);
      const op = new Op(isPair ? opDef[0] : opDef);

      if (isPair) {
        if (op.instruction() === Instr.PUSH_NODE) {
          op.setNode(opDef[1]);
        }
        else if (op.instruction() === Instr.ADD_TOKEN) {
          op.setToken(opDef[1]);
        }
      }

      return op;
    });
  }
})