@desertnet/html-parser
Version:
HTML parser and non-strict validator
445 lines (364 loc) • 14.6 kB
JavaScript
import chai, {expect} from 'chai'
import sinon from 'sinon'
import sinonChai from 'sinon-chai'
import HTMLParser from '../lib/HTMLParser'
import Instr from '../lib/Instr'
import Op from '../lib/Op'
import {TagNode, TextNode, AttrNode, CloseTagNode} from '../lib/HTMLNode'
chai.use(sinonChai)
describe("HTMLParser", function () {
var parser;
beforeEach(function () {
parser = new HTMLParser();
})
describe("#parse", function () {
var parse = function (html) {
return parser.parse(html).toString();
};
it("should parse the empty string", function () {
expect(parse("")).to.be.equal("");
})
it("should parse a text node", function () {
expect(parse("hi")).to.be.equal("'hi'");
})
it("should parse a comment", function () {
expect(parse("<!-- hiya -->")).to.be.equal("<!-- hiya -->");
})
it("should parse a comment with dashes", function () {
expect(parse("<!-- hello--world -->")).to.be.equal("<!-- hello--world -->");
})
it("should parse a named HTML entity", function () {
expect(parse(">")).to.be.equal("&(gt);");
})
it("should parse a hex HTML entity", function () {
expect(parse("€")).to.be.equal("&(#x20AC);");
})
it("should parse a lone ampersand", function () {
expect(parse("lone & ampersand")).to.be.equal("'lone ''&'' ampersand'");
})
it("should parse a lone ampersand inside an attribute value", function () {
expect(parse('<a href="http://example.com/foo?a=1&b=2"></a>'))
.to.be.equal("<a href='http://example.com/foo?a=1&b=2'></a>")
})
it("should parse a lone >", function () {
expect(parse("lone > gt")).to.be.equal("'lone ''>'' gt'");
})
it("should parse a void tag", function () {
expect(parse("<br>")).to.be.equal("<br>");
})
it("should parse a tag with an attribute delimited by double quotes", function () {
expect(parse('<br clear="right">')).to.be.equal("<br clear='right'>");
})
it("should parse a tag with two attributes", function () {
expect(parse('<br clear="right" style="foo:bar">')).to.be.equal("<br clear='right' style='foo:bar'>");
})
it("should parse an empty attribute", function () {
expect(parse('<hr prop>')).to.be.equal("<hr prop=''>");
})
it("should parse a self closing void tag", function () {
expect(parse('<hr />')).to.be.equal("<hr>");
})
it("should parse a lone closing tag", function () {
expect(parse('</div>')).to.be.equal("</div>");
})
it("should parse a simple div tag", function () {
expect(parse('<div>foo</div>')).to.be.equal("<div>'foo'</div>");
})
it("should avoid throwing ScannerError on a `&` followed by whitespace", function () {
expect(function () { parser.parse("&C\n") }).not.to.throw();
})
})
describe("validate", function () {
var validate = HTMLParser.validate;
it("should consider header tags valid HTML", function () {
expect(validate("<h1>foo</h1>")).to.deep.equal([]);
})
it("should not consider a correctly named entity to be an error", () => {
expect(validate("é")).to.deep.equal([])
})
it("should consider a misspelled named entity an error", function () {
const errors = validate("&foobar;")
expect(errors.length).to.be.equal(1)
})
})
describe("#finalize", function () {
var tagNode;
beforeEach(function () {
tagNode = new TagNode();
tagNode.tagName = "br";
})
it("should unfinished nodes from the stack", function () {
parser.pushNode(tagNode);
parser.finalize();
expect(function () { parser.currentNode() }).to.throw();
expect(parser.currentOpenElement()).to.be.equal(parser.parseTree());
})
it("should move open elements into the parse tree", function () {
parser.pushOpenElement(tagNode);
parser.finalize();
expect(parser.parseTree().lastChild).to.be.equal(tagNode);
})
it("should add errors to the topmost unfinished node", function () {
var attrNode = new AttrNode();
parser.pushNode(tagNode);
parser.pushNode(attrNode);
parser.finalize();
expect(tagNode.ownErrors.length).to.be.equal(0);
expect(attrNode.ownErrors.length).to.be.equal(1);
})
})
describe("#executeOp", function () {
it("should call popNode when passed a POP_NODE op", function () {
sinon.stub(parser, 'popNode');
var op = new Op(Instr.POP_NODE);
parser.executeOp(op);
expect(parser.popNode).to.have.been.called;
})
it("should call pushNode when passed a PUSH_NODE op", function () {
sinon.spy(parser, 'pushNode');
var op = new Op(Instr.PUSH_NODE);
var node = new TextNode();
op.setNode(node);
parser.executeOp(op);
expect(parser.pushNode).to.have.been.calledWith(op.node());
})
it("should call node.addToken when passed a ADD_TOKEN op", function () {
var node = new TextNode();
sinon.spy(node, 'addToken');
var op = new Op(Instr.ADD_TOKEN);
var tok = {type: "text", value: "foo", index: 0, line: 1, column: 0};
op.setToken(tok);
parser.pushNode(node);
parser.executeOp(op);
expect(node.addToken).to.have.been.calledWith(tok);
})
})
describe("#currentNode", function () {
it("should return the node at the top of the node stack", function () {
var node = new TextNode();
parser.pushNode(node);
expect(parser.currentNode()).to.be.equal(node);
})
it("should throw an error when the node stack is empty", function () {
expect(function () { parser.currentNode() }).to.throw();
})
})
describe("#popNode", function () {
it("should remove the node at the top of the stack", function () {
var node1 = new TextNode();
var node2 = new TextNode();
parser.pushNode(node1);
parser.pushNode(node2);
parser.popNode();
expect(parser.currentNode()).to.be.equal(node1);
})
it("should call the applyCompletedNode method", function () {
sinon.spy(parser, 'applyCompletedNode');
var node = new TextNode();
parser.pushNode(node);
parser.popNode();
expect(parser.applyCompletedNode).to.have.been.calledWith(node);
})
it("should throw an error if the node stack is empty", function () {
expect(function () { parser.popNode() }).to.throw();
})
})
describe("#applyCompletedNode", function () {
it("should add a text node to the currently open element", function () {
var node = new TextNode();
parser.applyCompletedNode(node);
expect(parser.currentOpenElement().children.pop()).to.be.equal(node);
})
it("should add an attribute node to the current tag node", function () {
var attr = new AttrNode();
parser.pushNode(new TagNode());
parser.applyCompletedNode(attr);
expect(parser.currentNode().attributes[0]).to.be.equal(attr);
})
it("should throw an error if it attempts to add an attribute node to a non-tag node", function () {
parser.pushNode(new TextNode());
expect(function () {
parser.applyCompletedNode(new AttrNode());
}).to.throw();
})
it("should add void tags to the current open element", function () {
var br = new TagNode();
br.tagName = "br";
parser.applyCompletedNode(br);
var openElement = parser.currentOpenElement();
expect(openElement !== br);
expect(openElement.lastChild).to.be.equal(br);
})
it("should push non-void tags onto the open element stack", function () {
var div = new TagNode();
div.tagName = "div";
parser.applyCompletedNode(div);
expect(parser.currentOpenElement()).to.be.equal(div);
})
it("should call mostRecentOpenElementWithName when passed a close tag node", function () {
sinon.spy(parser, 'mostRecentOpenElementWithName');
var close = new CloseTagNode();
close.tagName = "FOO";
parser.applyCompletedNode(close);
expect(parser.mostRecentOpenElementWithName).to.have.been.calledWith("foo");
})
it("should call addClosedElementToParent when passed a close tag with a matching open tag", function () {
sinon.spy(parser, 'addClosedElementToParent');
var div = new TagNode();
div.tagName = "div";
var closeDiv = new CloseTagNode();
closeDiv.tagName = "div";
parser.pushOpenElement(div);
parser.applyCompletedNode(closeDiv);
expect(parser.addClosedElementToParent).to.have.been.calledWith(div);
})
it("should call setRawtextModeForTag on the compiler object when called with a rawtext tag node", function () {
var compiler = parser.compiler();
sinon.spy(compiler, 'setRawtextModeForTag');
var script = new TagNode();
script.tagName = "script";
parser.applyCompletedNode(script);
expect(compiler.setRawtextModeForTag).to.have.been.calledWith("script");
})
it("should add a closing tag node to the tag node it closes", function () {
var div = new TagNode();
div.tagName = "div";
var span = new TagNode();
span.tagName = "span";
var divClose = new CloseTagNode();
divClose.tagName = "div";
parser.pushOpenElement(div);
parser.pushOpenElement(span);
parser.applyCompletedNode(divClose);
expect(div.closingTag).to.be.equal(divClose);
})
it("should add a closing tag node to the current open element if there is no matching open tag", function () {
var div = new TagNode();
div.tagName = "div";
var spanClose = new CloseTagNode();
spanClose.tagName = "span";
parser.pushOpenElement(div);
parser.applyCompletedNode(spanClose);
expect(parser.currentOpenElement().lastChild).to.be.equal(spanClose);
})
it("should set an error on bogus closing tags", function () {
var div = new TagNode();
div.tagName = "div";
var spanClose = new CloseTagNode();
spanClose.tagName = "span";
parser.pushOpenElement(div);
parser.applyCompletedNode(spanClose);
expect(spanClose.ownErrors.length).to.be.equal(1);
})
})
describe("#addClosedElementToParent", function () {
var div1, div2, div3;
beforeEach(function () {
div1 = new TagNode();
div1.tagName = "div";
div2 = new TagNode();
div2.tagName = "div";
div3 = new TagNode();
div3.tagName = "div";
})
it("should cause the currentOpenElement to return the closing node's parent", function () {
parser.pushOpenElement(div1);
parser.pushOpenElement(div2);
parser.addClosedElementToParent(div2);
expect(parser.currentOpenElement()).to.be.equal(div1);
})
it("should make closed elements children of each other and the currently open element", function () {
var closeDiv = new CloseTagNode();
closeDiv.tagName = "div";
parser.pushOpenElement(div1);
parser.pushOpenElement(div2);
parser.pushOpenElement(div3);
div1.appendChild(closeDiv);
parser.addClosedElementToParent(div1);
expect(parser.currentOpenElement().lastChild).to.be.equal(div1);
expect(div1.lastChild).to.be.equal(div2);
expect(div2.lastChild).to.be.equal(div3);
expect(div3.lastChild).to.be.equal(null);
})
it("should set an error on closing tag when it closes other tags still open", function () {
var span = new TagNode();
span.tagName = "span";
var closeDiv = new CloseTagNode();
closeDiv.tagName = "div";
div1.appendChild(closeDiv);
parser.pushOpenElement(div1);
parser.pushOpenElement(span);
parser.addClosedElementToParent(div1);
expect(closeDiv.ownErrors.length).to.be.equal(1);
})
it("should set an error on elements without closing tags", function () {
var closeDiv = new CloseTagNode();
closeDiv.tagName = "div";
div1.appendChild(closeDiv);
parser.pushOpenElement(div1);
parser.pushOpenElement(div2);
parser.pushOpenElement(div3);
parser.addClosedElementToParent(div1);
expect(div1.ownErrors.length).to.be.equal(0);
expect(div2.ownErrors.length).to.be.equal(1);
expect(div3.ownErrors.length).to.be.equal(1);
})
})
describe("#popElementsToAndIncluding", function () {
var div1, div2, div3;
beforeEach(function () {
div1 = new TagNode();
div1.tagName = "div";
div2 = new TagNode();
div2.tagName = "div";
div3 = new TagNode();
div3.tagName = "div";
})
it("should cause the currentOpenElement to return the closing node's parent", function () {
parser.pushOpenElement(div1);
parser.pushOpenElement(div2);
parser.popElementsToAndIncluding(div2);
expect(parser.currentOpenElement()).to.be.equal(div1);
})
it("should throw an error when trying to find an element not on the open element stack", function () {
parser.pushOpenElement(div1);
expect(function () { parser.popElementsToAndIncluding(div2) }).to.throw();
})
it("should return the closed elements", function () {
parser.pushOpenElement(div1);
parser.pushOpenElement(div2);
parser.pushOpenElement(div3);
var closedElements = parser.popElementsToAndIncluding(div1);
expect(closedElements).to.deep.equal([div3, div2, div1]);
})
})
describe("#mostRecentOpenElementWithName", function () {
it("should return the topmost element in the open element stack with a matching name", function () {
var div1 = new TagNode();
div1.tagName = "div";
var div2 = new TagNode();
div2.tagName = "div";
var p = new TagNode();
p.tagName = "p";
parser.pushOpenElement(div1);
parser.pushOpenElement(div2);
parser.pushOpenElement(p);
expect(parser.mostRecentOpenElementWithName("div")).to.be.equal(div2);
})
})
describe("#currentOpenElement", function () {
it("should return the root element for a brand new parser", function () {
expect(parser.currentOpenElement()).to.be.equal(parser.parseTree());
})
it("should return the top of the open element stack", function () {
var node = new TagNode();
parser.pushOpenElement(node);
expect(parser.currentOpenElement()).to.be.equal(node);
})
})
describe("#popOpenElement", function () {
it("should throw an error when trying to pop the root node", function () {
expect(function () { parser.popOpenElement() }).to.throw();
})
})
})