@grammar/html
Version:
HTML grammar.
477 lines • 20.6 kB
JavaScript
import $ from './spellu-engine.mjs';
/**
* HTML 5
*
*/
var grammar$html;
(function (grammar$html) {
/**
 * Build the recipe (rule table) describing the HTML / XHTML grammar.
 *
 * Every call returns a newly created object, so each caller gets its own copy.
 *
 * NOTE(review): the `parser` codes ('|', '&', '?', '#') and the option keys
 * (`optional`, `multiplicity`, `capture`, `label`) are interpreted by
 * spellu-engine; '|' and '&' read like choice and sequence combinators —
 * confirm against the engine documentation.
 *
 * @returns {{name: string, rules: Object<string, Object>}} The grammar recipe.
 */
function createRecipe() {
    return {
        name: 'html',
        rules: {
            'root': {
                parser: '|', argument: [
                    { parser: 'html-syntax' },
                    { parser: 'xhtml-syntax' },
                ],
            },
            'html-syntax': {
                parser: '&', argument: [
                    { parser: 'document-type', optional: true },
                    { parser: 'html-node-list' },
                ],
            },
            'xhtml-syntax': {
                parser: '&', argument: [
                    { parser: 'xml-declaration', optional: true },
                    { parser: 'document-type' },
                    { parser: 'xml-node-list' },
                ],
            },
            // <?xml ...?>
            // <?xml-stylesheet ...?>
            'xml-declaration': {
                parser: '&', argument: [
                    { parser: '?', argument: { syntax: "html.token.XmlDeclaration.L" /* XmlDeclarationOpenToken */, pattern: '<?xml' } },
                    { parser: '?', argument: { syntax: "html.TextSection" /* TextSection */, pattern: /[^\?]+/ } },
                    { parser: '?', argument: { syntax: "html.token.XmlDeclaration.R" /* XmlDeclarationCloseToken */, pattern: '?>' } },
                ],
            },
            // <!doctype ...>
            // <!...>
            'document-type': {
                parser: '&', argument: [
                    { parser: '?', argument: { syntax: "html.token.MarkupDeclaration.L" /* MarkupDeclarationOpenToken */, pattern: '<!' } },
                    { parser: '?', argument: { syntax: "html.DoctypeDeclaration" /* DoctypeDeclaration */, pattern: /(?:doctype|DOCTYPE)\s+([0-9a-zA-Z:_\-]+)\s*/, capture: 1 } },
                    { parser: '?', argument: { syntax: "html.token.MarkupDeclaration.R" /* MarkupDeclarationCloseToken */, pattern: '>' } },
                ],
            },
            'html-node-list': {
                parser: '|', argument: [
                    { parser: 'comment' },
                    { parser: 'tag-open' },
                    { parser: 'tag-close' },
                    { parser: 'cdata' },
                    { parser: 'text' },
                ],
                multiplicity: 0,
            },
            'xml-node-list': {
                parser: '|', argument: [
                    { parser: 'comment' },
                    { parser: 'tag-open' },
                    { parser: 'tag-close' },
                    { parser: 'cdata' },
                    { parser: 'text' },
                ],
                multiplicity: 0,
            },
            'comment': {
                parser: '&', argument: [
                    { parser: '?', argument: { syntax: "html.CommentSection" /* CommentSection */, pattern: '<!--' } },
                    {
                        parser: '?', argument: [
                            // Lazy and newline-aware: stop at the FIRST `-->` so two
                            // comments on one line are not merged, and allow comment
                            // bodies that span several lines.
                            { syntax: 'xml.comment.content', pattern: /[\s\S]+?(?=-->)/ },
                        ]
                    },
                    { parser: '?', argument: { syntax: "html.CommentSection" /* CommentSection */, pattern: '-->' } },
                ],
            },
            'tag-open': {
                parser: '&', argument: [
                    { parser: '?', argument: { syntax: "html.token.<" /* TagOpenToken */, pattern: '<' } },
                    { parser: '#', argument: { syntax: "html.TagName" /* TagName */, pattern: /[0-9a-zA-Z:_\-]+/, label: 'TAG_NAME' } },
                    { parser: 'attribute', multiplicity: 0 },
                    { parser: '?', argument: { syntax: "html.token./" /* SlashToken */, pattern: '/' }, optional: true },
                    { parser: '?', argument: { syntax: "html.token.>" /* TagCloseToken */, pattern: '>' } },
                ],
            },
            'tag-close': {
                parser: '&', argument: [
                    { parser: '?', argument: { syntax: "html.token.<" /* TagOpenToken */, pattern: '<' } },
                    { parser: '#', argument: { syntax: "html.token./" /* SlashToken */, pattern: '/' } },
                    { parser: '#', argument: { syntax: "html.TagName" /* TagName */, pattern: /[0-9a-zA-Z:_\-]+/ } },
                    { parser: '?', argument: { syntax: "html.token.>" /* TagCloseToken */, pattern: '>' } },
                ],
            },
            'attribute': {
                parser: '&', argument: [
                    // Control characters are excluded with explicit C0/C1 ranges.
                    // `\p{Control}` only works with the `u` flag; without it the class
                    // would exclude the literal characters p, {, C, o, n, t, r, l, }.
                    { parser: '?', argument: { syntax: "html.AttributeName" /* AttributeName */, pattern: /[^\s"'>/=\x00-\x1f\x7f-\x9f]+/, label: 'ATTRIBUTE_NAME' } },
                    { parser: '?', argument: { syntax: "html.token.=" /* EqualToken */, pattern: '=' } },
                    { parser: 'string-literal' },
                ],
            },
            'cdata': {
                parser: '&', argument: [
                    // FIXME: Syntax
                    { parser: '?', argument: { syntax: "html.token.<" /* TagOpenToken */, pattern: '<![CDATA[' } },
                    { parser: '#', argument: { syntax: "html.Text" /* Text */, pattern: /.*?(?=\]\]>)/ms } },
                    { parser: '#', argument: { syntax: "html.token.>" /* TagCloseToken */, pattern: ']]>' } },
                ],
            },
            // Everything up to the next '<', or the remainder of the input.
            'text': { parser: '?', argument: { syntax: "html.Text" /* Text */, pattern: /.+?(?=<)|.+/ms } },
            // Only double-quoted attribute values are described here.
            'string-literal': {
                parser: '&', argument: [
                    { parser: '?', argument: { syntax: 'html.token.doublequote', pattern: '"' } },
                    { parser: '#', argument: { syntax: 'html.token.content', pattern: /[^"]*/ } },
                    { parser: '#', argument: { syntax: 'html.token.doublequote', pattern: '"' } },
                ],
            },
        },
    };
}
grammar$html.createRecipe = createRecipe;
})(grammar$html || (grammar$html = {}));
(function (grammar$html) {
    // Register the CST processor on the shared `processors` namespace object,
    // creating that object if no processor has been registered yet.
    const registry = grammar$html.processors || (grammar$html.processors = {});
    // The CST processor defines no `parts` handlers of its own.
    const recipe = grammar$html.createRecipe();
    registry.cst = { recipe, parts: {} };
})(grammar$html || (grammar$html = {}));
(function (grammar$html) {
    // Register the AST processor on the shared `processors` namespace object,
    // creating that object if no processor has been registered yet.
    const registry = grammar$html.processors || (grammar$html.processors = {});
    const HTML_DOCUMENT = "html.HtmlDocument" /* HtmlDocument */;
    const TAG_SECTION = "html.TagSection" /* TagSection */;
    // Thin wrapper so every handler below reads as "origin, syntax, fields".
    const node = (origin, syntax, fields) => $.createNode(origin, syntax, fields);
    registry.ast = {
        recipe: grammar$html.createRecipe(),
        // One handler per grammar rule; each receives that rule's match result
        // and indexes into it by the position of the sub-rule in the recipe.
        parts: {
            // [doctype?, node list]; plain HTML never carries an XML declaration.
            'html-syntax'(matched) {
                const fields = {
                    doctype: matched[0],
                    nodeList: matched[1],
                    xmlDeclaration: null,
                };
                return node(matched, HTML_DOCUMENT, fields);
            },
            // [xml declaration?, doctype, node list]
            'xhtml-syntax'(matched) {
                const fields = {
                    doctype: matched[1],
                    nodeList: matched[2],
                    xmlDeclaration: matched[0] || null,
                };
                return node(matched, HTML_DOCUMENT, fields);
            },
            // The declaration's attributes are not extracted; the list is always empty.
            'xml-declaration'(matched) {
                return node(matched, "html.XmlDeclaration" /* XmlDeclaration */, { attributeList: [] });
            },
            // ['<!', doctype name, '>']
            'document-type'(matched) {
                return node(matched, "html.DoctypeDeclaration" /* DoctypeDeclaration */, { string: matched[1] });
            },
            // The whole match is stored as the comment text.
            'comment'(matched) {
                return node(matched, "html.CommentSection" /* CommentSection */, { text: matched });
            },
            // ['<', name, attributes, '/'?, '>']; a present slash marks a self-closing tag.
            'tag-open'(matched) {
                const fields = {
                    isOpen: true,
                    isClose: Boolean(matched[3]),
                    tagName: matched[1],
                    attributeList: matched[2],
                };
                return node(matched, TAG_SECTION, fields);
            },
            // ['<', '/', name, '>']
            'tag-close'(matched) {
                const fields = {
                    isOpen: false,
                    isClose: true,
                    tagName: matched[2],
                    attributeList: [],
                };
                return node(matched, TAG_SECTION, fields);
            },
            // [name, '=', value]
            'attribute'(matched) {
                return node(matched, "html.Attribute" /* Attribute */, { name: matched[0], value: matched[2] });
            },
            // ['<![CDATA[', text, ']]>']
            'cdata'(matched) {
                return node(matched, "html.CDataSection" /* CDataSection */, { text: matched[1] });
            },
            // The whole match is stored as the text.
            'text'(matched) {
                return node(matched, "html.TextSection" /* TextSection */, { text: matched });
            },
            // Unwrap the content between the two quote tokens.
            'string-literal'(matched) {
                return matched[1];
            },
        },
    };
})(grammar$html || (grammar$html = {}));
(function (grammar$html) {
/**
 * Walks a document node and folds a value through an optional-hook visitor.
 *
 * Every hook receives `(value, node)` and its return value becomes the new
 * accumulated value. A missing hook leaves the value unchanged.
 *
 * Recognised hooks: `visitXmlDeclaration`, `visitDoctypeDeclaration`,
 * `visitElementBefore`, `visitElementAfter`, `visitTag`, `visitText`,
 * `visitComment`, `visitCData` and `onError`.
 */
class SourceWalker {
    visitor;
    /**
     * @param {Object} visitor Object providing any subset of the hooks above.
     */
    constructor(visitor) {
        this.visitor = visitor;
    }
    /**
     * Walk a whole document: XML declaration, doctype, then every top-level node.
     *
     * @param {*} initialValue Starting accumulator value.
     * @param {Object} source Document with `xmlDeclaration`, `doctype` and an iterable `nodeList`.
     * @returns {*} The accumulated value after all hooks ran.
     */
    walk(initialValue, source) {
        const v = this.visitor;
        let value = initialValue;
        if (source.xmlDeclaration && v.visitXmlDeclaration) {
            value = v.visitXmlDeclaration(value, source.xmlDeclaration);
        }
        if (source.doctype && v.visitDoctypeDeclaration) {
            value = v.visitDoctypeDeclaration(value, source.doctype);
        }
        for (const node of source.nodeList) {
            value = this.walkNode(value, node);
        }
        return value;
    }
    /**
     * Walk one element: before-hook, children in order, after-hook.
     *
     * @param {*} value Current accumulator value.
     * @param {Object} element Element node with an iterable `nodeList`.
     * @returns {*} The accumulated value.
     */
    walkElement(value, element) {
        const v = this.visitor;
        value = v.visitElementBefore ? v.visitElementBefore(value, element) : value;
        for (const node of element.nodeList) {
            value = this.walkNode(value, node);
        }
        value = v.visitElementAfter ? v.visitElementAfter(value, element) : value;
        return value;
    }
    /**
     * Dispatch a single node to the hook matching its `syntax`.
     *
     * Comment and CDATA nodes go to `visitComment` / `visitCData` when the
     * visitor provides them; otherwise they are reported through `onError`
     * like any other unhandled syntax, so visitors written before those hooks
     * existed keep behaving the same.
     *
     * @param {*} value Current accumulator value.
     * @param {Object} node Node carrying a `syntax` discriminator.
     * @returns {*} The accumulated value.
     */
    walkNode(value, node) {
        const v = this.visitor;
        switch (node.syntax) {
            case "html.ElementDeclaration" /* ElementDeclaration */:
                return this.walkElement(value, node);
            case "html.TagSection" /* TagSection */:
                return v.visitTag ? v.visitTag(value, node) : value;
            case "html.TextSection" /* TextSection */:
                return v.visitText ? v.visitText(value, node) : value;
            case "html.CommentSection" /* CommentSection */:
                if (v.visitComment) {
                    return v.visitComment(value, node);
                }
                break;
            case "html.CDataSection" /* CDataSection */:
                if (v.visitCData) {
                    return v.visitCData(value, node);
                }
                break;
            default:
                break;
        }
        // Unhandled syntax: let the visitor decide; no debug output is written.
        return v.onError ? v.onError(value, node) : value;
    }
}
grammar$html.SourceWalker = SourceWalker;
})(grammar$html || (grammar$html = {}));
(function (grammar$html) {
// HTML 4 void (empty) elements; kept for reference, not used by isVoidTag.
const voidTagsOnHtml_4 = [
    'area', 'base', 'br', 'col', 'hr', 'img', 'input', 'link', 'meta', 'param',
];
// HTML 5.0 specification
const voidTagsOnHtml_50 = [
    'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr',
];
// HTML 5.2 specification; kept for reference, not used by isVoidTag.
const voidTagsOnHtml_52 = [
    'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link', 'meta', 'param', 'source', 'track', 'wbr',
];
// The HTML 5.0 list is the one in effect.
const voidTagsOnHtml_5 = voidTagsOnHtml_50;
// Built once so each lookup does not scan the array.
const voidTagNames = new Set(voidTagsOnHtml_5);
/**
 * Tell whether a tag name denotes a void element (one that never has content
 * or a close tag), according to the HTML 5.0 list.
 *
 * The comparison ignores case because HTML tag names are case-insensitive,
 * so `BR` and `br` are both void.
 *
 * @param {string} tagName Tag name as written in the source.
 * @returns {boolean} `true` for a void element; `false` otherwise, including
 *     for non-string input.
 */
function isVoidTag(tagName) {
    if (typeof tagName !== 'string') {
        return false;
    }
    return voidTagNames.has(tagName.toLowerCase());
}
grammar$html.isVoidTag = isVoidTag;
})(grammar$html || (grammar$html = {}));
(function (grammar$html) {
    // Enum-like map of the available processor suites, merged into any
    // `ProcessorSuite` object already present on the namespace.
    const ProcessorSuite = grammar$html.ProcessorSuite || (grammar$html.ProcessorSuite = {});
    ProcessorSuite["CST"] = "cst";
    ProcessorSuite["AST"] = "ast";
    /**
     * Look up the processor registered under the given suite key.
     *
     * @param {string} processor Suite key, e.g. `ProcessorSuite.AST`.
     * @returns {Array} One-element array holding the registered processor
     *     (the element is `undefined` when the key is unknown).
     */
    function selectProcessorSuite(processor) {
        const selected = grammar$html.processors[processor];
        return [selected];
    }
    grammar$html.selectProcessorSuite = selectProcessorSuite;
    /**
     * Scan source text with the chosen processor suite.
     *
     * @param {*} source Input handed to `$.createSource`.
     * @param {string} [suite] Suite key; defaults to the AST suite.
     * @param {string} [rule] Start rule; defaults to the recipe name of the selected processor.
     * @param {Object} [options] Extra scan options; `processors` is always overridden.
     * @returns {*} Whatever `$.scan` returns.
     */
    function scan(source, suite = ProcessorSuite.AST, rule, options = {}) {
        const processors = selectProcessorSuite(suite);
        const input = $.createSource(source);
        const startRule = rule ?? processors[0]?.recipe.name;
        const scanOptions = { ...options, processors };
        return $.scan(input, startRule, scanOptions);
    }
    grammar$html.scan = scan;
    /**
     * Scan with the CST suite and flatten the result with `$.flatten`.
     *
     * @param {*} source Input handed to `scan`.
     * @param {Object} [options] Extra scan options.
     * @returns {*} Whatever `$.flatten` returns.
     */
    function tokens(source, options = {}) {
        const cst = scan(source, ProcessorSuite.CST, undefined, options);
        return $.flatten(cst);
    }
    grammar$html.tokens = tokens;
})(grammar$html || (grammar$html = {}));
(function (grammar$html) {
// Defaults applied when the caller omits an option.
const printOptions = {
    pretty: false,
    format: 'html'
};
/**
 * Render an attribute list as a run of ` name="value"` chunks.
 *
 * Each chunk already starts with its own separating space, so the chunks are
 * joined with the empty string; joining with a space would put two spaces
 * between consecutive attributes.
 *
 * @param {Array} attributeList Attribute nodes exposing `name.value` and `value.value`.
 * @returns {string} Text to place directly after the tag name ('' when there are no attributes).
 */
function formatAttributes(attributeList) {
    return attributeList
        .map(attribute => ` ${attribute.name.value}="${attribute.value.value}"`)
        .join('');
}
/**
 * Print a document node as markup text.
 *
 * @param {Object} document Document node accepted by `SourceWalker#walk`.
 * @param {Object} [options] Overrides for the defaults.
 * @param {boolean} [options.pretty=false] Emit spaces, CRLF line breaks and tab indentation.
 * @param {string} [options.format='html'] `'xml'` additionally prints the XML declaration.
 * @returns {*} Whatever `printer.flash()` returns (presumably the accumulated text — confirm against spellu-engine).
 */
function printSource(document, options = {}) {
    // Merge into a fresh object instead of reassigning the parameter.
    const settings = { ...printOptions, ...options };
    const printer = new $.TextPrinter({
        space: settings.pretty ? ' ' : '',
        lineBreak: settings.pretty ? '\r\n' : '',
        indent: settings.pretty ? 'tab' : 0,
    });
    // The hooks only drive the printer; they return nothing, so the walker's
    // accumulated value is not meaningful here.
    const walker = new grammar$html.SourceWalker(new class {
        visitXmlDeclaration(_, node) {
            if (settings.format === 'xml') {
                // NOTE(review): here `name` / `value` are interpolated directly, unlike
                // the `.value` access used for tag attributes — confirm the node shape.
                printer.text('<?xml ', node.attributeList.map(attribute => `${attribute.name}="${attribute.value}"`).join(' '), ' ?>').newLine();
            }
        }
        visitDoctypeDeclaration(_, node) {
            printer.text('<!doctype ', node.string.value, '>').newLine();
        }
        visitElementBefore(_, node) {
            printer.text('<', node.tagName.value, formatAttributes(node.attributeList), '>').newLine().indentUp();
        }
        visitElementAfter(_, node) {
            printer.indentDown().text('</', node.tagName.value, '>').newLine(-1);
        }
        visitTag(_, node) {
            if (!node.isOpen) {
                printer.text('</', node.tagName.value, '>');
            }
            else if (node.isClose) {
                // Open and close at once: a self-closing tag.
                printer.text('<', node.tagName.value, formatAttributes(node.attributeList), ' />');
            }
            else {
                printer.text('<', node.tagName.value, formatAttributes(node.attributeList), '>');
            }
            printer.newLine();
        }
        visitText(_, node) {
            printer.text(node.text.value.trim()).newLine();
        }
    }());
    walker.walk(undefined, document);
    return printer.flash();
}
grammar$html.printSource = printSource;
})(grammar$html || (grammar$html = {}));
(function (grammar$html) {
/**
 * Assemble a flat document into a nested one using a fresh NodeTreeAssembler.
 *
 * @param {Object} source Document node to assemble.
 * @returns {*} The result of `NodeTreeAssembler#assemble`.
 */
function assemble(source) {
    const assembler = new NodeTreeAssembler();
    return assembler.assemble(source);
}
grammar$html.assemble = assemble;
/**
 * Build an element node from the tag that introduces it.
 *
 * @param {Object} tag Tag node supplying `tagName` and `attributeList`.
 * @param {Array} nodeList Child nodes of the element.
 * @param {boolean} isVoid Whether the element is a void element.
 * @returns {*} The node created by `$.createNode`.
 */
function createElementNode(tag, nodeList, isVoid) {
    return $.createNode(tag, "html.ElementDeclaration" /* ElementDeclaration */, {
        tagName: tag.tagName,
        attributeList: tag.attributeList,
        nodeList: nodeList,
        void: isVoid,
    });
}
/**
 * Turns the flat node list of a document (tags, text, ...) into a nested
 * tree of element nodes by pairing open tags with their close tags.
 */
class NodeTreeAssembler {
    // Remaining input nodes; consumed from the front.
    nodeStack = [];
    // Open tags whose close tag has not been seen yet, innermost last.
    tagStack = [];
    /**
     * Assemble a document.
     *
     * @param {Object} source Document with `doctype`, `xmlDeclaration` and an iterable `nodeList`.
     * @returns {*} A new document node whose `nodeList` is the nested tree.
     */
    assemble(source) {
        this.nodeStack = [...source.nodeList];
        this.tagStack = [];
        return $.createNode(source, "html.HtmlDocument" /* HtmlDocument */, {
            doctype: source.doctype,
            nodeList: this.assembleNodes(),
            xmlDeclaration: source.xmlDeclaration,
        });
    }
    /**
     * Consume nodes until the close tag of the innermost open element (or the
     * end of input) and return them as that element's children.
     *
     * @returns {Array} Child nodes collected at this nesting level.
     */
    assembleNodes() {
        const semanticNodeList = [];
        while (this.nodeStack.length > 0) {
            const node = this.nodeStack.shift();
            if (node.syntax !== "html.TagSection" /* TagSection */) {
                semanticNodeList.push(node);
                continue;
            }
            // void tag: never has children, whatever form the tag was written in
            if (grammar$html.isVoidTag(node.tagName.value)) {
                semanticNodeList.push(createElementNode(node, [], true));
                continue;
            }
            if (node.isOpen) {
                if (node.isClose) {
                    // self closing tag
                    semanticNodeList.push(createElementNode(node, [], false));
                }
                else {
                    // open tag: register it first, then collect its children recursively
                    this.tagStack.push(node);
                    semanticNodeList.push(createElementNode(node, this.assembleNodes(), false));
                }
                continue;
            }
            // close tag
            if (this.tagStack.length === 0) {
                // Error: close tag is not matched
                console.error('Ca2');
                continue;
            }
            // Peek rather than pop: a stray close tag must not discard the open tag,
            // otherwise the real close tag that follows would be compared against
            // the wrong ancestor and every later close tag would be reported too.
            const openNode = this.tagStack[this.tagStack.length - 1];
            if (openNode.tagName.value === node.tagName.value) {
                this.tagStack.pop();
                return semanticNodeList;
            }
            // Error: close tag is not matched
            console.error('Ca1');
        }
        return semanticNodeList;
    }
}
})(grammar$html || (grammar$html = {}));
(function (grammar$html) {
/**
 * Run the strict checks on a document using a fresh Checker.
 *
 * @param {Object} source Document node to check.
 * @returns {void}
 */
function checkStrict(source) {
    const checker = new Checker();
    checker.check(source);
}
grammar$html.checkStrict = checkStrict;
/**
 * Strictness checker for a document.
 *
 * Only the root-count rule is evaluated, and `error` / `warn` are empty
 * hooks, so running a check currently has no observable effect unless a
 * subclass overrides them.
 */
class Checker {
    xmlDeclarationAppeared = false;
    doctypeDeclarationAppeared = false;
    // protected ids : {}
    /**
     * Check the root rule, then visit every top-level node.
     *
     * @param {Object} source Document with an iterable `nodeList`.
     * @returns {void}
     */
    check(source) {
        const topLevelNodes = source.nodeList;
        this.checkRoot(topLevelNodes);
        for (const child of topLevelNodes) {
            this.checkNode(child);
        }
    }
    /**
     * [1] A document must have a single top node.
     *
     * @param {Array} nodeList Top-level nodes of the document.
     * @returns {void}
     */
    checkRoot(nodeList) {
        const hasExtraRoots = nodeList.length > 1;
        if (hasExtraRoots) {
            this.error('top node must 1.');
        }
    }
    /**
     * Per-node check; no rule is implemented for any syntax yet.
     *
     * @param {Object} node Node carrying a `syntax` discriminator.
     * @returns {void}
     */
    checkNode(node) {
        if (node.syntax === "html.TagSection" /* TagSection */) {
            // Tag-specific rules would go here.
            return;
        }
    }
    /**
     * Error hook; intentionally empty.
     *
     * @param {string} message Description of the violation.
     * @returns {void}
     */
    error(message) {
    }
    /**
     * Warning hook; intentionally empty.
     *
     * @param {string} message Description of the issue.
     * @returns {void}
     */
    warn(message) {
    }
}
})(grammar$html || (grammar$html = {}));
(function (grammar$html) {
/**
 * Create a document node from the given nodes, with no XML declaration and
 * no doctype.
 *
 * @param {...*} nodes Nodes that become the document's `nodeList`.
 * @returns {*} The node created by `$.createNode`.
 */
function createDocument(...nodes) {
    const fields = {
        xmlDeclaration: null,
        doctype: null,
        nodeList: nodes,
    };
    return $.createNode(nodes, "html.HtmlDocument" /* HtmlDocument */, fields);
}
grammar$html.createDocument = createDocument;
})(grammar$html || (grammar$html = {}));
export default grammar$html;
//# sourceMappingURL=grammar-html.mjs.map