UNPKG

@grammar/html

Version:

HTML grammar.

477 lines 20.6 kB
import $ from './spellu-engine.mjs';

/**
 * HTML 5 grammar for the spellu engine.
 *
 * Everything public is attached to the `grammar$html` namespace object, which
 * is the module's default export:
 *
 *   createRecipe, processors, SourceWalker, isVoidTag, ProcessorSuite,
 *   selectProcessorSuite, scan, tokens, printSource, assemble, checkStrict,
 *   createDocument
 */
const grammar$html = {};

// ---------------------------------------------------------------------------
// Recipe (grammar rules)
// ---------------------------------------------------------------------------

/**
 * Builds the rule shared by `html-node-list` and `xml-node-list`.
 * A fresh object is returned on every call so the two rules never share state.
 */
function createNodeListRule() {
    return {
        parser: '|',
        argument: [
            { parser: 'comment' },
            { parser: 'tag-open' },
            { parser: 'tag-close' },
            { parser: 'cdata' },
            { parser: 'text' },
        ],
        multiplicity: 0,
    };
}

/**
 * Creates the grammar recipe consumed by the engine.
 *
 * NOTE(review): the meaning of the parser codes ('|', '&', '?', '#') is defined
 * by the engine, not here. From the rule shapes, '|' looks like an ordered
 * choice and '&' like a sequence - confirm against the engine documentation.
 *
 * @returns {{name: string, rules: Object}} a new recipe object on every call.
 */
function createRecipe() {
    return {
        name: 'html',
        rules: {
            'root': {
                parser: '|',
                argument: [
                    { parser: 'html-syntax' },
                    { parser: 'xhtml-syntax' },
                ],
            },
            'html-syntax': {
                parser: '&',
                argument: [
                    { parser: 'document-type', optional: true },
                    { parser: 'html-node-list' },
                ],
            },
            'xhtml-syntax': {
                parser: '&',
                argument: [
                    { parser: 'xml-declaration', optional: true },
                    { parser: 'document-type' },
                    { parser: 'xml-node-list' },
                ],
            },
            // <?xml ...?>
            // <?xml-stylesheet ...?>
            'xml-declaration': {
                parser: '&',
                argument: [
                    { parser: '?', argument: { syntax: "html.token.XmlDeclaration.L" /* XmlDeclarationOpenToken */, pattern: '<?xml' } },
                    { parser: '?', argument: { syntax: "html.TextSection" /* TextSection */, pattern: /[^\?]+/ } },
                    { parser: '?', argument: { syntax: "html.token.XmlDeclaration.R" /* XmlDeclarationCloseToken */, pattern: '?>' } },
                ],
            },
            // <!doctype ...>
            // <!...>
            'document-type': {
                parser: '&',
                argument: [
                    { parser: '?', argument: { syntax: "html.token.MarkupDeclaration.L" /* MarkupDeclarationOpenToken */, pattern: '<!' } },
                    { parser: '?', argument: { syntax: "html.DoctypeDeclaration" /* DoctypeDeclaration */, pattern: /(?:doctype|DOCTYPE)\s+([0-9a-zA-Z:_\-]+)\s*/, capture: 1 } },
                    { parser: '?', argument: { syntax: "html.token.MarkupDeclaration.R" /* MarkupDeclarationCloseToken */, pattern: '>' } },
                ],
            },
            'html-node-list': createNodeListRule(),
            'xml-node-list': createNodeListRule(),
            'comment': {
                parser: '&',
                argument: [
                    { parser: '?', argument: { syntax: "html.CommentSection" /* CommentSection */, pattern: '<!--' } },
                    // Lazy + dotAll: stop at the FIRST "-->" and allow line
                    // breaks inside the comment. (The previous greedy /.+/
                    // ran to the last "-->" on the line and could not cross
                    // a newline.)
                    { parser: '?', argument: [
                            { syntax: 'xml.comment.content', pattern: /.+?(?=-->)/s },
                        ] },
                    { parser: '?', argument: { syntax: "html.CommentSection" /* CommentSection */, pattern: '-->' } },
                ],
            },
            'tag-open': {
                parser: '&',
                argument: [
                    { parser: '?', argument: { syntax: "html.token.<" /* TagOpenToken */, pattern: '<' } },
                    { parser: '#', argument: { syntax: "html.TagName" /* TagName */, pattern: /[0-9a-zA-Z:_\-]+/, label: 'TAG_NAME' } },
                    { parser: 'attribute', multiplicity: 0 },
                    { parser: '?', argument: { syntax: "html.token./" /* SlashToken */, pattern: '/' }, optional: true },
                    { parser: '?', argument: { syntax: "html.token.>" /* TagCloseToken */, pattern: '>' } },
                ],
            },
            'tag-close': {
                parser: '&',
                argument: [
                    { parser: '?', argument: { syntax: "html.token.<" /* TagOpenToken */, pattern: '<' } },
                    { parser: '#', argument: { syntax: "html.token./" /* SlashToken */, pattern: '/' } },
                    { parser: '#', argument: { syntax: "html.TagName" /* TagName */, pattern: /[0-9a-zA-Z:_\-]+/ } },
                    { parser: '?', argument: { syntax: "html.token.>" /* TagCloseToken */, pattern: '>' } },
                ],
            },
            'attribute': {
                parser: '&',
                argument: [
                    // The `u` flag is required for \p{...} to be a Unicode
                    // property escape (Cc = Control). Without it the class
                    // merely excluded the literal letters "p{Control}", which
                    // truncated names such as "href" or "class".
                    { parser: '?', argument: { syntax: "html.AttributeName" /* AttributeName */, pattern: /[^\s"'>/=\p{Cc}]+/u, label: 'ATTRIBUTE_NAME' } },
                    { parser: '?', argument: { syntax: "html.token.=" /* EqualToken */, pattern: '=' } },
                    { parser: 'string-literal' },
                ],
            },
            'cdata': {
                parser: '&',
                argument: [
                    // FIXME: Syntax
                    { parser: '?', argument: { syntax: "html.token.<" /* TagOpenToken */, pattern: '<![CDATA[' } },
                    { parser: '#', argument: { syntax: "html.Text" /* Text */, pattern: /.*?(?=\]\]>)/ms } },
                    { parser: '#', argument: { syntax: "html.token.>" /* TagCloseToken */, pattern: ']]>' } },
                ],
            },
            'text': { parser: '?', argument: { syntax: "html.Text" /* Text */, pattern: /.+?(?=<)|.+/ms } },
            'string-literal': {
                parser: '&',
                argument: [
                    { parser: '?', argument: { syntax: 'html.token.doublequote', pattern: '"' } },
                    { parser: '#', argument: { syntax: 'html.token.content', pattern: /[^"]*/ } },
                    { parser: '#', argument: { syntax: 'html.token.doublequote', pattern: '"' } },
                ],
            },
        },
    };
}
grammar$html.createRecipe = createRecipe;

// ---------------------------------------------------------------------------
// Processor suites
// ---------------------------------------------------------------------------

/**
 * Processor suites keyed by `ProcessorSuite` value. Each suite owns its own
 * recipe instance plus the per-rule `parts` callbacks.
 */
const processors = {};
grammar$html.processors = processors;

// CST suite: no per-rule callbacks are registered.
processors.cst = {
    recipe: createRecipe(),
    parts: {},
};

// AST suite: each callback receives what the engine matched for the rule
// (indexed by position in the rule's `argument` list) and builds a node.
processors.ast = {
    recipe: createRecipe(),
    parts: {
        'html-syntax'(matched) {
            return $.createNode(matched, "html.HtmlDocument" /* HtmlDocument */, {
                doctype: matched[0],
                nodeList: matched[1],
                xmlDeclaration: null,
            });
        },
        'xhtml-syntax'(matched) {
            return $.createNode(matched, "html.HtmlDocument" /* HtmlDocument */, {
                doctype: matched[1],
                nodeList: matched[2],
                xmlDeclaration: matched[0] || null,
            });
        },
        'xml-declaration'(matched) {
            return $.createNode(matched, "html.XmlDeclaration" /* XmlDeclaration */, {
                attributeList: [],
            });
        },
        'document-type'(matched) {
            return $.createNode(matched, "html.DoctypeDeclaration" /* DoctypeDeclaration */, {
                string: matched[1],
            });
        },
        'comment'(matched) {
            return $.createNode(matched, "html.CommentSection" /* CommentSection */, {
                text: matched,
            });
        },
        'tag-open'(matched) {
            return $.createNode(matched, "html.TagSection" /* TagSection */, {
                isOpen: true,
                // matched[3] is the optional "/" of a self-closing tag.
                isClose: Boolean(matched[3]),
                tagName: matched[1],
                attributeList: matched[2],
            });
        },
        'tag-close'(matched) {
            return $.createNode(matched, "html.TagSection" /* TagSection */, {
                isOpen: false,
                isClose: true,
                tagName: matched[2],
                attributeList: [],
            });
        },
        'attribute'(matched) {
            return $.createNode(matched, "html.Attribute" /* Attribute */, {
                name: matched[0],
                value: matched[2],
            });
        },
        'cdata'(matched) {
            return $.createNode(matched, "html.CDataSection" /* CDataSection */, {
                text: matched[1],
            });
        },
        'text'(matched) {
            return $.createNode(matched, "html.TextSection" /* TextSection */, {
                text: matched,
            });
        },
        'string-literal'(matched) {
            // Only the content between the quotes is kept.
            return matched[1];
        },
    },
};

// ---------------------------------------------------------------------------
// Tree walking
// ---------------------------------------------------------------------------

/**
 * Walks a document and threads an accumulator value through the optional
 * visitor hooks: visitXmlDeclaration, visitDoctypeDeclaration,
 * visitElementBefore, visitElementAfter, visitTag, visitText and onError.
 * A hook that is not defined leaves the accumulator unchanged; a hook that is
 * defined replaces the accumulator with its return value.
 */
class SourceWalker {
    visitor;

    /** @param {Object} visitor object carrying any subset of the hooks. */
    constructor(visitor) {
        this.visitor = visitor;
    }

    /**
     * Visits the XML declaration, the doctype and then every top-level node.
     *
     * @param {*} initialValue starting accumulator.
     * @param {Object} source document node (`xmlDeclaration`, `doctype`, `nodeList`).
     * @returns {*} the final accumulator.
     */
    walk(initialValue, source) {
        const v = this.visitor;
        let value = initialValue;
        if (source.xmlDeclaration) {
            value = v.visitXmlDeclaration ? v.visitXmlDeclaration(value, source.xmlDeclaration) : value;
        }
        if (source.doctype) {
            value = v.visitDoctypeDeclaration ? v.visitDoctypeDeclaration(value, source.doctype) : value;
        }
        for (const node of source.nodeList) {
            value = this.walkNode(value, node);
        }
        return value;
    }

    /** Visits an element: before-hook, children in order, after-hook. */
    walkElement(value, element) {
        const v = this.visitor;
        value = v.visitElementBefore ? v.visitElementBefore(value, element) : value;
        for (const node of element.nodeList) {
            value = this.walkNode(value, node);
        }
        value = v.visitElementAfter ? v.visitElementAfter(value, element) : value;
        return value;
    }

    /**
     * Dispatches one node on its `syntax`. Node kinds without a dedicated
     * hook (comments and CDATA, for instance) are routed to `onError`.
     */
    walkNode(value, node) {
        const v = this.visitor;
        switch (node.syntax) {
            case "html.ElementDeclaration" /* ElementDeclaration */:
                return this.walkElement(value, node);
            case "html.TagSection" /* TagSection */:
                return v.visitTag ? v.visitTag(value, node) : value;
            case "html.TextSection" /* TextSection */:
                return v.visitText ? v.visitText(value, node) : value;
            default:
                // The stray `console.log(99, ...)` debug trace was removed;
                // unhandled kinds are reported through `onError` only.
                return v.onError ? v.onError(value, node) : value;
        }
    }
}
grammar$html.SourceWalker = SourceWalker;

// ---------------------------------------------------------------------------
// Void tags
// ---------------------------------------------------------------------------

/** Void-element names per specification revision (kept for reference). */
const VOID_TAGS = {
    html4: [
        'area', 'base', 'br', 'col', 'hr', 'img', 'input', 'link', 'meta', 'param',
    ],
    // HTML 5.0 specification
    html50: [
        'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img', 'input',
        'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr',
    ],
    // HTML 5.2 specification
    html52: [
        'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'link',
        'meta', 'param', 'source', 'track', 'wbr',
    ],
};

// The HTML 5.0 list is the one in effect.
const activeVoidTags = new Set(VOID_TAGS.html50);

/**
 * Tells whether a tag name denotes a void element (one that never has
 * content or a closing tag).
 *
 * The comparison ignores case because HTML tag names are case-insensitive;
 * previously `<BR>` was not recognised and swallowed its following siblings.
 *
 * @param {string} tagName
 * @returns {boolean} false for non-string input.
 */
function isVoidTag(tagName) {
    return typeof tagName === 'string' && activeVoidTags.has(tagName.toLowerCase());
}
grammar$html.isVoidTag = isVoidTag;

// ---------------------------------------------------------------------------
// Scanning
// ---------------------------------------------------------------------------

/** Names of the available processor suites (keys of `processors`). */
const ProcessorSuite = {
    CST: 'cst',
    AST: 'ast',
};
grammar$html.ProcessorSuite = ProcessorSuite;

/**
 * Looks a suite up on `grammar$html.processors`.
 *
 * @param {string} processor a `ProcessorSuite` value.
 * @returns {Array} a one-element array holding the suite (the element is
 *   `undefined` when no suite is registered under that name).
 */
function selectProcessorSuite(processor) {
    return [grammar$html.processors[processor]];
}
grammar$html.selectProcessorSuite = selectProcessorSuite;

/**
 * Scans source text with the chosen suite.
 *
 * @param {string} source text handed to `$.createSource`.
 * @param {string} [suite=ProcessorSuite.AST]
 * @param {string} [rule] rule to start from; defaults to the recipe name.
 * @param {Object} [options={}] extra options forwarded to `$.scan`.
 * @returns {*} whatever `$.scan` returns.
 */
function scan(source, suite = ProcessorSuite.AST, rule, options = {}) {
    const suiteProcessors = selectProcessorSuite(suite);
    const startRule = rule ?? suiteProcessors[0]?.recipe.name;
    return $.scan($.createSource(source), startRule, { ...options, processors: suiteProcessors });
}
grammar$html.scan = scan;

/**
 * Scans with the CST suite and passes the result through `$.flatten`.
 *
 * @param {string} source
 * @param {Object} [options={}]
 */
function tokens(source, options = {}) {
    return $.flatten(scan(source, ProcessorSuite.CST, undefined, options));
}
grammar$html.tokens = tokens;

// ---------------------------------------------------------------------------
// Printing
// ---------------------------------------------------------------------------

const DEFAULT_PRINT_OPTIONS = { pretty: false, format: 'html' };

/**
 * Renders an attribute list as ` name="value"` pairs. Each pair carries its
 * own leading space, so the pairs are joined with the empty string (joining
 * with ' ' as before produced a double space between attributes).
 */
function formatAttributes(attributeList) {
    return attributeList
        .map(({ name, value }) => ` ${name.value}="${value.value}"`)
        .join('');
}

/**
 * Prints a document back to text.
 *
 * @param {Object} document document node (raw or assembled).
 * @param {{pretty?: boolean, format?: string}} [options={}] `pretty` turns on
 *   spaces, CRLF line breaks and tab indentation; the XML declaration is only
 *   emitted when `format` is 'xml'.
 * @returns {*} the result of the printer's `flash()` call.
 */
function printSource(document, options = {}) {
    const settings = { ...DEFAULT_PRINT_OPTIONS, ...options };
    const printer = new $.TextPrinter({
        space: settings.pretty ? ' ' : '',
        lineBreak: settings.pretty ? '\r\n' : '',
        indent: settings.pretty ? 'tab' : 0,
    });
    // The hooks write to `printer` and return nothing; the walker's
    // accumulator is not used here.
    const visitor = {
        visitXmlDeclaration(_, node) {
            if (settings.format === 'xml') {
                printer.text('<?xml ', node.attributeList.map(attribute => `${attribute.name}="${attribute.value}"`).join(' '), ' ?>').newLine();
            }
        },
        visitDoctypeDeclaration(_, node) {
            printer.text('<!doctype ', node.string.value, '>').newLine();
        },
        visitElementBefore(_, node) {
            printer.text('<', node.tagName.value, formatAttributes(node.attributeList), '>').newLine().indentUp();
        },
        visitElementAfter(_, node) {
            printer.indentDown().text('</', node.tagName.value, '>').newLine(-1);
        },
        visitTag(_, node) {
            if (!node.isOpen) {
                printer.text('</', node.tagName.value, '>');
            }
            else if (node.isClose) {
                printer.text('<', node.tagName.value, formatAttributes(node.attributeList), ' />');
            }
            else {
                printer.text('<', node.tagName.value, formatAttributes(node.attributeList), '>');
            }
            printer.newLine();
        },
        visitText(_, node) {
            printer.text(node.text.value.trim()).newLine();
        },
    };
    new grammar$html.SourceWalker(visitor).walk(undefined, document);
    // `flash` is the method name exposed by the engine's TextPrinter.
    return printer.flash();
}
grammar$html.printSource = printSource;

// ---------------------------------------------------------------------------
// Assembling the element tree
// ---------------------------------------------------------------------------

/**
 * Turns the flat tag/text node list of a scanned document into a nested tree
 * of `html.ElementDeclaration` nodes.
 *
 * @param {Object} source scanned document node.
 * @returns {Object} a new document node with a nested `nodeList`.
 */
function assemble(source) {
    return new NodeTreeAssembler().assemble(source);
}
grammar$html.assemble = assemble;

class NodeTreeAssembler {
    /** Nodes still to be consumed, in document order. */
    nodeStack = [];
    /** Open tags whose closing tag has not been seen yet (innermost last). */
    tagStack = [];

    assemble(source) {
        this.nodeStack = [...source.nodeList];
        this.tagStack = [];
        return $.createNode(source, "html.HtmlDocument" /* HtmlDocument */, {
            doctype: source.doctype,
            nodeList: this.assembleNodes(),
            xmlDeclaration: source.xmlDeclaration,
        });
    }

    /**
     * Consumes nodes until the closing tag of the innermost open element (or
     * the end of input) and returns them as that element's children.
     */
    assembleNodes() {
        const semanticNodeList = [];
        while (this.nodeStack.length > 0) {
            const node = this.nodeStack.shift();
            if (node.syntax !== "html.TagSection" /* TagSection */) {
                semanticNodeList.push(node);
                continue;
            }
            const tagName = node.tagName.value;
            const isVoid = grammar$html.isVoidTag(tagName);

            if (node.isOpen) {
                // Void and self-closing tags have no children; a plain open
                // tag collects everything up to its matching close tag.
                let children = [];
                if (!isVoid && !node.isClose) {
                    this.tagStack.push(node);
                    children = this.assembleNodes();
                }
                semanticNodeList.push($.createNode(node, "html.ElementDeclaration" /* ElementDeclaration */, {
                    tagName: node.tagName,
                    attributeList: node.attributeList,
                    nodeList: children,
                    void: isVoid,
                }));
                continue;
            }

            // Close tag from here on.
            if (isVoid) {
                // A void element has no closing tag. Previously `</br>` or
                // `</input>` produced a second, duplicate element; it is now
                // dropped.
                continue;
            }
            if (this.tagStack.length === 0) {
                // Error: close tag is not matched
                console.error('Ca2');
                continue;
            }
            // Peek first: the stack is popped only on a real match, so one
            // stray close tag can no longer desynchronise later matches.
            const openNode = this.tagStack[this.tagStack.length - 1];
            if (openNode.tagName.value === tagName) {
                this.tagStack.pop();
                return semanticNodeList;
            }
            // Error: close tag is not matched
            console.error('Ca1');
        }
        return semanticNodeList;
    }
}

// ---------------------------------------------------------------------------
// Strict checking
// ---------------------------------------------------------------------------

/**
 * Runs the strict checks over a document. The `Checker` reporting hooks
 * (`error` / `warn`) are currently empty, so nothing is reported or thrown
 * by the checks themselves.
 *
 * @param {Object} source document node.
 */
function checkStrict(source) {
    new Checker().check(source);
}
grammar$html.checkStrict = checkStrict;

class Checker {
    xmlDeclarationAppeared = false;
    doctypeDeclarationAppeared = false;
    // protected ids : {}

    check(source) {
        this.checkRoot(source.nodeList);
        for (const node of source.nodeList) {
            this.checkNode(node);
        }
    }

    checkRoot(nodeList) {
        // [1] there must be a single top-level node.
        if (nodeList.length > 1) {
            this.error('top node must 1.');
        }
    }

    checkNode(node) {
        // No per-node rules are implemented yet; the switch is the skeleton
        // they will hang from.
        switch (node.syntax) {
            case "html.TagSection" /* TagSection */:
                break;
            default:
        }
    }

    /** Reporting hook for violations - intentionally empty for now. */
    error(message) { }

    /** Reporting hook for warnings - intentionally empty for now. */
    warn(message) { }
}

// ---------------------------------------------------------------------------
// Document factory
// ---------------------------------------------------------------------------

/**
 * Creates a document node from the given nodes, without XML declaration or
 * doctype.
 *
 * @param {...Object} nodes top-level nodes of the new document.
 */
function createDocument(...nodes) {
    return $.createNode(nodes, "html.HtmlDocument" /* HtmlDocument */, {
        xmlDeclaration: null,
        doctype: null,
        nodeList: nodes,
    });
}
grammar$html.createDocument = createDocument;

export default grammar$html;
//# sourceMappingURL=grammar-html.mjs.map