UNPKG

micro-mdx-parser

Version:

A tiny parser to convert markdown or html into JSON

876 lines (838 loc) 20.8 kB
const { inspect } = require("util")
const { test } = require("uvu")
const assert = require("uvu/assert")
const { lexer } = require("./lexer")
const { parser } = require("./parser")
// Not referenced by any active test; kept for ad-hoc debugging of source offsets.
const { getTextBetweenChars } = require('./utils')

/**
 * Debugging aid: prints a value with unlimited depth and colors.
 * Call `deepLog(nodes)` / `deepLog(answer)` before an assertion to compare
 * the parsed tree with the expected one when a test fails.
 *
 * @param {*} obj - Value to print.
 */
function deepLog(obj) {
  console.log(inspect(obj, { showHidden: false, depth: null, colors: true }))
}

/**
 * Builds the position record for an offset in a single-line input.
 *
 * @param {number} index - Zero-based character offset.
 * @returns {{ index: number, line: number, column: number }} Position with
 *   `line` fixed at 1 and `column` equal to `index + 1`.
 */
function ps(index) {
  return { index, line: 1, column: index + 1 }
}

/**
 * Builds an expected text node.
 *
 * @param {string} content - Text content of the node.
 * @param {number} start - Start offset (passed to `ps`).
 * @param {number} end - End offset (passed to `ps`).
 * @returns {object} Expected text node.
 */
function textNode(content, start, end) {
  return {
    type: "text",
    content,
    position: { start: ps(start), end: ps(end) },
  }
}

/**
 * Builds an expected element node with empty `props` / `propsRaw` by default.
 *
 * @param {string} tagName - Expected tag name.
 * @param {number} start - Start offset (passed to `ps`).
 * @param {number} end - End offset (passed to `ps`).
 * @param {object[]} [children=[]] - Expected child nodes.
 * @param {object} [extra={}] - Extra or overriding fields (e.g. `props`,
 *   `propsRaw`, `isSelfClosing`), spread last so they win over the defaults.
 * @returns {object} Expected element node.
 */
function elementNode(tagName, start, end, children = [], extra = {}) {
  return {
    type: "element",
    tagName,
    props: {},
    propsRaw: "",
    children,
    position: { start: ps(start), end: ps(end) },
    ...extra,
  }
}

const lexerOptions = { childlessTags: [] }
const parserOptions = {
  voidTags: [],
  closingTags: [],
  closingTagAncestorBreakers: {},
}

test("parser() should return nodes", () => {
  const str = "<h1>Hello world</h1>"
  const tokens = lexer(str, lexerOptions)
  const nodes = parser(tokens, parserOptions)
  assert.equal(nodes, [
    elementNode("h1", 0, str.length, [textNode("Hello world", 4, 15)]),
  ])
})

test("parser() should not nest within void tags", () => {
  const str = "<div>abc<img/>def</div>"
  const tokens = lexer(str, lexerOptions)
  // NOTE(review): voidTags is a string here while every other test passes an
  // array, and closingTagAncestorBreakers is omitted — confirm the parser
  // accepts both before normalising this to `["img"]`.
  const nodes = parser(tokens, { voidTags: "img", closingTags: [] })
  const answer = [
    elementNode("div", 0, str.length, [
      textNode("abc", 5, 8),
      elementNode("img", 8, 14, [], { isSelfClosing: true }),
      textNode("def", 14, 17),
    ]),
  ]
  assert.equal(nodes, answer)
})

test("parser() should handle optional-close tags", () => {
  {
    const options = {
      voidTags: [],
      closingTags: ["p"],
      closingTagAncestorBreakers: {},
    }
    const str = "<p>This is one<p>This is two</p>"
    const tokens = lexer(str, lexerOptions)
    const nodes = parser(tokens, options)
    assert.equal(nodes, [
      elementNode("p", 0, 14, [textNode("This is one", 3, 14)]),
      elementNode("p", 14, str.length, [textNode("This is two", 17, 28)]),
    ])
  }

  {
    const options = {
      voidTags: [],
      closingTags: ["p", "span"],
      closingTagAncestorBreakers: {},
    }
    const str = "<p>This is one <span>okay<p>This is two</p>"
    const tokens = lexer(str, lexerOptions)
    const nodes = parser(tokens, options)
    assert.equal(nodes, [
      elementNode("p", 0, 25, [
        textNode("This is one ", 3, 15),
        elementNode("span", 15, 25, [textNode("okay", 21, 25)]),
      ]),
      elementNode("p", 25, 43, [textNode("This is two", 28, 39)]),
    ])
  }
})

test("parser() should auto-close unmatched child tags", () => {
  const str = "<div>This is <b>one <span>okay</div>"
  const tokens = lexer(str, lexerOptions)
  const nodes = parser(tokens, parserOptions)
  assert.equal(nodes, [
    elementNode("div", 0, 36, [
      textNode("This is ", 5, 13),
      elementNode("b", 13, 30, [
        textNode("one ", 16, 20),
        elementNode("span", 20, 30, [textNode("okay", 26, 30)]),
      ]),
    ]),
  ])
})

test("parser() should handle empty token arrays", () => {
  const tokens = []
  const nodes = parser(tokens, parserOptions)
  assert.equal(nodes, [])
})

test("parser() should report the element attributes", () => {
  const str = '<div class="cake" data-key="abc" disabled></div>'
  const tokens = lexer(str, lexerOptions)
  const nodes = parser(tokens, parserOptions)
  // The expectation used to carry an `attributes` array
  // (['class="cake"', 'data-key="abc"', "disabled"]); it now checks the
  // `props` object plus the raw attribute string instead.
  const answer = [
    elementNode("div", 0, 48, [], {
      props: {
        class: 'cake',
        'data-key': 'abc',
        disabled: true
      },
      propsRaw: ' class="cake" data-key="abc" disabled',
    }),
  ]
  assert.equal(nodes, answer)
})

test("parser() should handle unclosed elements", () => {
  const str = "<div>abc"
  const tokens = lexer(str, lexerOptions)
  const nodes = parser(tokens, parserOptions)
  assert.equal(nodes, [
    elementNode("div", 0, str.length, [textNode("abc", 5, str.length)]),
  ])
})

test("parser() should preserve case-sensitive tag names", () => {
  const str = "<You-Know-8>"
  const tokens = lexer(str, lexerOptions)
  const nodes = parser(tokens, parserOptions)
  assert.equal(nodes, [elementNode("You-Know-8", 0, str.length)])
})

test("parser() should match by case-insensitive tags", () => {
  const str = "<div>abc</DIV>def"
  const tokens = lexer(str, lexerOptions)
  const nodes = parser(tokens, parserOptions)
  assert.equal(nodes, [
    elementNode("div", 0, 14, [textNode("abc", 5, 8)]),
    textNode("def", 14, 17),
  ])
})

test("parser() should handle ancestor breaker special case (#39)", () => {
  /*
    To summarize, this special case is where a <ul|ol|menu> is
    encountered within an <li>. The default behavior for <li>s
    as closing tags is to rewind up and auto-close the previous <li>.
    However, <li> may contain <ul|ol|menu> before being closed so we
    should not rewind the stack in those cases.

    This edge-case also applies to <dt|dd> in <dl>s.
  */
  const listOptions = {
    voidTags: [],
    closingTags: ["li"],
    closingTagAncestorBreakers: {
      li: ["ul"],
    },
  }

  {
    const str = "<ul><li>abc<ul><li>def</li></ul></li></ul>"
    const tokens = lexer(str, lexerOptions)
    const nodes = parser(tokens, listOptions)
    assert.equal(nodes, [
      elementNode("ul", 0, 42, [
        elementNode("li", 4, 37, [
          textNode("abc", 8, 11),
          elementNode("ul", 11, 32, [
            elementNode("li", 15, 27, [textNode("def", 19, 22)]),
          ]),
        ]),
      ]),
    ])
  }

  {
    const str = "<ul><li>abc<ul><span><li>def</li></span></ul></li></ul>"
    const tokens = lexer(str, lexerOptions)
    const nodes = parser(tokens, listOptions)
    assert.equal(nodes, [
      elementNode("ul", 0, 55, [
        elementNode("li", 4, 50, [
          textNode("abc", 8, 11),
          elementNode("ul", 11, 45, [
            elementNode("span", 15, 40, [
              elementNode("li", 21, 33, [textNode("def", 25, 28)]),
            ]),
          ]),
        ]),
      ]),
    ])
  }

  {
    const str = "<ul><li>abc<ul><li>def<li>ghi</li></ul></li></ul>"
    const tokens = lexer(str, lexerOptions)
    const nodes = parser(tokens, listOptions)
    assert.equal(nodes, [
      elementNode("ul", 0, 49, [
        elementNode("li", 4, 44, [
          textNode("abc", 8, 11),
          elementNode("ul", 11, 39, [
            elementNode("li", 15, 22, [textNode("def", 19, 22)]),
            elementNode("li", 22, 34, [textNode("ghi", 26, 29)]),
          ]),
        ]),
      ]),
    ])
  }
})

test("parser() should handle nested tables", () => {
  const str =
    "<table><tbody><tr><td><table><tbody><tr><td></td></tr></tbody></table></td></tr></tbody></table>"
  const tokens = lexer(str, lexerOptions)
  const nodes = parser(tokens, {
    voidTags: [],
    closingTags: ["tbody"],
    closingTagAncestorBreakers: {
      tbody: ["table"],
      tr: ["table"],
      td: ["table"],
    },
  })
  const answer = [
    elementNode("table", 0, 96, [
      elementNode("tbody", 7, 88, [
        elementNode("tr", 14, 80, [
          elementNode("td", 18, 75, [
            elementNode("table", 22, 70, [
              elementNode("tbody", 29, 62, [
                elementNode("tr", 36, 54, [elementNode("td", 40, 49)]),
              ]),
            ]),
          ]),
        ]),
      ]),
    ]),
  ]
  assert.equal(nodes, answer)
})

test("parser() should ignore unnecessary closing tags", () => {
  /*
    In this case the </i> bit is unnecessary and should
    not be represented in the output nor interfere with the stack.
  */
  const str = "</i>x"
  const tokens = lexer(str, lexerOptions)
  const nodes = parser(tokens, parserOptions)
  const answer = [textNode("x", 4, str.length)]
  // Assert against `answer` rather than a second copy of the same literal.
  assert.equal(nodes, answer)
})

test.run()