micro-mdx-parser
Version:
A tiny parser to convert markdown or html into JSON
125 lines (108 loc) • 3.62 kB
JavaScript
// Marker string made of two "◪" characters, plus a global pattern matching every
// occurrence of it. NOTE(review): neither is used in this file; presumably other
// modules substitute the marker for closing-element syntax — confirm against callers.
const CLOSE_ELEMENT_SYMBOL = '◪◪'
const CLOSE_ELEMENT_SYMBOL_PATTERN = /◪◪/g

// Matches ONE capitalised component tag. Based on https://regex101.com/r/T0QMif/1
// (the second alternative here additionally accepts upper-case letters in the tag
// name, so mixed-case names such as <MyComp>...</MyComp> match the same way the
// self-closing alternative already did).
//
// Alternative 1 - self-closing tag, e.g. <Foo a="b" />
//   group 1: tag name
//   group 2: everything after the name, INCLUDING the trailing "/>"
// Alternative 2 - open tag, optionally with children and a matching close tag
//   group 3: tag name (back-referenced by \3 for the close tag)
//   group 4: raw props text between the name and the first ">"
//   group 5: children text (undefined when there is no close tag)
//
// The regex is global, so exec() keeps state in lastIndex between calls.
const SINGLE_COMPONENT_REGEX = /<([A-Z][a-zA-Z_]*)(\s*(?:(?:'[^']*'|"[^"]*"|[^'">])*)\/>)|<([A-Z][a-zA-Z_]*)\b([^>]*)>*(?:>([\s\S]*?)<\/\3>|\s?\/?>)/gm

// Broader matcher used for the first scanning pass. https://regex101.com/r/rK6kVF/1
//
// Alternative 1 - self-closing tag, optionally followed by a second run ending in "/>"
//   group 1: tag name
//   group 2: text after the name, including "/>"
//   group 3: inner part of group 2 (without "/>")
//   group 4: optional second run ending in "/>"
//   group 5: inner part of group 4 (without "/>")
// Alternative 2 - open tag with children
//   group 6: tag name (back-referenced by \6)
//   group 7: raw props text
//   group 8: children text
//
// The "(:?" sequences are capturing groups with an optional leading colon, not
// non-capturing groups. They are kept as-is because the group numbers above (and
// the \6 back-reference) depend on them.
const COMPONENT_REGEX =
  /<([A-Z][a-zA-Z_]*)(\s*(:?(?:'[^']*'|"[^"]*"|[^'">])*)\/>)(\s*(:?(?:'[^']*'|"[^"]*"|[^'">])*)\/>)?|<([A-Z][a-z-zA-Z__]*)\b([^>]*)>*(?:>([\s\S]*?)<\/\6>|\s?\/)/gm

// Strips the "/>" that terminates a self-closing tag.
const TRAILING_CLOSE = /\/>$/
/**
 * Find a single component tag in `block` using SINGLE_COMPONENT_REGEX.
 *
 * If `block` contains several matches, the LAST one is returned.
 *
 * @param {string} block - Text to scan.
 * @returns {{name: string, propsRaw: string, match: string, children?: string}|undefined}
 *   For a self-closing tag: `name`, `propsRaw` (text after the name with the
 *   trailing "/>" removed) and `match` (the full matched text).
 *   For an open tag: `name`, `propsRaw` (raw text between the name and ">"),
 *   `children` (text up to the close tag, or undefined when there is none) and `match`.
 *   `undefined` when nothing matches.
 */
function findComponent(block) {
  let matches
  let component
  // The regex is global and shared; start from the beginning even if a previous
  // caller left lastIndex in the middle of some other string.
  SINGLE_COMPONENT_REGEX.lastIndex = 0
  while ((matches = SINGLE_COMPONENT_REGEX.exec(block)) !== null) {
    if (matches.index === SINGLE_COMPONENT_REGEX.lastIndex) {
      SINGLE_COMPONENT_REGEX.lastIndex++ // avoid infinite loops with zero-width matches
    }
    const [ match, selfClosingName, selfClosingBody, pairedName, pairedProps, children ] = matches
    if (selfClosingName !== undefined) {
      // Alternative 1 of the regex: groups 1-2 are populated.
      component = {
        name: selfClosingName,
        propsRaw: selfClosingBody.replace(TRAILING_CLOSE, ''),
        match,
      }
    } else {
      // Alternative 2 of the regex: groups 1-2 are undefined, so read groups 3-5
      // instead of calling .replace on an undefined body.
      component = {
        name: pairedName,
        propsRaw: pairedProps,
        children,
        match,
      }
    }
  }
  return component
}
/**
 * Find every component tag in `block`.
 *
 * COMPONENT_REGEX performs the first pass. Each of its matches is then re-scanned
 * with SINGLE_COMPONENT_REGEX to count how many individual tags it really contains:
 *   - none:  the match is skipped
 *   - one:   a single entry is built from the COMPONENT_REGEX capture groups
 *   - more:  the first pass swallowed several tags; each one is resolved
 *            separately through findComponent()
 *
 * @param {string} block - Text to scan.
 * @returns {Array<{index: number, name: string, propsRaw: string, match: string, children?: string}>}
 *   One entry per component, in document order. `index` is the offset of the
 *   component inside `block`.
 */
function findComponents(block) {
  const components = []
  let matches
  // The regex is global and exported; always start scanning from the beginning.
  COMPONENT_REGEX.lastIndex = 0
  while ((matches = COMPONENT_REGEX.exec(block)) !== null) {
    if (matches.index === COMPONENT_REGEX.lastIndex) {
      COMPONENT_REGEX.lastIndex++ // avoid infinite loops with zero-width matches
    }
    // Capture layout, e.g. for '<Builder beans=true>cool</Builder>':
    //   0: full match          1: self-closing name     2: body up to and incl. "/>"
    //   3-4: unused here       5: inner text of the optional second "/>" run
    //   6: 'Builder'           7: ' beans=true'         8: 'cool'
    const [ _match, name, bodyToSelfCloseTag, , , rest, nameWithChildren, props, children ] = matches

    /* Count instances of components */
    const singles = collectSingleMatches(_match)
    if (singles.length === 0) {
      continue
    }

    if (singles.length === 1) {
      if (nameWithChildren) {
        components.push({
          index: matches.index,
          name: nameWithChildren,
          propsRaw: props,
          children,
          match: _match,
        })
        continue
      }
      const text = (bodyToSelfCloseTag) ? `<${name}${bodyToSelfCloseTag}` : _match
      const restContent = bodyToSelfCloseTag + (rest || '')
      components.push({
        index: matches.index,
        name,
        propsRaw: restContent.replace(TRAILING_CLOSE, ''),
        match: text,
      })
      continue
    }

    /* Original regex match caught too many elements. fix it */
    for (const single of singles) {
      const inner = findComponent(single.text)
      if (inner) {
        // Give nested results the same `index` field as every other entry:
        // offset of the outer match plus the offset of this tag inside it.
        components.push(Object.assign({ index: matches.index + single.offset }, inner))
      }
    }
  }
  return components
}

/**
 * List every SINGLE_COMPONENT_REGEX match inside `text`.
 *
 * @param {string} text - Text to scan.
 * @returns {Array<{text: string, offset: number}>} Matched text and its offset in `text`.
 */
function collectSingleMatches(text) {
  const found = []
  let single
  SINGLE_COMPONENT_REGEX.lastIndex = 0
  while ((single = SINGLE_COMPONENT_REGEX.exec(text)) !== null) {
    if (single.index === SINGLE_COMPONENT_REGEX.lastIndex) {
      SINGLE_COMPONENT_REGEX.lastIndex++ // avoid infinite loops with zero-width matches
    }
    found.push({ text: single[0], offset: single.index })
  }
  return found
}
module.exports = {
findComponent,
findComponents,
COMPONENT_REGEX,
SINGLE_COMPONENT_REGEX,
CLOSE_ELEMENT_SYMBOL,
CLOSE_ELEMENT_SYMBOL_PATTERN,
}