UNPKG

micro-mdx-parser

Version:

A tiny parser to convert markdown or html into JSON

125 lines (108 loc) 3.62 kB
const CLOSE_ELEMENT_SYMBOL = '◪◪' const CLOSE_ELEMENT_SYMBOL_PATTERN = /◪◪/g // https://regex101.com/r/T0QMif/1 const SINGLE_COMPONENT_REGEX = /<([A-Z][a-zA-Z_]*)(\s*(?:(?:'[^']*'|"[^"]*"|[^'">])*)\/>)|<([A-Z][a-z_]*)\b([^>]*)>*(?:>([\s\S]*?)<\/\3>|\s?\/?>)/gm // https://regex101.com/r/rK6kVF/1 const COMPONENT_REGEX = /<([A-Z][a-zA-Z_]*)(\s*(:?(?:'[^']*'|"[^"]*"|[^'">])*)\/>)(\s*(:?(?:'[^']*'|"[^"]*"|[^'">])*)\/>)?|<([A-Z][a-z-zA-Z__]*)\b([^>]*)>*(?:>([\s\S]*?)<\/\6>|\s?\/?>)/gm const TRAILING_CLOSE = /\/>$/ function findComponent(block) { let matches let component while ((matches = SINGLE_COMPONENT_REGEX.exec(block)) !== null) { if (matches.index === SINGLE_COMPONENT_REGEX.lastIndex) { SINGLE_COMPONENT_REGEX.lastIndex++ // avoid infinite loops with zero-width matches } const [ _match, name, body, otherName, props, children ] = matches // console.log('matches', matches) component = { name, propsRaw: body.replace(TRAILING_CLOSE, ''), match: _match, } } return component } function findComponents(block) { let matches const components = [] while ((matches = COMPONENT_REGEX.exec(block)) !== null) { if (matches.index === COMPONENT_REGEX.lastIndex) { COMPONENT_REGEX.lastIndex++ // avoid infinite loops with zero-width matches } // '<Builder beans=true>cool</Builder>', // 0 // name, // 1 // bodyToSelfCloseTag, // 2 // undefined, // 3 // undefined, // 4 // undefined, // 5 // 'Builder', // 6 // ' beans=true', // 'cool', const [ _match, name, bodyToSelfCloseTag, x, y, rest, nameWithChildren, props, children ] = matches /* console.log('_match', _match) console.log('name', name) console.log('nameWithChildren', nameWithChildren) console.log('bodyToSelfCloseTag', bodyToSelfCloseTag) console.log('_match', _match) /** */ /* Count instances of components */ const componentCount = _match.match(SINGLE_COMPONENT_REGEX) // console.log('componentCount', componentCount) if (!componentCount || !componentCount.length) { continue } if (componentCount.length === 1) { if (nameWithChildren) { components.push({ index: matches.index, name: nameWithChildren, propsRaw: props, children, match: _match, }) continue; } const text = (bodyToSelfCloseTag) ? `<${name}${bodyToSelfCloseTag}` : _match // console.log('text', text) const restContent = bodyToSelfCloseTag + (rest || '') const propsRaw = restContent.replace(TRAILING_CLOSE, '') // console.log('propsRaw', propsRaw) components.push({ index: matches.index, name, // propsRaw: _match.trim().replace(new RegExp(`^<${name}`), '').replace(TRAILING_CLOSE, ''), propsRaw: propsRaw, match: text, }) continue } /* console.log(componentCount) console.log(componentCount.length) /** */ /* Original regex match caught too many elements. fix it */ for (let count = 0; count < componentCount.length; count++) { const element = componentCount[count]; // console.log('element', element) const inner = findComponent(element, matches.index) if (inner) { components.push(inner) } } } /* console.log('components', components) process.exit(1) /** */ return components } module.exports = { findComponent, findComponents, COMPONENT_REGEX, SINGLE_COMPONENT_REGEX, CLOSE_ELEMENT_SYMBOL, CLOSE_ELEMENT_SYMBOL_PATTERN, }