UNPKG

micro-mdx-parser

Version:

A tiny parser to convert markdown or html into JSON

133 lines (124 loc) 4 kB
const REP_SYMBOL = '◊' const REP_SYMBOL_PATTERN = /◊/g const ARROW_SYMBOL = '_≡►_' const ARROW_SYMBOL_PATTERN = /_≡►_/g // https://regex101.com/r/nIlW1U/6 const CODE_BLOCK_REGEX = /^([A-Za-z \t]*)```([A-Za-z]*)?\n([\s\S]*?)```([A-Za-z \t]*)*$/gm // https://regex101.com/r/oPKKoC/1 const REMOVE_CODE_BLOCK_REGEX = /^(?:[A-Za-z \t]*)?(```(?:[A-Za-z]*)?\n(?:[\s\S]*?)```)([A-Za-z \t]*)*$/gm // https://regex101.com/r/ZEcVR0/1 // results in timeouts... // const CODE_INLINE_REGEX = /`([^\\`]*(?:(\\`*)|[^\\`]*)*)`/gm // const CODE_INLINE_REGEX = /`([^`\n\r]*)`/gm // const CODE_INLINE_REGEX = /`([^`][^`\n\r][^\\\s]*)`[^`]/gm // const CODE_INLINE_REGEX = /`([^`][^`\n\r][^\s]*)`[^`]/gm // const CODE_INLINE_REGEX = /`((?:\\`|[^`])*)`/gm // const CODE_INLINE_REGEX = /[^\\\n]`((?:\\`|[^`])*)`/gm // https://regex101.com/r/9bmDHe/6 // const CODE_INLINE_REGEX = /(^`((?:\\`|[^`])*)`|[^\\\n]`((?:\\`|[^`])*)`)/gm // https://regex101.com/r/9bmDHe/7 const CODE_INLINE_REGEX = /(^`((?:\\`|[^`])*)+`|([^\\\n])`((?:\\`|[^`])*)`)/gm const STARTS_WITH_FENCE = /^`{2,}/ /** * Parse code blocks out of markdown * @param {string} md - markdown string * @returns {Object} * @example * const blocks = findCodeBlocks(content) * console.log('blocks', blocks) */ function findCodeBlocks(md = '') { let matches let blocks = [] while ((matches = CODE_BLOCK_REGEX.exec(md)) !== null) { if (matches.index === CODE_BLOCK_REGEX.lastIndex) { CODE_BLOCK_REGEX.lastIndex++ // avoid infinite loops with zero-width matches } const [ match, prefix, syntax, content, postFix ] = matches /* // debug console.log(`prefix: "${prefix}"`) console.log(`postFix: "${postFix}"`) console.log('syntax:', lang) console.log('Content:') console.log(content.trim()) console.log('───────────────────────') /** */ blocks.push({ index: matches.index, syntax: syntax || '', block: match, code: content.trim() }) } return blocks } /** * Parse inline code blocks out of markdown * @param {string} md - markdown string * @returns {Object} * @example * const blocks = findCodeBlocks(content) * console.log('blocks', blocks) */ function findInlineCode(md = '') { let matches let blocks = [] while ((matches = CODE_INLINE_REGEX.exec(md)) !== null) { if (matches.index === CODE_INLINE_REGEX.lastIndex) { CODE_INLINE_REGEX.lastIndex++ // avoid infinite loops with zero-width matches } const [ match, _fullMatch, innerOne, space, innerTwo ] = matches const content = innerOne || innerTwo /* // debug console.log(matches) console.log('content', content) console.log('───────────────────────') /** */ if (content && (content.indexOf('\n') === -1) && !STARTS_WITH_FENCE.test(content)) { blocks.push({ index: (space) ? matches.index + 1 : matches.index, block: (space) ? match.substring(1) : match, code: content.trim() }) } } return blocks } /** * Parse all code blocks out of markdown * @param {string} md - markdown string * @returns {Object} * @example * const blocks = findCodeBlocks(content) * console.log('blocks', blocks) */ function findCode(md = '') { // const blocks = findCodeBlocks(md) const inline = findInlineCode(md) return inline /* console.log('blocks', blocks) console.log('inline', inline) process.exit(1) /** */ return blocks.concat(inline) .sort((a, b) => { if (a.index > b.index) return 1 if (a.index < b.index) return -1 return 0 }) } function fixOpenBracket(content = '') { return (content.indexOf(REP_SYMBOL) === -1) ? content : content.replace(REP_SYMBOL_PATTERN, '<') } module.exports = { findCode, findCodeBlocks, findInlineCode, fixOpenBracket, REP_SYMBOL, REP_SYMBOL_PATTERN, ARROW_SYMBOL, ARROW_SYMBOL_PATTERN, CODE_BLOCK_REGEX, REMOVE_CODE_BLOCK_REGEX }