micro-mdx-parser
Version:
A tiny parser to convert markdown or html into JSON
133 lines (124 loc) • 4 kB
JavaScript
// Placeholder character; fixOpenBracket() below turns every occurrence back into '<'.
const REP_SYMBOL = '◊'
const REP_SYMBOL_PATTERN = /◊/g
// Placeholder token for an arrow.
// NOTE(review): not used inside this file — presumably consumed by other modules; confirm against callers.
const ARROW_SYMBOL = '_≡►_'
const ARROW_SYMBOL_PATTERN = /_≡►_/g
// Fenced code block, anchored to whole lines (flags: g + m).
// Groups: 1 = text before the opening fence, 2 = syntax name (undefined when absent),
//         3 = raw block content, 4 = text after the closing fence (undefined when absent).
// The trailing group is written as `(...+)?` rather than the earlier `(...*)*`: it matches and
// captures exactly the same text, but the nested quantifier backtracked exponentially whenever
// the closing fence was followed by text containing a character outside the class (e.g. punctuation).
// https://regex101.com/r/nIlW1U/6 (link shows the earlier form of the pattern)
const CODE_BLOCK_REGEX = /^([A-Za-z \t]*)```([A-Za-z]*)?\n([\s\S]*?)```([A-Za-z \t]+)?$/gm
// Same shape as CODE_BLOCK_REGEX, but group 1 is the whole fenced block and group 2 is the
// text after the closing fence. Uses the same linear-time trailing group.
// https://regex101.com/r/oPKKoC/1 (link shows the earlier form of the pattern)
const REMOVE_CODE_BLOCK_REGEX = /^(?:[A-Za-z \t]*)?(```(?:[A-Za-z]*)?\n(?:[\s\S]*?)```)([A-Za-z \t]+)?$/gm
// Earlier attempts at the inline-code pattern, kept for reference.
// https://regex101.com/r/ZEcVR0/1 // results in timeouts...
// const CODE_INLINE_REGEX = /`([^\\`]*(?:(\\`*)|[^\\`]*)*)`/gm
// const CODE_INLINE_REGEX = /`([^`\n\r]*)`/gm
// const CODE_INLINE_REGEX = /`([^`][^`\n\r][^\\\s]*)`[^`]/gm
// const CODE_INLINE_REGEX = /`([^`][^`\n\r][^\s]*)`[^`]/gm
// const CODE_INLINE_REGEX = /`((?:\\`|[^`])*)`/gm
// const CODE_INLINE_REGEX = /[^\\\n]`((?:\\`|[^`])*)`/gm
// https://regex101.com/r/9bmDHe/6
// const CODE_INLINE_REGEX = /(^`((?:\\`|[^`])*)`|[^\\\n]`((?:\\`|[^`])*)`)/gm
// Inline code span (flags: g + m). Two alternatives:
//   - a span opening at the very start of a line            -> content in group 2
//   - a span preceded by any char except backslash/newline  -> that char in group 3, content in group 4
// Group 1 wraps the whole alternation. A backslash-escaped backtick is allowed inside the span.
// The first alternative used to be `(...*)+`; the extra `+` never changed what was matched or
// captured but made an unclosed backtick at a line start backtrack exponentially.
// https://regex101.com/r/9bmDHe/7 (link shows the earlier form of the pattern)
const CODE_INLINE_REGEX = /(^`((?:\\`|[^`])*)`|([^\\\n])`((?:\\`|[^`])*)`)/gm
// True when a captured inline span starts with two or more backticks (i.e. it is really a fence).
const STARTS_WITH_FENCE = /^`{2,}/
/**
 * Find every fenced (triple-backtick) code block in a markdown string.
 * @param {string} md - markdown string
 * @returns {Array<{index: number, syntax: string, block: string, code: string}>}
 *   One entry per fenced block, in document order:
 *   - index: offset of the match in `md` (includes any text captured before the opening fence)
 *   - syntax: language name following the opening fence, or '' when none was given
 *   - block: the full matched text
 *   - code: the block content with surrounding whitespace trimmed
 * @example
 * const blocks = findCodeBlocks(content)
 * console.log('blocks', blocks)
 */
function findCodeBlocks(md = '') {
  const blocks = []
  // CODE_BLOCK_REGEX is a shared /g regex and is also exported, so another consumer may have
  // left lastIndex mid-string. Always start scanning from the beginning.
  CODE_BLOCK_REGEX.lastIndex = 0
  let matches
  while ((matches = CODE_BLOCK_REGEX.exec(md)) !== null) {
    if (matches.index === CODE_BLOCK_REGEX.lastIndex) {
      CODE_BLOCK_REGEX.lastIndex++ // avoid infinite loops with zero-width matches
    }
    // Group 1 (text before the fence) and group 4 (text after it) are not needed here.
    const [ block, , syntax, content ] = matches
    blocks.push({
      index: matches.index,
      syntax: syntax || '',
      block,
      code: content.trim()
    })
  }
  return blocks
}
/**
 * Find inline (single-line, backtick-delimited) code spans in a markdown string.
 *
 * Spans are skipped when their content is empty, contains a newline, or starts with two or
 * more backticks (those are pieces of a fence rather than inline code).
 * @param {string} md - markdown string
 * @returns {Array<{index: number, block: string, code: string}>}
 *   One entry per inline span, in document order:
 *   - index: offset of the opening backtick in `md`
 *   - block: the span including its backticks
 *   - code: the span content with surrounding whitespace trimmed
 * @example
 * const inline = findInlineCode(content)
 * console.log('inline', inline)
 */
function findInlineCode(md = '') {
  const blocks = []
  // CODE_INLINE_REGEX is a shared /g regex, hence stateful: always scan from the beginning.
  CODE_INLINE_REGEX.lastIndex = 0
  let matches
  while ((matches = CODE_INLINE_REGEX.exec(md)) !== null) {
    if (matches.index === CODE_INLINE_REGEX.lastIndex) {
      CODE_INLINE_REGEX.lastIndex++ // avoid infinite loops with zero-width matches
    }
    // Group 1 (the whole alternation) duplicates the full match and is ignored.
    // leadingChar is only set when the span did not open at the start of a line; in that case
    // the match begins one character before the opening backtick.
    const [ match, , innerOne, leadingChar, innerTwo ] = matches
    const content = innerOne || innerTwo
    if (!content || content.indexOf('\n') !== -1 || STARTS_WITH_FENCE.test(content)) {
      continue
    }
    blocks.push({
      index: leadingChar ? matches.index + 1 : matches.index,
      block: leadingChar ? match.substring(1) : match,
      code: content.trim()
    })
  }
  return blocks
}
/**
 * Find code in a markdown string.
 *
 * Currently this returns inline code spans only: it delegates to findInlineCode().
 * Fenced blocks are NOT included in the result; call findCodeBlocks() for those.
 * @param {string} md - markdown string
 * @returns {Array<{index: number, block: string, code: string}>} result of findInlineCode(md)
 * @example
 * const code = findCode(content)
 * console.log('code', code)
 */
function findCode(md = '') {
  return findInlineCode(md)
}
/**
 * Restore '<' characters that were swapped out for the REP_SYMBOL placeholder.
 * @param {string} content - text that may contain the placeholder
 * @returns {string} the text with every placeholder replaced by '<'; returned as-is when it has none
 */
function fixOpenBracket(content = '') {
  const hasPlaceholder = content.indexOf(REP_SYMBOL) !== -1
  if (hasPlaceholder) {
    return content.replace(REP_SYMBOL_PATTERN, '<')
  }
  return content
}
// Public API of this module (CommonJS).
// CODE_INLINE_REGEX and STARTS_WITH_FENCE are not in this list and stay module-private.
// The exported regexes carry the `g` flag, so each one's lastIndex is shared by every
// consumer that calls exec()/test() on it.
module.exports = {
// Code finders
findCode,
findCodeBlocks,
findInlineCode,
// Placeholder helpers
fixOpenBracket,
REP_SYMBOL,
REP_SYMBOL_PATTERN,
ARROW_SYMBOL,
ARROW_SYMBOL_PATTERN,
// Fenced-block patterns
CODE_BLOCK_REGEX,
REMOVE_CODE_BLOCK_REGEX
}