@antora/assembler
Version:
An extension library for Antora that assembles content from multiple pages into a single AsciiDoc file to convert and publish.
96 lines (90 loc) • 3.67 kB
JavaScript
// Matches a brace-delimited reference such as {name}: the name starts with a lowercase letter,
// digit, or underscore and continues with those characters or hyphens. The converter prefixes
// every match with a backslash.
const ATTRIBUTE_REFERENCE_RX = /\{[a-z0-9_][a-z0-9_-]*\}/g
// Strict word character: any Unicode letter, a digit, or an underscore.
const STRICT_WORD_CHAR_RX = /[\p{L}\d_]/u
// Lenient word character: the strict set plus the punctuation characters ; } : < >
const WORD_CHAR_RX = /[\p{L}\d_;}:<>]/u
// HTML tag name => formatting mark placed on both sides of the element's contents when it closes.
const MARK_FOR_TAG = { code: '`', em: '_', mark: '#', span: '#', strong: '*' }
// Role of a span => opening text of the child tag that, when it immediately follows the span's
// start tag (and the span has no id), causes the span itself to be emitted without a mark.
const SKIP_SPAN = { icon: '<i ', image: '<img ' }
module.exports = (str) => {
if (!str) return str
let matchIndex = str.indexOf('<')
if (!~matchIndex) return ~str.indexOf('{') ? str.replace(ATTRIBUTE_REFERENCE_RX, '\\$&') : str
let current = { contents: '' }
const stack = [current]
let lastIndex = 0
do {
if (matchIndex > lastIndex) {
const matched = str.slice(lastIndex, matchIndex)
current.contents += ~matched.indexOf('{') ? matched.replace(ATTRIBUTE_REFERENCE_RX, '\\$&') : matched
}
const isCloseTag = str[++matchIndex] === '/' ? ++matchIndex : false
let tagName = str.slice(matchIndex, (lastIndex = str.indexOf('>', matchIndex) + 1) - 1)
if (isCloseTag) {
const parent = current // TODO expect tagName to equal current.tagName
stack.pop()
current = stack[stack.length - 1]
if (parent.mark) {
let { contents, mark, id, role } = parent
const attrlist = (id ? '#' + id : '') + (role ? '.' + role.replace(/ /g, '.') : '')
if (
current.mark === mark ||
current.mark === '_' ||
isWordChar(str.charAt(lastIndex), true) ||
isWordChar(current.contents[current.contents.length - 1] || current.mark)
) {
mark = mark.repeat(2)
}
current.contents += (attrlist ? '[' + attrlist + ']' : '') + mark + contents + mark
} else {
current.contents += parent.contents
}
} else {
let attrs, attrlistIndex, role
if (~(attrlistIndex = tagName.indexOf(' '))) {
role = (attrs = parseAttrlist(tagName.slice(attrlistIndex))).class
tagName = tagName.slice(0, attrlistIndex)
}
if (tagName === 'img') {
current.contents += 'image:' + attrs.src + '[' + attrs.alt + ']'
} else if (tagName === 'i' && current.tagName === 'span' && current.role === 'icon') {
current.contents += 'icon:' + role.slice(6) + '[]'
lastIndex += 4
} else {
let check, mark
const id = attrs?.id
if (tagName !== 'span' || id || (role && !((check = SKIP_SPAN[role]) && str.startsWith(check, lastIndex)))) {
mark = MARK_FOR_TAG[tagName]
}
stack.push((current = { tagName, role, id, mark, contents: '' }))
}
}
} while (~(matchIndex = str.indexOf('<', lastIndex)))
const rest = str.slice(lastIndex)
if (rest) current.contents += ~rest.indexOf('{') ? rest.replace(ATTRIBUTE_REFERENCE_RX, '\\$&') : rest
return current.contents
}
/**
 * Parses the attribute portion of an HTML start tag into a plain object.
 *
 * The string must begin with the space that separates the tag name from the first attribute;
 * parsing stops at the first position where an attribute is not preceded by a space. Values may
 * be double-quoted or unquoted. An attribute without a value is stored as true. An unterminated
 * quoted value runs to the end of the string. Empty attribute names produced by repeated or
 * trailing spaces are ignored.
 *
 * @param {string} str - The attribute list, including its leading space (e.g. ' class="a" id=b').
 * @returns {Object<string, string|true>} Attribute names mapped to their values.
 */
function parseAttrlist (str) {
  const attrs = {}
  let lastIndex = 0
  while (str.charAt(lastIndex++) === ' ') {
    const spaceIndex = str.indexOf(' ', lastIndex)
    const equalsIndex = str.indexOf('=', lastIndex)
    if (~spaceIndex && (!~equalsIndex || spaceIndex < equalsIndex)) {
      // Valueless attribute followed by more input: the next space ends the name.
      const name = str.slice(lastIndex, spaceIndex)
      if (name) attrs[name] = true
      lastIndex = spaceIndex
    } else if (~equalsIndex) {
      const name = str.slice(lastIndex, equalsIndex)
      const valueIndex = equalsIndex + 1
      if (str.charAt(valueIndex) === '"') {
        const closeIndex = str.indexOf('"', valueIndex + 1)
        if (~closeIndex) {
          attrs[name] = str.slice(valueIndex + 1, closeIndex)
          lastIndex = closeIndex + 1
        } else {
          // Unterminated quote: consume the remainder instead of rewinding to the start, which
          // would make the loop parse the same attribute forever.
          attrs[name] = str.slice(valueIndex + 1)
          lastIndex = str.length
        }
      } else {
        // Unquoted value ends at the next space or at the end of the string.
        lastIndex = ~spaceIndex ? spaceIndex : str.length
        attrs[name] = str.slice(valueIndex, lastIndex)
      }
    } else {
      // Last attribute and it has no value.
      const name = str.slice(lastIndex)
      if (name) attrs[name] = true
      break
    }
  }
  return attrs
}
/**
 * Tests whether the given text contains a word character.
 *
 * @param {string} str - The text to test, normally a single character; a falsy value yields false.
 * @param {boolean} [strict] - When truthy, test against STRICT_WORD_CHAR_RX; otherwise against
 *   the more lenient WORD_CHAR_RX.
 * @returns {boolean} true if the selected pattern matches str, otherwise false.
 */
function isWordChar (str, strict) {
  const pattern = strict ? STRICT_WORD_CHAR_RX : WORD_CHAR_RX
  return str ? pattern.test(str) : false
}