@antora/assembler
Version:
A JavaScript library that merges AsciiDoc content from multiple pages in an Antora site into assembly files and delegates to an exporter to convert those files to another format, such as PDF.
96 lines (90 loc) • 3.67 kB
JavaScript
// Matches an attribute reference such as {name}: an opening brace, one
// lowercase letter, digit, or underscore, then any number of those characters
// or hyphens, and a closing brace. Global so that replace() rewrites every one.
const ATTRIBUTE_REFERENCE_RX = /\{[a-z0-9_][a-z0-9_-]*\}/g

// A single Unicode letter, digit, or underscore.
const STRICT_WORD_CHAR_RX = /[\p{L}\d_]/u

// Same as the strict class, plus the punctuation ; } : < >
const WORD_CHAR_RX = /[\p{L}\d_;}:<>]/u

// Formatting mark emitted around the contents of each supported HTML tag.
const MARK_FOR_TAG = {
  code: '`',
  em: '_',
  mark: '#',
  span: '#',
  strong: '*'
}

// For a span with the given role, the text that must directly follow the
// opening tag for the span to be emitted without a mark of its own.
const SKIP_SPAN = {
  icon: '<i ',
  image: '<img '
}
module.exports = (str) => {
if (!str) return str
let matchIndex = str.indexOf('<')
if (!~matchIndex) return ~str.indexOf('{') ? str.replace(ATTRIBUTE_REFERENCE_RX, '\\$&') : str
let current = { contents: '' }
const stack = [current]
let lastIndex = 0
do {
if (matchIndex > lastIndex) {
const matched = str.slice(lastIndex, matchIndex)
current.contents += ~matched.indexOf('{') ? matched.replace(ATTRIBUTE_REFERENCE_RX, '\\$&') : matched
}
const isCloseTag = str[++matchIndex] === '/' ? ++matchIndex : false
let tagName = str.slice(matchIndex, (lastIndex = str.indexOf('>', matchIndex) + 1) - 1)
if (isCloseTag) {
const parent = current // TODO expect tagName to equal current.tagName
stack.pop()
current = stack[stack.length - 1]
if (parent.mark) {
let { contents, mark, id, role } = parent
const attrlist = (id ? '#' + id : '') + (role ? '.' + role.replace(/ /g, '.') : '')
if (
current.mark === mark ||
current.mark === '_' ||
isWordChar(str.charAt(lastIndex), true) ||
isWordChar(current.contents[current.contents.length - 1] || current.mark)
) {
mark = mark.repeat(2)
}
current.contents += (attrlist ? '[' + attrlist + ']' : '') + mark + contents + mark
} else {
current.contents += parent.contents
}
} else {
let attrs, attrlistIndex, role
if (~(attrlistIndex = tagName.indexOf(' '))) {
role = (attrs = parseAttrlist(tagName.slice(attrlistIndex))).class
tagName = tagName.slice(0, attrlistIndex)
}
if (tagName === 'img') {
current.contents += 'image:' + attrs.src + '[' + attrs.alt + ']'
} else if (tagName === 'i' && current.tagName === 'span' && current.role === 'icon') {
current.contents += 'icon:' + role.slice(6) + '[]'
lastIndex += 4
} else {
let check, mark
const id = attrs?.id
if (tagName !== 'span' || id || (role && !((check = SKIP_SPAN[role]) && str.startsWith(check, lastIndex)))) {
mark = MARK_FOR_TAG[tagName]
}
stack.push((current = { tagName, role, id, mark, contents: '' }))
}
}
} while (~(matchIndex = str.indexOf('<', lastIndex)))
const rest = str.slice(lastIndex)
if (rest) current.contents += ~rest.indexOf('{') ? rest.replace(ATTRIBUTE_REFERENCE_RX, '\\$&') : rest
return current.contents
}
/**
 * Parses the attribute portion of an HTML start tag into an object.
 *
 * The input is expected to start with the space that separates the tag name
 * from the first attribute, e.g. ` class="icon" id=x hidden`. Parsing stops at
 * the first position that does not begin with a space.
 *
 * @param {string} str - Attribute text, beginning with a space.
 * @returns {Object<string, string|true>} Attribute names mapped to their
 *   values (surrounding double quotes removed), or to `true` when the
 *   attribute has no value.
 */
function parseAttrlist (str) {
  const attrs = {}
  let lastIndex = 0
  while (str.charAt(lastIndex++) === ' ') {
    const spaceIndex = str.indexOf(' ', lastIndex)
    const equalsIndex = str.indexOf('=', lastIndex)
    if (~spaceIndex && (!~equalsIndex || spaceIndex < equalsIndex)) {
      // Valueless attribute followed by more text. Skip the empty name
      // produced by consecutive spaces.
      if (spaceIndex > lastIndex) attrs[str.slice(lastIndex, spaceIndex)] = true
      lastIndex = spaceIndex
    } else if (~equalsIndex) {
      const name = str.slice(lastIndex, equalsIndex)
      const valueIndex = equalsIndex + 1
      if (str.charAt(valueIndex) === '"') {
        const closeIndex = str.indexOf('"', valueIndex + 1)
        if (!~closeIndex) {
          // Unterminated quote: take the remainder as the value and stop,
          // rather than resetting the cursor and looping forever.
          attrs[name] = str.slice(valueIndex + 1)
          break
        }
        attrs[name] = str.slice(valueIndex + 1, closeIndex)
        lastIndex = closeIndex + 1
      } else {
        // Unquoted value runs to the next space or the end of the string.
        lastIndex = ~spaceIndex ? spaceIndex : str.length
        attrs[name] = str.slice(valueIndex, lastIndex)
      }
    } else {
      // Last attribute, and it has no value.
      const name = str.slice(lastIndex)
      if (name) attrs[name] = true
      break
    }
  }
  return attrs
}
/**
 * Reports whether the given text contains a word character.
 *
 * @param {string} [str] - Text to test, typically a single character.
 * @param {boolean} [strict] - When truthy, test against STRICT_WORD_CHAR_RX;
 *   otherwise against the broader WORD_CHAR_RX.
 * @returns {boolean} `false` for a falsy `str`, else the regex test result.
 */
function isWordChar (str, strict) {
  if (str) {
    const wordCharRx = strict ? STRICT_WORD_CHAR_RX : WORD_CHAR_RX
    return wordCharRx.test(str)
  }
  return false
}