@antora/assembler
Version:
An extension library for Antora that assembles content from multiple pages into a single AsciiDoc file to convert and publish.
606 lines (593 loc) • 24.3 kB
JavaScript
const path = require('node:path/posix')
const sanitize = require('./util/sanitize')
const unconvertInlineAsciiDoc = require('./util/unconvert-inline-asciidoc')
/**
 * Produces the aggregate AsciiDoc file for an outline (a navigation tree).
 *
 * A temporary page file, whose relative path is a slug of the outline's root title, is added to the content
 * catalog to find out where such a page would be published. That output path determines the path of the
 * aggregate file. The temporary file is removed from the catalog before the contents are assembled.
 *
 * @param {Function} loadAsciiDoc - Loader function forwarded to aggregateAsciiDoc.
 * @param {Object} contentCatalog - Content catalog; must provide addFile and removeFile.
 * @param {Object} componentVersion - Component version the document is produced for (name, version, title, ...).
 * @param {Object} outline - Root navigation entry; its content property is used as the navigation title.
 * @param {string} doctype - Doctype forwarded to buildAsciiDocHeader.
 * @param {Array} pages - Candidate pages that outline entries may refer to.
 * @param {Object} asciidocConfig - AsciiDoc config stored on the result and forwarded to aggregateAsciiDoc.
 * @param {Object} mutableAttributes - Forwarded to aggregateAsciiDoc.
 * @param {string} [sectionMergeStrategy='discrete'] - Forwarded to aggregateAsciiDoc.
 * @returns {Object} A new file, created with the same constructor as catalog files, that holds the aggregate
 * AsciiDoc source.
 */
function produceAggregateDocument (
  loadAsciiDoc,
  contentCatalog,
  componentVersion,
  outline,
  doctype,
  pages,
  asciidocConfig,
  mutableAttributes,
  sectionMergeStrategy = 'discrete'
) {
  const pagesInOutline = selectPagesInOutline(outline, pages)
  const navtitle = outline.content

  // register a throwaway page so the catalog computes the output path a page with this slug would receive
  const probeSrc = {
    component: componentVersion.name,
    version: componentVersion.version,
    module: 'ROOT',
    family: 'page',
    relative: generateSlug(navtitle),
  }
  const probeFile = contentCatalog.addFile({ src: probeSrc })
  const parsedOut = path.parse(probeFile.out.path)
  let aggregatePath
  if (parsedOut.name === probeFile.src.relative) {
    // the slug is the file name of the output path, so only the file extension is swapped
    aggregatePath = path.join(parsedOut.dir, `${parsedOut.name}.adoc`)
  } else {
    // otherwise, the directory of the output path is used as the stem of the aggregate file
    aggregatePath = `${parsedOut.dir}.adoc`
  }
  contentCatalog.removeFile(probeFile)

  // NOTE: the header must be built before the body since aggregateAsciiDoc may prepend lines to it
  const header = buildAsciiDocHeader(componentVersion, navtitle, doctype)
  const body = aggregateAsciiDoc(
    loadAsciiDoc,
    contentCatalog,
    header,
    componentVersion,
    outline,
    pagesInOutline,
    asciidocConfig,
    mutableAttributes,
    sectionMergeStrategy
  )
  const source = `${[...header, ...body].join('\n')}\n`

  const AggregateFile = probeFile.constructor
  return new AggregateFile({
    aggregate: true,
    asciidoc: asciidocConfig,
    contents: Buffer.from(source),
    mediaType: 'text/asciidoc',
    path: aggregatePath,
    src: {
      component: componentVersion.name,
      version: componentVersion.version,
      basename: path.basename(aggregatePath),
      stem: path.basename(aggregatePath, '.adoc'),
      extname: '.adoc',
    },
  })
}
/**
 * Builds the lines of the AsciiDoc document header for the aggregate document.
 *
 * The document title is the navigation title, prefixed with the component title unless the sanitized
 * navigation title already equals the component title. The version "master" is treated as no version.
 *
 * @param {Object} componentVersion - Provides the name, version, displayVersion, and title.
 * @param {string} navtitle - Navigation title of the root outline entry.
 * @param {string} [doctype='book'] - Doctype of the document; no doctype entry is written for "article".
 * @returns {string[]} The header lines, starting with the document title line.
 */
function buildAsciiDocHeader (componentVersion, navtitle, doctype = 'book') {
  const plainTitle = sanitize(navtitle)
  const asciidocTitle = unconvertInlineAsciiDoc(navtitle)
  const doctitle =
    plainTitle === componentVersion.title ? asciidocTitle : `${componentVersion.title}: ${asciidocTitle}`
  const version = componentVersion.version === 'master' ? '' : componentVersion.version
  const headerLines = [`= ${doctitle}`]
  if (version) headerLines.push(`:revnumber: ${version}`)
  if (doctype !== 'article') headerLines.push(`:doctype: ${doctype}`)
  headerLines.push(':underscore: _')
  // Q: should we pass these via the CLI so they cannot be modified?
  headerLines.push(
    `:page-component-name: ${componentVersion.name}`,
    `:page-component-version:${version ? ' ' + version : ''}`,
    ':page-version: {page-component-version}',
    `:page-component-display-version: ${componentVersion.displayVersion}`,
    `:page-component-title: ${componentVersion.title}`
  )
  return headerLines
}
/**
 * Collects the pages referenced by an outline entry and all of its descendants.
 *
 * An entry refers to a page when its urlType is "internal" and its url equals the publish URL of a page.
 * Each page found is registered under two keys: its page reference (the relative source path, prefixed with
 * "<module>:" unless the module is ROOT) and its publish URL.
 *
 * @param {Object} outlineEntry - Navigation entry to inspect (urlType, url, items).
 * @param {Array} pages - Candidate pages; each must provide pub.url, src.module, and src.relative.
 * @returns {Map} The pages in the outline, in outline order, keyed by page reference and by publish URL.
 */
function selectPagesInOutline (outlineEntry, pages) {
  const selected = new Map()
  if (outlineEntry.urlType === 'internal') {
    const match = pages.find((candidate) => candidate.pub.url === outlineEntry.url)
    if (match) {
      const modulePrefix = match.src.module === 'ROOT' ? '' : match.src.module + ':'
      selected.set(`${modulePrefix}${match.src.relative}`, match)
      selected.set(match.pub.url, match)
    }
  }
  const children = outlineEntry.items || []
  children.forEach((child) => {
    // entries found deeper in the outline overwrite keys registered earlier, same as a Map merge
    for (const [key, page] of selectPagesInOutline(child, pages)) selected.set(key, page)
  })
  return selected
}
/**
 * Recursively builds the AsciiDoc lines for an outline entry and the entries nested beneath it.
 *
 * If the entry refers to a page in the outline that has not been aggregated yet, the page is loaded and its
 * source lines are rewritten: IDs and internal references are prefixed with an ID prefix derived from the page,
 * section markers are shifted to fit the outline level, leveloffset entries are dropped, and references to
 * pages, attachments, and images are resolved. The rewritten lines follow a block of attribute entries that
 * describe the page and a section title made from the navigation title.
 * Otherwise, if the entry is below the root level, only a section title is emitted for it.
 * An entry that has a hash (fragment) contributes nothing, and its children are not visited.
 *
 * Side effects: may prepend an ID line to header, records aggregated pages on pagesInOutline.aggregated,
 * and sets out.assembled = true on images that are resolved.
 *
 * @param {Function} loadAsciiDoc - Called as loadAsciiDoc(page, contentCatalog, asciidocConfig); returns the
 * parsed document.
 * @param {Object} contentCatalog - Catalog used to resolve pages, resources, attachments, and component versions.
 * @param {string[]} header - Lines of the aggregate document header; mutated when a page is found at level 0.
 * @param {Object} componentVersion - Component version the aggregate document is produced for.
 * @param {Object} outlineEntry - Navigation entry to process (content, items, urlType, url, hash, unresolved).
 * @param {Map} pagesInOutline - Pages in the outline, keyed by page reference and by publish URL.
 * @param {Object} asciidocConfig - AsciiDoc config; the site-url and sectids attributes are read from it.
 * @param {Object} mutableAttributes - Attribute names mapped to initial values. A header attribute of a page
 * that is listed here is restored to its initial value after the page; other header attributes are unset.
 * @param {string} sectionMergeStrategy - With "discrete", sections of a page are given the discrete style.
 * With "enclose", the body of a page that has sections and child entries is placed under an overview section.
 * @param {Object} [lastComponentVersion=componentVersion] - Component version most recently written to the
 * page-component-* attribute entries.
 * @param {number} [level=0] - Depth of the entry in the outline; the root entry is at level 0.
 * @returns {string[]} The lines contributed by this entry and its descendants.
 */
function aggregateAsciiDoc (
  loadAsciiDoc,
  contentCatalog,
  header,
  componentVersion,
  outlineEntry,
  pagesInOutline,
  asciidocConfig,
  mutableAttributes,
  sectionMergeStrategy,
  lastComponentVersion = componentVersion,
  level = 0
) {
  const buffer = []
  // TODO: we could try to be smart about it and make sure the page with fragment is included at least once
  if (outlineEntry.hash) return buffer
  const { content: navtitle, items = [], unresolved, urlType, url } = outlineEntry
  const hasItems = items.length > 0
  const navtitlePlain = sanitize(navtitle)
  const navtitleAsciiDoc = unconvertInlineAsciiDoc(navtitle)
  // site URL without a trailing slash; empty if not set (or only a slash)
  const siteUrl = ((val) => {
    if (!val || val === '/') return ''
    return val.charAt(val.length - 1) === '/' ? val.slice(0, val.length - 1) : val
  })(asciidocConfig.attributes['site-url'])
  // FIXME: ideally, resource ID would be stored in navigation so we can look up the page more efficiently
  let page = urlType === 'internal' && !unresolved ? pagesInOutline.get(url) : undefined
  // a page is only aggregated the first time it is encountered in the outline
  if (page && pagesInOutline.aggregated?.includes(page)) page = undefined
  if (page) {
    let contents = page.src.contents
    if (contents == null) return buffer
    // NOTE: blank lines at top and bottom of document create mismatch when using line numbers to navigate source lines
    // IMPORTANT: this must not leave behind lines the parser will drop!
    // IDEA: another option is to capture initial lineno of reader and use as offset (but preserves those blank lines)
    contents = Buffer.from(
      contents
        .toString()
        .replace(/^(?:[ \t]*\r\n?|[ \t]*\n)+/, '')
        .trimEnd()
    )
    ;(pagesInOutline.aggregated ??= []).push(page)
    // work on a copy of the page so the file in the catalog is not modified
    page = new page.constructor(Object.assign({}, page, { contents, mediaType: 'text/asciidoc' }))
    const { component, version, module: module_, relative, origin } = page.src
    const topicPrefix = ~relative.indexOf('/') ? path.dirname(relative) + '/' : ''
    const doc = loadAsciiDoc(page, contentCatalog, asciidocConfig)
    const refs = doc.getCatalog().refs
    // NOTE: in Antora, docname is relative src path from module without file extension
    const docname = doc.getAttribute('docname')
    const docnameForId = docname.replace(/[/]/g, '::').replace(/[.]/g, '-')
    // the component name is only part of the ID prefix for pages from a different component
    const scopeId = component !== componentVersion.name
    const idprefix =
      (scopeId ? component + ':' : '') +
      (module_ === 'ROOT' ? (scopeId ? ':' : '') : module_ + ':') +
      docnameForId +
      ':::'
    buffer.push('')
    buffer.push(`:docname: ${docname}`)
    if (component !== lastComponentVersion.name) {
      const thisComponentVersion =
        component === componentVersion.name && version === componentVersion.version
          ? componentVersion
          : contentCatalog.getComponentVersion(component, version)
      if (thisComponentVersion) {
        buffer.push(`:page-component-name: ${thisComponentVersion.name}`)
        buffer.push(`:page-component-version:${thisComponentVersion.version ? ' ' + thisComponentVersion.version : ''}`)
        buffer.push(':page-version: {page-component-version}')
        buffer.push(`:page-component-display-version: ${thisComponentVersion.displayVersion}`)
        buffer.push(`:page-component-title: ${thisComponentVersion.title}`)
        lastComponentVersion = thisComponentVersion
      }
    }
    buffer.push(`:page-module: ${module_}`)
    buffer.push(`:page-relative-src-path: ${relative}`)
    //buffer.push(`:page-origin-type: ${origin.type}`)
    buffer.push(`:page-origin-url: ${origin.url}`)
    buffer.push(`:page-origin-start-path:${origin.startPath && ' '}${origin.startPath}`)
    buffer.push(`:page-origin-refname: ${origin.branch || origin.tag}`)
    buffer.push(`:page-origin-reftype: ${origin.branch ? 'branch' : 'tag'}`)
    buffer.push(`:page-origin-refhash: ${origin.worktree ? '(worktree)' : origin.refhash}`)
    let enclosed
    // NOTE: if level is 0, doctitle has already been added and we're in the document header
    if (level) {
      if (level === 1 && navtitlePlain === componentVersion.title) {
        // the entry repeats the component title, so its contents are promoted one level
        level--
      } else {
        let hlevel = level + 1
        if (hlevel > 6) {
          // a section title cannot go deeper than 6 markers, so fall back to a discrete heading
          hlevel = 6
          buffer.push(`[discrete#${idprefix}]`)
        } else {
          buffer.push(`[#${idprefix}]`)
        }
        buffer.push(`${'='.repeat(hlevel)} ${navtitleAsciiDoc}`)
      }
    } else {
      header.unshift(`[#${idprefix}]`)
    }
    if (sectionMergeStrategy === 'enclose' && hasItems && doc.hasSections()) {
      enclosed = true
      // TODO: make overview section title configurable
      //let overviewTitle = doc.getDocumentTitle()
      //if (overviewTitle === navtitle) overviewTitle = doc.getAttribute('overview-title', 'Overview')
      const overviewTitle = doc.getAttribute('overview-title', 'Overview')
      buffer.push('')
      // NOTE: try to toggle sectids; otherwise, fallback to globally unique synthetic ID
      let toggleSectids, syntheticId
      if (doc.isAttribute('sectids')) {
        if (doc.isAttributeLocked('sectids')) {
          syntheticId = `__object-id-${getObjectId(outlineEntry)}`
        } else {
          buffer.push(':!sectids:')
          toggleSectids = true
        }
      }
      let hlevel = level + 2
      if (hlevel > 6) {
        hlevel = 6
        buffer.push(syntheticId ? `[discrete#${syntheticId}]` : '[discrete]')
      } else if (syntheticId) {
        buffer.push(`[#${syntheticId}]`)
      }
      buffer.push(`${'='.repeat(hlevel)} ${overviewTitle}`)
      if (toggleSectids) buffer.push(':sectids:')
    }
    const lines = doc.getSourceLines()
    // indices of source lines that must be passed through untouched (a Set keeps the per-line check cheap)
    const ignoreLines = new Set()
    // TODO: think more about when multipart is allowed; perhaps configurable
    if (doc.hasSections()) fixSectionLevels(doc.getSections(), level === 0)
    const allBlocks = doc.findBy({ traverse_documents: true }, (it) =>
      it.getContext() === 'document'
        ? it.getDocument().isNested()
        : !(it.getContext() === 'table_cell' && it.getStyle() === 'asciidoc')
    )
    // mark the lines of blocks whose content does not have the macros substitution applied
    allBlocks.forEach((block) => {
      const contentModel = block.content_model
      if (
        ((contentModel === 'verbatim' && block.getContext() !== 'table_cell') ||
          contentModel === 'simple' ||
          contentModel === 'pass') &&
        !block.hasSubstitution('macros')
      ) {
        const lineno = block.getLineNumber()
        const idx = typeof lineno === 'number' ? lineno - 1 : undefined
        const startLine = lines[idx]
        // NOTE: one case where this happens is when sourcemap isn't enabled when reducing
        if (startLine == null) {
          console.log(`null startLine for ${block.getContext()} at ${lineno} in ${relative}`)
          return
        }
        const char0 = startLine.charAt()
        // FIXME: needs to be more robust; move logic to helper
        const delimited =
          startLine.length > 3 &&
          startLine === char0.repeat(startLine.length) &&
          (char0 === '-' || char0 === '.' || char0 === '+')
        // QUESTION: exclude block attribute lines too? what about attribute entries?
        // the range is extended by two lines to cover the delimiter lines of a delimited block
        for (let i = idx; i < block.lines.length + (delimited ? idx + 2 : idx); i++) ignoreLines.add(i)
      }
    })
    // delimiter line of the comment block being skipped, if any
    let skipping
    for (let idx = 0, len = lines.length; idx < len; idx++) {
      if (ignoreLines.has(idx)) continue
      let line = lines[idx]
      if (line.startsWith('//')) {
        // line comment
        if (line[2] !== '/') continue
        // delimiter line of a comment block; the same delimiter closes the block
        if (line.length > 3 && line === '/'.repeat(line.length)) {
          if (skipping) {
            if (line === skipping) skipping = undefined
          } else {
            skipping = line
          }
          continue
        }
      } else if (skipping) {
        continue
      }
      // drop leveloffset entries (and the blank line that precedes them)
      if (line.charAt() === ':' && /^:(?:leveloffset: .*|!leveloffset:|leveloffset!:)$/.test(line)) {
        if (lines[idx - 1] === '') lines[idx - 1] = undefined
        lines[idx] = undefined
        continue
      }
      if (~line.indexOf('<<')) {
        line = line.replace(/(?<![\\+])<<#?([\p{Alpha}0-9_/.:{][^>,]*?)(?:|, *([^>]+?))?>>/gu, (m, refid, text) => {
          // support natural xref
          if (!refs['$key?'](refid) && (~refid.indexOf(' ') || refid.toLowerCase() !== refid)) {
            if ((refid = doc.$resolve_id(refid))['$nil?']()) return m
          }
          return `<<${idprefix}${refid}${text ? ',' + text : ''}>>`
        })
      }
      // NOTE: the next check takes care of inline and block anchors
      if (~line.indexOf('[[')) {
        line = line.replace(/\[\[([\p{Alpha}_:][\p{Alpha}0-9_\-:.]*)(|, *.+?)\]\]/gu, `[[${idprefix}$1$2]]`)
      }
      if (~line.indexOf('xref:')) {
        // Q: should we allow : as first character of target?
        line = line.replace(/(?<![\\+])xref:((?:\.\/)?[\p{Alpha}0-9_/.{#].*?)\[(|.*?[^\\])\]/gu, (m, target, text) => {
          let pagePart, fragment, targetPage
          const hashIdx = target.indexOf('#')
          if (~hashIdx) {
            pagePart = target.slice(0, hashIdx)
            fragment = target.slice(hashIdx + 1)
            // TODO: for now, assume .adoc; in the future, consider other file extensions
            if (pagePart && !pagePart.endsWith('.adoc')) pagePart += '.adoc'
          } else if (target.endsWith('.adoc')) {
            pagePart = target
            fragment = ''
          } else {
            fragment = target
          }
          if (!pagePart) {
            // reference within the same page
            // Q: should we validate the internal ID here?
            return text && ~text.indexOf('=')
              ? `xref:${idprefix}${fragment}[${text}]`
              : `<<${idprefix}${fragment}${text ? ',' + text.replace(/\\]/g, ']') : ''}>>`
          }
          // a target that specifies a version or component is never in this document; link to the site instead
          if (~pagePart.indexOf('@') || /:.*:/.test(pagePart)) {
            if (siteUrl && (targetPage = contentCatalog.resolvePage(pagePart, page.src)) && targetPage.out) {
              text ||= targetPage.asciidoc?.xreftext || target
              return `${siteUrl}${targetPage.pub.url}${fragment && '#' + fragment}[${text}]`
            }
            // TODO: handle unresolved page better
            return m
          }
          let targetModule
          const colonIdx = pagePart.indexOf(':')
          if (~colonIdx) {
            targetModule = pagePart.slice(0, colonIdx)
            pagePart = pagePart.slice(colonIdx + 1)
          } else {
            targetModule = module_
          }
          if (pagePart.startsWith('./')) pagePart = topicPrefix + pagePart.slice(2)
          if (targetModule !== 'ROOT') pagePart = `${targetModule}:${pagePart}`
          if (!(targetPage = pagesInOutline.get(pagePart))) {
            // the target page is not part of this document; link to the site instead
            if (siteUrl && (targetPage = contentCatalog.resolvePage(pagePart, page.src)) && targetPage.out) {
              text ||= targetPage.asciidoc?.xreftext || target
              return `${siteUrl}${targetPage.pub.url}${fragment && '#' + fragment}[${text}]`
            }
            // TODO: handle unresolved page better
            return m
          }
          // convert the page reference to the ID prefix used for the target page
          pagePart = pagePart
            .replace(/[/]/g, '::')
            .replace(/\.adoc$/, '')
            .replace(/[.]/g, '-')
          const refid = `${pagePart}:::${fragment}`
          return `<<${refid}${text && text !== targetPage.title ? ',' + text.replace(/\\]/g, ']') : ''}>>`
        })
      }
      if (~line.indexOf('link:{attachmentsdir}/')) {
        line = line.replace(/(?<![\\+])link:\{attachmentsdir\}\/([^\s[]+)\[(|.*?[^\\])\]/g, (m, relative, text) => {
          // NOTE: the attachment belongs to the component version of the page, which is not necessarily the
          // component version being assembled
          const attachment =
            siteUrl &&
            contentCatalog.getById({
              component,
              version,
              module: module_,
              family: 'attachment',
              relative,
            })
          // TODO: handle unresolved attachment page
          return attachment?.out ? `${siteUrl}${attachment.pub.url.replace(/_/g, '{underscore}')}[${text}]` : m
        })
      }
      if (~line.indexOf('image:') && !line.startsWith('image::')) {
        line = line.replace(/(?<![\\+])image:([^:\s[](?:[^[]*[^\s[])?)\[([^\]]*)\]/g, (m, target, attrlist) => {
          if (isResourceSpec(target)) {
            const image = contentCatalog.resolveResource(target, page.src, 'image', ['image'])
            // TODO: handle (or report) unresolved image better
            if (image?.out) {
              image.out.assembled = true
              return `image:${image.out.path.replace(/_/g, '{underscore}')}[${attrlist}]`
            }
          }
          return m
        })
      }
      lines[idx] = line
    }
    // NOTE: need to do this last since it modifies the line numbers
    // we could remap the line numbers to make them resilient
    // or we could mark which lines to remove and filter them after
    // [line index, offset] of the block image macro most recently handled
    let lastImageMacroAt
    // blocks are visited last to first so lines inserted for a block don't shift the blocks still to visit
    ;[...allBlocks].reverse().forEach((block) => {
      const lineno = block.getLineNumber()
      // NOTE: lineno is not defined for preamble
      if (typeof lineno !== 'number') return
      const context = block.getContext()
      let idx = lineno - 1
      if (context === 'section' && !block.getDocument().isNested()) {
        if (block.getSectionName() === 'header') {
          // the document title of the page is replaced by the section title made from the navigation title
          lines[idx] = undefined
          return
        }
        let blockStyle = sectionMergeStrategy === 'discrete' ? 'discrete' : undefined
        lines[idx] = lines[idx].replace(/^=+ (.+)/, (_, rest) => {
          let targetMarkerLength = block.level + 1 + level + (enclosed ? 1 : 0)
          if (targetMarkerLength > 6) {
            targetMarkerLength = 6
            blockStyle = 'discrete'
          }
          return '='.repeat(targetMarkerLength) + ' ' + rest
        })
        // NOTE: ID will be undefined if sectids are turned off
        if (block.getId()) rewriteStyleAttribute(block, lines, idx, idprefix, blockStyle)
      } else {
        if (context === 'image') {
          let line = lines[idx] || ''
          let prefix = ''
          // Q: can we use startsWith('image::') in certain cases?
          let imageMacroOffset = (
            lastImageMacroAt?.[0] === idx ? line.slice(0, lastImageMacroAt[1]) : line
          ).lastIndexOf('image::')
          if (imageMacroOffset > 0) {
            // the macro is only accepted away from the start of the line if it directly follows a cell separator
            if (
              block.getDocument().isNested() &&
              (prefix = line.slice(0, imageMacroOffset)).trimRight().endsWith('|')
            ) {
              line = line.slice(prefix.length)
            } else {
              imageMacroOffset = -1
            }
          }
          if (imageMacroOffset >= 0) {
            const target = block.getAttribute('target')
            if (isResourceSpec(target)) {
              const image = contentCatalog.resolveResource(target, page.src, 'image', ['image'])
              // FIXME: handle (or report) case when image is not resolved
              if (image?.out) {
                const boxedAttrlist = line.slice(line.indexOf('['))
                image.out.assembled = true
                lines[idx] = `${prefix}image::${image.out.path}${boxedAttrlist}`
              }
            }
            lastImageMacroAt = [idx, imageMacroOffset]
          }
        } else if (context === 'document' && block.hasHeader()) {
          // nested document
          idx = (block.getHeader().getLineNumber() || idx + 1) - 1
        }
        if (block.getId()) rewriteStyleAttribute(block, lines, idx, idprefix)
      }
    })
    buffer.push(...lines.filter((it) => it !== undefined))
    // undo the attribute entries from the header of the page so they don't leak into the content that follows
    const attributeEntries = Object.entries(doc.source_header_attributes?.$$smap || {})
    if (attributeEntries.length) {
      const resolvedAttributeEntries = attributeEntries.reduce(
        (accum, [name, val]) => {
          // Q: couldn't we just check if attribute is locked?
          if (name in mutableAttributes) {
            const initialVal = mutableAttributes[name]
            if (initialVal == null) {
              if (val != null) accum.push(`:!${name}:`)
            } else if (val !== initialVal) {
              accum.push(`:${name}:${initialVal ? ' ' + initialVal : ''}`)
            }
          } else if (
            !(
              val == null ||
              doc.isAttributeLocked(name) ||
              name === 'doctype' ||
              name === 'leveloffset' ||
              name === 'underscore'
            )
          ) {
            accum.push(`:!${name}:`)
          }
          return accum
        },
        ['']
      )
      // the first element is the blank separator line, so only add if an entry follows it
      if (resolvedAttributeEntries.length > 1) buffer.push(...resolvedAttributeEntries)
    }
  } else if (level) {
    if (level === 1 && navtitlePlain === componentVersion.title) {
      level--
    } else {
      buffer.push('')
      // NOTE: try to toggle sectids; otherwise, fallback to globally unique synthetic ID
      // Q: should we unset docname, page-module, etc?
      let toggleSectids, syntheticId
      if (!('sectids' in asciidocConfig.attributes)) {
        buffer.push(':!sectids:')
        toggleSectids = true
      } else if (typeof asciidocConfig.attributes.sectids === 'string') {
        if ('sectids' in mutableAttributes) {
          buffer.push(':!sectids:')
          toggleSectids = true
        } else {
          syntheticId = `__object-id-${getObjectId(outlineEntry)}`
        }
      }
      let sectionTitle = navtitleAsciiDoc
      if (urlType === 'external') {
        sectionTitle = `${url}[${navtitleAsciiDoc.replace(/\]/g, '\\]')}]`
      } else if (urlType === 'internal' && !unresolved && siteUrl) {
        const resource = contentCatalog.getFiles().find((it) => it.out && it.pub.url === url)
        if (resource) sectionTitle = `${siteUrl}${resource.pub.url}[${navtitleAsciiDoc.replace(/\]/g, '\\]')}]`
      }
      let hlevel = level + 1
      if (hlevel > 6) {
        hlevel = 6
        buffer.push(syntheticId ? `[discrete#${syntheticId}]` : '[discrete]')
      } else if (syntheticId) {
        buffer.push(`[#${syntheticId}]`)
      }
      buffer.push(`${'='.repeat(hlevel)} ${sectionTitle}`)
      if (toggleSectids) buffer.push(':sectids:')
    }
  }
  const nextLevel = level + 1
  if (hasItems) {
    // NOTE: drop first child if same as parent; should we keep if content is different?
    ;(urlType === 'internal' && urlType === items[0].urlType && url === items[0].url && !items[0].items
      ? items.slice(1)
      : items
    ).forEach((item) => {
      buffer.push(
        ...aggregateAsciiDoc(
          loadAsciiDoc,
          contentCatalog,
          header,
          componentVersion,
          item,
          pagesInOutline,
          asciidocConfig,
          mutableAttributes,
          sectionMergeStrategy,
          lastComponentVersion,
          nextLevel
        )
      )
    })
  }
  return buffer
}
/**
 * Generates a slug from a title for use as a file name.
 *
 * The title is lowercased. Character references (e.g., &amp;) and any character that is not a space, letter,
 * digit, underscore, hyphen, or period are removed. Spaces, underscores, and periods then become hyphens, and
 * consecutive hyphens are collapsed into one.
 *
 * @param {string} title - The title to convert.
 * @returns {string} The slug.
 */
function generateSlug (title) {
  const lowered = title.toLowerCase()
  const stripped = lowered.replace(/&.+?;|[^ \p{Alpha}0-9_\-.]/gu, '')
  const hyphenated = stripped.replace(/[ _.]/g, '-')
  return hyphenated.replace(/--+/g, '-')
}
/**
 * Normalizes the level of each section, in place, so it is one more than the level of its parent.
 *
 * A parent is fixed before its subsections are visited, so a correction carries down the tree.
 *
 * @param {Array} sections - Sections to fix; each must provide getParent, getLevel, hasSections, getSections.
 * @param {boolean} multipart - If true, a section is only changed when its level is greater than expected (a
 * lower level is left as is); if false, any level other than the expected one is changed.
 */
function fixSectionLevels (sections, multipart) {
  for (const section of sections) {
    const expectedLevel = section.getParent().getLevel() + 1
    const currentLevel = section.getLevel()
    const needsFix = multipart ? currentLevel > expectedLevel : currentLevel !== expectedLevel
    if (needsFix) section.level = expectedLevel
    if (section.hasSections()) fixSectionLevels(section.getSections(), multipart)
  }
}
/**
 * Assigns the prefixed ID of a block to the block attribute line above the block, in place.
 *
 * Block title lines and block anchor lines directly above the block are skipped over. If the line then found
 * is a block attribute line (optionally preceded by a cell specifier and separator when the block is in a nested
 * document), the ID in its first positional attribute is replaced, or added, and the style is replaced if a
 * replacement style is given. Otherwise, a new block attribute line is inserted into lines at idx.
 *
 * @param {Object} block - Block that has an ID; must provide getId and getDocument().isNested().
 * @param {Array} lines - Source lines of the document; modified in place.
 * @param {number} idx - Index of the line the block attribute line is expected to precede.
 * @param {string} idprefix - Prefix to put in front of the ID of the block.
 * @param {string} [replacementStyle=''] - Style that replaces the style of the block, if not empty.
 */
function rewriteStyleAttribute (block, lines, idx, idprefix, replacementStyle = '') {
  let prevLine = lines[idx - 1]
  const char0 = prevLine?.charAt()
  if (char0) {
    if (
      // block title
      (char0 === '.' && /^\.\.?[^ \t.]/.test(prevLine)) ||
      // block anchor
      (char0 === '[' &&
        prevLine.charAt(1) === '[' &&
        /^\[\[(?:|[\p{Alpha}_:][\p{Alpha}0-9_\-:.]*(?:, *.+)?)\]\]$/u.test(prevLine))
    ) {
      return rewriteStyleAttribute(block, lines, idx - 1, idprefix, replacementStyle)
    }
  }
  const scopedId = `${idprefix}${block.getId()}`
  let cellMatch
  if (
    char0 &&
    (char0 === '[' || (block.getDocument().isNested() && (cellMatch = prevLine.match(/^([^[|]*)\| *(\[.+)/)))) &&
    prevLine.charAt(prevLine.length - 1) === ']'
  ) {
    // text in front of the cell separator; an empty string if the separator starts the line
    let cellSpec
    if (cellMatch) {
      prevLine = cellMatch[2]
      cellSpec = cellMatch[1]
    }
    // first positional attribute; undefined if the first attribute is a named attribute
    let rawStyle
    const commaIdx = prevLine.indexOf(',')
    if (~commaIdx) {
      rawStyle = prevLine.slice(1, commaIdx)
      if (~rawStyle.indexOf('=')) rawStyle = undefined
    } else if (!~prevLine.indexOf('=')) {
      rawStyle = prevLine.slice(1, prevLine.length - 1)
    }
    // NOTE: replacer functions are used so that $ sequences in an ID or style are not interpreted as
    // replacement patterns
    if (rawStyle) {
      if (~rawStyle.indexOf('#')) {
        prevLine = prevLine.replace(/#[^.%,\]]+/, () => `#${scopedId}`)
        if (replacementStyle) prevLine = prevLine.replace(/^[^#.%,\]]+/, () => `[${replacementStyle}`)
      } else {
        prevLine = `[${
          replacementStyle ? rawStyle.replace(/^[^.%]*/, () => replacementStyle) : rawStyle
        }#${scopedId}${prevLine.slice(rawStyle.length + 1)}`
      }
    } else {
      prevLine = `[${replacementStyle}#${scopedId}${rawStyle == null ? ',' : ''}${prevLine.slice(1)}`
    }
    // NOTE: compare against null so the cell separator is restored even if nothing precedes it
    if (cellSpec != null) prevLine = `${cellSpec}|${prevLine}`
    lines[idx - 1] = prevLine
  } else {
    lines.splice(idx, 0, `[${replacementStyle}#${scopedId}]`)
  }
}
/**
 * Tells whether a target is a resource reference rather than a URL or data URI.
 *
 * @param {string} str - The target to check.
 * @returns {boolean} false if the target contains "://", or starts with "data:" and contains a comma;
 * otherwise true.
 */
function isResourceSpec (str) {
  // without a colon, the target can be neither a URL nor a data URI
  if (!str.includes(':')) return true
  if (str.includes('://')) return false
  return !(str.startsWith('data:') && str.includes(','))
}
/**
 * Returns an ID obtained from the uid function of the global Opal runtime.
 *
 * @param {*} obj - Not used; each call requests a new ID from Opal regardless of the argument.
 * @returns {*} The value returned by Opal.uid().
 */
function getObjectId (obj) {
  const opalRuntime = global.Opal
  return opalRuntime.uid()
}
// The function that produces the aggregate document is the sole export of this module.
module.exports = produceAggregateDocument