UNPKG

@antora/assembler

Version:

An extension library for Antora that assembles content from multiple pages into a single AsciiDoc file to convert and publish.

606 lines (593 loc) • 24.3 kB

JavaScript

'use strict'

const path = require('node:path/posix')
const sanitize = require('./util/sanitize')
const unconvertInlineAsciiDoc = require('./util/unconvert-inline-asciidoc')

/**
 * Builds a single aggregate AsciiDoc file from the pages referenced by a navigation outline.
 *
 * A temporary page is added to the content catalog (and removed again) solely to derive the output
 * path for the aggregate. The returned file is created with the same constructor as that temporary file.
 *
 * @param {Function} loadAsciiDoc - Called as loadAsciiDoc(page, contentCatalog, asciidocConfig); must
 *   return the parsed document for the page.
 * @param {Object} contentCatalog - Catalog used to add/remove the template file and resolve resources.
 * @param {Object} componentVersion - Component version the aggregate belongs to (name, version, title,
 *   displayVersion are read).
 * @param {Object} outline - Root navigation entry; its `content` is used as the navtitle.
 * @param {String} doctype - Doctype to declare in the header (see buildAsciiDocHeader).
 * @param {Array} pages - Candidate pages; matched to outline entries by `pub.url`.
 * @param {Object} asciidocConfig - AsciiDoc config; `attributes['site-url']` is read.
 * @param {Object} mutableAttributes - Attributes (name -> initial value) restored after each page.
 * @param {String} [sectionMergeStrategy='discrete'] - 'discrete' or 'enclose'.
 * @returns {Object} The aggregate file (contents, path, src, mediaType 'text/asciidoc').
 */
function produceAggregateDocument (
  loadAsciiDoc,
  contentCatalog,
  componentVersion,
  outline,
  doctype,
  pages,
  asciidocConfig,
  mutableAttributes,
  sectionMergeStrategy = 'discrete'
) {
  const pagesInOutline = selectPagesInOutline(outline, pages)
  const navtitle = outline.content
  // register a throwaway page so the catalog computes the out path for us
  const templateFile = contentCatalog.addFile({
    src: {
      component: componentVersion.name,
      version: componentVersion.version,
      module: 'ROOT',
      family: 'page',
      relative: generateSlug(navtitle),
    },
  })
  const { dir: outDir, name: outName } = path.parse(templateFile.out.path)
  // if the out path stem is not the slug (e.g. <slug>/index.html), name the file after the directory
  const path_ = outName === templateFile.src.relative ? path.join(outDir, outName + '.adoc') : outDir + '.adoc'
  contentCatalog.removeFile(templateFile)
  const header = buildAsciiDocHeader(componentVersion, navtitle, doctype)
  // NOTE: aggregateAsciiDoc may prepend an ID line to header, so header must be joined after body is built
  const body = aggregateAsciiDoc(
    loadAsciiDoc,
    contentCatalog,
    header,
    componentVersion,
    outline,
    pagesInOutline,
    asciidocConfig,
    mutableAttributes,
    sectionMergeStrategy
  )
  return new templateFile.constructor({
    aggregate: true,
    asciidoc: asciidocConfig,
    contents: Buffer.from([...header, ...body].join('\n') + '\n'),
    mediaType: 'text/asciidoc',
    path: path_,
    src: {
      component: componentVersion.name,
      version: componentVersion.version,
      basename: path.basename(path_),
      stem: path.basename(path_, '.adoc'),
      extname: '.adoc',
    },
  })
}

/**
 * Builds the lines of the AsciiDoc document header for the aggregate document.
 *
 * The doctitle is the navtitle, prefixed with the component title unless the plain navtitle equals it.
 * The revnumber is omitted when the version is empty or 'master'; the doctype entry is omitted for 'article'.
 *
 * @param {Object} componentVersion - Provides name, version, displayVersion, and title.
 * @param {String} navtitle - Navigation title (passed through sanitize and unconvertInlineAsciiDoc).
 * @param {String} [doctype='book'] - Doctype to declare.
 * @returns {Array<String>} Header lines.
 */
function buildAsciiDocHeader (componentVersion, navtitle, doctype = 'book') {
  const navtitlePlain = sanitize(navtitle)
  const navtitleAsciiDoc = unconvertInlineAsciiDoc(navtitle)
  let doctitle = navtitleAsciiDoc
  if (navtitlePlain !== componentVersion.title) doctitle = `${componentVersion.title}: ${doctitle}`
  const version = componentVersion.version === 'master' ? '' : componentVersion.version
  return [
    `= ${doctitle}`,
    ...(version ? [`:revnumber: ${version}`] : []),
    ...(doctype === 'article' ? [] : [`:doctype: ${doctype}`]),
    ':underscore: _',
    // Q: should we pass these via the CLI so they cannot be modified?
    `:page-component-name: ${componentVersion.name}`,
    `:page-component-version:${version ? ' ' + version : ''}`,
    ':page-version: {page-component-version}',
    `:page-component-display-version: ${componentVersion.displayVersion}`,
    `:page-component-title: ${componentVersion.title}`,
  ]
}

/**
 * Recursively collects the pages referenced by internal entries of the outline.
 *
 * Each matched page is registered under two keys: its module-qualified relative path (the module
 * qualifier is omitted for ROOT) and its publication URL.
 *
 * @param {Object} outlineEntry - Navigation entry (urlType, url, items are read).
 * @param {Array} pages - Candidate pages.
 * @returns {Map<String, Object>} Lookup of pages in the outline.
 */
function selectPagesInOutline (outlineEntry, pages) {
  const page = outlineEntry.urlType === 'internal' ? pages.find((it) => it.pub.url === outlineEntry.url) : undefined
  return (outlineEntry.items || []).reduce(
    // forEach returns undefined, so ?? always yields accum
    (accum, item) => selectPagesInOutline(item, pages).forEach((v, k) => accum.set(k, v)) ?? accum,
    new Map(
      page && [
        [`${page.src.module === 'ROOT' ? '' : page.src.module + ':'}${page.src.relative}`, page],
        [page.pub.url, page],
      ]
    )
  )
}

/**
 * Produces the body lines of the aggregate document for an outline entry and, recursively, its items.
 *
 * For an entry that maps to a page not yet aggregated, the page source is loaded and rewritten so it can
 * live inside the aggregate: section levels are shifted, IDs/anchors/xrefs are prefixed with a per-page
 * idprefix, xrefs to pages outside the outline and attachment links become site URLs (when site-url is
 * set), and image targets are replaced with output paths. For an entry without a page (level > 0), only a
 * section title is emitted.
 *
 * Side effects visible in this function: pushes the page onto `pagesInOutline.aggregated`, may unshift an
 * ID line onto `header` (level 0), sets `image.out.assembled = true` on resolved images, and may adjust
 * section levels on the loaded document.
 *
 * @param {Function} loadAsciiDoc - See produceAggregateDocument.
 * @param {Object} contentCatalog - Content catalog.
 * @param {Array<String>} header - Document header lines (mutated at level 0).
 * @param {Object} componentVersion - Component version of the aggregate.
 * @param {Object} outlineEntry - Navigation entry to process.
 * @param {Map<String, Object>} pagesInOutline - Result of selectPagesInOutline.
 * @param {Object} asciidocConfig - AsciiDoc config.
 * @param {Object} mutableAttributes - Attributes (name -> initial value) to restore after the page.
 * @param {String} sectionMergeStrategy - 'discrete' or 'enclose'.
 * @param {Object} [lastComponentVersion=componentVersion] - Component version last written to the buffer.
 * @param {Number} [level=0] - Nesting depth of the entry in the outline.
 * @returns {Array<String>} Lines to append to the aggregate document.
 */
function aggregateAsciiDoc (
  loadAsciiDoc,
  contentCatalog,
  header,
  componentVersion,
  outlineEntry,
  pagesInOutline,
  asciidocConfig,
  mutableAttributes,
  sectionMergeStrategy,
  lastComponentVersion = componentVersion,
  level = 0
) {
  const buffer = []
  // TODO: we could try to be smart about it and make sure the page with fragment is included at least once
  if (outlineEntry.hash) return buffer
  const { content: navtitle, items = [], unresolved, urlType, url } = outlineEntry
  const hasItems = items.length > 0
  const navtitlePlain = sanitize(navtitle)
  const navtitleAsciiDoc = unconvertInlineAsciiDoc(navtitle)
  // site URL without trailing slash; empty string if not set or if set to /
  const siteUrl = ((val) => {
    if (!val || val === '/') return ''
    return val.charAt(val.length - 1) === '/' ? val.slice(0, val.length - 1) : val
  })(asciidocConfig.attributes['site-url'])
  // FIXME: ideally, resource ID would be stored in navigation so we can look up the page more efficiently
  let page = urlType === 'internal' && !unresolved ? pagesInOutline.get(url) : undefined
  // only include each page once
  if (page && pagesInOutline.aggregated?.includes(page)) page = undefined
  if (page) {
    let contents = page.src.contents
    if (contents == null) return buffer
    // NOTE: blank lines at top and bottom of document create mismatch when using line numbers to navigate source lines
    // IMPORTANT: this must not leave behind lines the parser will drop!
    // IDEA: another option is to capture initial lineno of reader and use as offset (but preserves those blank lines)
    contents = Buffer.from(
      contents
        .toString()
        .replace(/^(?:[ \t]*\r\n?|[ \t]*\n)+/, '')
        .trimEnd()
    )
    ;(pagesInOutline.aggregated ??= []).push(page)
    // work on a copy of the page so the trimmed contents and media type don't affect the original
    page = new page.constructor(Object.assign({}, page, { contents, mediaType: 'text/asciidoc' }))
    const { component, version, module: module_, relative, origin } = page.src
    const topicPrefix = ~relative.indexOf('/') ? path.dirname(relative) + '/' : ''
    const doc = loadAsciiDoc(page, contentCatalog, asciidocConfig)
    const refs = doc.getCatalog().refs
    // NOTE: in Antora, docname is relative src path from module without file extension
    const docname = doc.getAttribute('docname')
    const docnameForId = docname.replace(/[/]/g, '::').replace(/[.]/g, '-')
    // IDs of pages from another component are additionally scoped by component name
    const scopeId = component !== componentVersion.name
    const idprefix =
      (scopeId ? component + ':' : '') +
      (module_ === 'ROOT' ? (scopeId ? ':' : '') : module_ + ':') +
      docnameForId +
      ':::'
    buffer.push('')
    buffer.push(`:docname: ${docname}`)
    if (component !== lastComponentVersion.name) {
      const thisComponentVersion =
        component === componentVersion.name && version === componentVersion.version
          ? componentVersion
          : contentCatalog.getComponentVersion(component, version)
      if (thisComponentVersion) {
        buffer.push(`:page-component-name: ${thisComponentVersion.name}`)
        buffer.push(`:page-component-version:${thisComponentVersion.version ? ' ' + thisComponentVersion.version : ''}`)
        buffer.push(':page-version: {page-component-version}')
        buffer.push(`:page-component-display-version: ${thisComponentVersion.displayVersion}`)
        buffer.push(`:page-component-title: ${thisComponentVersion.title}`)
        // NOTE(review): this reassignment is only seen by descendants of this entry, not by later siblings
        lastComponentVersion = thisComponentVersion
      }
    }
    buffer.push(`:page-module: ${module_}`)
    buffer.push(`:page-relative-src-path: ${relative}`)
    //buffer.push(`:page-origin-type: ${origin.type}`)
    buffer.push(`:page-origin-url: ${origin.url}`)
    buffer.push(`:page-origin-start-path:${origin.startPath && ' '}${origin.startPath}`)
    buffer.push(`:page-origin-refname: ${origin.branch || origin.tag}`)
    buffer.push(`:page-origin-reftype: ${origin.branch ? 'branch' : 'tag'}`)
    buffer.push(`:page-origin-refhash: ${origin.worktree ? '(worktree)' : origin.refhash}`)
    let enclosed
    // NOTE: if level is 0, doctitle has already been added and we're in the document header
    if (level) {
      if (level === 1 && navtitlePlain === componentVersion.title) {
        // entry repeats the component title; don't add a section title and promote its content one level
        level--
      } else {
        let hlevel = level + 1
        if (hlevel > 6) {
          // AsciiDoc section markers max out at 6; use a discrete heading beyond that
          hlevel = 6
          buffer.push(`[discrete#${idprefix}]`)
        } else {
          buffer.push(`[#${idprefix}]`)
        }
        buffer.push(`${'='.repeat(hlevel)} ${navtitleAsciiDoc}`)
      }
    } else {
      header.unshift(`[#${idprefix}]`)
    }
    if (sectionMergeStrategy === 'enclose' && hasItems && doc.hasSections()) {
      enclosed = true
      // TODO: make overview section title configurable
      //let overviewTitle = doc.getDocumentTitle()
      //if (overviewTitle === navtitle) overviewTitle = doc.getAttribute('overview-title', 'Overview')
      const overviewTitle = doc.getAttribute('overview-title', 'Overview')
      buffer.push('')
      // NOTE: try to toggle sectids; otherwise, fallback to globally unique synthetic ID
      let toggleSectids, syntheticId
      if (doc.isAttribute('sectids')) {
        if (doc.isAttributeLocked('sectids')) {
          syntheticId = `__object-id-${getObjectId(outlineEntry)}`
        } else {
          buffer.push(':!sectids:')
          toggleSectids = true
        }
      }
      let hlevel = level + 2
      if (hlevel > 6) {
        hlevel = 6
        buffer.push(syntheticId ? `[discrete#${syntheticId}]` : '[discrete]')
      } else if (syntheticId) {
        buffer.push(`[#${syntheticId}]`)
      }
      buffer.push(`${'='.repeat(hlevel)} ${overviewTitle}`)
      if (toggleSectids) buffer.push(':sectids:')
    }
    const lines = doc.getSourceLines()
    // indices of source lines that must not be rewritten (content of verbatim/simple/pass blocks without macros)
    const ignoreLines = new Set()
    // TODO: think more about when multipart is allowed; perhaps configurable
    if (doc.hasSections()) fixSectionLevels(doc.getSections(), level === 0)
    const allBlocks = doc.findBy({ traverse_documents: true }, (it) =>
      it.getContext() === 'document'
        ? it.getDocument().isNested()
        : !(it.getContext() === 'table_cell' && it.getStyle() === 'asciidoc')
    )
    allBlocks.forEach((block) => {
      const contentModel = block.content_model
      if (
        ((contentModel === 'verbatim' && block.getContext() !== 'table_cell') ||
          contentModel === 'simple' ||
          contentModel === 'pass') &&
        !block.hasSubstitution('macros')
      ) {
        const lineno = block.getLineNumber()
        const idx = typeof lineno === 'number' ? lineno - 1 : undefined
        const startLine = lines[idx]
        // NOTE: one case this happens if when sourcemap isn't enabled when reducing
        if (startLine == null) {
          console.log(`null startLine for ${block.getContext()} at ${lineno} in ${relative}`)
          return
        }
        const char0 = startLine.charAt()
        // FIXME: needs to be more robust; move logic to helper
        const delimited =
          startLine.length > 3 &&
          startLine === char0.repeat(startLine.length) &&
          (char0 === '-' || char0 === '.' || char0 === '+')
        // QUESTION: exclude block attribute lines too? what about attribute entries?
        // for a delimited block, also cover the opening and closing delimiter lines
        for (let i = idx; i < block.lines.length + (delimited ? idx + 2 : idx); i++) ignoreLines.add(i)
      }
    })
    // holds the opening delimiter while inside a comment block
    let skipping
    for (let idx = 0, len = lines.length; idx < len; idx++) {
      if (ignoreLines.has(idx)) continue
      let line = lines[idx]
      if (line.startsWith('//')) {
        // line comment; leave as is
        if (line[2] !== '/') continue
        if (line.length > 3 && line === '/'.repeat(line.length)) {
          if (skipping) {
            if (line === skipping) skipping = undefined
          } else {
            skipping = line
          }
          continue
        }
      } else if (skipping) {
        continue
      }
      // drop leveloffset entries (and a blank line directly above) since section levels are rewritten explicitly
      if (line.charAt() === ':' && /^:(?:leveloffset: .*|!leveloffset:|leveloffset!:)$/.test(line)) {
        if (lines[idx - 1] === '') lines[idx - 1] = undefined
        lines[idx] = undefined
        continue
      }
      if (~line.indexOf('<<')) {
        line = line.replace(/(?<![\\+])<<#?([\p{Alpha}0-9_/.:{][^>,]*?)(?:|, *([^>]+?))?>>/gu, (m, refid, text) => {
          // support natural xref
          if (!refs['$key?'](refid) && (~refid.indexOf(' ') || refid.toLowerCase() !== refid)) {
            if ((refid = doc.$resolve_id(refid))['$nil?']()) return m
          }
          return `<<${idprefix}${refid}${text ? ',' + text : ''}>>`
        })
      }
      // NOTE: the next check takes care of inline and block anchors
      if (~line.indexOf('[[')) {
        line = line.replace(/\[\[([\p{Alpha}_:][\p{Alpha}0-9_\-:.]*)(|, *.+?)\]\]/gu, `[[${idprefix}$1$2]]`)
      }
      if (~line.indexOf('xref:')) {
        // Q: should we allow : as first character of target?
        line = line.replace(/(?<![\\+])xref:((?:\.\/)?[\p{Alpha}0-9_/.{#].*?)\[(|.*?[^\\])\]/gu, (m, target, text) => {
          let pagePart, fragment, targetPage
          const hashIdx = target.indexOf('#')
          if (~hashIdx) {
            pagePart = target.slice(0, hashIdx)
            fragment = target.slice(hashIdx + 1)
            // TODO: for now, assume .adoc; in the future, consider other file extensions
            if (pagePart && !pagePart.endsWith('.adoc')) pagePart += '.adoc'
          } else if (target.endsWith('.adoc')) {
            pagePart = target
            fragment = ''
          } else {
            fragment = target
          }
          if (!pagePart) {
            // Q: should we validate the internal ID here?
            // reference within the same page; keep xref macro form if text appears to contain attributes
            return text && ~text.indexOf('=')
              ? `xref:${idprefix}${fragment}[${text}]`
              : `<<${idprefix}${fragment}${text ? ',' + text.replace(/\\]/g, ']') : ''}>>`
          }
          // target qualified with a version or component is never looked up in the outline; link to the site instead
          if (~pagePart.indexOf('@') || /:.*:/.test(pagePart)) {
            if (siteUrl && (targetPage = contentCatalog.resolvePage(pagePart, page.src)) && targetPage.out) {
              text ||= targetPage.asciidoc?.xreftext || target
              return `${siteUrl}${targetPage.pub.url}${fragment && '#' + fragment}[${text}]`
            }
            // TODO: handle unresolved page better
            return m
          }
          let targetModule
          const colonIdx = pagePart.indexOf(':')
          if (~colonIdx) {
            targetModule = pagePart.slice(0, colonIdx)
            pagePart = pagePart.slice(colonIdx + 1)
          } else {
            targetModule = module_
          }
          if (pagePart.startsWith('./')) pagePart = topicPrefix + pagePart.slice(2)
          // keys in pagesInOutline omit the ROOT qualifier, but the catalog lookup must keep it; otherwise,
          // an explicit ROOT: target would be resolved against the module of the current page
          const pageSpec = `${targetModule}:${pagePart}`
          if (targetModule !== 'ROOT') pagePart = pageSpec
          if (!(targetPage = pagesInOutline.get(pagePart))) {
            if (siteUrl && (targetPage = contentCatalog.resolvePage(pageSpec, page.src)) && targetPage.out) {
              text ||= targetPage.asciidoc?.xreftext || target
              return `${siteUrl}${targetPage.pub.url}${fragment && '#' + fragment}[${text}]`
            }
            // TODO: handle unresolved page better
            return m
          }
          // target page is in the outline; convert to an internal reference using the same ID scheme as idprefix
          pagePart = pagePart
            .replace(/[/]/g, '::')
            .replace(/\.adoc$/, '')
            .replace(/[.]/g, '-')
          const refid = `${pagePart}:::${fragment}`
          return `<<${refid}${text && text !== targetPage.title ? ',' + text.replace(/\\]/g, ']') : ''}>>`
        })
      }
      if (~line.indexOf('link:{attachmentsdir}/')) {
        line = line.replace(/(?<![\\+])link:\{attachmentsdir\}\/([^\s[]+)\[(|.*?[^\\])\]/g, (m, relative, text) => {
          // look up the attachment in the component version of the page, which may differ from that of the aggregate
          const attachment =
            siteUrl &&
            contentCatalog.getById({
              component,
              version,
              module: module_,
              family: 'attachment',
              relative,
            })
          // TODO: handle unresolved attachment page
          return attachment?.out ? `${siteUrl}${attachment.pub.url.replace(/_/g, '{underscore}')}[${text}]` : m
        })
      }
      if (~line.indexOf('image:') && !line.startsWith('image::')) {
        line = line.replace(/(?<![\\+])image:([^:\s[](?:[^[]*[^\s[])?)\[([^\]]*)\]/g, (m, target, attrlist) => {
          if (isResourceSpec(target)) {
            const image = contentCatalog.resolveResource(target, page.src, 'image', ['image'])
            // TODO: handle (or report) unresolved image better
            if (image?.out) {
              image.out.assembled = true
              return `image:${image.out.path.replace(/_/g, '{underscore}')}[${attrlist}]`
            }
          }
          return m
        })
      }
      lines[idx] = line
    }
    // NOTE: need to do this last since it modifies the line numbers
    // we could remap the line numbers to make them resilient
    // or we could mark which lines to remove and filter them after
    // [line index, offset] of the block image macro most recently handled (blocks are visited in reverse)
    let lastImageMacroAt
    ;[...allBlocks].reverse().forEach((block) => {
      const lineno = block.getLineNumber()
      // NOTE: lineno is not defined for preamble
      if (typeof lineno !== 'number') return
      const context = block.getContext()
      let idx = lineno - 1
      if (context === 'section' && !block.getDocument().isNested()) {
        if (block.getSectionName() === 'header') {
          lines[idx] = undefined
          return
        }
        let blockStyle = sectionMergeStrategy === 'discrete' ? 'discrete' : undefined
        lines[idx] = lines[idx].replace(/^=+ (.+)/, (_, rest) => {
          let targetMarkerLength = block.level + 1 + level + (enclosed ? 1 : 0)
          if (targetMarkerLength > 6) {
            targetMarkerLength = 6
            blockStyle = 'discrete'
          }
          return '='.repeat(targetMarkerLength) + ' ' + rest
        })
        // NOTE: ID will be undefined if sectids are turned off
        if (block.getId()) rewriteStyleAttribute(block, lines, idx, idprefix, blockStyle)
      } else {
        if (context === 'image') {
          let line = lines[idx] || ''
          let prefix = ''
          // Q: can we use startsWith('image::') in certain cases?
          // if a later macro on this line was already handled, only search the text before it
          let imageMacroOffset = (
            lastImageMacroAt?.[0] === idx ? line.slice(0, lastImageMacroAt[1]) : line
          ).lastIndexOf('image::')
          if (imageMacroOffset > 0) {
            // only accept a macro that doesn't start the line if it directly follows a cell separator in a nested document
            if (
              block.getDocument().isNested() &&
              (prefix = line.slice(0, imageMacroOffset)).trimEnd().endsWith('|')
            ) {
              line = line.slice(prefix.length)
            } else {
              imageMacroOffset = -1
            }
          }
          if (imageMacroOffset >= 0) {
            const target = block.getAttribute('target')
            if (isResourceSpec(target)) {
              const image = contentCatalog.resolveResource(target, page.src, 'image', ['image'])
              // FIXME: handle (or report) case when image is not resolved
              if (image?.out) {
                const boxedAttrlist = line.slice(line.indexOf('['))
                image.out.assembled = true
                lines[idx] = `${prefix}image::${image.out.path}${boxedAttrlist}`
              }
            }
            lastImageMacroAt = [idx, imageMacroOffset]
          }
        } else if (context === 'document' && block.hasHeader()) {
          // nested document
          idx = (block.getHeader().getLineNumber() || idx + 1) - 1
        }
        if (block.getId()) rewriteStyleAttribute(block, lines, idx, idprefix)
      }
    })
    buffer.push(...lines.filter((it) => it !== undefined))
    // restore or unset attributes defined in the header of the page so they don't leak into the next page
    const attributeEntries = Object.entries(doc.source_header_attributes?.$$smap || {})
    if (attributeEntries.length) {
      const resolvedAttributeEntries = attributeEntries.reduce(
        (accum, [name, val]) => {
          // Q: couldn't we just check if attribute is locked?
          if (name in mutableAttributes) {
            const initialVal = mutableAttributes[name]
            if (initialVal == null) {
              if (val != null) accum.push(`:!${name}:`)
            } else if (val !== initialVal) {
              accum.push(`:${name}:${initialVal ? ' ' + initialVal : ''}`)
            }
          } else if (
            !(
              val == null ||
              doc.isAttributeLocked(name) ||
              name === 'doctype' ||
              name === 'leveloffset' ||
              name === 'underscore'
            )
          ) {
            accum.push(`:!${name}:`)
          }
          return accum
        },
        // seed with a blank line to separate the entries from the page content
        ['']
      )
      if (resolvedAttributeEntries.length > 1) buffer.push(...resolvedAttributeEntries)
    }
  } else if (level) {
    // entry has no page to include (or page was already included); only add a section title
    if (level === 1 && navtitlePlain === componentVersion.title) {
      level--
    } else {
      buffer.push('')
      // NOTE: try to toggle sectids; otherwise, fallback to globally unique synthetic ID
      // Q: should we unset docname, page-module, etc?
      let toggleSectids, syntheticId
      if (!('sectids' in asciidocConfig.attributes)) {
        buffer.push(':!sectids:')
        toggleSectids = true
      } else if (typeof asciidocConfig.attributes.sectids === 'string') {
        if ('sectids' in mutableAttributes) {
          buffer.push(':!sectids:')
          toggleSectids = true
        } else {
          syntheticId = `__object-id-${global.Opal.hash(outlineEntry).$object_id()}`
        }
      }
      let sectionTitle = navtitleAsciiDoc
      if (urlType === 'external') {
        sectionTitle = `${url}[${navtitleAsciiDoc.replace(/\]/g, '\\]')}]`
      } else if (urlType === 'internal' && !unresolved && siteUrl) {
        const resource = contentCatalog.getFiles().find((it) => it.out && it.pub.url === url)
        if (resource) sectionTitle = `${siteUrl}${resource.pub.url}[${navtitleAsciiDoc.replace(/\]/g, '\\]')}]`
      }
      let hlevel = level + 1
      if (hlevel > 6) {
        hlevel = 6
        buffer.push(syntheticId ? `[discrete#${syntheticId}]` : '[discrete]')
      } else if (syntheticId) {
        buffer.push(`[#${syntheticId}]`)
      }
      buffer.push(`${'='.repeat(hlevel)} ${sectionTitle}`)
      if (toggleSectids) buffer.push(':sectids:')
    }
  }
  const nextLevel = level + 1
  if (hasItems) {
    // NOTE: drop first child if same as parent; should we keep if content is different?
    ;(urlType === 'internal' && urlType === items[0].urlType && url === items[0].url && !items[0].items
      ? items.slice(1)
      : items
    ).forEach((item) => {
      buffer.push(
        ...aggregateAsciiDoc(
          loadAsciiDoc,
          contentCatalog,
          header,
          componentVersion,
          item,
          pagesInOutline,
          asciidocConfig,
          mutableAttributes,
          sectionMergeStrategy,
          lastComponentVersion,
          nextLevel
        )
      )
    })
  }
  return buffer
}

/**
 * Derives a slug from a title: lowercases it, removes character references (e.g. &amp;) and characters
 * other than space, letters, digits, underscore, hyphen, and period, converts spaces, underscores, and
 * periods to hyphens, then collapses runs of hyphens.
 *
 * @param {String} title - The title to convert.
 * @returns {String} The slug.
 */
function generateSlug (title) {
  return title
    .toLowerCase()
    .replace(/&.+?;|[^ \p{Alpha}0-9_\-.]/gu, '')
    .replace(/[ _.]/g, '-')
    .replace(/--+/g, '-')
}

/**
 * Recursively normalizes section levels so each section is at most (multipart) or exactly (otherwise)
 * one level below its parent. Modifies the `level` property of the sections in place.
 *
 * @param {Array} sections - Sections to fix.
 * @param {Boolean} multipart - If true, only sections nested too deeply are adjusted.
 */
function fixSectionLevels (sections, multipart) {
  sections.forEach((sect) => {
    const targetLevel = sect.getParent().getLevel() + 1
    if (multipart ? sect.getLevel() > targetLevel : sect.getLevel() !== targetLevel) sect.level = targetLevel
    if (sect.hasSections()) fixSectionLevels(sect.getSections(), multipart)
  })
}

/**
 * Writes the prefixed ID of the block (and optionally a replacement style) into the block attribute line
 * directly above the line at idx, or inserts a new block attribute line at idx if there isn't one.
 *
 * Block title lines and block anchor lines directly above idx are skipped over. In a nested document,
 * a block attribute line that follows a cell separator on the same line (e.g. a|[#id]) is also
 * recognized; the text before the separator (which may be empty) is preserved.
 *
 * @param {Object} block - Block whose ID is being rewritten (getId and getDocument are called).
 * @param {Array<String>} lines - Source lines (modified in place; may grow by one entry).
 * @param {Number} idx - Index of the line on which the block starts.
 * @param {String} idprefix - Prefix to add to the ID.
 * @param {String} [replacementStyle=''] - Style to use in place of the existing style, if any.
 */
function rewriteStyleAttribute (block, lines, idx, idprefix, replacementStyle = '') {
  let prevLine = lines[idx - 1]
  const char0 = prevLine?.charAt()
  if (char0) {
    if (
      (char0 === '.' && /^\.\.?[^ \t.]/.test(prevLine)) ||
      (char0 === '[' &&
        prevLine.charAt(1) === '[' &&
        /^\[\[(?:|[\p{Alpha}_:][\p{Alpha}0-9_\-:.]*(?:, *.+)?)\]\]$/u.test(prevLine))
    ) {
      // block title or block anchor; look at the line above it instead
      return rewriteStyleAttribute(block, lines, idx - 1, idprefix, replacementStyle)
    }
  }
  let cellMatch, cellSpec
  if (
    char0 &&
    (char0 === '[' || (block.getDocument().isNested() && (cellMatch = prevLine.match(/^([^[|]*)\| *(\[.+)/)))) &&
    prevLine.charAt(prevLine.length - 1) === ']'
  ) {
    if (cellMatch) {
      // NOTE: cellSpec is an empty string when nothing precedes the cell separator
      cellSpec = cellMatch[1]
      prevLine = cellMatch[2]
    }
    // rawStyle is the first positional entry of the attrlist; undefined if the first entry is a named attribute
    let rawStyle
    const commaIdx = prevLine.indexOf(',')
    if (~commaIdx) {
      rawStyle = prevLine.slice(1, commaIdx)
      if (~rawStyle.indexOf('=')) rawStyle = undefined
    } else if (!~prevLine.indexOf('=')) {
      rawStyle = prevLine.slice(1, prevLine.length - 1)
    }
    if (rawStyle) {
      if (~rawStyle.indexOf('#')) {
        prevLine = prevLine.replace(/#[^.%,\]]+/, `#${idprefix}${block.getId()}`)
        if (replacementStyle) prevLine = prevLine.replace(/^[^#.%,\]]+/, `[${replacementStyle}`)
      } else {
        prevLine = `[${
          replacementStyle ? rawStyle.replace(/^[^.%]*/, replacementStyle) : rawStyle
        }#${idprefix}${block.getId()}${prevLine.slice(rawStyle.length + 1)}`
      }
    } else {
      prevLine = `[${replacementStyle}#${idprefix}${block.getId()}${rawStyle == null ? ',' : ''}${prevLine.slice(1)}`
    }
    // must not use a truthy check here or else the cell separator is lost when the cell spec is empty
    if (cellSpec != null) prevLine = `${cellSpec}|${prevLine}`
    lines[idx - 1] = prevLine
  } else {
    lines.splice(idx, 0, `[${replacementStyle}#${idprefix}${block.getId()}]`)
  }
}

/**
 * Reports whether the target is a candidate resource reference, meaning it is neither a URL (contains ://)
 * nor a data URI (starts with data: and contains a comma).
 *
 * @param {String} str - The target to check.
 * @returns {Boolean} false if the target is a URL or data URI, otherwise true.
 */
function isResourceSpec (str) {
  return !(~str.indexOf(':') && (~str.indexOf('://') || (str.startsWith('data:') && ~str.indexOf(','))))
}

/**
 * Returns a new ID from the Opal runtime for use in a synthetic section ID.
 *
 * @param {Object} obj - Not used by the current implementation.
 * @returns {*} The value returned by global.Opal.uid().
 */
function getObjectId (obj) {
  return global.Opal.uid()
}

module.exports = produceAggregateDocument