@redpanda-data/docs-extensions-and-macros
Version:
Antora extensions and macros developed for Redpanda documentation.
288 lines (248 loc) • 10.3 kB
JavaScript
/**
* Adds Git commit dates to pages as attributes.
*
* This extension:
* 1. Gets the first commit date (when file was created) -> page-git-created-date
* 2. Gets the last commit date (when file was modified) -> page-git-modified-date
* 3. Adds these to page.asciidoc.attributes with page- prefix for UI template access
*
* Supports both local repos (with worktree) and remote repos (bare clones with gitdir).
* Antora caches remote repos as bare Git repos in ~/.cache/antora/content/
*
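* Performance optimization: Uses isomorphic-git to walk the git log ONCE per
* repository + ref combination, building a filepath→dates map shared by every page
* from that ref. That is O(commits) log entries visited instead of O(files * commits).
* For a repo with 1000 files and 5000 commits, this reduces log entries visited from 5M to 5K.
* Note: each visited commit still compares its file listing with its first parent's.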
*
* Attribute naming: Uses page- prefix so attributes appear in page.attributes
* in Handlebars templates (Antora strips the prefix when exposing to UI model).
*
* Only runs on pages that have origin info (skips virtual/generated pages).
*/
const path = require('path')
const fs = require('fs')
/**
* Resolve isomorphic-git from Antora's dependencies
* @param {Object} context - Extension context with module info
* @returns {Object} isomorphic-git module
*/
function requireGit (context) {
return require(
require.resolve('isomorphic-git', {
paths: [require.resolve('@antora/content-aggregator', { paths: context.module.paths }) + '/..']
})
)
}
/**
 * Turn a Unix timestamp into a calendar date string.
 *
 * The date is taken from the UTC ISO-8601 representation, so the result is
 * the UTC day (YYYY-MM-DD), independent of the local time zone.
 *
 * @param {number} timestamp - Unix timestamp in seconds
 * @returns {string} First ten characters of the ISO string, i.e. YYYY-MM-DD
 */
function formatDate (timestamp) {
  const millis = timestamp * 1000
  const isoString = new Date(millis).toISOString()
  return isoString.slice(0, 10)
}
/**
 * Build a map of filepath -> {created, modified} dates from the git log.
 *
 * The log for `ref` is read once. Each commit's flattened file listing is compared
 * with that of its FIRST parent (a root commit is compared with an empty listing);
 * a file counts as touched when it is absent from the parent or has a different
 * blob OID there. Commits are visited in the order `git.log` returns them (expected
 * newest first), so the first touch seen becomes `modified` and every later touch
 * overwrites `created`, leaving the last one visited.
 *
 * Failures are best-effort: a commit that cannot be processed (unreadable parent,
 * missing or invalid committer timestamp, ...) is skipped with a debug message, and
 * a failure to read the log itself is logged as a warning and yields whatever
 * has been collected so far (normally an empty map).
 *
 * @param {Object} git - isomorphic-git module
 * @param {string} gitdir - Path to .git directory
 * @param {string} ref - Git ref (branch/tag/commit)
 * @param {Object} logger - Logger instance (info/debug/warn are used)
 * @returns {Promise<Map<string, {created: string, modified: string}>>}
 */
async function buildFileDateMap (git, gitdir, ref, logger) {
  const fileDates = new Map()
  const cache = {}
  try {
    const commits = await git.log({
      fs,
      gitdir,
      ref,
      cache,
    })
    logger.info(`Walking ${commits.length} commits for ${path.basename(gitdir)} (ref: ${ref})`)
    // Shared across commits so unchanged trees are only read once
    const treeCache = new Map()
    // The log already carries each visited commit's tree OID; use it to avoid
    // re-reading a parent commit that is part of the same log.
    const treeOidByCommit = new Map(commits.map((entry) => [entry.oid, entry.commit.tree]))
    for (const commit of commits) {
      try {
        // Formatting happens inside the try so a bad timestamp skips this commit only
        const date = formatDate(commit.commit.committer.timestamp)
        const currentFiles = await getTreeFiles(git, gitdir, commit.commit.tree, '', cache, treeCache)
        // Only the first parent is considered; root commits compare against nothing
        const [firstParentOid] = commit.commit.parent || []
        let parentFiles = new Map()
        if (firstParentOid) {
          const parentTreeOid =
            treeOidByCommit.get(firstParentOid) ??
            (await git.readCommit({ fs, gitdir, oid: firstParentOid, cache })).commit.tree
          parentFiles = await getTreeFiles(git, gitdir, parentTreeOid, '', cache, treeCache)
        }
        for (const [filepath, oid] of currentFiles) {
          // Same blob in the parent means this commit did not touch the file
          if (parentFiles.get(filepath) === oid) continue
          const entry = fileDates.get(filepath)
          if (entry) {
            // Seen in an earlier-visited commit already: push the created date back
            entry.created = date
          } else {
            // First touch encountered for this file
            fileDates.set(filepath, { created: date, modified: date })
          }
        }
      } catch (err) {
        // Skip commits that can't be processed
        logger.debug(`Skipping commit ${commit.oid.substring(0, 7)}: ${err.message}`)
      }
    }
  } catch (err) {
    logger.warn(`Failed to read git log for ${gitdir}: ${err.message}`)
  }
  return fileDates
}
/**
 * Recursively walk a git tree to get all file paths with their blob OIDs.
 * Returns a Map of filepath → OID for comparison between commits.
 *
 * `treeCache` stores, per tree OID, the listing with paths RELATIVE to that tree.
 * The prefix is applied only when building the return value, so a tree that
 * appears at several locations (identical or moved directories share one OID)
 * is listed correctly under each of them.
 *
 * A tree that cannot be read contributes no files and is not cached.
 *
 * @param {Object} git - isomorphic-git module
 * @param {string} gitdir - Path to .git directory
 * @param {string} oid - Tree object ID
 * @param {string} prefix - Path of this tree within the repository ('' for the root)
 * @param {Object} cache - Git object cache
 * @param {Map} treeCache - Cache of tree OID → Map of tree-relative path → blob OID
 * @returns {Promise<Map<string, string>>} Map of prefixed filepath → blob OID
 */
async function getTreeFiles (git, gitdir, oid, prefix, cache, treeCache) {
  let relativeFiles
  if (treeCache.has(oid)) {
    relativeFiles = treeCache.get(oid)
  } else {
    relativeFiles = new Map()
    try {
      const { tree } = await git.readTree({
        fs,
        gitdir,
        oid,
        cache,
      })
      for (const entry of tree) {
        if (entry.type === 'blob') {
          relativeFiles.set(entry.path, entry.oid)
        } else if (entry.type === 'tree') {
          // Recurse into subdirectory; results come back prefixed with entry.path
          const subfiles = await getTreeFiles(git, gitdir, entry.oid, entry.path, cache, treeCache)
          for (const [subpath, suboid] of subfiles) {
            relativeFiles.set(subpath, suboid)
          }
        }
      }
      // Cache only after a successful read
      treeCache.set(oid, relativeFiles)
    } catch (err) {
      // Best-effort: no logger is available here, so an unreadable tree simply
      // yields whatever was collected before the failure (usually nothing).
    }
  }
  // The root listing needs no rewriting
  if (!prefix) return relativeFiles
  const files = new Map()
  for (const [relativePath, blobOid] of relativeFiles) {
    files.set(`${prefix}/${relativePath}`, blobOid)
  }
  return files
}
/**
 * Antora extension entry point.
 *
 * Registers a `documentsConverted` listener that groups pages by git directory
 * and ref, builds one filepath→dates map per group, and writes
 * `page-git-created-date` / `page-git-modified-date` into each matching page's
 * `asciidoc.attributes`. Pages without an origin URL or without a resolvable
 * gitdir are counted as skipped.
 */
module.exports.register = function () {
  const logger = this.getLogger('add-git-dates-extension')
  const context = this
  // Run on documentsConverted after Antora builds page.asciidoc.attributes
  this.on('documentsConverted', async ({ contentCatalog }) => {
    const startTime = Date.now()
    let processedCount = 0
    let skippedCount = 0
    // Load isomorphic-git; without it nothing can be done
    let git
    try {
      git = requireGit(context)
    } catch (err) {
      logger.error(`Failed to load isomorphic-git: ${err.message}`)
      return
    }
    // Group pages by BOTH gitdir AND ref (since same repo can have multiple branches/versions)
    const pagesByRepoAndRef = new Map()
    const skipLoggedRepos = new Set()
    for (const page of contentCatalog.getPages()) {
      const origin = page.src?.origin
      if (!origin?.url) {
        skippedCount++
        continue
      }
      // Need gitdir for isomorphic-git (works for both local and bare repos)
      const gitdir = origin.gitdir || (origin.worktree ? path.join(origin.worktree, '.git') : null)
      if (!gitdir) {
        // Report each repo without a gitdir only once
        if (!skipLoggedRepos.has(origin.url)) {
          logger.info(`⚠️ Skipping repo without gitdir: ${origin.url} (has gitdir: ${!!origin.gitdir}, has worktree: ${!!origin.worktree})`)
          skipLoggedRepos.add(origin.url)
        }
        skippedCount++
        continue
      }
      // Ensure asciidoc.attributes exists
      if (!page.asciidoc) page.asciidoc = {}
      if (!page.asciidoc.attributes) page.asciidoc.attributes = {}
      const startPath = origin.startPath || ''
      // Git tree paths always use '/', so join with posix semantics on every
      // platform; otherwise the map lookup below never matches on Windows.
      const relativeFilePath = startPath ? path.posix.join(startPath, page.src.path) : page.src.path
      const ref = origin.refhash || origin.refname || 'HEAD'
      // Create composite key: gitdir + ref to handle multiple branches per repo
      const repoRefKey = `${gitdir}::${ref}`
      if (!pagesByRepoAndRef.has(repoRefKey)) {
        pagesByRepoAndRef.set(repoRefKey, {
          gitdir,
          ref,
          pages: []
        })
      }
      pagesByRepoAndRef.get(repoRefKey).pages.push({ page, relativeFilePath })
    }
    const groups = Array.from(pagesByRepoAndRef.values())
    const totalPages = groups.reduce((sum, r) => sum + r.pages.length, 0)
    const repoCount = new Set(groups.map(r => r.gitdir)).size
    logger.info(`Processing ${totalPages} pages across ${repoCount} repos (${pagesByRepoAndRef.size} branches) for git dates (skipped ${skippedCount} virtual/generated)`)
    // Log which repos are being processed (once per gitdir)
    const reposBeingProcessed = new Set()
    for (const { gitdir } of groups) {
      if (!reposBeingProcessed.has(gitdir)) {
        logger.info(`✓ Will process git dates for: ${gitdir}`)
        reposBeingProcessed.add(gitdir)
      }
    }
    // Process each repository + ref combination
    for (const { gitdir, ref, pages } of groups) {
      const repoStartTime = Date.now()
      try {
        // Build the filepath -> dates map for this repo + ref
        const fileDateMap = await buildFileDateMap(git, gitdir, ref, logger)
        // Apply dates to pages; pages whose file is not in the map keep no date attributes
        for (const { page, relativeFilePath } of pages) {
          const dates = fileDateMap.get(relativeFilePath)
          if (dates) {
            page.asciidoc.attributes['page-git-created-date'] = dates.created
            page.asciidoc.attributes['page-git-modified-date'] = dates.modified
            processedCount++
          }
        }
        const repoTime = Date.now() - repoStartTime
        logger.debug(`Processed ${pages.length} pages from ${path.basename(gitdir)}@${ref.substring(0,8)} in ${repoTime}ms (map size: ${fileDateMap.size})`)
      } catch (err) {
        logger.warn(`Failed to process repo ${gitdir}@${ref}: ${err.message}`)
      }
    }
    const duration = Date.now() - startTime
    const perPage = totalPages > 0 ? (duration / totalPages).toFixed(1) : 0
    logger.info(`Git dates added: processed=${processedCount}, skipped=${skippedCount}, duration=${duration}ms (${perPage}ms/page)`)
  })
}