/*
 * @redpanda-data/docs-extensions-and-macros
 * Version:
 * Antora extensions and macros developed for Redpanda documentation.
 * 172 lines (143 loc) • 5.75 kB
 * JavaScript
 */
const generateIndex = require('./generate-index')
const algoliasearch = require('algoliasearch')
const http = require('http')
const https = require('https')
const _ = require('lodash')
// Default libuv's thread pool to 16 threads, but only when the operator has not
// already chosen a size: an explicit UV_THREADPOOL_SIZE must not be clobbered.
// The value is stored as a string because process.env only holds strings.
// NOTE(review): the pool presumably has to be sized before its first use for
// this to take effect - confirm that nothing touches it before this line runs.
if (!process.env.UV_THREADPOOL_SIZE) {
  process.env.UV_THREADPOOL_SIZE = '16'
}
/**
* Algolia indexing for an Antora documentation site.
*
* @module antora-algolia-indexer
*/
/**
 * Antora extension entry point that keeps an Algolia index in sync with the
 * records produced by `generateIndex`.
 *
 * Must be called with `this` bound to an object exposing `getLogger(name)` and
 * `on(eventName, listener)` (the Antora extension context).
 *
 * Indexing is skipped (with an info log) when any of ALGOLIA_ADMIN_API_KEY,
 * ALGOLIA_APP_ID or ALGOLIA_INDEX_NAME is missing from the environment.
 *
 * @param {object} context - Registration context.
 * @param {object} context.config - Extension configuration.
 * @param {*} [context.config.indexLatestOnly] - Forwarded untouched to `generateIndex`.
 * @param {*} [context.config.excludes] - Forwarded untouched to `generateIndex`.
 * @throws {Error} When the configuration contains any other option.
 */
function register ({
  config: {
    indexLatestOnly,
    excludes,
    ...unknownOptions
  }
}) {
  const logger = this.getLogger('algolia-indexer-extension')

  // Validate required environment variables
  const requiredEnvVars = ['ALGOLIA_ADMIN_API_KEY', 'ALGOLIA_APP_ID', 'ALGOLIA_INDEX_NAME']
  const missingVars = requiredEnvVars.filter(v => !process.env[v])
  if (missingVars.length > 0) {
    logger.info(`Algolia indexing disabled - missing environment variables: ${missingVars.join(', ')}`)
    return
  }

  // Validate unknown options
  const unknownKeys = Object.keys(unknownOptions)
  if (unknownKeys.length) {
    throw new Error(`Unrecognized option${unknownKeys.length > 1 ? 's' : ''} specified: ${unknownKeys.join(', ')}`)
  }

  // Create HTTP agents with connection pooling
  const httpAgent = new http.Agent({ keepAlive: true, maxSockets: 100 })
  const httpsAgent = new https.Agent({ keepAlive: true, maxSockets: 100 })

  // Connect and authenticate with Algolia
  // NOTE(review): confirm that the installed algoliasearch version honours
  // `httpAgent`/`httpsAgent` when passed directly as client options.
  const client = algoliasearch(process.env.ALGOLIA_APP_ID, process.env.ALGOLIA_ADMIN_API_KEY, {
    httpAgent,
    httpsAgent
  })
  const index = client.initIndex(process.env.ALGOLIA_INDEX_NAME)

  this.on('beforePublish', async ({ playbook, contentCatalog }) => {
    const algolia = generateIndex(playbook, contentCatalog, { indexLatestOnly, excludes, logger })
    if (!algolia || Object.keys(algolia).length === 0) {
      logger.warn('No content to index for Algolia')
      return
    }

    const indexName = process.env.ALGOLIA_INDEX_NAME
    // Set as soon as any remote operation fails, so the final summary never
    // claims the index is up to date after a partial sync.
    let hadErrors = false

    // Save objects in a local cache to query later.
    // Avoids sending multiple requests.
    // browseObjects does not affect analytics or usage limits.
    // See https://www.algolia.com/doc/api-reference/api-methods/browse/#about-this-method
    const existingObjectsMap = new Map()
    try {
      await index.browseObjects({
        query: '',
        batch: batch => {
          for (const obj of batch) {
            existingObjectsMap.set(obj.objectID, obj)
          }
        }
      })
      logger.info(`Loaded ${existingObjectsMap.size} existing objects from Algolia index`)
    } catch (err) {
      hadErrors = true
      // JSON.stringify() of an Error instance is "{}" (message and stack are
      // not enumerable), so report its message; other values keep the JSON form.
      const detail = err instanceof Error ? err.message : JSON.stringify(err)
      logger.error(`Error browsing existing Algolia objects: ${detail}`)
      // Best effort: continue with whatever was loaded. Records missing from
      // the cache are re-sent as additions and are never deleted.
    }

    // Totals only count records whose batch was accepted by Algolia.
    let totalObjectsToUpdate = 0
    let totalObjectsToAdd = 0

    for (const c of Object.keys(algolia)) {
      for (const v of Object.keys(algolia[c])) {
        const objectsToUpdate = []
        const objectsToAdd = []

        for (const obj of algolia[c][v]) {
          const existingObject = existingObjectsMap.get(obj.objectID)
          if (!existingObject) {
            objectsToAdd.push(obj)
            continue
          }
          if (!_.isEqual(existingObject, obj)) {
            objectsToUpdate.push(obj)
          }
          // Whatever remains in the map after this loop is stale content.
          existingObjectsMap.delete(obj.objectID)
        }

        const batchActions = [
          ...objectsToAdd.map(body => ({ action: 'addObject', indexName, body })),
          ...objectsToUpdate.map(body => ({ action: 'updateObject', indexName, body }))
        ]
        // Only send a batch if there are actions to perform
        if (batchActions.length === 0) continue

        try {
          await client.multipleBatch(batchActions)
          totalObjectsToAdd += objectsToAdd.length
          totalObjectsToUpdate += objectsToUpdate.length
          logger.debug(`Batch completed: ${objectsToAdd.length} added, ${objectsToUpdate.length} updated for ${c}/${v}`)
        } catch (error) {
          hadErrors = true
          logger.error(`Error uploading records to Algolia: ${error.message}`)
        }
      }
    }

    // Identify objects to delete (stale content)
    const objectsToDelete = []
    for (const [objectID, obj] of existingObjectsMap) {
      // Only delete Doc pages (not API) and Labs that aren't interactive
      const shouldDelete = (obj.type === 'Doc' && !obj.objectID.includes('/api/')) ||
        (!obj.type) ||
        (obj.type === 'Lab' && !obj.interactive)
      if (shouldDelete) {
        objectsToDelete.push(objectID)
      }
    }

    let totalObjectsDeleted = 0
    if (objectsToDelete.length > 0) {
      logger.info(`Deleting ${objectsToDelete.length} outdated records...`)
      logger.debug(`Objects to delete: ${JSON.stringify(objectsToDelete)}`)
      try {
        await index.deleteObjects(objectsToDelete)
        totalObjectsDeleted = objectsToDelete.length
        logger.info(`Successfully deleted ${objectsToDelete.length} outdated records`)
      } catch (error) {
        hadErrors = true
        logger.error(`Error deleting records from Algolia: ${error.message}`)
      }
    }

    // Summary
    logger.info(`Algolia sync complete: ${totalObjectsToAdd} added, ${totalObjectsToUpdate} updated, ${totalObjectsDeleted} deleted`)
    if (hadErrors) {
      logger.warn('Algolia sync finished with errors - some changes may not have been applied')
    } else if (totalObjectsToAdd === 0 && totalObjectsToUpdate === 0 && totalObjectsDeleted === 0) {
      logger.info('Index is up to date - no changes needed')
    }
  })

  // Cleanup HTTP agents on process exit
  // NOTE: This registers a global handler. In watch mode, agents will persist
  // between builds, which is generally fine for connection reuse.
  process.on('exit', () => {
    httpAgent.destroy()
    httpsAgent.destroy()
  })
}
module.exports = { generateIndex, register }