UNPKG

dht-prometheus

Version:

Bridge to scrape Prometheus metrics fully peer-to-peer

274 lines (222 loc) 7.9 kB
const ReadyResource = require('ready-resource')
const idEnc = require('hypercore-id-encoding')
const b4a = require('b4a')
const safetyCatch = require('safety-catch')
const AliasRpcServer = require('dht-prom-alias-rpc')
const ScraperClient = require('dht-prom-client/scraper')
const { writePromTargets, readPromTargets } = require('./lib/prom-targets')
const debounceify = require('debounceify')

const DEFAULT_PROM_TARGETS_LOC = './targets.json'

/**
 * HTTP bridge that lets Prometheus scrape metrics from services reachable
 * only peer-to-peer. Services register an alias through the alias RPC
 * server; Prometheus then scrapes them via GET /scrape/:alias/metrics.
 * Registered aliases are persisted to a targets file and expire unless
 * refreshed.
 */
class PrometheusDhtBridge extends ReadyResource {
  constructor (swarm, server, protomuxRpcClient, sharedSecret, {
    ownPromClient,
    _forceFlushOnClientReady = false,
    prometheusTargetsLoc = DEFAULT_PROM_TARGETS_LOC,
    entryExpiryMs = 3 * 60 * 60 * 1000,
    checkExpiredsIntervalMs = 60 * 60 * 1000,
    serverLogLevel = 'warn'
  } = {}) {
    super()

    const rpcKeyPair = protomuxRpcClient.keyPair
    if (!rpcKeyPair || !b4a.equals(swarm.keyPair.publicKey, rpcKeyPair.publicKey)) {
      // This keeps our authentication easy: services which register to the
      // scraper simply need to check that its public key is the same key they
      // contacted to register themselves (relying on the keypair authentication)
      throw new Error('The protomux-rpc-client keyPair option should be set to the swarm keyPair')
    }

    this.swarm = swarm
    this.secret = sharedSecret // Shared with clients
    this.protomuxRpcClient = protomuxRpcClient
    this.entryExpiryMs = entryExpiryMs
    this.checkExpiredsIntervalMs = checkExpiredsIntervalMs
    this._checkExpiredsInterval = null

    this.server = server
    this.server.get(
      '/scrape/:alias/metrics',
      { logLevel: serverLogLevel },
      this._handleGet.bind(this)
    )
    if (ownPromClient) {
      // Also expose the bridge's own metrics directly
      this.server.get(
        '/metrics',
        { logLevel: serverLogLevel },
        async (req, reply) => {
          reply.send(await ownPromClient.register.metrics())
        }
      )
    }

    this.promTargetsLoc = prometheusTargetsLoc
    this.aliasRpcServer = new AliasRpcServer(this.swarm, this.secret, this.putAlias.bind(this))
    this.aliases = new Map()
    this._writeAliases = debounceify(this._writeAliasesUndebounced.bind(this))

    // for tests, to ensure we're connected to the scraper on first scrape
    this._forceFlushOnClientReady = _forceFlushOnClientReady
  }

  get dht () {
    return this.swarm.dht
  }

  get publicKey () {
    return this.swarm.keyPair.publicKey
  }

  async _open () {
    // The aliases must be loaded before anything else runs,
    // otherwise the previously persisted state could be overwritten
    await this._loadAliases()
    await this.swarm.listen()
    this._checkExpiredsInterval = setInterval(
      () => this.cleanupExpireds(),
      this.checkExpiredsIntervalMs
    )
  }

  async _close () {
    // Stop the timer first (no expireds cleanup during closing)
    if (this._checkExpiredsInterval) clearInterval(this._checkExpiredsInterval)

    // DEVNOTE: no need to close the entries explicitly
    // since they have no state, but we could consider
    // exposing a force-close at protomux-rpc-client level
    // to clean up the open connections rather than rely on the gc
    await this.swarm.destroy()

    if (this.opened) await this._writeAliases()
  }

  /**
   * Register a scrape target under an alias, or refresh its expiry when
   * the alias already points at the same public key.
   *
   * Returns true when a new entry was created (the alias was new or its
   * target key changed), false when the call was an idempotent refresh.
   */
  putAlias (alias, targetPubKey, hostname, service, { write = true } = {}) {
    if (!this.opened && write) throw new Error('Cannot put aliases before ready')

    const decodedKey = idEnc.decode(idEnc.normalize(targetPubKey))
    const existing = this.aliases.get(alias)

    if (existing && b4a.equals(existing.targetKey, decodedKey)) {
      // Same target: just push the expiry back (idempotent)
      existing.setExpiry(Date.now() + this.entryExpiryMs)
      return false
    }

    const entry = new AliasesEntry(
      new ScraperClient(this.protomuxRpcClient, decodedKey),
      hostname,
      service,
      Date.now() + this.entryExpiryMs
    )
    this.aliases.set(alias, entry)
    this.emit('set-alias', { alias, entry })

    if (write === true) {
      this._writeAliases().catch(safetyCatch)
    }
    return true
  }

  // Serve the metrics of the aliased target, proxying over the DHT.
  // Replies 404 for unknown aliases and 502 on upstream failure.
  async _handleGet (req, reply) {
    const entry = this.aliases.get(req.params.alias)
    if (!entry) {
      reply.code(404)
      reply.send('Unknown alias')
      return
    }

    if (this._forceFlushOnClientReady && !entry.hasHandledGet) {
      // TODO: revert back to flushing when bug fixed there
      // await entry.scrapeClient.swarm.flush()
      await new Promise(resolve => setTimeout(resolve, 250))
    }
    entry.hasHandledGet = true

    let result
    try {
      result = await entry.scrapeClient.requestMetrics()
    } catch (e) {
      this.emit('upstream-error', e)
      reply.code(502)
      reply.send('Upstream unavailable')
      return
    }

    if (result.success) {
      reply.send(result.metrics)
    } else {
      reply.code(502)
      reply.send(`Upstream error: ${result.errorMessage}`)
    }
  }

  // Persist the current alias map to the targets file (should never throw;
  // failures surface as a 'write-aliases-error' event instead)
  async _writeAliasesUndebounced () {
    try {
      await writePromTargets(this.promTargetsLoc, this.aliases)
      this.emit('aliases-updated', this.promTargetsLoc)
    } catch (e) {
      this.emit('write-aliases-error', e)
    }
  }

  // Restore aliases from the targets file (should never throw)
  async _loadAliases () {
    try {
      const persisted = await readPromTargets(this.promTargetsLoc)
      for (const [alias, { z32PubKey, hostname, service }] of persisted) {
        // Write false since we load an existing state
        // (otherwise we overwrite them 1 by 1, and can lose
        // entries if we restart/crash during setup)
        this.putAlias(alias, z32PubKey, hostname, service, { write: false })
      }
    } catch (e) {
      // An error is expected if the file does not yet exist
      // (typically first run only)
      this.emit('load-aliases-error', e)
    }
  }

  // Drop every expired alias and persist if anything changed.
  // Should be kept sync (or think hard)
  cleanupExpireds () {
    const expired = []
    for (const [alias, entry] of this.aliases) {
      if (entry.isExpired) expired.push(alias)
    }

    for (const alias of expired) {
      const entry = this.aliases.get(alias)
      this.aliases.delete(alias)
      this.emit('alias-expired', { publicKey: entry.targetKey, alias })
    }

    if (expired.length > 0) {
      this._writeAliases().catch(safetyCatch)
    }
  }

  // Wire all lifecycle events to the given logger (and to the RPC server's)
  registerLogger (logger) {
    this.on('set-alias', ({ alias, entry }) => {
      const publicKey = entry.scrapeClient.targetKey
      const { service, hostname } = entry
      logger.info(`Registered alias: ${alias} -> ${idEnc.normalize(publicKey)} (${service} on host ${hostname})`)
    })
    this.on('aliases-updated', (loc) => {
      logger.info(`Updated the aliases file at ${loc}`)
    })
    this.on('alias-expired', ({ alias, publicKey }) => {
      logger.info(`Alias entry expired: ${alias} -> ${idEnc.normalize(publicKey)}`)
    })
    this.on('load-aliases-error', e => {
      // Expected first time the service starts (creates it then)
      logger.error(`failed to load aliases file: ${e.stack}`)
    })
    this.on('upstream-error', e => {
      logger.info(`upstream error: ${e.stack}`)
    })
    this.on('write-aliases-error', e => {
      logger.error(`Failed to write aliases file ${e.stack}`)
    })

    this.aliasRpcServer.registerLogger(logger)
  }
}

// One registered scrape target: the client used to reach it plus its metadata
class AliasesEntry {
  constructor (scrapeClient, hostname, service, expiry) {
    this.scrapeClient = scrapeClient
    this.hostname = hostname
    this.service = service
    this.expiry = expiry
    this.hasHandledGet = false
  }

  get targetKey () {
    return this.scrapeClient.targetKey
  }

  get isExpired () {
    return Date.now() > this.expiry
  }

  setExpiry (expiry) {
    this.expiry = expiry
  }
}

module.exports = PrometheusDhtBridge