// dht-prometheus test suite
// Bridge to scrape Prometheus metrics fully peer to peer
const path = require('path')
const { once } = require('events')
const test = require('brittle')
const promClient = require('prom-client')
const DhtPromClient = require('dht-prom-client')
const createTestnet = require('hyperdht/testnet')
const HyperDHT = require('hyperdht')
const fastify = require('fastify')
const axios = require('axios')
const hypCrypto = require('hypercore-crypto')
const getTmpDir = require('test-tmp')
const PrometheusDhtBridge = require('../index')
const Hyperswarm = require('hyperswarm')
const ProtomuxRpcClient = require('protomux-rpc-client')
// Happy path: the bridge learns an alias and a scrape over HTTP
// returns the client's Prometheus metrics.
test('put alias + lookup happy flow', async t => {
  const { bridge, dhtPromClient } = await setup(t)

  await dhtPromClient.ready()
  await bridge.ready()

  const baseUrl = await bridge.server.listen({ host: '127.0.0.1', port: 0 })
  bridge.putAlias('dummy', dhtPromClient.publicKey)

  // Give the swarm time to connect
  // TODO: use swarm.flush again when bug fixed
  await new Promise(resolve => setTimeout(resolve, 1000))

  const response = await axios.get(`${baseUrl}/scrape/dummy/metrics`, {
    validateStatus: null
  })

  t.is(response.status, 200, 'correct status')
  const hasDefaultMetric = response.data.includes('process_cpu_user_seconds_total')
  t.is(hasDefaultMetric, true, 'Successfully scraped metrics')
})
// Scraping an alias the bridge has never seen must 404 with a clear message.
test('404 on unknown alias', async t => {
  const { bridge } = await setup(t)
  await bridge.ready()

  const baseUrl = await bridge.server.listen({ host: '127.0.0.1', port: 0 })

  const response = await axios.get(`${baseUrl}/scrape/nothinghere/metrics`, {
    validateStatus: null
  })

  t.is(response.status, 404, 'correct status')
  const hasErrMsg = response.data.includes('Unknown alias')
  t.is(hasErrMsg, true, 'Sensible err msg')
})
// When the upstream client reports a failed collection, the bridge
// responds 502 and surfaces the request uid for correlation.
test('502 with uid if upstream returns success: false', async t => {
  const { bridge, dhtPromClient } = await setup(t)

  // Register a metric whose collect() always throws, so the
  // upstream scrape fails deterministically
  new promClient.Gauge({ // eslint-disable-line no-new
    name: 'broken_metric',
    help: 'A metric which throws on collecting it',
    collect () {
      throw new Error('I break stuff')
    }
  })

  // Capture the uid the client assigns to the incoming scrape request
  let reqUid = null
  dhtPromClient.on('metrics-request', ({ uid }) => {
    reqUid = uid
  })

  await dhtPromClient.ready()
  await bridge.ready()

  const baseUrl = await bridge.server.listen({ host: '127.0.0.1', port: 0 })
  bridge.putAlias('dummy', dhtPromClient.publicKey)

  // Give the swarm time to connect
  // TODO: use swarm.flush again when bug fixed
  await new Promise(resolve => setTimeout(resolve, 1000))

  const response = await axios.get(`${baseUrl}/scrape/dummy/metrics`, {
    validateStatus: null
  })

  t.is(response.status, 502, 'correct status')
  const mentionsUid = response.data.includes(reqUid)
  t.is(mentionsUid, true, 'uid included in error message')
})
// When the upstream client is gone entirely, the scrape must 502
// with a plain 'Upstream unavailable' body.
test('502 if upstream unavailable', async t => {
  const { bridge, dhtPromClient, protomuxRpcClient } = await setup(t)

  await dhtPromClient.ready()
  await bridge.ready()

  const baseUrl = await bridge.server.listen({ host: '127.0.0.1', port: 0 })
  bridge.putAlias('dummy', dhtPromClient.publicKey)

  // Tear the upstream down before the scrape happens
  await protomuxRpcClient.close()
  await dhtPromClient.close()

  const response = await axios.get(`${baseUrl}/scrape/dummy/metrics`, {
    validateStatus: null
  })

  t.is(response.status, 502, 'correct status')
  t.is(response.data, 'Upstream unavailable')
})
// Re-registering an alias with the same key must reuse the existing
// entry; a different key must replace it.
test('No new alias if adding same key', async t => {
  const { bridge } = await setup(t)
  const firstKey = 'a'.repeat(64)
  const otherKey = 'b'.repeat(64)

  await bridge.ready()

  bridge.putAlias('dummy', firstKey)
  const initialEntry = bridge.aliases.get('dummy')
  t.is(initialEntry != null, true, 'sanity check')

  // Same alias + same key: entry object is kept as-is
  bridge.putAlias('dummy', firstKey)
  t.is(initialEntry, bridge.aliases.get('dummy'), 'no new client')

  // Same alias + different key: entry object is replaced
  bridge.putAlias('dummy', otherKey)
  t.not(initialEntry, bridge.aliases.get('dummy'), 'sanity check')
})
// A client that registers its own alias over RPC (rather than the
// bridge calling putAlias) must become scrapable.
test('A client which registers itself can get scraped', async t => {
  t.plan(4)

  const { bridge, dhtPromClient } = await setup(t)

  // 2 of the 4 planned assertions fire inside this handler
  bridge.aliasRpcServer.on('alias-request', ({ uid, remotePublicKey, alias, targetPublicKey }) => {
    t.is(alias, 'dummy', 'correct alias')
    t.alike(targetPublicKey, dhtPromClient.publicKey, 'correct target key got registered')
  })
  bridge.aliasRpcServer.on('register-error', ({ error, uid }) => {
    console.error(error)
    t.fail('unexpected error')
  })

  await bridge.ready()
  const baseUrl = await bridge.server.listen({ host: '127.0.0.1', port: 0 })

  // Give the swarm time to connect
  // TODO: use swarm.flush again when bug fixed
  await new Promise(resolve => setTimeout(resolve, 1000))

  // The client registers its alias as part of becoming ready
  await Promise.all([
    dhtPromClient.ready(),
    once(dhtPromClient, 'register-alias-success')
  ])

  const response = await axios.get(`${baseUrl}/scrape/dummy/metrics`, {
    validateStatus: null
  })

  t.is(response.status, 200, 'correct status')
  const hasDefaultMetric = response.data.includes('process_cpu_user_seconds_total')
  t.is(hasDefaultMetric, true, 'Successfully scraped metrics')
})
// Aliases carry an expiry; once it lapses without renewal the bridge
// must evict the entry, emit 'alias-expired' and rewrite its targets file.
test('A client gets removed and closed after it expires', async t => {
  // Short expiry + frequent expiry checks so the test observes eviction quickly
  const { bridge, dhtPromClient } = await setup(t, {
    entryExpiryMs: 1200,
    checkExpiredsIntervalMs: 100
  })
  await bridge.ready()
  await dhtPromClient.ready()
  bridge.putAlias('dummy', dhtPromClient.publicKey)
  await new Promise(resolve => setTimeout(resolve, 1000)) // TODO: use swarm.flush again when bug fixed
  // Can be 2 if the alias-request connection isn't cleaned up yet
  t.is(bridge.swarm.connections.size > 0, true, 'sanity check: connected')
  t.is(bridge.aliases.size, 1, 'sanity check')
  // Block until the periodic expiry check evicts the entry
  const [{ alias: expiredAlias }] = await once(bridge, 'alias-expired')
  t.is(expiredAlias, 'dummy', 'alias-expired event emitted')
  t.is(bridge.aliases.size, 0, 'alias removed when expired')
  // Eviction must also trigger a rewrite of the Prometheus targets file
  await once(bridge, 'aliases-updated')
  t.pass('aliases file rewritten after an entry gets removed')
})
// Renewing an alias (calling putAlias again with the same key) before
// expiry must reset the expiry window instead of evicting the entry.
test('A client does not get removed if it renews before the expiry', async t => {
  // Test is somewhat susceptible to CPU blocking due to timings
  // (add more margin if that happens in practice)
  const { bridge } = await setup(t, {
    entryExpiryMs: 500,
    checkExpiredsIntervalMs: 100
  })
  const key = 'a'.repeat(64)
  await bridge.ready()
  bridge.putAlias('dummy', key)
  // Renew the alias halfway through its expiry window
  setTimeout(() => {
    bridge.putAlias('dummy', key)
  }, bridge.entryExpiryMs / 2)
  t.is(bridge.aliases.size, 1, 'sanity check')
  // Wait past the ORIGINAL expiry: the mid-window renewal should have kept it alive
  await new Promise(resolve => setTimeout(
    resolve, bridge.entryExpiryMs + 100
  ))
  t.is(bridge.aliases.size, 1, 'alias not removed if renewed in time')
  // Wait past the RENEWED expiry with no further renewal: now it must be evicted
  await new Promise(resolve => setTimeout(
    resolve, bridge.entryExpiryMs + 100
  ))
  t.is(bridge.aliases.size, 0, 'alias removed when expired')
})
// Spins up a full peer-to-peer fixture for one test: a local DHT
// testnet, a bridge (swarm + fastify server + RPC client) and a
// DhtPromClient on its own DHT node, sharing one secret.
// Everything is torn down via t.teardown in reverse dependency order.
async function setup (t, bridgeOpts = {}) {
  // Collect default process metrics so there is something to scrape
  promClient.collectDefaultMetrics()
  t.teardown(() => promClient.register.clear())

  const dhtTestnet = await createTestnet()
  const bootstrap = dhtTestnet.bootstrap
  const sharedSecret = hypCrypto.randomBytes(32)

  // Bridge-side networking
  const swarm = new Hyperswarm({ bootstrap })
  const protomuxRpcClient = new ProtomuxRpcClient(swarm.dht, { keyPair: swarm.keyPair })

  const httpServer = fastify({ logger: false })
  const tempDir = await getTmpDir(t)
  const prometheusTargetsLoc = path.join(tempDir, 'prom-targets.json')

  const bridge = new PrometheusDhtBridge(swarm, httpServer, protomuxRpcClient, sharedSecret, {
    _forceFlushOnClientReady: true, // to avoid race conditions
    prometheusTargetsLoc,
    ...bridgeOpts
  })
  bridge.on('upstream-error', e => {
    console.warn(e.stack)
  })

  // Client side runs on its own DHT node, separate from the bridge's swarm
  const clientDht = new HyperDHT({ bootstrap })
  const clientProtomuxRpcClient = new ProtomuxRpcClient(clientDht)
  const dhtPromClient = new DhtPromClient(
    clientDht,
    clientProtomuxRpcClient,
    promClient,
    bridge.publicKey,
    'dummy',
    sharedSecret,
    'my-service',
    { bootstrap, hostname: 'my-hostname' }
  )
  dhtPromClient.on('register-alias-error', e => {
    console.warn(e.stack)
  })

  t.teardown(async () => {
    await httpServer.close()
    await bridge.close()
    await protomuxRpcClient.close()
    await clientProtomuxRpcClient.close()
    await dhtPromClient.close()
    await swarm.destroy()
    await dhtTestnet.destroy()
    promClient.register.clear()
  })

  const ownPublicKey = dhtPromClient.dht.defaultKeyPair.publicKey
  return { dhtPromClient, bridge, bootstrap, ownPublicKey, protomuxRpcClient }
}