UNPKG

hamok

Version:

Lightweight Distributed Object Storage on RAFT consensus algorithm

234 lines (197 loc) 7.44 kB
import { Hamok, HamokMap, HamokMessage, setHamokLogLevel } from 'hamok';
import Redis from 'ioredis';
import * as pino from 'pino';
import { createConcurrentExecutor } from './utils/ConcurrentExecutor';

const logger = pino.pino({
  name: 'redis-job-executing-example',
  level: 'debug',
});

/** A unit of work stored in the shared `jobs` map. */
type Job = {
  id: string;
  state: 'pending' | 'running' | 'stopped';
  /** Peer id the job has been assigned to; unset until the job is scheduled. */
  allocatedTo?: string;
  result?: unknown;
  error?: string;
  note?: string;
};

/** Per-server application data attached to each Hamok instance. */
type AppData = {
  name: string;
  /** Requests a job to be started (implemented by inserting it into the shared map). */
  start: (job: Job) => void;
  /** Requests a job to be stopped (implemented by deleting it from the shared map). */
  stop: (jobId: string) => void;
  /** Ids of the jobs this server has started and not yet stopped. */
  runningJobIds: Set<string>;
};

// Separate connections for publishing and subscribing on the Redis channel.
const publisher = new Redis();
const subscriber = new Redis();
const servers = new Map<string, Hamok<AppData>>();

// Both helpers hand the request to the first server exposing the corresponding handler.
const startJob = (job: Job) =>
  [...servers.values()].find((server) => server.appData.start)?.appData.start(job);
const endJob = (jobId: string) =>
  [...servers.values()].find((server) => server.appData.stop)?.appData.stop(jobId);

/**
 * Runs the example: subscribes to the Redis channel, creates five servers,
 * waits until all of them have joined, then submits twenty jobs and requests
 * each of them to be stopped ten seconds later.
 */
export async function run() {
  await subscriber.subscribe('hamok-channel', (err) => {
    if (err) {
      logger.error('Failed to subscribe: %s', err.message);
    }
  });

  for (let serverNum = 0; serverNum < 5; serverNum++) {
    const serverName = `server_${serverNum}`;

    servers.set(serverName, createServer(serverName));
  }

  await Promise.all([...servers.values()].map((server) => server.join()));

  for (let i = 0; i < 20; i++) {
    const jobId = `job_${i}`;

    logger.info('Creating job with id "%s"', jobId);
    startJob({
      id: jobId,
      state: 'pending',
    });
    setTimeout(() => {
      endJob(jobId);
    }, 10000);
  }
}

/**
 * Creates a Hamok instance wired to the shared Redis channel together with the
 * job handling logic built on the replicated `jobs` map.
 *
 * Every server starts jobs that are allocated to it and stops jobs that are
 * removed from the map. While a server is the leader it additionally allocates
 * pending jobs to the least loaded peer and puts the running jobs of departed
 * peers back to the pending state.
 *
 * @param name human readable name of the server, used in log messages
 * @param joined currently unused; kept for signature compatibility
 * @returns the created (not yet joined) Hamok instance
 */
function createServer(name: string, joined?: () => void) {
  const hamok = new Hamok<AppData>({
    appData: {
      name,
      runningJobIds: new Set(),
      start: () => void 0,
      stop: () => void 0,
    },
  });
  const jobs = hamok.createMap<string, Job>({
    mapId: 'jobs',
  });
  // Created with 1 as its argument — presumably the concurrency limit, so allocations run one at a time (helper not visible here).
  const executor = createConcurrentExecutor(1);

  /** Starts the job locally if it is pending and allocated to this peer. */
  const startJob = async (job: Job): Promise<void> => {
    if (job.allocatedTo !== hamok.localPeerId) return;
    if (job.state !== 'pending') return;

    try {
      const startedJob: Job = {
        ...job,
        state: 'running',
      };
      // A falsy result is treated as "the stored entry no longer matches `job`".
      const started = await jobs.updateIf(job.id, startedJob, job);

      if (!started) {
        logger.warn('%s tried to start job "%s", but it was failed, becasue the job entry changed', hamok.appData.name, job.id);

        return;
      }

      // Anything related to actually starting the job belongs here.
      hamok.appData.runningJobIds.add(job.id);

      logger.info('%s started job "%s".', hamok.appData.name, job.id);
    } catch (err) {
      logger.error(`Failed to handle job on ${hamok.appData.name} "${job.id}": ${err}`);
    }
  };

  /** Stops the job locally if this server was running it. */
  const stopJob = async (jobId: string): Promise<void> => {
    // Anything related to actually stopping the job belongs here.
    if (hamok.appData.runningJobIds.delete(jobId)) {
      logger.info('%s stopped job "%s".', hamok.appData.name, jobId);
    }
  };

  /** Queues the allocation of a pending, unallocated job to the least loaded peer. */
  const scheduleJob = async (job: Job): Promise<void> => {
    if (job.allocatedTo || job.state !== 'pending') return;

    executor(async () => {
      try {
        const serverLoads = new Map<string, number>(
          [...hamok.remotePeerIds, hamok.localPeerId].map((peerId) => [peerId, 0]),
        );

        for (const [, existingJob] of jobs) {
          const owner = existingJob.allocatedTo;

          if (!owner) continue;

          const load = serverLoads.get(owner);

          // Jobs owned by peers that are no longer members must not create a
          // candidate entry, otherwise a departed peer could be selected below.
          if (load === undefined) continue;

          serverLoads.set(owner, load + 1);
        }

        logger.info('Server loads: %s', [...serverLoads].map(([peerId, load]) => `${peerId}: ${load}`).join(', '));

        const smallestLoadedPeerId = [...serverLoads.entries()].sort((a, b) => a[1] - b[1])[0]?.[0];
        const allocatedJob = {
          ...job,
          allocatedTo: smallestLoadedPeerId,
        };
        const allocated = await jobs.updateIf(job.id, allocatedJob, job);

        if (!allocated) {
          logger.warn('%s tried to allocate job "%s" to "%s", but it was failed, because the job entry changed', hamok.appData.name, job.id, smallestLoadedPeerId);
        } else {
          logger.info('%s allocated job "%s" to "%s".', hamok.appData.name, job.id, smallestLoadedPeerId);
        }
      } catch (err) {
        // Same handling as startJob / rescheduleJob: log instead of leaking a rejection.
        logger.error(`Failed to allocate job "${job.id}": ${err}`);
      }
    });
  };

  /** Puts the job back to the pending, unallocated state so that it is scheduled again. */
  const rescheduleJob = async (job: Job): Promise<void> => {
    const rescheduledJob: Job = {
      ...job,
      state: 'pending',
      allocatedTo: undefined,
      result: undefined,
      error: undefined,
      // Avoid producing "undefined, rescheduled" when the job has no note yet.
      note: job.note ? `${job.note}, rescheduled` : 'rescheduled',
    };

    try {
      const rescheduled = await jobs.updateIf(job.id, rescheduledJob, job);

      if (!rescheduled) {
        logger.warn('%s tried to rescheduled job "%s", but it was failed.', hamok.appData.name, job.id);
      } else {
        logger.info('%s rescheduled job "%s".', hamok.appData.name, job.id);
      }
    } catch (err) {
      logger.error(`Failed to reschedule job "${job.id}": ${err}`);
    }
  };

  /** Reschedules every running job whose owner is neither this peer nor a known remote peer. */
  const rescheduleDeadPeersJob = async (): Promise<void> => {
    for (const [, job] of jobs) {
      if (job.state !== 'running') continue;

      const owner = job.allocatedTo;

      // remotePeerIds does not contain the local peer, so it is checked
      // explicitly; otherwise the jobs running on this server would be
      // treated as orphaned and handed to another peer while still running here.
      if (owner !== undefined && (owner === hamok.localPeerId || hamok.remotePeerIds.has(owner))) continue;

      await rescheduleJob(job);
    }
  };

  // Listeners that are attached only while this server is the leader.
  const leaderOnInsert = (jobId: string, job: Job) => scheduleJob(job).catch(() => void 0);
  const leaderOnUpdate = (jobId: string, oldValue: Job, newValue: Job) => scheduleJob(newValue).catch(() => void 0);
  const leaderOnRemotePeerLeft = async (remotePeerId: string) => rescheduleDeadPeersJob().catch(() => void 0);

  hamok.on('follower', () => {
    jobs.off('insert', leaderOnInsert);
    jobs.off('update', leaderOnUpdate);
    hamok.off('remote-peer-left', leaderOnRemotePeerLeft);
  });

  hamok.on('leader', async () => {
    logger.debug('%s is now the leader, will check if there are running jobs belongs to a "dead" instance.', hamok.appData.name);

    // Attach the listeners before touching any entry: rescheduling a job emits
    // an update, and that update is what triggers its re-allocation.
    jobs.on('insert', leaderOnInsert);
    jobs.on('update', leaderOnUpdate);
    hamok.on('remote-peer-left', leaderOnRemotePeerLeft);

    // Allocate the jobs that are still waiting for a peer.
    for (const [, job] of jobs) {
      if (job.state !== 'pending' || job.allocatedTo) continue;

      await scheduleJob(job);
    }

    // Reschedule the jobs that are running on peers that are gone.
    await rescheduleDeadPeersJob();
  });

  jobs.on('update', (jobId, oldValue, newValue) => startJob(newValue).catch(() => void 0));
  jobs.on('remove', (jobId) => stopJob(jobId).catch(() => void 0));

  hamok.appData.start = (job: Job) => {
    logger.info('%s received request to start job "%s"', hamok.appData.name, job.id);

    jobs
      .insert(job.id, job)
      .then((alreadyInserted) => {
        if (alreadyInserted) {
          logger.warn('%s tried to start job "%s", but it was failed, because the job entry changed', hamok.appData.name, job.id);
        }
      })
      .catch((err) => {
        logger.error(`Failed to start job "${job.id}": ${err}`);
      });
  };

  hamok.appData.stop = (jobId: string) => {
    logger.info('%s received request to stop job "%s"', hamok.appData.name, jobId);

    jobs
      .delete(jobId)
      .then((deleted) => {
        if (!deleted) {
          logger.warn('%s tried to stop job "%s", but it was failed, because the job entry changed', hamok.appData.name, jobId);
        }
      })
      .catch((err) => {
        logger.error(`Failed to stop job "${jobId}": ${err}`);
      });
  };

  // Redis is the transport: every message published on the channel is handed
  // to this instance, and every outgoing message is published to the channel.
  subscriber.on('messageBuffer', (channel, buffer) => {
    hamok.accept(HamokMessage.fromBinary(buffer));
  });

  hamok.on('message', (message) => publisher.publish('hamok-channel', Buffer.from(message.toBinary())));

  logger.info('%s created.', hamok.appData.name);

  return hamok;
}

process.on('unhandledRejection', (reason) => {
  // pino drops arguments that have no printf placeholder, so the reason is
  // passed in the merging object to make sure it ends up in the log line.
  logger.error({ err: reason }, 'Unhandled Rejection');
});

if (require.main === module) {
  logger.info('Running from module file');
  setHamokLogLevel('info');
  run().catch((err) => {
    logger.error(`Failed to run the example: ${err}`);
  });
}