@atomist/automation-client
Version:
Atomist API for software low-level client
622 lines (566 loc) • 25.9 kB
text/typescript
import * as cluster from "cluster";
import { StatsD } from "hot-shots";
import * as stringify from "json-stringify-safe";
import * as _ from "lodash";
import * as TinyQueue from "tinyqueue";
import * as WebSocket from "ws";
import { Configuration } from "../../../configuration";
import { EventFired } from "../../../HandleEvent";
import {
AutomationContextAware,
HandlerContext,
} from "../../../HandlerContext";
import { HandlerResult } from "../../../HandlerResult";
import { AutomationEventListener } from "../../../server/AutomationEventListener";
import { AutomationServer } from "../../../server/AutomationServer";
import { GraphClient } from "../../../spi/graph/GraphClient";
import { MessageClient } from "../../../spi/message/MessageClient";
import { printError } from "../../../util/error";
import { logger } from "../../../util/logger";
import { CommandInvocation } from "../../invoker/Payload";
import * as namespace from "../../util/cls";
import { Deferred } from "../../util/Deferred";
import {
HealthStatus,
registerHealthIndicator,
} from "../../util/health";
import { poll } from "../../util/poll";
import {
registerShutdownHook,
safeExit,
terminationGraceful,
terminationGracePeriod,
} from "../../util/shutdown";
import { AbstractRequestProcessor } from "../AbstractRequestProcessor";
import {
CommandIncoming,
EventIncoming,
isCommandIncoming,
isEventIncoming,
} from "../RequestProcessor";
import { WebSocketLifecycle } from "../websocket/WebSocketLifecycle";
import {
sendMessage,
WebSocketCommandMessageClient,
WebSocketEventMessageClient,
} from "../websocket/WebSocketMessageClient";
import {
RegistrationConfirmation,
WebSocketRequestProcessor,
} from "../websocket/WebSocketRequestProcessor";
import {
broadcast,
MasterMessage,
WorkerMessage,
} from "./messages";
/**
 * An entry on the master's pending work queue: a message waiting to be sent
 * to a worker, together with the bookkeeping needed to settle it later.
 */
interface MessageType {
    /** Time (`Date.now()`) at which the entry was queued; used to order entries of equal priority. */
    ts: number;
    /** The message that will be sent to the worker once one is available. */
    message: MasterMessage;
    /** Deferred result and handler context that belong to this invocation. */
    dispatched: Dispatched<any>;
}
/* tslint:disable:max-file-line-count */
/**
 * A RequestProcessor that runs in the Node.js cluster master and delegates the
 * actual command and event processing to cluster workers.
 *
 * Incoming commands and events are put on a priority queue (commands before
 * everything else, older entries first) and are dispatched to a connected
 * worker that currently has fewer than `maxConcurrentPerWorker` invocations in
 * flight. Workers report back through `atomist:*` messages, which are used to
 * relay messages and status updates over the websocket and to settle the
 * promise that was handed to the caller of `invokeCommand` / `invokeEvent`.
 * @see ClusterWorkerRequestProcessor
 */
export class ClusterMasterRequestProcessor extends AbstractRequestProcessor implements WebSocketRequestProcessor {

    /** Confirmation of the current registration; `undefined` while disconnected. */
    private registration?: RegistrationConfirmation;

    /** Lifecycle wrapper around the websocket, taken from `configuration.ws.lifecycle`. */
    private readonly webSocketLifecycle: WebSocketLifecycle;

    /** Commands that were sent to a worker and have not completed yet, keyed by invocation id. */
    private readonly commands: Map<string, { dispatched: Dispatched<HandlerResult>, worker: number }> = new Map();

    /** Events that were sent to a worker and have not completed yet, keyed by invocation id. */
    private readonly events: Map<string, { dispatched: Dispatched<HandlerResult[]>, worker: number }> = new Map();

    /** Work waiting for a free worker. Commands sort before other messages; ties are broken by queue time. */
    private readonly messages: TinyQueue<MessageType> = new TinyQueue([], (a: MessageType, b: MessageType) => {
        if (a.message.type === "atomist:command" && b.message.type !== "atomist:command") {
            return -1;
        } else if (a.message.type !== "atomist:command" && b.message.type === "atomist:command") {
            return 1;
        } else {
            return a.ts - b.ts;
        }
    });

    /** Set once the shutdown hook has started running, so that it only runs once. */
    private shutdownInitiated: boolean = false;

    /** While true, a worker that exits is replaced by a freshly forked one. */
    private replaceWorkers: boolean = true;

    /** True while the periodic check is asking the backend to back off. */
    private backoffInitiated: boolean = false;

    /**
     * Create the processor, register its health indicator and shutdown hook and
     * start the periodic queue-length reporting and backoff check.
     * @param automations automation server, passed through to the base class
     * @param configuration client configuration; `ws.lifecycle` must hold the WebSocketLifecycle
     * @param listeners listeners notified about registration and handler outcomes
     * @param numWorkers number of workers forked by `run`; defaults to the number of CPUs
     * @param maxConcurrentPerWorker maximum number of in-flight invocations per worker
     */
    constructor(protected automations: AutomationServer,
                protected configuration: Configuration,
                protected listeners: AutomationEventListener[] = [],
                protected numWorkers: number = require("os").cpus().length,
                protected maxConcurrentPerWorker: number = 4) {
        super(automations, configuration, listeners);
        this.webSocketLifecycle = (configuration.ws as any).lifecycle as WebSocketLifecycle;

        // Up only while the websocket is connected and a registration is present;
        // the ids of all in-flight invocations are reported either way.
        registerHealthIndicator(() => {
            const detail = {
                commands: Array.from(this.commands.keys()),
                events: Array.from(this.events.keys()),
            };
            const up = this.webSocketLifecycle.connected() && !!this.registration;
            return {
                status: up ? HealthStatus.Up : HealthStatus.Down,
                detail,
            };
        });

        registerShutdownHook(async () => {
            if (this.shutdownInitiated) {
                return 0;
            }
            this.shutdownInitiated = true;

            const gracePeriod = terminationGracePeriod(this.configuration);
            if (terminationGraceful(this.configuration)) {
                try {
                    logger.debug("Waiting for queue to empty");
                    await poll(() => this.queueLength() < 1, gracePeriod);
                } catch (e) {
                    logger.warn("Work queue did not empty within grace period");
                    return 1;
                }
            }

            logger.debug("Terminating workers");
            await this.terminateWorkers(gracePeriod);
            return 0;
        }, 0, "drain work queue and shutdown workers");

        this.scheduleQueueLength();
        this.scheduleBackoffCheck();
    }

    /**
     * Store the registration on this instance and on `configuration.ws.session`
     * and broadcast it to the workers.
     * @param registration confirmation received for the registration
     */
    public onRegistration(registration: RegistrationConfirmation): void {
        logger.debug("Registration successful: %s", stringify(registration));
        (this.configuration.ws as any).session = registration;
        this.registration = registration;
        broadcast({
            type: "atomist:registration",
            registration: this.registration,
            context: undefined,
        });
    }

    /**
     * Hand the freshly connected websocket to the lifecycle and notify listeners.
     * @param ws the connected websocket
     */
    public onConnect(ws: WebSocket): void {
        logger.debug("WebSocket connection established. Listening for incoming messages");
        this.webSocketLifecycle.set(ws);
        this.listeners.forEach(l => l.registrationSuccessful(this));
    }

    /**
     * Reset the websocket lifecycle and forget the current registration.
     */
    public onDisconnect(): void {
        this.webSocketLifecycle.reset();
        this.registration = undefined;
    }

    /**
     * Fork `numWorkers` workers, wire up their message handling and install the
     * cluster-level listeners that replace workers which exit.
     * @returns a promise that resolves once every initially forked worker has
     *          sent its `atomist:online` message
     */
    public run(): Promise<any> {
        const attachEvents = (worker: cluster.Worker, deferred: Deferred<any>) => {
            worker.on("message", message => {
                const msg = message as WorkerMessage;

                // Wait for online message to come in
                if (msg.type === "atomist:online") {
                    deferred.resolve();
                    return;
                }

                // Every worker message is handled inside its own namespace session
                const ses = namespace.create();
                ses.run(() => {
                    this.handleWorkerMessage(worker, msg);
                });
            });
        };

        const promises: Array<Promise<any>> = [];
        for (let i = 0; i < this.numWorkers; i++) {
            const worker = cluster.fork();
            const deferred = new Deferred<any>();
            promises.push(deferred.promise);
            attachEvents(worker, deferred);
        }

        cluster.on("disconnect", worker => {
            logger.warn(`Worker '${worker.id}' disconnected`);
        });

        cluster.on("online", worker => {
            logger.debug(`Worker '${worker.id}' connected`);
            // A new worker means new capacity: try to dispatch queued work
            this.startMessage();
        });

        cluster.on("exit", (worker, code, signal) => {
            const workerId = worker.id;

            // Remove all inflight work for died worker
            // NOTE(review): the deferred results of the removed entries are never
            // settled here, so promises handed out for them stay pending - confirm
            // whether callers rely on that.
            for (const [k, v] of this.events.entries()) {
                if (v.worker === workerId) {
                    this.events.delete(k);
                }
            }
            for (const [k, v] of this.commands.entries()) {
                if (v.worker === workerId) {
                    this.commands.delete(k);
                }
            }

            if (this.replaceWorkers) {
                logger.warn(`Worker '${worker.id}' exited with status '${code}' and signal '${signal}', replacing...`);
                attachEvents(cluster.fork(), new Deferred());
            } else {
                logger.debug(`Worker '${worker.id}' shut down with status '${code}' and signal '${signal}'`);
            }
        });

        return Promise.all(promises);
    }

    /**
     * Queue a command for processing by a worker.
     * @param ci command invocation (not used by this implementation)
     * @param ctx handler context whose `context` is sent along to the worker
     * @param command incoming command payload that is forwarded to the worker
     * @param callback receives the promise that settles when the worker reports the outcome
     */
    protected invokeCommand(ci: CommandInvocation,
                            ctx: HandlerContext & AutomationContextAware,
                            command: CommandIncoming,
                            callback: (result: Promise<HandlerResult>) => void): void {
        const message: MasterMessage = {
            type: "atomist:command",
            registration: this.registration,
            context: ctx.context,
            data: command,
        };
        logger.debug(`Queuing incoming command handler request '${command.command}'`);
        const dispatched = new Dispatched(new Deferred<HandlerResult>(), ctx);
        this.messages.push({ message, dispatched, ts: Date.now() });
        callback(dispatched.result.promise);
        this.startMessage();
    }

    /**
     * Queue an event for processing by a worker.
     * @param ef fired event (not used by this implementation)
     * @param ctx handler context whose `context` is sent along to the worker
     * @param event incoming event payload that is forwarded to the worker
     * @param callback receives the promise that settles when the worker reports the outcome
     */
    protected invokeEvent(ef: EventFired<any>,
                          ctx: HandlerContext & AutomationContextAware,
                          event: EventIncoming,
                          callback: (results: Promise<HandlerResult[]>) => void): void {
        const message: MasterMessage = {
            type: "atomist:event",
            registration: this.registration,
            context: ctx.context,
            data: event,
        };
        logger.debug(`Queuing incoming event subscription '${event.extensions.operationName}'`);
        const dispatched = new Dispatched(new Deferred<HandlerResult[]>(), ctx);
        this.messages.push({ message, dispatched, ts: Date.now() });
        callback(dispatched.result.promise);
        this.startMessage();
    }

    /**
     * Send a status payload over the websocket.
     * @param payload status payload to send
     * @param ctx handler context (not used by this implementation)
     */
    protected async sendStatusMessage(payload: any, ctx: HandlerContext & AutomationContextAware): Promise<any> {
        return this.webSocketLifecycle.send(payload);
    }

    /**
     * The master does not create a graph client.
     * @returns always `undefined`
     */
    protected createGraphClient(event: CommandIncoming | EventIncoming): GraphClient {
        return undefined;
    }

    /**
     * Create the websocket-backed message client matching the incoming payload.
     * @param event incoming command or event
     * @returns a command or event message client, or `undefined` if the payload is neither
     */
    protected createMessageClient(event: CommandIncoming | EventIncoming): MessageClient {
        if (isCommandIncoming(event)) {
            return new WebSocketCommandMessageClient(event, this.webSocketLifecycle, this.configuration);
        } else if (isEventIncoming(event)) {
            return new WebSocketEventMessageClient(event, this.webSocketLifecycle, this.configuration);
        }
        return undefined;
    }

    /**
     * Handle a single `atomist:*` message received from a worker. Expected to be
     * called inside a namespace session (see `run`).
     * @param worker the worker that sent the message
     * @param msg the message received from the worker
     */
    private handleWorkerMessage(worker: cluster.Worker, msg: WorkerMessage): void {
        // Only process our messages
        if (!msg.type || !msg.type.startsWith("atomist:")) {
            return;
        }

        namespace.set(msg.context);
        logger.debug("Received '%s' message from worker '%s': %j", msg.type, worker.id, msg.context);

        const invocationId = namespace.get().invocationId;
        const ctx = hydrateContext(msg);
        // Wrapped in an arrow function so `this` is preserved when used as a promise callback
        const clear = () => this.clearNamespace();

        if (msg.type === "atomist:message") {
            // Relay the message through the message client of the originating invocation
            let messageClient: MessageClient;
            if (this.commands.has(invocationId)) {
                messageClient = this.commands.get(invocationId).dispatched.context.messageClient;
            } else if (this.events.has(invocationId)) {
                messageClient = this.events.get(invocationId).dispatched.context.messageClient;
            } else {
                logger.error("Can't handle message from worker due to missing messageClient");
                this.clearNamespace();
                return;
            }

            if (msg.data.destinations && msg.data.destinations.length > 0) {
                messageClient.send(msg.data.message, msg.data.destinations, msg.data.options)
                    .then(clear, clear);
            } else {
                messageClient.respond(msg.data.message, msg.data.options)
                    .then(clear, clear);
            }
        } else if (msg.type === "atomist:status") {
            this.webSocketLifecycle.send(msg.data);
        } else if (msg.type === "atomist:command_success") {
            this.settleInvocation(
                this.commands,
                invocationId,
                this.listeners.map(l => () => l.commandSuccessful(msg.event as CommandInvocation,
                    ctx, msg.data as HandlerResult)),
                result => result.resolve(msg.data as HandlerResult));
        } else if (msg.type === "atomist:command_failure") {
            this.settleInvocation(
                this.commands,
                invocationId,
                this.listeners.map(l => () => l.commandFailed(msg.event as CommandInvocation,
                    ctx, msg.data as HandlerResult)),
                result => result.reject(msg.data as HandlerResult));
        } else if (msg.type === "atomist:event_success") {
            this.settleInvocation(
                this.events,
                invocationId,
                this.listeners.map(l => () => l.eventSuccessful(msg.event as EventFired<any>,
                    ctx, msg.data as HandlerResult[])),
                result => result.resolve(msg.data as HandlerResult[]));
        } else if (msg.type === "atomist:event_failure") {
            this.settleInvocation(
                this.events,
                invocationId,
                this.listeners.map(l => () => l.eventFailed(msg.event as EventFired<any>,
                    ctx, msg.data as HandlerResult[])),
                result => result.reject(msg.data as HandlerResult[]));
        } else if (msg.type === "atomist:shutdown") {
            logger.info(`Immediate shutdown requested from worker`);
            this.configuration.ws.termination.graceful = false;
            safeExit(msg.data);
        }
    }

    /**
     * Run the listener notifications one after the other, then settle and remove
     * the in-flight entry, clear the namespace and try to dispatch the next
     * queued message. A failing listener is logged and does not prevent the
     * invocation from being settled.
     * @param inflight map that tracks the invocation (`commands` or `events`)
     * @param invocationId id of the invocation reported by the worker
     * @param notifications listener callbacks to run sequentially
     * @param settle resolves or rejects the deferred result of the invocation
     */
    private settleInvocation<T>(inflight: Map<string, { dispatched: Dispatched<T>, worker: number }>,
                                invocationId: string,
                                notifications: Array<() => Promise<any>>,
                                settle: (result: Deferred<T>) => void): void {
        const finish = () => {
            const entry = inflight.get(invocationId);
            // The entry may already be gone, e.g. because its worker exited
            if (!!entry) {
                settle(entry.dispatched.result);
                inflight.delete(invocationId);
            }
            this.clearNamespace();
            this.startMessage();
        };

        notifications
            .reduce((p, f) => p.then(f), Promise.resolve())
            .then(finish)
            .catch(e => {
                logger.warn(`Failed to run listeners: ${e.message}`);
                printError(e);
                finish();
            });
    }

    /**
     * Pick a random connected worker that has fewer than `maxConcurrentPerWorker`
     * invocations in flight. In-flight entries whose worker is no longer among
     * the connected workers are dropped as a side effect.
     * @returns the selected worker, or `undefined` if no worker has capacity
     */
    private assignWorker(): cluster.Worker | undefined {
        let workers: Array<{ worker: cluster.Worker, messages: number }> = [];
        for (const id in cluster.workers) {
            if (cluster.workers.hasOwnProperty(id)) {
                const worker = cluster.workers[id];
                // isConnected is a method: it has to be called, a bare reference is always truthy
                if (worker.isConnected()) {
                    workers.push({ worker, messages: 0 });
                }
            }
        }

        // Count in-flight work per worker and collect entries of workers that are gone.
        // NOTE(review): dropped entries are not settled, so their promises stay pending.
        const deadEvents: string[] = [];
        this.events.forEach((e, k) => {
            const worker = workers.find(w => w.worker.id === e.worker);
            if (!!worker) {
                worker.messages = worker.messages + 1;
            } else {
                deadEvents.push(k);
            }
        });
        deadEvents.forEach(de => this.events.delete(de));

        const deadCommands: string[] = [];
        this.commands.forEach((c, k) => {
            const worker = workers.find(w => w.worker.id === c.worker);
            if (!!worker) {
                worker.messages = worker.messages + 1;
            } else {
                deadCommands.push(k);
            }
        });
        deadCommands.forEach(dc => this.commands.delete(dc));

        workers = workers.filter(w => w.messages < this.maxConcurrentPerWorker);
        if (workers.length === 0) {
            return undefined;
        }
        return workers[Math.floor(Math.random() * workers.length)].worker;
    }

    /**
     * Dispatch at most one queued message to a worker with free capacity, record
     * it as in flight, and report the queue lengths.
     */
    private startMessage(): void {
        if (this.messages.length > 0) {
            const worker = this.assignWorker();
            if (!!worker) {
                const message = this.messages.pop();
                namespace.set(message.message.context);
                if (message.message.type === "atomist:command") {
                    this.commands.set(message.message.context.invocationId, {
                        dispatched: message.dispatched,
                        worker: worker.id,
                    });
                    logger.debug(
                        "Incoming command handler request '%s' dispatching to worker '%s'",
                        message.message.data.command,
                        worker.id);
                } else if (message.message.type === "atomist:event") {
                    this.events.set(message.message.context.invocationId, {
                        dispatched: message.dispatched,
                        worker: worker.id,
                    });
                    logger.debug(
                        "Incoming event handler subscription '%s' dispatching to worker '%s'",
                        message.message.data.extensions.operationName,
                        worker.id);
                }
                worker.send(message.message);
            }
        }
        this.reportQueueLength();
    }

    /**
     * @returns the number of queued messages plus in-flight events and commands
     */
    private queueLength(): number {
        return this.messages.length + this.events.size + this.commands.size;
    }

    /**
     * Report the queue lengths once per second if statsd is enabled. The timer
     * is unref'ed so that it does not keep the process alive.
     */
    private scheduleQueueLength(): void {
        if (this.configuration.statsd.enabled) {
            setInterval(() => {
                this.reportQueueLength();
            }, 1000).unref();
        }
    }

    /**
     * Periodically check the number of queued messages and send a `backoff`
     * control message over the websocket while the queue is too long or a
     * shutdown is in progress.
     *
     * Backoff starts when the queue reaches `ws.backoff.threshold` (default:
     * `numWorkers * maxConcurrentPerWorker`) and continues until the queue drops
     * below `threshold * ws.backoff.factor` (default 0.5). The check runs every
     * `ws.backoff.interval` ms (default 2500) and each control message asks for
     * `ws.backoff.duration` ms (default 5000).
     */
    private scheduleBackoffCheck(): void {
        const workers = this.numWorkers;
        const maxConcurrent = this.maxConcurrentPerWorker;
        const threshold = _.get(this.configuration, "ws.backoff.threshold") || (workers * maxConcurrent);
        const interval = _.get(this.configuration, "ws.backoff.interval") || 2500;
        const duration = _.get(this.configuration, "ws.backoff.duration") || 5000;
        let factor = _.get(this.configuration, "ws.backoff.factor") || 0.5;
        // Keep the factor within [0, 1]
        if (factor > 1) {
            factor = 0.5;
        } else if (factor < 0) {
            factor = 0;
        }

        setInterval(() => {
            const messageCount = this.messages.length;
            const statsd = (this.configuration.statsd as any).__instance as StatsD;

            if ((!this.backoffInitiated && messageCount >= threshold) ||
                (this.backoffInitiated && messageCount >= (threshold * factor)) ||
                this.shutdownInitiated) {

                sendMessage({
                    control: {
                        name: "backoff",
                        params: {
                            millis: duration,
                        },
                    },
                }, this.webSocketLifecycle.get(), false);

                if (!this.backoffInitiated) {
                    logger.debug(`Initiated incoming messages backoff. queue size: ${messageCount}, threshold: ${threshold}`);
                }
                this.backoffInitiated = true;

                if (!!statsd) {
                    statsd.gauge(
                        "work_queue.backoff",
                        1,
                        [],
                        () => { /* intentionally empty */
                        });
                }
            } else {
                if (this.backoffInitiated) {
                    logger.debug(`Stopped incoming messages backoff. queue size: ${messageCount}, threshold: ${threshold}`);
                }
                this.backoffInitiated = false;

                if (!!statsd) {
                    statsd.gauge(
                        "work_queue.backoff",
                        0,
                        [],
                        () => { /* intentionally empty */
                        });
                }
            }
        }, interval).unref();
    }

    /**
     * Report the number of pending messages and in-flight events and commands
     * as statsd gauges, if statsd is enabled and a client instance is available.
     */
    private reportQueueLength(): void {
        if (this.configuration.statsd.enabled) {
            const statsd = (this.configuration.statsd as any).__instance as StatsD;
            if (!!statsd) {
                statsd.gauge(
                    "work_queue.pending",
                    this.messages.length,
                    1,
                    [],
                    () => { /* intentionally empty */
                    },
                );
                statsd.gauge(
                    "work_queue.events",
                    this.events.size,
                    1,
                    [],
                    () => { /* intentionally empty */
                    },
                );
                statsd.gauge(
                    "work_queue.commands",
                    this.commands.size,
                    1,
                    [],
                    () => { /* intentionally empty */
                    },
                );
            }
        }
    }

    /**
     * Stop replacing workers, ask every connected worker to shut down and wait
     * for all workers to exit. Does nothing when called a second time.
     * @param gracePeriod maximum time passed to `poll` while waiting for the workers to exit
     */
    private async terminateWorkers(gracePeriod: number): Promise<void> {
        if (!this.replaceWorkers) {
            return;
        }
        this.replaceWorkers = false;

        logger.debug("Sending workers the shutdown message");
        Object.keys(cluster.workers)
            .map(id => cluster.workers[id])
            // A worker whose IPC channel is already closed cannot receive the message
            .filter(worker => !!worker && worker.isConnected())
            .forEach(worker => worker.send({ type: "atomist:shutdown" }));

        try {
            logger.debug("Waiting for workers to exit");
            await poll(() => Object.keys(cluster.workers).length < 1, gracePeriod);
            logger.debug("All workers have exited");
        } catch (e) {
            logger.warn(`Not all workers exited in allotted time: ${Object.keys(cluster.workers).join(",")}`);
        }
    }
}
/**
 * Holds the deferred result of an invocation together with the handler
 * context it was created with.
 */
class Dispatched<T> {

    /** Deferred that carries the outcome of the invocation. */
    public result: Deferred<T>;

    /** Handler context of the invocation. */
    public context: HandlerContext;

    constructor(result: Deferred<T>, context: HandlerContext) {
        this.result = result;
        this.context = context;
    }
}
/**
 * Build a minimal handler context from the context carried by a worker message.
 * Only the ids and the raw context are copied over; the result is cast to
 * HandlerContext and carries no other members.
 * @param msg message received from a worker; its `context` must be set
 * @returns object with `invocationId`, `correlationId`, `workspaceId` and `context`
 */
function hydrateContext(msg: WorkerMessage): HandlerContext {
    const context = msg.context;
    const { invocationId, correlationId, workspaceId } = context;
    const hydrated = {
        invocationId,
        correlationId,
        workspaceId,
        context,
    };
    return hydrated as any as HandlerContext;
}