@himalaya-quant/synapse
Version:
A lightweight TypeScript utility to spawn and interact with Python modules from Node.js with a native, message-based protocol over stdin/stdout.
311 lines (267 loc) • 10.7 kB
text/typescript
import { join } from 'path';
import { existsSync } from 'fs';
import { encode, decode } from '@msgpack/msgpack';
import { Subject, Subscription, tap } from 'rxjs';
import {
spawn,
spawnSync,
ChildProcessWithoutNullStreams,
} from 'child_process';
type QueuedInput = {
input: any;
parse?: boolean;
resolver: (value: any) => void;
rejector: (error: string) => void;
};
export class InstanceManger {
private readonly inputsQueue: QueuedInput[] = [];
private readonly instanceOutputs$ = new Subject<any>();
private readonly instanceLogs$ = new Subject<string>();
private readonly instanceInputs$ = new Subject<QueuedInput>();
private messageBuffer = Buffer.alloc(0);
private instance!: ChildProcessWithoutNullStreams;
private currentInputResolver!: (value: any) => void;
private instanceInputStreamSubscription!: Subscription;
private instanceOutputStreamSubscription!: Subscription;
/**
* Subscription to the instance logs.
* If you want your script logs to be seen by Synapse, in your python script
* always print to the stderr like:
*
* ```py
* import sys
*
* print("my log", file=sys.stderr)
* ```
*
* every log written this way will pass through this stream.
*
* @returns The observable to which you can subscribe to access
* the logs stream.
*/
get instanceLogs() {
return this.instanceLogs$.asObservable();
}
/**
* Calls the spawned python instance with the given input.
* Throws if the instance has not been spawned first, or if the script sends
* an error response.
*
* To send an error response, just send the usual message from the script,
* but passing a a dictionary with an "error" key and the error message
* as value. Eg: {"error": "my error message"}
*
* @param input Any simple JSON structure will be accepted.
* For more details see: https://msgpack.org/
* @param forceJSONParse Forcefully tries to parse the result. If it
* fails, will return the payload as it is.
*
* @returns The result returned from your python script.
* @throws {Error} If the instance has not been spawned or an error response
* is sent back from the python script
*/
call<ResultType = any>(
input: any,
forceJSONParse = false,
): Promise<ResultType> {
if (!this.instance)
throw new Error(
`Cannot send inputs to instance before spawning it.`,
);
return new Promise((resolver, rejector) => {
this.inputsQueue.push({
input,
resolver,
rejector,
parse: forceJSONParse,
});
if (this.inputsQueue.length === 1)
this.instanceInputs$.next(this.inputsQueue[0]);
});
}
/**
* Spawns a new python script instance and keeps it alive until dispose is
* called. After the spawning you can safely start sending messages to it.
* Throws if there's an error during the spawning process.
*
* What it does:
* - Postfixes the extension .py to the entrypoint if missing
* - Ensures that the paths are correct and the requirements.txt exists
* - Creates a dedicated Python virtual environment
* - Installs dependencies via requirements.txt
* - Spawns Python script as subprocess
* - Reuses instance until explicit disposal avoiding spawn overhead
*
* @param directory The path pointing to the python module directory.
* @param entrypoint The entrypoint file name.
*
* @returns A promise that resolves once the spawn completes.
* @throws {Error} If there's an error during the spawning process.
*/
async spawn(directory: string, entrypoint: string): Promise<void> {
if (this.instance) return;
entrypoint = this.postfixExtension(entrypoint);
this.ensureExistsOrThrow(directory, entrypoint);
if (!existsSync(join(directory, '.venv'))) {
this.createVirtualEnv(directory);
await this.installDependencies(directory);
}
await this.spawnInstance(directory, entrypoint);
this.openSubscriptions();
}
/**
* Disposes the instance, closing the stdin stream, all the subscriptions
* and tries to kill the instance.
* Manages graceful and forceful termination, first tries with a SIGTERM, if
* after 500ms is not killed, will force a SIGKILL.
* Throws if after the SIGKILL the instance is still alive.
*
* After dispose has been called, you will have to call spawn again. Trying
* to send any messages after dispose, will result in an error.
*
* @returns Resolves once the dispose has been done.
* @throws {Error} If after forceful termination the instance is still alive
*/
async dispose() {
this.instanceInputStreamSubscription?.unsubscribe();
this.instanceOutputStreamSubscription?.unsubscribe();
if (!this.instance) return;
this.instance.stdin.end();
this.instance.kill('SIGTERM');
const isTerminated = await this.waitForTermination(500);
if (!isTerminated) {
console.log(
'Instance did not close gracefully in 500ms. Forcing SIGKILL.',
);
this.instance.kill('SIGKILL');
await this.waitForTermination(500);
}
if (!this.instance.killed)
throw new Error(
`Cannot kill instance with PID: ${this.instance.pid}`,
);
this.instance = null!;
}
private waitForTermination(timeout: number): Promise<boolean> {
return new Promise((resolve) => {
setTimeout(() => {
resolve(this.instance.killed);
}, timeout);
});
}
private ensureExistsOrThrow(directory: string, entrypoint: string) {
if (!existsSync(directory))
throw new Error(`Directory "${directory}" does not exist.`);
if (!existsSync(join(directory, entrypoint)))
throw new Error(`Entrypoint "${entrypoint}" does not exist.`);
if (!existsSync(join(directory, 'requirements.txt')))
throw new Error(`"requirements.txt" file is missing.`);
}
private createVirtualEnv(directory: string) {
spawnSync('python', ['-m', 'venv', join(directory, '.venv')]);
}
private installDependencies(directory: string) {
const python = this.getVirtualEnvPythonInterpreter(directory);
const requirementsPath = join(directory, 'requirements.txt');
const installDeps = spawn(python, [
'-m',
'pip',
'install',
'-r',
requirementsPath,
]);
return new Promise<void>((resolve) => {
installDeps.stdout.on('data', (data) => {
console.log(`stdout: ${data}`);
});
installDeps.stderr.on('data', (data) => {
console.log(`stderr: ${data}`);
});
installDeps.on('close', async (code) => {
if (code === 0) {
resolve();
} else {
throw new Error(`Dependencies install failure: ${code}`);
}
});
});
}
private getVirtualEnvPythonInterpreter(directory: string) {
return join(directory, '.venv', 'bin', 'python');
}
private async spawnInstance(directory: string, entrypoint: string) {
this.instance = spawn(this.getVirtualEnvPythonInterpreter(directory), [
join(directory, entrypoint),
]);
this.instance.stdout.on('data', (chunk) => {
this.handleChunk(chunk);
});
// we expect all messages (logs, errors, etc) that is not the actual
// response on the stderr stream so we don't touch the stdin encoding
this.instance.stderr.on('data', (chunk) => {
const msg = `🐍 Python: ${chunk.toString()}`;
this.instanceLogs$.next(msg);
});
}
private packAndSend(message: any) {
const payload = encode(message);
const lengthBuffer = Buffer.alloc(4); // 4 bytes for the payload size
lengthBuffer.writeUint32LE(payload.length, 0);
this.instance.stdin.write(Buffer.concat([lengthBuffer, payload]));
}
private postfixExtension(entrypoint: string) {
return entrypoint.endsWith('.py') ? entrypoint : `${entrypoint}.py`;
}
private handleChunk = (chunk: Buffer) => {
this.messageBuffer = Buffer.concat([this.messageBuffer, chunk]);
while (this.messageBuffer.length >= 4) {
const messageLength = this.messageBuffer.readUInt32LE(0);
if (this.messageBuffer.length >= 4 + messageLength) {
const messagePayload = this.messageBuffer.subarray(
4,
4 + messageLength,
);
const decoded = decode(messagePayload);
this.instanceOutputs$.next(decoded);
this.messageBuffer = this.messageBuffer.subarray(
4 + messageLength,
);
} else {
// Not enough data yet
break;
}
}
};
private openSubscriptions() {
this.instanceInputStreamSubscription = this.instanceInputs$
.pipe(tap(({ input }) => this.packAndSend(input)))
.subscribe();
this.instanceOutputStreamSubscription = this.instanceOutputs$
.pipe(
tap((output) => {
const { parse, resolver, rejector } =
this.inputsQueue.shift()!;
let result = this.extractResult(parse, output);
if (result.error) rejector(result.error);
else resolver(result);
if (this.inputsQueue.length)
this.instanceInputs$.next(this.inputsQueue[0]);
}),
)
.subscribe();
}
private extractResult(parse: boolean | undefined, output: any) {
if (!parse) return output;
let result: any;
try {
// console.log(output);
result = JSON.parse(output);
} catch (e) {
let msg = `[ERROR] forced parsing failed. Expected parsable str from py: `;
msg += e instanceof Error ? e.message : JSON.stringify(e);
this.instanceLogs$.next(msg);
result = output;
}
return result;
}
}