UNPKG

@seasketch/geoprocessing

Version:

Geoprocessing and reporting framework for SeaSketch 2.0

291 lines (249 loc) • 9.37 kB
import { GeoprocessingStack } from "./GeoprocessingStack.js";
import { GeoprocessingNestedStackProps, LambdaStack } from "./LambdaStack.js";
import {
  GeoprocessingFunctionMetadata,
  isGeoprocessingFunctionMetadata,
  isPreprocessingFunctionMetadata,
  isSyncFunctionMetadata,
  ProcessingFunctionMetadata,
} from "../manifest.js";
import { keyBy } from "../../client-core.js";
import { CfnOutput } from "aws-cdk-lib";
import { Function } from "aws-cdk-lib/aws-lambda";

/**
 * Creates lambda sub-stacks, as many as needed so as not to break resource limit.
 *
 * Functions are split into two families of `LambdaStack`s:
 * - "functions-group-N": every async geoprocessing function plus every sync
 *   function that is not registered as a worker of another function.
 * - "workers-group-N": every function registered as a worker.
 *
 * The title grouping of each family is also published as a `CfnOutput`
 * ("stacksFunction" / "stacksWorker").
 * NOTE(review): presumably these outputs are what callers pass back in as
 * `props.existingFunctionStacks` / `props.existingWorkerStacks` so functions keep
 * their stack between deploys - confirm against the caller.
 *
 * @param stack parent stack; supplies the sync/async function metadata and is the
 *   scope for the created sub-stacks and outputs
 * @param props nested stack props; `manifest`, `functionsPerStack`,
 *   `existingFunctionStacks` and `existingWorkerStacks` are read here, and the full
 *   props are forwarded to every `LambdaStack` with the manifest narrowed to the group
 * @returns all function stacks followed by all worker stacks
 * @throws Error if an async function registers a worker that is not a sync function
 *   in the manifest, if a worker is registered by more than one parent, or if a sync
 *   function named after an async function plus "Worker" is not registered with it
 */
export const createLambdaStacks = (
  stack: GeoprocessingStack,
  props: GeoprocessingNestedStackProps,
): LambdaStack[] => {
  // Note: `||` means any falsy value (including 0) falls back to 20
  const FUNCTIONS_PER_STACK = props.functionsPerStack || 20;

  // create useful arrays and mappings of function metadata
  const syncFunctionMetas = stack.getSyncFunctionMetas();
  const asyncFunctionMetas = stack.getAsyncFunctionMetas();
  const asyncFunctionMap = keyBy(asyncFunctionMetas, (f) => f.title);
  const syncFunctionMap = keyBy(syncFunctionMetas, (f) => f.title);
  const asyncTitles = Object.keys(asyncFunctionMap);
  const syncTitles = Object.keys(syncFunctionMap);

  // Map of async function titles to their worker function titles
  const asyncWorkerMap: Record<string, string[]> = {};
  for (const func of props.manifest.geoprocessingFunctions) {
    if (func.executionMode === "async") {
      asyncWorkerMap[func.title] = [];
    }
  }

  for (const asyncFuncMeta of asyncFunctionMetas) {
    if (asyncFuncMeta.workers) {
      for (const worker of asyncFuncMeta.workers) {
        const workerMeta = syncFunctionMap[worker];
        if (workerMeta && isSyncFunctionMetadata(workerMeta)) {
          asyncWorkerMap[asyncFuncMeta.title].push(worker);
        } else {
          throw new Error(
            `worker function ${worker} registered by ${asyncFuncMeta.title} not found in manifest or not a sync geoprocessing function`,
          );
        }
      }
    }
  }

  // Map of worker function titles to their parent function title
  const workerAsyncMap: Record<string, string> = {};
  for (const func of props.manifest.geoprocessingFunctions) {
    if (func.workers) {
      for (const worker of func.workers) {
        if (workerAsyncMap[worker]) {
          throw new Error(
            `Worker function ${worker} is used by more than one parent function: ${workerAsyncMap[worker]} and ${func.title}`,
          );
        } else {
          workerAsyncMap[worker] = func.title;
        }
      }
    }
  }

  // Compile list of sync functions that are not used as workers
  const nonWorkerSyncTitles: string[] = [];
  for (const syncTitle of syncTitles) {
    if (!workerAsyncMap[syncTitle]) {
      nonWorkerSyncTitles.push(syncTitle);
    }
  }

  for (const syncTitle of syncTitles) {
    // If worker function is same title as parent + 'Worker' but not registered with it, then throw
    if (syncTitle.includes("Worker")) {
      // Only the first occurrence of "Worker" is stripped to derive the parent title
      const baseTitle = syncTitle.replace("Worker", "");
      if (
        asyncTitles.includes(baseTitle) &&
        asyncWorkerMap[baseTitle] &&
        asyncWorkerMap[baseTitle].includes(syncTitle) === false
      ) {
        throw new Error(
          `If function ${syncTitle} is a worker of ${baseTitle} then it will need to be registered in the ${baseTitle} GeoprocessingHandler using workers option. e.g. workers: ['${syncTitle}']`,
        );
      }
    }
  }

  // Allocate functions to stack groups
  const functionTitles = [
    ...Object.keys(asyncWorkerMap),
    ...nonWorkerSyncTitles,
  ];
  const functionMetas = functionTitles.map(
    (title) => asyncFunctionMap[title] || syncFunctionMap[title],
  );
  const functionMap = keyBy(functionMetas, (f) => f.title);

  const propFunctionGroups: GeoprocessingFunctionMetadata[][] =
    props.existingFunctionStacks
      ? props.existingFunctionStacks.map((g) =>
          g
            .filter((title) => functionTitles.includes(title)) // filter out any titles that are not in manifest this time
            .map((title) => asyncFunctionMap[title] || syncFunctionMap[title]),
        )
      : [];

  const functionGroups = allocateFunctionsToGroups(
    functionMap,
    propFunctionGroups,
    FUNCTIONS_PER_STACK,
  );

  if (process.env.NODE_ENV !== "test") {
    for (const [index, group] of functionGroups.entries()) {
      console.log(
        `Lambda stack ${index}:\n ${group.map((f) => f.title).join("\n ")}`,
      );
      console.log("");
    }
  }

  new CfnOutput(stack, "stacksFunction", {
    value: JSON.stringify(functionGroups.map((g) => g.map((f) => f.title))),
  });

  const functionStacks = functionGroups.map((funcGroup, i) => {
    const newStack = new LambdaStack(stack, `functions-group-${i}`, {
      ...props,
      manifest: {
        // shave down manifest to just the functions in this group
        ...props.manifest,
        preprocessingFunctions: funcGroup.filter(
          isPreprocessingFunctionMetadata,
        ),
        geoprocessingFunctions: funcGroup.filter(
          isGeoprocessingFunctionMetadata,
        ),
      },
    });
    return newStack;
  });

  // Allocate workers to stack groups
  const workerTitles = Object.keys(workerAsyncMap);
  const workerMetas = workerTitles.map((title) => syncFunctionMap[title]);
  const workerMap = keyBy(workerMetas, (f) => f.title);

  const propWorkerGroups: ProcessingFunctionMetadata[][] =
    props.existingWorkerStacks
      ? props.existingWorkerStacks.map((g) =>
          g
            .filter((title) => workerTitles.includes(title)) // filter out any titles that are not in manifest this time
            .map((title) => workerMap[title]),
        )
      : [];

  const workerGroups = allocateFunctionsToGroups(
    workerMap,
    propWorkerGroups,
    FUNCTIONS_PER_STACK,
  );

  // Keep test output quiet, consistent with the function stack logging above
  if (process.env.NODE_ENV !== "test") {
    for (const [index, group] of workerGroups.entries()) {
      console.log(
        `Worker stack ${index}:\n ${group.map((f) => f.title).join("\n ")}`,
      );
      console.log("");
    }
  }

  new CfnOutput(stack, "stacksWorker", {
    value: JSON.stringify(workerGroups.map((g) => g.map((f) => f.title))),
  });

  const workerStacks = workerGroups.map((workerGroup, i) => {
    const newStack = new LambdaStack(stack, `workers-group-${i}`, {
      ...props,
      manifest: {
        // shave down manifest to just the functions in this group
        ...props.manifest,
        preprocessingFunctions: workerGroup.filter(
          isPreprocessingFunctionMetadata,
        ),
        geoprocessingFunctions: workerGroup.filter(
          isGeoprocessingFunctionMetadata,
        ),
      },
    });
    return newStack;
  });

  // get all run lambdas and create policies for them to invoke workers
  const runLambdas: Function[] = [];
  for (const functionStack of functionStacks) {
    runLambdas.push(...functionStack.getAsyncRunLambdas());
  }

  for (const workerStack of workerStacks) {
    workerStack.createLambdaSyncPolicies(runLambdas);
  }

  return [...functionStacks, ...workerStacks];
};

/**
 * Splits the functions in `functionMap` into ordered groups, one group per stack.
 *
 * Group `i` starts with the members of `existingGroups[i]` (when present), so
 * functions that already belong to a group keep their position. Each group is then
 * topped up, in `functionMap` key order, with functions that appear in no existing
 * group until it holds `functionsPerStack` members. An existing group is never
 * trimmed, even if it already exceeds `functionsPerStack`. Groups are produced until
 * every function in `functionMap` has been placed; trailing existing groups that are
 * not needed to reach that point are not emitted.
 *
 * @param functionMap functions to allocate, keyed by title
 * @param existingGroups previously allocated groups to preserve, in stack order
 * @param functionsPerStack maximum size a group is topped up to
 * @returns the allocated groups, in stack order
 * @throws Error if functions remain unallocated but a fresh group cannot accept any
 *   (e.g. `functionsPerStack` is 0 or negative), which would otherwise loop forever
 */
function allocateFunctionsToGroups(
  functionMap: Record<string, ProcessingFunctionMetadata>,
  existingGroups: ProcessingFunctionMetadata[][],
  functionsPerStack: number,
): ProcessingFunctionMetadata[][] {
  const functionTitles = Object.keys(functionMap);
  const knownTitles = new Set(functionTitles);
  const allocatedTitles = new Set<string>();

  // Titles that already have a home in some existing group; these are only ever
  // placed via their existing group, never used to top up another one
  const existingTitles = new Set<string>();
  for (const group of existingGroups) {
    for (const f of group) existingTitles.add(f.title);
  }

  const functionGroups: ProcessingFunctionMetadata[][] = [];
  let numUnallocatedFunctions = functionTitles.length;
  let curGroupIndex = 0;

  while (numUnallocatedFunctions > 0) {
    const curGroup: ProcessingFunctionMetadata[] = [];

    // Start with existing function group if available
    const existingFunctions = existingGroups[curGroupIndex];
    if (existingFunctions) {
      curGroup.push(...existingFunctions);
      for (const f of existingFunctions) {
        // Count each known title once, so duplicate or unknown titles in the
        // existing groups cannot end the allocation early
        if (knownTitles.has(f.title) && !allocatedTitles.has(f.title)) {
          allocatedTitles.add(f.title);
          numUnallocatedFunctions -= 1;
        }
      }
    }

    // Fill up the rest of the function group
    for (const functionTitle of functionTitles) {
      if (
        numUnallocatedFunctions === 0 || // all allocated
        curGroup.length >= functionsPerStack // current stack is full
      ) {
        break;
      }
      if (
        allocatedTitles.has(functionTitle) || // function already allocated
        existingTitles.has(functionTitle) // function already exists in another stack
      ) {
        continue;
      }
      curGroup.push(functionMap[functionTitle]);
      allocatedTitles.add(functionTitle);
      numUnallocatedFunctions -= 1;
    }

    // Past the existing groups every pass starts from the same empty state, so an
    // empty group here means no later pass can make progress either
    if (curGroup.length === 0 && curGroupIndex >= existingGroups.length) {
      throw new Error(
        `Unable to allocate ${numUnallocatedFunctions} remaining function(s) to groups, functionsPerStack (${functionsPerStack}) must be at least 1`,
      );
    }

    // This function group is full as its gonna get, move on to the next
    functionGroups.push(curGroup);
    curGroupIndex += 1;
  }

  return functionGroups;
}