@seasketch/geoprocessing

Geoprocessing and reporting framework for SeaSketch 2.0

import { v4 as uuid } from "uuid";
import { APIGatewayProxyResult } from "aws-lambda";
import {
  DynamoDBDocument,
  UpdateCommand,
  PutCommand,
  GetCommand,
  paginateQuery,
  DynamoDBDocumentPaginationConfiguration,
  QueryCommandInput,
} from "@aws-sdk/lib-dynamodb";
import { updateCommandsSync } from "./dynamodb/updateCommandsSync.js";
import { JSONValue } from "../types/base.js";

export const commonHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Credentials": true,
  // Consider the response fresh for 30 seconds; after that, serve it stale
  // while revalidating in the background for up to 24 hours
  "Cache-Control": "max-age=30, stale-while-revalidate=86400",
};

export interface GeoprocessingTask<ResultType = any> {
  id: string;
  service: string;
  location: string;
  /** ISO 8601 */
  startedAt: string;
  /** ms */
  duration?: number;
  logUriTemplate: string;
  geometryUri: string;
  status: GeoprocessingTaskStatus;
  /** websocket url */
  wss: string;
  /** result data can take any json-serializable form */
  data?: ResultType;
  error?: string;
  estimate: number;
  /** whether to cache the result server-side, defaults to true */
  disableCache?: boolean;
  // ttl?: number;
}

export interface MetricGroupItem<ResultType = any> {
  /** ms */
  duration?: number;
  status: GeoprocessingTaskStatus;
  /** result data can take any json-serializable form */
  data?: ResultType;
}

export interface RootTaskItem<ResultType = any>
  extends MetricGroupItem<ResultType> {
  metricGroupItems: string[];
}

export enum GeoprocessingTaskStatus {
  Pending = "pending",
  Completed = "completed",
  Failed = "failed",
}

/**
 * Task model responsible for managing task results and estimates in DynamoDB
 */
export default class TasksModel {
  /** task table */
  table: string;
  /** task estimate table */
  estimatesTable: string;
  /** database client */
  db: DynamoDBDocument;

  constructor(table: string, estimatesTable: string, db: DynamoDBDocument) {
    this.table = table;
    this.estimatesTable = estimatesTable;
    this.db = db;
  }

  init(
    service: string,
    id?: string,
    /** websocket url */
    wss?: string,
    startedAt?: string,
    duration?: number,
    status?: GeoprocessingTaskStatus,
  ) {
    id = id || uuid();
    const location = `/${service}/tasks/${id}`;
    const task: GeoprocessingTask = {
      id,
      service,
      wss: wss ? wss : `${location}/socket`,
      location,
      startedAt: startedAt || new Date().toISOString(),
      logUriTemplate: `${location}/logs{?limit,nextToken}`,
      geometryUri: `${location}/geometry`,
      status: status || GeoprocessingTaskStatus.Pending,
      estimate: 2,
    };
    return task;
  }

  async create(
    service: string,
    options: {
      /** Unique identifier for this task, used as cache key. If not provided a uuid is created */
      id?: string;
      /** websocket url */
      wss?: string;
      disableCache?: boolean;
    } = {},
  ) {
    const task = this.init(service, options.id, options.wss);
    task.disableCache = options.disableCache;
    try {
      const estimate = await this.getMeanEstimate(task);
      task.estimate = estimate;
    } catch {
      // can happen when testing; task.estimate keeps its default from init()
    }
    const shouldCache =
      task.disableCache === undefined || task.disableCache === false;
    if (shouldCache) {
      await this.db.send(
        new PutCommand({
          TableName: this.table,
          Item: {
            ...task,
          },
        }),
      );
    }
    return task;
  }
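  /*
   * Usage sketch (illustrative only, not part of this module). The table
   * names and client wiring below are assumptions; in the framework they are
   * provided by the deployed stack.
   *
   *   import { DynamoDB } from "@aws-sdk/client-dynamodb";
   *   import { DynamoDBDocument } from "@aws-sdk/lib-dynamodb";
   *
   *   const db = DynamoDBDocument.from(new DynamoDB({}));
   *   const tasks = new TasksModel("gp-tasks", "gp-task-estimates", db);
   *
   *   // Create a pending task record; the id doubles as the cache key
   *   const task = await tasks.create("myGeoprocessor", { id: "sketch-123" });
   */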
  /**
   * Completes a task and caches its result
   * @param task
   * @param results - JSON-serializable object. Any string value should contain
   * a space character at least every 400KB, because spaces are used as split
   * points when chunking the result
   * @param options
   * @returns
   */
  async complete(
    task: GeoprocessingTask,
    results: any,
    options: { minSplitSizeBytes?: number } = {},
  ): Promise<APIGatewayProxyResult> {
    task.data = results;
    task.status = GeoprocessingTaskStatus.Completed;
    task.duration = Date.now() - new Date(task.startedAt).getTime();

    const shouldCache =
      task.disableCache === undefined || task.disableCache === false;
    if (process.env.NODE_ENV !== "test")
      console.log("shouldCache", shouldCache);

    if (shouldCache) {
      const tsStrings = Date.now();
      if (process.env.NODE_ENV !== "test")
        console.time(`split strings - ${tsStrings}`);
      const jsonStrings = this.toJsonStrings(results, {
        minSplitSizeBytes: options.minSplitSizeBytes,
      });
      if (process.env.NODE_ENV !== "test")
        console.timeEnd(`split strings - ${tsStrings}`);
      const numJsonStrings = jsonStrings.length;

      const updateCommands: UpdateCommand[] = [];

      // push root task item, recording only the number of chunks as its data
      updateCommands.push(
        new UpdateCommand({
          TableName: this.table,
          Key: {
            id: task.id,
            service: task.service,
          },
          UpdateExpression:
            "set #data = :data, #status = :status, #duration = :duration",
          ExpressionAttributeNames: {
            "#data": "data",
            "#status": "status",
            "#duration": "duration",
          },
          ExpressionAttributeValues: {
            ":data": { numChunks: numJsonStrings },
            ":status": task.status,
            ":duration": task.duration,
          },
        }),
      );

      // Store each JSON substring as a separate dynamodb item, with chunk index,
      // all under same partition key (task.id) as root item for easy retrieval
      for (const [index, chunk] of jsonStrings.entries()) {
        if (process.env.NODE_ENV !== "test") {
          console.log("chunk", chunk);
          console.log(`Chunk ${index} - ${chunk.length} length`);
        }
        updateCommands.push(
          new UpdateCommand({
            TableName: this.table,
            Key: {
              id: task.id,
              service: `${task.service}-chunk-${index}`,
            },
            UpdateExpression:
              "set #data = :data, #status = :status, #duration = :duration",
            ExpressionAttributeNames: {
              "#data": "data",
              "#status": "status",
              "#duration": "duration",
            },
            ExpressionAttributeValues: {
              ":data": { chunk: chunk },
              ":status": task.status,
              ":duration": task.duration,
            },
          }),
        );
      }

      if (process.env.NODE_ENV !== "test") {
        console.log(`Saving items, root + ${jsonStrings.length} chunks`);
      }
      const tsSaveChunk = Date.now();
      if (process.env.NODE_ENV !== "test")
        console.time(`save items - ${tsSaveChunk}`);
      await updateCommandsSync(this.db, updateCommands);
      if (process.env.NODE_ENV !== "test")
        console.timeEnd(`save items - ${tsSaveChunk}`);
    }

    return {
      statusCode: 200,
      headers: {
        ...commonHeaders,
        "x-gp-cache": "Cache miss",
      },
      body: JSON.stringify(task),
    };
  }
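  /*
   * Resulting DynamoDB layout sketch (values illustrative): a completed task
   * is stored as a root item plus ordered chunk items, all sharing the
   * partition key (id) and distinguished by the range key (service):
   *
   *   { id: "sketch-123", service: "myGeoprocessor",         data: { numChunks: 2 }, ... }
   *   { id: "sketch-123", service: "myGeoprocessor-chunk-0", data: { chunk: "..." }, ... }
   *   { id: "sketch-123", service: "myGeoprocessor-chunk-1", data: { chunk: "..." }, ... }
   */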
  async updateEstimate(task: GeoprocessingTask) {
    const duration: number = task.duration ? task.duration : 0;
    const service: string = task.service;
    let meanEstimate = 0;
    try {
      const response = await this.db.send(
        new GetCommand({
          TableName: this.estimatesTable,
          Key: {
            service,
          },
        }),
      );
      const taskItem = response.Item;
      if (taskItem && taskItem?.allEstimates) {
        const allEstimates: number[] = taskItem?.allEstimates;
        // cap the number of durations used for the estimate average at five
        if (allEstimates.length >= 5) {
          allEstimates.pop();
        }
        allEstimates.push(duration);
        // assign to the outer meanEstimate so the computed value is returned
        meanEstimate = Math.round(
          allEstimates.reduce((a, b) => a + b, 0) / allEstimates.length,
        );
        await this.db.send(
          new UpdateCommand({
            TableName: this.estimatesTable,
            Key: {
              service: task.service,
            },
            UpdateExpression:
              "set #allEstimates = :allEstimates, #meanEstimate = :meanEstimate",
            ExpressionAttributeNames: {
              "#allEstimates": "allEstimates",
              "#meanEstimate": "meanEstimate",
            },
            ExpressionAttributeValues: {
              ":allEstimates": allEstimates,
              ":meanEstimate": meanEstimate,
            },
          }),
        );
      } else {
        // no estimates yet
        meanEstimate = duration;
        await this.db.send(
          new UpdateCommand({
            TableName: this.estimatesTable,
            Key: {
              service: task.service,
            },
            UpdateExpression:
              "set #allEstimates = :allEstimates, #meanEstimate = :meanEstimate",
            ExpressionAttributeNames: {
              "#allEstimates": "allEstimates",
              "#meanEstimate": "meanEstimate",
            },
            ExpressionAttributeValues: {
              ":allEstimates": [duration],
              ":meanEstimate": meanEstimate,
            },
          }),
        );
      }
      return meanEstimate;
    } catch (error) {
      console.warn("unable to append duration estimate:", error);
    }
  }

  async fail(
    task: GeoprocessingTask,
    errorDescription: string,
    error?: Error,
  ): Promise<APIGatewayProxyResult> {
    if (error) console.error(error);
    task.status = GeoprocessingTaskStatus.Failed;
    task.duration = Date.now() - new Date(task.startedAt).getTime();
    task.error = errorDescription;

    const shouldCache =
      task.disableCache === undefined || task.disableCache === false;
    if (shouldCache) {
      await this.db.send(
        new UpdateCommand({
          TableName: this.table,
          Key: {
            id: task.id,
            service: task.service,
          },
          UpdateExpression:
            "set #error = :error, #status = :status, #duration = :duration",
          ExpressionAttributeNames: {
            "#error": "error",
            "#status": "status",
            "#duration": "duration",
          },
          ExpressionAttributeValues: {
            ":error": errorDescription,
            ":status": task.status,
            ":duration": task.duration,
          },
        }),
      );
    }

    return {
      statusCode: 500,
      headers: {
        ...commonHeaders,
        "Cache-Control": "max-age=0",
      },
      body: JSON.stringify(task),
    };
  }
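  /*
   * Estimates table item sketch (values illustrative): one item per service,
   * holding up to five recent run durations in ms and their rounded mean,
   * which create() reads back via getMeanEstimate():
   *
   *   { service: "myGeoprocessor", allEstimates: [1804, 1911, 2050], meanEstimate: 1922 }
   */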
  async get(
    service: string,
    taskId: string,
  ): Promise<GeoprocessingTask | undefined> {
    try {
      // Get all items under the same partition key (task id)
      const query: QueryCommandInput = {
        TableName: this.table,
        KeyConditionExpression: "#id = :id",
        ExpressionAttributeNames: {
          "#id": "id",
        },
        ExpressionAttributeValues: {
          ":id": taskId,
        },
        ScanIndexForward: true, // sort ascending by range key (service)
      };

      // Pager will return a variable number of items, up to 1MB of data
      const paginatorConfig: DynamoDBDocumentPaginationConfiguration = {
        client: this.db,
        pageSize: 25,
      };
      const pager = paginateQuery(paginatorConfig, query);

      // Build list of items, page by page
      const items: Record<string, any>[] = [];
      for await (const result of pager) {
        if (result && result.Items) {
          items.push(...result.Items);
        }
      }
      if (!items || items.length === 0) return undefined;

      // Filter down to root and chunk items for service
      const serviceItems = items.filter((item) =>
        item.service.includes(service),
      );
      if (process.env.NODE_ENV !== "test")
        console.log(
          "serviceItems",
          serviceItems.map((item) => item.service).join(", "),
        );

      const rootItemIndex = serviceItems.findIndex(
        (item) => item.service === service,
      );
      // Remove root item, mutating serviceItems
      const rootItem = serviceItems.splice(rootItemIndex, 1)[0];

      // Filter for chunk items for this service, just in case there's more
      // under the partition key
      const chunkItems = serviceItems.filter((item) =>
        item.service.includes(`${service}-chunk`),
      );

      // If chunk data, merge it back into root item
      if (chunkItems.length > 0) {
        if (process.env.NODE_ENV !== "test")
          console.log(`Merging ${chunkItems.length} chunks`);
        // parse chunk number from service name and sort by chunk number
        const chunkStrings = chunkItems
          .sort((a, b) => {
            const aNum = Number.parseInt(a.service.split("-chunk-")[1]);
            const bNum = Number.parseInt(b.service.split("-chunk-")[1]);
            return aNum - bNum;
          })
          .map((item) => item.data.chunk);
        rootItem.data = this.fromJsonStrings(chunkStrings);
      }

      return rootItem as unknown as GeoprocessingTask;
    } catch (error: unknown) {
      console.log("TasksModel get threw an error");
      if (error instanceof Error) {
        console.log(error.message);
        console.log(error.stack);
        return undefined;
      }
    }
  }

  async getMeanEstimate(task: GeoprocessingTask): Promise<number> {
    const service = task.service;
    const response = await this.db.send(
      new GetCommand({
        TableName: this.estimatesTable,
        Key: {
          service,
        },
      }),
    );
    const meanEstimate: number = response.Item?.meanEstimate;
    return meanEstimate;
  }

  /**
   * Transform valid JSON object into string and break into pieces no larger
   * than minSplitSizeBytes
   * @param rootResult
   * @param minSplitSizeBytes target chunk size in bytes (default 350KB, safely
   * below the 400KB dynamodb item limit); a chunk may run past this size to
   * the next space character
   * @returns array of JSON substrings, in order for re-assembly
   */
  private toJsonStrings(
    rootResult: JSONValue,
    options: { minSplitSizeBytes?: number } = {},
  ): string[] {
    // stringify with single-space indentation so there are spaces to chunk on
    const rootString = JSON.stringify(rootResult, null, 1);
    const minSplitSizeBytes = options.minSplitSizeBytes || 350 * 1024;
    let buf = Buffer.from(rootString);
    const result: string[] = [];
    while (buf.length) {
      // Find last space before minSplitSizeBytes
      let i = buf.lastIndexOf(32, minSplitSizeBytes + 1);
      // If no space found, try forward search
      if (i < 0) i = buf.indexOf(32, minSplitSizeBytes);
      // If there's no space at all, take the whole string
      if (i < 0) i = buf.length;
      // A space byte is a safe cut point; it can never fall inside a
      // multi-byte UTF-8 character
      const partial = buf.slice(0, i).toString();
      result.push(partial);
      buf = buf.slice(i + 1); // Skip space (if any)
    }
    return result;
  }
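  /*
   * Round-trip sketch (illustrative; a tiny 16-byte target is assumed for
   * brevity). Re-joining works because each dropped split point is a space,
   * and when the cut lands in the indentation added by
   * JSON.stringify(value, null, 1), losing that space is insignificant to
   * JSON.parse:
   *
   *   const chunks = this.toJsonStrings({ a: [1, 2, 3] }, { minSplitSizeBytes: 16 });
   *   const roundTripped = this.fromJsonStrings(chunks); // { a: [1, 2, 3] }
   */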
  /**
   * Given array of partial JSON strings, joins them together and parses the result
   */
  private fromJsonStrings(jsonStringChunks: string[]): JSONValue {
    const mergedString = jsonStringChunks.join("");
    let parsed: JSONValue = "";
    try {
      parsed = JSON.parse(mergedString);
    } catch (error: unknown) {
      if (error instanceof Error) {
        throw new TypeError(
          "Error merging JSON string chunks: " + error.message,
        );
      }
    }
    return parsed;
  }
}
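/*
 * Retrieval sketch (illustrative; the service and id values are assumptions,
 * continuing the earlier examples): get() queries all items under the task
 * id, orders the chunk items, and re-assembles the cached result:
 *
 *   const cached = await tasks.get("myGeoprocessor", "sketch-123");
 *   if (cached && cached.status === GeoprocessingTaskStatus.Completed) {
 *     console.log(cached.data); // original results object, rebuilt from chunks
 *   }
 */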