@seasketch/geoprocessing

Geoprocessing and reporting framework for SeaSketch 2.0
import { v4 as uuid } from "uuid";
import {
  UpdateCommand,
  PutCommand,
  GetCommand,
  paginateQuery,
} from "@aws-sdk/lib-dynamodb";
import { updateCommandsSync } from "./dynamodb/updateCommandsSync.js";

export const commonHeaders = {
  "Access-Control-Allow-Origin": "*",
  "Access-Control-Allow-Credentials": true,
  // Serve from cache without revalidating if < 30 seconds old,
  // serve stale while revalidating if < 24 hours old
  "Cache-Control": "max-age=30, stale-while-revalidate=86400",
};

export var GeoprocessingTaskStatus;
(function (GeoprocessingTaskStatus) {
  GeoprocessingTaskStatus["Pending"] = "pending";
  GeoprocessingTaskStatus["Completed"] = "completed";
  GeoprocessingTaskStatus["Failed"] = "failed";
})(GeoprocessingTaskStatus || (GeoprocessingTaskStatus = {}));

/**
 * Task model responsible for managing task results and estimates in DynamoDB
 */
export default class TasksModel {
  /** task table */
  table;
  /** task estimate table */
  estimatesTable;
  /** database client */
  db;

  constructor(table, estimatesTable, db) {
    this.table = table;
    this.estimatesTable = estimatesTable;
    this.db = db;
  }

  init(service, id, /** websocket url */ wss, startedAt, duration, status) {
    id = id || uuid();
    const location = `/${service}/tasks/${id}`;
    const task = {
      id,
      service,
      wss: wss ? wss : `${location}/socket`,
      location,
      startedAt: startedAt || new Date().toISOString(),
      logUriTemplate: `${location}/logs{?limit,nextToken}`,
      geometryUri: `${location}/geometry`,
      status: status || GeoprocessingTaskStatus.Pending,
      estimate: 2,
    };
    return task;
  }

  async create(service, options = {}) {
    const task = this.init(service, options.id, options.wss);
    task.disableCache = options.disableCache;
    try {
      const estimate = await this.getMeanEstimate(task);
      // Keep the default estimate from init() if no stored estimate exists yet
      if (estimate !== undefined) task.estimate = estimate;
    } catch {
      // Can happen when testing; the default estimate of 2 set in init() is
      // kept if an estimate can't be fetched
    }
    const shouldCache =
      task.disableCache === undefined || task.disableCache === false;
    if (shouldCache) {
      await this.db.send(
        new PutCommand({
          TableName: this.table,
          Item: {
            ...task,
          },
        })
      );
    }
    return task;
  }
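  /*
   * Example (illustrative sketch, not part of this module): constructing the
   * model and creating a pending task. The client setup, table names, and
   * service name below are assumptions for demonstration only.
   *
   *   import { DynamoDBClient } from "@aws-sdk/client-dynamodb";
   *   import { DynamoDBDocumentClient } from "@aws-sdk/lib-dynamodb";
   *   import TasksModel from "./tasks.js";
   *
   *   const db = DynamoDBDocumentClient.from(new DynamoDBClient({}));
   *   const tasks = new TasksModel("gp-tasks", "gp-estimates", db);
   *   const task = await tasks.create("calculateArea", { id: "abc-123" });
   *   // task.status === "pending"
   *   // task.location === "/calculateArea/tasks/abc-123"
   */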
  /**
   * Completes the task, caching the result in DynamoDB (split into chunks)
   * unless caching is disabled, and returns a 200 response
   * @param task
   * @param results - JSON serializable object. No string within it should be
   * larger than 400KB without containing a space character, because spaces are
   * used as split points when chunking the result
   * @param options
   * @returns
   */
  async complete(task, results, options = {}) {
    task.data = results;
    task.status = GeoprocessingTaskStatus.Completed;
    task.duration = Date.now() - new Date(task.startedAt).getTime();
    const shouldCache =
      task.disableCache === undefined || task.disableCache === false;
    if (process.env.NODE_ENV !== "test")
      console.log("shouldCache", shouldCache);
    if (shouldCache) {
      const tsStrings = Date.now();
      if (process.env.NODE_ENV !== "test")
        console.time(`split strings - ${tsStrings}`);
      const jsonStrings = this.toJsonStrings(results, {
        minSplitSizeBytes: options.minSplitSizeBytes,
      });
      if (process.env.NODE_ENV !== "test")
        console.timeEnd(`split strings - ${tsStrings}`);
      const numJsonStrings = jsonStrings.length;
      const updateCommands = [];
      // push root task
      updateCommands.push(
        new UpdateCommand({
          TableName: this.table,
          Key: {
            id: task.id,
            service: task.service,
          },
          UpdateExpression:
            "set #data = :data, #status = :status, #duration = :duration",
          ExpressionAttributeNames: {
            "#data": "data",
            "#status": "status",
            "#duration": "duration",
          },
          ExpressionAttributeValues: {
            ":data": { numChunks: numJsonStrings },
            ":status": task.status,
            ":duration": task.duration,
          },
        })
      );
      // Store each JSON substring as a separate dynamodb item, with chunk index,
      // all under same partition key (task.id) as root item for easy retrieval
      for (const [index, chunk] of jsonStrings.entries()) {
        if (process.env.NODE_ENV !== "test") {
          console.log("chunk", chunk);
          console.log(`Chunk ${index} - ${chunk.length} length`);
        }
        updateCommands.push(
          new UpdateCommand({
            TableName: this.table,
            Key: {
              id: task.id,
              service: `${task.service}-chunk-${index}`,
            },
            UpdateExpression:
              "set #data = :data, #status = :status, #duration = :duration",
            ExpressionAttributeNames: {
              "#data": "data",
              "#status": "status",
              "#duration": "duration",
            },
            ExpressionAttributeValues: {
              ":data": { chunk: chunk },
              ":status": task.status,
              ":duration": task.duration,
            },
          })
        );
      }
      if (process.env.NODE_ENV !== "test") {
        console.log(`Saving items, root + ${jsonStrings.length} chunks`);
      }
      const tsSaveChunk = Date.now();
      if (process.env.NODE_ENV !== "test")
        console.time(`save items - ${tsSaveChunk}`);
      await updateCommandsSync(this.db, updateCommands);
      if (process.env.NODE_ENV !== "test")
        console.timeEnd(`save items - ${tsSaveChunk}`);
    }
    return {
      statusCode: 200,
      headers: {
        ...commonHeaders,
        "x-gp-cache": "Cache miss",
      },
      body: JSON.stringify(task),
    };
  }

  /**
   * Updates the mean duration estimate for the task's service, maintaining a
   * rolling window of up to five recent durations
   */
  async updateEstimate(task) {
    const duration = task.duration ? task.duration : 0;
    const service = task.service;
    let meanEstimate = 0;
    try {
      const response = await this.db.send(
        new GetCommand({
          TableName: this.estimatesTable,
          Key: {
            service,
          },
        })
      );
      const taskItem = response.Item;
      if (taskItem && taskItem?.allEstimates) {
        const allEstimates = taskItem?.allEstimates;
        // cap it at five estimates for the average
        if (allEstimates.length >= 5) {
          allEstimates.pop();
        }
        allEstimates.push(duration);
        // rolling mean of recent durations
        meanEstimate = Math.round(
          allEstimates.reduce((a, b) => a + b, 0) / allEstimates.length
        );
        await this.db.send(
          new UpdateCommand({
            TableName: this.estimatesTable,
            Key: {
              service: task.service,
            },
            UpdateExpression:
              "set #allEstimates = :allEstimates, #meanEstimate = :meanEstimate",
            ExpressionAttributeNames: {
              "#allEstimates": "allEstimates",
              "#meanEstimate": "meanEstimate",
            },
            ExpressionAttributeValues: {
              ":allEstimates": allEstimates,
              ":meanEstimate": meanEstimate,
            },
          })
        );
      } else {
        // no estimates yet
        meanEstimate = duration;
        await this.db.send(
          new UpdateCommand({
            TableName: this.estimatesTable,
            Key: {
              service: task.service,
            },
            UpdateExpression:
              "set #allEstimates = :allEstimates, #meanEstimate = :meanEstimate",
            ExpressionAttributeNames: {
              "#allEstimates": "allEstimates",
              "#meanEstimate": "meanEstimate",
            },
            ExpressionAttributeValues: {
              ":allEstimates": [duration],
              ":meanEstimate": meanEstimate,
            },
          })
        );
      }
      return meanEstimate;
    } catch (error) {
      console.warn("unable to append duration estimate:", error);
    }
  }
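  /*
   * Example (illustrative sketch; runAnalysis is a hypothetical function, not
   * part of this module): the create/complete/fail lifecycle around a task,
   * feeding the completed duration back into the estimates table.
   *
   *   async function handleRequest(tasks, request) {
   *     const task = await tasks.create("calculateArea");
   *     try {
   *       const results = await runAnalysis(request); // hypothetical analysis
   *       const response = await tasks.complete(task, results); // 200, result cached in chunks
   *       await tasks.updateEstimate(task); // fold duration into rolling estimate
   *       return response;
   *     } catch (error) {
   *       return tasks.fail(task, "Analysis failed", error); // 500, not cached by clients
   *     }
   *   }
   */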
  /**
   * Marks the task failed and returns a 500 response, recording the error in
   * the task table unless caching is disabled
   */
  async fail(task, errorDescription, error) {
    if (error) console.error(error);
    task.status = GeoprocessingTaskStatus.Failed;
    task.duration = Date.now() - new Date(task.startedAt).getTime();
    task.error = errorDescription;
    const shouldCache =
      task.disableCache === undefined || task.disableCache === false;
    if (shouldCache) {
      await this.db.send(
        new UpdateCommand({
          TableName: this.table,
          Key: {
            id: task.id,
            service: task.service,
          },
          UpdateExpression:
            "set #error = :error, #status = :status, #duration = :duration",
          ExpressionAttributeNames: {
            "#error": "error",
            "#status": "status",
            "#duration": "duration",
          },
          ExpressionAttributeValues: {
            ":error": errorDescription,
            ":status": task.status,
            ":duration": task.duration,
          },
        })
      );
    }
    return {
      statusCode: 500,
      headers: {
        ...commonHeaders,
        "Cache-Control": "max-age=0",
      },
      body: JSON.stringify(task),
    };
  }

  /**
   * Fetches the task item for the given service and task id, re-assembling
   * chunked result data if present. Returns undefined if not found
   */
  async get(service, taskId) {
    try {
      // Get all items under the same partition key (task id)
      const query = {
        TableName: this.table,
        KeyConditionExpression: "#id = :id",
        ExpressionAttributeNames: {
          "#id": "id",
        },
        ExpressionAttributeValues: {
          ":id": taskId,
        },
        ScanIndexForward: true, // sort ascending by range key (service)
      };
      // Pager will return a variable number of items, up to 1MB of data per page
      const paginatorConfig = {
        client: this.db,
        pageSize: 25,
      };
      const pager = paginateQuery(paginatorConfig, query);
      // Build list of items, page by page
      const items = [];
      for await (const result of pager) {
        if (result && result.Items) {
          items.push(...result.Items);
        }
      }
      if (!items || items.length === 0) return undefined;
      // Filter down to root and chunk items for service
      const serviceItems = items.filter((item) =>
        item.service.includes(service)
      );
      if (process.env.NODE_ENV !== "test")
        console.log(
          "serviceItems",
          serviceItems.map((item) => item.service).join(", ")
        );
      const rootItemIndex = serviceItems.findIndex(
        (item) => item.service === service
      );
      if (rootItemIndex === -1) return undefined; // no root item for service
      // Remove root item, mutating serviceItems
      const rootItem = serviceItems.splice(rootItemIndex, 1)[0];
      // Filter for chunk items for this service, just in case there's more
      // under the partition key
      const chunkItems = serviceItems.filter((item) =>
        item.service.includes(`${service}-chunk`)
      );
      // If chunk data, merge it back into root item
      if (chunkItems.length > 0) {
        if (process.env.NODE_ENV !== "test")
          console.log(`Merging ${chunkItems.length} chunks`);
        // parse chunk number from service name and sort by chunk number
        const chunkStrings = chunkItems
          .sort((a, b) => {
            const aNum = Number.parseInt(a.service.split("-chunk-")[1]);
            const bNum = Number.parseInt(b.service.split("-chunk-")[1]);
            return aNum - bNum;
          })
          .map((item) => item.data.chunk);
        rootItem.data = this.fromJsonStrings(chunkStrings);
      }
      return rootItem;
    } catch (error) {
      console.log("TasksModel get threw an error");
      if (error instanceof Error) {
        console.log(error.message);
        console.log(error.stack);
        return undefined;
      }
    }
  }
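  /*
   * Example (illustrative sketch; the service name and task id are
   * assumptions): checking for a previously cached result. Chunked results
   * are re-assembled into a single data object before being returned.
   *
   *   const cached = await tasks.get("calculateArea", "abc-123");
   *   if (cached && cached.status === "completed") {
   *     return {
   *       statusCode: 200,
   *       headers: commonHeaders,
   *       body: JSON.stringify(cached),
   *     };
   *   }
   */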
  /**
   * Returns the stored mean duration estimate for the task's service
   */
  async getMeanEstimate(task) {
    const service = task.service;
    const response = await this.db.send(
      new GetCommand({
        TableName: this.estimatesTable,
        Key: {
          service,
        },
      })
    );
    const meanEstimate = response.Item?.meanEstimate;
    return meanEstimate;
  }

  /**
   * Transform valid JSON object into a string and break it into pieces no
   * larger than minSplitSizeBytes, splitting on space characters
   * @param rootResult
   * @param minSplitSizeBytes target substring size in bytes (default 350KB,
   * safely below the 400KB dynamodb item limit); a chunk may run longer if no
   * space character is found before this size
   * @returns array of JSON substrings, in order for re-assembly
   */
  toJsonStrings(rootResult, options = {}) {
    // stringify with an indent of 1 to add spaces to chunk on
    const rootString = JSON.stringify(rootResult, null, 1);
    const minSplitSizeBytes = options.minSplitSizeBytes || 350 * 1024;
    let buf = Buffer.from(rootString);
    const result = [];
    while (buf.length) {
      // Find last space before minSplitSizeBytes
      let i = buf.lastIndexOf(32, minSplitSizeBytes + 1);
      // If no space found, try forward search
      if (i < 0) i = buf.indexOf(32, minSplitSizeBytes);
      // If there's no space at all, take the whole string
      if (i < 0) i = buf.length;
      // This is a safe cut-off point; splitting on a single-byte space never
      // lands halfway through a multi-byte character
      const partial = buf.slice(0, i).toString();
      result.push(partial);
      buf = buf.slice(i + 1); // Skip space (if any)
    }
    return result;
  }

  /**
   * Given array of partial JSON strings, joins them together and parses the
   * result
   */
  fromJsonStrings(jsonStringChunks) {
    const mergedString = jsonStringChunks.join("");
    let parsed = "";
    try {
      parsed = JSON.parse(mergedString);
    } catch (error) {
      if (error instanceof Error) {
        throw new TypeError(
          "Error merging JSON string chunks: " + error.message
        );
      }
    }
    return parsed;
  }
}
//# sourceMappingURL=tasks.js.map
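/*
 * Example (illustrative sketch): round-tripping a result through the chunking
 * helpers. toJsonStrings splits the stringified object on space characters so
 * each piece fits under the DynamoDB item size limit, and fromJsonStrings
 * re-joins and parses them. The tiny split size here is an assumption chosen
 * to force multiple chunks for demonstration.
 *
 *   const tasks = new TasksModel("gp-tasks", "gp-estimates", db);
 *   const big = { values: Array.from({ length: 1000 }, (_, i) => i) };
 *   const chunks = tasks.toJsonStrings(big, { minSplitSizeBytes: 1024 });
 *   const restored = tasks.fromJsonStrings(chunks); // deep-equals big
 */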