@seasketch/geoprocessing
Geoprocessing and reporting framework for SeaSketch 2.0
JavaScript
import { v4 as uuid } from "uuid";
import { UpdateCommand, PutCommand, GetCommand, paginateQuery, } from "@aws-sdk/lib-dynamodb";
import { updateCommandsSync } from "./dynamodb/updateCommandsSync.js";
export const commonHeaders = {
"Access-Control-Allow-Origin": "*",
"Access-Control-Allow-Credentials": true,
    // Response is fresh for 30 seconds; after that, serve stale while
    // revalidating in the background, for up to 24 hours
"Cache-Control": "max-age=30, stale-while-revalidate=86400",
};
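// Illustrative use: a Lambda proxy handler spreads these headers into its
// response so report clients can fetch results cross-origin and cache them:
//   return {
//     statusCode: 200,
//     headers: { ...commonHeaders },
//     body: JSON.stringify(task),
//   };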
export var GeoprocessingTaskStatus;
(function (GeoprocessingTaskStatus) {
GeoprocessingTaskStatus["Pending"] = "pending";
GeoprocessingTaskStatus["Completed"] = "completed";
GeoprocessingTaskStatus["Failed"] = "failed";
})(GeoprocessingTaskStatus || (GeoprocessingTaskStatus = {}));
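// Lifecycle: a task starts as Pending (create), then moves to Completed
// (complete) or Failed (fail)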
/**
* Task model responsible for managing task results and estimates in DynamoDB
*/
export default class TasksModel {
/** task table */
table;
/** task estimate table */
estimatesTable;
/** database client */
db;
constructor(table, estimatesTable, db) {
this.table = table;
this.estimatesTable = estimatesTable;
this.db = db;
}
init(service, id,
/** websocket url */
wss, startedAt, duration, status) {
id = id || uuid();
const location = `/${service}/tasks/${id}`;
const task = {
id,
service,
wss: wss ? wss : `${location}/socket`,
location,
startedAt: startedAt || new Date().toISOString(),
logUriTemplate: `${location}/logs{?limit,nextToken}`,
geometryUri: `${location}/geometry`,
status: status || GeoprocessingTaskStatus.Pending,
estimate: 2,
};
return task;
}
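    // Example shape returned by init("area") (illustrative values):
    // {
    //   id: "d290f1ee-6c54-4b01-90e6-d701748f0851",
    //   service: "area",
    //   wss: "/area/tasks/d290f1ee-.../socket",
    //   location: "/area/tasks/d290f1ee-...",
    //   startedAt: "2024-01-01T00:00:00.000Z",
    //   logUriTemplate: "/area/tasks/d290f1ee-.../logs{?limit,nextToken}",
    //   geometryUri: "/area/tasks/d290f1ee-.../geometry",
    //   status: "pending",
    //   estimate: 2,
    // }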
async create(service, options = {}) {
const task = this.init(service, options.id, options.wss);
task.disableCache = options.disableCache;
try {
const estimate = await this.getMeanEstimate(task);
task.estimate = estimate;
}
catch {
            // can happen when testing; falls back to the default estimate set in init
}
const shouldCache = task.disableCache === undefined || task.disableCache === false;
if (shouldCache) {
await this.db.send(new PutCommand({
TableName: this.table,
Item: {
...task,
},
}));
}
return task;
}
    /**
     * Marks task complete, saving results to DynamoDB unless caching is disabled
     * @param task
     * @param results - JSON-serializable object. Results are stringified and
     * split into chunks at space characters, so no single string value should
     * exceed 400KB without containing a space
     * @param options
     * @returns Lambda proxy response containing the completed task
     */
async complete(task, results, options = {}) {
task.data = results;
task.status = GeoprocessingTaskStatus.Completed;
task.duration = Date.now() - new Date(task.startedAt).getTime();
const shouldCache = task.disableCache === undefined || task.disableCache === false;
if (process.env.NODE_ENV !== "test")
console.log("shouldCache", shouldCache);
if (shouldCache) {
const tsStrings = Date.now();
if (process.env.NODE_ENV !== "test")
console.time(`split strings - ${tsStrings}`);
const jsonStrings = this.toJsonStrings(results, {
minSplitSizeBytes: options.minSplitSizeBytes,
});
if (process.env.NODE_ENV !== "test")
console.timeEnd(`split strings - ${tsStrings}`);
const numJsonStrings = jsonStrings.length;
const updateCommands = [];
// push root task
updateCommands.push(new UpdateCommand({
TableName: this.table,
Key: {
id: task.id,
service: task.service,
},
UpdateExpression: "set #data = :data, #status = :status, #duration = :duration",
ExpressionAttributeNames: {
"#data": "data",
"#status": "status",
"#duration": "duration",
},
ExpressionAttributeValues: {
":data": { numChunks: numJsonStrings },
":status": task.status,
":duration": task.duration,
},
}));
// Store each JSON substring as a separate dynamodb item, with chunk index
// all under same partition key (task.id) as root item for easy retrieval
for (const [index, chunk] of jsonStrings.entries()) {
            if (process.env.NODE_ENV !== "test") {
                console.log(`Chunk ${index} - ${chunk.length} length`);
            }
updateCommands.push(new UpdateCommand({
TableName: this.table,
Key: {
id: task.id,
service: `${task.service}-chunk-${index}`,
},
UpdateExpression: "set #data = :data, #status = :status, #duration = :duration",
ExpressionAttributeNames: {
"#data": "data",
"#status": "status",
"#duration": "duration",
},
ExpressionAttributeValues: {
":data": { chunk: chunk },
":status": task.status,
":duration": task.duration,
},
}));
}
if (process.env.NODE_ENV !== "test") {
console.log(`Saving items, root + ${jsonStrings.length} chunks`);
}
const tsSaveChunk = Date.now();
if (process.env.NODE_ENV !== "test")
console.time(`save items - ${tsSaveChunk}`);
await updateCommandsSync(this.db, updateCommands);
if (process.env.NODE_ENV !== "test")
console.timeEnd(`save items - ${tsSaveChunk}`);
}
return {
statusCode: 200,
headers: {
...commonHeaders,
"x-gp-cache": "Cache miss",
},
body: JSON.stringify(task),
};
}
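    // Storage layout written by complete() (hypothetical values): one root
    // item plus one item per chunk, all under the same partition key (task id)
    // so get() can retrieve everything with a single query:
    //   { id: "abc", service: "area",         data: { numChunks: 2 }, ... }
    //   { id: "abc", service: "area-chunk-0", data: { chunk: "..." }, ... }
    //   { id: "abc", service: "area-chunk-1", data: { chunk: "..." }, ... }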
async updateEstimate(task) {
const duration = task.duration ? task.duration : 0;
const service = task.service;
let meanEstimate = 0;
try {
const response = await this.db.send(new GetCommand({
TableName: this.estimatesTable,
Key: {
service,
},
}));
const taskItem = response.Item;
            if (taskItem?.allEstimates) {
                const allEstimates = taskItem.allEstimates;
                // keep a rolling window of at most five recent durations,
                // dropping the oldest before appending the new one
                if (allEstimates.length >= 5) {
                    allEstimates.shift();
                }
                allEstimates.push(duration);
                meanEstimate = Math.round(allEstimates.reduce((a, b) => a + b, 0) / allEstimates.length);
await this.db.send(new UpdateCommand({
TableName: this.estimatesTable,
Key: {
service: task.service,
},
UpdateExpression: "set #allEstimates = :allEstimates, #meanEstimate = :meanEstimate",
ExpressionAttributeNames: {
"#allEstimates": "allEstimates",
"#meanEstimate": "meanEstimate",
},
ExpressionAttributeValues: {
":allEstimates": allEstimates,
":meanEstimate": meanEstimate,
},
}));
}
else {
meanEstimate = duration;
//no estimates yet
await this.db.send(new UpdateCommand({
TableName: this.estimatesTable,
Key: {
service: task.service,
},
UpdateExpression: "set #allEstimates = :allEstimates, #meanEstimate = :meanEstimate",
ExpressionAttributeNames: {
"#allEstimates": "allEstimates",
"#meanEstimate": "meanEstimate",
},
ExpressionAttributeValues: {
":allEstimates": [duration],
":meanEstimate": meanEstimate,
},
}));
}
return meanEstimate;
}
catch (error) {
console.warn("unable to append duration estimate:", error);
}
}
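    // Example (hypothetical durations in ms): with allEstimates
    // [1200, 900, 1500, 1100, 1000] and a new duration of 800, the oldest
    // sample (1200) is dropped and meanEstimate becomes
    // Math.round((900 + 1500 + 1100 + 1000 + 800) / 5) = 1060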
async fail(task, errorDescription, error) {
if (error)
console.error(error);
task.status = GeoprocessingTaskStatus.Failed;
task.duration = Date.now() - new Date(task.startedAt).getTime();
task.error = errorDescription;
const shouldCache = task.disableCache === undefined || task.disableCache === false;
if (shouldCache) {
await this.db.send(new UpdateCommand({
TableName: this.table,
Key: {
id: task.id,
service: task.service,
},
UpdateExpression: "set #error = :error, #status = :status, #duration = :duration",
ExpressionAttributeNames: {
"#error": "error",
"#status": "status",
"#duration": "duration",
},
ExpressionAttributeValues: {
":error": errorDescription,
":status": task.status,
":duration": task.duration,
},
}));
}
return {
statusCode: 500,
headers: {
...commonHeaders,
"Cache-Control": "max-age=0",
},
body: JSON.stringify(task),
};
}
async get(service, taskId) {
try {
// Get all items under the same partition key (task id)
const query = {
TableName: this.table,
KeyConditionExpression: "#id = :id",
ExpressionAttributeNames: {
"#id": "id",
},
ExpressionAttributeValues: {
":id": taskId,
},
ScanIndexForward: true, // sort ascending by range key (service)
};
// Pager will return a variable number of items, up to 1MB of data
const paginatorConfig = {
client: this.db,
pageSize: 25,
};
const pager = paginateQuery(paginatorConfig, query);
// Build list of items, page by page
const items = [];
for await (const result of pager) {
if (result && result.Items) {
items.push(...result.Items);
}
}
if (!items || items.length === 0)
return undefined;
// Filter down to root and chunk items for service
const serviceItems = items.filter((item) => item.service.includes(service));
// console.log("serviceItemsLength", serviceItems.length);
if (process.env.NODE_ENV !== "test")
console.log("serviceItems", serviceItems.map((item) => item.service).join(", "));
            const rootItemIndex = serviceItems.findIndex((item) => item.service === service);
            if (rootItemIndex === -1)
                return undefined; // no root item found for this service
            // Remove root item, mutating serviceItems
            const rootItem = serviceItems.splice(rootItemIndex, 1)[0];
// Filter for chunk items for this service, just in case there's more under partition key
const chunkItems = serviceItems.filter((item) => item.service.includes(`${service}-chunk`));
// console.log("chunkItemsLength", chunkItems.length);
// console.log(
// "chunkItems",
// chunkItems.map((item) => item.service).join(", ")
// );
// chunkItems.forEach((item, index) => {
// console.log(`chunkItem ${index}`, JSON.stringify(item, null, 2));
// });
// If chunk data, merge it back into root item
if (chunkItems.length > 0) {
if (process.env.NODE_ENV !== "test")
console.log(`Merging ${chunkItems.length} chunks`);
// parse chunk number from service name and sort by chunk number
const chunkStrings = chunkItems
.sort((a, b) => {
const aNum = Number.parseInt(a.service.split("-chunk-")[1]);
const bNum = Number.parseInt(b.service.split("-chunk-")[1]);
return aNum - bNum;
})
.map((item) => item.data.chunk);
rootItem.data = this.fromJsonStrings(chunkStrings);
}
return rootItem;
}
        catch (error) {
            console.error("TasksModel get threw an error");
            if (error instanceof Error) {
                console.error(error.message);
                console.error(error.stack);
            }
            return undefined;
        }
}
async getMeanEstimate(task) {
const service = task.service;
const response = await this.db.send(new GetCommand({
TableName: this.estimatesTable,
Key: {
service,
},
}));
const meanEstimate = response.Item?.meanEstimate;
return meanEstimate;
}
    /**
     * Stringifies a JSON-serializable object and splits the string into pieces
     * no larger than minSplitSizeBytes, cutting at space characters (a piece
     * may run longer if no space is found before the limit)
     * @param rootResult
     * @param options.minSplitSizeBytes target substring size in bytes (default 350KB, safely below the 400KB DynamoDB item limit)
     * @returns array of JSON substrings, in order, for re-assembly
     */
toJsonStrings(rootResult, options = {}) {
const rootString = JSON.stringify(rootResult, null, 1); // add spaces to string for chunking on
const minSplitSizeBytes = options.minSplitSizeBytes || 350 * 1024;
let buf = Buffer.from(rootString);
const result = [];
while (buf.length) {
// Find last space before minSplitSizeBytes
let i = buf.lastIndexOf(32, minSplitSizeBytes + 1);
// If no space found, try forward search
if (i < 0)
i = buf.indexOf(32, minSplitSizeBytes);
// If there's no space at all, take the whole string
if (i < 0)
i = buf.length;
            // A space byte (0x20) never occurs inside a multi-byte UTF-8
            // sequence, so this is a safe cut-off point
const partial = buf.slice(0, i).toString();
result.push(partial);
buf = buf.slice(i + 1); // Skip space (if any)
}
return result;
}
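    // Round-trip sketch (hypothetical 10-byte split size): pieces are cut at
    // the formatting spaces added by JSON.stringify, then rejoined and parsed
    // by fromJsonStrings:
    //   const chunks = tasks.toJsonStrings({ species: "whale", count: 3 }, { minSplitSizeBytes: 10 });
    //   tasks.fromJsonStrings(chunks); // => { species: "whale", count: 3 }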
/**
* Given array of partial JSON strings, joins them together and parses the result
*/
fromJsonStrings(jsonStringChunks) {
const mergedString = jsonStringChunks.join("");
        let parsed;
        try {
            parsed = JSON.parse(mergedString);
        }
        catch (error) {
            if (error instanceof Error) {
                throw new TypeError("Error merging JSON string chunks: " + error.message);
            }
            throw error; // rethrow non-Error values unchanged
        }
        return parsed;
}
}
//# sourceMappingURL=tasks.js.map
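// ---------------------------------------------------------------------------
// Usage sketch (illustrative, not part of the compiled module). Shows one way
// to wire TasksModel to a DynamoDBDocumentClient and run a task through its
// lifecycle. The table names, service name, and result payload below are
// hypothetical.
import { DynamoDBClient } from "@aws-sdk/client-dynamodb";
import { DynamoDBDocumentClient } from "@aws-sdk/lib-dynamodb";

const db = DynamoDBDocumentClient.from(new DynamoDBClient({}));
const tasks = new TasksModel("gp-tasks", "gp-estimates", db);

// Create a pending task; a cache record is written unless disableCache is set
const task = await tasks.create("area");

// ...run the geoprocessing function, then persist its results...
await tasks.complete(task, { areaSqKm: 1234 });

// Record this run's duration so future create() calls return a better estimate
await tasks.updateEstimate(task);

// Later requests can fetch the cached result; chunked data is reassembled
const cached = await tasks.get("area", task.id);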