
@allma/core-cdk


Core AWS CDK constructs for deploying the Allma serverless AI orchestration platform.

import { S3Client, PutObjectCommand } from '@aws-sdk/client-s3';
import { DynamoDBClient } from '@aws-sdk/client-dynamodb';
import { DynamoDBDocumentClient, QueryCommand } from '@aws-sdk/lib-dynamodb';
import { z } from 'zod';
import { v4 as uuidv4 } from 'uuid';
import { TransientStepError, ENV_VAR_NAMES, PermanentStepError } from '@allma/core-types';
import { log_info, log_error, log_debug, offloadIfLarge } from '@allma/core-sdk';

const ddbDocClient = DynamoDBDocumentClient.from(new DynamoDBClient({}));
const s3Client = new S3Client({});
const EXECUTION_TRACES_BUCKET_NAME = process.env[ENV_VAR_NAMES.ALLMA_EXECUTION_TRACES_BUCKET_NAME];

// This Zod schema validates the structure of the `stepInput` object
// that is expected for this specific module.
const DdbQueryToS3ManifestConfigSchema = z.object({
    query: z.object({
        tableName: z.string().min(1, "tableName is required."),
        indexName: z.string().min(1, "indexName is required.").optional(),
        keyConditionExpression: z.string().min(1, "keyConditionExpression is required."),
        expressionAttributeValues: z.record(z.union([z.string(), z.number(), z.boolean()])),
        expressionAttributeNames: z.record(z.string()).optional(),
        filterExpression: z.string().min(1).optional(),
        projectionExpression: z.string().min(1).optional(),
    }),
    destination: z.object({
        bucketName: z.string().min(1, "destination bucketName is required."),
        key: z.string().min(1, "destination key is required."),
    }),
    enableItemOffloading: z.boolean().optional().default(false),
});

export const handleDdbQueryToS3Manifest = async (stepDef, stepInput, runtimeState) => {
    const stepInstance = stepDef;
    const correlationId = runtimeState.flowExecutionId;
    log_debug('Received stepInput for DDB to S3 manifest', stepInput, correlationId);

    const configParseResult = DdbQueryToS3ManifestConfigSchema.safeParse(stepInput);
    if (!configParseResult.success) {
        log_error("Invalid stepInput for system/ddb-query-to-s3-manifest module.", { errors: configParseResult.error.flatten() }, correlationId);
        throw new Error(`Invalid configuration for DDB to S3 Manifest step: ${configParseResult.error.message}`);
    }
    const config = configParseResult.data;
    const { enableItemOffloading } = config;

    if (enableItemOffloading && !EXECUTION_TRACES_BUCKET_NAME) {
        throw new PermanentStepError('Item offloading is enabled, but the required ALLMA_EXECUTION_TRACES_BUCKET_NAME environment variable is not configured.');
    }

    const renderedExpressionAttributeValues = config.query.expressionAttributeValues;
    const renderedKey = config.destination.key;
    const renderedTableName = config.query.tableName;

    log_info('Starting DDB query to S3 manifest collection', {
        tableName: renderedTableName,
        s3Key: renderedKey,
        offloadingEnabled: enableItemOffloading,
    }, correlationId);

    let manifestContent = '';
    let lastEvaluatedKey;
    let itemsCollected = 0;
    let itemsOffloaded = 0;
    const MAX_ITEMS_LIMIT = 500000; // Safety brake

    try {
        do {
            const queryParams = {
                TableName: renderedTableName,
                IndexName: config.query.indexName,
                KeyConditionExpression: config.query.keyConditionExpression,
                ExpressionAttributeValues: renderedExpressionAttributeValues,
                ExpressionAttributeNames: config.query.expressionAttributeNames,
                FilterExpression: config.query.filterExpression,
                ProjectionExpression: config.query.projectionExpression,
                ExclusiveStartKey: lastEvaluatedKey,
            };
            const result = await ddbDocClient.send(new QueryCommand(queryParams));

            if (result.Items) {
                for (const item of result.Items) {
                    let itemToWrite = item;
                    if (enableItemOffloading) {
                        // Use a unique prefix for each item to avoid collisions
                        const offloadKeyPrefix = `manifest_items/${correlationId}/${stepInstance.stepInstanceId}`;
                        const offloadedItemOrPointer = await offloadIfLarge(
                            item,
                            EXECUTION_TRACES_BUCKET_NAME, // We've already checked this is defined.
                            `${offloadKeyPrefix}/${uuidv4()}`,
                            correlationId
                            // No threshold is passed, so the system default is used.
                        );
                        // offloadIfLarge returns a wrapper if offloaded
                        if (offloadedItemOrPointer && '_s3_output_pointer' in offloadedItemOrPointer) {
                            itemsOffloaded++;
                        }
                        itemToWrite = offloadedItemOrPointer;
                    }
                    manifestContent += JSON.stringify(itemToWrite) + '\n';
                    itemsCollected++;
                }
            }
            lastEvaluatedKey = result.LastEvaluatedKey;

            if (itemsCollected > MAX_ITEMS_LIMIT) {
                throw new Error(`Safety limit of ${MAX_ITEMS_LIMIT} items reached. Aborting query.`);
            }
        } while (lastEvaluatedKey);

        await s3Client.send(new PutObjectCommand({
            Bucket: config.destination.bucketName,
            Key: renderedKey,
            Body: manifestContent,
            ContentType: 'application/x-jsonlines',
        }));

        log_info(`Successfully created S3 manifest with ${itemsCollected} items.`, {
            bucket: config.destination.bucketName,
            key: renderedKey,
            itemsOffloaded,
        }, correlationId);

        return {
            outputData: {
                manifest: {
                    bucket: config.destination.bucketName,
                    key: renderedKey,
                },
                itemCount: itemsCollected,
                itemsOffloaded,
            },
        };
    }
    catch (error) {
        log_error('Failed during DDB query to S3 manifest creation', {
            errorName: error.name,
            errorMessage: error.message,
            stack: error.stack?.substring(0, 10000), // Log a snippet of the stack
        }, correlationId);
        if (error instanceof PermanentStepError)
            throw error;
        throw new TransientStepError(error.message);
    }
};
//# sourceMappingURL=ddb-query-to-s3-manifest.js.map
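
A minimal usage sketch follows. The stepInput shape mirrors DdbQueryToS3ManifestConfigSchema above; the table, index, bucket, and key names are hypothetical placeholders, and the stepDef/runtimeState objects are reduced to the only fields this handler actually reads (stepInstanceId and flowExecutionId). How these objects are populated by the Allma runtime is not shown in this file.

// Example invocation (illustrative only; names are made up).
import { handleDdbQueryToS3Manifest } from './ddb-query-to-s3-manifest.js';

const stepInput = {
    query: {
        tableName: 'orders-table',                      // hypothetical table name
        indexName: 'byCustomerId',                      // optional GSI
        keyConditionExpression: 'customerId = :cid',
        expressionAttributeValues: { ':cid': 'cust-123' },
        expressionAttributeNames: { '#st': 'status' },  // 'status' is a DynamoDB reserved word
        projectionExpression: 'orderId, createdAt, #st',
    },
    destination: {
        bucketName: 'example-manifest-bucket',          // hypothetical bucket name
        key: 'manifests/cust-123/orders.jsonl',
    },
    enableItemOffloading: false,
};

const result = await handleDdbQueryToS3Manifest(
    { stepInstanceId: 'step-001' },       // stepDef: only stepInstanceId is used (for offload key prefixes)
    stepInput,
    { flowExecutionId: 'flow-abc-123' }   // runtimeState: flowExecutionId becomes the correlation id
);

// The handler returns the manifest location and item counts.
console.log(result.outputData.manifest);   // { bucket: 'example-manifest-bucket', key: 'manifests/cust-123/orders.jsonl' }
console.log(result.outputData.itemCount, result.outputData.itemsOffloaded);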