UNPKG

@allma/core-cdk

Version:

Core AWS CDK constructs for deploying the Allma serverless AI orchestration platform.

110 lines 5.02 kB
import {
    S3DataLoaderCustomConfigSchema,
    S3DataLoaderOutputFormat,
    S3DataLoaderOnMissingBehavior,
    TransientStepError,
} from '@allma/core-types';
import { log_error, log_info, log_warn } from '@allma/core-sdk';
import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3';
import { Readable } from 'stream';

// Shared S3 client used whenever the step does not request a specific region.
const s3Client = new S3Client({});

// Clients created for explicit region overrides, keyed by region name, so that
// repeated invocations reuse one client per region instead of constructing a
// fresh client on every call.
const regionalS3Clients = new Map();

/**
 * Returns the S3 client to use for the given region.
 *
 * @param {string | undefined} region - Region override; any falsy value selects the shared default client.
 * @returns {S3Client} A client that is reused across invocations.
 */
const getS3Client = (region) => {
    if (!region) {
        return s3Client;
    }
    let client = regionalS3Clients.get(region);
    if (!client) {
        client = new S3Client({ region });
        regionalS3Clients.set(region, client);
    }
    return client;
};

/**
 * Collects every chunk emitted by a readable stream into a single Buffer.
 *
 * @param {Readable} stream - Stream to drain.
 * @returns {Promise<Buffer>} Resolves on 'end'; rejects if the stream emits 'error'.
 */
const streamToBuffer = (stream) => new Promise((resolve, reject) => {
    const chunks = [];
    stream.on('data', (chunk) => chunks.push(chunk));
    stream.on('error', reject);
    stream.on('end', () => resolve(Buffer.concat(chunks)));
});

/**
 * Drains a readable stream and decodes its bytes to a string.
 *
 * Decoding happens after the buffer promise has resolved rather than inside the
 * stream's 'end' listener, so a decoding failure rejects the returned promise
 * instead of being thrown from an event handler.
 *
 * @param {Readable} stream - Stream to drain.
 * @param {string} [encoding] - Encoding passed to Buffer#toString.
 * @returns {Promise<string>} The decoded stream content.
 */
const streamToString = async (stream, encoding) => {
    const buffer = await streamToBuffer(stream);
    return buffer.toString(encoding);
};

/**
 * Splits an `s3://bucket/key` URI into its bucket and key.
 *
 * The key must be non-empty: an empty key cannot address an object, and letting
 * it through would send a request that can never succeed.
 *
 * @param {string} uri - Fully rendered S3 URI.
 * @returns {{ Bucket: string, Key: string }} The parsed bucket and key.
 * @throws {Error} When the URI does not match `s3://<bucket>/<key>`.
 */
const parseS3Uri = (uri) => {
    const uriMatch = uri.match(/^s3:\/\/([^/]+)\/(.+)$/);
    if (!uriMatch) {
        throw new Error(`Invalid S3 URI format: ${uri}`);
    }
    const [, Bucket, Key] = uriMatch;
    return { Bucket, Key };
};

/**
 * A standard StepHandler for fetching and processing data from S3.
 * It reads its specific configuration from the stepInput, which is a combination
 * of the step's customConfig and the dynamic input from the previous step.
 *
 * @param stepDefinition - Step definition; not read by this handler.
 * @param stepInput - Combined input, validated against S3DataLoaderCustomConfigSchema.
 * @param runtimeState - Runtime state; only `flowExecutionId` is read, as the log correlation id.
 * @returns {Promise<{ outputData: { content: unknown, _meta: object } }>}
 *   `content` is the parsed JSON value, a base64 string (RAW_BUFFER) or decoded text.
 *   When the object is missing and `onMissing` is IGNORE, `content` is null and
 *   `_meta.found` is false.
 * @throws {Error} On invalid input, a malformed S3 URI, an empty or non-stream
 *   response body, or content that cannot be parsed as JSON.
 * @throws {TransientStepError} On S3 errors other than NoSuchKey and AccessDenied
 *   (those two are rethrown unchanged).
 */
export const handleS3DataLoader = async (stepDefinition, stepInput, // This is now the combinedInput
runtimeState) => {
    const correlationId = runtimeState.flowExecutionId;

    // 1. Validate the combined input for this module.
    // The stepInput now contains the fully rendered and merged configuration.
    const configParseResult = S3DataLoaderCustomConfigSchema.safeParse(stepInput);
    if (!configParseResult.success) {
        log_error("Invalid stepInput for system/s3-data-loader.", { errors: configParseResult.error.flatten() }, correlationId);
        throw new Error(`Invalid stepInput for s3-data-loader: ${configParseResult.error.message}`);
    }
    const config = configParseResult.data;

    // 2. The sourceS3Uri is now expected to be fully rendered.
    const renderedS3Uri = config.sourceS3Uri;
    log_info(`Executing S3 Data Loader for URI: ${renderedS3Uri}`, {}, correlationId);
    const { Bucket, Key } = parseS3Uri(renderedS3Uri);

    // 3. Fetch the object, mapping known S3 failures onto the step's error contract.
    let s3Response;
    try {
        const getObjectCommand = new GetObjectCommand({ Bucket, Key });
        s3Response = await getS3Client(config.region).send(getObjectCommand);
    }
    catch (error) {
        if (error.name === 'NoSuchKey') {
            if (config.onMissing === S3DataLoaderOnMissingBehavior.IGNORE) {
                log_warn(`S3 object not found, but onMissing is IGNORE. Continuing.`, { bucket: Bucket, key: Key }, correlationId);
                return { outputData: { content: null, _meta: { found: false } } };
            }
            log_error(`S3 object not found and onMissing is FAIL.`, { bucket: Bucket, key: Key }, correlationId);
            throw error;
        }
        if (error.name === 'AccessDenied') {
            log_error(`S3 Access Denied. Check Lambda IAM permissions.`, { bucket: Bucket, key: Key }, correlationId);
            throw error;
        }
        // Anything else is reported as a TransientStepError.
        log_error('An unexpected S3 error occurred.', { error: error.message, name: error.name }, correlationId);
        throw new TransientStepError(`S3 GetObject failed: ${error.message}`);
    }

    const body = s3Response.Body;
    if (!body || !(body instanceof Readable)) {
        throw new Error('S3 response body is empty or not a readable stream.');
    }

    // 4. Convert the body into the requested output format.
    let content;
    switch (config.outputFormat) {
        case S3DataLoaderOutputFormat.JSON: {
            const jsonString = await streamToString(body, config.encoding);
            try {
                content = JSON.parse(jsonString);
            }
            catch (parseError) {
                if (parseError instanceof SyntaxError) {
                    throw new Error(`InvalidOutputFormat: Failed to parse S3 object content as JSON. Error: ${parseError.message}`);
                }
                throw parseError;
            }
            break;
        }
        case S3DataLoaderOutputFormat.RAW_BUFFER: {
            // Raw bytes are returned base64-encoded.
            const buffer = await streamToBuffer(body);
            content = buffer.toString('base64');
            break;
        }
        case S3DataLoaderOutputFormat.TEXT:
        default: {
            content = await streamToString(body, config.encoding);
            break;
        }
    }

    return {
        outputData: {
            content,
            _meta: {
                found: true,
                sourceS3Uri: renderedS3Uri,
                contentType: s3Response.ContentType,
                contentLength: s3Response.ContentLength,
                eTag: s3Response.ETag,
            },
        },
    };
};
//# sourceMappingURL=s3-loader.js.map