UNPKG

@allma/core-cdk

Version:

Core AWS CDK constructs for deploying the Allma serverless AI orchestration platform.

124 lines 5.8 kB
import { FileDownloadStepPayloadSchema, TransientStepError, ENV_VAR_NAMES, } from '@allma/core-types';
import { log_info, log_error } from '@allma/core-sdk';
import { TemplateService } from '../template-service.js';
import axios from 'axios';
import { S3Client } from '@aws-sdk/client-s3';
import { Upload } from '@aws-sdk/lib-storage';
import { JSONPath } from 'jsonpath-plus';
import { v4 as uuidv4 } from 'uuid';

const s3Client = new S3Client({});
const DEFAULT_BUCKET_NAME = process.env[ENV_VAR_NAMES.ALLMA_EXECUTION_TRACES_BUCKET_NAME];

/**
 * Makes a string safe to send as an S3 user-defined metadata value.
 * Every run of characters outside printable ASCII is percent-encoded;
 * strings that are already printable ASCII are returned unchanged.
 *
 * @param {string} value - Raw metadata value.
 * @returns {string} Printable-ASCII representation of `value`.
 */
const toAsciiMetadataValue = (value) => value.replace(/[^\x20-\x7E]+/g, (run) => {
    try {
        return encodeURIComponent(run);
    }
    catch {
        // encodeURIComponent throws URIError on lone surrogates; substitute a placeholder.
        return '_';
    }
});

/**
 * Derives a file name from the last path segment of a URL, ignoring the
 * query string and fragment.
 *
 * @param {string} sourceUrl - The rendered download URL.
 * @returns {string} The last path segment, or 'unknown' when it is empty.
 */
const deriveOriginalName = (sourceUrl) => {
    let path;
    try {
        path = new URL(sourceUrl).pathname;
    }
    catch {
        // Not parseable as an absolute URL: strip query/fragment by hand.
        path = sourceUrl.split(/[?#]/)[0];
    }
    return path.split('/').pop() || 'unknown';
};

/**
 * Handles the FILE_DOWNLOAD step.
 * Streams data from a source URL directly to S3 using the @aws-sdk/lib-storage Upload utility.
 * This ensures robust handling of streams even when Content-Length is unknown or the file is large.
 *
 * @param {object} stepDefinition - Step configuration; validated against FileDownloadStepPayloadSchema.
 * @param {object} stepInput - Step input; merged last into the templating context.
 * @param {object} runtimeState - Flow runtime state. Reads `flowExecutionId`,
 *   `currentStepInstanceId` and `currentContextData`.
 * @returns {Promise<{outputData: object}>} Output holding the S3 pointer (`filePointer` and
 *   `content._s3_output_pointer`), `contentType`, `contentLength` (number or undefined) and `_meta`.
 * @throws {Error} When the step definition is invalid or no destination bucket is configured.
 * @throws {TransientStepError} On HTTP 5xx responses, download timeouts and S3 upload timeouts.
 *   Any other failure is rethrown unchanged.
 */
export const handleFileDownload = async (stepDefinition, stepInput, runtimeState) => {
    const correlationId = runtimeState.flowExecutionId;

    // 1. Validation
    const validationResult = FileDownloadStepPayloadSchema.safeParse(stepDefinition);
    if (!validationResult.success) {
        log_error('Invalid configuration for FILE_DOWNLOAD step', { errors: validationResult.error.flatten() }, correlationId);
        throw new Error(`Invalid StepDefinition for FILE_DOWNLOAD: ${validationResult.error.message}`);
    }
    const config = validationResult.data;

    // 2. Context Preparation & Templating
    const templateService = TemplateService.getInstance();
    // Later spreads win: stepInput overrides runtimeState, which overrides currentContextData.
    const templateContext = { ...runtimeState.currentContextData, ...runtimeState, ...stepInput };

    // Render URL
    const sourceUrl = templateService.render(config.sourceUrlTemplate, templateContext);

    // Render Destination Key (falls back to a generated, collision-free key)
    let destinationKey;
    if (config.destinationKeyTemplate) {
        destinationKey = templateService.render(config.destinationKeyTemplate, templateContext);
    }
    else {
        destinationKey = `downloads/${runtimeState.flowExecutionId}/${runtimeState.currentStepInstanceId}/${uuidv4()}`;
    }

    const destinationBucket = config.destinationBucket || DEFAULT_BUCKET_NAME;
    if (!destinationBucket) {
        throw new Error('Destination bucket is not configured. Set destinationBucket or ensure ALLMA_EXECUTION_TRACES_BUCKET_NAME is set.');
    }

    // Build Headers: each configured value is a JSONPath resolved against the template context.
    const headers = {};
    if (config.headersTemplate) {
        for (const [headerName, jsonPath] of Object.entries(config.headersTemplate)) {
            const val = JSONPath({ path: jsonPath, json: templateContext, wrap: false });
            if (val !== undefined && val !== null) {
                headers[headerName] = String(val);
            }
        }
    }

    log_info(`Starting file download`, { sourceUrl, destinationBucket, destinationKey, method: config.method }, correlationId);

    // Declared outside the try block so the catch handler can release the stream.
    let response;
    try {
        // 3. Request Stream
        response = await axios({
            method: config.method || 'GET',
            url: sourceUrl,
            headers: headers,
            responseType: 'stream',
            // `||` is deliberate: a falsy timeoutMs (including 0) falls back to 30 seconds.
            timeout: config.customConfig?.timeoutMs || 30000,
            validateStatus: (status) => status >= 200 && status < 300,
            // Only when explicitly requested: skip TLS certificate verification.
            ...(config.customConfig?.verifySsl === false && {
                httpsAgent: new (await import('https')).Agent({ rejectUnauthorized: false })
            })
        });

        // 4. Stream to S3 using Upload
        const contentType = response.headers['content-type'] || 'application/octet-stream';
        const contentLength = response.headers['content-length'];

        // Use Upload instead of PutObjectCommand to handle streaming correctly
        const upload = new Upload({
            client: s3Client,
            params: {
                Bucket: destinationBucket,
                Key: destinationKey,
                Body: response.data,
                ContentType: contentType,
                // Metadata values are kept printable-ASCII; ASCII URLs are stored verbatim.
                Metadata: {
                    sourceUrl: toAsciiMetadataValue(sourceUrl),
                    originalName: toAsciiMetadataValue(deriveOriginalName(sourceUrl)),
                }
            },
            // Optional: concurrency configuration for performance on large files
            queueSize: 4,
            partSize: 1024 * 1024 * 5, // 5MB
            leavePartsOnError: false,
        });
        await upload.done();

        log_info(`File download completed successfully.`, { key: destinationKey, size: contentLength }, correlationId);

        // 5. Construct Output
        const s3Pointer = {
            bucket: destinationBucket,
            key: destinationKey,
        };
        // A missing or malformed Content-Length header yields undefined rather than NaN.
        const parsedLength = Number.parseInt(contentLength, 10);
        return {
            outputData: {
                filePointer: s3Pointer, // For manual reference
                // Standard wrapper for auto-hydration in downstream steps
                content: { _s3_output_pointer: s3Pointer },
                contentType: contentType,
                contentLength: Number.isNaN(parsedLength) ? undefined : parsedLength,
                _meta: {
                    status: 'SUCCESS',
                    sourceUrl: sourceUrl,
                }
            }
        };
    }
    catch (error) {
        // Release the source stream so a failed upload does not leave the connection open.
        response?.data?.destroy?.();
        log_error(`File download failed`, { sourceUrl, error: error.message }, correlationId);
        if (axios.isAxiosError(error)) {
            // With responseType 'stream', a rejected response still carries an unread body stream.
            error.response?.data?.destroy?.();
            if (error.response?.status && error.response.status >= 500) {
                throw new TransientStepError(`Download failed with server error: ${error.response.status}`);
            }
            if (error.code === 'ECONNABORTED' || error.code === 'ETIMEDOUT') {
                throw new TransientStepError(`Download timed out.`);
            }
        }
        if (error.name === 'TimeoutError' || error.name === 'RequestTimeout') {
            throw new TransientStepError(`S3 Upload timed out: ${error.message}`);
        }
        throw error;
    }
};
//# sourceMappingURL=file-download-handler.js.map