// Alepha is a convention-driven TypeScript framework for building robust,
// end-to-end type-safe applications, from serverless APIs to full-stack React apps.
import * as _alepha_core1 from "alepha";
import { Descriptor, KIND, Static, TSchema } from "alepha";
import { DateTimeProvider, DurationLike } from "alepha/datetime";
import * as _alepha_logger0 from "alepha/logger";
import * as _alepha_retry0 from "alepha/retry";
import { RetryDescriptorOptions } from "alepha/retry";
import * as typebox0 from "typebox";
//#region src/descriptors/$batch.d.ts
/**
* Creates a batch processing descriptor for efficient grouping and processing of multiple operations.
*
* This descriptor provides a powerful batching mechanism that collects multiple individual items
* and processes them together in groups, significantly improving performance by reducing overhead
* and enabling bulk operations. It supports partitioning, concurrent processing, automatic flushing,
* and intelligent retry mechanisms for robust batch processing workflows.
*
* **Key Features**
*
* - **Intelligent Batching**: Groups items based on size and time thresholds
* - **Partitioning Support**: Process different types of items in separate batches
* - **Concurrent Processing**: Handle multiple batches simultaneously with configurable limits
* - **Automatic Flushing**: Time-based and size-based automatic batch execution
* - **Type Safety**: Full TypeScript support with schema validation using TypeBox
* - **Retry Logic**: Built-in retry mechanisms for failed batch operations
* - **Resource Management**: Automatic cleanup and graceful shutdown handling
*
* **Use Cases**
*
* Perfect for optimizing high-throughput operations:
* - Database bulk inserts and updates
* - API call batching and rate limit optimization
* - Log aggregation and bulk shipping
* - File processing and bulk uploads
* - Event processing and analytics ingestion
* - Notification delivery optimization
* - Cache invalidation batching
*
* @example
* **Basic database batch operations:**
* ```ts
* import { $batch } from "alepha/batch";
* import { t } from "alepha";
*
* class UserService {
* userBatch = $batch({
* schema: t.object({
* id: t.string(),
* name: t.string(),
* email: t.string(),
* createdAt: t.optional(t.string())
* }),
* maxSize: 50, // Process up to 50 users at once
* maxDuration: [5, "seconds"], // Or flush every 5 seconds
* handler: async (users) => {
* // Bulk insert users - much faster than individual inserts
* console.log(`Processing batch of ${users.length} users`);
*
* const result = await this.database.users.insertMany(users.map(user => ({
* ...user,
* createdAt: user.createdAt || new Date().toISOString()
* })));
*
* console.log(`Successfully inserted ${result.length} users`);
* return { inserted: result.length, userIds: result.map(r => r.id) };
* }
* });
*
* async createUser(userData: { name: string; email: string }) {
* // Individual calls are automatically batched
* const result = await this.userBatch.push({
* id: generateId(),
* name: userData.name,
* email: userData.email
* });
*
 *     return result; // Returns the batch result once the batch is processed
* }
* }
* ```
*
* @example
* **API call batching with partitioning:**
* ```ts
* class NotificationService {
* notificationBatch = $batch({
* schema: t.object({
* userId: t.string(),
* type: t.enum(["email", "sms", "push"]),
* message: t.string(),
* priority: t.enum(["high", "normal", "low"])
* }),
* maxSize: 100,
* maxDuration: [10, "seconds"],
* // Partition by notification type for different processing
* partitionBy: (notification) => notification.type,
* concurrency: 3, // Process up to 3 different types simultaneously
* handler: async (notifications) => {
 *       const type = notifications[0].type; // All items in a batch share the same type
* console.log(`Processing ${notifications.length} ${type} notifications`);
*
* switch (type) {
* case 'email':
* return await this.emailProvider.sendBulk(notifications.map(n => ({
* to: n.userId,
* subject: 'Notification',
* body: n.message,
* priority: n.priority
* })));
*
* case 'sms':
* return await this.smsProvider.sendBulk(notifications.map(n => ({
* to: n.userId,
* message: n.message
* })));
*
* case 'push':
* return await this.pushProvider.sendBulk(notifications.map(n => ({
* userId: n.userId,
* title: 'Notification',
* body: n.message,
* priority: n.priority
* })));
* }
* }
* });
*
* async sendNotification(userId: string, type: 'email' | 'sms' | 'push', message: string, priority: 'high' | 'normal' | 'low' = 'normal') {
* // Notifications are automatically batched by type
* return await this.notificationBatch.push({
* userId,
* type,
* message,
* priority
* });
* }
* }
* ```
*
* @example
* **Log aggregation with retry logic:**
* ```ts
* class LoggingService {
* logBatch = $batch({
* schema: t.object({
* timestamp: t.number(),
* level: t.enum(["info", "warn", "error"]),
* message: t.string(),
* metadata: t.optional(t.record(t.string(), t.any())),
* source: t.string()
* }),
* maxSize: 1000, // Large batches for log efficiency
* maxDuration: [30, "seconds"], // Longer duration for log aggregation
* concurrency: 2, // Limit concurrent log shipments
* retry: {
* maxAttempts: 5,
* delay: [2, "seconds"],
* backoff: "exponential"
* },
* handler: async (logEntries) => {
* console.log(`Shipping ${logEntries.length} log entries`);
*
* try {
* // Ship logs to external service (e.g., Elasticsearch, Splunk)
* const response = await this.logShipper.bulkIndex({
* index: 'application-logs',
* body: logEntries.map(entry => ([
* { index: { _index: 'application-logs' } },
* {
* ...entry,
* '@timestamp': new Date(entry.timestamp).toISOString()
* }
* ])).flat()
* });
*
* if (response.errors) {
* console.error(`Some log entries failed to index`, response.errors);
* // Retry will be triggered by throwing
* throw new Error(`Failed to index ${response.errors.length} log entries`);
* }
*
* console.log(`Successfully shipped ${logEntries.length} log entries`);
* return { shipped: logEntries.length, indexedAt: Date.now() };
*
* } catch (error) {
* console.error(`Failed to ship logs batch`, error);
* throw error; // Trigger retry mechanism
* }
* }
* });
*
* async log(level: 'info' | 'warn' | 'error', message: string, metadata?: Record<string, any>, source: string = 'application') {
* // Individual log calls are batched and shipped efficiently
* return await this.logBatch.push({
* timestamp: Date.now(),
* level,
* message,
* metadata,
* source
* });
* }
* }
* ```
*
* @example
* **File processing with dynamic partitioning:**
* ```ts
* class FileProcessingService {
* fileProcessingBatch = $batch({
* schema: t.object({
* filePath: t.string(),
* fileType: t.enum(["image", "video", "document"]),
* processingOptions: t.object({
* quality: t.optional(t.enum(["low", "medium", "high"])),
* format: t.optional(t.string()),
* compress: t.optional(t.boolean())
* }),
* priority: t.enum(["urgent", "normal", "background"])
* }),
* maxSize: 20, // Smaller batches for file processing
* maxDuration: [2, "minutes"], // Reasonable time for file accumulation
* // Partition by file type and priority for optimal resource usage
* partitionBy: (file) => `${file.fileType}-${file.priority}`,
* concurrency: 4, // Multiple concurrent processing pipelines
* retry: {
* maxAttempts: 3,
* delay: [5, "seconds"]
* },
* handler: async (files) => {
* const fileType = files[0].fileType;
* const priority = files[0].priority;
*
* console.log(`Processing ${files.length} ${fileType} files with ${priority} priority`);
*
* try {
* const results = [];
*
* for (const file of files) {
 *         // Delegate to the actual per-file worker (not the batched `processFile` entry point below)
 *         const result = await this.processSingleFile(file.filePath, file.fileType, file.processingOptions);
* results.push({
* originalPath: file.filePath,
* processedPath: result.outputPath,
* size: result.size,
* duration: result.processingTime
* });
* }
*
* // Update database with batch results
* await this.updateProcessingStatus(results);
*
* console.log(`Successfully processed ${files.length} ${fileType} files`);
* return {
* processed: files.length,
* fileType,
* priority,
* totalSize: results.reduce((sum, r) => sum + r.size, 0),
* results
* };
*
* } catch (error) {
* console.error(`Batch file processing failed for ${fileType} files`, error);
* throw error;
* }
* }
* });
*
* async processFile(filePath: string, fileType: 'image' | 'video' | 'document', options: any, priority: 'urgent' | 'normal' | 'background' = 'normal') {
* // Files are automatically batched by type and priority
* return await this.fileProcessingBatch.push({
* filePath,
* fileType,
* processingOptions: options,
* priority
* });
* }
* }
* ```
*/
declare const $batch: {
<TItem extends TSchema, TResponse>(options: BatchDescriptorOptions<TItem, TResponse>): BatchDescriptor<TItem, TResponse>;
[KIND]: typeof BatchDescriptor;
};
interface BatchDescriptorOptions<TItem extends TSchema, TResponse = any> {
/**
* TypeBox schema for validating each item added to the batch.
*
* This schema:
* - Validates every item pushed to the batch for data integrity
* - Provides full TypeScript type inference for batch items
* - Ensures type safety between item producers and batch handlers
* - Enables automatic serialization/deserialization if needed
*
* **Schema Design Guidelines**:
* - Keep schemas focused on the data needed for batch processing
* - Use optional fields for data that might not always be present
* - Include identifiers that might be needed for partitioning
* - Consider versioning for schema evolution in long-running systems
*
* @example
* ```ts
* t.object({
* id: t.string(),
* operation: t.enum(["create", "update"]),
* data: t.record(t.string(), t.any()),
* timestamp: t.optional(t.number()),
* priority: t.optional(t.enum(["high", "normal"]))
* })
* ```
*/
schema: TItem;
/**
* The batch processing handler function that processes arrays of validated items.
*
* This handler:
* - Receives an array of validated items based on the schema
* - Should implement bulk operations for maximum efficiency
* - Can be async and perform any operations (database, API calls, etc.)
* - Should handle errors appropriately (retry logic is provided separately)
* - Has access to the full Alepha dependency injection container
* - Returns results that will be provided to all items in the batch
*
* **Handler Design Guidelines**:
* - Implement true bulk operations rather than loops of individual operations
* - Use transactions when processing related data for consistency
* - Log batch processing progress and results for monitoring
* - Handle partial failures gracefully when possible
* - Consider memory usage for large batch sizes
*
* **Performance Considerations**:
* - Batch operations should be significantly faster than individual operations
* - Use database bulk operations (INSERT, UPDATE, etc.) when available
* - Optimize for the expected batch size and data characteristics
* - Consider connection pooling and resource limits
*
* @param items - Array of validated items to process in this batch
* @returns Result that will be returned to all callers who contributed items to this batch
*
* @example
* ```ts
* handler: async (items) => {
* console.log(`Processing batch of ${items.length} items`);
*
* try {
* // Use bulk operations for maximum efficiency
* const results = await this.database.transaction(async (tx) => {
* const insertResults = await tx.items.insertMany(items);
*
* // Update related data in bulk
* const updates = items.map(item => ({ id: item.id, processed: true }));
* await tx.items.updateMany(updates);
*
* return insertResults;
* });
*
* // Log successful processing
* console.log(`Successfully processed ${items.length} items`);
*
* return {
* processed: items.length,
* results: results.map(r => ({ id: r.id, status: 'success' })),
* timestamp: Date.now()
* };
*
* } catch (error) {
* console.error(`Batch processing failed for ${items.length} items`, error);
* throw error; // Will trigger retry logic if configured
* }
* }
* ```
*/
handler: (items: Static<TItem>[]) => TResponse;
/**
* Maximum number of items to collect before automatically flushing the batch.
*
* When this threshold is reached, the batch will be processed immediately
* regardless of the time duration. This provides an upper bound on batch size
* and ensures processing doesn't wait indefinitely for more items.
*
* **Size Selection Guidelines**:
* - Database operations: 100-1000 items depending on record size
* - API calls: 10-100 items depending on rate limits and payload size
* - File operations: 10-50 items depending on processing complexity
* - Memory operations: 1000+ items for simple transformations
*
* **Trade-offs**:
* - Larger batches: Better efficiency, higher memory usage, longer latency
* - Smaller batches: Lower latency, less efficiency, more frequent processing
*
* @default 10
*
* @example 50 // Good for database bulk operations
* @example 100 // Good for API batching with rate limits
* @example 1000 // Good for high-throughput log processing
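 *
 * A minimal sketch of how `maxSize` interacts with `maxDuration` (names
 * here are illustrative): the batch flushes as soon as 25 items are
 * pending, or 2 seconds after the first pending item arrives, whichever
 * happens first.
 *
 * @example
 * ```ts
 * const metricsBatch = $batch({
 *   schema: t.object({ name: t.string(), value: t.number() }),
 *   maxSize: 25,                 // flush on the 25th pending item...
 *   maxDuration: [2, "seconds"], // ...or 2s after the first item arrives
 *   handler: async (metrics) => {
 *     // `metricsSink` is a hypothetical bulk writer, used for illustration only
 *     await metricsSink.writeMany(metrics);
 *   }
 * });
 * ```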
*/
maxSize?: number;
/**
* Maximum time to wait before flushing a batch, even if it hasn't reached maxSize.
*
* This timer starts when the first item is added to a partition and ensures
* that items don't wait indefinitely for a batch to fill up. It provides
* a maximum latency guarantee for batch processing.
*
* **Duration Selection Guidelines**:
* - Real-time systems: 100ms - 1 second for low latency
* - Background processing: 5 - 30 seconds for higher throughput
* - Bulk operations: 1 - 5 minutes for maximum efficiency
* - Log shipping: 30 seconds - 2 minutes for log aggregation
*
* **Latency Impact**:
* - Shorter durations: Lower latency, potentially smaller batches
* - Longer durations: Higher throughput, potentially better efficiency
*
* @default [1, "second"]
*
* @example [500, "milliseconds"] // Low latency for real-time processing
* @example [10, "seconds"] // Balanced latency and throughput
* @example [2, "minutes"] // High throughput for bulk operations
*/
maxDuration?: DurationLike;
/**
* Function to determine partition keys for grouping items into separate batches.
*
* Items with the same partition key are batched together, while items with
* different keys are processed in separate batches. This enables:
* - Processing different types of items with different logic
* - Parallel processing of independent item groups
* - Resource optimization based on item characteristics
*
* **Partitioning Strategies**:
* - By type: Group similar operations together
* - By destination: Group items going to the same endpoint
* - By priority: Process high-priority items separately
* - By size/complexity: Group items with similar processing requirements
* - By tenant/user: Process items per customer or tenant
*
* **Partition Key Guidelines**:
* - Use descriptive, consistent naming
* - Keep key cardinality reasonable (avoid too many unique keys)
* - Consider memory impact of multiple active partitions
* - Balance between parallelism and resource usage
*
* If not provided, all items are placed in a single default partition.
*
* @param item - The validated item to determine partition for
* @returns String key identifying the partition this item belongs to
*
* @example
* ```ts
* // Partition by operation type
* partitionBy: (item) => item.operation,
*
* // Partition by priority and type
* partitionBy: (item) => `${item.priority}-${item.type}`,
*
* // Partition by destination service
* partitionBy: (item) => item.targetService,
*
* // Dynamic partitioning based on size
* partitionBy: (item) => {
* const size = JSON.stringify(item).length;
* return size > 1000 ? 'large' : 'small';
* }
* ```
*/
partitionBy?: (item: Static<TItem>) => string;
/**
* Maximum number of batch handlers that can execute simultaneously.
*
* This controls the level of parallelism for batch processing across
* all partitions. Higher concurrency can improve throughput but may
* increase resource usage and contention.
*
* **Concurrency Considerations**:
* - Database operations: Limit based on connection pool size
* - API calls: Consider rate limits and server capacity
* - CPU-intensive operations: Set to number of CPU cores
* - Memory-intensive operations: Consider available RAM
* - I/O operations: Can be higher than CPU count
*
* **Resource Planning**:
* - Each concurrent handler may use significant memory/connections
* - Monitor resource usage and adjust based on system capacity
* - Consider downstream system limits and capabilities
*
* @default 1
*
* @example 1 // Sequential processing, lowest resource usage
* @example 4 // Moderate parallelism for balanced systems
* @example 10 // High concurrency for I/O-bound operations
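 *
 * A short sketch of `concurrency` combined with `partitionBy` (names are
 * illustrative): any number of tenant partitions may hold pending items,
 * but at most two batch handlers execute at the same time.
 *
 * @example
 * ```ts
 * const exportBatch = $batch({
 *   schema: t.object({ tenantId: t.string(), payload: t.string() }),
 *   partitionBy: (item) => item.tenantId, // one batch per tenant
 *   concurrency: 2,                       // at most 2 handlers run in parallel
 *   handler: async (items) => {
 *     // `exporter` is a hypothetical bulk client, used for illustration only
 *     await exporter.sendBulk(items);
 *   }
 * });
 * ```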
*/
concurrency?: number;
/**
* Retry configuration for failed batch processing operations.
*
* When batch handlers fail, this configuration determines how and when
 * to retry the operation. Uses the `alepha/retry` module for robust
* retry logic with exponential backoff, jitter, and other strategies.
*
* **Retry Strategies**:
* - Exponential backoff: Increasingly longer delays between attempts
* - Fixed delays: Consistent intervals between retries
* - Jitter: Random variation to avoid thundering herd problems
*
* **Failure Scenarios to Consider**:
* - Temporary network issues
* - Database connection problems
* - Rate limiting from external services
* - Resource exhaustion (memory, disk space)
* - Downstream service temporary unavailability
*
* **Retry Guidelines**:
* - Use exponential backoff for network-related failures
* - Set reasonable max attempts to avoid infinite loops
* - Consider the impact of retries on overall system performance
* - Monitor retry patterns to identify systemic issues
*
* @example
* ```ts
* retry: {
* maxAttempts: 3,
* delay: [1, "second"],
* backoff: "exponential",
* maxDelay: [30, "seconds"],
* jitter: true
* }
* ```
*/
retry?: Omit<RetryDescriptorOptions<() => Array<Static<TItem>>>, "handler">;
}
declare class BatchDescriptor<TItem extends TSchema, TResponse = any> extends Descriptor<BatchDescriptorOptions<TItem, TResponse>> {
protected readonly log: _alepha_logger0.Logger;
protected readonly dateTime: DateTimeProvider;
protected readonly partitions: Map<any, any>;
protected activeHandlers: PromiseWithResolvers<void>[];
protected retry: _alepha_retry0.RetryDescriptorFn<(items: typebox0.StaticType<"Encode", {}, {}, TItem>[]) => TResponse>;
/**
 * Pushes an item into the batch. The item is validated against `schema` and
 * processed asynchronously with other items when the batch is flushed. The
 * returned promise resolves with the handler's result for the batch that
 * contained this item.
*/
push(item: Static<TItem>): Promise<TResponse>;
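/**
 * Flushes pending items immediately instead of waiting for `maxSize` or
 * `maxDuration` to trigger. Passing a `partitionKey` limits the flush to
 * that partition; omitting it flushes every pending partition.
 */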
flush(partitionKey?: string): Promise<void>;
protected flushPartition(partitionKey: string): Promise<void>;
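/** Lifecycle hook bound to "stop", used for graceful shutdown of pending batches. */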
protected readonly dispose: _alepha_core1.HookDescriptor<"stop">;
}
//#endregion
//#region src/index.d.ts
/**
* This module allows you to group multiple asynchronous operations into a single "batch," which is then processed together.
* This is an essential pattern for improving performance, reducing I/O, and interacting efficiently with rate-limited APIs or databases.
*
* ```ts
* import { Alepha, $hook, run, t } from "alepha";
* import { $batch } from "alepha/batch";
*
* class LoggingService {
* // define the batch processor
* logBatch = $batch({
* schema: t.string(),
* maxSize: 10,
* maxDuration: [5, "seconds"],
* handler: async (items) => {
* console.log(`[BATCH LOG] Processing ${items.length} events:`, items);
* },
* });
*
* // example of how to use it
* onReady = $hook({
* on: "ready",
* handler: async () => {
* this.logBatch.push("Application started.");
* this.logBatch.push("User authenticated.");
* // ... more events pushed from elsewhere in the app
* },
* });
* }
* ```
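 *
 * Each `push()` returns a promise that resolves with the batch handler's
 * result once the batch containing that item has been processed. A minimal
 * sketch of forcing an early flush (e.g. before shutdown), assuming a
 * `loggingService` instance of the class above:
 *
 * ```ts
 * const ack = loggingService.logBatch.push("Final event");
 * await loggingService.logBatch.flush(); // process all pending items now
 * await ack;                             // resolves once the handler has run
 * ```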
*
* @see {@link $batch}
* @module alepha.batch
*/
declare const AlephaBatch: _alepha_core1.Service<_alepha_core1.Module>;
//#endregion
export { $batch, AlephaBatch, BatchDescriptor, BatchDescriptorOptions };
//# sourceMappingURL=index.d.ts.map