ai-functions
Version:
Core AI primitives for building intelligent applications
492 lines (413 loc) • 14.2 kB
text/typescript
/**
* Batch Processing Workflow Example (1000+ items)
*
* This example demonstrates processing large batches efficiently using ai-functions.
* It shows how to:
* - Process 1000+ items with automatic batching
* - Use provider batch APIs for 50% cost savings
* - Handle progress tracking and error recovery
* - Implement parallel processing with concurrency limits
*
* @example
* ```bash
* ANTHROPIC_API_KEY=sk-... npx tsx examples/08-batch-processing.ts
* ```
*/
import {
write,
list,
ai,
is,
configure,
createBatch,
withBatch,
BatchQueue,
withRetry,
BudgetTracker,
type BatchItem,
} from '../src/index.js'
// For demo, use memory adapter
import '../src/batch/memory.js'
// ============================================================================
// Types
// ============================================================================
interface Product {
id: string
name: string
description: string
category: string
}
interface ProcessedProduct {
id: string
originalName: string
enhancedDescription: string
seoTitle: string
seoKeywords: string[]
sentiment: string
qualityScore: number
}
interface BatchProgress {
total: number
processed: number
successful: number
failed: number
startTime: number
estimatedRemaining: number
}
// ============================================================================
// Sample Data Generator
// ============================================================================
function generateSampleProducts(count: number): Product[] {
const categories = ['Electronics', 'Clothing', 'Home & Garden', 'Sports', 'Books', 'Toys']
const adjectives = ['Premium', 'Deluxe', 'Basic', 'Pro', 'Ultra', 'Eco', 'Smart', 'Classic']
const nouns = ['Widget', 'Gadget', 'Tool', 'Device', 'Item', 'Product', 'Solution', 'System']
const products: Product[] = []
for (let i = 0; i < count; i++) {
const adj = adjectives[Math.floor(Math.random() * adjectives.length)]
const noun = nouns[Math.floor(Math.random() * nouns.length)]
const category = categories[Math.floor(Math.random() * categories.length)]
products.push({
id: `PROD-${String(i + 1).padStart(5, '0')}`,
name: `${adj} ${noun} ${i + 1}`,
description: `A high-quality ${noun.toLowerCase()} designed for ${category.toLowerCase()} enthusiasts. Features include advanced technology and durable construction.`,
category,
})
}
return products
}
// ============================================================================
// Progress Tracking
// ============================================================================
class ProgressTracker {
private progress: BatchProgress
constructor(total: number) {
this.progress = {
total,
processed: 0,
successful: 0,
failed: 0,
startTime: Date.now(),
estimatedRemaining: 0,
}
}
update(success: boolean): void {
this.progress.processed++
if (success) {
this.progress.successful++
} else {
this.progress.failed++
}
// Calculate estimated remaining time
const elapsed = Date.now() - this.progress.startTime
const rate = this.progress.processed / elapsed
const remaining = this.progress.total - this.progress.processed
this.progress.estimatedRemaining = remaining / rate
}
display(): void {
const percent = ((this.progress.processed / this.progress.total) * 100).toFixed(1)
const elapsed = ((Date.now() - this.progress.startTime) / 1000).toFixed(0)
const remaining = (this.progress.estimatedRemaining / 1000).toFixed(0)
// Clear line and update progress
process.stdout.write(
`\r[${percent}%] ${this.progress.processed}/${this.progress.total} | Success: ${this.progress.successful} | Failed: ${this.progress.failed} | Elapsed: ${elapsed}s | ETA: ${remaining}s `
)
}
getStats(): BatchProgress {
return { ...this.progress }
}
}
// ============================================================================
// Batch Processor
// ============================================================================
class ProductEnhancer {
private budgetTracker: BudgetTracker
private concurrency: number
private results: ProcessedProduct[] = []
private errors: { id: string; error: string }[] = []
constructor(options: { maxCost?: number; concurrency?: number } = {}) {
this.budgetTracker = new BudgetTracker({
maxCost: options.maxCost || 100,
maxTokens: 1000000,
alertThresholds: [0.5, 0.8, 0.95],
onAlert: (alert) => {
console.log(`\n[Budget Alert] ${(alert.threshold * 100).toFixed(0)}% of budget used`)
},
})
this.concurrency = options.concurrency || 10
}
/**
* Process a single product
*/
private async processProduct(product: Product): Promise<ProcessedProduct> {
// Generate enhanced description
const enhanced = await ai`Enhance this product description for better marketing appeal:
Product: ${product.name}
Category: ${product.category}
Original Description: ${product.description}
Provide:
- enhancedDescription: improved, engaging description (2-3 sentences)
- seoTitle: SEO-optimized title (under 60 chars)
- seoKeywords: array of 5 relevant keywords
- sentiment: the tone (professional/casual/luxury/budget)
- qualityScore: quality score 1-10 based on the original`
// Track token usage (estimated)
this.budgetTracker.recordUsage({
inputTokens: 150,
outputTokens: 100,
model: 'sonnet',
})
return {
id: product.id,
originalName: product.name,
enhancedDescription: (enhanced as any).enhancedDescription || '',
seoTitle: (enhanced as any).seoTitle || '',
seoKeywords: (enhanced as any).seoKeywords || [],
sentiment: (enhanced as any).sentiment || 'professional',
qualityScore: (enhanced as any).qualityScore || 5,
}
}
/**
* Process products in chunks with concurrency control
*/
async processInChunks(products: Product[]): Promise<void> {
console.log(
`\nProcessing ${products.length} products with concurrency ${this.concurrency}...\n`
)
const tracker = new ProgressTracker(products.length)
// Process in chunks for better memory management
const chunkSize = this.concurrency * 5
const chunks: Product[][] = []
for (let i = 0; i < products.length; i += chunkSize) {
chunks.push(products.slice(i, i + chunkSize))
}
for (const chunk of chunks) {
// Process chunk items with concurrency limit
const promises = chunk.map(async (product) => {
try {
const result = await withRetry(() => this.processProduct(product), {
maxRetries: 2,
baseDelay: 1000,
})
this.results.push(result)
tracker.update(true)
} catch (error) {
this.errors.push({ id: product.id, error: (error as Error).message })
tracker.update(false)
}
tracker.display()
})
// Process with concurrency limit
const executing: Promise<void>[] = []
for (const promise of promises) {
const p = promise.then(() => {
executing.splice(executing.indexOf(p), 1)
})
executing.push(p)
if (executing.length >= this.concurrency) {
await Promise.race(executing)
}
}
await Promise.all(executing)
}
console.log('\n') // New line after progress
}
/**
* Use batch API for processing (50% cost savings)
*/
async processWithBatchAPI(products: Product[]): Promise<void> {
console.log(`\nUsing Batch API for ${products.length} products (50% cost savings)...\n`)
// Create batch queue
const batch = createBatch({
provider: 'openai',
autoSubmit: {
threshold: 100,
maxWait: 5000,
},
})
// Add all items to batch
const promises = products.map((product) =>
batch.add(
`Enhance this product description:
Name: ${product.name}
Category: ${product.category}
Description: ${product.description}
Return: enhanced description, SEO title, and 5 keywords as JSON`
)
)
console.log(`Added ${products.length} items to batch queue`)
// Submit batch
console.log('Submitting batch...')
const submission = await batch.submit()
if (submission.job) {
console.log(`Batch submitted: ${submission.job.id}`)
console.log('Status: Processing (this would take up to 24 hours in production)')
}
// For demo, we simulate the results
console.log('\n[Demo mode: Simulating batch results]')
}
/**
* Get processing results
*/
getResults(): {
results: ProcessedProduct[]
errors: { id: string; error: string }[]
stats: {
total: number
successful: number
failed: number
cost: number
tokens: number
}
} {
return {
results: this.results,
errors: this.errors,
stats: {
total: this.results.length + this.errors.length,
successful: this.results.length,
failed: this.errors.length,
cost: this.budgetTracker.getTotalCost(),
tokens: this.budgetTracker.getTotalTokens(),
},
}
}
}
// ============================================================================
// Using list.map() for Batch Processing
// ============================================================================
async function processWithListMap(): Promise<void> {
console.log('\n--- Using list.map() for automatic batching ---\n')
// Generate ideas and process each in batch
const ideas = await list`10 product improvement ideas for a smart home device`
console.log(`Generated ${ideas.length} ideas`)
// Map processes each item - batched automatically when batchMode is enabled
const evaluated = await Promise.all(
ideas.map(async (idea) => {
const { feasibility, cost, impact } = await ai`Evaluate this product improvement idea:
"${idea}"
Provide:
- feasibility: score 1-10
- cost: estimated cost (low/medium/high)
- impact: customer impact score 1-10`
return {
idea,
feasibility,
cost,
impact,
}
})
)
console.log('\nEvaluated ideas:')
evaluated.forEach((e, i) => {
console.log(
` ${i + 1}. ${(e.idea as string).substring(0, 40)}... (feasibility: ${
e.feasibility
}, impact: ${e.impact})`
)
})
}
// ============================================================================
// Parallel Processing with Streams
// ============================================================================
async function processWithStreams(products: Product[]): Promise<void> {
console.log('\n--- Stream-based Processing ---\n')
let processed = 0
const total = products.length
// Process as async generator
async function* processGenerator(): AsyncGenerator<ProcessedProduct> {
for (const product of products) {
const result = await ai`Quick enhancement for: ${product.name}`
processed++
if (processed % 10 === 0) {
console.log(` Processed ${processed}/${total}`)
}
yield {
id: product.id,
originalName: product.name,
enhancedDescription: result as string,
seoTitle: product.name,
seoKeywords: [],
sentiment: 'professional',
qualityScore: 7,
}
}
}
// Consume stream
const results: ProcessedProduct[] = []
for await (const result of processGenerator()) {
results.push(result)
if (results.length >= 5) break // Demo limit
}
console.log(`\nProcessed ${results.length} products via stream`)
}
// ============================================================================
// Main Example
// ============================================================================
async function main() {
console.log('\n=== Batch Processing Workflow Example (1000+ items) ===\n')
// Configure the AI provider
configure({
model: 'sonnet',
provider: 'anthropic',
batchMode: 'auto',
batchThreshold: 10,
})
// Generate sample products
const smallBatch = generateSampleProducts(25)
const largeBatch = generateSampleProducts(100)
console.log(`Generated ${smallBatch.length} products for demo`)
console.log(`Would generate ${largeBatch.length}+ products for production\n`)
// Method 1: Process with concurrency control
console.log('=== Method 1: Concurrent Processing ===')
const enhancer = new ProductEnhancer({
maxCost: 10,
concurrency: 5,
})
await enhancer.processInChunks(smallBatch.slice(0, 10)) // Demo with 10 items
const { stats } = enhancer.getResults()
console.log('\nProcessing Statistics:')
console.log(` Total: ${stats.total}`)
console.log(` Successful: ${stats.successful}`)
console.log(` Failed: ${stats.failed}`)
console.log(` Estimated Cost: $${stats.cost.toFixed(4)}`)
console.log(` Total Tokens: ${stats.tokens}`)
// Method 2: Batch API (50% savings)
console.log('\n=== Method 2: Batch API (50% Cost Savings) ===')
await enhancer.processWithBatchAPI(smallBatch.slice(0, 5))
// Method 3: list.map() automatic batching
console.log('\n=== Method 3: list.map() Automatic Batching ===')
await processWithListMap()
// Method 4: Stream-based processing
console.log('\n=== Method 4: Stream-based Processing ===')
await processWithStreams(smallBatch.slice(0, 5))
// Summary
console.log('\n=== Batch Processing Summary ===')
console.log(`
For processing 1000+ items, consider:
1. Concurrent Processing
- Best for: Real-time results needed
- Cost: Standard pricing
- Latency: Low (parallel execution)
2. Provider Batch API
- Best for: Large volumes, non-urgent
- Cost: 50% discount
- Latency: Up to 24 hours
3. list.map() with batchMode
- Best for: Simple transformations
- Cost: Automatic optimization
- Latency: Variable
4. Stream-based Processing
- Best for: Memory efficiency
- Cost: Standard pricing
- Latency: Progressive results
`)
}
main()
.then(() => {
console.log('\n=== Example Complete ===\n')
process.exit(0)
})
.catch((error) => {
console.error('\nError:', error.message)
process.exit(1)
})