ai-functions
Version:
Core AI primitives for building intelligent applications
337 lines (296 loc) • 10.7 kB
text/typescript
/**
* BatchProvider Port
*
* Defines the explicit port (interface) that every batch provider adapter must
* satisfy, plus helpers that concentrate logic genuinely shared across adapters
* (polling, concurrent processing, in-memory job tracking, JSON-Schema conversion).
*
* Each provider file (`anthropic.ts`, `openai.ts`, `google.ts`, `bedrock.ts`,
* `cloudflare.ts`, `memory.ts`) is now a small adapter that satisfies this port
* and concentrates on provider-specific HTTP/SDK calls and request/response shapes.
*
* Port + multiple adapters = real seam. The port lives here so provider files don't
* import shared logic from each other and can't accidentally diverge.
*
* @packageDocumentation
*/
import type {
BatchAdapter,
BatchItem,
BatchJob,
BatchQueueOptions,
BatchResult,
BatchStatus,
} from '../batch-queue.js'
// Re-export the port and its types so adapters can import everything from
// `./provider.js` rather than reaching into `../batch-queue.js`.
export type {
BatchAdapter,
BatchItem,
BatchJob,
BatchQueueOptions,
BatchResult,
BatchSubmitResult,
BatchProvider,
BatchStatus,
FlexAdapter,
} from '../batch-queue.js'
export { registerBatchAdapter, registerFlexAdapter } from '../batch-queue.js'
// ============================================================================
// Polling helper (shared by all adapters that have async batch jobs)
// ============================================================================
/**
 * Terminal statuses of a batch job. Once the polling helper observes one of
 * these it stops polling.
 */
const TERMINAL_STATUSES: ReadonlySet<BatchStatus> = new Set<BatchStatus>([
  'completed',
  'failed',
  'cancelled',
  'expired',
])

/** Statuses for which the polling helper fetches results by default. */
const RESULT_STATUSES: ReadonlySet<BatchStatus> = new Set<BatchStatus>([
  'completed',
  'cancelled',
  'failed',
])
/**
 * Generic `waitForCompletion` built from an adapter's `getStatus` and
 * `getResults`. The status is re-read every `pollInterval` milliseconds until
 * polling ends, at which point the promise returned by `getResults` is handed
 * back to the caller.
 *
 * Polling ends when the observed status is in `fetchResultsOn` **or** is one of
 * the module's terminal statuses, so a terminal status always results in a
 * `getResults` call unless `throwOn` intercepts it first.
 *
 * @param adapter The batch adapter to poll (only `getStatus`/`getResults` are used)
 * @param batchId The batch id to poll
 * @param options.pollInterval Delay between status checks in ms (default: 5000)
 * @param options.fetchResultsOn Statuses for which results should be fetched.
 *   Defaults to `completed`, `cancelled`, `failed`.
 * @param options.throwOn Statuses that reject instead of fetching results. This
 *   check runs before the result/terminal check.
 * @returns The results returned by `adapter.getResults(batchId)`
 * @throws Error with message `Batch <status>` when the status is in `throwOn`
 */
export async function pollUntilComplete(
  adapter: Pick<BatchAdapter, 'getStatus' | 'getResults'>,
  batchId: string,
  options: {
    pollInterval?: number
    fetchResultsOn?: ReadonlySet<BatchStatus>
    throwOn?: ReadonlySet<BatchStatus>
  } = {}
): Promise<BatchResult[]> {
  const intervalMs = options.pollInterval ?? 5000
  const resultStatuses = options.fetchResultsOn ?? RESULT_STATUSES
  const fatalStatuses = options.throwOn

  for (;;) {
    const job = await adapter.getStatus(batchId)
    const current = job.status

    if (fatalStatuses?.has(current)) {
      throw new Error(`Batch ${current}`)
    }

    const finished = resultStatuses.has(current) || TERMINAL_STATUSES.has(current)
    if (!finished) {
      // Not done yet: wait one interval, then ask again.
      await sleep(intervalMs)
      continue
    }

    return adapter.getResults(batchId)
  }
}
// ============================================================================
// Concurrent processing helper (shared by flex adapters and "local" providers)
// ============================================================================
/**
 * Run `processItem` over `items` in consecutive waves of at most
 * `concurrency` items, optionally sleeping between waves to respect provider
 * rate limits. Per-item failures (rejections or synchronous throws) are caught
 * and emitted as `{ status: 'failed', error }` results so a single bad item
 * never poisons the whole batch.
 *
 * Used by flex adapters (OpenAI, Google, Bedrock) and by the "local" providers
 * (Google, Bedrock, Cloudflare) that fake batch processing with concurrent
 * direct API calls.
 *
 * @param items Items to process; results are returned in the same order
 * @param processItem Async worker invoked once per item
 * @param options.concurrency Maximum items per wave (default: 10). Values
 *   below 1 are clamped to 1, fractions are rounded down, and `NaN` falls back
 *   to the default, so the loop always makes progress and never drops items.
 * @param options.delayBetweenWaves Pause in ms between waves (default: 0);
 *   no pause is inserted after the final wave
 * @param options.onWaveComplete Called after every wave with all results
 *   accumulated so far (the same array instance that is eventually returned)
 * @returns One result per input item, in input order
 */
export async function processConcurrently(
  items: BatchItem[],
  processItem: (item: BatchItem) => Promise<BatchResult>,
  options: {
    concurrency?: number
    delayBetweenWaves?: number
    onWaveComplete?: (results: BatchResult[]) => void
  } = {}
): Promise<BatchResult[]> {
  const defaultConcurrency = 10
  const requested = options.concurrency ?? defaultConcurrency
  // A wave size of 0 or less would never advance the cursor (infinite loop),
  // and NaN would end the loop immediately and silently return no results.
  const waveSize = Number.isNaN(requested)
    ? defaultConcurrency
    : Math.max(1, Math.floor(requested))
  const delay = options.delayBetweenWaves ?? 0
  const results: BatchResult[] = []

  for (let start = 0; start < items.length; start += waveSize) {
    const wave = items.slice(start, start + waveSize)
    const waveResults = await Promise.all(
      wave.map(async (item) => {
        try {
          return await processItem(item)
        } catch (error) {
          return failedResult(item, error)
        }
      })
    )

    // Append one by one: spreading a very large wave into push() can exceed
    // the engine's argument limit.
    for (const result of waveResults) {
      results.push(result)
    }
    options.onWaveComplete?.(results)

    const hasMoreWaves = start + waveSize < items.length
    if (delay > 0 && hasMoreWaves) {
      await sleep(delay)
    }
  }

  return results
}
/**
 * Build a `failed` BatchResult for `item` from an unknown thrown value.
 *
 * The error text is taken from, in order: an `Error` instance's `message`, a
 * non-empty thrown string, or a non-empty string `message` property on a thrown
 * object (SDKs sometimes reject with plain objects). Anything else is reported
 * as `'Unknown error'`.
 *
 * @param item The item whose processing failed; its `id` is used for both
 *   `id` and `customId`
 * @param error The value that was thrown or rejected with
 * @returns A result with `status: 'failed'` and a human-readable `error`
 */
export function failedResult(item: BatchItem, error: unknown): BatchResult {
  let message = 'Unknown error'

  if (error instanceof Error) {
    message = error.message
  } else if (typeof error === 'string') {
    // `throw 'rate limited'` is poor style but common; keep the text.
    if (error.length > 0) message = error
  } else if (typeof error === 'object' && error !== null && 'message' in error) {
    const candidate = (error as { message: unknown }).message
    if (typeof candidate === 'string' && candidate.length > 0) message = candidate
  }

  return {
    id: item.id,
    customId: item.id,
    status: 'failed',
    error: message,
  }
}
// ============================================================================
// LocalJobStore — in-memory job tracking shared by google/bedrock/cloudflare
// ============================================================================
/**
* Internal job state for adapters that don't have a real provider-side batch
* API and need to track jobs locally (Google, Bedrock, Cloudflare).
*
* Instances are created by `LocalJobStore.create`, which starts them with an
* empty `results` array and status `'pending'`. The store hands out the same
* object it keeps internally, so callers update a job by mutating it.
*/
export interface LocalJobState {
/** Items submitted for this job; their count is reported as `totalItems` in snapshots. */
items: BatchItem[]
/** Queue options supplied when the job was created. */
options: BatchQueueOptions
/** Results collected so far; snapshots count the `completed` and `failed` entries. */
results: BatchResult[]
/** Current status of the job. */
status: BatchStatus
/** Time at which the job was registered in the store. */
createdAt: Date
/** Completion time; copied into snapshots only when set. */
completedAt?: Date
/** Optional adapter-specific metadata (e.g. Bedrock's jobArn) */
meta?: Record<string, unknown>
}
/**
 * Per-provider in-memory job registry. Encapsulates the
 * `Map<jobId, state>` + counter + status/result lookup pattern that
 * google/bedrock/cloudflare were each duplicating.
 *
 * The store hands out the same `LocalJobState` object it keeps internally, so
 * an adapter advances a job simply by mutating the state it got from `create`
 * or `get`.
 */
export class LocalJobStore {
  /** Tracked jobs keyed by generated job id. */
  private readonly jobs = new Map<string, LocalJobState>()
  /** Sequence number embedded in generated ids; reset by `clear()`. */
  private counter = 0

  /** @param idPrefix Prefix placed at the start of every generated job id. */
  constructor(private readonly idPrefix: string) {}

  /**
   * Register a new job in `pending` status with no results.
   *
   * @param items Items belonging to the job (stored by reference)
   * @param options Queue options to keep alongside the job
   * @returns The generated id (`<prefix>_<sequence>_<timestamp>`) and the live state object
   */
  create(items: BatchItem[], options: BatchQueueOptions): { id: string; state: LocalJobState } {
    this.counter += 1
    const id = `${this.idPrefix}_${this.counter}_${Date.now()}`
    const state: LocalJobState = {
      items,
      options,
      results: [],
      status: 'pending',
      createdAt: new Date(),
    }
    this.jobs.set(id, state)
    return { id, state }
  }

  /**
   * Look up the live state of a tracked job.
   *
   * @throws Error `Batch not found: <id>` when the id is not tracked
   */
  get(id: string): LocalJobState {
    const state = this.jobs.get(id)
    if (!state) {
      throw new Error(`Batch not found: ${id}`)
    }
    return state
  }

  /** Whether a job with this id is currently tracked. */
  has(id: string): boolean {
    return this.jobs.has(id)
  }

  /**
   * Build a `BatchJob` snapshot for a tracked job.
   *
   * @param id Id of a tracked job
   * @param provider Provider name to stamp on the snapshot
   * @throws Error `Batch not found: <id>` when the id is not tracked
   */
  snapshot(id: string, provider: BatchJob['provider']): BatchJob {
    const state = this.get(id)

    // Single pass over the results instead of one filter per counter.
    let completedItems = 0
    let failedItems = 0
    for (const result of state.results) {
      if (result.status === 'completed') completedItems += 1
      else if (result.status === 'failed') failedItems += 1
    }

    return {
      id,
      provider,
      status: state.status,
      totalItems: state.items.length,
      completedItems,
      failedItems,
      createdAt: state.createdAt,
      // Only include the key when there is a value, so the property is absent
      // rather than `undefined` on unfinished jobs.
      ...(state.completedAt && { completedAt: state.completedAt }),
    }
  }

  /**
   * Wait for a tracked job to reach a terminal status by polling its in-memory
   * state. Adapters that drive the state machine in a background promise can
   * call this from `waitForCompletion`.
   *
   * Terminal statuses are `completed`, `failed`, `cancelled` and `expired`,
   * the same set as the module-level `TERMINAL_STATUSES`. If nothing ever moves
   * the job into one of them, the returned promise never settles.
   *
   * @param id Id of a tracked job
   * @param pollInterval Delay between checks in ms (default: 1000)
   * @returns The job's `results` array as of the moment it became terminal
   * @throws Error `Batch not found: <id>` when the id is not tracked
   */
  async waitForCompletion(id: string, pollInterval = 1000): Promise<BatchResult[]> {
    const state = this.get(id)
    // Re-read `state.status` on every check: another task mutates it.
    const isTerminal = (): boolean => {
      const current: BatchStatus = state.status
      return (
        current === 'completed' ||
        current === 'failed' ||
        current === 'cancelled' ||
        current === 'expired'
      )
    }
    while (!isTerminal()) {
      await sleep(pollInterval)
    }
    return state.results
  }

  /** For tests: drop everything and restart the id sequence. */
  clear(): void {
    this.jobs.clear()
    this.counter = 0
  }
}
// ============================================================================
// JSON-Schema conversion (used by Anthropic + OpenAI adapters)
// ============================================================================
/** The subset of a Zod schema's `_def` that the converter inspects. */
interface ZodDef {
  /** Discriminator naming the schema kind, e.g. `'ZodString'`. */
  typeName?: string
  /** Element schema; read when `typeName` is `'ZodArray'`. */
  type?: unknown
  /** Thunk returning the field schemas; read when `typeName` is `'ZodObject'`. */
  shape?: () => Record<string, unknown>
}

/** Zod schema with `_def` property for introspection. */
interface ZodSchemaLike {
  _def?: ZodDef
}

/**
 * Minimal Zod -> JSON Schema converter.
 *
 * This is the same simplified converter that previously lived (duplicated)
 * inside `anthropic.ts` and `openai.ts`. Extracted here so both adapters call
 * the same implementation. For richer conversion use `zod-to-json-schema`.
 *
 * Only `ZodString`, `ZodNumber`, `ZodBoolean`, `ZodArray` and `ZodObject` are
 * recognised. Every other input, including `null`/`undefined`, values without
 * `_def`, and unrecognised type names, maps to `{ type: 'object' }`. All object
 * properties are listed as `required`.
 *
 * @param zodSchema A Zod schema (or anything else; unknown inputs are tolerated)
 * @returns A JSON-Schema fragment describing the input
 */
export function zodToJsonSchema(zodSchema: unknown): Record<string, unknown> {
  // Guard before touching `_def`: a nullish value (for example an array def
  // with no element type) would otherwise throw a TypeError.
  if (zodSchema === null || zodSchema === undefined) {
    return { type: 'object' }
  }

  const def = (zodSchema as ZodSchemaLike)._def
  if (!def) {
    return { type: 'object' }
  }

  switch (def.typeName) {
    case 'ZodString':
      return { type: 'string' }
    case 'ZodNumber':
      return { type: 'number' }
    case 'ZodBoolean':
      return { type: 'boolean' }
    case 'ZodArray':
      return { type: 'array', items: zodToJsonSchema(def.type) }
    case 'ZodObject': {
      const shape = def.shape?.() ?? {}
      // fromEntries defines own properties, so even a field literally named
      // `__proto__` is kept instead of being swallowed by the prototype setter.
      const properties: Record<string, unknown> = Object.fromEntries(
        Object.entries(shape).map(([key, value]) => [key, zodToJsonSchema(value)])
      )
      return { type: 'object', properties, required: Object.keys(properties) }
    }
    default:
      return { type: 'object' }
  }
}
// ============================================================================
// JSON parsing (used by every adapter that returns raw text)
// ============================================================================
/**
 * Try to parse `text` as JSON when it looks like JSON or a schema is expected,
 * otherwise return the text unchanged. Never throws for string input.
 *
 * Surrounding whitespace is stripped before parsing. This matters because
 * `String.prototype.trim` removes characters such as a leading byte-order
 * mark or non-breaking spaces, which `JSON.parse` itself rejects.
 *
 * @param text Raw text; empty or `undefined` input is returned as is
 * @param expectJson When `true`, attempt to parse even if the text does not
 *   start with `{` or `[` (so `"42"` becomes `42`)
 * @returns The parsed value, or the original `text` when it is not parsed or
 *   parsing fails
 */
export function tryParseJson(text: string | undefined, expectJson = false): unknown {
  if (!text) return text

  const trimmed = text.trim()
  const looksLikeJson = trimmed.startsWith('{') || trimmed.startsWith('[')
  if (!expectJson && !looksLikeJson) return text

  try {
    // Parse the trimmed form, the same string the heuristic above inspected.
    return JSON.parse(trimmed)
  } catch {
    // Best effort by design: malformed JSON falls back to the raw text.
    return text
  }
}
// ============================================================================
// Misc
// ============================================================================
/**
 * Promise wrapper around `setTimeout`.
 *
 * @param ms Delay, in milliseconds, passed straight to `setTimeout`
 * @returns A promise that resolves (with no value) when the timer fires
 */
export function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms)
  })
}