UNPKG

@plust/datasleuth

Version:

Build LLM-powered research pipelines and output structured data.

github.com/PlustOrg/datasleuth

PlustOrg/datasleuth

267 lines • 10.1 kB

JavaScript

/**
 * Core pipeline execution engine
 *
 * This module provides the infrastructure for executing research pipelines.
 * It handles step execution, error management, retries, timeouts, and state management
 * throughout the research process.
 *
 * @module core/pipeline
 */
import { logger, createStepLogger } from '../utils/logging.js';
import { executeWithRetry } from '../utils/retry.js';
import { BaseResearchError, PipelineError, isResearchError, } from '../types/errors.js';

/**
 * Default pipeline configuration. Caller-supplied config is spread over these
 * values in executePipeline, so any key given by the caller wins.
 */
const DEFAULT_PIPELINE_CONFIG = {
    steps: [],
    errorHandling: 'stop',
    maxRetries: 3,
    retryDelay: 1000,
    backoffFactor: 2,
    continueOnError: false,
    timeout: 300000, // 5 minutes
    logLevel: 'info',
};

/**
 * Creates the initial state object for a research pipeline
 *
 * @param query - The research query string
 * @param outputSchema - A Zod schema that defines the expected output structure
 * @returns A fresh ResearchState object initialized with the provided query and schema
 *
 * @example
 * ```typescript
 * import { z } from 'zod';
 * import { createInitialState } from '@plust/datasleuth';
 *
 * const outputSchema = z.object({
 *   summary: z.string(),
 *   findings: z.array(z.string())
 * });
 *
 * const initialState = createInitialState(
 *   "What are the latest advancements in renewable energy?",
 *   outputSchema
 * );
 * ```
 */
export function createInitialState(query, outputSchema) {
    return {
        query,
        outputSchema,
        data: {},
        results: [],
        errors: [],
        metadata: {
            startTime: new Date(),
            stepHistory: [],
            confidenceScore: 0,
        },
    };
}

/**
 * Converts any thrown value into a research error.
 *
 * Values already recognised by isResearchError are returned untouched; Error
 * instances and arbitrary thrown values are wrapped in a BaseResearchError that
 * keeps the original value under `details.originalError`.
 *
 * @param error - The value that was thrown
 * @param step - The step that was executing (only `step.name` is read)
 * @returns A research error describing the failure
 */
function toResearchError(error, step) {
    if (isResearchError(error)) {
        return error;
    }
    if (error instanceof Error) {
        return new BaseResearchError({
            message: error.message,
            code: 'step_execution_error',
            step: step.name,
            details: { originalError: error, stack: error.stack },
        });
    }
    return new BaseResearchError({
        message: `Unknown error in step ${step.name}`,
        code: 'unknown_error',
        step: step.name,
        details: { originalError: error },
    });
}

/**
 * Records the execution of a step.
 *
 * Returns a new state whose `metadata.stepHistory` has one more record; the
 * input state is not mutated. When `error` is given it is also appended to a
 * copy of `state.errors`.
 *
 * @param state - State to extend (must carry `metadata.stepHistory` and `errors`)
 * @param step - The executed step (only `step.name` is read)
 * @param success - Whether the step completed without throwing
 * @param error - The failure, if any
 * @param duration - Elapsed milliseconds; a missing or invalid value is treated as 0
 * @param metadata - Extra fields merged into the record's metadata
 * @returns The new state including the execution record
 */
function recordStepExecution(state, step, success, error, duration, metadata) {
    // Derive both timestamps from one clock reading so that
    // endTime - startTime always equals the recorded duration.
    const endTime = new Date();
    const elapsedMs = typeof duration === 'number' && duration >= 0 ? duration : 0;
    const startTime = new Date(endTime.getTime() - elapsedMs);
    const record = {
        stepName: step.name,
        startTime,
        endTime,
        success,
        error,
        metadata: {
            ...metadata,
            duration: elapsedMs,
        },
    };
    return {
        ...state,
        metadata: {
            ...state.metadata,
            stepHistory: [...state.metadata.stepHistory, record],
        },
        errors: error ? [...state.errors, error] : state.errors,
    };
}

/**
 * Executes a single step with enhanced error handling and retry logic.
 *
 * This function never rejects because of a step failure: the returned state
 * always ends with an execution record for `step`, successful or not, which is
 * what executeSteps inspects to decide how to proceed.
 *
 * @param step - Step to run; `execute(state)` is called, `retryable` enables retries
 * @param state - State passed to every attempt of the step
 * @param config - Pipeline config (`maxRetries`, `retryDelay`, `backoffFactor`)
 * @returns The state with the step's execution record appended
 */
async function executeStepWithErrorHandling(step, state, config) {
    const stepLogger = createStepLogger(step.name);
    let lastAttemptDuration = 0;

    // Runs the step once. Failures are rethrown (normalised) rather than
    // swallowed so that executeWithRetry can actually observe them and retry.
    const attemptStep = async () => {
        const startedAt = Date.now();
        stepLogger.info(`Starting execution`);
        try {
            const updatedState = await step.execute(state);
            const duration = Date.now() - startedAt;
            // Record before logging success: a malformed step result makes
            // recording throw, and that must count as a failure.
            const recorded = recordStepExecution(updatedState, step, true, undefined, duration);
            stepLogger.info(`Execution completed successfully in ${duration}ms`);
            return recorded;
        }
        catch (error) {
            lastAttemptDuration = Date.now() - startedAt;
            const researchError = toResearchError(error, step);
            stepLogger.error(`Execution failed in ${lastAttemptDuration}ms: ${researchError.getFormattedMessage()}`);
            throw researchError;
        }
    };

    // Appends a failure record to the state the step was started with.
    const recordFailure = (error) => recordStepExecution(state, step, false, toResearchError(error, step), lastAttemptDuration);

    const retryEnabled = Boolean(step.retryable) && config.maxRetries > 0;
    if (!retryEnabled) {
        // No retry, just execute once
        try {
            return await attemptStep();
        }
        catch (error) {
            return recordFailure(error);
        }
    }

    stepLogger.debug(`Step is retryable, will retry up to ${config.maxRetries} times if needed`);
    try {
        return await executeWithRetry(attemptStep, {
            maxRetries: config.maxRetries,
            retryDelay: config.retryDelay || 1000,
            backoffFactor: config.backoffFactor || 2,
            onRetry: (attempt, error, delay) => {
                stepLogger.warn(`Retry attempt ${attempt}/${config.maxRetries} after error: ` +
                    `${error instanceof Error ? error.message : 'Unknown error'}. ` +
                    `Retrying in ${delay}ms...`);
            },
        });
    }
    catch (error) {
        // Every attempt failed (or the retry helper gave up). Record the final
        // failure so the caller sees it as this step's latest history entry.
        stepLogger.error(`All ${config.maxRetries} retry attempts failed`);
        return recordFailure(error);
    }
}

/**
 * Main pipeline execution function.
 *
 * Runs `steps` sequentially against a copy of `initialState`, racing the run
 * against the configured timeout. The returned promise resolves with the final
 * state; pipeline-level failures (including the timeout) are appended to
 * `state.errors` rather than thrown. `initialState` itself is not mutated.
 *
 * @param initialState - Starting state, e.g. from createInitialState
 * @param steps - Steps to execute, in order
 * @param config - Overrides for DEFAULT_PIPELINE_CONFIG
 * @returns The final state, with `metadata.endTime` set
 */
export async function executePipeline(initialState, steps, config = {}) {
    const fullConfig = { ...DEFAULT_PIPELINE_CONFIG, ...config, steps };
    // A falsy timeout (0, undefined) falls back to the default.
    const timeoutMs = fullConfig.timeout || DEFAULT_PIPELINE_CONFIG.timeout;
    // Configure logger based on pipeline config
    logger.setLogLevel(fullConfig.logLevel || 'info');
    // Initialize state and add start time
    let state = {
        ...initialState,
        metadata: {
            ...initialState.metadata,
            startTime: new Date(),
            pipelineConfig: fullConfig,
        },
    };
    logger.info(`Starting pipeline execution with ${steps.length} steps`);

    // Shared with executeSteps: lets a timeout stop further steps from starting
    // and lets us keep the work finished before the timeout fired.
    const progress = { cancelled: false, latestState: state };

    // Create a timeout promise with handle for cleanup
    let timeoutId;
    const timeoutPromise = new Promise((_, reject) => {
        timeoutId = setTimeout(() => {
            progress.cancelled = true;
            reject(new PipelineError({
                message: `Pipeline execution timed out after ${timeoutMs}ms`,
                step: 'pipeline',
            }));
        }, timeoutMs);
    });

    // Execute the pipeline with timeout
    try {
        const executionPromise = executeSteps(state, fullConfig, progress);
        state = await Promise.race([executionPromise, timeoutPromise]);
        logger.info(`Pipeline execution completed successfully`);
    }
    catch (error) {
        logger.error(`Pipeline execution failed: ${error instanceof Error ? error.message : 'Unknown error'}`);
        // Transform error to ResearchError if needed
        const researchError = isResearchError(error)
            ? error
            : new PipelineError({
                message: error instanceof Error ? error.message : String(error),
                step: 'pipeline',
            });
        // Continue from the most recent step result and copy the errors array
        // instead of pushing into one that may be shared with the caller.
        const base = progress.latestState;
        state = { ...base, errors: [...(base.errors || []), researchError] };
    }
    finally {
        // Always clear the timeout to prevent memory leaks
        if (timeoutId !== undefined) {
            clearTimeout(timeoutId);
        }
    }

    // Always update end time (on a copy, so step-owned objects stay untouched)
    const endTime = new Date();
    state = { ...state, metadata: { ...state.metadata, endTime } };
    // Calculate total duration
    const duration = endTime.getTime() - state.metadata.startTime.getTime();
    logger.info(`Pipeline execution finished in ${duration}ms`);
    return state;
}

/**
 * Execute pipeline steps sequentially with enhanced error handling.
 *
 * After each step the newest entry of `metadata.stepHistory` decides what
 * happens next, according to `config.errorHandling` and `config.continueOnError`.
 *
 * @param initialState - State handed to the first step
 * @param config - Full pipeline config (`steps`, `errorHandling`, `continueOnError`, retry settings)
 * @param progress - Optional shared object: `cancelled` is read before each step,
 *   `latestState` is updated after each step
 * @returns The state after the last executed step (or rollback)
 */
async function executeSteps(initialState, config, progress = { cancelled: false, latestState: initialState }) {
    let state = initialState;
    const { steps, errorHandling, continueOnError } = config;
    for (const step of steps) {
        if (progress.cancelled) {
            // The caller has stopped waiting for us; do not start more work.
            logger.warn(`Pipeline cancelled, skipping step "${step.name}" and all remaining steps`);
            break;
        }
        // Execute the step with error handling
        state = await executeStepWithErrorHandling(step, state, config);
        progress.latestState = state;
        // Check for errors and handle according to strategy
        const latestExecution = state.metadata.stepHistory[state.metadata.stepHistory.length - 1];
        if (latestExecution.success) {
            continue;
        }
        logger.warn(`Step "${step.name}" failed`);
        if (errorHandling === 'stop' && !continueOnError) {
            logger.info(`Stopping pipeline execution due to error in step "${step.name}" (errorHandling: 'stop')`);
            break;
        }
        // NOTE(review): with errorHandling 'rollback' and a step that has no
        // rollback handler, execution falls through to the next step even when
        // continueOnError is false. Kept as-is; confirm this is intended.
        if (errorHandling === 'rollback' && step.rollback) {
            logger.info(`Rolling back step "${step.name}"`);
            try {
                state = await step.rollback(state);
                logger.info(`Rollback for step "${step.name}" successful`);
            }
            catch (rollbackError) {
                const reason = rollbackError instanceof Error ? rollbackError.message : String(rollbackError);
                logger.error(`Rollback for step "${step.name}" failed: ${reason}`);
                // Transform error to ResearchError if needed
                const researchError = isResearchError(rollbackError)
                    ? rollbackError
                    : new PipelineError({
                        message: `Rollback for step "${step.name}" failed: ${reason}`,
                        step: step.name,
                    });
                // Copy rather than push: the array may be shared with earlier states.
                state = { ...state, errors: [...state.errors, researchError] };
            }
            progress.latestState = state;
            if (!continueOnError) {
                logger.info(`Stopping pipeline execution after rollback (errorHandling: 'rollback')`);
                break;
            }
        }
        // For 'continue' strategy or if continueOnError is true, move to the next step
    }
    return state;
}
//# sourceMappingURL=pipeline.js.map