promptforge
Adaptive Prompt Intelligence & Orchestration SDK - Manage, optimize, and serve prompts for LLMs with versioning, feedback loops, and multi-provider support
evaluation.d.ts (TypeScript)
import { EvaluatePromptRequest, EvaluationResult, EvaluationDataset, PromptVersion, ExecutionResult, ExecutePromptRequest } from '../types.js';
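/**
 * Minimal view of the forge that the evaluation engine depends on:
 * prompt-version lookup and prompt execution.
 */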
interface ForgeInterface {
getPromptVersion(promptId: string, version?: number): Promise<PromptVersion | undefined>;
executePrompt(request: ExecutePromptRequest): Promise<ExecutionResult>;
}
export declare class EvaluationEngine {
/**
* Evaluate a prompt against a dataset or examples
*/
evaluate(request: EvaluatePromptRequest, forge: ForgeInterface): Promise<EvaluationResult>;
/**
* Calculate evaluation metrics
*/
private calculateMetrics;
/**
* Calculate similarity between two strings
* Simple string-based implementation; a production version would use embeddings
*/
private calculateSimilarity;
/**
* Create evaluation dataset
*/
createDataset(name: string, examples: Array<{
input: Record<string, string>;
expectedOutput?: string;
groundTruth?: string;
metadata?: Record<string, unknown>;
}>): Promise<EvaluationDataset>;
/**
* Compare two prompt versions
*/
compareVersions(promptId: string, version1: number, version2: number, examples: Array<{
input: Record<string, string>;
expectedOutput?: string;
}>, forge: ForgeInterface): Promise<{
version1Score: number;
version2Score: number;
winner: number;
improvement: number;
}>;
}
export {};
//# sourceMappingURL=evaluation.d.ts.map
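
A minimal usage sketch follows. Only the method signatures come from the declaration above; the no-argument EvaluationEngine constructor, the forge stub, and all prompt names and example strings are assumptions made for illustration.

// Usage sketch, not part of the declaration file. ForgeInterface is not
// exported, but any structurally compatible object can stand in for it.
import { EvaluationEngine } from './evaluation.js';
import type { ExecutePromptRequest, ExecutionResult, PromptVersion } from '../types.js';

const engine = new EvaluationEngine();

// Hypothetical forge stub: a real forge would load versions from storage
// and call an LLM provider inside executePrompt.
const forge = {
  async getPromptVersion(promptId: string, version?: number): Promise<PromptVersion | undefined> {
    return undefined;
  },
  async executePrompt(request: ExecutePromptRequest): Promise<ExecutionResult> {
    throw new Error('wire this to a real provider');
  },
};

// Examples match the parameter shape declared on createDataset/compareVersions.
const examples = [
  { input: { name: 'Ada' }, expectedOutput: 'Hello, Ada!' },
  { input: { name: 'Grace' }, expectedOutput: 'Hello, Grace!' },
];

const dataset = await engine.createDataset('greeting-regression', examples);

// Pit version 1 against version 2 of a prompt ('greeting-prompt' is illustrative).
const comparison = await engine.compareVersions('greeting-prompt', 1, 2, examples, forge);
console.log(comparison.winner, comparison.improvement);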
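
The private calculateSimilarity is only described as a simple implementation that would use embeddings in production. One plausible reading of "simple" is token-overlap (Jaccard) similarity, sketched below purely as an assumption, not the package's actual code.

// Illustrative guess at a "simple" string similarity: Jaccard overlap of
// lowercase word tokens. Not the package's actual implementation.
function tokenJaccardSimilarity(a: string, b: string): number {
  const tokenize = (s: string) => new Set(s.toLowerCase().split(/\s+/).filter(Boolean));
  const ta = tokenize(a);
  const tb = tokenize(b);
  if (ta.size === 0 && tb.size === 0) return 1; // both empty: identical
  let shared = 0;
  for (const token of ta) if (tb.has(token)) shared += 1;
  return shared / (ta.size + tb.size - shared);
}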