@latitude-data/sdk
Version:
Latitude SDK for TypeScript
1 line • 239 kB
Source Map (JSON)
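The file below is a version 3 source map for index.cjs; its "sources" and "sourcesContent" arrays embed the original TypeScript files the bundle was built from. As a rough orientation only (not part of the package's API), here is a minimal sketch of listing those embedded sources, assuming the map has been saved locally as index.cjs.map:

import { readFileSync } from 'node:fs'

// Minimal shape of the source map fields used below; a real v3 map has more.
type SourceMapV3 = {
  version: number
  file: string
  sources: string[]
  sourcesContent?: (string | null)[]
}

// Hypothetical local path; adjust to wherever the map file is saved.
const map: SourceMapV3 = JSON.parse(readFileSync('index.cjs.map', 'utf8'))

// Pair each source path with its embedded content, skipping missing entries.
for (const [i, source] of map.sources.entries()) {
  const content = map.sourcesContent?.[i]
  if (content == null) continue
  console.log(`// ${source} (${content.length} chars)`)
}

Running this against the map prints one line per embedded file (for example ../../../../../../constants/src/ai.ts), which is how the sources quoted in the JSON below were recovered.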
{"version":3,"file":"index.cjs","sources":["../../../../../../constants/src/ai.ts","../../../../../../constants/src/config.ts","../../../../../../constants/src/evaluations/shared.ts","../../../../../../constants/src/evaluations/human.ts","../../../../../../constants/src/evaluations/llm.ts","../../../../../../constants/src/evaluations/rule.ts","../../../../../../constants/src/evaluations/index.ts","../../../../../../constants/src/events/legacy.ts","../../../../../../constants/src/events/events.ts","../../../../../../constants/src/integrations.ts","../../../../../../constants/src/models.ts","../../../../../../constants/src/legacyCompiler.ts","../../../../../../constants/src/tracing/span.ts","../../../../../../constants/src/tracing/trace.ts","../../../../../../constants/src/tracing/index.ts","../../../../../../constants/src/history.ts","../../../../../../constants/src/index.ts","../../../../src/env/index.ts","../../../../src/utils/types.ts","../../../../src/utils/index.ts","../../../../../../constants/src/errors/constants.ts","../../../../src/utils/errors.ts","../../../../src/utils/version.ts","../../../../src/utils/request.ts","../../../../src/utils/nodeFetchResponseToReadableStream.ts","../../../../src/utils/handleStream.ts","../../../../src/utils/streamRun.ts","../../../../src/utils/streamChat.ts","../../../../src/utils/syncChat.ts","../../../../src/utils/syncRun.ts","../../../../src/utils/adapters/openai/getFunctionTools.ts","../../../../../../constants/src/latitudePromptSchema/providers/azure/index.ts","../../../../../../constants/src/latitudePromptSchema/providers/openai/fileSearchToolSchema.ts","../../../../../../constants/src/latitudePromptSchema/providers/openai/webSearchSchema.ts","../../../../../../constants/src/latitudePromptSchema/providers/openai/computerCallSchema.ts","../../../../../../constants/src/latitudePromptSchema/providers/openai/index.ts","../../../../../../constants/src/latitudePromptSchema/zodJsonSchema.ts","../../../../../../constants/src/latitudePromptSchema/toolsSchema.ts","../../../../../../constants/src/latitudePromptSchema/index.ts","../../../../src/utils/adapters/openai/getOpenAIResponsesBuiltinTools.ts","../../../../src/utils/adapters/getProviderTools.ts","../../../../src/utils/adapters/adaptPromptConfigToProvider.ts","../../../../src/utils/adapters/getAdapterFromProvider.ts","../../../../src/index.ts"],"sourcesContent":["import {\n AssistantMessage,\n Message,\n ToolCall,\n} from '@latitude-data/constants/legacyCompiler'\nimport {\n FinishReason,\n LanguageModelUsage,\n TextStreamPart,\n ToolContent,\n} from 'ai'\nimport { JSONSchema7 } from 'json-schema'\nimport { z } from 'zod'\n\nimport { ParameterType } from './config'\nimport { LatitudeEventData, LegacyChainEventTypes } from './events'\nimport { AzureConfig, LatitudePromptConfig } from './latitudePromptSchema'\nimport { ProviderLog } from './models'\n\nexport type AgentToolsMap = Record<string, string> // { [toolName]: agentPath }\n\nexport type ToolDefinition = JSONSchema7 & {\n description: string\n parameters: {\n type: 'object'\n properties: Record<string, JSONSchema7>\n required?: string[]\n additionalProperties: boolean\n }\n}\n\nexport type VercelProviderTool = {\n type: 'provider-defined'\n id: `${string}.${string}`\n args: Record<string, unknown>\n parameters: z.ZodObject<{}, 'strip', z.ZodTypeAny, {}, {}>\n}\n\nexport type VercelTools = Record<string, VercelProviderTool | ToolDefinition>\n\nexport type ToolDefinitionsMap = Record<string, ToolDefinition>\nexport type ToolsItem =\n | 
ToolDefinitionsMap // - tool_name: <tool_definition>\n | string // - latitude/* (no spaces)\n\n// Config supported by Vercel\nexport type VercelConfig = {\n provider: string\n model: string\n url?: string\n cacheControl?: boolean\n schema?: JSONSchema7\n parameters?: Record<string, { type: ParameterType }>\n tools?: VercelTools\n azure?: AzureConfig\n}\n\nexport type PartialPromptConfig = Omit<LatitudePromptConfig, 'provider'>\n\nexport type ProviderData = TextStreamPart<any>\n\nexport type ChainEventDto = ProviderData | LatitudeEventData\n\nexport type AssertedStreamType = 'text' | Record<string | symbol, unknown>\nexport type ChainCallResponseDto<S extends AssertedStreamType = 'text'> =\n S extends 'text'\n ? ChainStepTextResponse\n : S extends Record<string | symbol, unknown>\n ? ChainStepObjectResponse<S>\n : never\n\nexport type ChainEventDtoResponse =\n | Omit<ChainStepResponse<'object'>, 'providerLog'>\n | Omit<ChainStepResponse<'text'>, 'providerLog'>\n\nexport type StreamType = 'object' | 'text'\ntype BaseResponse = {\n text: string\n usage: LanguageModelUsage\n documentLogUuid?: string\n providerLog?: ProviderLog\n output?: (AssistantMessage | { role: 'tool'; content: ToolContent })[]\n}\n\nexport type ChainStepTextResponse = BaseResponse & {\n streamType: 'text'\n reasoning?: string | undefined\n toolCalls: ToolCall[]\n}\n\nexport type ChainStepObjectResponse<S extends Record<string, unknown> = any> =\n BaseResponse & {\n streamType: 'object'\n object: S\n }\n\nexport type ChainStepResponse<T extends StreamType> = T extends 'text'\n ? ChainStepTextResponse\n : T extends 'object'\n ? ChainStepObjectResponse\n : never\n\nexport enum StreamEventTypes {\n Latitude = 'latitude-event',\n Provider = 'provider-event',\n}\n\nexport type LegacyChainEvent =\n | {\n data: LegacyLatitudeEventData\n event: StreamEventTypes.Latitude\n }\n | {\n data: ProviderData\n event: StreamEventTypes.Provider\n }\n\nexport type LegacyLatitudeStepEventData = {\n type: LegacyChainEventTypes.Step\n config: LatitudePromptConfig\n isLastStep: boolean\n messages: Message[]\n documentLogUuid?: string\n}\n\nexport type LegacyLatitudeStepCompleteEventData = {\n type: LegacyChainEventTypes.StepComplete\n response: ChainStepResponse<StreamType>\n documentLogUuid?: string\n}\n\nexport type LegacyLatitudeChainCompleteEventData = {\n type: LegacyChainEventTypes.Complete\n config: LatitudePromptConfig\n messages?: Message[]\n object?: any\n response: ChainStepResponse<StreamType>\n finishReason: FinishReason\n documentLogUuid?: string\n}\n\nexport type LegacyLatitudeChainErrorEventData = {\n type: LegacyChainEventTypes.Error\n error: Error\n}\n\nexport type LegacyLatitudeEventData =\n | LegacyLatitudeStepEventData\n | LegacyLatitudeStepCompleteEventData\n | LegacyLatitudeChainCompleteEventData\n | LegacyLatitudeChainErrorEventData\n\nexport type RunSyncAPIResponse<S extends AssertedStreamType = 'text'> = {\n uuid: string\n conversation: Message[]\n response: ChainCallResponseDto<S>\n}\n\nexport type ChatSyncAPIResponse<S extends AssertedStreamType = 'text'> =\n RunSyncAPIResponse<S>\n\nexport const toolCallResponseSchema = z.object({\n id: z.string(),\n name: z.string(),\n result: z.unknown(),\n isError: z.boolean().optional(),\n text: z.string().optional(),\n})\n\nexport type ToolCallResponse = z.infer<typeof toolCallResponseSchema>\n\nexport const FINISH_REASON_DETAILS = {\n stop: {\n name: 'Stop',\n description:\n 'Generation ended naturally, either the model thought it was done, or it emitted a user-supplied 
stop-sequence, before hitting any limits.',\n },\n length: {\n name: 'Length',\n description:\n 'The model hit a hard token boundary in the overall context window, so output was truncated.',\n },\n 'content-filter': {\n name: 'Content Filter',\n description:\n \"The provider's safety filters flagged part of the prospective text (hate, sexual, self-harm, violence, etc.), so generation was withheld, returning early.\",\n },\n 'tool-calls': {\n name: 'Tool Calls',\n description:\n 'Instead of generating text, the assistant asked for one or more declared tools to run; your code should handle them before asking the model to continue.',\n },\n error: {\n name: 'Error',\n description:\n 'The generation terminated because the provider encountered an error. This could be due to a variety of reasons, including timeouts, server issues, or problems with the input data.',\n },\n other: {\n name: 'Other',\n description:\n 'The generation ended without a specific reason. This could be due to a variety of reasons, including timeouts, server issues, or problems with the input data.',\n },\n unknown: {\n name: 'Unknown',\n description: `The provider returned a finish-reason not yet standardized. Check out the provider's documentation for more information.`,\n },\n} as const satisfies {\n [R in FinishReason]: {\n name: string\n description: string\n }\n}\n\nexport type ToolResultPayload = {\n value: unknown\n isError: boolean\n}\n","export enum ParameterType {\n Text = 'text',\n Image = 'image',\n File = 'file',\n}\n\nexport const FAKE_AGENT_START_TOOL_NAME = 'start_autonomous_chain' // TODO(compiler): remove\nexport const AGENT_RETURN_TOOL_NAME = 'end_autonomous_chain' // TODO(compiler): remove\nexport const AGENT_TOOL_PREFIX = 'lat_agent'\nexport const LATITUDE_TOOL_PREFIX = 'lat_tool'\n\nexport enum LatitudeTool {\n RunCode = 'code',\n WebSearch = 'search',\n WebExtract = 'extract',\n}\n\nexport enum LatitudeToolInternalName {\n RunCode = 'lat_tool_run_code',\n WebSearch = 'lat_tool_web_search',\n WebExtract = 'lat_tool_web_extract',\n}\n\nexport const MAX_STEPS_CONFIG_NAME = 'maxSteps'\nexport const DEFAULT_MAX_STEPS = 20\nexport const ABSOLUTE_MAX_STEPS = 150\n","import { z } from 'zod'\n\nconst actualOutputConfiguration = z.object({\n messageSelection: z.enum(['last', 'all']), // Which assistant messages to select\n contentFilter: z.enum(['text', 'image', 'file', 'tool_call']).optional(),\n parsingFormat: z.enum(['string', 'json']),\n fieldAccessor: z.string().optional(), // Field accessor to get the output from if it's a key-value format\n})\nexport type ActualOutputConfiguration = z.infer<\n typeof actualOutputConfiguration\n>\n\nconst expectedOutputConfiguration = z.object({\n parsingFormat: z.enum(['string', 'json']),\n fieldAccessor: z.string().optional(), // Field accessor to get the output from if it's a key-value format\n})\nexport type ExpectedOutputConfiguration = z.infer<\n typeof expectedOutputConfiguration\n>\n\nexport const ACCESSIBLE_OUTPUT_FORMATS = ['json']\n\nexport const baseEvaluationConfiguration = z.object({\n reverseScale: z.boolean(), // If true, lower is better, otherwise, higher is better\n actualOutput: actualOutputConfiguration.optional(), // Optional for backwards compatibility\n expectedOutput: expectedOutputConfiguration.optional(), // Optional for backwards compatibility\n})\nexport const baseEvaluationResultMetadata = z.object({\n // Configuration snapshot is defined in every metric specification\n actualOutput: z.string(),\n expectedOutput: z.string().optional(),\n 
datasetLabel: z.string().optional(),\n})\nexport const baseEvaluationResultError = z.object({\n message: z.string(),\n})\n","import { z } from 'zod'\nimport {\n baseEvaluationConfiguration,\n baseEvaluationResultError,\n baseEvaluationResultMetadata,\n} from './shared'\n\nconst humanEvaluationConfiguration = baseEvaluationConfiguration.extend({\n criteria: z.string().optional(),\n})\nconst humanEvaluationResultMetadata = baseEvaluationResultMetadata.extend({\n reason: z.string().optional(),\n})\nconst humanEvaluationResultError = baseEvaluationResultError.extend({})\n\n// BINARY\n\nconst humanEvaluationBinaryConfiguration = humanEvaluationConfiguration.extend({\n passDescription: z.string().optional(),\n failDescription: z.string().optional(),\n})\nconst humanEvaluationBinaryResultMetadata =\n humanEvaluationResultMetadata.extend({\n configuration: humanEvaluationBinaryConfiguration,\n })\nconst humanEvaluationBinaryResultError = humanEvaluationResultError.extend({})\nexport const HumanEvaluationBinarySpecification = {\n name: 'Binary',\n description:\n 'Judges whether the response meets the criteria. The resulting score is \"passed\" or \"failed\"',\n configuration: humanEvaluationBinaryConfiguration,\n resultMetadata: humanEvaluationBinaryResultMetadata,\n resultError: humanEvaluationBinaryResultError,\n requiresExpectedOutput: false,\n supportsLiveEvaluation: false,\n supportsBatchEvaluation: false,\n supportsManualEvaluation: true,\n} as const\nexport type HumanEvaluationBinaryConfiguration = z.infer<\n typeof HumanEvaluationBinarySpecification.configuration\n>\nexport type HumanEvaluationBinaryResultMetadata = z.infer<\n typeof HumanEvaluationBinarySpecification.resultMetadata\n>\nexport type HumanEvaluationBinaryResultError = z.infer<\n typeof HumanEvaluationBinarySpecification.resultError\n>\n\n// RATING\n\nconst humanEvaluationRatingConfiguration = humanEvaluationConfiguration.extend({\n minRating: z.number(),\n minRatingDescription: z.string().optional(),\n maxRating: z.number(),\n maxRatingDescription: z.string().optional(),\n minThreshold: z.number().optional(), // Threshold in rating range\n maxThreshold: z.number().optional(), // Threshold in rating range\n})\nconst humanEvaluationRatingResultMetadata =\n humanEvaluationResultMetadata.extend({\n configuration: humanEvaluationRatingConfiguration,\n })\nconst humanEvaluationRatingResultError = humanEvaluationResultError.extend({})\nexport const HumanEvaluationRatingSpecification = {\n name: 'Rating',\n description:\n 'Judges the response by rating it under a criteria. 
The resulting score is the rating',\n configuration: humanEvaluationRatingConfiguration,\n resultMetadata: humanEvaluationRatingResultMetadata,\n resultError: humanEvaluationRatingResultError,\n requiresExpectedOutput: false,\n supportsLiveEvaluation: false,\n supportsBatchEvaluation: false,\n supportsManualEvaluation: true,\n} as const\nexport type HumanEvaluationRatingConfiguration = z.infer<\n typeof HumanEvaluationRatingSpecification.configuration\n>\nexport type HumanEvaluationRatingResultMetadata = z.infer<\n typeof HumanEvaluationRatingSpecification.resultMetadata\n>\nexport type HumanEvaluationRatingResultError = z.infer<\n typeof HumanEvaluationRatingSpecification.resultError\n>\n\n/* ------------------------------------------------------------------------- */\n\nexport enum HumanEvaluationMetric {\n Binary = 'binary',\n Rating = 'rating',\n}\n\n// prettier-ignore\nexport type HumanEvaluationConfiguration<M extends HumanEvaluationMetric = HumanEvaluationMetric> =\n M extends HumanEvaluationMetric.Binary ? HumanEvaluationBinaryConfiguration :\n M extends HumanEvaluationMetric.Rating ? HumanEvaluationRatingConfiguration :\n never;\n\n// prettier-ignore\nexport type HumanEvaluationResultMetadata<M extends HumanEvaluationMetric = HumanEvaluationMetric> =\n M extends HumanEvaluationMetric.Binary ? HumanEvaluationBinaryResultMetadata :\n M extends HumanEvaluationMetric.Rating ? HumanEvaluationRatingResultMetadata :\n never;\n\n// prettier-ignore\nexport type HumanEvaluationResultError<M extends HumanEvaluationMetric = HumanEvaluationMetric> =\n M extends HumanEvaluationMetric.Binary ? HumanEvaluationBinaryResultError :\n M extends HumanEvaluationMetric.Rating ? HumanEvaluationRatingResultError :\n never;\n\nexport const HumanEvaluationSpecification = {\n name: 'Human-in-the-Loop',\n description: 'Evaluate responses using a human as a judge',\n configuration: humanEvaluationConfiguration,\n resultMetadata: humanEvaluationResultMetadata,\n resultError: humanEvaluationResultError,\n // prettier-ignore\n metrics: {\n [HumanEvaluationMetric.Binary]: HumanEvaluationBinarySpecification,\n [HumanEvaluationMetric.Rating]: HumanEvaluationRatingSpecification,\n },\n} as const\n","import { z } from 'zod'\nimport {\n baseEvaluationConfiguration,\n baseEvaluationResultError,\n baseEvaluationResultMetadata,\n} from './shared'\n\nconst llmEvaluationConfiguration = baseEvaluationConfiguration.extend({\n provider: z.string(),\n model: z.string(),\n})\nconst llmEvaluationResultMetadata = baseEvaluationResultMetadata.extend({\n evaluationLogId: z.number(),\n reason: z.string(),\n tokens: z.number(),\n cost: z.number(),\n duration: z.number(),\n})\nconst llmEvaluationResultError = baseEvaluationResultError.extend({\n runErrorId: z.number().optional(),\n})\n\n// BINARY\n\nconst llmEvaluationBinaryConfiguration = llmEvaluationConfiguration.extend({\n criteria: z.string(),\n passDescription: z.string(),\n failDescription: z.string(),\n})\nconst llmEvaluationBinaryResultMetadata = llmEvaluationResultMetadata.extend({\n configuration: llmEvaluationBinaryConfiguration,\n})\nconst llmEvaluationBinaryResultError = llmEvaluationResultError.extend({})\nexport const LlmEvaluationBinarySpecification = {\n name: 'Binary',\n description:\n 'Judges whether the response meets the criteria. 
The resulting score is \"passed\" or \"failed\"',\n configuration: llmEvaluationBinaryConfiguration,\n resultMetadata: llmEvaluationBinaryResultMetadata,\n resultError: llmEvaluationBinaryResultError,\n requiresExpectedOutput: false,\n supportsLiveEvaluation: true,\n supportsBatchEvaluation: true,\n supportsManualEvaluation: false,\n} as const\nexport type LlmEvaluationBinaryConfiguration = z.infer<\n typeof LlmEvaluationBinarySpecification.configuration\n>\nexport type LlmEvaluationBinaryResultMetadata = z.infer<\n typeof LlmEvaluationBinarySpecification.resultMetadata\n>\nexport type LlmEvaluationBinaryResultError = z.infer<\n typeof LlmEvaluationBinarySpecification.resultError\n>\n\n// RATING\n\nconst llmEvaluationRatingConfiguration = llmEvaluationConfiguration.extend({\n criteria: z.string(),\n minRating: z.number(),\n minRatingDescription: z.string(),\n maxRating: z.number(),\n maxRatingDescription: z.string(),\n minThreshold: z.number().optional(), // Threshold in rating range\n maxThreshold: z.number().optional(), // Threshold in rating range\n})\nconst llmEvaluationRatingResultMetadata = llmEvaluationResultMetadata.extend({\n configuration: llmEvaluationRatingConfiguration,\n})\nconst llmEvaluationRatingResultError = llmEvaluationResultError.extend({})\nexport const LlmEvaluationRatingSpecification = {\n name: 'Rating',\n description:\n 'Judges the response by rating it under a criteria. The resulting score is the rating',\n configuration: llmEvaluationRatingConfiguration,\n resultMetadata: llmEvaluationRatingResultMetadata,\n resultError: llmEvaluationRatingResultError,\n requiresExpectedOutput: false,\n supportsLiveEvaluation: true,\n supportsBatchEvaluation: true,\n supportsManualEvaluation: false,\n} as const\nexport type LlmEvaluationRatingConfiguration = z.infer<\n typeof LlmEvaluationRatingSpecification.configuration\n>\nexport type LlmEvaluationRatingResultMetadata = z.infer<\n typeof LlmEvaluationRatingSpecification.resultMetadata\n>\nexport type LlmEvaluationRatingResultError = z.infer<\n typeof LlmEvaluationRatingSpecification.resultError\n>\n\n// COMPARISON\n\nconst llmEvaluationComparisonConfiguration = llmEvaluationConfiguration.extend({\n criteria: z.string(),\n passDescription: z.string(),\n failDescription: z.string(),\n minThreshold: z.number().optional(), // Threshold percentage\n maxThreshold: z.number().optional(), // Threshold percentage\n})\nconst llmEvaluationComparisonResultMetadata =\n llmEvaluationResultMetadata.extend({\n configuration: llmEvaluationComparisonConfiguration,\n })\nconst llmEvaluationComparisonResultError = llmEvaluationResultError.extend({})\nexport const LlmEvaluationComparisonSpecification = {\n name: 'Comparison',\n description:\n 'Judges the response by comparing the criteria to the expected output. 
The resulting score is the percentage of compared criteria that is met',\n configuration: llmEvaluationComparisonConfiguration,\n resultMetadata: llmEvaluationComparisonResultMetadata,\n resultError: llmEvaluationComparisonResultError,\n requiresExpectedOutput: true,\n supportsLiveEvaluation: false,\n supportsBatchEvaluation: true,\n supportsManualEvaluation: false,\n} as const\nexport type LlmEvaluationComparisonConfiguration = z.infer<\n typeof LlmEvaluationComparisonSpecification.configuration\n>\nexport type LlmEvaluationComparisonResultMetadata = z.infer<\n typeof LlmEvaluationComparisonSpecification.resultMetadata\n>\nexport type LlmEvaluationComparisonResultError = z.infer<\n typeof LlmEvaluationComparisonSpecification.resultError\n>\n\n// CUSTOM\n\nconst llmEvaluationCustomConfiguration = llmEvaluationConfiguration.extend({\n prompt: z.string(),\n minScore: z.number(),\n maxScore: z.number(),\n minThreshold: z.number().optional(), // Threshold percentage\n maxThreshold: z.number().optional(), // Threshold percentage\n})\nconst llmEvaluationCustomResultMetadata = llmEvaluationResultMetadata.extend({\n configuration: llmEvaluationCustomConfiguration,\n})\nconst llmEvaluationCustomResultError = llmEvaluationResultError.extend({})\nexport const LlmEvaluationCustomSpecification = {\n name: 'Custom',\n description:\n 'Judges the response under a criteria using a custom prompt. The resulting score is the value of criteria that is met',\n configuration: llmEvaluationCustomConfiguration,\n resultMetadata: llmEvaluationCustomResultMetadata,\n resultError: llmEvaluationCustomResultError,\n requiresExpectedOutput: false,\n supportsLiveEvaluation: true,\n supportsBatchEvaluation: true,\n supportsManualEvaluation: false,\n} as const\nexport type LlmEvaluationCustomConfiguration = z.infer<\n typeof LlmEvaluationCustomSpecification.configuration\n>\nexport type LlmEvaluationCustomResultMetadata = z.infer<\n typeof LlmEvaluationCustomSpecification.resultMetadata\n>\nexport type LlmEvaluationCustomResultError = z.infer<\n typeof LlmEvaluationCustomSpecification.resultError\n>\n\nexport const LLM_EVALUATION_CUSTOM_PROMPT_DOCUMENTATION = `\n/*\n IMPORTANT: The evaluation MUST return an object with the score and reason fields.\n\n These are the available variables:\n - {{ actualOutput }} (string): The actual output to evaluate\n - {{ expectedOutput }} (string/undefined): The, optional, expected output to compare against\n - {{ conversation }} (string): The full conversation of the evaluated log\n\n - {{ messages }} (array of objects): All the messages of the conversation\n - {{ toolCalls }} (array of objects): All the tool calls of the conversation\n - {{ cost }} (number): The cost, in cents, of the evaluated log\n - {{ tokens }} (number): The tokens of the evaluated log\n - {{ duration }} (number): The duration, in seconds, of the evaluated log\n\n More info on messages and tool calls format in: https://docs.latitude.so/promptl/syntax/messages\n\n - {{ prompt }} (string): The prompt of the evaluated log\n - {{ config }} (object): The configuration of the evaluated log\n - {{ parameters }} (object): The parameters of the evaluated log\n\n More info on configuration and parameters format in: https://docs.latitude.so/promptl/syntax/configuration\n*/\n`.trim()\n\n// CUSTOM LABELED\n\nexport const LlmEvaluationCustomLabeledSpecification = {\n ...LlmEvaluationCustomSpecification,\n name: 'Custom (Labeled)',\n requiresExpectedOutput: true,\n supportsLiveEvaluation: false,\n supportsBatchEvaluation: true,\n 
supportsManualEvaluation: false,\n} as const\n\n/* ------------------------------------------------------------------------- */\n\nexport enum LlmEvaluationMetric {\n Binary = 'binary',\n Rating = 'rating',\n Comparison = 'comparison',\n Custom = 'custom',\n CustomLabeled = 'custom_labeled',\n}\n\nexport type LlmEvaluationMetricAnyCustom =\n | LlmEvaluationMetric.Custom\n | LlmEvaluationMetric.CustomLabeled\n\n// prettier-ignore\nexport type LlmEvaluationConfiguration<M extends LlmEvaluationMetric = LlmEvaluationMetric> =\n M extends LlmEvaluationMetric.Binary ? LlmEvaluationBinaryConfiguration :\n M extends LlmEvaluationMetric.Rating ? LlmEvaluationRatingConfiguration :\n M extends LlmEvaluationMetric.Comparison ? LlmEvaluationComparisonConfiguration :\n M extends LlmEvaluationMetric.Custom ? LlmEvaluationCustomConfiguration :\n M extends LlmEvaluationMetric.CustomLabeled ? LlmEvaluationCustomConfiguration :\n never;\n\n// prettier-ignore\nexport type LlmEvaluationResultMetadata<M extends LlmEvaluationMetric = LlmEvaluationMetric> =\n M extends LlmEvaluationMetric.Binary ? LlmEvaluationBinaryResultMetadata :\n M extends LlmEvaluationMetric.Rating ? LlmEvaluationRatingResultMetadata :\n M extends LlmEvaluationMetric.Comparison ? LlmEvaluationComparisonResultMetadata :\n M extends LlmEvaluationMetric.Custom ? LlmEvaluationCustomResultMetadata :\n M extends LlmEvaluationMetric.CustomLabeled ? LlmEvaluationCustomResultMetadata :\n never;\n\n// prettier-ignore\nexport type LlmEvaluationResultError<M extends LlmEvaluationMetric = LlmEvaluationMetric> =\n M extends LlmEvaluationMetric.Binary ? LlmEvaluationBinaryResultError :\n M extends LlmEvaluationMetric.Rating ? LlmEvaluationRatingResultError :\n M extends LlmEvaluationMetric.Comparison ? LlmEvaluationComparisonResultError :\n M extends LlmEvaluationMetric.Custom ? LlmEvaluationCustomResultError :\n M extends LlmEvaluationMetric.CustomLabeled ? 
LlmEvaluationCustomResultError :\n never;\n\nexport const LlmEvaluationSpecification = {\n name: 'LLM-as-a-Judge',\n description: 'Evaluate responses using an LLM as a judge',\n configuration: llmEvaluationConfiguration,\n resultMetadata: llmEvaluationResultMetadata,\n resultError: llmEvaluationResultError,\n // prettier-ignore\n metrics: {\n [LlmEvaluationMetric.Binary]: LlmEvaluationBinarySpecification,\n [LlmEvaluationMetric.Rating]: LlmEvaluationRatingSpecification,\n [LlmEvaluationMetric.Comparison]: LlmEvaluationComparisonSpecification,\n [LlmEvaluationMetric.Custom]: LlmEvaluationCustomSpecification,\n [LlmEvaluationMetric.CustomLabeled]: LlmEvaluationCustomLabeledSpecification,\n },\n} as const\n\nexport const LLM_EVALUATION_PROMPT_PARAMETERS = [\n 'actualOutput',\n 'expectedOutput',\n 'conversation',\n 'cost',\n 'tokens',\n 'duration',\n 'config',\n 'toolCalls',\n 'messages',\n 'prompt',\n 'parameters',\n 'context',\n 'response',\n] as const\n\nexport type LlmEvaluationPromptParameter =\n (typeof LLM_EVALUATION_PROMPT_PARAMETERS)[number]\n","import { z } from 'zod'\nimport {\n baseEvaluationConfiguration,\n baseEvaluationResultError,\n baseEvaluationResultMetadata,\n} from './shared'\n\nconst ruleEvaluationConfiguration = baseEvaluationConfiguration.extend({})\nconst ruleEvaluationResultMetadata = baseEvaluationResultMetadata.extend({})\nconst ruleEvaluationResultError = baseEvaluationResultError.extend({})\n\n// EXACT MATCH\n\nconst ruleEvaluationExactMatchConfiguration =\n ruleEvaluationConfiguration.extend({\n caseInsensitive: z.boolean(),\n })\nconst ruleEvaluationExactMatchResultMetadata =\n ruleEvaluationResultMetadata.extend({\n configuration: ruleEvaluationExactMatchConfiguration,\n })\nconst ruleEvaluationExactMatchResultError = ruleEvaluationResultError.extend({})\nexport const RuleEvaluationExactMatchSpecification = {\n name: 'Exact Match',\n description:\n 'Checks if the response is exactly the same as the expected output. The resulting score is \"matched\" or \"unmatched\"',\n configuration: ruleEvaluationExactMatchConfiguration,\n resultMetadata: ruleEvaluationExactMatchResultMetadata,\n resultError: ruleEvaluationExactMatchResultError,\n requiresExpectedOutput: true,\n supportsLiveEvaluation: false,\n supportsBatchEvaluation: true,\n supportsManualEvaluation: false,\n} as const\nexport type RuleEvaluationExactMatchConfiguration = z.infer<\n typeof RuleEvaluationExactMatchSpecification.configuration\n>\nexport type RuleEvaluationExactMatchResultMetadata = z.infer<\n typeof RuleEvaluationExactMatchSpecification.resultMetadata\n>\nexport type RuleEvaluationExactMatchResultError = z.infer<\n typeof RuleEvaluationExactMatchSpecification.resultError\n>\n\n// REGULAR EXPRESSION\n\nconst ruleEvaluationRegularExpressionConfiguration =\n ruleEvaluationConfiguration.extend({\n pattern: z.string(),\n })\nconst ruleEvaluationRegularExpressionResultMetadata =\n ruleEvaluationResultMetadata.extend({\n configuration: ruleEvaluationRegularExpressionConfiguration,\n })\nconst ruleEvaluationRegularExpressionResultError =\n ruleEvaluationResultError.extend({})\nexport const RuleEvaluationRegularExpressionSpecification = {\n name: 'Regular Expression',\n description:\n 'Checks if the response matches the regular expression. 
The resulting score is \"matched\" or \"unmatched\"',\n configuration: ruleEvaluationRegularExpressionConfiguration,\n resultMetadata: ruleEvaluationRegularExpressionResultMetadata,\n resultError: ruleEvaluationRegularExpressionResultError,\n requiresExpectedOutput: false,\n supportsLiveEvaluation: true,\n supportsBatchEvaluation: true,\n supportsManualEvaluation: false,\n} as const\nexport type RuleEvaluationRegularExpressionConfiguration = z.infer<\n typeof RuleEvaluationRegularExpressionSpecification.configuration\n>\nexport type RuleEvaluationRegularExpressionResultMetadata = z.infer<\n typeof RuleEvaluationRegularExpressionSpecification.resultMetadata\n>\nexport type RuleEvaluationRegularExpressionResultError = z.infer<\n typeof RuleEvaluationRegularExpressionSpecification.resultError\n>\n\n// SCHEMA VALIDATION\n\nconst ruleEvaluationSchemaValidationConfiguration =\n ruleEvaluationConfiguration.extend({\n format: z.enum(['json']),\n schema: z.string(),\n })\nconst ruleEvaluationSchemaValidationResultMetadata =\n ruleEvaluationResultMetadata.extend({\n configuration: ruleEvaluationSchemaValidationConfiguration,\n })\nconst ruleEvaluationSchemaValidationResultError =\n ruleEvaluationResultError.extend({})\nexport const RuleEvaluationSchemaValidationSpecification = {\n name: 'Schema Validation',\n description:\n 'Checks if the response follows the schema. The resulting score is \"valid\" or \"invalid\"',\n configuration: ruleEvaluationSchemaValidationConfiguration,\n resultMetadata: ruleEvaluationSchemaValidationResultMetadata,\n resultError: ruleEvaluationSchemaValidationResultError,\n requiresExpectedOutput: false,\n supportsLiveEvaluation: true,\n supportsBatchEvaluation: true,\n supportsManualEvaluation: false,\n} as const\nexport type RuleEvaluationSchemaValidationConfiguration = z.infer<\n typeof RuleEvaluationSchemaValidationSpecification.configuration\n>\nexport type RuleEvaluationSchemaValidationResultMetadata = z.infer<\n typeof RuleEvaluationSchemaValidationSpecification.resultMetadata\n>\nexport type RuleEvaluationSchemaValidationResultError = z.infer<\n typeof RuleEvaluationSchemaValidationSpecification.resultError\n>\n\n// LENGTH COUNT\n\nconst ruleEvaluationLengthCountConfiguration =\n ruleEvaluationConfiguration.extend({\n algorithm: z.enum(['character', 'word', 'sentence']),\n minLength: z.number().optional(),\n maxLength: z.number().optional(),\n })\nconst ruleEvaluationLengthCountResultMetadata =\n ruleEvaluationResultMetadata.extend({\n configuration: ruleEvaluationLengthCountConfiguration,\n })\nconst ruleEvaluationLengthCountResultError = ruleEvaluationResultError.extend(\n {},\n)\nexport const RuleEvaluationLengthCountSpecification = {\n name: 'Length Count',\n description:\n 'Checks if the response is of a certain length. 
The resulting score is the length of the response',\n configuration: ruleEvaluationLengthCountConfiguration,\n resultMetadata: ruleEvaluationLengthCountResultMetadata,\n resultError: ruleEvaluationLengthCountResultError,\n requiresExpectedOutput: false,\n supportsLiveEvaluation: true,\n supportsBatchEvaluation: true,\n supportsManualEvaluation: false,\n} as const\nexport type RuleEvaluationLengthCountConfiguration = z.infer<\n typeof RuleEvaluationLengthCountSpecification.configuration\n>\nexport type RuleEvaluationLengthCountResultMetadata = z.infer<\n typeof RuleEvaluationLengthCountSpecification.resultMetadata\n>\nexport type RuleEvaluationLengthCountResultError = z.infer<\n typeof RuleEvaluationLengthCountSpecification.resultError\n>\n\n// LEXICAL OVERLAP\n\nconst ruleEvaluationLexicalOverlapConfiguration =\n ruleEvaluationConfiguration.extend({\n algorithm: z.enum(['substring', 'levenshtein_distance', 'rouge']),\n minOverlap: z.number().optional(), // Percentage of overlap\n maxOverlap: z.number().optional(), // Percentage of overlap\n })\nconst ruleEvaluationLexicalOverlapResultMetadata =\n ruleEvaluationResultMetadata.extend({\n configuration: ruleEvaluationLexicalOverlapConfiguration,\n })\nconst ruleEvaluationLexicalOverlapResultError =\n ruleEvaluationResultError.extend({})\nexport const RuleEvaluationLexicalOverlapSpecification = {\n name: 'Lexical Overlap',\n description:\n 'Checks if the response contains the expected output. The resulting score is the percentage of overlap',\n configuration: ruleEvaluationLexicalOverlapConfiguration,\n resultMetadata: ruleEvaluationLexicalOverlapResultMetadata,\n resultError: ruleEvaluationLexicalOverlapResultError,\n requiresExpectedOutput: true,\n supportsLiveEvaluation: false,\n supportsBatchEvaluation: true,\n supportsManualEvaluation: false,\n} as const\nexport type RuleEvaluationLexicalOverlapConfiguration = z.infer<\n typeof RuleEvaluationLexicalOverlapSpecification.configuration\n>\nexport type RuleEvaluationLexicalOverlapResultMetadata = z.infer<\n typeof RuleEvaluationLexicalOverlapSpecification.resultMetadata\n>\nexport type RuleEvaluationLexicalOverlapResultError = z.infer<\n typeof RuleEvaluationLexicalOverlapSpecification.resultError\n>\n\n// SEMANTIC SIMILARITY\n\nconst ruleEvaluationSemanticSimilarityConfiguration =\n ruleEvaluationConfiguration.extend({\n algorithm: z.enum(['cosine_distance']),\n minSimilarity: z.number().optional(), // Percentage of similarity\n maxSimilarity: z.number().optional(), // Percentage of similarity\n })\nconst ruleEvaluationSemanticSimilarityResultMetadata =\n ruleEvaluationResultMetadata.extend({\n configuration: ruleEvaluationSemanticSimilarityConfiguration,\n })\nconst ruleEvaluationSemanticSimilarityResultError =\n ruleEvaluationResultError.extend({})\nexport const RuleEvaluationSemanticSimilaritySpecification = {\n name: 'Semantic Similarity',\n description:\n 'Checks if the response is semantically similar to the expected output. 
The resulting score is the percentage of similarity',\n configuration: ruleEvaluationSemanticSimilarityConfiguration,\n resultMetadata: ruleEvaluationSemanticSimilarityResultMetadata,\n resultError: ruleEvaluationSemanticSimilarityResultError,\n requiresExpectedOutput: true,\n supportsLiveEvaluation: false,\n supportsBatchEvaluation: true,\n supportsManualEvaluation: false,\n} as const\nexport type RuleEvaluationSemanticSimilarityConfiguration = z.infer<\n typeof RuleEvaluationSemanticSimilaritySpecification.configuration\n>\nexport type RuleEvaluationSemanticSimilarityResultMetadata = z.infer<\n typeof RuleEvaluationSemanticSimilaritySpecification.resultMetadata\n>\nexport type RuleEvaluationSemanticSimilarityResultError = z.infer<\n typeof RuleEvaluationSemanticSimilaritySpecification.resultError\n>\n\n// NUMERIC SIMILARITY\n\nconst ruleEvaluationNumericSimilarityConfiguration =\n ruleEvaluationConfiguration.extend({\n algorithm: z.enum(['relative_difference']),\n minSimilarity: z.number().optional(), // Percentage of similarity\n maxSimilarity: z.number().optional(), // Percentage of similarity\n })\nconst ruleEvaluationNumericSimilarityResultMetadata =\n ruleEvaluationResultMetadata.extend({\n configuration: ruleEvaluationNumericSimilarityConfiguration,\n })\nconst ruleEvaluationNumericSimilarityResultError =\n ruleEvaluationResultError.extend({})\nexport const RuleEvaluationNumericSimilaritySpecification = {\n name: 'Numeric Similarity',\n description:\n 'Checks if the response is numerically similar to the expected output. The resulting score is the percentage of similarity',\n configuration: ruleEvaluationNumericSimilarityConfiguration,\n resultMetadata: ruleEvaluationNumericSimilarityResultMetadata,\n resultError: ruleEvaluationNumericSimilarityResultError,\n requiresExpectedOutput: true,\n supportsLiveEvaluation: false,\n supportsBatchEvaluation: true,\n supportsManualEvaluation: false,\n} as const\nexport type RuleEvaluationNumericSimilarityConfiguration = z.infer<\n typeof RuleEvaluationNumericSimilaritySpecification.configuration\n>\nexport type RuleEvaluationNumericSimilarityResultMetadata = z.infer<\n typeof RuleEvaluationNumericSimilaritySpecification.resultMetadata\n>\nexport type RuleEvaluationNumericSimilarityResultError = z.infer<\n typeof RuleEvaluationNumericSimilaritySpecification.resultError\n>\n\n/* ------------------------------------------------------------------------- */\n\nexport enum RuleEvaluationMetric {\n ExactMatch = 'exact_match',\n RegularExpression = 'regular_expression',\n SchemaValidation = 'schema_validation',\n LengthCount = 'length_count',\n LexicalOverlap = 'lexical_overlap',\n SemanticSimilarity = 'semantic_similarity',\n NumericSimilarity = 'numeric_similarity',\n}\n\n// prettier-ignore\nexport type RuleEvaluationConfiguration<M extends RuleEvaluationMetric = RuleEvaluationMetric> = \n M extends RuleEvaluationMetric.ExactMatch ? RuleEvaluationExactMatchConfiguration :\n M extends RuleEvaluationMetric.RegularExpression ? RuleEvaluationRegularExpressionConfiguration :\n M extends RuleEvaluationMetric.SchemaValidation ? RuleEvaluationSchemaValidationConfiguration :\n M extends RuleEvaluationMetric.LengthCount ? RuleEvaluationLengthCountConfiguration :\n M extends RuleEvaluationMetric.LexicalOverlap ? RuleEvaluationLexicalOverlapConfiguration :\n M extends RuleEvaluationMetric.SemanticSimilarity ? RuleEvaluationSemanticSimilarityConfiguration :\n M extends RuleEvaluationMetric.NumericSimilarity ? 
RuleEvaluationNumericSimilarityConfiguration :\n never;\n\n// prettier-ignore\nexport type RuleEvaluationResultMetadata<M extends RuleEvaluationMetric = RuleEvaluationMetric> = \n M extends RuleEvaluationMetric.ExactMatch ? RuleEvaluationExactMatchResultMetadata :\n M extends RuleEvaluationMetric.RegularExpression ? RuleEvaluationRegularExpressionResultMetadata :\n M extends RuleEvaluationMetric.SchemaValidation ? RuleEvaluationSchemaValidationResultMetadata :\n M extends RuleEvaluationMetric.LengthCount ? RuleEvaluationLengthCountResultMetadata :\n M extends RuleEvaluationMetric.LexicalOverlap ? RuleEvaluationLexicalOverlapResultMetadata :\n M extends RuleEvaluationMetric.SemanticSimilarity ? RuleEvaluationSemanticSimilarityResultMetadata :\n M extends RuleEvaluationMetric.NumericSimilarity ? RuleEvaluationNumericSimilarityResultMetadata :\n never;\n\n// prettier-ignore\nexport type RuleEvaluationResultError<M extends RuleEvaluationMetric = RuleEvaluationMetric> = \n M extends RuleEvaluationMetric.ExactMatch ? RuleEvaluationExactMatchResultError :\n M extends RuleEvaluationMetric.RegularExpression ? RuleEvaluationRegularExpressionResultError :\n M extends RuleEvaluationMetric.SchemaValidation ? RuleEvaluationSchemaValidationResultError :\n M extends RuleEvaluationMetric.LengthCount ? RuleEvaluationLengthCountResultError :\n M extends RuleEvaluationMetric.LexicalOverlap ? RuleEvaluationLexicalOverlapResultError :\n M extends RuleEvaluationMetric.SemanticSimilarity ? RuleEvaluationSemanticSimilarityResultError :\n M extends RuleEvaluationMetric.NumericSimilarity ? RuleEvaluationNumericSimilarityResultError :\n never;\n\nexport const RuleEvaluationSpecification = {\n name: 'Programmatic Rule',\n description: 'Evaluate responses using a programmatic rule',\n configuration: ruleEvaluationConfiguration,\n resultMetadata: ruleEvaluationResultMetadata,\n resultError: ruleEvaluationResultError,\n // prettier-ignore\n metrics: {\n [RuleEvaluationMetric.ExactMatch]: RuleEvaluationExactMatchSpecification,\n [RuleEvaluationMetric.RegularExpression]: RuleEvaluationRegularExpressionSpecification,\n [RuleEvaluationMetric.SchemaValidation]: RuleEvaluationSchemaValidationSpecification,\n [RuleEvaluationMetric.LengthCount]: RuleEvaluationLengthCountSpecification,\n [RuleEvaluationMetric.LexicalOverlap]: RuleEvaluationLexicalOverlapSpecification,\n [RuleEvaluationMetric.SemanticSimilarity]: RuleEvaluationSemanticSimilaritySpecification,\n [RuleEvaluationMetric.NumericSimilarity]: RuleEvaluationNumericSimilaritySpecification,\n },\n} as const\n","import { z } from 'zod'\nimport {\n HumanEvaluationConfiguration,\n HumanEvaluationMetric,\n HumanEvaluationResultError,\n HumanEvaluationResultMetadata,\n HumanEvaluationSpecification,\n} from './human'\nimport {\n LlmEvaluationConfiguration,\n LlmEvaluationMetric,\n LlmEvaluationResultError,\n LlmEvaluationResultMetadata,\n LlmEvaluationSpecification,\n} from './llm'\nimport {\n RuleEvaluationConfiguration,\n RuleEvaluationMetric,\n RuleEvaluationResultError,\n RuleEvaluationResultMetadata,\n RuleEvaluationSpecification,\n} from './rule'\n\nexport * from './human'\nexport * from './llm'\nexport * from './rule'\nexport * from './shared'\n\nexport enum EvaluationType {\n Rule = 'rule',\n Llm = 'llm',\n Human = 'human',\n}\n\nexport const EvaluationTypeSchema = z.nativeEnum(EvaluationType)\n\n// prettier-ignore\nexport type EvaluationMetric<T extends EvaluationType = EvaluationType> =\n T extends EvaluationType.Rule ? 
RuleEvaluationMetric :\n T extends EvaluationType.Llm ? LlmEvaluationMetric :\n T extends EvaluationType.Human ? HumanEvaluationMetric :\n never;\n\nexport const EvaluationMetricSchema = z.union([\n z.nativeEnum(RuleEvaluationMetric),\n z.nativeEnum(LlmEvaluationMetric),\n z.nativeEnum(HumanEvaluationMetric),\n])\n\n// prettier-ignore\nexport type EvaluationConfiguration<\n T extends EvaluationType = EvaluationType,\n M extends EvaluationMetric<T> = EvaluationMetric<T>,\n> =\n T extends EvaluationType.Rule ? RuleEvaluationConfiguration<M extends RuleEvaluationMetric ? M : never> :\n T extends EvaluationType.Llm ? LlmEvaluationConfiguration<M extends LlmEvaluationMetric ? M : never> :\n T extends EvaluationType.Human ? HumanEvaluationConfiguration<M extends HumanEvaluationMetric ? M : never> :\n never;\n\nexport const EvaluationConfigurationSchema = z.custom<EvaluationConfiguration>()\n\n// prettier-ignore\nexport type EvaluationResultMetadata<\n T extends EvaluationType = EvaluationType,\n M extends EvaluationMetric<T> = EvaluationMetric<T>,\n> =\n T extends EvaluationType.Rule ? RuleEvaluationResultMetadata<M extends RuleEvaluationMetric ? M : never> :\n T extends EvaluationType.Llm ? LlmEvaluationResultMetadata<M extends LlmEvaluationMetric ? M : never> :\n T extends EvaluationType.Human ? HumanEvaluationResultMetadata<M extends HumanEvaluationMetric ? M : never> :\n never;\n\n// prettier-ignore\nexport const EvaluationResultMetadataSchema = z.custom<EvaluationResultMetadata>()\n\n// prettier-ignore\nexport type EvaluationResultError<\n T extends EvaluationType = EvaluationType,\n M extends EvaluationMetric<T> = EvaluationMetric<T>,\n> =\n T extends EvaluationType.Rule ? RuleEvaluationResultError<M extends RuleEvaluationMetric ? M : never> :\n T extends EvaluationType.Llm ? LlmEvaluationResultError<M extends LlmEvaluationMetric ? M : never> :\n T extends EvaluationType.Human ? HumanEvaluationResultError<M extends HumanEvaluationMetric ? 
M : never> :\n never;\n\n// prettier-ignore\nexport const EvaluationResultErrorSchema = z.custom<EvaluationResultError>()\n\n// prettier-ignore\ntype ZodSchema<T = any> = z.ZodObject<z.ZodRawShape, z.UnknownKeysParam, z.ZodTypeAny, T, T>\n\nexport type EvaluationMetricSpecification<\n T extends EvaluationType = EvaluationType,\n M extends EvaluationMetric<T> = EvaluationMetric<T>,\n> = {\n name: string\n description: string\n configuration: ZodSchema<EvaluationConfiguration<T, M>>\n resultMetadata: ZodSchema<EvaluationResultMetadata<T, M>>\n resultError: ZodSchema<EvaluationResultError<T, M>>\n requiresExpectedOutput: boolean\n supportsLiveEvaluation: boolean\n supportsBatchEvaluation: boolean\n supportsManualEvaluation: boolean\n}\n\nexport type EvaluationSpecification<T extends EvaluationType = EvaluationType> =\n {\n name: string\n description: string\n configuration: ZodSchema\n resultMetadata: ZodSchema\n resultError: ZodSchema\n metrics: { [M in EvaluationMetric<T>]: EvaluationMetricSpecification<T, M> }\n }\n\nexport const EVALUATION_SPECIFICATIONS = {\n [EvaluationType.Rule]: RuleEvaluationSpecification,\n [EvaluationType.Llm]: LlmEvaluationSpecification,\n [EvaluationType.Human]: HumanEvaluationSpecification,\n} as const satisfies {\n [T in EvaluationType]: EvaluationSpecification<T>\n}\ntype EvaluationSpecifications = typeof EVALUATION_SPECIFICATIONS\n\n// prettier-ignore\ntype EvaluationMetricSpecificationFilter<\n F extends keyof EvaluationMetricSpecification,\n T extends EvaluationType = EvaluationType\n> = { [K in EvaluationType]: {\n [M in keyof EvaluationSpecifications[K]['metrics']]:\n // @ts-expect-error F can indeed index M type\n EvaluationSpecifications[K]['metrics'][M][F] extends true ? M : never\n }[keyof EvaluationSpecifications[K]['metrics']]\n}[T] & EvaluationMetric<T>\n\nexport type LiveEvaluationMetric<T extends EvaluationType = EvaluationType> =\n EvaluationMetricSpecificationFilter<'supportsLiveEvaluation', T>\n\nexport type BatchEvaluationMetric<T extends EvaluationType = EvaluationType> =\n EvaluationMetricSpecificationFilter<'supportsBatchEvaluation', T>\n\nexport type ManualEvaluationMetric<T extends EvaluationType = EvaluationType> =\n EvaluationMetricSpecificationFilter<'supportsManualEvaluation', T>\n\nexport type EvaluationV2<\n T extends EvaluationType = EvaluationType,\n M extends EvaluationMetric<T> = EvaluationMetric<T>,\n> = {\n uuid: string\n versionId: number\n workspaceId: number\n commitId: number\n documentUuid: string\n name: string\n description: string\n type: T\n metric: M\n configuration: EvaluationConfiguration<T, M>\n evaluateLiveLogs?: boolean | null\n enableSuggestions?: boolean | null\n autoApplySuggestions?: boolean | null\n createdAt: Date\n updatedAt: Date\n deletedAt?: Date | null\n}\n\nexport type EvaluationResultValue<\n T extends EvaluationType = EvaluationType,\n M extends EvaluationMetric<T> = EvaluationMetric<T>,\n> =\n | {\n score: number\n normalizedScore: number\n metadata: EvaluationResultMetadata<T, M>\n hasPassed: boolean\n error?: null\n }\n | {\n score?: null\n normalizedScore?: null\n metadata?: null\n hasPassed?: null\n error: EvaluationResultError<T, M>\n }\n\nexport type EvaluationResultV2<\n T extends EvaluationType = EvaluationType,\n M extends EvaluationMetric<T> = EvaluationMetric<T>,\n> = {\n id: number\n uuid: string\n workspaceId: number\n commitId: number\n evaluationUuid: string\n experimentId?: number | null\n datasetId?: number | null\n evaluatedRowId?: number | null\n evaluatedLogId: number\n 
usedForSuggestion?: boolean | null\n createdAt: Date\n updatedAt: Date\n} & EvaluationResultValue<T, M>\n\nexport type PublicManualEvaluationResultV2 = Pick<\n EvaluationResultV2<EvaluationType.Human, HumanEvaluationMetric>,\n | 'uuid'\n | 'score'\n | 'normalizedScore'\n | 'metadata'\n | 'hasPassed'\n | 'createdAt'\n | 'updatedAt'\n> & { versionUuid: string; error: string | null }\n\nexport type EvaluationSettings<\n T extends EvaluationType = EvaluationType,\n M extends EvaluationMetric<T> = EvaluationMetric<T>,\n> = Pick<\n EvaluationV2<T, M>,\n 'name' | 'description' | 'type' | 'metric' | 'configuration'\n>\n\nexport const EvaluationSettingsSchema = z.object({\n name: z.string(),\n description: z.string(),\n type: EvaluationTypeSchema,\n metric: EvaluationMetricSchema,\n configuration: EvaluationConfigurationSchema,\n})\n\nexport type EvaluationOptions = Pick<\n EvaluationV2,\n 'evaluateLiveLogs' | 'enableSuggestions' | 'autoApplySuggestions'\n>\n\nexport const EvaluationOptionsSchema = z.object({\n evaluateLiveLogs: z.boolean().nullable().optional(),\n enableSuggestions: z.boolean().nullable().optional(),\n autoApplySuggestions: z.boolean().nullable().optional(),\n})\n\nexport const EVALUATION_SCORE_SCALE = 100\nexport const DEFAULT_DATASET_LABEL = 'output'\n","import { Message } from '@latitude-data/constants/legacyCompiler'\nimport { ChainEventDtoResponse } from '..'\nimport { FinishReason } from 'ai'\nimport { LatitudePromptConfig } from '../latitudePromptSchema'\n\nexport enum LegacyChainEventTypes {\n Error = 'chain-error',\n Step = 'chain-step',\n Complete = 'chain-complete',\n StepComplete = 'chain-step-complete',\n}\n\nexport type LegacyEventData =\n | {\n type: LegacyChainEventTypes.Step\n config: LatitudePromptConfig\n isLastStep: boolean\n messages: Message[]\n uuid?: string\n }\n | {\n type: LegacyChainEventTypes.StepComplete\n response: ChainEventDtoResponse\n uuid?: string\n }\n | {\n type: LegacyChainEventTypes.Complete\n config: LatitudePromptConfig\n finishReason?: FinishReason\n messages?: Message[]\n object?: any\n response: ChainEventDtoResponse\n uuid?: string\n }\n | {\n type: LegacyChainEventTypes.Error\n error: {\n name: string\n message: string\n stack?: string\n }\n }\n","import {\n Config,\n Message,\n ToolCall,\n} from '@latitude-data/constants/legacyCompiler'\nimport {\n ChainStepResponse,\n ProviderData,\n StreamEventTypes,\n StreamType,\n} from '..'\nimport { FinishReaso