UNPKG

axiom

Version:

The Axiom AI SDK provides: an API to wrap your AI calls with observability instrumentation, offline evals, and online evals.

378 lines (375 loc) 13.8 kB
// src/otel/semconv/attributes.ts
import {
  ATTR_ERROR_TYPE,
  ATTR_HTTP_RESPONSE_STATUS_CODE,
} from "@opentelemetry/semantic-conventions";

// src/otel/semconv/eval_proposal.ts
// Proposed (non-standard) attribute keys used on Axiom eval spans.
var ATTR_EVAL_ID = "eval.id";
var ATTR_EVAL_NAME = "eval.name";
var ATTR_EVAL_VERSION = "eval.version";
var ATTR_EVAL_TYPE = "eval.type";
var ATTR_EVAL_TAGS = "eval.tags";
var ATTR_EVAL_BASELINE_ID = "eval.baseline.id";
var ATTR_EVAL_BASELINE_NAME = "eval.baseline.name";
var ATTR_EVAL_BASELINE_VERSION = "eval.baseline.version";
var ATTR_EVAL_METADATA = "eval.metadata";
var ATTR_EVAL_TRIALS = "eval.trials";
var ATTR_EVAL_CAPABILITY_NAME = "eval.capability.name";
var ATTR_EVAL_STEP_NAME = "eval.step.name";
var ATTR_EVAL_COLLECTION_ID = "eval.collection.id";
var ATTR_EVAL_COLLECTION_SIZE = "eval.collection.size";
var ATTR_EVAL_COLLECTION_NAME = "eval.collection.name";
var ATTR_EVAL_CONFIG_FLAGS = "eval.config.flags";
var ATTR_EVAL_CASE_INDEX = "eval.case.index";
var ATTR_EVAL_CASE_INPUT = "eval.case.input";
var ATTR_EVAL_CASE_OUTPUT = "eval.case.output";
var ATTR_EVAL_CASE_EXPECTED = "eval.case.expected";
var ATTR_EVAL_CASE_SCORES = "eval.case.scores";
var ATTR_EVAL_CASE_METADATA = "eval.case.metadata";
var ATTR_EVAL_CASE_TRIALS = "eval.case.trials";
var ATTR_EVAL_TRIAL_INDEX = "eval.trial.index";
var ATTR_EVAL_TRIAL_ERROR = "eval.trial.error";
var ATTR_EVAL_TASK_OUTPUT = "eval.task.output";
var ATTR_EVAL_TASK_NAME = "eval.task.name";
var ATTR_EVAL_TASK_TYPE = "eval.task.type";
var ATTR_EVAL_RUN_ID = "eval.run.id";
var ATTR_EVAL_SCORE_NAME = "eval.score.name";
var ATTR_EVAL_SCORE_VALUE = "eval.score.value";
var ATTR_EVAL_SCORE_THRESHOLD = "eval.score.threshold";
var ATTR_EVAL_SCORE_PASSED = "eval.score.passed";
var ATTR_EVAL_SCORE_IS_BOOLEAN = "eval.score.is_boolean";
var ATTR_EVAL_SCORE_METADATA = "eval.score.metadata";
var ATTR_EVAL_SCORE_AGGREGATION = "eval.score.aggregation";
var ATTR_EVAL_SCORE_TRIALS = "eval.score.trials";
var ATTR_EVAL_ONLINE_SCORERS_TOTAL = "eval.online.scorers.total";
var ATTR_EVAL_ONLINE_SCORERS_RAN = "eval.online.scorers.ran";
var ATTR_EVAL_ONLINE_SCORERS_SAMPLED_OUT = "eval.online.scorers.sampled_out";
var ATTR_EVAL_ONLINE_SCORERS_FAILED = "eval.online.scorers.failed";
var ATTR_EVAL_USER_NAME = "eval.user.name";
var ATTR_EVAL_USER_EMAIL = "eval.user.email";

// src/otel/semconv/attributes.ts
import {
  ATTR_ERROR_MESSAGE,
  ATTR_GEN_AI_AGENT_DESCRIPTION,
  ATTR_GEN_AI_AGENT_ID,
  ATTR_GEN_AI_AGENT_NAME,
  ATTR_GEN_AI_CONVERSATION_ID,
  ATTR_GEN_AI_INPUT_MESSAGES,
  ATTR_GEN_AI_OPERATION_NAME,
  ATTR_GEN_AI_OUTPUT_MESSAGES,
  ATTR_GEN_AI_OUTPUT_TYPE,
  ATTR_GEN_AI_PROVIDER_NAME,
  ATTR_GEN_AI_REQUEST_CHOICE_COUNT,
  ATTR_GEN_AI_REQUEST_ENCODING_FORMATS,
  ATTR_GEN_AI_REQUEST_FREQUENCY_PENALTY,
  ATTR_GEN_AI_REQUEST_MAX_TOKENS,
  ATTR_GEN_AI_REQUEST_MODEL,
  ATTR_GEN_AI_REQUEST_PRESENCE_PENALTY,
  ATTR_GEN_AI_REQUEST_SEED,
  ATTR_GEN_AI_REQUEST_STOP_SEQUENCES,
  ATTR_GEN_AI_REQUEST_TEMPERATURE,
  ATTR_GEN_AI_REQUEST_TOP_K,
  ATTR_GEN_AI_REQUEST_TOP_P,
  ATTR_GEN_AI_RESPONSE_FINISH_REASONS,
  ATTR_GEN_AI_RESPONSE_ID,
  ATTR_GEN_AI_RESPONSE_MODEL,
  ATTR_GEN_AI_TOOL_CALL_ID,
  ATTR_GEN_AI_TOOL_DESCRIPTION,
  ATTR_GEN_AI_TOOL_NAME,
  ATTR_GEN_AI_TOOL_TYPE,
  ATTR_GEN_AI_USAGE_INPUT_TOKENS,
  ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
  GEN_AI_OPERATION_NAME_VALUE_CHAT,
  GEN_AI_OPERATION_NAME_VALUE_CREATE_AGENT,
  GEN_AI_OPERATION_NAME_VALUE_EMBEDDINGS,
  GEN_AI_OPERATION_NAME_VALUE_EXECUTE_TOOL,
  GEN_AI_OPERATION_NAME_VALUE_GENERATE_CONTENT,
  GEN_AI_OPERATION_NAME_VALUE_INVOKE_AGENT,
  GEN_AI_OUTPUT_TYPE_VALUE_IMAGE,
  GEN_AI_OUTPUT_TYPE_VALUE_JSON,
  GEN_AI_OUTPUT_TYPE_VALUE_SPEECH,
  GEN_AI_OUTPUT_TYPE_VALUE_TEXT,
  GEN_AI_PROVIDER_NAME_VALUE_ANTHROPIC,
  GEN_AI_PROVIDER_NAME_VALUE_AWS_BEDROCK,
  GEN_AI_PROVIDER_NAME_VALUE_AZURE_AI_INFERENCE,
  GEN_AI_PROVIDER_NAME_VALUE_AZURE_AI_OPENAI,
  GEN_AI_PROVIDER_NAME_VALUE_COHERE,
  GEN_AI_PROVIDER_NAME_VALUE_DEEPSEEK,
  GEN_AI_PROVIDER_NAME_VALUE_GCP_GEMINI,
  GEN_AI_PROVIDER_NAME_VALUE_GCP_GEN_AI,
  GEN_AI_PROVIDER_NAME_VALUE_GCP_VERTEX_AI,
  GEN_AI_PROVIDER_NAME_VALUE_GROQ,
  GEN_AI_PROVIDER_NAME_VALUE_IBM_WATSONX_AI,
  GEN_AI_PROVIDER_NAME_VALUE_MISTRAL_AI,
  GEN_AI_PROVIDER_NAME_VALUE_OPENAI,
  GEN_AI_PROVIDER_NAME_VALUE_PERPLEXITY,
  GEN_AI_PROVIDER_NAME_VALUE_X_AI,
} from "@opentelemetry/semantic-conventions/incubating";

// Axiom-specific attribute keys (no upstream semconv equivalent yet).
var ATTR_AXIOM_GEN_AI_SCHEMA_URL = "axiom.gen_ai.schema_url";
var ATTR_AXIOM_GEN_AI_SDK_NAME = "axiom.gen_ai.sdk.name";
var ATTR_AXIOM_GEN_AI_SDK_VERSION = "axiom.gen_ai.sdk.version";
var ATTR_GEN_AI_CAPABILITY_NAME = "gen_ai.capability.name";
var ATTR_GEN_AI_STEP_NAME = "gen_ai.step.name";
var ATTR_GEN_AI_TOOL_ARGUMENTS = "gen_ai.tool.arguments";
var ATTR_GEN_AI_TOOL_MESSAGE = "gen_ai.tool.message";

// Provider-name values not (yet) defined by the OTel incubating semconv.
var GEN_AI_PROVIDER_NAME_VALUE_ASSEMBLYAI = "assemblyai";
var GEN_AI_PROVIDER_NAME_VALUE_CEREBRAS = "cerebras";
var GEN_AI_PROVIDER_NAME_VALUE_DEEPGRAM = "deepgram";
var GEN_AI_PROVIDER_NAME_VALUE_DEEPINFRA = "deepinfra";
var GEN_AI_PROVIDER_NAME_VALUE_ELEVENLABS = "elevenlabs";
var GEN_AI_PROVIDER_NAME_VALUE_FAL = "fal";
var GEN_AI_PROVIDER_NAME_VALUE_FIREWORKS = "fireworks";
var GEN_AI_PROVIDER_NAME_VALUE_GLADIA = "gladia";
var GEN_AI_PROVIDER_NAME_VALUE_HUME = "hume";
var GEN_AI_PROVIDER_NAME_VALUE_LMNT = "lmnt";
var GEN_AI_PROVIDER_NAME_VALUE_LUMA = "luma";
var GEN_AI_PROVIDER_NAME_VALUE_REPLICATE = "replicate";
var GEN_AI_PROVIDER_NAME_VALUE_REVAI = "revai";
var GEN_AI_PROVIDER_NAME_VALUE_TOGETHERAI = "togetherai";
var GEN_AI_PROVIDER_NAME_VALUE_VERCEL = "vercel";

// Single exported namespace object gathering every attribute key (and the
// allowed values for enumerated attributes) used by the Axiom AI SDK.
var Attr = {
  __EXPERIMENTAL_Flag: (flagName) => `flag.${flagName}`,
  __EXPERIMENTAL_Fact: (factName) => `fact.${factName}`,
  Axiom: {
    GenAI: {
      SchemaURL: ATTR_AXIOM_GEN_AI_SCHEMA_URL,
      SDK: {
        Name: ATTR_AXIOM_GEN_AI_SDK_NAME,
        Version: ATTR_AXIOM_GEN_AI_SDK_VERSION,
      },
    },
  },
  GenAI: {
    PromptMetadata: {
      ID: "axiom.gen_ai.prompt.id",
      Name: "axiom.gen_ai.prompt.name",
      Slug: "axiom.gen_ai.prompt.slug",
      Version: "axiom.gen_ai.prompt.version",
    },
    /**
     * These two are used to identify the span
     */
    Capability: { Name: ATTR_GEN_AI_CAPABILITY_NAME },
    Step: { Name: ATTR_GEN_AI_STEP_NAME },
    Provider: {
      Name: ATTR_GEN_AI_PROVIDER_NAME,
      Name_Values: {
        Anthropic: GEN_AI_PROVIDER_NAME_VALUE_ANTHROPIC,
        AssemblyAI: GEN_AI_PROVIDER_NAME_VALUE_ASSEMBLYAI,
        AWSBedrock: GEN_AI_PROVIDER_NAME_VALUE_AWS_BEDROCK,
        AzureAIInference: GEN_AI_PROVIDER_NAME_VALUE_AZURE_AI_INFERENCE,
        AzureAIOpenAI: GEN_AI_PROVIDER_NAME_VALUE_AZURE_AI_OPENAI,
        Cerebras: GEN_AI_PROVIDER_NAME_VALUE_CEREBRAS,
        Cohere: GEN_AI_PROVIDER_NAME_VALUE_COHERE,
        Deepgram: GEN_AI_PROVIDER_NAME_VALUE_DEEPGRAM,
        DeepInfra: GEN_AI_PROVIDER_NAME_VALUE_DEEPINFRA,
        Deepseek: GEN_AI_PROVIDER_NAME_VALUE_DEEPSEEK,
        ElevenLabs: GEN_AI_PROVIDER_NAME_VALUE_ELEVENLABS,
        Fal: GEN_AI_PROVIDER_NAME_VALUE_FAL,
        Fireworks: GEN_AI_PROVIDER_NAME_VALUE_FIREWORKS,
        GCPGemini: GEN_AI_PROVIDER_NAME_VALUE_GCP_GEMINI,
        GCPGenAI: GEN_AI_PROVIDER_NAME_VALUE_GCP_GEN_AI,
        GCPVertexAI: GEN_AI_PROVIDER_NAME_VALUE_GCP_VERTEX_AI,
        Gladia: GEN_AI_PROVIDER_NAME_VALUE_GLADIA,
        Groq: GEN_AI_PROVIDER_NAME_VALUE_GROQ,
        Hume: GEN_AI_PROVIDER_NAME_VALUE_HUME,
        IBMWatsonxAI: GEN_AI_PROVIDER_NAME_VALUE_IBM_WATSONX_AI,
        Lmnt: GEN_AI_PROVIDER_NAME_VALUE_LMNT,
        Luma: GEN_AI_PROVIDER_NAME_VALUE_LUMA,
        MistralAI: GEN_AI_PROVIDER_NAME_VALUE_MISTRAL_AI,
        OpenAI: GEN_AI_PROVIDER_NAME_VALUE_OPENAI,
        Perplexity: GEN_AI_PROVIDER_NAME_VALUE_PERPLEXITY,
        Replicate: GEN_AI_PROVIDER_NAME_VALUE_REPLICATE,
        RevAI: GEN_AI_PROVIDER_NAME_VALUE_REVAI,
        TogetherAI: GEN_AI_PROVIDER_NAME_VALUE_TOGETHERAI,
        Vercel: GEN_AI_PROVIDER_NAME_VALUE_VERCEL,
        XAI: GEN_AI_PROVIDER_NAME_VALUE_X_AI,
      },
    },
    /**
     * Regular attributes
     */
    Agent: {
      Description: ATTR_GEN_AI_AGENT_DESCRIPTION, // not yet used by axiom-ai
      ID: ATTR_GEN_AI_AGENT_ID, // not yet used by axiom-ai
      Name: ATTR_GEN_AI_AGENT_NAME, // not yet used by axiom-ai
    },
    Conversation: {
      // not yet used by axiom-ai, anyway probably needs to be provided by user
      ID: ATTR_GEN_AI_CONVERSATION_ID,
    },
    Input: { Messages: ATTR_GEN_AI_INPUT_MESSAGES },
    Operation: {
      Name: ATTR_GEN_AI_OPERATION_NAME,
      Name_Values: {
        /**
         * Note that "text_completion" is deprecated in favor of "chat"
         * for both OpenAI and Anthropic
         */
        Chat: GEN_AI_OPERATION_NAME_VALUE_CHAT,
        CreateAgent: GEN_AI_OPERATION_NAME_VALUE_CREATE_AGENT,
        Embeddings: GEN_AI_OPERATION_NAME_VALUE_EMBEDDINGS,
        ExecuteTool: GEN_AI_OPERATION_NAME_VALUE_EXECUTE_TOOL,
        GenerateContent: GEN_AI_OPERATION_NAME_VALUE_GENERATE_CONTENT,
        InvokeAgent: GEN_AI_OPERATION_NAME_VALUE_INVOKE_AGENT,
      },
    },
    Output: {
      Messages: ATTR_GEN_AI_OUTPUT_MESSAGES,
      Type: ATTR_GEN_AI_OUTPUT_TYPE,
      Type_Values: {
        Text: GEN_AI_OUTPUT_TYPE_VALUE_TEXT,
        Json: GEN_AI_OUTPUT_TYPE_VALUE_JSON,
        Image: GEN_AI_OUTPUT_TYPE_VALUE_IMAGE,
        Speech: GEN_AI_OUTPUT_TYPE_VALUE_SPEECH,
      },
    },
    /**
     * The provider that is hosting the model, eg AWS Bedrock
     * There doesn't seem to be a semconv for this
     */
    Request: {
      ChoiceCount: ATTR_GEN_AI_REQUEST_CHOICE_COUNT, // not yet used by axiom-ai
      EncodingFormats: ATTR_GEN_AI_REQUEST_ENCODING_FORMATS, // not yet used by axiom-ai
      FrequencyPenalty: ATTR_GEN_AI_REQUEST_FREQUENCY_PENALTY,
      MaxTokens: ATTR_GEN_AI_REQUEST_MAX_TOKENS,
      /**
       * The model you asked for
       */
      Model: ATTR_GEN_AI_REQUEST_MODEL,
      PresencePenalty: ATTR_GEN_AI_REQUEST_PRESENCE_PENALTY,
      Seed: ATTR_GEN_AI_REQUEST_SEED,
      StopSequences: ATTR_GEN_AI_REQUEST_STOP_SEQUENCES,
      Temperature: ATTR_GEN_AI_REQUEST_TEMPERATURE,
      TopK: ATTR_GEN_AI_REQUEST_TOP_K,
      TopP: ATTR_GEN_AI_REQUEST_TOP_P,
    },
    Response: {
      FinishReasons: ATTR_GEN_AI_RESPONSE_FINISH_REASONS,
      ID: ATTR_GEN_AI_RESPONSE_ID,
      /**
       * The model that was actually used (might be different bc routing)
       * - only ever get this from the response, otherwise omit
       */
      // somehow not landing on the span for google models? check up on this...
      Model: ATTR_GEN_AI_RESPONSE_MODEL,
    },
    Tool: {
      CallID: ATTR_GEN_AI_TOOL_CALL_ID,
      Description: ATTR_GEN_AI_TOOL_DESCRIPTION,
      Name: ATTR_GEN_AI_TOOL_NAME,
      Type: ATTR_GEN_AI_TOOL_TYPE,
      /**
       * Note, OTel Semantic Convention suggest only putting tool
       * inputs/outputs on the parent chat span.
       * But we at least want to give users THE OPTION to put them on the
       * tool spans themselves as well, because it enables a lot of things
       * with querying.
       * @see https://github.com/open-telemetry/semantic-conventions/releases/tag/v1.37.0
       */
      Arguments: ATTR_GEN_AI_TOOL_ARGUMENTS,
      /**
       * Note, OTel Semantic Convention suggest only putting tool
       * inputs/outputs on the parent chat span.
       * But we at least want to give users THE OPTION to put them on the
       * tool spans themselves as well, because it enables a lot of things
       * with querying.
       * @see https://github.com/open-telemetry/semantic-conventions/releases/tag/v1.37.0
       */
      Message: ATTR_GEN_AI_TOOL_MESSAGE,
    },
    Usage: {
      InputTokens: ATTR_GEN_AI_USAGE_INPUT_TOKENS,
      OutputTokens: ATTR_GEN_AI_USAGE_OUTPUT_TOKENS,
    },
  },
  Eval: {
    ID: ATTR_EVAL_ID,
    Name: ATTR_EVAL_NAME,
    Version: ATTR_EVAL_VERSION,
    Type: ATTR_EVAL_TYPE,
    Trials: ATTR_EVAL_TRIALS,
    Baseline: {
      ID: ATTR_EVAL_BASELINE_ID,
      Name: ATTR_EVAL_BASELINE_NAME,
      Version: ATTR_EVAL_BASELINE_VERSION,
    },
    Capability: { Name: ATTR_EVAL_CAPABILITY_NAME },
    Step: { Name: ATTR_EVAL_STEP_NAME },
    Tags: ATTR_EVAL_TAGS,
    Metadata: ATTR_EVAL_METADATA,
    Online: {
      Scorers: {
        Total: ATTR_EVAL_ONLINE_SCORERS_TOTAL,
        Ran: ATTR_EVAL_ONLINE_SCORERS_RAN,
        SampledOut: ATTR_EVAL_ONLINE_SCORERS_SAMPLED_OUT,
        Failed: ATTR_EVAL_ONLINE_SCORERS_FAILED,
      },
    },
    Collection: {
      ID: ATTR_EVAL_COLLECTION_ID,
      Name: ATTR_EVAL_COLLECTION_NAME,
      Size: ATTR_EVAL_COLLECTION_SIZE,
    },
    Config: { Flags: ATTR_EVAL_CONFIG_FLAGS },
    Run: { ID: ATTR_EVAL_RUN_ID },
    Trial: {
      Index: ATTR_EVAL_TRIAL_INDEX,
      Error: ATTR_EVAL_TRIAL_ERROR,
    },
    Case: {
      Index: ATTR_EVAL_CASE_INDEX,
      Input: ATTR_EVAL_CASE_INPUT,
      Output: ATTR_EVAL_CASE_OUTPUT,
      Expected: ATTR_EVAL_CASE_EXPECTED,
      Scores: ATTR_EVAL_CASE_SCORES,
      Metadata: ATTR_EVAL_CASE_METADATA,
      Trials: ATTR_EVAL_CASE_TRIALS,
    },
    Task: {
      Output: ATTR_EVAL_TASK_OUTPUT,
      Name: ATTR_EVAL_TASK_NAME,
      Type: ATTR_EVAL_TASK_TYPE,
    },
    Score: {
      Name: ATTR_EVAL_SCORE_NAME,
      Value: ATTR_EVAL_SCORE_VALUE,
      Threshold: ATTR_EVAL_SCORE_THRESHOLD,
      Passed: ATTR_EVAL_SCORE_PASSED,
      IsBoolean: ATTR_EVAL_SCORE_IS_BOOLEAN,
      Metadata: ATTR_EVAL_SCORE_METADATA,
      Aggregation: ATTR_EVAL_SCORE_AGGREGATION,
      Trials: ATTR_EVAL_SCORE_TRIALS,
    },
    User: {
      Name: ATTR_EVAL_USER_NAME,
      Email: ATTR_EVAL_USER_EMAIL,
    },
  },
  Error: {
    Type: ATTR_ERROR_TYPE,
    Message: ATTR_ERROR_MESSAGE,
  },
  HTTP: {
    Response: { StatusCode: ATTR_HTTP_RESPONSE_STATUS_CODE },
  },
};
export { Attr };
//# sourceMappingURL=chunk-4TKUTT24.js.map