neuronpedia

import type { AxiosRequestConfig, AxiosResponse } from 'axios';

/**
 * Generated by orval v7.4.1 🍺
 * Do not edit manually.
 * Neuronpedia - Global Commons
 * Global commons schema for Neuronpedia
 * OpenAPI spec version: 1.0.0
 *
 * NOTE(review): this is generated/bundled output, so durable changes presumably
 * belong in the OpenAPI schema or generator configuration — confirm.
 */

// ---------------------------------------------------------------------------
// Shared models
// ---------------------------------------------------------------------------

/**
 * An activation record containing tokens and their corresponding activation values.
 *
 * Single shared definition used by every request model and helper signature in
 * this file. It is intentionally not exported.
 */
interface NPActivation {
  /** List of tokens for this text */
  tokens: string[];
  /** Activation values corresponding to each token */
  values: number[];
}

// ---------------------------------------------------------------------------
// Scoring: embedding similarity
// ---------------------------------------------------------------------------

/**
 * Quantile or neighbor distance. Both alternatives are numeric, so the type is a plain `number`.
 */
type NPScoreEmbeddingOutputDistance = number;

/**
 * The "scorer.__call__" result's score breakdown.
 *
 * With the exception of `similarity`, which is a single number here instead of an
 * array of numbers, the type is copied from
 * https://github.com/EleutherAI/sae-auto-interp/blob/3659ff3bfefbe2628d37484e5bcc0087a5b10a27/sae_auto_interp/scorers/embedding/embedding.py#L20
 */
interface NPScoreEmbeddingOutput {
  /** The text that was used to evaluate the similarity */
  text: string;
  /** Quantile or neighbor distance */
  distance: NPScoreEmbeddingOutputDistance;
  /** What is the similarity of the example to the explanation */
  similarity: number;
}

/**
 * Response model for scoring explanations using embedding similarity
 */
interface NPScoreEmbeddingResponse {
  /** The score from 0 to 1 */
  score: number;
  /** Detailed breakdown of the embedding outputs */
  breakdown: NPScoreEmbeddingOutput[];
}

/**
 * Request model for scoring explanations using embedding similarity
 */
interface NPScoreEmbeddingRequest {
  /** List of activation records to analyze */
  activations: NPActivation[];
  /** The explanation to evaluate */
  explanation: string;
  /** Authentication secret for the API */
  secret: string;
}

/**
 * @summary Score an explanation using embedding similarity, using the dunzhang/stella_en_400M_v5 model.
 * @param nPScoreEmbeddingRequest - The request body.
 * @param options - Optional Axios request configuration.
 * @returns A promise of `TData`, which defaults to the Axios response wrapping {@link NPScoreEmbeddingResponse}.
 */
declare const postScoreEmbedding: <TData = AxiosResponse<NPScoreEmbeddingResponse>>(
  nPScoreEmbeddingRequest: NPScoreEmbeddingRequest,
  options?: AxiosRequestConfig
) => Promise<TData>;

/** Default result type of {@link postScoreEmbedding}. */
type PostScoreEmbeddingResult = AxiosResponse<NPScoreEmbeddingResponse>;

// ---------------------------------------------------------------------------
// Scoring: fuzzing / detection
// ---------------------------------------------------------------------------

/**
 * Value/type pair listing the supported scoring methods: the constant holds the
 * literal values and the type of the same name is their union.
 */
declare const NPScoreFuzzDetectionType: {
  readonly fuzz: "fuzz";
  readonly detection: "detection";
};

/**
 * Type of scoring method, either fuzz or detection
 */
type NPScoreFuzzDetectionType = typeof NPScoreFuzzDetectionType[keyof typeof NPScoreFuzzDetectionType];

/**
 * Request model for scoring explanations using fuzzing or detection methods
 */
interface NPScoreFuzzDetectionRequest {
  /** List of activation records to analyze */
  activations: NPActivation[];
  /** The explanation to evaluate */
  explanation: string;
  /** API key for OpenRouter service */
  openrouter_key: string;
  /** Model identifier to use for scoring */
  model: string;
  /** Type of scoring to perform - either fuzzing or detection */
  type: NPScoreFuzzDetectionType;
  /** Authentication secret for the API */
  secret: string;
}

/**
 * Quantile or neighbor distance. Both alternatives are numeric, so the type is a plain `number`.
 */
type NPScoreClassifierOutputDistance = number;

/**
 * The "scorer.__call__" result's score breakdown.
 *
 * Type copied from
 * https://github.com/EleutherAI/sae-auto-interp/blob/3659ff3bfefbe2628d37484e5bcc0087a5b10a27/sae_auto_interp/scorers/classifier/sample.py#L19
 */
interface NPScoreClassifierOutput {
  /** List of strings */
  str_tokens?: string[];
  /** List of floats */
  activations?: number[];
  /** Quantile or neighbor distance */
  distance?: NPScoreClassifierOutputDistance;
  /** Whether the example is activating or not */
  ground_truth?: boolean;
  /** Whether the model predicted the example activating or not */
  prediction?: boolean;
  /** Whether the sample is highlighted */
  highlighted?: boolean;
  /** The probability of the example activating */
  probability?: number;
  /** Whether the prediction is correct */
  correct?: boolean;
}

/**
 * Response model for scoring explanations using fuzzing or detection methods
 */
interface NPScoreFuzzDetectionResponse {
  /** The score from 0 to 1 */
  score: number;
  /** Detailed breakdown of the classification outputs */
  breakdown: NPScoreClassifierOutput[];
}

/**
 * @summary Score an explanation using fuzzing or detection methods
 * @param nPScoreFuzzDetectionRequest - The request body.
 * @param options - Optional Axios request configuration.
 * @returns A promise of `TData`, which defaults to the Axios response wrapping {@link NPScoreFuzzDetectionResponse}.
 */
declare const postScoreFuzzDetection: <TData = AxiosResponse<NPScoreFuzzDetectionResponse>>(
  nPScoreFuzzDetectionRequest: NPScoreFuzzDetectionRequest,
  options?: AxiosRequestConfig
) => Promise<TData>;

/** Default result type of {@link postScoreFuzzDetection}. */
type PostScoreFuzzDetectionResult = AxiosResponse<NPScoreFuzzDetectionResponse>;

/**
 * Scores an explanation with the fuzz or detection method and resolves to the
 * response model directly (not an Axios response).
 *
 * NOTE(review): only the signature is visible here; presumably this wraps
 * `postScoreFuzzDetection` — confirm against the implementation.
 *
 * @param type - Scoring method, `"fuzz"` or `"detection"`.
 * @param activations - Activation records to analyze.
 * @param explanation - The explanation to evaluate.
 * @param openRouterKey - API key for the OpenRouter service.
 * @param openRouterModel - Model identifier to use for scoring.
 * @param inferenceServerSecret - Authentication secret (presumably sent as the request's `secret` — confirm).
 * @returns The overall score and its per-example breakdown.
 */
declare function getEleutherScoreFuzzDetection(
  type: NPScoreFuzzDetectionType,
  activations: NPActivation[],
  explanation: string,
  openRouterKey: string,
  openRouterModel: string,
  inferenceServerSecret: string
): Promise<NPScoreFuzzDetectionResponse>;

/**
 * Scores an explanation using embedding similarity and resolves to the response
 * model directly (not an Axios response).
 *
 * NOTE(review): only the signature is visible here; presumably this wraps
 * `postScoreEmbedding` — confirm against the implementation.
 *
 * @param activations - Activation records to analyze.
 * @param explanation - The explanation to evaluate.
 * @param inferenceServerSecret - Authentication secret (presumably sent as the request's `secret` — confirm).
 * @returns The overall score and its per-example breakdown.
 */
declare function getEleutherScoreEmbedding(
  activations: NPActivation[],
  explanation: string,
  inferenceServerSecret: string
): Promise<NPScoreEmbeddingResponse>;

// ---------------------------------------------------------------------------
// Explanation generation
// ---------------------------------------------------------------------------

/**
 * Response model for generating explanations of neuron/feature behavior
 */
interface NPExplainDefaultResponse {
  /** The generated explanation for the given set of activations */
  explanation: string;
}

/**
 * Request model for generating explanations of neuron/feature behavior
 */
interface NPExplainDefaultRequest {
  /** List of activation records to analyze */
  activations: NPActivation[];
  /** API key for OpenRouter service */
  openrouter_key: string;
  /** Model identifier to use for explanation generation */
  model: string;
  /** Authentication secret for the API */
  secret: string;
}

/**
 * @summary Generate an explanation for neuron/feature behavior using the default explainer
 * @param nPExplainDefaultRequest - The request body.
 * @param options - Optional Axios request configuration.
 * @returns A promise of `TData`, which defaults to the Axios response wrapping {@link NPExplainDefaultResponse}.
 */
declare const postExplainDefault: <TData = AxiosResponse<NPExplainDefaultResponse>>(
  nPExplainDefaultRequest: NPExplainDefaultRequest,
  options?: AxiosRequestConfig
) => Promise<TData>;

/** Default result type of {@link postExplainDefault}. */
type PostExplainDefaultResult = AxiosResponse<NPExplainDefaultResponse>;

/**
 * Generates an explanation with the default explainer and resolves to the
 * response model directly (not an Axios response).
 *
 * NOTE(review): only the signature is visible here; presumably this wraps
 * `postExplainDefault` — confirm against the implementation.
 *
 * @param activations - Activation records to analyze.
 * @param openRouterKey - API key for the OpenRouter service.
 * @param openRouterModel - Model identifier to use for explanation generation.
 * @param inferenceServerSecret - Authentication secret (presumably sent as the request's `secret` — confirm).
 * @returns The generated explanation.
 */
declare function getEleutherExplanationDefault(
  activations: NPActivation[],
  openRouterKey: string,
  openRouterModel: string,
  inferenceServerSecret: string
): Promise<NPExplainDefaultResponse>;

// ---------------------------------------------------------------------------
// SAE evals
// ---------------------------------------------------------------------------

/**
 * An SAE eval result containing the outputs (metrics, config, etc) of the eval
 */
interface NPSaeEval {
  /** ID of the SAE Eval */
  id: string;
  /** SAE Eval Type - should equal the "name" property of NPSaeEvalType */
  typeName: string;
  /** The model ID of the SAE being evaluated */
  modelId: string;
  /** The source ID of the SAE being evaluated */
  sourceId: string;
  /** The output data, matching the outputSchema of the NPSaeEvalType, with the exception of eval_result_details, which will go under detailedMetrics */
  output: string;
  /** The eval_result_details of the output data. Kept in a separate field/column due to large size. */
  detailedMetrics?: string;
}

/**
 * A type of SAE Eval (eg absorption, sparse probing, etc). Can contain an array of the actual eval results.
 */
interface NPSaeEvalType {
  /** ID of the Eval Type */
  name: string;
  /** A UI-friendly display name of the eval type */
  displayName: string;
  /** A brief description of the eval */
  description: string;
  /**
   * JSON output schema used to parse the eval output results. From the SAEBench repo - for example
   * https://github.com/adamkarvonen/SAEBench/blob/main/sae_bench/evals/autointerp/eval_output_schema_autointerp.json
   */
  outputSchema: string;
  /** URL to the eval */
  url?: string;
  /** The actual eval results of this type */
  evals?: NPSaeEval[];
}

/**
 * Response model for listing SAE evals
 */
interface NPSaeEvalListResponse {
  /** List of SAE eval types and their results */
  evalTypes?: NPSaeEvalType[];
}

/**
 * Request model for listing SAE evals. No named fields are declared; any
 * string-keyed properties are accepted.
 */
interface NPSaeEvalListRequest {
  [key: string]: unknown;
}

/**
 * @summary List all SAE Evals
 * @param nPSaeEvalListRequest - The request body.
 * @param options - Optional Axios request configuration.
 * @returns A promise of `TData`, which defaults to the Axios response wrapping {@link NPSaeEvalListResponse}.
 */
declare const postApiSaeEval: <TData = AxiosResponse<NPSaeEvalListResponse>>(
  nPSaeEvalListRequest: NPSaeEvalListRequest,
  options?: AxiosRequestConfig
) => Promise<TData>;

/** Default result type of {@link postApiSaeEval}. */
type PostApiSaeEvalResult = AxiosResponse<NPSaeEvalListResponse>;

export {
  type NPExplainDefaultRequest,
  type NPExplainDefaultResponse,
  type NPSaeEval,
  type NPSaeEvalListRequest,
  type NPSaeEvalListResponse,
  type NPSaeEvalType,
  type NPScoreClassifierOutput,
  type NPScoreClassifierOutputDistance,
  type NPScoreEmbeddingOutput,
  type NPScoreEmbeddingOutputDistance,
  type NPScoreEmbeddingRequest,
  type NPScoreEmbeddingResponse,
  type NPScoreFuzzDetectionRequest,
  type NPScoreFuzzDetectionResponse,
  NPScoreFuzzDetectionType,
  type PostApiSaeEvalResult,
  type PostExplainDefaultResult,
  type PostScoreEmbeddingResult,
  type PostScoreFuzzDetectionResult,
  getEleutherExplanationDefault,
  getEleutherScoreEmbedding,
  getEleutherScoreFuzzDetection,
  postApiSaeEval,
  postExplainDefault,
  postScoreEmbedding,
  postScoreFuzzDetection
};