/*
 * @presidio-dev/hai-guardrails
 * Version:
 * A set of guards for LLM Apps
 * 753 lines (749 loc) • 30.9 kB
 * TypeScript
 */
// Generated by dts-bundle-generator v9.5.1
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
import { ZodObject, ZodRawShape, z } from 'zod/v4';
/**
* A single message in an LLM conversation.
*/
export type LLMMessage = {
/** Role label of the message author. Typed as a free-form string here; see `MessageType` for the role names guards can filter on. */
role: string;
/** Text content of the message. */
content: string;
/** Optional identifier for the message. */
id?: string;
};
/** A list of {@link LLMMessage} items. */
export type LLMMessages = LLMMessage[];
/**
* Wrapper around an {@link LLMMessage} that carries per-message bookkeeping used while guards run.
*/
export type LLMEngineMessage = {
/** The wrapped message. */
originalMessage: LLMMessage;
/** Whether the message is in scope for evaluation; `selectMessages` is documented as updating this flag. */
inScope: boolean;
/** Optional hash of the message. Presumably produced with the configured `MessageHahsingAlgorithm` — confirm against the implementation. */
messageHash?: string;
};
/**
* Outcome of one guard for one message.
*/
export type GuardResult = {
/** Whether the message passed the guard. */
passed: boolean;
/** Optional explanation of the outcome. */
reason?: string;
/** Optional replacement message. Presumably set when a guard rewrites content (e.g. a `"redact"` mode) — confirm against the guard implementations. */
modifiedMessage?: LLMMessage;
/** Identifier of the guard that produced this result. */
guardId: string;
/** Human-readable name of the guard that produced this result. */
guardName: string;
/** The message this result refers to. */
message: LLMMessage;
/** Index associated with the message; presumably its position in the input array — confirm. */
index: number;
/** Optional hash of the message. */
messageHash?: string;
/** Whether the message was in scope for this guard. */
inScope: boolean;
/** Guard-specific extra data; the individual guard docs list fields such as `score` or `categories`. */
additionalFields?: Record<string, unknown>;
};
/** Role names accepted by `GuardOptions.roles` when restricting which messages a guard evaluates. */
export type MessageType = "user" | "assistant" | "human" | "ai" | "generic" | "developer" | "system" | "function" | "tool" | "remove";
/**
* A guard function.
*
* Accepts either plain messages or engine-wrapped messages plus an optional LLM,
* and returns an array of {@link GuardResult}, either synchronously or as a promise.
*/
export type Guard = (messages: LLMMessage[] | LLMEngineMessage[], llm?: LLM) => Promise<GuardResult[]> | GuardResult[];
/**
* Custom per-message filter usable as `GuardOptions.predicate`.
*
* Receives the message, its index, and the full message array, and returns a boolean.
* Presumably `true` keeps the message in scope — confirm against `selectMessages`.
*/
export type GuardPredicate = (msg: LLMMessage, idx: number, messages: LLMMessage[]) => boolean;
/**
* Strategies for choosing which messages a guard evaluates (see `GuardOptions.selection`).
*
* NOTE(review): member semantics below are inferred from the names; confirm against `selectMessages`.
*/
export declare enum SelectionType {
/** Presumably only the first matching message. */
First = "first",
/** Presumably the first `n` matching messages (`GuardOptions.n`). */
NFirst = "n-first",
/** Presumably only the last matching message. */
Last = "last",
/** Presumably the last `n` matching messages (`GuardOptions.n`). */
NLast = "n-last",
/** Presumably every matching message. */
All = "all"
}
/**
* Options shared by all guards. Every field is optional.
*/
export type GuardOptions = {
/** Custom per-message filter. */
predicate?: GuardPredicate;
/** Roles to restrict evaluation to. */
roles?: MessageType[];
/** Message selection strategy. */
selection?: SelectionType;
/** Count parameter; presumably used by `SelectionType.NFirst` / `SelectionType.NLast` — confirm. */
n?: number;
/** LLM for the guard to use. */
llm?: LLM;
/** Algorithm used to hash messages. Note the enum is exported under the spelling `MessageHahsingAlgorithm`. */
messageHashingAlgorithm?: MessageHahsingAlgorithm;
};
/**
* Per-message check supplied to `makeGuard` through `MakeGuardConfig.implementation`.
*
* Receives an input string, the engine-wrapped message, the guard's config, an index, and an optional LLM,
* and returns a single {@link GuardResult}, either synchronously or as a promise.
* NOTE(review): `input` is presumably the message content — confirm against `makeGuard`.
*/
export type GuardImplementation = (input: string, msg: LLMEngineMessage, config: MakeGuardConfig, idx: number, llm?: LLM) => GuardResult | Promise<GuardResult>;
/**
* Configuration accepted by `makeGuard`: all {@link GuardOptions} plus the guard's identity and its check.
*/
export type MakeGuardConfig = GuardOptions & {
/** Identifier of the guard. */
id: string;
/** Human-readable name of the guard. */
name: string;
/** Optional description of the guard. */
description?: string;
/** The check to run. */
implementation: GuardImplementation;
};
/**
* Hash algorithms selectable through the `messageHashingAlgorithm` options.
*
* NOTE(review): the exported identifier is spelled `MessageHahsingAlgorithm` (sic).
* Consumers must import it under this spelling; `MessageHashingAlgorithm` is not declared in this file.
* Renaming it would be a breaking change, so any fix should add an alias in the package source rather than edit this generated bundle.
*/
export declare enum MessageHahsingAlgorithm {
MD5 = "md5",
SHA1 = "sha1",
SHA256 = "sha256",
SHA512 = "sha512"
}
/** Log level names accepted by `GuardrailsChainOptions.logLevel` and `GuardrailsEngine.setLogLevel`. */
export type LogLevel = "fatal" | "error" | "warn" | "info" | "debug" | "trace" | "silent";
/**
* Constructor options for `GuardrailsEngine`. Only `guards` is required.
*/
export interface GuardrailsChainOptions {
/** Optional LLM; the guard docs in this file describe it as usable by guards that were not given their own `llm`. */
llm?: LLM;
/** Guards to run. */
guards: Guard[];
/** Whether the engine is enabled. */
enabled?: boolean;
/** Log level for the engine. */
logLevel?: LogLevel;
/** Algorithm used to hash messages. Note the enum is exported under the spelling `MessageHahsingAlgorithm`. */
messageHashingAlgorithm?: MessageHahsingAlgorithm;
}
/**
* An LLM usable by guards: either a LangChain `BaseChatModel`, or an async function
* that takes an array of {@link LLMMessage} and resolves to an array of {@link LLMMessage}.
*/
export type LLM = BaseChatModel | ((messages: LLMMessage[]) => Promise<LLMMessage[]>);
/**
* Names of the built-in detection tactics (see the `Heuristic`, `LanguageModel` and `Pattern` classes).
*
* Note that `LanguageModel` has the value `"language_model"` (underscore).
*/
export declare enum TacticName {
Heuristic = "heuristic",
LanguageModel = "language_model",
Pattern = "pattern"
}
/**
* Result of running a {@link Tactic}.
*/
export interface TacticExecution {
/** Numeric score produced by the tactic. */
score: number;
/** Optional tactic-specific extra data. */
additionalFields?: Record<string, unknown>;
}
/**
* Contract implemented by the detection tactics.
*/
export interface Tactic {
/** Which tactic this is. */
readonly name: TacticName;
/** Threshold used when no override is supplied to `execute`. */
readonly defaultThreshold: number;
/**
* Runs the tactic on an input string.
*
* @param input - Text to evaluate.
* @param thresholdOverride - Optional threshold to use instead of `defaultThreshold`.
* @returns A promise resolving to the tactic's {@link TacticExecution}.
*/
execute(input: string, thresholdOverride?: number): Promise<TacticExecution>;
}
/**
* Selects messages based on specified roles, selection type, and optional predicate.
*
* @param messages - Array of engine-wrapped messages to filter.
* @param options - Optional selection criteria (`roles`, `selection`, `n`, `predicate` from {@link GuardOptions}).
* @returns Array of messages with `inScope` property updated.
*/
export declare function selectMessages(messages: LLMEngineMessage[], options?: GuardOptions): LLMEngineMessage[];
/**
* Builds a {@link Guard} from a configuration object.
*
* @param config - Guard identity, selection options, and the `implementation` to run.
* @returns The resulting guard function.
*/
export declare function makeGuard(config: MakeGuardConfig): Guard;
/**
* Creates the injection guard.
* NOTE(review): presumably detects prompt-injection attempts using one of the tactics declared in this file — confirm against the implementation.
*
* @param opts - Shared guard options; `undefined` is accepted, but the argument itself is not optional.
* @param extra - Detection settings (required).
* @returns The resulting guard function.
*/
export declare function injectionGuard(opts: GuardOptions | undefined, extra: {
/** Detection mode. Note the literal is `"language-model"` (hyphen), unlike `TacticName.LanguageModel`, whose value is `"language_model"`. */
mode: "heuristic" | "pattern" | "language-model";
/** Score threshold (required). */
threshold: number;
/** Optional flag; presumably makes the guard fail when the check itself errors — confirm. */
failOnError?: boolean;
}): Guard;
/**
* Creates the leakage guard.
* NOTE(review): presumably detects attempts to leak prompt/system information — confirm against the implementation.
*
* @param opts - Shared guard options; `undefined` is accepted, but the argument itself is not optional.
* @param extra - Detection settings (required).
* @returns The resulting guard function.
*/
export declare function leakageGuard(opts: GuardOptions | undefined, extra: {
/** Detection mode. Note the literal is `"language-model"` (hyphen), unlike `TacticName.LanguageModel`, whose value is `"language_model"`. */
mode: "heuristic" | "pattern" | "language-model";
/** Score threshold (required). */
threshold: number;
/** Optional flag; presumably makes the guard fail when the check itself errors — confirm. */
failOnError?: boolean;
}): Guard;
/**
* A PII detection pattern usable in `PIIGuardOptions.patterns`.
*/
export type PIIRegex = {
/** Identifier of the pattern. */
id: string;
/** Human-readable name of the pattern. */
name: string;
/** Description of what the pattern matches. */
description: string;
/** Regular expression to match. */
regex: RegExp;
/** Replacement text; presumably substituted for matches in `"redact"` mode — confirm. */
replacement: string;
};
/**
* Options for `piiGuard`: all {@link GuardOptions} plus PII-specific settings.
*/
export type PIIGuardOptions = GuardOptions & {
/** Optional patterns; whether they replace or extend built-in patterns is not visible here — confirm. */
patterns?: PIIRegex[];
/** Optional handling mode: `"block"` or `"redact"`. */
mode?: "block" | "redact";
};
/**
* Creates the PII guard.
*
* @param opts - Optional PII guard options; may be omitted entirely.
* @returns The resulting guard function.
*/
export declare function piiGuard(opts?: PIIGuardOptions): Guard;
/**
* A secret detection pattern usable in `SecretGuardOptions.patterns`.
*/
export type SecretPattern = {
/** Identifier of the pattern. */
id: string;
/** Human-readable name of the pattern. */
name: string;
/** Description of what the pattern matches. */
description: string;
/** Regular expression to match. */
pattern: RegExp;
/** Optional minimum entropy; presumably a match must reach it to count as a secret — confirm. */
minEntropy?: number;
/** Replacement text; presumably substituted for matches in `"redact"` mode — confirm. */
replacement: string;
};
/**
* Options for `secretGuard`: all {@link GuardOptions} plus secret-specific settings.
*/
export type SecretGuardOptions = GuardOptions & {
/** Optional patterns; whether they replace or extend built-in patterns is not visible here — confirm. */
patterns?: SecretPattern[];
/** Optional handling mode: `"block"` or `"redact"`. */
mode?: "block" | "redact";
};
/**
* Creates the secret guard.
*
* @param opts - Optional secret guard options; may be omitted entirely.
* @returns The resulting guard function.
*/
export declare function secretGuard(opts?: SecretGuardOptions): Guard;
/**
* Supported validation types for the LLM guard.
* - `Score`: Validates content and returns a numeric score (lower is better).
* - `Binary`: Validates content with a simple pass/fail result.
*
* Selected through `LLMGuardOptions.validationType`.
*/
export declare enum ValidationType {
/**
* Score-based validation where content is rated on a numeric scale.
* The guard passes if the score is below the configured threshold.
*/
Score = "score",
/**
* Binary validation where content is either accepted or rejected.
* The guard passes if the response indicates the content is valid.
*/
Binary = "binary"
}
/**
* Represents a single example of structured response with its corresponding input.
* The example carries every field of `T` plus an `input` string.
* @template T - The type of the structured response.
*/
export type StructuredResponseExampleSchemaType<T> = T & {
/** The input text that produces this example response */
input: string;
};
/**
* Collection of validation examples for both valid and invalid cases.
* `valid` is required; `invalid` may be omitted.
* @template T - The type of the structured response.
*/
export type StructuredResponseExamplesSchemaType<T> = {
/** Array of examples that should pass validation */
valid: StructuredResponseExampleSchemaType<T>[];
/**
* Optional array of examples that should fail validation
* @default []
*/
invalid?: StructuredResponseExampleSchemaType<T>[];
};
/**
* Configuration options for creating an LLM guard.
*
* Combines {@link GuardOptions}, the optional `id` / `name` / `description` fields of
* `MakeGuardConfig` (everything except `implementation`), and the LLM-guard-specific fields below.
* @template Shape - The shape of the Zod schema used for validation.
*/
export type LLMGuardOptions<Shape extends ZodRawShape = ZodRawShape> = GuardOptions & Omit<Partial<MakeGuardConfig>, "implementation"> & {
/** Type of validation to perform */
validationType: ValidationType;
/**
* Zod schema that defines the expected structure of the LLM's response.
* Must include a 'score' field for ValidationType.Score or a 'passed' field for ValidationType.Binary.
*/
schema: ZodObject<Shape>;
/**
* Optional examples to guide the LLM's responses.
* Helps improve the quality and consistency of validations.
*/
examples?: StructuredResponseExamplesSchemaType<z.infer<ZodObject<Shape>>>;
/**
* Clear description of the LLM's role and the validation criteria.
* This helps the LLM understand how to evaluate the input.
*/
roleDefinition: string;
/**
* Required for ValidationType.Score (optional in the type, since ValidationType.Binary does not use it).
* Upper bound for a passing score: scores below this threshold pass validation.
*/
threshold?: number;
};
/**
* ## Creates a guard that uses a language model to validate content against a schema.
*
* ## Configuration Options
*
* The `llmGuard` function accepts the following configuration options:
*
* ### Required Options
* - `validationType`: The type of validation to perform (`ValidationType.Score` or `ValidationType.Binary`)
* - `schema`: A Zod schema that defines the expected response structure from the LLM
* - `roleDefinition`: A clear description of the LLM's role and validation criteria
* - `llm`: The language model to use for validation (optional if used with GuardrailsEngine and provided in the engine)
*
* ### Optional Options
* - `examples`: Object containing `valid` and `invalid` example arrays to guide the LLM
* - `threshold`: Required for `ValidationType.Score` - scores below this value pass
* - `id`: Unique identifier for the guard (default: 'llm')
* - `name`: Human-readable name for the guard (default: 'LLM Guard')
* - `description`: Description of what the guard does (default: 'Using LLM to validate messages')
*
* ### Schema Requirements
* - For `ValidationType.Binary`: Schema must include a `passed: boolean` field
* - For `ValidationType.Score`: Schema must include a `score: number` field
*
* ## Behavior
* - When used with `ValidationType.Score` the `score` is compared to the `threshold` and the guard passes if the score is below the threshold
* - When used with `ValidationType.Binary` the `passed` field is checked and the guard passes if the value is `true` otherwise it fails
*
* @example
* ```typescript
* // Basic binary validation
* const schema = z.object({
* passed: z.boolean(),
* reason: z.string(),
* severity: z.enum(['low', 'medium', 'high'])
* });
*
* // Named so that it does not shadow the exported `toxicGuard` factory
* const toxicityGuard = llmGuard({
* validationType: ValidationType.Binary,
* schema,
* roleDefinition: 'You are a content moderator. Detect if the input contains toxic language.',
* examples: {
* valid: [
* { input: 'Hello, how are you?', passed: true, reason: 'Friendly greeting', severity: 'low' }
* ],
* invalid: [
* { input: 'I hate you!', passed: false, reason: 'Contains hate speech', severity: 'high' }
* ]
* }
* });
*
* // Score-based validation with threshold
* const scoringSchema = z.object({
* score: z.number().min(0).max(10),
* reason: z.string(),
* confidence: z.number().min(0).max(1)
* });
*
* const qualityGuard = llmGuard({
* validationType: ValidationType.Score,
* schema: scoringSchema,
* threshold: 5, // Scores below 5 will pass
* roleDefinition: 'Rate the quality of the input text from 0-10 (lower is better).',
* examples: {
* valid: [
* { input: 'Well-written text with good grammar.', score: 2, reason: 'High quality', confidence: 0.9 }
* ],
* invalid: [
* { input: 'Bad text with many errors.', score: 8, reason: 'Poor quality', confidence: 0.8 }
* ]
* }
* });
* ```
*
* @template Shape - The shape of the Zod schema used for validation
* @param opts - Configuration options for the LLM guard
* @returns A guard function that validates content using the configured LLM
* @throws {Error} If required configuration is missing or invalid (e.g., missing threshold for score validation)
*/
export declare function llmGuard<Shape extends ZodRawShape>(opts: LLMGuardOptions<Shape>): Guard;
/**
* Configuration options for the Toxic Guard: all {@link GuardOptions} plus an optional `threshold`.
*
* @property {number} [threshold=0.95] - The toxicity score threshold (0-1).
* Inputs with scores at or above this threshold will be considered toxic.
* Lower values make the guard more sensitive to potential toxicity.
*/
export type ToxicGuardOptions = GuardOptions & {
/** Toxicity score threshold (0-1); documented default is 0.95. */
threshold?: number;
};
/**
* ## Creates a guard that detects toxic language in text inputs.
*
* The Toxic Guard uses a language model to analyze text and detect various forms of toxicity,
* including hate speech, harassment, and other harmful content. It returns a score between 0 and 1,
* where higher scores indicate higher likelihood of toxicity.
*
* ## Configuration Options
* - `threshold`: The toxicity score threshold (0-1, default: 0.95)
* - `llm`: Custom LLM instance to use (optional when used with {@link GuardrailsEngine} and provided in the {@link GuardrailsChainOptions.llm})
* - `selection`: Which messages to evaluate (default: {@link SelectionType.Last})
*
* ## Response Format
* The guard returns an array of {@link GuardResult}
*
* @example
* ```typescript
* // Basic usage: default selection with a custom threshold
* const basicGuard = toxicGuard({ threshold: 0.9 });
*
* // Advanced usage with custom selection and LLM
* const customGuard = toxicGuard({
* threshold: 0.85,
* selection: SelectionType.All, // Check all messages
* roles: ['user', 'assistant'], // Only check user and assistant messages
* llm: customLLM, // Use a custom LLM instance
* messageHashingAlgorithm: MessageHahsingAlgorithm.SHA512 // Use SHA-512 for message hashing (the enum is exported under this spelling)
* });
* ```
*
* @see {@link GuardOptions} for additional configuration options
* @see {@link GuardResult} for the structure of the returned results
*
* @throws {Error} If no LLM is provided either in the options or in the GuardrailsEngine
*
* @param {ToxicGuardOptions} opts - Configuration options for the Toxic Guard
* @returns {Guard} A guard function that detects toxic language in text inputs
*/
export declare function toxicGuard(opts: ToxicGuardOptions): Guard;
/**
* Configuration options for the Adult Content Guard: all {@link GuardOptions} plus an optional `threshold`.
*
* @property {number} [threshold=0.8] - The adult content detection threshold (0-1).
* Inputs with scores at or above this threshold will be flagged as potential adult content.
* Lower values make the guard more sensitive to potential adult content.
*/
export type AdultContentGuardOptions = GuardOptions & {
/** Adult content detection threshold (0-1); documented default is 0.8. */
threshold?: number;
};
/**
* ## Creates a guard that detects adult or NSFW (Not Safe For Work) content in text inputs.
*
* The Adult Content Guard uses a language model to identify explicit sexual content,
* adult themes, or other NSFW material. It helps ensure that content remains appropriate
* for the intended audience and use case.
*
* ## Configuration Options
* - `threshold`: The adult content detection threshold (0-1, default: 0.8)
* - `llm`: Custom LLM instance to use (optional when used with {@link GuardrailsEngine} and provided in the {@link GuardrailsChainOptions.llm})
* - `selection`: Which messages to evaluate (default: {@link SelectionType.Last})
*
* ## Response Format
* The guard returns an array of {@link GuardResult} with the following additional properties:
* - `score`: Confidence score between 0 and 1
* - `reason`: Explanation of the adult content detection
* - `categories`: Array of detected content categories (e.g., 'romance', 'suggestive', 'adult')
* - `isExplicit`: Boolean indicating if the content contains explicit sexual material
*
* @example
* ```typescript
* // Basic usage: default selection with a custom threshold
* const basicGuard = adultContentGuard({ threshold: 0.85 });
*
* // Advanced usage with custom selection and LLM
* const customGuard = adultContentGuard({
* threshold: 0.75,
* selection: SelectionType.All, // Check all messages
* roles: ['user', 'assistant'], // Check both user and assistant messages
* llm: customLLM, // Use a custom LLM instance
* messageHashingAlgorithm: MessageHahsingAlgorithm.SHA512 // Use SHA-512 for message hashing (the enum is exported under this spelling)
* });
* ```
*
* @see {@link GuardOptions} for additional configuration options
* @see {@link GuardResult} for the structure of the returned results
*
* @throws {Error} If no LLM is provided either in the options or in the GuardrailsEngine
*
* @param {AdultContentGuardOptions} opts - Configuration options for the Adult Content Guard
* @returns {Guard} A guard function that detects adult or NSFW content in text inputs
*/
export declare function adultContentGuard(opts: AdultContentGuardOptions): Guard;
/**
* Configuration options for the Bias Detection Guard: all {@link GuardOptions} plus an optional `threshold`.
*
* @property {number} [threshold=0.7] - The bias detection threshold (0-1).
* Inputs with scores at or above this threshold will be flagged for potential bias.
* Lower values make the guard more sensitive to potential bias.
*/
export type BiasDetectionGuardOptions = GuardOptions & {
/** Bias detection threshold (0-1); documented default is 0.7. */
threshold?: number;
};
/**
* ## Creates a guard that detects potential bias in text inputs.
*
* The Bias Detection Guard uses a language model to identify stereotypes, prejudices,
* and unfair generalizations about people based on their group membership. It helps ensure
* that AI-generated content remains fair and inclusive.
*
* ## Configuration Options
* - `threshold`: The bias detection threshold (0-1, default: 0.7)
* - `llm`: Custom LLM instance to use (optional when used with {@link GuardrailsEngine} and provided in the {@link GuardrailsChainOptions.llm})
* - `selection`: Which messages to evaluate (default: {@link SelectionType.Last})
*
* ## Response Format
* The guard returns an array of {@link GuardResult} with the following additional properties:
* - `score`: Confidence score between 0 and 1
* - `reason`: Explanation of the bias detection
* - `categories`: Array of detected bias types (e.g., 'age', 'gender', 'racial')
* - `affectedGroups`: Groups that are the subject of potential bias
* - `impact`: Estimated impact level of the detected bias ('low', 'medium', 'high')
*
* @example
* ```typescript
* // Basic usage: default selection with a custom threshold
* const basicGuard = biasDetectionGuard({ threshold: 0.75 });
*
* // Advanced usage with custom selection and LLM
* const customGuard = biasDetectionGuard({
* threshold: 0.65,
* selection: SelectionType.All, // Check all messages
* roles: ['assistant'], // Only check assistant outputs
* llm: customLLM, // Use a custom LLM instance
* messageHashingAlgorithm: MessageHahsingAlgorithm.SHA512 // Use SHA-512 for message hashing (the enum is exported under this spelling)
* });
* ```
*
* @see {@link GuardOptions} for additional configuration options
* @see {@link GuardResult} for the structure of the returned results
* @see {@link hateSpeechGuard} for detecting identity-based hate speech
* @see {@link toxicGuard} for detecting general toxicity
*
* @throws {Error} If no LLM is provided either in the options or in the GuardrailsEngine
*
* @param {BiasDetectionGuardOptions} opts - Configuration options for the Bias Detection Guard
* @returns {Guard} A guard function that detects potential bias in text inputs
*/
export declare function biasDetectionGuard(opts: BiasDetectionGuardOptions): Guard;
/**
* Configuration options for the Copyright Guard: all {@link GuardOptions} plus an optional `threshold`.
*
* @property {number} [threshold=0.8] - The copyright detection threshold (0-1).
* Inputs with scores at or above this threshold will be flagged for potential copyright issues.
* Lower values make the guard more sensitive to potential copyright violations.
*/
export type CopyrightGuardOptions = GuardOptions & {
/** Copyright detection threshold (0-1); documented default is 0.8. */
threshold?: number;
};
/**
* ## Creates a guard that detects potential copyright violations in text inputs.
*
* The Copyright Guard uses a language model to identify text that might be protected by copyright,
* including song lyrics, book excerpts, articles, code snippets, and movie scripts. It helps prevent
* the unauthorized use of copyrighted material in AI-generated content.
*
* ## Configuration Options
* - `threshold`: The copyright detection threshold (0-1, default: 0.8)
* - `llm`: Custom LLM instance to use (optional when used with {@link GuardrailsEngine} and provided in the {@link GuardrailsChainOptions.llm})
* - `selection`: Which messages to evaluate (default: {@link SelectionType.Last})
*
* ## Response Format
* The guard returns an array of {@link GuardResult} with the following additional properties:
* - `score`: Confidence score between 0 and 1
* - `reason`: Explanation of the copyright detection
* - `type`: Array of detected content types (e.g., 'book_excerpt', 'lyrics')
* - `source`: Potential source or work this content might be from
* - `isDirectMatch`: Whether this appears to be a direct match with a known work
*
* @example
* ```typescript
* // Basic usage: default selection with a custom threshold
* const basicGuard = copyrightGuard({ threshold: 0.75 });
*
* // Advanced usage with custom selection and LLM
* const customGuard = copyrightGuard({
* threshold: 0.7,
* selection: SelectionType.All, // Check all messages
* roles: ['assistant'], // Only check assistant outputs
* llm: customLLM, // Use a custom LLM instance
* messageHashingAlgorithm: MessageHahsingAlgorithm.SHA512 // Use SHA-512 for message hashing (the enum is exported under this spelling)
* });
* ```
*
* @see {@link GuardOptions} for additional configuration options
* @see {@link GuardResult} for the structure of the returned results
*
* @throws {Error} If no LLM is provided either in the options or in the GuardrailsEngine
*
* @param {CopyrightGuardOptions} opts - Configuration options for the Copyright Guard
* @returns {Guard} A guard function that detects potential copyright violations in text inputs
*/
export declare function copyrightGuard(opts: CopyrightGuardOptions): Guard;
/**
* Configuration options for the Hate Speech Guard: all {@link GuardOptions} plus an optional `threshold`.
*
* @property {number} [threshold=0.9] - The hate speech detection threshold (0-1).
* Inputs with scores at or above this threshold will be considered hate speech.
* Lower values make the guard more sensitive to potential hate speech.
*/
export type HateSpeechGuardOptions = GuardOptions & {
/** Hate speech detection threshold (0-1); documented default is 0.9. */
threshold?: number;
};
/**
* ## Creates a guard that detects hate speech and identity-based attacks in text inputs.
*
* The Hate Speech Guard uses a language model to identify content that attacks or uses
* pejorative/discriminatory language about individuals or groups based on protected
* characteristics such as race, ethnicity, religion, gender, sexual orientation, or disability.
*
* ## Configuration Options
* - `threshold`: The hate speech detection threshold (0-1, default: 0.9)
* - `llm`: Custom LLM instance to use (optional when used with {@link GuardrailsEngine} and provided in the {@link GuardrailsChainOptions.llm})
* - `selection`: Which messages to evaluate (default: {@link SelectionType.Last})
*
* ## Response Format
* The guard returns an array of {@link GuardResult} with the following additional properties:
* - `score`: Confidence score between 0 and 1
* - `reason`: Explanation of the hate speech detection
* - `categories`: Array of detected hate speech categories (e.g., 'racial', 'religious', 'ableist')
* - `targetedGroups`: Array of groups targeted by the hate speech
*
* @example
* ```typescript
* // Basic usage: default selection with a custom threshold
* const basicGuard = hateSpeechGuard({ threshold: 0.85 });
*
* // Advanced usage with custom selection and LLM
* const customGuard = hateSpeechGuard({
* threshold: 0.8,
* selection: SelectionType.All, // Check all messages
* roles: ['user', 'assistant'], // Check specific roles
* llm: customLLM, // Use a custom LLM instance
* messageHashingAlgorithm: MessageHahsingAlgorithm.SHA512 // Use SHA-512 for message hashing (the enum is exported under this spelling)
* });
* ```
*
* @see {@link GuardOptions} for additional configuration options
* @see {@link GuardResult} for the structure of the returned results
* @see {@link profanityGuard} for detecting general profanity
* @see {@link toxicGuard} for detecting general toxicity
*
* @throws {Error} If no LLM is provided either in the options or in the GuardrailsEngine
*
* @param {HateSpeechGuardOptions} opts - Configuration options for the Hate Speech Guard
* @returns {Guard} A guard function that detects hate speech in text inputs
*/
export declare function hateSpeechGuard(opts: HateSpeechGuardOptions): Guard;
/**
* Configuration options for the Profanity Guard: all {@link GuardOptions} plus an optional `threshold`.
*
* @property {number} [threshold=0.8] - The profanity detection threshold (0-1).
* Inputs with scores at or above this threshold will be considered profane.
* Lower values make the guard more sensitive to potential profanity.
*/
export type ProfanityGuardOptions = GuardOptions & {
/** Profanity detection threshold (0-1); documented default is 0.8. */
threshold?: number;
};
/**
* ## Creates a guard that detects profane or offensive language in text inputs.
*
* The Profanity Guard uses a language model to identify and filter out profanity,
* vulgar language, and explicit content. It returns a score between 0 and 1,
* where higher scores indicate a higher likelihood of containing profanity.
*
* ## Configuration Options
* - `threshold`: The profanity detection threshold (0-1, default: 0.8)
* - `llm`: Custom LLM instance to use (optional when used with {@link GuardrailsEngine} and provided in the {@link GuardrailsChainOptions.llm})
* - `selection`: Which messages to evaluate (default: {@link SelectionType.Last})
*
* ## Response Format
* The guard returns an array of {@link GuardResult} with the following additional properties:
* - `score`: Confidence score between 0 and 1
* - `reason`: Explanation of the profanity detection
* - `flaggedWords`: Array of detected profane words (if any)
* - `severity`: Severity level of the profanity ('mild', 'moderate', 'severe')
*
* @example
* ```typescript
* // Basic usage: default selection with a custom threshold
* const basicGuard = profanityGuard({ threshold: 0.9 });
*
* // Advanced usage with custom selection and LLM
* const customGuard = profanityGuard({
* threshold: 0.75,
* selection: SelectionType.All, // Check all messages
* roles: ['user'], // Only check user messages
* llm: customLLM, // Use a custom LLM instance
* messageHashingAlgorithm: MessageHahsingAlgorithm.SHA512 // Use SHA-512 for message hashing (the enum is exported under this spelling)
* });
* ```
*
* @see {@link GuardOptions} for additional configuration options
* @see {@link GuardResult} for the structure of the returned results
*
* @throws {Error} If no LLM is provided either in the options or in the GuardrailsEngine
*
* @param {ProfanityGuardOptions} opts - Configuration options for the Profanity Guard
* @returns {Guard} A guard function that detects profane language in text inputs
*/
export declare function profanityGuard(opts: ProfanityGuardOptions): Guard;
/**
* Value resolved by `GuardrailsEngine.run`.
*/
export type GuardrailsEngineResult = {
/** The processed messages. */
messages: LLMMessage[];
/** Per-guard results: one entry per guard, holding that guard's per-message results. */
messagesWithGuardResult: {
/** Identifier of the guard. */
guardId: string;
/** Human-readable name of the guard. */
guardName: string;
/** The guard's results, with `guardId` / `guardName` omitted because they are hoisted to this entry. */
messages: Omit<GuardResult, "guardId" | "guardName">[];
}[];
};
/**
* The GuardrailsEngine class manages the execution of a set of guards on a sequence of messages.
* It provides a way to validate and modify messages before they are processed by an LLM.
*
* @example
* ```typescript
* import { GuardrailsEngine, piiGuard, secretGuard } from '@presidio-dev/hai-guardrails';
*
* const engine = new GuardrailsEngine({
* guards: [piiGuard(), secretGuard()],
* });
*
* const results = await engine.run(messages);
* ```
* @param opts - Configuration options for the engine
* @param {boolean} opts.enabled - Whether the engine is enabled (default: true)
* @param {Guard[]} opts.guards - Array of guard functions to apply to messages (required)
* @param {LLM} opts.llm - Optional LLM for the engine
* @param {LogLevel} opts.logLevel - Optional log level
* @param {MessageHahsingAlgorithm} opts.messageHashingAlgorithm - Algorithm for hashing messages (default: SHA256); the enum is exported under this spelling
*
* @property isEnabled - Getter; true if the engine is enabled.
* @property isDisabled - Getter; true if the engine is disabled.
* @method enable - Enables the engine.
* @method disable - Disables the engine.
* @method setLogLevel - Sets the log level.
* @method getLogLevel - Returns the current log level.
* @method run - Executes the guards on the provided messages and returns the results.
*
* The run method processes each message through the configured guards, modifying messages as needed,
* and returns the processed messages along with the results of the guard checks.
*/
export declare class GuardrailsEngine {
private readonly opts;
constructor(opts: GuardrailsChainOptions);
/**
* Checks if the engine is currently enabled (getter: read as `engine.isEnabled`, not called)
* @returns {boolean} True if the engine is enabled
*/
get isEnabled(): boolean;
/**
* Checks if the engine is currently disabled (getter: read as `engine.isDisabled`, not called)
* @returns {boolean} True if the engine is disabled
*/
get isDisabled(): boolean;
/**
* Enables the guardrails engine
*/
enable(): void;
/**
* Disables the guardrails engine
*/
disable(): void;
/**
* Sets the log level for the engine
* @param level - The log level to set
*/
setLogLevel(level: LogLevel): void;
/**
* Gets the current log level
* @returns The current log level
*/
getLogLevel(): LogLevel;
/**
* Executes the configured guards on the provided messages
*
* @param {LLMMessage[]} messages - Array of messages to process
* @returns {Promise<GuardrailsEngineResult>} An object containing:
* - messages: The processed messages
* - messagesWithGuardResult: Detailed results of guard executions
*/
run(messages: LLMMessage[]): Promise<GuardrailsEngineResult>;
}
/**
* Heuristic tactic: fuzzy matching and word overlap with known injection keywords.
*
* This tactic works by testing the input string against a set of known suspicious
* keywords. For each keyword, the tactic computes a score based on the number of
* matching words and the similarity between the keyword and the input string. The
* highest score is then returned as the result of the tactic. The score is then
* thresholded to determine if the input string is a prompt injection attack or
* not.
*
* @param threshold The default threshold for determining if a score indicates a
* prompt injection attack. Required by the declared constructor signature.
* NOTE(review): earlier documentation said "Defaults to 0", but no default is
* visible in this declaration — confirm against the implementation.
* @param keywords The suspicious keywords to match the input against.
*/
export declare class Heuristic implements Tactic {
private readonly keywords;
readonly name = TacticName.Heuristic;
readonly defaultThreshold: number;
constructor(threshold: number, keywords: string[]);
execute(input: string, thresholdOverride?: number): Promise<TacticExecution>;
}
/**
* Language Model tactic: uses an LLM to assess prompt injection likelihood.
*
* This tactic works by generating a prompt that asks the LLM to assess the input
* string for prompt injection likelihood. The LLM responds with a score between 0
* and 1, which is then thresholded to determine if the input string is a prompt
* injection attack or not.
*
* @param threshold The default threshold for determining if a score indicates a
* prompt injection attack. Defaults to 0. May be passed as `undefined`.
* @param llm The language model to use for assessing prompt injection likelihood.
* Accepts any `LLM`: a `BaseChatModel` instance or an async function over message arrays.
* @param renderPromptTemplate Function that turns the input string into the prompt text.
*/
export declare class LanguageModel implements Tactic {
private readonly llm;
private renderPromptTemplate;
readonly name = TacticName.LanguageModel;
readonly defaultThreshold: number;
constructor(threshold: number | undefined, llm: LLM, renderPromptTemplate: (input: string) => string);
execute(input: string, thresholdOverride?: number): Promise<TacticExecution>;
}
/**
* Pattern tactic: regular expression matching against suspicious prompt patterns.
*
* This tactic works by testing the input string against a set of regular
* expressions that match known suspicious patterns. If any of the patterns
* match, the tactic returns a score of 1.0. Otherwise, the score is 0.0. The
* score is then thresholded to determine if the input string is a prompt
* injection attack or not.
*
* @param threshold The default threshold for determining if a score indicates a
* prompt injection attack. Defaults to 0. May be passed as `undefined`.
* @param patterns The regular expressions to test the input against.
*/
export declare class Pattern implements Tactic {
private readonly patterns;
readonly name = TacticName.Pattern;
readonly defaultThreshold: number;
constructor(threshold: number | undefined, patterns: RegExp[]);
execute(input: string, thresholdOverride?: number): Promise<TacticExecution>;
}
/**
* Optional per-member overrides for a guarded chat model (the `handler` argument of `LangChainChatGuardrails`).
*
* For each function-valued key `K` of the model type `T`, an override may be supplied. It receives the
* original function, the target, the `this` argument, the call arguments, and the guardrails engine,
* and returns the original return type or a promise of it. Non-function keys map to `never`.
* Not exported; the `$1` suffix comes from the bundle generator.
*/
type ProxyHandler$1<T extends BaseChatModel> = {
[K in keyof T]?: T[K] extends (...args: infer Args) => infer Return ? (originalFn: T[K], target: T, thisArg: T, args: Args, guardrailsEngine: GuardrailsEngine) => Return | Promise<Return> : never;
};
/**
* Creates a bridge LangChain chat model that applies guardrails protection
* to all method calls.
*
* @template T The type of the LangChain chat model
* @param model The LangChain chat model instance to protect
* @param guardrailsEngine The guardrails engine instance to use
* @param handler Optional custom handler to override default behavior
* @returns Protected version of input model that applies guardrails protection
*
* @example
* ```typescript
* const model = new ChatOpenAI();
* // The engine constructor requires options with a `guards` array
* const engine = new GuardrailsEngine({ guards: [piiGuard()] });
* const protectedModel = LangChainChatGuardrails(model, engine);
* ```
*/
export declare function LangChainChatGuardrails<T extends BaseChatModel>(model: T, guardrailsEngine: GuardrailsEngine, handler?: ProxyHandler$1<T>): T;
export {};