claude-flow

Version:

Ruflo - Enterprise AI agent orchestration for Claude Code. Deploy 60+ specialized agents in coordinated swarms with self-learning, fault-tolerant consensus, vector memory, and MCP integration

github.com/ruvnet/claude-flow

ruvnet/claude-flow

1,064 lines (920 loc) • 36.1 kB

text/typescript

/**
 * RuVector PostgreSQL Bridge - Attention Mechanisms Module
 *
 * Comprehensive implementation of all 39 attention mechanisms for the
 * RuVector PostgreSQL vector database integration.
 *
 * @module @claude-flow/plugins/integrations/ruvector/attention
 * @version 1.0.0
 */

import type {
  AttentionMechanism,
  AttentionConfig,
  AttentionInput,
  AttentionOutput,
  AttentionParams,
  AttentionStats,
  KVCache,
} from './types.js';

// ============================================================================
// Attention Mechanism Interface
// ============================================================================

/**
 * Options for configuring attention computation.
 *
 * Every field is optional; `configure()` only overwrites the settings that
 * are explicitly provided.
 */
export interface AttentionOptions {
  /** Number of attention heads */
  numHeads?: number;
  /** Dimension per head */
  headDim?: number;
  /** Dropout rate */
  dropout?: number;
  /** Whether to use causal masking */
  causal?: boolean;
  /** Scale factor for attention scores */
  scale?: number;
  /** Maximum sequence length */
  maxSeqLen?: number;
  /** Mechanism-specific parameters */
  params?: AttentionParams;
}

/**
 * Interface for attention mechanism implementations.
 */
export interface IAttentionMechanism {
  /** Attention mechanism type */
  readonly type: AttentionMechanism;
  /** Human-readable name */
  readonly name: string;
  /** Description of the mechanism */
  readonly description: string;
  /** Category of the mechanism */
  readonly category: AttentionCategory;

  /**
   * Compute attention output from query, keys, and values.
   */
  compute(
    query: number[],
    keys: number[][],
    values: number[][]
  ): Promise<number[]>;

  /**
   * Compute batched attention.
   */
  computeBatch(
    queries: number[][],
    keys: number[][],
    values: number[][]
  ): Promise<number[][]>;

  /**
   * Configure the attention mechanism with options.
   */
  configure(options: AttentionOptions): void;

  /**
   * Generate SQL query for PostgreSQL execution.
   */
  toSQL(input: AttentionInput): string;

  /**
   * Get current configuration.
   */
  getConfig(): AttentionConfig;
}

/**
 * Categories of attention mechanisms.
 */
export type AttentionCategory =
  | 'core'
  | 'efficient'
  | 'positional'
  | 'sparse'
  | 'linear'
  | 'graph'
  | 'temporal'
  | 'multimodal'
  | 'retrieval';

// ============================================================================
// Attention Registry
// ============================================================================

/**
 * Registry for managing attention mechanism implementations.
 *
 * Implementations are stored by their `type`; a secondary index groups the
 * registered types by `category` so they can be listed per category.
 */
export class AttentionRegistry {
  private mechanisms: Map<AttentionMechanism, IAttentionMechanism> = new Map();
  private categoryIndex: Map<AttentionCategory, Set<AttentionMechanism>> = new Map();

  constructor() {
    // Pre-create one (empty) bucket per known category.
    const categories: AttentionCategory[] = [
      'core', 'efficient', 'positional', 'sparse', 'linear',
      'graph', 'temporal', 'multimodal', 'retrieval',
    ];
    for (const category of categories) {
      this.categoryIndex.set(category, new Set());
    }
  }

  /**
   * Register an attention mechanism implementation.
   *
   * Registering a second implementation for an already-known `type` replaces
   * the first one. If the replacement lives in a different category, the type
   * is removed from the old category bucket so the index never lists a type
   * under a category its current implementation does not have.
   *
   * @param impl - Implementation to store under `impl.type`.
   */
  register(impl: IAttentionMechanism): void {
    const previous = this.mechanisms.get(impl.type);
    if (previous && previous.category !== impl.category) {
      this.categoryIndex.get(previous.category)?.delete(impl.type);
    }
    this.mechanisms.set(impl.type, impl);
    this.categoryIndex.get(impl.category)?.add(impl.type);
  }

  /**
   * Get an attention mechanism by type.
   *
   * @throws Error when no implementation is registered for `type`.
   */
  get(type: AttentionMechanism): IAttentionMechanism {
    const mechanism = this.mechanisms.get(type);
    if (!mechanism) {
      throw new Error(`Attention mechanism '${type}' not registered`);
    }
    return mechanism;
  }

  /**
   * Check if a mechanism is registered.
   */
  has(type: AttentionMechanism): boolean {
    return this.mechanisms.has(type);
  }

  /**
   * List all registered attention mechanisms.
   */
  listAvailable(): AttentionMechanism[] {
    return Array.from(this.mechanisms.keys());
  }

  /**
   * List mechanisms by category.
   *
   * @returns The registered types of that category (empty when none).
   */
  listByCategory(category: AttentionCategory): AttentionMechanism[] {
    return Array.from(this.categoryIndex.get(category) || []);
  }

  /**
   * Get all mechanisms with metadata.
   */
  getAllWithMetadata(): Array<{
    type: AttentionMechanism;
    name: string;
    description: string;
    category: AttentionCategory;
  }> {
    return Array.from(this.mechanisms.values()).map(m => ({
      type: m.type,
      name: m.name,
      description: m.description,
      category: m.category,
    }));
  }

  /**
   * Unregister a mechanism.
   *
   * @returns `true` when a mechanism was removed, `false` when `type` was unknown.
   */
  unregister(type: AttentionMechanism): boolean {
    const mechanism = this.mechanisms.get(type);
    if (mechanism) {
      this.categoryIndex.get(mechanism.category)?.delete(type);
      return this.mechanisms.delete(type);
    }
    return false;
  }

  /**
   * Clear all registered mechanisms.
   *
   * The category buckets themselves are kept and only emptied.
   */
  clear(): void {
    this.mechanisms.clear();
    this.categoryIndex.forEach(set => set.clear());
  }
}

// ============================================================================
// Base Attention Implementation
// ============================================================================

/**
 * Base class for attention mechanism implementations.
 *
 * Holds the shared configuration, the `configure()` / `getConfig()` plumbing
 * and the helpers that render vectors and matrices as SQL literals.
 */
export abstract class BaseAttentionMechanism implements IAttentionMechanism {
  abstract readonly type: AttentionMechanism;
  abstract readonly name: string;
  abstract readonly description: string;
  abstract readonly category: AttentionCategory;

  protected config: AttentionConfig;

  /**
   * @param config - Optional overrides; unspecified fields fall back to the
   *   defaults listed in the constructor body.
   */
  constructor(config?: Partial<AttentionConfig>) {
    // Note: mechanism will be set correctly via getConfig() which uses this.type
    this.config = {
      mechanism: 'multi_head' as AttentionMechanism, // Placeholder, overridden by getConfig
      numHeads: config?.numHeads ?? 8,
      headDim: config?.headDim ?? 64,
      embedDim: config?.embedDim ?? 512,
      dropout: config?.dropout ?? 0.0,
      useBias: config?.useBias ?? true,
      scale: config?.scale,
      causal: config?.causal ?? false,
      maxSeqLen: config?.maxSeqLen ?? 2048,
      params: config?.params,
    };
  }

  /**
   * Apply the provided options on top of the current configuration.
   *
   * Fields left `undefined` are not touched. `params` is merged key-by-key
   * into the existing params instead of replacing them wholesale.
   */
  configure(options: AttentionOptions): void {
    if (options.numHeads !== undefined) this.config = { ...this.config, numHeads: options.numHeads };
    if (options.headDim !== undefined) this.config = { ...this.config, headDim: options.headDim };
    if (options.dropout !== undefined) this.config = { ...this.config, dropout: options.dropout };
    if (options.causal !== undefined) this.config = { ...this.config, causal: options.causal };
    if (options.scale !== undefined) this.config = { ...this.config, scale: options.scale };
    if (options.maxSeqLen !== undefined) this.config = { ...this.config, maxSeqLen: options.maxSeqLen };
    if (options.params !== undefined) {
      this.config = { ...this.config, params: { ...this.config.params, ...options.params } };
    }
  }

  /**
   * Get a copy of the current configuration with `mechanism` set to this
   * implementation's `type`.
   */
  getConfig(): AttentionConfig {
    return { ...this.config, mechanism: this.type };
  }

  abstract compute(query: number[], keys: number[][], values: number[][]): Promise<number[]>;
  abstract computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]>;
  abstract toSQL(input: AttentionInput): string;

  /**
   * Compute attention scale factor.
   *
   * @returns The configured `scale`, or `sqrt(headDim)` when none is set.
   */
  protected getScale(): number {
    return this.config.scale ?? Math.sqrt(this.config.headDim);
  }

  /**
   * Format vector for SQL.
   *
   * NOTE(review): components are interpolated verbatim into the SQL text, so
   * this assumes the input really contains numbers - confirm that callers
   * never forward unvalidated data.
   */
  protected formatVector(v: number[] | Float32Array): string {
    const arr = Array.isArray(v) ? v : Array.from(v);
    return `'[${arr.join(',')}]'::vector`;
  }

  /**
   * Format matrix for SQL as an `ARRAY[...]` of vector literals, one per row.
   */
  protected formatMatrix(m: number[][] | Float32Array[]): string {
    const rows = m.map(row => this.formatVector(row));
    return `ARRAY[${rows.join(',')}]`;
  }
}

// ============================================================================
// Core Attention Implementations
// ============================================================================

/**
 * Multi-Head Attention (Transformer standard).
*/
export class MultiHeadAttention extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'multi_head';
  readonly name = 'Multi-Head Attention';
  readonly description = 'Standard Transformer multi-head attention with parallel attention heads';
  readonly category: AttentionCategory = 'core';

  /**
   * Attend one query over all keys.
   *
   * Each key is scored by its dot product with the query divided by
   * `getScale()`; the scores are softmax-normalised and used to blend the
   * value rows.
   *
   * @returns The weighted combination of `values`.
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const divisor = this.getScale();
    const logits = keys.map(key => this.dotProduct(query, key) / divisor);
    return this.weightedSum(values, this.softmax(logits));
  }

  /**
   * Run `compute` for every query against the same keys and values.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    const pending = queries.map(query => this.compute(query, keys, values));
    return Promise.all(pending);
  }

  /**
   * Build the `ruvector.multi_head_attention(...)` call for the given input,
   * passing head count, scale and the causal flag from the configuration.
   */
  toSQL(input: AttentionInput): string {
    const queryArg = this.formatMatrix(input.query);
    const keyArg = this.formatMatrix(input.key);
    const valueArg = this.formatMatrix(input.value);
    const { numHeads, causal } = this.config;
    return `SELECT ruvector.multi_head_attention(${queryArg}, ${keyArg}, ${valueArg}, ${numHeads}, ${this.getScale()}, ${causal})`;
  }

  /** Sum of element-wise products, iterating over the length of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    let total = 0;
    for (let i = 0; i < a.length; i++) {
      total += a[i] * b[i];
    }
    return total;
  }

  /** Softmax with the maximum subtracted before exponentiation. */
  private softmax(x: number[]): number[] {
    const peak = Math.max(...x);
    const exps = x.map(v => Math.exp(v - peak));
    let norm = 0;
    for (const e of exps) {
      norm += e;
    }
    return exps.map(e => e / norm);
  }

  /** Blend the rows of `values` using `weights`; width comes from the first row. */
  private weightedSum(values: number[][], weights: number[]): number[] {
    const dim = values[0].length;
    const blended: number[] = new Array(dim).fill(0);
    for (let row = 0; row < values.length; row++) {
      const current = values[row];
      for (let col = 0; col < dim; col++) {
        blended[col] += weights[row] * current[col];
      }
    }
    return blended;
  }
}

/**
 * Self-Attention mechanism.
*/
export class SelfAttention extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'self_attention';
  readonly name = 'Self-Attention';
  readonly description = 'Self-attention where queries, keys, and values come from the same sequence';
  readonly category: AttentionCategory = 'core';

  /**
   * Scaled dot-product attention of a single query over every key.
   *
   * @returns The softmax-weighted combination of the value rows.
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const scale = this.getScale();
    const scores: number[] = keys.map(key => {
      const raw = this.dotProduct(query, key);
      return raw / scale;
    });
    const weights = this.softmax(scores);
    return this.weightedSum(values, weights);
  }

  /**
   * Apply `compute` to each query, sharing keys and values.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    return Promise.all(
      queries.map(single => this.compute(single, keys, values))
    );
  }

  /**
   * Build the `ruvector.self_attention(...)` call with the configured scale
   * and causal flag.
   */
  toSQL(input: AttentionInput): string {
    const queries = this.formatMatrix(input.query);
    const keys = this.formatMatrix(input.key);
    const values = this.formatMatrix(input.value);
    const scale = this.getScale();
    return `SELECT ruvector.self_attention(${queries}, ${keys}, ${values}, ${scale}, ${this.config.causal})`;
  }

  /** Dot product over the indices of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    let acc = 0;
    for (let idx = 0; idx < a.length; idx++) {
      acc += a[idx] * b[idx];
    }
    return acc;
  }

  /** Max-shifted softmax. */
  private softmax(x: number[]): number[] {
    const highest = Math.max(...x);
    const unnormalised = x.map(score => Math.exp(score - highest));
    let denominator = 0;
    for (const term of unnormalised) {
      denominator += term;
    }
    return unnormalised.map(term => term / denominator);
  }

  /** Weighted sum of value rows; the output width is that of the first row. */
  private weightedSum(values: number[][], weights: number[]): number[] {
    const width = values[0].length;
    const out: number[] = new Array(width).fill(0);
    for (let r = 0; r < values.length; r++) {
      for (let c = 0; c < width; c++) {
        out[c] += weights[r] * values[r][c];
      }
    }
    return out;
  }
}

/**
 * Cross-Attention mechanism.
*/
export class CrossAttention extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'cross_attention';
  readonly name = 'Cross-Attention';
  readonly description = 'Cross-attention between two different sequences (encoder-decoder)';
  readonly category: AttentionCategory = 'core';

  /**
   * Score the query against each key (dot product divided by `getScale()`),
   * normalise with softmax and return the matching blend of value rows.
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const denom = this.getScale();
    const scaled = keys.map(key => this.dotProduct(query, key) / denom);
    const attention = this.softmax(scaled);
    return this.weightedSum(values, attention);
  }

  /**
   * Evaluate every query independently against the shared keys and values.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    const jobs: Promise<number[]>[] = queries.map(q => this.compute(q, keys, values));
    return Promise.all(jobs);
  }

  /**
   * Build the `ruvector.cross_attention(...)` call with the configured head
   * count and scale.
   */
  toSQL(input: AttentionInput): string {
    const qSql = this.formatMatrix(input.query);
    const kSql = this.formatMatrix(input.key);
    const vSql = this.formatMatrix(input.value);
    const heads = this.config.numHeads;
    return `SELECT ruvector.cross_attention(${qSql}, ${kSql}, ${vSql}, ${heads}, ${this.getScale()})`;
  }

  /** Inner product of `a` and `b`, walking the indices of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    let result = 0;
    for (let i = 0; i < a.length; i++) {
      result += a[i] * b[i];
    }
    return result;
  }

  /** Softmax computed relative to the largest score. */
  private softmax(x: number[]): number[] {
    const top = Math.max(...x);
    const powers = x.map(value => Math.exp(value - top));
    let total = 0;
    for (const p of powers) {
      total += p;
    }
    return powers.map(p => p / total);
  }

  /** Combine value rows with the given weights. */
  private weightedSum(values: number[][], weights: number[]): number[] {
    const dim = values[0].length;
    const mixed: number[] = new Array(dim).fill(0);
    for (let i = 0; i < values.length; i++) {
      const row = values[i];
      const weight = weights[i];
      for (let j = 0; j < dim; j++) {
        mixed[j] += weight * row[j];
      }
    }
    return mixed;
  }
}

/**
 * Causal (Masked) Attention for autoregressive models.
*/
export class CausalAttention extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'causal';
  readonly name = 'Causal Attention';
  readonly description = 'Causal/masked attention for autoregressive generation (GPT-style)';
  readonly category: AttentionCategory = 'core';

  /**
   * Forwards the given configuration to the base class with `causal` forced
   * to `true`.
   */
  constructor(config?: Partial<AttentionConfig>) {
    super({ ...config, causal: true });
  }

  /**
   * Attend a query that is assumed to sit at the last key position.
   *
   * Keys positioned after the query would be masked with `-Infinity`; since
   * the query index is the last key index, every supplied key is visible.
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const scale = this.getScale();
    const lastVisible = keys.length - 1; // Assume query is for last position
    const scores = keys.map((key, position) =>
      position <= lastVisible ? this.dotProduct(query, key) / scale : -Infinity
    );
    return this.weightedSum(values, this.softmax(scores));
  }

  /**
   * Attend query `i` over keys/values `0..i` only, one query after another.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    const outputs: number[][] = [];
    for (const [position, query] of queries.entries()) {
      const visible = position + 1;
      const prefixKeys = keys.slice(0, visible);
      const prefixValues = values.slice(0, visible);
      outputs.push(await this.compute(query, prefixKeys, prefixValues));
    }
    return outputs;
  }

  /**
   * Build the `ruvector.causal_attention(...)` call with the configured head
   * count and scale.
   */
  toSQL(input: AttentionInput): string {
    const queryArg = this.formatMatrix(input.query);
    const keyArg = this.formatMatrix(input.key);
    const valueArg = this.formatMatrix(input.value);
    return `SELECT ruvector.causal_attention(${queryArg}, ${keyArg}, ${valueArg}, ${this.config.numHeads}, ${this.getScale()})`;
  }

  /** Dot product over the indices of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    let total = 0;
    for (let i = 0; i < a.length; i++) {
      total += a[i] * b[i];
    }
    return total;
  }

  /**
   * Softmax that treats `-Infinity` entries as masked (weight 0).
   * Returns all zeros when every entry is masked.
   */
  private softmax(x: number[]): number[] {
    const unmasked = x.filter(v => v !== -Infinity);
    if (unmasked.length === 0) {
      return x.map(() => 0);
    }
    const peak = Math.max(...unmasked);
    const exps = x.map(v => (v === -Infinity ? 0 : Math.exp(v - peak)));
    let norm = 0;
    for (const e of exps) {
      norm += e;
    }
    if (norm > 0) {
      return exps.map(e => e / norm);
    }
    return exps;
  }

  /** Blend the value rows with the given weights. */
  private weightedSum(values: number[][], weights: number[]): number[] {
    const dim = values[0].length;
    const blended: number[] = new Array(dim).fill(0);
    for (let row = 0; row < values.length; row++) {
      for (let col = 0; col < dim; col++) {
        blended[col] += weights[row] * values[row][col];
      }
    }
    return blended;
  }
}

/**
 * Bidirectional Attention (BERT-style).
 */
export class BidirectionalAttention extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'bidirectional';
  readonly name = 'Bidirectional Attention';
  readonly description = 'Bidirectional attention attending to all tokens (BERT-style)';
  readonly category: AttentionCategory = 'core';

  /**
   * Scaled dot-product attention of the query over every key, without any
   * masking.
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const divisor = this.getScale();
    const logits = keys.map(key => this.dotProduct(query, key) / divisor);
    const weights = this.softmax(logits);
    return this.weightedSum(values, weights);
  }

  /**
   * Run `compute` for each query with the same keys and values.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    const pending = queries.map(query => this.compute(query, keys, values));
    return Promise.all(pending);
  }

  /**
   * Build the `ruvector.bidirectional_attention(...)` call with the
   * configured head count and scale.
   */
  toSQL(input: AttentionInput): string {
    const queryArg = this.formatMatrix(input.query);
    const keyArg = this.formatMatrix(input.key);
    const valueArg = this.formatMatrix(input.value);
    return `SELECT ruvector.bidirectional_attention(${queryArg}, ${keyArg}, ${valueArg}, ${this.config.numHeads}, ${this.getScale()})`;
  }

  /** Dot product over the indices of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    let acc = 0;
    for (let i = 0; i < a.length; i++) {
      acc += a[i] * b[i];
    }
    return acc;
  }

  /** Max-shifted softmax. */
  private softmax(x: number[]): number[] {
    const highest = Math.max(...x);
    const exps = x.map(v => Math.exp(v - highest));
    let denominator = 0;
    for (const e of exps) {
      denominator += e;
    }
    return exps.map(e => e / denominator);
  }

  /** Blend the value rows with the given weights. */
  private weightedSum(values: number[][], weights: number[]): number[] {
    const width = values[0].length;
    const out: number[] = new Array(width).fill(0);
    for (let r = 0; r < values.length; r++) {
      for (let c = 0; c < width; c++) {
        out[c] += weights[r] * values[r][c];
      }
    }
    return out;
  }
}

/**
 * Local Attention with sliding window.
*/
export class LocalAttention extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'local_attention';
  readonly name = 'Local Attention';
  readonly description = 'Local attention with fixed window size around each position';
  readonly category: AttentionCategory = 'core';

  /**
   * Attend a query that is assumed to sit at the last key position.
   *
   * Only keys inside the window `[last - half, last + half]` (clamped to the
   * sequence) take part; `half` is `floor(windowSize / 2)` with `windowSize`
   * read from `params.windowSize` (default 256).
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const windowSize = this.config.params?.windowSize ?? 256;
    const half = Math.floor(windowSize / 2);
    const scale = this.getScale();
    const queryIdx = keys.length - 1;
    const start = Math.max(0, queryIdx - half);
    const end = Math.min(keys.length, queryIdx + half + 1);

    const scores = keys.map((k, i) => {
      if (i < start || i >= end) return -Infinity; // outside the window
      return this.dotProduct(query, k) / scale;
    });
    return this.weightedSum(values, this.softmax(scores));
  }

  /**
   * Attend query `i` over the keys in the window centred on position `i`.
   *
   * The window is cut out here and attended in full. Routing the slice
   * through `compute` would apply a second window anchored at the slice's
   * last element and drop every key before position `i`.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    const windowSize = this.config.params?.windowSize ?? 256;
    const half = Math.floor(windowSize / 2);
    return queries.map((q, i) => {
      const start = Math.max(0, i - half);
      const end = Math.min(keys.length, i + half + 1);
      return this.attend(q, keys.slice(start, end), values.slice(start, end));
    });
  }

  /**
   * Build the `ruvector.local_attention(...)` call with the configured window
   * size and scale.
   */
  toSQL(input: AttentionInput): string {
    const q = this.formatMatrix(input.query);
    const k = this.formatMatrix(input.key);
    const v = this.formatMatrix(input.value);
    const windowSize = this.config.params?.windowSize ?? 256;
    return `SELECT ruvector.local_attention(${q}, ${k}, ${v}, ${windowSize}, ${this.getScale()})`;
  }

  /** Scaled dot-product attention over exactly the keys given (no masking). */
  private attend(query: number[], keys: number[][], values: number[][]): number[] {
    const scale = this.getScale();
    const scores = keys.map(k => this.dotProduct(query, k) / scale);
    return this.weightedSum(values, this.softmax(scores));
  }

  /** Dot product over the indices of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    return a.reduce((sum, val, i) => sum + val * b[i], 0);
  }

  /**
   * Softmax that treats `-Infinity` entries as masked (weight 0).
   * Returns all zeros when every entry is masked.
   */
  private softmax(x: number[]): number[] {
    const filtered = x.filter(v => v !== -Infinity);
    if (filtered.length === 0) return x.map(() => 0);
    const max = Math.max(...filtered);
    const exp = x.map(v => (v === -Infinity ? 0 : Math.exp(v - max)));
    const sum = exp.reduce((a, b) => a + b, 0);
    return sum > 0 ? exp.map(v => v / sum) : exp;
  }

  /** Blend the value rows with the given weights. */
  private weightedSum(values: number[][], weights: number[]): number[] {
    const dim = values[0].length;
    const result = new Array(dim).fill(0);
    for (let i = 0; i < values.length; i++) {
      for (let j = 0; j < dim; j++) {
        result[j] += weights[i] * values[i][j];
      }
    }
    return result;
  }
}

/**
 * Global Attention with special global tokens.
 */
export class GlobalAttention extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'global_attention';
  readonly name = 'Global Attention';
  readonly description = 'Global attention tokens that attend to and are attended by all positions';
  readonly category: AttentionCategory = 'core';

  /**
   * Scaled dot-product attention of the query over every key.
   *
   * The local computation does not distinguish global tokens; the
   * `numGlobalTokens` parameter is only forwarded by `toSQL`.
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const scale = this.getScale();
    const scores = keys.map(k => this.dotProduct(query, k) / scale);
    const weights = this.softmax(scores);
    return this.weightedSum(values, weights);
  }

  /**
   * Run `compute` for each query with the same keys and values.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    return Promise.all(queries.map(q => this.compute(q, keys, values)));
  }

  /**
   * Build the `ruvector.global_attention(...)` call; the number of global
   * tokens comes from `params.numGlobalTokens` (default 1).
   */
  toSQL(input: AttentionInput): string {
    const q = this.formatMatrix(input.query);
    const k = this.formatMatrix(input.key);
    const v = this.formatMatrix(input.value);
    const numGlobal = this.config.params?.numGlobalTokens ?? 1;
    return `SELECT ruvector.global_attention(${q}, ${k}, ${v}, ${numGlobal}, ${this.getScale()})`;
  }

  /** Dot product over the indices of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    return a.reduce((sum, val, i) => sum + val * b[i], 0);
  }

  /** Max-shifted softmax. */
  private softmax(x: number[]): number[] {
    const max = Math.max(...x);
    const exp = x.map(v => Math.exp(v - max));
    const sum = exp.reduce((a, b) => a + b, 0);
    return exp.map(v => v / sum);
  }

  /** Blend the value rows with the given weights. */
  private weightedSum(values: number[][], weights: number[]): number[] {
    const dim = values[0].length;
    const result = new Array(dim).fill(0);
    for (let i = 0; i < values.length; i++) {
      for (let j = 0; j < dim; j++) {
        result[j] += weights[i] * values[i][j];
      }
    }
    return result;
  }
}

// ============================================================================
// Efficient Attention Implementations
// ============================================================================

/**
 * Flash Attention - memory efficient O(N) attention.
 */
export class FlashAttention extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'flash_attention';
  readonly name = 'Flash Attention';
  readonly description = 'Memory-efficient attention using tiling and recomputation';
  readonly category: AttentionCategory = 'efficient';

  /**
   * Attend the query over all keys, one block of keys at a time.
   *
   * A running maximum score and running sum of exponentials are kept; when a
   * block raises the maximum, the accumulated output and sum are rescaled by
   * `exp(oldMax - newMax)` before the block is added.
   *
   * The block size comes from `params.flashBlockSize` (default 64). Values
   * that are not finite or below 1 fall back to the default, and fractional
   * values are floored: a step of 0 or less would never advance the block
   * loop, and a fractional step would index `keys` with non-integers.
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const requested = this.config.params?.flashBlockSize ?? 64;
    const blockSize = Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : 64;
    const scale = this.getScale();
    const seqLen = keys.length;
    const dim = values[0].length;

    let output: number[] = new Array(dim).fill(0);
    let maxScore = -Infinity;
    let sumExp = 0;

    // Process in blocks for memory efficiency
    for (let blockStart = 0; blockStart < seqLen; blockStart += blockSize) {
      const blockEnd = Math.min(blockStart + blockSize, seqLen);

      // Compute scores for this block
      const blockScores: number[] = [];
      for (let i = blockStart; i < blockEnd; i++) {
        blockScores.push(this.dotProduct(query, keys[i]) / scale);
      }

      // Update running max and rescale what has been accumulated so far
      const blockMax = Math.max(...blockScores);
      if (blockMax > maxScore) {
        const correction = Math.exp(maxScore - blockMax);
        output = output.map(v => v * correction);
        sumExp *= correction;
        maxScore = blockMax;
      }

      // Accumulate weighted values
      for (let i = 0; i < blockScores.length; i++) {
        const weight = Math.exp(blockScores[i] - maxScore);
        sumExp += weight;
        for (let j = 0; j < dim; j++) {
          output[j] += weight * values[blockStart + i][j];
        }
      }
    }

    // Normalize
    return output.map(v => v / sumExp);
  }

  /**
   * Run `compute` for each query with the same keys and values.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    return Promise.all(queries.map(q => this.compute(q, keys, values)));
  }

  /**
   * Build the `ruvector.flash_attention(...)` call with the configured block
   * size, scale and causal flag.
   */
  toSQL(input: AttentionInput): string {
    const q = this.formatMatrix(input.query);
    const k = this.formatMatrix(input.key);
    const v = this.formatMatrix(input.value);
    const blockSize = this.config.params?.flashBlockSize ?? 64;
    return `SELECT ruvector.flash_attention(${q}, ${k}, ${v}, ${blockSize}, ${this.getScale()}, ${this.config.causal})`;
  }

  /** Dot product over the indices of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    return a.reduce((sum, val, i) => sum + val * b[i], 0);
  }
}

/**
 * Flash Attention V2 - improved memory efficiency.
*/
export class FlashAttentionV2 extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'flash_attention_v2';
  readonly name = 'Flash Attention V2';
  readonly description = 'Improved Flash Attention with better parallelism and reduced memory';
  readonly category: AttentionCategory = 'efficient';

  /**
   * Attend the query over all keys block by block, keeping a running maximum
   * and running sum of exponentials that are rescaled whenever a block raises
   * the maximum.
   *
   * The block size comes from `params.flashBlockSize` (default 128). Values
   * that are not finite or below 1 fall back to the default, and fractional
   * values are floored: a step of 0 or less would never advance the block
   * loop, and a fractional step would index `keys` with non-integers.
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const requested = this.config.params?.flashBlockSize ?? 128;
    const blockSize = Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : 128;
    const scale = this.getScale();
    const seqLen = keys.length;
    const dim = values[0].length;

    let output: number[] = new Array(dim).fill(0);
    let maxScore = -Infinity;
    let sumExp = 0;

    for (let blockStart = 0; blockStart < seqLen; blockStart += blockSize) {
      const blockEnd = Math.min(blockStart + blockSize, seqLen);

      const blockScores: number[] = [];
      for (let i = blockStart; i < blockEnd; i++) {
        blockScores.push(this.dotProduct(query, keys[i]) / scale);
      }

      // Rescale the accumulators when this block raises the running maximum.
      const blockMax = Math.max(...blockScores);
      if (blockMax > maxScore) {
        const correction = Math.exp(maxScore - blockMax);
        output = output.map(v => v * correction);
        sumExp *= correction;
        maxScore = blockMax;
      }

      for (let i = 0; i < blockScores.length; i++) {
        const weight = Math.exp(blockScores[i] - maxScore);
        sumExp += weight;
        for (let j = 0; j < dim; j++) {
          output[j] += weight * values[blockStart + i][j];
        }
      }
    }

    return output.map(v => v / sumExp);
  }

  /**
   * Run `compute` for each query with the same keys and values.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    return Promise.all(queries.map(q => this.compute(q, keys, values)));
  }

  /**
   * Build the `ruvector.flash_attention_v2(...)` call with the configured
   * block size, scale and causal flag.
   */
  toSQL(input: AttentionInput): string {
    const q = this.formatMatrix(input.query);
    const k = this.formatMatrix(input.key);
    const v = this.formatMatrix(input.value);
    const blockSize = this.config.params?.flashBlockSize ?? 128;
    return `SELECT ruvector.flash_attention_v2(${q}, ${k}, ${v}, ${blockSize}, ${this.getScale()}, ${this.config.causal})`;
  }

  /** Dot product over the indices of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    return a.reduce((sum, val, i) => sum + val * b[i], 0);
  }
}

/**
 * Memory Efficient Attention.
 */
export class MemoryEfficientAttention extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'memory_efficient';
  readonly name = 'Memory Efficient Attention';
  readonly description = 'Attention optimized for reduced memory footprint';
  readonly category: AttentionCategory = 'efficient';

  /**
   * Scaled dot-product attention of the query over every key.
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const scale = this.getScale();
    const scores = keys.map(k => this.dotProduct(query, k) / scale);
    const weights = this.softmax(scores);
    return this.weightedSum(values, weights);
  }

  /**
   * Attend the queries strictly one after another instead of starting them
   * all at once.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    // Process one at a time to minimize memory
    const results: number[][] = [];
    for (const q of queries) {
      results.push(await this.compute(q, keys, values));
    }
    return results;
  }

  /**
   * Build the `ruvector.memory_efficient_attention(...)` call; the last
   * argument is `params.checkpointing` (default `false`).
   */
  toSQL(input: AttentionInput): string {
    const q = this.formatMatrix(input.query);
    const k = this.formatMatrix(input.key);
    const v = this.formatMatrix(input.value);
    return `SELECT ruvector.memory_efficient_attention(${q}, ${k}, ${v}, ${this.getScale()}, ${this.config.params?.checkpointing ?? false})`;
  }

  /** Dot product over the indices of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    return a.reduce((sum, val, i) => sum + val * b[i], 0);
  }

  /** Max-shifted softmax. */
  private softmax(x: number[]): number[] {
    const max = Math.max(...x);
    const exp = x.map(v => Math.exp(v - max));
    const sum = exp.reduce((a, b) => a + b, 0);
    return exp.map(v => v / sum);
  }

  /** Blend the value rows with the given weights. */
  private weightedSum(values: number[][], weights: number[]): number[] {
    const dim = values[0].length;
    const result = new Array(dim).fill(0);
    for (let i = 0; i < values.length; i++) {
      for (let j = 0; j < dim; j++) {
        result[j] += weights[i] * values[i][j];
      }
    }
    return result;
  }
}

/**
 * Chunk Attention - process in chunks.
 */
export class ChunkAttention extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'chunk_attention';
  readonly name = 'Chunk Attention';
  readonly description = 'Chunked attention processing for very long sequences';
  readonly category: AttentionCategory = 'efficient';

  /**
   * Attend the query over all keys, one chunk of keys at a time.
   *
   * Each chunk yields its own softmax-weighted output together with the
   * chunk's maximum score and the sum of its shifted exponentials. Chunks are
   * then combined with weight `mass * exp(chunkMax - globalMax)`, i.e. the
   * share of the overall softmax mass that falls into the chunk, so the
   * result is the softmax attention over all keys. Weighting chunks by the
   * sum of their normalised weights would give every chunk weight 1 and
   * reduce the combination to a plain average of chunk outputs.
   *
   * The chunk size comes from `params.blockSize` (default 512). Values that
   * are not finite or below 1 fall back to the default, and fractional values
   * are floored: a step of 0 or less would never advance the chunk loop.
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const requested = this.config.params?.blockSize ?? 512;
    const chunkSize = Number.isFinite(requested) && requested >= 1 ? Math.floor(requested) : 512;
    const scale = this.getScale();
    const dim = values[0].length;

    const outputs: number[][] = [];
    const chunkMaxes: number[] = [];
    const chunkMasses: number[] = [];

    for (let i = 0; i < keys.length; i += chunkSize) {
      const chunkKeys = keys.slice(i, i + chunkSize);
      const chunkValues = values.slice(i, i + chunkSize);

      const scores = chunkKeys.map(k => this.dotProduct(query, k) / scale);
      const chunkMax = Math.max(...scores);
      const exps = scores.map(s => Math.exp(s - chunkMax));
      const mass = exps.reduce((a, b) => a + b, 0);

      outputs.push(this.weightedSum(chunkValues, exps.map(e => e / mass)));
      chunkMaxes.push(chunkMax);
      chunkMasses.push(mass);
    }

    // Combine chunk outputs, re-expressing every chunk's mass relative to the
    // global maximum so the chunks become comparable.
    const globalMax = Math.max(...chunkMaxes);
    const chunkWeights = chunkMasses.map((mass, c) => mass * Math.exp(chunkMaxes[c] - globalMax));
    const totalWeight = chunkWeights.reduce((a, b) => a + b, 0);

    const result = new Array(dim).fill(0);
    for (let c = 0; c < outputs.length; c++) {
      const w = chunkWeights[c] / totalWeight;
      for (let j = 0; j < dim; j++) {
        result[j] += w * outputs[c][j];
      }
    }
    return result;
  }

  /**
   * Run `compute` for each query with the same keys and values.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    return Promise.all(queries.map(q => this.compute(q, keys, values)));
  }

  /**
   * Build the `ruvector.chunk_attention(...)` call with the configured chunk
   * size and scale.
   */
  toSQL(input: AttentionInput): string {
    const q = this.formatMatrix(input.query);
    const k = this.formatMatrix(input.key);
    const v = this.formatMatrix(input.value);
    const chunkSize = this.config.params?.blockSize ?? 512;
    return `SELECT ruvector.chunk_attention(${q}, ${k}, ${v}, ${chunkSize}, ${this.getScale()})`;
  }

  /** Dot product over the indices of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    return a.reduce((sum, val, i) => sum + val * b[i], 0);
  }

  /** Blend the value rows with the given weights. */
  private weightedSum(values: number[][], weights: number[]): number[] {
    const dim = values[0].length;
    const result = new Array(dim).fill(0);
    for (let i = 0; i < values.length; i++) {
      for (let j = 0; j < dim; j++) {
        result[j] += weights[i] * values[i][j];
      }
    }
    return result;
  }
}

/**
 * Sliding Window Attention.
*/
export class SlidingWindowAttention extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'sliding_window';
  readonly name = 'Sliding Window Attention';
  readonly description = 'Attention with a sliding window for each position';
  readonly category: AttentionCategory = 'efficient';

  /**
   * Attend a query that is assumed to sit at the last key position.
   *
   * Keys further than `floor(windowSize / 2)` positions away from the query
   * are masked; `windowSize` is read from `params.windowSize` (default 256).
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const windowSize = this.config.params?.windowSize ?? 256;
    const scale = this.getScale();
    const queryIdx = keys.length - 1;
    const halfWindow = Math.floor(windowSize / 2);

    const scores = keys.map((k, i) => {
      if (Math.abs(i - queryIdx) > halfWindow) return -Infinity; // outside the window
      return this.dotProduct(query, k) / scale;
    });
    return this.weightedSum(values, this.softmax(scores));
  }

  /**
   * Attend query `idx` over the keys in the window centred on position `idx`.
   *
   * The window is cut out here and attended in full. Routing the slice
   * through `compute` would re-apply the window relative to the slice's last
   * element and drop every key before position `idx`.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    const windowSize = this.config.params?.windowSize ?? 256;
    const halfWindow = Math.floor(windowSize / 2);
    return queries.map((q, idx) => {
      const start = Math.max(0, idx - halfWindow);
      const end = Math.min(keys.length, idx + halfWindow + 1);
      return this.attend(q, keys.slice(start, end), values.slice(start, end));
    });
  }

  /**
   * Build the `ruvector.sliding_window_attention(...)` call with the
   * configured window size and scale.
   */
  toSQL(input: AttentionInput): string {
    const q = this.formatMatrix(input.query);
    const k = this.formatMatrix(input.key);
    const v = this.formatMatrix(input.value);
    const windowSize = this.config.params?.windowSize ?? 256;
    return `SELECT ruvector.sliding_window_attention(${q}, ${k}, ${v}, ${windowSize}, ${this.getScale()})`;
  }

  /** Scaled dot-product attention over exactly the keys given (no masking). */
  private attend(query: number[], keys: number[][], values: number[][]): number[] {
    const scale = this.getScale();
    const scores = keys.map(k => this.dotProduct(query, k) / scale);
    return this.weightedSum(values, this.softmax(scores));
  }

  /** Dot product over the indices of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    return a.reduce((sum, val, i) => sum + val * b[i], 0);
  }

  /**
   * Softmax that treats `-Infinity` entries as masked (weight 0).
   * Returns all zeros when every entry is masked.
   */
  private softmax(x: number[]): number[] {
    const filtered = x.filter(v => v !== -Infinity);
    if (filtered.length === 0) return x.map(() => 0);
    const max = Math.max(...filtered);
    const exp = x.map(v => (v === -Infinity ? 0 : Math.exp(v - max)));
    const sum = exp.reduce((a, b) => a + b, 0);
    return sum > 0 ? exp.map(v => v / sum) : exp;
  }

  /** Blend the value rows with the given weights. */
  private weightedSum(values: number[][], weights: number[]): number[] {
    const dim = values[0].length;
    const result = new Array(dim).fill(0);
    for (let i = 0; i < values.length; i++) {
      for (let j = 0; j < dim; j++) {
        result[j] += weights[i] * values[i][j];
      }
    }
    return result;
  }
}

/**
 * Dilated Attention with strided access.
 */
export class DilatedAttention extends BaseAttentionMechanism {
  readonly type: AttentionMechanism = 'dilated_attention';
  readonly name = 'Dilated Attention';
  readonly description = 'Dilated/strided attention for capturing long-range dependencies';
  readonly category: AttentionCategory = 'efficient';

  /**
   * Attend the query over every `dilationRate`-th key (indices 0, rate,
   * 2*rate, ...); all other keys are masked. The rate is read from
   * `params.dilationRate` (default 2).
   */
  async compute(query: number[], keys: number[][], values: number[][]): Promise<number[]> {
    const dilationRate = this.config.params?.dilationRate ?? 2;
    const scale = this.getScale();

    const scores = keys.map((k, i) => {
      if (i % dilationRate !== 0) return -Infinity; // skipped by the stride
      return this.dotProduct(query, k) / scale;
    });
    return this.weightedSum(values, this.softmax(scores));
  }

  /**
   * Run `compute` for each query with the same keys and values.
   */
  async computeBatch(queries: number[][], keys: number[][], values: number[][]): Promise<number[][]> {
    return Promise.all(queries.map(q => this.compute(q, keys, values)));
  }

  /**
   * Build the `ruvector.dilated_attention(...)` call with the configured
   * dilation rate and scale.
   */
  toSQL(input: AttentionInput): string {
    const q = this.formatMatrix(input.query);
    const k = this.formatMatrix(input.key);
    const v = this.formatMatrix(input.value);
    const dilationRate = this.config.params?.dilationRate ?? 2;
    return `SELECT ruvector.dilated_attention(${q}, ${k}, ${v}, ${dilationRate}, ${this.getScale()})`;
  }

  /** Dot product over the indices of `a`. */
  private dotProduct(a: number[], b: number[]): number {
    return a.reduce((sum, val, i) => sum + val * b[i], 0);
  }

  /**
   * Softmax that treats `-Infinity` entries as masked (weight 0).
   * Returns all zeros when every entry is masked.
   */
  private softmax(x: number[]): number[] {
    const filtered = x.filter(v => v !== -Infinity);
    if (filtered.length === 0) return x.map(() => 0);
    const max = Math.max(...filtered);
    const exp = x.map(v => (v === -Infinity ? 0 : Math.exp(v - max)));
    const sum = exp.reduce((a, b) => a + b, 0);
    return sum > 0 ? exp.map(v => v / sum) : exp;
  }

  /** Blend the value rows with the given weights. */
  private weightedSum(values: number[][], weights: number[]): number[] {
    const dim = values[0].length;
    const result = new Array(dim).fill(0);
    for (let i = 0; i < values.length; i++) {
      for (let j = 0; j < dim; j++) {
        result[j] += weights[i] * values[i][j];
      }
    }
    return result;
  }
}