# codecrucible-synth
# Version: (unspecified — scraped page omitted the value)
# Production-Ready AI Development Platform with Multi-Voice Synthesis,
# Smithery MCP Integration, Enterprise Security, and Zero-Timeout Reliability
# (registry metadata: 143 lines (131 loc) • 4.08 kB, YAML)
# Unified Model Configuration - Single Source of Truth
# Created: August 21, 2025
# Purpose: Resolves configuration conflicts identified in audit report
llm:
  # Default provider selection
  default_provider: "ollama"
  fallback_chain: ["ollama", "lm-studio", "mock"]
  providers:
    ollama:
      endpoint: "http://localhost:11434"
      # All timeout values are in milliseconds
      timeout:
        connection: 5000  # 5s for connection establishment
        response: 30000  # 30s for response generation
        status_check: 3000  # 3s for health checks
      models:
        # Preferred models in priority order
        preferred:
          - "qwen2.5-coder:7b"
          - "qwen2.5-coder:3b"
          - "deepseek-coder:8b"
        # Fallback models if preferred not available
        fallback:
          - "llama3.2:latest"
          - "gemma:latest"
        # Model-specific settings (keys quoted: model tags contain ':')
        settings:
          "qwen2.5-coder:7b":
            temperature: 0.1
            max_tokens: 128000
            context_window: 128000
          "deepseek-coder:8b":
            temperature: 0.2
            max_tokens: 128000
            context_window: 128000
      # Task routing preferences
      optimal_for:
        - "analysis"
        - "planning"
        - "complex"
        - "multi-file"
        - "architecture"
        - "debugging"
    lm-studio:
      endpoint: "http://localhost:1234"
      # All timeout values are in milliseconds
      timeout:
        connection: 3000  # 3s for connection
        response: 15000  # 15s for response
        status_check: 2000  # 2s for health checks
      models:
        preferred:
          - "codellama-7b-instruct"
          - "gemma-2b-it"
          - "qwen/qwen2.5-coder-14b"
        fallback:
          - "gpt-3.5-turbo"  # OpenAI compatible fallback
        settings:
          "codellama-7b-instruct":
            temperature: 0.7
            max_tokens: 128000
            context_window: 128000
      optimal_for:
        - "template"
        - "edit"
        - "format"
        - "boilerplate"
        - "quick-fix"
        - "streaming"
# Routing strategy configuration
routing:
  strategy: "hybrid"  # Options: hybrid, simple, complex
  # Task complexity mapping: task labels routed to the fast vs. heavy path
  task_complexity:
    simple:
      - "format"
      - "template"
      - "boilerplate"
      - "edit"
      - "rename"
    complex:
      - "analysis"
      - "architecture"
      - "planning"
      - "debug"
      - "security"
      - "multi-file"
  # Confidence thresholds for routing decisions (0.0 - 1.0)
  confidence_thresholds:
    high: 0.9  # Use fast path
    medium: 0.7  # Standard routing
    low: 0.5  # Require validation
    escalation: 0.3  # Escalate to human
  # Performance targets (milliseconds)
  # NOTE(review): nesting under `routing` assumed from the flattened source —
  # confirm against the config loader's schema (could be a top-level key)
  performance_targets:
    simple_task_response: 1000  # 1s for simple tasks
    complex_task_response: 30000  # 30s for complex tasks
    streaming_latency: 100  # 100ms for first token
# Monitoring and metrics
monitoring:
  enabled: true
  metrics_port: 3001
  collect_performance: true
  collect_errors: true
  # Alert thresholds
  alerts:
    error_rate: 0.05  # Alert if >5% errors
    response_time_p99: 5000  # Alert if p99 > 5s (milliseconds)
    availability: 0.95  # Alert if <95% available
# Security settings
security:
  validate_inputs: true
  sanitize_outputs: true
  # Length limits are in characters/tokens of the raw text
  max_prompt_length: 128000
  max_response_length: 128000
  # Substrings that trigger input/output filtering
  blocked_patterns:
    - "password"
    - "secret"
    - "api_key"
    - "private_key"
# Caching configuration
caching:
  enabled: true
  ttl: 3600  # 1 hour cache TTL (seconds)
  max_cache_size: 1000  # Max cached responses
  cache_strategy: "lru"  # Least recently used
# Experimental features (feature flags; all booleans)
experimental:
  dual_agent_review: true  # Enable sequential review system
  auto_retry_on_timeout: true  # Automatic retry with backoff
  adaptive_timeouts: true  # Adjust timeouts based on task
  connection_pooling: true  # HTTP connection pooling
  circuit_breaker: false  # Not yet implemented