claude-flow-novice
Claude Flow Novice - Advanced orchestration platform for multi-agent AI workflows with CFN Loop architecture. Includes the Local RuVector Accelerator and all CFN skills for complete functionality.
/**
* GLM 4.6 Provider for MDAP
*
* Unified provider for all MDAP tasks using the zai-glm-4.6 model.
* Supports thinking/reasoning control per Cerebras documentation:
* https://inference-docs.cerebras.ai/resources/glm-migration#7-minimize-reasoning-when-not-needed
*
* Usage:
* - Decomposition/Planning tasks: enableThinking = true (reasoning needed)
* - Implementation/Fix tasks: enableThinking = false (faster, no reasoning overhead)
*
* @module glm-client
* @version 1.0.1
*/
// =============================================
// Types
// =============================================
export interface GLMRequestOptions {
/** Enable thinking/reasoning mode (default: false for speed) */
enableThinking?: boolean;
/** Maximum tokens for response */
maxTokens?: number;
/** Temperature for response generation (0.0-2.0) */
temperature?: number;
/** Timeout in milliseconds (default: 30000) */
timeoutMs?: number;
}
export interface GLMResponse {
/** Generated content */
content: string;
/** Input tokens used */
inputTokens: number;
/** Output tokens used */
outputTokens: number;
/** Duration in milliseconds */
durationMs: number;
/** Whether thinking was enabled */
thinkingEnabled: boolean;
}
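// A typical resolved value (illustrative numbers only):
// { content: "...", inputTokens: 412, outputTokens: 187, durationMs: 1350, thinkingEnabled: false }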
// =============================================
// Constants
// =============================================
/**
* GLM 4.6 model ID - use this for ALL MDAP tasks
* Consistent model reduces variability and simplifies debugging
*/
export const GLM_MODEL_ID = "zai-glm-4.6";
/** Cerebras API endpoint */
const CEREBRAS_API_URL = "https://api.cerebras.ai/v1/chat/completions";
/** Default configuration */
const DEFAULTS = {
maxTokens: 2048,
temperature: 0.5,
timeoutMs: 30000,
enableThinking: false,
};
/** Retry configuration */
const RETRY_CONFIG = {
maxRetries: 5,
baseDelayMs: 1000,
maxDelayMs: 30000,
};
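// Worst case with these settings (every attempt failing retryably):
// roughly 1s + 2s + 4s + 8s = ~15s of backoff, plus up to 500ms jitter
// per retry, spread across the 5 attempts before callGLM gives up.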
// =============================================
// Security: Input Validation
// =============================================
/**
* Validate API key format and security
*/
function validateApiKey(apiKey: string): void {
if (!apiKey || typeof apiKey !== 'string') {
throw new Error('API key is required and must be a string');
}
// Check minimum length (Cerebras API keys are typically longer)
if (apiKey.length < 20) {
throw new Error('API key appears to be invalid (too short)');
}
// Check for common placeholder values
const placeholders = ['your-api-key', 'sk-xxxxxxxx', 'dummy', 'test', 'example'];
if (placeholders.some(placeholder => apiKey.toLowerCase().includes(placeholder))) {
throw new Error('Invalid API key: appears to be a placeholder');
}
// Basic format check for Cerebras API keys (they typically start with specific patterns)
if (!apiKey.startsWith('csk-') && !apiKey.startsWith('sk-')) {
console.warn('API key format may be incorrect for Cerebras');
}
}
/**
* Sanitize prompt to prevent injection
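*
* e.g. sanitizePrompt('Fix <script>alert(1)</script> in utils.ts')
* returns 'Fix [REDACTED] in utils.ts'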
*/
function sanitizePrompt(prompt: string): string {
if (!prompt || typeof prompt !== 'string') {
throw new Error('Prompt is required and must be a string');
}
// Check reasonable length limits
if (prompt.length < 1 || prompt.length > 100000) {
throw new Error('Prompt must be between 1 and 100000 characters');
}
// Trim leading/trailing whitespace
let sanitized = prompt.trim();
// Remove potentially dangerous content
const dangerousPatterns = [
/<script[^>]*>.*?<\/script>/gi,
/javascript:/gi,
/data:text\/html/gi,
/vbscript:/gi,
];
for (const pattern of dangerousPatterns) {
sanitized = sanitized.replace(pattern, '[REDACTED]');
}
return sanitized;
}
/**
* Validate request options
*/
function validateOptions(options: GLMRequestOptions): void {
if (!options || typeof options !== 'object') {
return; // Use defaults
}
if (options.maxTokens !== undefined) {
if (typeof options.maxTokens !== 'number' ||
options.maxTokens < 1 ||
options.maxTokens > 32768) {
throw new Error('maxTokens must be a number between 1 and 32768');
}
}
if (options.temperature !== undefined) {
if (typeof options.temperature !== 'number' ||
options.temperature < 0 ||
options.temperature > 2) {
throw new Error('temperature must be a number between 0 and 2');
}
}
if (options.timeoutMs !== undefined) {
if (typeof options.timeoutMs !== 'number' ||
options.timeoutMs < 1000 ||
options.timeoutMs > 300000) {
throw new Error('timeoutMs must be a number between 1000 and 300000');
}
}
}
/**
* Create secure headers for API request
*/
function createSecureHeaders(apiKey: string): Record<string, string> {
return {
"Content-Type": "application/json",
"Authorization": `Bearer ${apiKey}`,
"User-Agent": "MDAP/1.0.1", // Identify client
"Accept": "application/json",
};
}
// =============================================
// Helper Functions
// =============================================
/**
* Sleep helper for retry backoff
*/
function sleep(ms: number): Promise<void> {
return new Promise(resolve => setTimeout(resolve, ms));
}
/**
* Calculate exponential backoff delay with jitter
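*
* With baseDelayMs=1000 and maxDelayMs=30000 this yields roughly:
* attempt 0 -> 1.0-1.5s, attempt 1 -> 2.0-2.5s, attempt 2 -> 4.0-4.5s,
* attempt 3 -> 8.0-8.5s, attempt 4 -> 16.0-16.5s (jitter adds up to 500ms).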
*/
function calculateBackoff(attempt: number, baseDelay: number, maxDelay: number): number {
const exponentialDelay = baseDelay * Math.pow(2, attempt);
const jitter = Math.random() * 500;
return Math.min(exponentialDelay + jitter, maxDelay);
}
// =============================================
// Main API Function
// =============================================
/**
* Call GLM 4.6 API with optional thinking/reasoning
*
* Per Cerebras docs, disable thinking for implementation tasks:
* "If your task doesn't require reasoning, you can disable thinking
* to get faster responses and reduce token usage."
*
* @param prompt - The prompt to send
* @param options - Request options including thinking toggle
* @returns GLM response with content and metrics
*
* @example
* // For decomposition (needs reasoning):
* const result = await callGLM(prompt, { enableThinking: true });
*
* @example
* // For implementation (no reasoning needed):
* const result = await callGLM(prompt, { enableThinking: false });
*/
export async function callGLM(
prompt: string,
options: GLMRequestOptions = {}
): Promise<GLMResponse> {
const startTime = Date.now();
// Validate and get API key
const apiKey = process.env.CEREBRAS_API_KEY;
if (!apiKey) {
throw new Error("CEREBRAS_API_KEY environment variable not set");
}
// Validate inputs
validateApiKey(apiKey);
const sanitizedPrompt = sanitizePrompt(prompt);
validateOptions(options);
const {
enableThinking = DEFAULTS.enableThinking,
maxTokens = DEFAULTS.maxTokens,
temperature = DEFAULTS.temperature,
timeoutMs = DEFAULTS.timeoutMs,
} = options;
let lastError: Error | null = null;
for (let attempt = 0; attempt < RETRY_CONFIG.maxRetries; attempt++) {
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
try {
// Build request body with validated options
const requestBody: Record<string, unknown> = {
model: GLM_MODEL_ID,
messages: [{ role: "user", content: sanitizedPrompt }],
max_tokens: maxTokens,
temperature: temperature,
};
// Add thinking parameter per Cerebras GLM docs
// https://inference-docs.cerebras.ai/resources/glm-migration#7-minimize-reasoning-when-not-needed
if (!enableThinking) {
requestBody.thinking = { type: "disabled" };
}
// When enableThinking is true, omit the parameter (default behavior enables thinking)
const response = await fetch(CEREBRAS_API_URL, {
method: "POST",
headers: createSecureHeaders(apiKey),
body: JSON.stringify(requestBody),
signal: controller.signal,
});
clearTimeout(timeoutId);
// Handle rate limiting with exponential backoff
if (response.status === 429) {
lastError = new Error("GLM API rate limited (429)");
const retryAfter = response.headers.get('Retry-After');
// Fall back to exponential backoff when Retry-After is missing or non-numeric
const parsedRetryAfter = retryAfter ? parseInt(retryAfter, 10) : NaN;
const delayMs = Number.isFinite(parsedRetryAfter)
? parsedRetryAfter * 1000
: calculateBackoff(attempt, RETRY_CONFIG.baseDelayMs, RETRY_CONFIG.maxDelayMs);
console.log(
`[glm-client] Rate limited (429), retry ${attempt + 1}/${RETRY_CONFIG.maxRetries} after ${Math.round(delayMs)}ms`
);
await sleep(delayMs);
continue;
}
const durationMs = Date.now() - startTime;
if (!response.ok) {
const errorBody = await response.text();
// Sanitize error body to prevent sensitive data leakage
const sanitizedError = errorBody
.replace(/"api_key":\s*"[^"]*"/gi, '"api_key":"[REDACTED]"')
.replace(/"token":\s*"[^"]*"/gi, '"token":"[REDACTED]"')
.replace(/"password":\s*"[^"]*"/gi, '"password":"[REDACTED]"')
.substring(0, 200);
lastError = new Error(
`GLM API error: ${response.status} - ${sanitizedError}`
);
// Only retry on 5xx server errors; 4xx errors fail without retrying
if (response.status >= 500 && attempt < RETRY_CONFIG.maxRetries - 1) {
const delayMs = calculateBackoff(attempt, RETRY_CONFIG.baseDelayMs, RETRY_CONFIG.maxDelayMs);
console.log(
`[glm-client] Server error (${response.status}), retry ${attempt + 1}/${RETRY_CONFIG.maxRetries} after ${Math.round(delayMs)}ms`
);
await sleep(delayMs);
continue;
}
// break (not throw): a throw here would be swallowed by this try's own
// catch block and silently retried; the loop exit below rethrows lastError
break;
}
const data = await response.json() as {
choices?: Array<{ message: { content: string } }>;
usage?: { prompt_tokens?: number; completion_tokens?: number };
};
// Validate response structure
if (!data.choices || data.choices.length === 0) {
throw new Error("GLM API returned no choices");
}
return {
content: data.choices[0].message.content,
inputTokens: data.usage?.prompt_tokens || 0,
outputTokens: data.usage?.completion_tokens || 0,
durationMs,
thinkingEnabled: enableThinking,
};
} catch (error) {
clearTimeout(timeoutId);
// Don't retry on abort/timeout - the configured deadline was exceeded
if (error instanceof Error && error.name === 'AbortError') {
throw new Error(`GLM API request timed out after ${timeoutMs}ms`);
}
lastError = error as Error;
// Back off before retrying transient network errors
if (attempt < RETRY_CONFIG.maxRetries - 1) {
await sleep(calculateBackoff(attempt, RETRY_CONFIG.baseDelayMs, RETRY_CONFIG.maxDelayMs));
}
}
}
// All retries exhausted
throw lastError || new Error(`GLM API failed after ${RETRY_CONFIG.maxRetries} retries`);
}
// =============================================
// Convenience Functions
// =============================================
/**
* Call GLM for decomposition/planning tasks (thinking ENABLED)
* Use this for tasks that require reasoning, analysis, or planning.
*/
export async function callGLMWithThinking(
prompt: string,
options: Omit<GLMRequestOptions, 'enableThinking'> = {}
): Promise<GLMResponse> {
return callGLM(prompt, { ...options, enableThinking: true });
}
/**
* Call GLM for implementation/fix tasks (thinking DISABLED)
* Use this for straightforward code generation, fixes, transformations.
* Faster and more token-efficient than thinking mode.
*/
export async function callGLMFast(
prompt: string,
options: Omit<GLMRequestOptions, 'enableThinking'> = {}
): Promise<GLMResponse> {
return callGLM(prompt, { ...options, enableThinking: false });
}
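// Example (sketch, prompts are illustrative): the wrappers keep call sites
// self-documenting.
//
// const plan = await callGLMWithThinking("Decompose: add OAuth login to the CLI");
// const fix = await callGLMFast("Rename variable `x` to `userCount` in: ...");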
// =============================================
// Task-Specific Presets
// =============================================
/**
* Preset for decomposition tasks
* - Thinking enabled (complex reasoning)
* - Higher max tokens (detailed output)
* - Lower temperature (consistent output)
*/
export const DECOMPOSER_PRESET: GLMRequestOptions = {
enableThinking: true,
maxTokens: 4096,
temperature: 0.3,
timeoutMs: 60000, // Longer timeout for thinking
};
/**
* Preset for implementation/fixer tasks
* - Thinking disabled (speed)
* - Standard max tokens
* - Moderate temperature
*/
export const IMPLEMENTER_PRESET: GLMRequestOptions = {
enableThinking: false,
maxTokens: 2048,
temperature: 0.5,
timeoutMs: 30000,
};
/**
* Preset for validation tasks
* - Thinking disabled (straightforward checking)
* - Lower max tokens (validation is brief)
* - Low temperature (consistent checks)
*/
export const VALIDATOR_PRESET: GLMRequestOptions = {
enableThinking: false,
maxTokens: 1024,
temperature: 0.2,
timeoutMs: 20000,
};
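// Example usage (sketch - assumes CEREBRAS_API_KEY is set in the environment
// and this module is imported as ./glm-client; prompts are illustrative):
//
// import { callGLM, DECOMPOSER_PRESET, IMPLEMENTER_PRESET } from "./glm-client";
//
// const plan = await callGLM("Break this feature into subtasks: ...", DECOMPOSER_PRESET);
// const impl = await callGLM(`Implement subtask 1:\n${plan.content}`, IMPLEMENTER_PRESET);
// console.log(`plan: ${plan.durationMs}ms, impl: ${impl.outputTokens} output tokens`);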