/*
 * @juspay/neurolink
 * Version:
 * Universal AI Development Platform with external MCP server integration, multi-provider support, and professional CLI. Connect to 65+ MCP servers for filesystem, GitHub, database operations, and more. Build, test, and deploy AI applications with 9 major providers.
 * 356 lines (355 loc) • 14.1 kB
 * JavaScript
 */
import { HfInference } from '@huggingface/inference';
import { streamText, generateText, Output } from 'ai';
import { logger } from '../utils/logger.js';
/**
 * Fallback system context: its `systemPrompt` is used whenever a request
 * does not provide a system prompt of its own.
 */
const DEFAULT_SYSTEM_CONTEXT = { systemPrompt: 'You are a helpful AI assistant.' };
// Configuration helpers
/**
 * Reads the Hugging Face API token from the environment.
 *
 * `HUGGINGFACE_API_KEY` wins when it holds a non-empty value; otherwise
 * `HF_TOKEN` is used.
 *
 * @returns {string} The configured token.
 * @throws {Error} When neither variable holds a non-empty value.
 */
const getHuggingFaceApiKey = () => {
  const { HUGGINGFACE_API_KEY: primaryKey, HF_TOKEN: fallbackToken } = process.env;
  const token = primaryKey || fallbackToken;
  if (token) {
    return token;
  }
  throw new Error('HUGGINGFACE_API_KEY environment variable is not set');
};
/**
 * Resolves the model id to use by default.
 *
 * @returns {string} The value of `HUGGINGFACE_MODEL` when it is set to a
 *   non-empty string, otherwise 'microsoft/DialoGPT-medium'.
 */
const getHuggingFaceModelId = () => {
  const configuredModel = process.env.HUGGINGFACE_MODEL;
  return configuredModel ? configuredModel : 'microsoft/DialoGPT-medium';
};
/**
 * Reports whether a Hugging Face token is present in the environment.
 * Only presence is checked; the token itself is not validated.
 *
 * @returns {boolean} True when `HUGGINGFACE_API_KEY` or `HF_TOKEN` is a
 *   non-empty string.
 */
const hasValidAuth = () =>
  Boolean(process.env.HUGGINGFACE_API_KEY) || Boolean(process.env.HF_TOKEN);
// Module-level cache: the inference client is created on first use and reused afterwards
let _hfClient = null;
/**
 * Returns the shared `HfInference` client, constructing it on the first call
 * with the API key read from the environment.
 *
 * @returns {HfInference} The cached client instance.
 * @throws {Error} Propagated from `getHuggingFaceApiKey` when no key is configured.
 */
function getHuggingFaceClient() {
  if (_hfClient) {
    return _hfClient;
  }
  _hfClient = new HfInference(getHuggingFaceApiKey());
  return _hfClient;
}
/**
 * Default backoff settings used when retrying requests while a model loads.
 * Delays are expressed in milliseconds.
 */
const RETRY_CONFIG = {
  maxRetries: 3,
  baseDelay: 2_000, // first wait: 2 seconds
  maxDelay: 30_000, // upper bound for any single wait: 30 seconds
  backoffMultiplier: 2,
};
// Helper function for exponential backoff retry
/**
 * Runs `operation` and retries it with exponential backoff while it fails
 * with a "model loading" error, i.e. an `Error` whose message contains '503'.
 * Any other failure, or a 503 on the final attempt, is re-thrown unchanged.
 *
 * The operation is always attempted at least once. Fields missing from
 * `retryConfig` fall back to the values in `RETRY_CONFIG`, so callers may
 * override a single setting (e.g. `{ maxRetries: 1 }`).
 *
 * @param {() => Promise<*>} operation - Async work to execute.
 * @param {object} [retryConfig=RETRY_CONFIG] - Backoff settings.
 * @param {number} [retryConfig.maxRetries] - Retries allowed after the first attempt.
 * @param {number} [retryConfig.baseDelay] - Delay before the first retry, in ms.
 * @param {number} [retryConfig.maxDelay] - Upper bound for any single delay, in ms.
 * @param {number} [retryConfig.backoffMultiplier] - Growth factor applied per attempt.
 * @returns {Promise<*>} Whatever `operation` resolves to.
 * @throws The last error raised by `operation`.
 */
async function retryWithBackoff(operation, retryConfig = RETRY_CONFIG) {
  // Merge with the defaults so a partial config cannot leave `maxRetries`
  // undefined (which used to skip the operation entirely) or produce NaN delays.
  const { maxRetries, baseDelay, maxDelay, backoffMultiplier } = {
    ...RETRY_CONFIG,
    ...retryConfig,
  };
  for (let attempt = 0; ; attempt++) {
    try {
      return await operation();
    } catch (error) {
      // Model loading is recognised only by the '503' substring in the message.
      const isModelLoading = error instanceof Error && error.message.includes('503');
      // `!(attempt < maxRetries)` also stops for negative or non-numeric limits.
      if (!isModelLoading || !(attempt < maxRetries)) {
        throw error;
      }
      const delay = Math.min(baseDelay * Math.pow(backoffMultiplier, attempt), maxDelay);
      logger.debug('HuggingFace model loading, retrying...', {
        attempt: attempt + 1,
        maxRetries,
        delayMs: delay,
        error: error.message,
      });
      await new Promise((resolve) => setTimeout(resolve, delay));
    }
  }
}
// Custom LanguageModelV1 implementation for Hugging Face
/**
 * Adapter exposing a Hugging Face text-generation client through the
 * `doGenerate` / `doStream` methods of a v1 language model.
 *
 * The client only needs a `textGeneration({ model, inputs, parameters })`
 * method resolving to an object with a `generated_text` field.
 */
class HuggingFaceLanguageModel {
  specificationVersion = 'v1';
  provider = 'huggingface';
  modelId;
  maxTokens;
  supportsStreaming = true;
  defaultObjectGenerationMode = 'json';
  client;

  /**
   * @param {string} modelId - Model identifier sent with every request.
   * @param {object} client - Inference client providing `textGeneration`.
   */
  constructor(modelId, client) {
    this.modelId = modelId;
    this.client = client;
  }

  /**
   * Rough token estimate used for usage reporting: 4 characters per token.
   * @param {string} text
   * @returns {number}
   */
  estimateTokens(text) {
    return Math.ceil(text.length / 4);
  }

  /**
   * Flattens chat messages into a single "role: content" prompt, one message
   * per line. For multi-part content only the text parts are kept (joined by
   * a space); messages with any other content shape become an empty line.
   * @param {Array<{role: string, content: *}>} messages
   * @returns {string}
   */
  convertMessagesToPrompt(messages) {
    return messages
      .map((msg) => {
        if (typeof msg.content === 'string') {
          return `${msg.role}: ${msg.content}`;
        }
        if (Array.isArray(msg.content)) {
          const textParts = msg.content
            .filter((part) => part.type === 'text')
            .map((part) => part.text)
            .join(' ');
          return `${msg.role}: ${textParts}`;
        }
        return '';
      })
      .join('\n');
  }

  /**
   * Generates a completion in a single request (retried while the model loads).
   * Token usage is estimated, not reported by the API.
   * @param {object} options - Call options; reads `prompt`, `temperature`, `maxTokens`.
   * @returns {Promise<object>} Text, estimated usage, finish reason and raw call data.
   */
  async doGenerate(options) {
    const prompt = this.convertMessagesToPrompt(options.prompt);
    // `??` keeps an explicit temperature of 0 (greedy decoding); `||` used to
    // replace it with 0.7, which made `do_sample` always true.
    const temperature = options.temperature ?? 0.7;
    const doSample = temperature > 0;
    const parameters = {
      max_new_tokens: options.maxTokens || 500,
      return_full_text: false,
      do_sample: doSample,
    };
    if (doSample) {
      // Only forwarded when sampling; a non-positive temperature is not sent.
      parameters.temperature = temperature;
    }
    const response = await retryWithBackoff(() =>
      this.client.textGeneration({
        model: this.modelId,
        inputs: prompt,
        parameters,
      })
    );
    const generatedText = response.generated_text || '';
    const promptTokens = this.estimateTokens(prompt);
    const completionTokens = this.estimateTokens(generatedText);
    return {
      text: generatedText,
      usage: {
        promptTokens,
        completionTokens,
        totalTokens: promptTokens + completionTokens,
      },
      finishReason: 'stop',
      logprobs: undefined,
      rawCall: { rawPrompt: prompt, rawSettings: options },
      rawResponse: { headers: {} },
    };
  }

  /**
   * Simulates streaming: generates the full text first, then emits it as
   * `text-delta` parts (roughly ten of them) 50 ms apart, followed by a
   * `finish` part.
   * @param {object} options - Same call options as `doGenerate`.
   * @returns {Promise<object>} `{ stream, rawCall, rawResponse }`.
   */
  async doStream(options) {
    const response = await this.doGenerate(options);
    const text = response.text || '';
    const chunkSize = Math.max(1, Math.floor(text.length / 10));
    let index = 0;
    let pendingTimer;
    const stream = new ReadableStream({
      start(controller) {
        const pushChunk = () => {
          pendingTimer = undefined;
          if (index < text.length) {
            controller.enqueue({
              type: 'text-delta',
              textDelta: text.slice(index, index + chunkSize),
            });
            index += chunkSize;
            // Delay the next part to simulate streaming
            pendingTimer = setTimeout(pushChunk, 50);
            return;
          }
          controller.enqueue({
            type: 'finish',
            finishReason: response.finishReason,
            usage: response.usage,
            logprobs: response.logprobs,
          });
          controller.close();
        };
        pushChunk();
      },
      cancel() {
        // Without this, the pending timer would enqueue into a cancelled
        // stream and throw an uncaught exception from the timer callback.
        clearTimeout(pendingTimer);
      },
    });
    return {
      stream,
      rawCall: response.rawCall,
      rawResponse: response.rawResponse,
    };
  }
}
// Hugging Face class with enhanced error handling
/**
 * Provider wrapper that runs `streamText` / `generateText` against a
 * Hugging Face model, logging each stage and re-throwing failures so the
 * caller can handle them.
 */
export class HuggingFace {
  modelName;
  client;

  /**
   * Initializes a new instance of HuggingFace
   * @param {string} [modelName] - Optional model name to override the default from config
   * @throws Re-throws any error raised while obtaining the client (after logging it).
   */
  constructor(modelName) {
    const functionTag = 'HuggingFace.constructor';
    this.modelName = modelName || getHuggingFaceModelId();
    try {
      // Logged before the client is requested so a failed initialization is
      // still preceded by its "started" entry.
      logger.debug(`[${functionTag}] Initialization started`, {
        modelName: this.modelName,
        hasApiKey: hasValidAuth(),
      });
      this.client = getHuggingFaceClient();
      logger.debug(`[${functionTag}] Initialization completed`, {
        modelName: this.modelName,
        success: true,
      });
    } catch (err) {
      logger.error(`[${functionTag}] Initialization failed`, {
        message: 'Error in initializing Hugging Face',
        modelName: this.modelName,
        error: err instanceof Error ? err.message : String(err),
        stack: err instanceof Error ? err.stack : undefined,
      });
      throw err;
    }
  }

  /**
   * Gets the appropriate model instance
   * @private
   * @returns {HuggingFaceLanguageModel} A new model bound to this instance's model name and client.
   */
  getModel() {
    logger.debug('HuggingFace.getModel - Hugging Face model selected', {
      modelName: this.modelName,
    });
    return new HuggingFaceLanguageModel(this.modelName, this.client);
  }

  /**
   * Processes text using streaming approach with enhanced error handling callbacks
   * @param {string|object} optionsOrPrompt - Prompt string, or an options object with
   *   `prompt` and optional `temperature` (default 0.7), `maxTokens` (default 500),
   *   `systemPrompt` and `schema`.
   * @param {*} [analysisSchema] - Schema for structured output; used only when
   *   `options.schema` is not provided.
   * @returns {Promise<*>} Resolves to the value returned by the SDK's `streamText`.
   * @throws Re-throws any synchronous failure after logging it.
   */
  async streamText(optionsOrPrompt, analysisSchema) {
    const functionTag = 'HuggingFace.streamText';
    const provider = 'huggingface';
    let chunkCount = 0;
    try {
      // Parse parameters - support both string and options object
      const options = typeof optionsOrPrompt === 'string'
        ? { prompt: optionsOrPrompt }
        : optionsOrPrompt;
      const {
        prompt,
        temperature = 0.7,
        maxTokens = 500,
        systemPrompt = DEFAULT_SYSTEM_CONTEXT.systemPrompt,
        schema,
      } = options;
      // Use schema from options or fallback parameter
      const finalSchema = schema || analysisSchema;
      logger.debug(`[${functionTag}] Stream request started`, {
        provider,
        modelName: this.modelName,
        promptLength: prompt.length,
        temperature,
        maxTokens,
        hasSchema: !!finalSchema,
      });
      const model = this.getModel();
      const streamOptions = {
        model,
        prompt,
        system: systemPrompt,
        temperature,
        maxTokens,
        onError: (event) => {
          const error = event.error;
          const errorMessage = error instanceof Error ? error.message : String(error);
          const errorStack = error instanceof Error ? error.stack : undefined;
          logger.error(`[${functionTag}] Stream text error`, {
            provider,
            modelName: this.modelName,
            error: errorMessage,
            stack: errorStack,
            promptLength: prompt.length,
            chunkCount,
          });
        },
        onFinish: (event) => {
          logger.debug(`[${functionTag}] Stream text finished`, {
            provider,
            modelName: this.modelName,
            finishReason: event.finishReason,
            usage: event.usage,
            totalChunks: chunkCount,
            promptLength: prompt.length,
            responseLength: event.text?.length || 0,
          });
        },
        onChunk: (event) => {
          chunkCount++;
          // Text deltas carry their payload in `textDelta`; `text` is kept
          // as a fallback for chunk shapes that use that field instead.
          const chunkText = event.chunk.textDelta ?? event.chunk.text;
          logger.debug(`[${functionTag}] Stream text chunk`, {
            provider,
            modelName: this.modelName,
            chunkNumber: chunkCount,
            chunkLength: chunkText?.length ?? 0,
            chunkType: event.chunk.type,
          });
        },
      };
      if (finalSchema) {
        streamOptions.experimental_output = Output.object({ schema: finalSchema });
      }
      return streamText(streamOptions);
    } catch (err) {
      // Optional chaining: the argument itself may be what caused the failure
      // (null/undefined options), and the handler must not throw while logging.
      const promptLength = typeof optionsOrPrompt === 'string'
        ? optionsOrPrompt.length
        : optionsOrPrompt?.prompt?.length;
      logger.error(`[${functionTag}] Exception`, {
        provider,
        modelName: this.modelName,
        message: 'Error in streaming text',
        err: String(err),
        promptLength,
      });
      throw err; // Re-throw error to trigger fallback
    }
  }

  /**
   * Processes text using non-streaming approach with optional schema validation
   * @param {string|object} optionsOrPrompt - Prompt string, or an options object with
   *   `prompt` and optional `temperature` (default 0.7), `maxTokens` (default 500),
   *   `systemPrompt` and `schema`.
   * @param {*} [analysisSchema] - Schema for structured output; used only when
   *   `options.schema` is not provided.
   * @returns {Promise<*>} Resolves to the result of the SDK's `generateText`.
   * @throws Re-throws any failure after logging it.
   */
  async generateText(optionsOrPrompt, analysisSchema) {
    const functionTag = 'HuggingFace.generateText';
    const provider = 'huggingface';
    try {
      // Parse parameters - support both string and options object
      const options = typeof optionsOrPrompt === 'string'
        ? { prompt: optionsOrPrompt }
        : optionsOrPrompt;
      const {
        prompt,
        temperature = 0.7,
        maxTokens = 500,
        systemPrompt = DEFAULT_SYSTEM_CONTEXT.systemPrompt,
        schema,
      } = options;
      // Use schema from options or fallback parameter
      const finalSchema = schema || analysisSchema;
      logger.debug(`[${functionTag}] Generate request started`, {
        provider,
        modelName: this.modelName,
        promptLength: prompt.length,
        temperature,
        maxTokens,
      });
      const model = this.getModel();
      const generateOptions = {
        model,
        prompt,
        system: systemPrompt,
        temperature,
        maxTokens,
      };
      if (finalSchema) {
        generateOptions.experimental_output = Output.object({ schema: finalSchema });
      }
      const result = await generateText(generateOptions);
      logger.debug(`[${functionTag}] Generate text completed`, {
        provider,
        modelName: this.modelName,
        usage: result.usage,
        finishReason: result.finishReason,
        responseLength: result.text?.length || 0,
      });
      return result;
    } catch (err) {
      logger.error(`[${functionTag}] Exception`, {
        provider,
        modelName: this.modelName,
        message: 'Error in generating text',
        err: String(err),
      });
      throw err; // Re-throw error to trigger fallback
    }
  }
}