@juspay/neurolink
Version:
Universal AI Development Platform with working MCP integration, multi-provider support, voice (TTS/STT/realtime), and professional CLI. 58+ external MCP servers discoverable, multimodal file processing, RAG pipelines. Build, test, and deploy AI applications.
403 lines • 17.6 kB
JavaScript
import { ErrorCategory, ErrorSeverity, ReplicateModels, } from "../constants/enums.js";
import { BaseProvider } from "../core/baseProvider.js";
import { getReplicateAuth } from "../adapters/replicate/auth.js";
import { downloadPredictionOutput, predict, } from "../adapters/replicate/predictionLifecycle.js";
import { MAX_IMAGE_BYTES } from "../utils/sizeGuard.js";
import { isNeuroLink } from "../neurolink.js";
import { logger } from "../utils/logger.js";
import { ERROR_CODES, NeuroLinkError } from "../utils/errorHandling.js";
import { createReplicateConfig, getProviderModel, validateApiKey, } from "../utils/providerConfig.js";
/**
 * Resolve the model id used when the caller does not name one.
 *
 * Delegates to `getProviderModel`, passing the key "REPLICATE_MODEL" and
 * `ReplicateModels.LLAMA_3_70B_INSTRUCT` as the fallback value.
 * NOTE(review): "REPLICATE_MODEL" is presumably an environment variable
 * name — confirm against `getProviderModel` in utils/providerConfig.
 *
 * @returns whatever `getProviderModel` returns for that key / fallback pair
 */
function getDefaultReplicateModel() {
    return getProviderModel("REPLICATE_MODEL", ReplicateModels.LLAMA_3_70B_INSTRUCT);
}
/**
 * Collapse a chat-style options object into the single flat prompt string
 * sent to Replicate's prediction API (Replicate-hosted Llama / Mistral
 * models take a flat `prompt` rather than a uniform OpenAI-style chat
 * payload).
 *
 * The user text is `options.prompt` when it is not null/undefined,
 * otherwise `options.input.text`, otherwise the empty string. An empty
 * string in `options.prompt` is kept as-is and does NOT fall through to
 * `input.text`. A truthy `options.systemPrompt` is prepended, separated
 * from the user text by one blank line.
 *
 * @param options - object that may carry `prompt`, `input.text` and `systemPrompt`
 * @returns the combined prompt string
 */
function buildPromptFromOptions(options) {
    const body = options.prompt ?? options.input?.text ?? "";
    const preamble = options.systemPrompt;
    return preamble ? `${preamble}\n\n${body}` : body;
}
/**
 * Reduce a prediction's `output` value to one plain string.
 *
 * - A string is returned unchanged.
 * - An array is concatenated in order, keeping only its string elements;
 *   every non-string element contributes nothing.
 * - Anything else (object, number, null, undefined, ...) yields "".
 *
 * @param output - the raw `output` field of a prediction
 * @returns the flattened text, possibly empty
 */
function flattenReplicateOutput(output) {
    if (Array.isArray(output)) {
        let joined = "";
        for (const piece of output) {
            if (typeof piece === "string") {
                joined += piece;
            }
        }
        return joined;
    }
    return typeof output === "string" ? output : "";
}
/**
* Replicate LLM Provider — predict-then-stream pattern.
*
* Replicate's prediction API is asynchronous: POST `/predictions`, poll
* until `succeeded`, fetch the output. Their streaming endpoint is SSE
* on a separate URL and not OpenAI-compatible.
*
* For a first pass we run the prediction synchronously (with the
* `Prefer: wait=60` hint baked into `predict()`) and synthesize a single-
* chunk stream. Future revisions can swap in true SSE streaming when
* the prediction lifecycle helper grows support for it.
*
* For image-gen models on Replicate (FLUX, SDXL, etc.) use Replicate via
* `output: { mode: "image" }` once the routing layer recognises the
* `flux` / `sdxl` prefixes (already in IMAGE_GENERATION_MODELS for
* direct Stability/Ideogram/Recraft; Replicate-hosted FLUX runs through
* `executeImageGeneration` overridden below when the model id matches).
*
* Tool calling is not supported (Replicate predictions are stateless and
* don't carry function-call metadata reliably).
*
* @see https://replicate.com/docs/reference/http
*/
export class ReplicateProvider extends BaseProvider {
    /** Instance-level Replicate API token; used when a call carries no per-call override. */
    apiToken;
    /** Optional API base URL taken from the constructor credentials; `undefined` when none given. */
    baseURL;
    /**
     * @param modelName - model id, forwarded to `BaseProvider`
     * @param sdk - forwarded to `BaseProvider` only when `isNeuroLink(sdk)` is truthy
     * @param _region - unused by this provider
     * @param credentials - optional `{ apiToken, baseUrl }`; a blank or
     *   whitespace-only `apiToken` is ignored and the token is resolved
     *   through `validateApiKey(createReplicateConfig())` instead
     */
    constructor(modelName, sdk, _region, credentials) {
        const validatedNeurolink = isNeuroLink(sdk) ? sdk : undefined;
        super(modelName, "replicate", validatedNeurolink);
        const overrideToken = credentials?.apiToken?.trim();
        this.apiToken =
            overrideToken && overrideToken.length > 0
                ? overrideToken
                : validateApiKey(createReplicateConfig());
        this.baseURL = credentials?.baseUrl;
        logger.debug("Replicate Provider initialized", {
            modelName: this.modelName,
            baseURL: this.baseURL,
        });
    }
    /** @returns the provider name held by the base class (`this.providerName`). */
    getProviderName() {
        return this.providerName;
    }
    /** @returns the default Replicate model id, as resolved by `getDefaultReplicateModel()`. */
    getDefaultModel() {
        return getDefaultReplicateModel();
    }
    /** Tool calling is not supported by this provider. */
    supportsTools() {
        return false;
    }
    /**
     * Replicate doesn't expose a chat-completions endpoint we can wrap as
     * an AI SDK `LanguageModel`. This getter is consulted by `streamText`
     * (which we intentionally bypass) and by middleware injection (which
     * also bypasses to executeStream below). Throwing here keeps the
     * contract honest.
     *
     * @throws {Error} always
     */
    getAISDKModel() {
        throw new Error("Replicate routes through the predictions API, not the AI SDK chat models. Streaming uses the predict-then-stream path inside executeStream.");
    }
    /**
     * Override generate() to bypass BaseProvider's AI-SDK path entirely.
     *
     * BaseProvider.runGenerateInActiveContext() calls prepareGenerationContext()
     * which unconditionally invokes getAISDKModelWithMiddleware() → getAISDKModel().
     * For Replicate that throws, because Replicate uses the predictions API, not
     * the Vercel AI SDK chat-completions contract.
     *
     * Routing, in order:
     *   1. `output.mode` of "video" / "avatar" / "music" → `super.generate()`.
     *   2. Model name contains an `IMAGE_GENERATION_MODELS` entry and the caller
     *      did not ask for a json / structured / text format → `super.generate()`.
     *   3. json / structured format, or a non-nullish `_analysisSchema` → rejected.
     *   4. Otherwise plain text: run `executeStream()` and join its chunks.
     *
     * @param optionsOrPrompt - a prompt string, or an options object
     * @param _analysisSchema - optional schema; any non-nullish value is rejected on the text path
     * @returns for plain text, `{ content, provider, model, usage }` with `usage` zeroed
     * @throws {NeuroLinkError} when structured / JSON output is requested
     */
    async generate(optionsOrPrompt, _analysisSchema) {
        const options = typeof optionsOrPrompt === "string"
            ? { prompt: optionsOrPrompt }
            : optionsOrPrompt;
        // Delegate special output modes to base class (which never calls getAISDKModel for these).
        // Checked before the dynamic import below so these modes do not load it.
        const outputMode = options.output?.mode;
        if (outputMode === "video" ||
            outputMode === "avatar" ||
            outputMode === "music") {
            return super.generate(options, _analysisSchema);
        }
        const { IMAGE_GENERATION_MODELS } = await import("../core/constants.js");
        const outputFormat = options.output?.format;
        const wantsStructured = outputFormat === "json" || outputFormat === "structured";
        const requestsNonImageOutput = wantsStructured || outputFormat === "text";
        // Image-gen models: delegate to base which calls executeImageGeneration()
        const isImageModel = IMAGE_GENERATION_MODELS.some((m) => this.modelName.includes(m));
        if (isImageModel && !requestsNonImageOutput) {
            return super.generate(options, _analysisSchema);
        }
        // Structured / JSON output is not natively supported by the Replicate
        // predictions API. Surface a clear error instead of silently returning
        // unvalidated plain text.
        if (wantsStructured ||
            (_analysisSchema !== null && _analysisSchema !== undefined)) {
            throw new NeuroLinkError({
                code: ERROR_CODES.PROVIDER_NOT_AVAILABLE,
                message: "Replicate models do not support structured-output / JSON schema. " +
                    "Remove output.format or _analysisSchema, or use a provider that " +
                    "implements the OpenAI chat-completions contract (e.g. openai, anthropic).",
                category: ErrorCategory.VALIDATION,
                severity: ErrorSeverity.MEDIUM,
                retriable: false,
            });
        }
        // Plain text generation — use the predict-then-stream path directly.
        // Pass `input.text` as the user text only; `systemPrompt` is forwarded
        // separately so `buildPromptFromOptions` inside executeStream prepends it
        // exactly once. Do NOT concatenate systemText here or the system prompt
        // would appear twice in the final API request.
        const userText = options.prompt ?? options.input?.text ?? "";
        const startTime = Date.now();
        const streamResult = await this.executeStream({
            input: { text: userText },
            systemPrompt: options.systemPrompt,
            maxTokens: options.maxTokens,
            temperature: options.temperature,
            abortSignal: options.abortSignal,
            // NOTE(review): executeStream as written below does not read `timeout`.
            timeout: options.timeout,
            // Forward per-call credentials: executeStream resolves
            // `options.credentials.replicate` before falling back to the
            // instance token, so omitting this would silently ignore a
            // per-call override on the text path.
            credentials: options.credentials,
        });
        // Collect all chunks from the single-chunk stream
        let content = "";
        for await (const chunk of streamResult.stream) {
            if ("content" in chunk && typeof chunk.content === "string") {
                content += chunk.content;
            }
        }
        const result = {
            content,
            provider: this.providerName,
            model: this.modelName,
            usage: { input: 0, output: 0, total: 0 },
        };
        logger.info(`[ReplicateProvider] generate() complete in ${Date.now() - startTime}ms — ${content.length} chars`);
        return result;
    }
    /**
     * Run one text prediction and expose its output as a single-chunk stream.
     *
     * @param options - reads `credentials.replicate`, `prompt` / `input.text`,
     *   `systemPrompt`, `maxTokens`, `temperature` and `abortSignal`
     * @param _analysisSchema - unused
     * @returns `{ stream, provider, model, finishReason, metadata }` where
     *   `stream` is an async iterable yielding exactly one `{ content }` chunk
     * @throws {NeuroLinkError} when auth cannot be resolved or the prompt is blank
     * @throws {Error} when the prediction output flattens to an empty string
     * @throws whatever `handleProviderError` returns when `predict()` rejects
     */
    async executeStream(options, _analysisSchema) {
        const startTime = Date.now();
        // Resolve per-call credentials first, then fall back to instance-level.
        const perCallCreds = options.credentials?.replicate;
        const effectiveToken = perCallCreds?.apiToken?.trim() || this.apiToken;
        const effectiveBaseUrl = perCallCreds?.baseUrl || this.baseURL;
        const auth = getReplicateAuth({
            apiToken: effectiveToken,
            baseUrl: effectiveBaseUrl,
        });
        if (!auth) {
            throw new NeuroLinkError({
                code: ERROR_CODES.MISSING_CONFIGURATION,
                message: "Replicate auth could not be resolved (REPLICATE_API_TOKEN missing).",
                category: ErrorCategory.CONFIGURATION,
                severity: ErrorSeverity.HIGH,
                retriable: false,
            });
        }
        const prompt = buildPromptFromOptions(options);
        if (!prompt.trim()) {
            throw new NeuroLinkError({
                code: ERROR_CODES.INVALID_PARAMETERS,
                message: "Replicate predictions require a prompt (input.text or prompt)",
                category: ErrorCategory.VALIDATION,
                severity: ErrorSeverity.MEDIUM,
                retriable: false,
            });
        }
        // Replicate's chat schemas accept EITHER `max_tokens` OR `max_new_tokens`
        // depending on the model. Newer Llama 3.x routes reject setting both with
        // E1102 InvalidArgumentMaxTokens. We pass only `max_new_tokens` (the more
        // widely supported field) and let the model defaults apply when unset.
        const replicateInput = {
            prompt,
            ...(options.maxTokens !== undefined && {
                max_new_tokens: options.maxTokens,
            }),
            temperature: options.temperature,
            top_p: 1,
        };
        let prediction;
        try {
            prediction = await predict(auth, {
                model: this.modelName,
                input: replicateInput,
            }, { abortSignal: options.abortSignal });
        }
        catch (err) {
            throw this.handleProviderError(err);
        }
        const text = flattenReplicateOutput(prediction.output);
        if (!text) {
            throw new Error(`Replicate prediction ${prediction.id} returned empty output`);
        }
        // The prediction has already completed, so the "stream" is synthetic:
        // one chunk carrying the whole text.
        const stream = {
            async *[Symbol.asyncIterator]() {
                yield { content: text };
            },
        };
        logger.info(`[ReplicateProvider] Generated ${text.length} chars in ${Date.now() - startTime}ms — model ${this.modelName} (prediction ${prediction.id})`);
        return {
            stream,
            provider: this.providerName,
            model: this.modelName,
            finishReason: "stop",
            metadata: {
                startTime,
                streamId: `replicate-${prediction.id}`,
            },
        };
    }
    /**
     * Image-gen routing for Replicate-hosted image models (FLUX, SDXL, etc.).
     *
     * The dispatcher in baseProvider routes here when the `model` name
     * matches an entry in `IMAGE_GENERATION_MODELS`. Replicate model ids
     * use `owner/name(:version)?` format — image models route here as
     * long as the caller passes the FQMN.
     *
     * @param options - reads `credentials.replicate`, `prompt` / `input.text`,
     *   `aspectRatio`, `negativePrompt`, `seed` and `abortSignal`
     * @returns `{ content, provider, model, usage, imageOutput: { base64 } }`;
     *   `content` echoes the prompt and `usage` is zeroed
     * @throws {NeuroLinkError} when auth cannot be resolved or the prompt is blank
     * @throws whatever `handleProviderError` returns when the prediction or
     *   the output download fails
     */
    async executeImageGeneration(options) {
        const startTime = Date.now();
        // Resolve per-call credentials first, then fall back to instance-level.
        const perCallCreds = options.credentials?.replicate;
        const effectiveToken = perCallCreds?.apiToken?.trim() || this.apiToken;
        const effectiveBaseUrl = perCallCreds?.baseUrl || this.baseURL;
        const auth = getReplicateAuth({
            apiToken: effectiveToken,
            baseUrl: effectiveBaseUrl,
        });
        if (!auth) {
            throw new NeuroLinkError({
                code: ERROR_CODES.MISSING_CONFIGURATION,
                message: "Replicate auth could not be resolved (REPLICATE_API_TOKEN missing).",
                category: ErrorCategory.CONFIGURATION,
                severity: ErrorSeverity.HIGH,
                retriable: false,
            });
        }
        const prompt = options.prompt ?? options.input?.text ?? "";
        if (!prompt.trim()) {
            throw new NeuroLinkError({
                code: ERROR_CODES.INVALID_PARAMETERS,
                message: "Replicate image-gen requires a prompt",
                category: ErrorCategory.VALIDATION,
                severity: ErrorSeverity.MEDIUM,
                retriable: false,
            });
        }
        const extras = options;
        const replicateInput = {
            prompt,
            output_format: "png",
        };
        // Optional knobs are only sent when the caller supplied them. `seed`
        // is compared against undefined so that a seed of 0 is still sent.
        if (extras.aspectRatio) {
            replicateInput.aspect_ratio = extras.aspectRatio;
        }
        if (extras.negativePrompt) {
            replicateInput.negative_prompt = extras.negativePrompt;
        }
        if (extras.seed !== undefined) {
            replicateInput.seed = extras.seed;
        }
        let prediction;
        try {
            prediction = await predict(auth, {
                model: this.modelName,
                input: replicateInput,
            }, { abortSignal: options.abortSignal });
        }
        catch (err) {
            throw this.handleProviderError(err);
        }
        let buffer;
        try {
            buffer = await downloadPredictionOutput(prediction, MAX_IMAGE_BYTES);
        }
        catch (err) {
            throw this.handleProviderError(err);
        }
        const base64 = buffer.toString("base64");
        logger.info(`[ReplicateProvider] Generated image (${buffer.length} bytes) in ${Date.now() - startTime}ms — model ${this.modelName}`);
        return {
            content: prompt,
            provider: this.providerName,
            model: this.modelName,
            usage: { input: 0, output: 0, total: 0 },
            imageOutput: { base64 },
        };
    }
    /**
     * Translate an arbitrary thrown value into a `NeuroLinkError`.
     *
     * Classification is by substring match on the error message, tested in
     * this order: auth (401 / unauthorized / invalid token), billing
     * (402 / insufficient credit), rate limit (429 / rate limit), missing
     * model (not found / 404), then a generic fallback. Only the rate-limit
     * case is marked retriable.
     *
     * @param error - an `Error`, a string, or any other value
     * @returns a `NeuroLinkError`; `originalError` is set only when `error`
     *   is an `Error` instance
     */
    formatProviderError(error) {
        const message = error instanceof Error
            ? error.message
            : typeof error === "string"
                ? error
                : "Unknown error";
        const originalError = error instanceof Error ? error : undefined;
        // Lowercase once; the phrase checks are case-insensitive.
        const lowered = message.toLowerCase();
        if (message.includes("401") ||
            lowered.includes("unauthorized") ||
            lowered.includes("invalid token")) {
            return new NeuroLinkError({
                code: ERROR_CODES.PROVIDER_AUTH_FAILED,
                message: "Invalid Replicate API token. Get one at https://replicate.com/account/api-tokens",
                category: ErrorCategory.CONFIGURATION,
                severity: ErrorSeverity.HIGH,
                retriable: false,
                context: { provider: "replicate" },
                originalError,
            });
        }
        if (message.includes("402") ||
            lowered.includes("insufficient credit")) {
            return new NeuroLinkError({
                code: ERROR_CODES.PROVIDER_QUOTA_EXCEEDED,
                message: "Replicate insufficient credit. Top up at https://replicate.com/account/billing — most image/music models require a paid balance.",
                category: ErrorCategory.RESOURCE,
                severity: ErrorSeverity.HIGH,
                retriable: false,
                context: { provider: "replicate" },
                originalError,
            });
        }
        if (message.includes("429") ||
            lowered.includes("rate limit")) {
            return new NeuroLinkError({
                code: ERROR_CODES.PROVIDER_QUOTA_EXCEEDED,
                message: "Replicate rate limit exceeded. Back off and retry.",
                category: ErrorCategory.RESOURCE,
                severity: ErrorSeverity.HIGH,
                retriable: true,
                context: { provider: "replicate" },
                originalError,
            });
        }
        if (lowered.includes("not found") ||
            message.includes("404")) {
            return new NeuroLinkError({
                code: ERROR_CODES.PROVIDER_NOT_AVAILABLE,
                message: `Replicate model '${this.modelName}' not found. Use owner/name or owner/name:version format. Browse https://replicate.com/explore`,
                category: ErrorCategory.VALIDATION,
                severity: ErrorSeverity.MEDIUM,
                retriable: false,
                context: { provider: "replicate", model: this.modelName },
                originalError,
            });
        }
        return new NeuroLinkError({
            code: ERROR_CODES.PROVIDER_NOT_AVAILABLE,
            message: `Replicate error: ${message}`,
            category: ErrorCategory.EXECUTION,
            severity: ErrorSeverity.HIGH,
            retriable: false,
            context: { provider: "replicate" },
            originalError,
        });
    }
    /**
     * @returns a promise resolving to `true` when the instance token is a
     *   non-blank string; no network call is made
     */
    async validateConfiguration() {
        return typeof this.apiToken === "string" && this.apiToken.trim().length > 0;
    }
    /**
     * @returns a snapshot `{ provider, model, defaultModel, baseURL }`;
     *   the API token is not included
     */
    getConfiguration() {
        return {
            provider: this.providerName,
            model: this.modelName,
            defaultModel: getDefaultReplicateModel(),
            baseURL: this.baseURL,
        };
    }
}
export default ReplicateProvider;
//# sourceMappingURL=replicate.js.map