llama-flow
The TypeScript-first prompt engineering toolkit for working with chat-based LLMs.
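Below is the compiled OpenAI provider module from the package. As a point of reference, here is a minimal usage sketch; it assumes the OpenAI class below is re-exported from the package root as `llama-flow` (the import path and environment variable are illustrative, everything else follows the `request` signature in this file):

import { OpenAI } from 'llama-flow'; // assumed root export

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

// `request` takes OpenAI-style chat messages and resolves with the completion text.
const response = await openai.request([
  { role: 'system', content: 'You are a concise assistant.' },
  { role: 'user', content: 'Say hello in one word.' },
]);

console.log(response.content, response.usage);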
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.OpenAI = void 0;
const js_tiktoken_1 = __importDefault(require("js-tiktoken"));
const jsonic_1 = __importDefault(require("jsonic"));
const lodash_1 = require("lodash");
const openai_edge_1 = require("openai-edge");
const chat_1 = require("../chat");
const config_1 = require("../config");
const utils_1 = require("../utils");
const errors_1 = require("./errors");
const Defaults = {
    model: 'gpt-3.5-turbo',
    messages: [],
};
const RequestDefaults = {
    retries: config_1.CompletionDefaultRetries,
    retryInterval: config_1.RateLimitRetryIntervalMs,
    timeout: config_1.CompletionDefaultTimeout,
    minimumResponseTokens: config_1.MinimumResponseTokens,
};
const AzureQueryParams = { 'api-version': '2023-03-15-preview' };
const getTokenLimit = (model) => (model === 'gpt-4' ? 8000 : 4096);
const encoder = js_tiktoken_1.default.getEncoding('cl100k_base');
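// Map the library's camelCase chat options onto the snake_case request body
// fields expected by the OpenAI chat completions endpoint.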
const convertConfig = (config) => ({
    model: config.model,
    temperature: config.temperature,
    top_p: config.topP,
    n: 1,
    stop: config.stop,
    presence_penalty: config.presencePenalty,
    frequency_penalty: config.frequencyPenalty,
    logit_bias: config.logitBias,
    user: config.user,
    stream: config.stream,
});
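// Wrapper around openai-edge's OpenAIApi that adds Azure OpenAI routing,
// prompt-size checks, request timeouts, and retry-with-backoff on transient failures.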
class OpenAI {
    _model;
    _isAzure;
    _headers;
    defaults;
    config;
    constructor(config, defaults, chatConfig) {
        // Treat the client as an Azure OpenAI deployment when both the
        // endpoint and deployment name are provided.
        this._isAzure = Boolean(config.azureEndpoint && config.azureDeployment);
        const configuration = new openai_edge_1.Configuration({
            ...config,
            basePath: this._isAzure
                ? `${config.azureEndpoint}${config.azureEndpoint?.at(-1) === '/' ? '' : '/'}openai/deployments/${config.azureDeployment}`
                : undefined,
        });
        // Azure authenticates with an `api-key` header rather than a bearer token.
        this._headers = this._isAzure
            ? { 'api-key': String(config.apiKey) }
            : undefined;
        // Custom fetch that appends the Azure api-version query parameter to
        // string and URL inputs.
        const azureFetch = (input, init) => {
            const customInput = typeof input === 'string'
                ? `${input}?${new URLSearchParams(AzureQueryParams)}`
                : input instanceof URL
                    ? `${input.toString()}?${new URLSearchParams(AzureQueryParams)}`
                    : input;
            return fetch(customInput, init);
        };
        this._model = new openai_edge_1.OpenAIApi(configuration, undefined, this._isAzure ? azureFetch : undefined);
        this.defaults = defaults ?? {};
        this.config = chatConfig ?? {};
    }
    // Start a new Chat session backed by this model instance.
    chat(persona, config) {
        const finalConfig = (0, lodash_1.defaults)(config, this.config);
        return new chat_1.Chat(persona, finalConfig ?? {}, this);
    }
    // Rough token count for a set of chat messages: ~5 tokens of per-message
    // overhead plus the encoded content, with 2 extra tokens to prime the reply.
    getTokensFromMessages(messages) {
        let numTokens = 0;
        for (const message of messages) {
            numTokens += 5;
            numTokens += encoder.encode(message.content).length;
        }
        numTokens += 2;
        return numTokens;
    }
    async request(messages, config = {}, requestOptions = {}) {
        // Per-call config > instance defaults > library defaults.
        const finalConfig = (0, lodash_1.defaults)(convertConfig(config), convertConfig(this.defaults), Defaults);
        const finalRequestOptions = (0, lodash_1.defaults)(requestOptions, this.config.options, RequestDefaults);
        utils_1.debug.log(`Sending request with config: ${JSON.stringify(finalConfig)}, options: ${JSON.stringify(finalRequestOptions)}`);
        try {
            // Reject prompts that would not leave room for the minimum response.
            const maxPromptTokens = getTokenLimit(finalConfig.model) -
                finalRequestOptions.minimumResponseTokens;
            const messageTokens = this.getTokensFromMessages(messages);
            if (messageTokens > maxPromptTokens) {
                throw new errors_1.TokenError('Prompt too big, not enough tokens to meet minimum response', messageTokens - maxPromptTokens);
            }
            // Abort the request if it exceeds the configured timeout.
            const controller = new AbortController();
            const timeoutId = setTimeout(() => controller.abort(), finalRequestOptions.timeout);
            const completion = await this._model.createChatCompletion({
                ...finalConfig,
                messages,
                stream: finalConfig.stream,
            }, {
                signal: controller.signal,
                headers: this._headers,
            });
            clearTimeout(timeoutId);
            if (!completion.ok) {
                if (completion.status === 401) {
                    utils_1.debug.error('Authorization error, did you set the OpenAI API key correctly?');
                    throw new Error('Authorization error');
                }
                else if (completion.status === 429 || completion.status >= 500) {
                    // Rate limited or server error: retry with exponential backoff.
                    utils_1.debug.log(`Completion rate limited (${completion.status}), retrying... attempts left: ${finalRequestOptions.retries}`);
                    await (0, utils_1.sleep)(finalRequestOptions.retryInterval);
                    return this.request(messages, config, {
                        ...finalRequestOptions,
                        retries: finalRequestOptions.retries - 1,
                        retryInterval: finalRequestOptions.retryInterval * 2,
                    });
                }
            }
            let content = '';
            let usage;
            if (finalConfig.stream) {
                // Streaming: read the SSE body and accumulate the delta chunks.
                const reader = completion.body?.getReader();
                if (!reader) {
                    throw new Error('Reader undefined');
                }
                const decoder = new TextDecoder('utf-8');
                while (true) {
                    const { done, value } = await reader.read();
                    const stringified = decoder.decode(value).split('\n');
                    for (const line of stringified) {
                        try {
                            const cleaned = line.replace('data:', '').trim();
                            if (cleaned.length === 0 || cleaned === '[DONE]') {
                                continue;
                            }
                            const parsed = (0, jsonic_1.default)(cleaned);
                            const text = parsed.choices[0].delta.content ?? '';
                            utils_1.debug.write(text);
                            // Forward each chunk to the caller-supplied event emitter, if any.
                            finalRequestOptions?.events?.emit('data', text);
                            content += text;
                        }
                        catch (e) {
                            utils_1.debug.error('Error parsing content', e);
                        }
                    }
                    if (done) {
                        break;
                    }
                }
                utils_1.debug.write('\n[STREAM] response end\n');
            }
            else {
                // Non-streaming: parse the complete JSON response body.
                const body = await completion.json();
                if (body.error || !('choices' in body)) {
                    throw new Error(`Completion response error: ${JSON.stringify(body ?? {})}`);
                }
                content = body.choices[0].message?.content;
                usage = body.usage;
            }
            if (!content) {
                throw new Error('Completion response malformed');
            }
            return {
                content,
                isStream: Boolean(finalConfig.stream),
                usage: usage
                    ? {
                        totalTokens: usage.total_tokens,
                        promptTokens: usage.prompt_tokens,
                        completionTokens: usage.completion_tokens,
                    }
                    : undefined,
            };
        }
        catch (error) {
            // Out of retries: surface the error to the caller.
            if (!finalRequestOptions.retries) {
                utils_1.debug.log('Completion failed, already retried, failing completion');
                throw error;
            }
            // Retry network-level timeouts and resets with exponential backoff.
            if (error.code === 'ETIMEDOUT' ||
                error.code === 'ECONNABORTED' ||
                error.code === 'ECONNRESET') {
                utils_1.debug.log(`Completion timed out (${error.code}), retrying... attempts left: ${finalRequestOptions.retries}`);
                await (0, utils_1.sleep)(finalRequestOptions.retryInterval);
                return this.request(messages, config, {
                    ...finalRequestOptions,
                    retries: finalRequestOptions.retries - 1,
                    retryInterval: finalRequestOptions.retryInterval * 2,
                });
            }
            throw error;
        }
    }
}
exports.OpenAI = OpenAI;
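When `stream` is enabled, the loop above forwards each parsed delta to `requestOptions.events` as a 'data' event. A sketch of wiring that up with a Node EventEmitter, under the same root-export assumption as the example at the top:

import { EventEmitter } from 'events';
import { OpenAI } from 'llama-flow'; // assumed root export

const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

const events = new EventEmitter();
events.on('data', (chunk) => process.stdout.write(chunk)); // print tokens as they stream in

const response = await openai.request(
  [{ role: 'user', content: 'Stream a short greeting.' }],
  { stream: true },
  { events },
);

console.log('\nFull text:', response.content);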