UNPKG

langxlang

Version:

LLM wrapper for OpenAI GPT and Google Gemini and PaLM 2 models

github.com/extremeheat/LXL

extremeheat/LXL

240 lines (216 loc) • 7.45 kB

JavaScript

const OpenAI = require('openai')
const debug = require('debug')('lxl')
const SafetyError = require('../SafetyError')

/**
 * Removes choices that were stopped by the content filter.
 *
 * @param {Array<{finishReason: string}>} choices - Accumulated result choices.
 * @returns {Array} The input array when nothing was flagged, otherwise only the unflagged choices.
 * @throws {SafetyError} When at least one choice was flagged and none remain.
 */
function safetyCheck (choices) {
  const hasSafetyFlag = choices.some((choice) => choice.finishReason === 'content_filter')
  if (!hasSafetyFlag) return choices

  const okOnes = choices.filter((choice) => choice.finishReason !== 'content_filter')
  if (okOnes.length === 0) {
    throw new SafetyError('Completions were blocked by OpenAI safety filter')
  }
  return okOnes
}

/**
 * Maps a raw usage object onto the camelCase shape returned to callers.
 * Accepts both the `input_tokens`/`output_tokens` and the
 * `prompt_tokens`/`completion_tokens` spellings.
 *
 * @param {object} usageData - Usage fields collected from response chunks.
 * @returns {{inputTokens: (number|undefined), outputTokens: (number|undefined), totalTokens: (number|undefined)}}
 */
function normalizeUsage (usageData) {
  return {
    inputTokens: usageData.input_tokens ?? usageData.prompt_tokens,
    outputTokens: usageData.output_tokens ?? usageData.completion_tokens,
    totalTokens: usageData.total_tokens
  }
}

/**
 * Builds a handler that folds response chunks into `resultChoices` and `usageData`.
 *
 * The handler accepts streamed chunks (`choice.delta`), a complete response
 * (`choice.message`), or a falsy value, which signals the end of the stream.
 *
 * @param {Function} [chunkCb] - Called with `{ n, textDelta, parts, done }` for each text delta
 *   and with `{ done: true, textDelta: '', parts: [] }` at the end of the stream.
 * @param {Array} resultChoices - Mutated in place; one entry per choice.
 * @param {object} usageData - Mutated in place with the fields of `chunk.usage`.
 * @returns {Function} The chunk handler. It throws `Error('Unknown chunk type')` for a choice
 *   carrying neither `message` nor `delta`.
 */
function createChunkProcessor (chunkCb, resultChoices, usageData) {
  return function (chunk) {
    if (!chunk) {
      chunkCb?.({ done: true, textDelta: '', parts: [] })
      return
    }
    if (chunk.usage) {
      Object.assign(usageData, chunk.usage)
    }

    const choices = chunk.choices ?? []
    for (let position = 0; position < choices.length; position++) {
      const choice = choices[position]
      // A streamed chunk names its choice through `index`; the position inside
      // the chunk's array is only a fallback for payloads that omit it.
      const choiceId = choice.index ?? position
      const resultChoice = resultChoices[choiceId] ??= { content: '', fnCalls: [], finishReason: '', safetyRatings: {} }

      if (choice.finish_reason) {
        resultChoice.finishReason = choice.finish_reason
      }

      if (choice.message) {
        // `content` can be null; appending it directly would yield the text "null".
        resultChoice.content += choice.message.content ?? ''
      } else if (choice.delta) {
        const delta = choice.delta
        if (delta.tool_calls) {
          for (const call of delta.tool_calls) {
            resultChoice.fnCalls[call.index] ??= { id: call.id, name: '', args: '' }
            const entry = resultChoice.fnCalls[call.index]
            if (call.function?.name) {
              entry.name = call.function.name
            }
            if (call.function?.arguments) {
              // Arguments arrive as string fragments and are concatenated.
              entry.args += call.function.arguments
            }
          }
        }
        // Checked independently of tool_calls so text is kept when a delta
        // carries both (or an empty tool_calls array).
        if (delta.content) {
          resultChoice.content += delta.content
          chunkCb?.({ n: Number(choiceId), textDelta: delta.content, parts: [{ text: delta.content }], done: false })
        }
      } else {
        throw new Error('Unknown chunk type')
      }
    }
  }
}

// unused
/**
 * Streams a chat completion through the `openai` client library.
 *
 * @param {string} model - Model identifier.
 * @param {Array<object>} messages - Chat messages.
 * @param {object} options - Passed to the `OpenAI` constructor; `functions` and
 *   `generationConfig` are also read for the request.
 * @param {Function} [chunkCb] - Receives text deltas, see `createChunkProcessor`.
 * @returns {Promise<{choices: Array, usage: object}>}
 * @throws {SafetyError} When every choice was blocked by the content filter.
 */
async function generateChatCompletionEx (model, messages, options, chunkCb) {
  const openai = new OpenAI(options)
  const completion = await openai.chat.completions.create({
    model,
    messages,
    stream: true,
    tools: options.functions || undefined,
    tool_choice: options.functions ? 'auto' : undefined,
    ...options.generationConfig
  })
  const resultChoices = []
  const usageData = {}
  const handler = createChunkProcessor(chunkCb, resultChoices, usageData)
  for await (const chunk of completion) {
    handler(chunk)
  }
  return { choices: safetyCheck(resultChoices), usage: normalizeUsage(usageData) }
}

// Updated to use Fetch API
/**
 * POSTs `payload` to `${apiBase}/chat/completions` and feeds the response to `chunkCb`.
 *
 * With `payload.stream` unset the parsed JSON body is delivered in one call.
 * Otherwise each `data: ` line of the event stream is parsed as JSON and
 * delivered, and `data: [DONE]` is delivered as `null`.
 *
 * @param {string} apiBase - Base URL of the API.
 * @param {string} apiKey - Sent as a Bearer token.
 * @param {object} payload - Request body.
 * @param {Function} chunkCb - Receives each parsed chunk, or `null` at the end of a stream.
 * @returns {Promise<void>}
 * @throws {Error} When the server answers with a non-2xx status.
 */
async function _sendApiChatComplete (apiBase, apiKey, payload, chunkCb) {
  const url = new URL(`${apiBase}/chat/completions`)
  const headers = {
    Accept: 'text/event-stream',
    'Content-Type': 'application/json',
    'Cache-Control': 'no-cache',
    Connection: 'keep-alive',
    Authorization: `Bearer ${apiKey}`
  }
  const body = JSON.stringify(payload)
  // The API key is kept out of the debug log.
  debug('[OpenAI] /completions Payload', url.toString(), { ...headers, Authorization: 'Bearer [redacted]' }, body)

  const response = await fetch(url, { method: 'POST', headers, body })
  if (!response.ok) {
    const errorText = await response.text()
    debug(`[OpenAI] Server returned status code ${response.status}`, errorText)
    throw new Error(`Server returned status code ${response.status}: ${errorText}`)
  }

  if (!payload.stream) {
    const data = await response.json()
    chunkCb(data)
    return
  }

  const dataPrefix = 'data: '
  const handleLine = (rawLine) => {
    const line = rawLine.trimEnd() // tolerate CRLF line endings
    if (line === 'data: [DONE]') {
      chunkCb(null)
    } else if (line.startsWith(dataPrefix)) {
      chunkCb(JSON.parse(line.slice(dataPrefix.length)))
    }
  }

  const reader = response.body.getReader()
  const decoder = new TextDecoder('utf-8')
  let buffer = ''
  while (true) {
    const { done, value } = await reader.read()
    if (done) break
    // stream: true keeps a multi-byte character that straddles two reads intact.
    buffer += decoder.decode(value, { stream: true })
    const lines = buffer.split('\n')
    buffer = lines.pop() // Keep incomplete line in buffer
    for (const line of lines) {
      handleLine(line)
    }
  }
  // Flush the decoder and process a final line that had no trailing newline.
  buffer += decoder.decode()
  if (buffer) {
    handleLine(buffer)
  }
}

/**
 * Streams a chat completion over the Fetch-based transport.
 *
 * @param {string} model - Model identifier.
 * @param {Array<object>} messages - Chat messages.
 * @param {object} options - Reads `baseURL` (defaults to `https://api.openai.com/v1`),
 *   `apiKey`, `generationConfig` and `functions`.
 * @param {Function} [chunkCb] - Receives text deltas, see `createChunkProcessor`.
 * @returns {Promise<{choices: Array, usage: object}>}
 * @throws {SafetyError} When every choice was blocked by the content filter.
 */
async function generateChatCompletionIn (model, messages, options, chunkCb) {
  debug('openai.generateChatCompletionIn', model, options)
  const resultChoices = []
  const usageData = {}
  const payload = {
    model,
    ...options.generationConfig,
    messages,
    stream: true,
    tools: options.functions?.map((fn) => ({ type: 'function', function: fn })),
    tool_choice: options.functions ? 'auto' : undefined
  }
  await _sendApiChatComplete(
    options.baseURL || 'https://api.openai.com/v1',
    options.apiKey,
    payload,
    createChunkProcessor(chunkCb, resultChoices, usageData)
  )
  debug('openai.generateChatCompletionIn result', JSON.stringify(resultChoices))
  return { choices: safetyCheck(resultChoices), usage: normalizeUsage(usageData) }
}

/**
 * Runs a single-turn completion: an optional system message followed by one user message.
 *
 * @param {string} model - Model identifier.
 * @param {string} [system] - System prompt; omitted from the request when falsy.
 * @param {string} user - User prompt.
 * @param {object} [options={}] - Forwarded to `generateChatCompletionIn`.
 * @returns {Promise<{choices: Array, usage: object}>}
 */
async function generateCompletion (model, system, user, options = {}) {
  const messages = [{ role: 'user', content: user }]
  if (system) messages.unshift({ role: 'system', content: system })
  const completion = await generateChatCompletionIn(model, messages, options)
  debug('[OpenAI] Completion', JSON.stringify(completion))
  return completion
}

/**
 * Transcribes audio through the client's `audio.transcriptions` endpoint.
 *
 * @param {string} apiBase - Base URL of the API.
 * @param {string} apiKey - API key.
 * @param {string} model - Transcription model identifier.
 * @param {Buffer|*} stream - Audio data; a Buffer is wrapped in an `audio/wav` Blob,
 *   anything else is passed through unchanged.
 * @param {object} [options={}] - Reads `temperature`, `responseFormat` and `granularity`.
 * @returns {Promise<*>} Whatever the client returns for the transcription request.
 */
async function transcribeAudioEx (apiBase, apiKey, model, stream, options = {}) {
  const openai = new OpenAI({ apiKey, baseURL: apiBase })
  const payload = {
    model,
    file: stream instanceof Buffer ? new Blob([stream], { type: 'audio/wav' }) : stream,
    temperature: options.temperature,
    response_format: options.responseFormat,
    // NOTE(review): forwarded exactly as given; the OpenAI reference documents an
    // array of 'word' | 'segment' here — confirm what callers pass.
    timestamp_granularities: options.granularity
  }
  if (payload.timestamp_granularities === 'word' || payload.timestamp_granularities === 'sentence') {
    if (!payload.response_format) {
      payload.response_format = 'verbose_json'
    }
  }
  // The official client exposes transcription under `audio.transcriptions`.
  const transcription = await openai.audio.transcriptions.create(payload)
  return transcription
}

/**
 * Synthesizes speech through the client's `audio.speech` endpoint.
 *
 * @param {string} apiBase - Base URL of the API.
 * @param {string} apiKey - API key.
 * @param {string} model - Speech model identifier.
 * @param {string} text - Text to speak; sent as the request's `input` field.
 * @param {object} [options={}] - Reads `voice`, `speed`, `pitch` and `volume`.
 * @returns {Promise<*>} Whatever the client returns for the speech request.
 */
async function synthesizeSpeechEx (apiBase, apiKey, model, text, options = {}) {
  const openai = new OpenAI({ apiKey, baseURL: apiBase })
  const payload = {
    model,
    input: text,
    voice: options.voice,
    speed: options.speed,
    // NOTE(review): pitch/volume are not in the OpenAI speech reference; presumably
    // meant for OpenAI-compatible backends — confirm before relying on them.
    pitch: options.pitch,
    volume: options.volume
  }
  // The official client exposes speech synthesis under `audio.speech`.
  const speech = await openai.audio.speech.create(payload)
  return speech
}

/**
 * Lists the models available on the given endpoint.
 *
 * @param {string} baseURL - Base URL of the API.
 * @param {string} apiKey - API key.
 * @returns {Promise<Array>} The `data` array of the list response body.
 */
async function listModels (baseURL, apiKey) {
  const openai = new OpenAI({ baseURL, apiKey })
  const list = await openai.models.list()
  return list.body.data
}

module.exports = { generateCompletion, generateChatCompletionEx, generateChatCompletionIn, transcribeAudioEx, synthesizeSpeechEx, listModels }

/*
via https://platform.openai.com/docs/guides/text-generation/chat-completions-api

Every response will include a finish_reason. The possible values for finish_reason are:

stop: API returned complete message, or a message terminated by one of the stop sequences provided via the stop parameter
length: Incomplete model output due to max_tokens parameter or token limit
function_call: The model decided to call a function
content_filter: Omitted content due to a flag from our content filters
null: API response still in progress or incomplete
*/