venice-dev-tools
Version:
unOfficial SDK for the Venice AI API
161 lines (142 loc) • 5.5 kB
text/typescript
import { ApiEndpoint } from '../../../registry/endpoint';
import { ChatCompletionRequest } from '../../../../types';
import { ChatValidator } from '../../../../utils/validators/chat-validator';
/**
* API endpoint for streaming chat completions.
*
* This endpoint allows you to create streaming chat completions using various models.
* It returns an async generator that yields completion chunks as they become available.
*
* @example
* ```typescript
* const stream = venice.chatStream.streamCompletion({
* model: 'llama-3.3-70b',
* messages: [
* { role: 'system', content: 'You are a helpful assistant.' },
* { role: 'user', content: 'Write a short poem about AI.' }
* ]
* });
*
* // Process the stream chunks as they arrive
* for await (const chunk of stream) {
* const content = chunk.choices[0]?.delta?.content || '';
* process.stdout.write(content);
* }
* ```
*/
export class ChatStreamEndpoint extends ApiEndpoint {
/**
* The chat validator for validating request parameters.
*/
private validator: ChatValidator;
/**
* Creates a new ChatStreamEndpoint instance.
*
* @param client - The Venice client instance
*/
constructor(client: any) {
super(client);
this.validator = new ChatValidator();
}
/**
* Gets the base endpoint path for chat requests.
*
* @returns The endpoint path ('/chat')
*/
getEndpointPath(): string {
return '/chat';
}
/**
* Streams a chat completion using the specified model.
*
* This method returns an async generator that yields completion chunks
* as they become available from the API. The request will automatically
* have `stream: true` set regardless of what's provided in the request.
*
* @param request - The chat completion request parameters
* @param request.model - The model to use (e.g., 'llama-3.3-70b')
* @param request.messages - The conversation messages array
* @param request.temperature - Controls randomness (0-1, default: 0.7)
* @param request.max_tokens - Maximum tokens to generate (default: varies by model)
* @param request.top_p - Controls diversity via nucleus sampling (0-1, default: 1)
* @param request.frequency_penalty - Penalizes repeated tokens (-2 to 2, default: 0)
* @param request.presence_penalty - Penalizes repeated topics (-2 to 2, default: 0)
* @param request.stop - Array of sequences where the model should stop generating
*
* @returns An async generator that yields completion chunks
*
* @throws {VeniceValidationError} If the request parameters are invalid
* @throws {VeniceApiError} If the API returns an error
* @throws {VeniceNetworkError} If there's a network issue
* @throws {VeniceTimeoutError} If the request times out
* @throws {VeniceStreamError} If there's an error processing the stream
*/
public async *streamCompletion(request: ChatCompletionRequest): AsyncGenerator<any, void, unknown> {
// Set stream to true
const streamingRequest = { ...request, stream: true };
// Validate request parameters
this.validator.validateChatCompletionRequest(streamingRequest);
// Emit a request event
this.emit('request', { type: 'chat.completion.stream', data: streamingRequest });
// Make the API request
const response = await this.streamingHttp.stream(
this.getPath('/completions'),
streamingRequest
);
try {
// Get the reader from the response body
const reader = response.body?.getReader();
if (!reader) {
throw new Error('Stream response body is null');
}
// Read chunks from the stream
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) break;
// Decode the chunk and add it to our buffer
buffer += decoder.decode(value, { stream: true });
// Process complete lines from the buffer
const lines = buffer.split('\n');
buffer = lines.pop() || ''; // Keep the last (potentially incomplete) line in the buffer
for (const line of lines) {
const trimmedLine = line.trim();
if (!trimmedLine) continue;
if (trimmedLine.startsWith('data: ')) {
const data = trimmedLine.substring(6);
if (data === '[DONE]') break;
try {
const parsed = JSON.parse(data);
yield parsed;
} catch (e) {
console.error('Error parsing stream data:', e);
}
}
}
}
// Process any remaining data in the buffer
if (buffer.trim()) {
const lines = buffer.split('\n');
for (const line of lines) {
const trimmedLine = line.trim();
if (!trimmedLine) continue;
if (trimmedLine.startsWith('data: ')) {
const data = trimmedLine.substring(6);
if (data === '[DONE]') break;
try {
const parsed = JSON.parse(data);
yield parsed;
} catch (e) {
console.error('Error parsing stream data:', e);
}
}
}
}
} finally {
// Emit a response event when the stream ends
this.emit('response', { type: 'chat.completion.stream', data: { status: 'completed' } });
}
}
}
export default ChatStreamEndpoint;