@lobehub/chat
Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal interaction, and an extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.
import { EnhancedGenerateContentResponse } from '@google/generative-ai';
import { ModelTokensUsage } from '@/types/message';
import { GroundingSearch } from '@/types/search';
import { nanoid } from '@/utils/uuid';
import { ChatStreamCallbacks } from '../../types';
import {
StreamContext,
StreamProtocolChunk,
StreamToolCallChunkData,
createCallbacksTransformer,
createSSEProtocolTransformer,
createTokenSpeedCalculator,
generateToolCallId,
} from './protocol';
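/**
 * Transform a single Gemini SDK response chunk into the internal stream
 * protocol: text / reasoning / tool_calls / grounding / base64_image data,
 * with trailing `stop` and `usage` chunks once a finish reason is reported.
 */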
const transformGoogleGenerativeAIStream = (
chunk: EnhancedGenerateContentResponse,
context: StreamContext,
): StreamProtocolChunk | StreamProtocolChunk[] => {
  // may need a different structure to support multiple choices
const candidate = chunk.candidates?.[0];
const usage = chunk.usageMetadata;
const usageChunks: StreamProtocolChunk[] = [];
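  // when the final chunk arrives it reports a finishReason alongside
  // usageMetadata; build the trailing `stop` and `usage` protocol chunks here
  // so every terminal branch below can append them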
if (candidate?.finishReason && usage) {
const outputReasoningTokens = (usage as any).thoughtsTokenCount || undefined;
const totalOutputTokens = (usage.candidatesTokenCount ?? 0) + (outputReasoningTokens ?? 0);
usageChunks.push(
{ data: candidate.finishReason, id: context?.id, type: 'stop' },
{
data: {
          // TODO: Google SDK 0.24.0 doesn't have promptTokensDetails types
inputImageTokens: (usage as any).promptTokensDetails?.find(
(i: any) => i.modality === 'IMAGE',
)?.tokenCount,
inputTextTokens: (usage as any).promptTokensDetails?.find(
(i: any) => i.modality === 'TEXT',
)?.tokenCount,
outputReasoningTokens,
outputTextTokens: totalOutputTokens - (outputReasoningTokens ?? 0),
totalInputTokens: usage.promptTokenCount,
totalOutputTokens,
totalTokens: usage.totalTokenCount,
} as ModelTokensUsage,
id: context?.id,
type: 'usage',
},
);
}
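  // tool calls take precedence over plain text: map the SDK's FunctionCall
  // list onto the protocol's tool_calls chunk shape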
const functionCalls = chunk.functionCalls?.();
if (functionCalls) {
return [
{
data: functionCalls.map(
(value, index): StreamToolCallChunkData => ({
function: {
arguments: JSON.stringify(value.args),
name: value.name,
},
id: generateToolCallId(index, value.name),
index: index,
type: 'function',
}),
),
id: context.id,
type: 'tool_calls',
},
...usageChunks,
];
}
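  // aggregated text of the current chunk, as exposed by the SDK helper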
const text = chunk.text?.();
if (candidate) {
    // first, check for reasoning content (parts flagged with `thought: true`)
if (Array.isArray(candidate.content.parts) && candidate.content.parts.length > 0) {
for (const part of candidate.content.parts) {
if (part && part.text && (part as any).thought === true) {
return { data: part.text, id: context.id, type: 'reasoning' };
}
}
}
    // web search grounding: emit the text along with a grounding chunk of citations
if (candidate.groundingMetadata) {
const { webSearchQueries, groundingChunks } = candidate.groundingMetadata;
return [
{ data: text, id: context.id, type: 'text' },
{
data: {
            citations: groundingChunks?.map((groundingChunk) => ({
              // the uri Google returns is a Google-proxied URL, so the real
              // favicon cannot be resolved from it; use the title instead
              favicon: groundingChunk.web?.title,
              title: groundingChunk.web?.title,
              url: groundingChunk.web?.uri,
            })),
searchQueries: webSearchQueries,
} as GroundingSearch,
id: context.id,
type: 'grounding',
},
...usageChunks,
];
}
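    // stream finished: flush any trailing text plus the stop/usage chunks,
    // or a bare `stop` chunk when no usage metadata was reported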
if (candidate.finishReason) {
if (chunk.usageMetadata) {
return [
          text ? { data: text, id: context?.id, type: 'text' } : undefined,
...usageChunks,
].filter(Boolean) as StreamProtocolChunk[];
}
return { data: candidate.finishReason, id: context?.id, type: 'stop' };
}
    if (text?.trim()) return { data: text, id: context?.id, type: 'text' };
    // inline image data: emit as a base64 data URL
if (Array.isArray(candidate.content.parts) && candidate.content.parts.length > 0) {
const part = candidate.content.parts[0];
if (part && part.inlineData && part.inlineData.data && part.inlineData.mimeType) {
return {
data: `data:${part.inlineData.mimeType};base64,${part.inlineData.data}`,
id: context.id,
type: 'base64_image',
};
}
}
}
return {
data: text,
id: context?.id,
type: 'text',
};
};
export interface GoogleAIStreamOptions {
callbacks?: ChatStreamCallbacks;
inputStartAt?: number;
}
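/**
 * Adapt a Gemini SDK ReadableStream into the protocol-encoded SSE stream the
 * chat frontend consumes, wiring in token-speed measurement and lifecycle
 * callbacks along the way.
 */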
export const GoogleGenerativeAIStream = (
rawStream: ReadableStream<EnhancedGenerateContentResponse>,
{ callbacks, inputStartAt }: GoogleAIStreamOptions = {},
) => {
const streamStack: StreamContext = { id: 'chat_' + nanoid() };
return rawStream
.pipeThrough(
createTokenSpeedCalculator(transformGoogleGenerativeAIStream, { inputStartAt, streamStack }),
)
.pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
.pipeThrough(createCallbacksTransformer(callbacks));
};
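// A minimal usage sketch (not part of the original file): feeding the helper
// from the official SDK. `generateContentStream` returns an async iterable of
// EnhancedGenerateContentResponse; the model name and prompt are placeholder
// assumptions.
//
// import { GoogleGenerativeAI } from '@google/generative-ai';
//
// const genAI = new GoogleGenerativeAI(process.env.GOOGLE_API_KEY!);
// const model = genAI.getGenerativeModel({ model: 'gemini-1.5-flash' });
// const { stream } = await model.generateContentStream('Hello there');
//
// // wrap the async iterable as a ReadableStream for GoogleGenerativeAIStream
// const readable = new ReadableStream<EnhancedGenerateContentResponse>({
//   async start(controller) {
//     for await (const chunk of stream) controller.enqueue(chunk);
//     controller.close();
//   },
// });
//
// const sseStream = GoogleGenerativeAIStream(readable, { inputStartAt: Date.now() });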