/**
 * @lobehub/chat — Vertex AI stream transformer
 *
 * Lobe Chat is an open-source, high-performance chatbot framework with speech
 * synthesis, multimodal support, and an extensible Function Call plugin system,
 * deployable as a private ChatGPT/LLM web application with one click.
 */
import { GenerateContentResponse } from '@google/genai';

import { ModelTokensUsage } from '@/types/message';
import { GroundingSearch } from '@/types/search';
import { nanoid } from '@/utils/uuid';

import { type GoogleAIStreamOptions } from './google-ai';
import {
  StreamContext,
  StreamProtocolChunk,
  createCallbacksTransformer,
  createSSEProtocolTransformer,
  createTokenSpeedCalculator,
  generateToolCallId,
} from './protocol';
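
// Each transformed frame becomes one or more protocol chunks of the shape
// `{ data, id, type }` — the types emitted here are 'text', 'reasoning',
// 'tool_calls', 'grounding', 'usage', and 'stop' (see StreamProtocolChunk in
// ./protocol for the full union).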
const transformVertexAIStream = (
  chunk: GenerateContentResponse,
  context: StreamContext,
): StreamProtocolChunk | StreamProtocolChunk[] => {
  // TODO: may need a different structure to support multiple candidates (choices)
  const candidate = chunk.candidates?.[0];
  const usage = chunk.usageMetadata;
  const usageChunks: StreamProtocolChunk[] = [];
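
  // When this frame carries a finishReason plus usage metadata, queue a 'stop'
  // chunk followed by a 'usage' chunk so they can trail any final content
  // (tool calls, grounding, or text) emitted further down.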
  if (candidate?.finishReason && usage) {
    const outputReasoningTokens = usage.thoughtsTokenCount || undefined;
    const outputTextTokens = usage.candidatesTokenCount ?? 0;
    const totalOutputTokens = outputTextTokens + (outputReasoningTokens ?? 0);

    usageChunks.push(
      { data: candidate.finishReason, id: context?.id, type: 'stop' },
      {
        data: {
          // TODO: Google SDK 0.24.0 doesn't ship types for promptTokensDetails yet
          inputImageTokens: (usage as any).promptTokensDetails?.find(
            (i: any) => i.modality === 'IMAGE',
          )?.tokenCount,
          inputTextTokens: (usage as any).promptTokensDetails?.find(
            (i: any) => i.modality === 'TEXT',
          )?.tokenCount,
          outputReasoningTokens,
          outputTextTokens,
          totalInputTokens: usage.promptTokenCount,
          totalOutputTokens,
          totalTokens: usage.totalTokenCount,
        } as ModelTokensUsage,
        id: context?.id,
        type: 'usage',
      },
    );
  }
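
  // Illustrative shape of the two queued chunks (values are made up):
  //   { data: 'STOP', id: 'chat_abc', type: 'stop' }
  //   { data: { inputTextTokens: 8, outputReasoningTokens: 16, outputTextTokens: 42,
  //             totalInputTokens: 8, totalOutputTokens: 58, totalTokens: 66 },
  //     id: 'chat_abc', type: 'usage' }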

  // Check for reasoning content first: Gemini marks "thinking" parts with `thought: true`.
  if (
    candidate &&
    Array.isArray(candidate.content?.parts) &&
    candidate.content.parts.length > 0
  ) {
    for (const part of candidate.content.parts) {
      if (part?.text && (part as any).thought === true) {
        return { data: part.text, id: context.id, type: 'reasoning' };
      }
    }
  }
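
  // No candidate in this frame: emit an empty text chunk to keep the stream alive.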
  if (!candidate) {
    return {
      data: '',
      id: context?.id,
      type: 'text',
    };
  }
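
  // Main content path: tool calls first, then grounding citations, finish frames,
  // and finally plain text deltas.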
  if (candidate.content) {
    const part = candidate.content.parts?.[0];

    if (part?.functionCall) {
      const functionCall = part.functionCall;

      return [
        {
          data: [
            {
              function: {
                arguments: JSON.stringify(functionCall.args),
                name: functionCall.name,
              },
              id: generateToolCallId(0, functionCall.name),
              index: 0,
              type: 'function',
            },
          ],
          id: context?.id,
          type: 'tool_calls',
        },
        ...usageChunks,
      ];
    }
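
    // Illustrative output of the tool_calls branch above, for a hypothetical
    // `get_weather` call (the id comes from generateToolCallId(0, 'get_weather')):
    //   { data: [{ function: { arguments: '{"city":"Berlin"}', name: 'get_weather' },
    //              id: ..., index: 0, type: 'function' }],
    //     id: context.id, type: 'tool_calls' }
    // followed by any queued stop/usage chunks.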
    // Emit grounding (web search) citations, preceded by any text in this frame.
    const { groundingChunks, webSearchQueries } = candidate.groundingMetadata ?? {};

    if (groundingChunks) {
      return [
        part?.text ? { data: part.text, id: context?.id, type: 'text' } : undefined,
        {
          data: {
            citations: groundingChunks?.map((grounding) => ({
              // The uri Google returns is a Google-proxied URL, so the real site's
              // favicon cannot be resolved from it; use the title as a stand-in.
              favicon: grounding.web?.title,
              title: grounding.web?.title,
              url: grounding.web?.uri,
            })),
            searchQueries: webSearchQueries,
          } as GroundingSearch,
          id: context.id,
          type: 'grounding',
        },
        ...usageChunks,
      ].filter(Boolean) as StreamProtocolChunk[];
    }
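
    // Finish frame: flush any trailing text plus the queued usage chunks; if the
    // SDK sent no usage metadata, fall back to a bare 'stop' chunk.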
    if (candidate.finishReason) {
      if (chunk.usageMetadata) {
        return [
          part?.text ? { data: part.text, id: context?.id, type: 'text' } : undefined,
          ...usageChunks,
        ].filter(Boolean) as StreamProtocolChunk[];
      }

      return { data: candidate.finishReason, id: context?.id, type: 'stop' };
    }
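
    // Default: a plain text delta.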
    return {
      data: part?.text,
      id: context?.id,
      type: 'text',
    };
  }

  // Candidate without content: treat it as the end of the stream.
  return {
    data: '',
    id: context?.id,
    type: 'stop',
  };
};
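
/**
 * Pipes a raw Vertex AI response stream through the chat SSE protocol:
 * 1. transform each GenerateContentResponse into protocol chunks (while
 *    measuring token speed from `inputStartAt`),
 * 2. serialize those chunks as SSE events,
 * 3. invoke the provided lifecycle callbacks.
 */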
export const VertexAIStream = (
  rawStream: ReadableStream<GenerateContentResponse>,
  { callbacks, inputStartAt }: GoogleAIStreamOptions = {},
) => {
  const streamStack: StreamContext = { id: 'chat_' + nanoid() };

  return rawStream
    .pipeThrough(createTokenSpeedCalculator(transformVertexAIStream, { inputStartAt, streamStack }))
    .pipeThrough(createSSEProtocolTransformer((c) => c, streamStack))
    .pipeThrough(createCallbacksTransformer(callbacks));
};
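
// Example usage — a minimal sketch, not part of this module. `GoogleGenAI` and
// `generateContentStream` are from @google/genai; `convertIterableToStream` is a
// stand-in name for whatever adapter turns the SDK's async iterable into a
// ReadableStream, and the callback name depends on the ChatStreamCallbacks shape
// used by ./protocol:
//
//   const ai = new GoogleGenAI({ location, project, vertexai: true });
//   const iterable = await ai.models.generateContentStream({ contents, model });
//   const stream = VertexAIStream(convertIterableToStream(iterable), {
//     callbacks: { onCompletion: async (text) => console.log(text) },
//     inputStartAt: Date.now(),
//   });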