@lobehub/chat
Version:
Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal input, and an extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.
539 lines (450 loc) • 15.9 kB
text/typescript
// @vitest-environment node
import { eq } from 'drizzle-orm/expressions';
import { afterEach, beforeEach, describe, expect, it } from 'vitest';
import { LobeChatDatabase } from '@/database/type';
import { uuid } from '@/utils/uuid';
import { chunks, embeddings, fileChunks, files, unstructuredChunks, users } from '../../schemas';
import { ChunkModel } from '../chunk';
import { getTestDB } from './_util';
import { codeEmbedding, designThinkingQuery, designThinkingQuery2 } from './fixtures/embedding';
// Shared fixtures: one database handle, one test user id, and a model bound to that user.
const serverDB: LobeChatDatabase = await getTestDB();

const userId = 'chunk-model-test-user-id';

const chunkModel = new ChunkModel(serverDB, userId);

// Files owned by the test user; tests attach chunks to them through their ids.
const sharedFileList = [
  {
    id: '1',
    name: 'document.pdf',
    url: 'https://example.com/document.pdf',
    size: 1000,
    fileType: 'application/pdf',
  },
  {
    id: '2',
    name: 'image.jpg',
    url: 'https://example.com/image.jpg',
    size: 500,
    fileType: 'image/jpeg',
  },
  {
    id: '3',
    name: 'audio.mp3',
    url: 'https://example.com/audio.mp3',
    size: 2000,
    fileType: 'audio/mpeg',
  },
].map((file) => ({ ...file, userId }));
// Start every test from the same baseline: the users table is cleared, then the
// test user and the shared files are inserted.
// NOTE(review): clearing `users` presumably cascades to dependent rows
// (files, chunks, ...) — confirm against the schema.
beforeEach(async () => {
  const testUser = { id: userId };

  await serverDB.delete(users);
  await serverDB.insert(users).values([testUser]);
  await serverDB.insert(files).values(sharedFileList);
});
// Remove the test user once each test has finished.
afterEach(async () => {
  const isTestUser = eq(users.id, userId);

  await serverDB.delete(users).where(isTestUser);
});
describe('ChunkModel', () => {
// bulkCreate: inserting several chunks for a file in one call.
describe('bulkCreate', () => {
  it('should create multiple chunks', async () => {
    const chunkInputs = [
      { text: 'Chunk 1', userId },
      { text: 'Chunk 2', userId },
    ];

    await chunkModel.bulkCreate(chunkInputs, '1');

    const storedChunks = await serverDB.query.chunks.findMany({
      where: eq(chunks.userId, userId),
    });

    expect(storedChunks).toHaveLength(2);
    // Each stored row must contain the fields of the input at the same position.
    chunkInputs.forEach((input, position) => {
      expect(storedChunks[position]).toMatchObject(input);
    });
  });

  // Empty input scenario
  it('should handle empty params array', async () => {
    const created = await chunkModel.bulkCreate([], '1');

    expect(created).toHaveLength(0);
  });

  // Transaction rollback scenario
  it('should rollback transaction on error', async () => {
    const invalidParams = [
      { text: 'Chunk 1', userId },
      { index: 'abc', userId }, // this entry causes the error
    ] as any;

    await expect(chunkModel.bulkCreate(invalidParams, '1')).rejects.toThrow();

    // Nothing may be persisted, including the valid first entry.
    const storedChunks = await serverDB.query.chunks.findMany({
      where: eq(chunks.userId, userId),
    });
    expect(storedChunks).toHaveLength(0);
  });
});
// delete: removing a single chunk by its id.
describe('delete', () => {
  it('should delete a chunk by id', async () => {
    // Seed the row directly through the database so only `delete` is exercised.
    const [seededChunk] = await serverDB
      .insert(chunks)
      .values({ text: 'Test Chunk', userId })
      .returning();
    const { id } = seededChunk;

    await chunkModel.delete(id);

    const lookedUp = await serverDB.query.chunks.findFirst({
      where: eq(chunks.id, id),
    });
    expect(lookedUp).toBeUndefined();
  });
});
// deleteOrphanChunks: in these tests an "orphan" is a chunk with no fileChunks row.
describe('deleteOrphanChunks', () => {
  it('should delete orphaned chunks', async () => {
    // Two chunks that are never linked to a file.
    await serverDB
      .insert(chunks)
      .values([
        { text: 'Orphan Chunk 1', userId },
        { text: 'Orphan Chunk 2', userId },
      ])
      .returning();

    // One chunk that is linked to file '1' and therefore must survive.
    const [linkedChunk] = await serverDB
      .insert(chunks)
      .values([{ text: 'Non-Orphan Chunk', userId }])
      .returning();
    await serverDB
      .insert(fileChunks)
      .values([{ fileId: '1', chunkId: linkedChunk.id, userId }]);

    await chunkModel.deleteOrphanChunks();

    // Only the linked chunk should be left.
    const survivors = await serverDB.query.chunks.findMany();
    expect(survivors).toHaveLength(1);
    expect(survivors[0].id).toBe(linkedChunk.id);
  });

  it('should not delete any chunks when there are no orphans', async () => {
    // Both chunks get a fileChunks link, so neither is an orphan.
    const [firstChunk, secondChunk] = await serverDB
      .insert(chunks)
      .values([
        { text: 'Chunk 1', userId },
        { text: 'Chunk 2', userId },
      ])
      .returning();
    await serverDB.insert(fileChunks).values([
      { fileId: '1', chunkId: firstChunk.id, userId },
      { fileId: '2', chunkId: secondChunk.id, userId },
    ]);

    await chunkModel.deleteOrphanChunks();

    // Every chunk must still be present.
    const survivors = await serverDB.query.chunks.findMany();
    expect(survivors).toHaveLength(2);
  });

  it('should not throw an error when the database is empty', async () => {
    // Clear both tables so there is nothing to inspect or delete.
    await serverDB.delete(chunks);
    await serverDB.delete(fileChunks);

    const cleanup = chunkModel.deleteOrphanChunks();

    await expect(cleanup).resolves.not.toThrow();
  });
});
// semanticSearch: similarity search over stored chunk embeddings.
describe('semanticSearch', () => {
  it('should perform semantic search and return results', async () => {
    const fileId = '1';

    const [designChunk, codeChunk] = await serverDB
      .insert(chunks)
      .values([
        { text: 'Test Chunk 1', userId },
        { text: 'Test Chunk 2', userId },
      ])
      .returning();

    await serverDB.insert(fileChunks).values([
      { fileId, chunkId: designChunk.id, userId },
      { fileId, chunkId: codeChunk.id, userId },
    ]);

    // The first chunk gets the design-thinking fixture, the second the code fixture.
    await serverDB.insert(embeddings).values([
      { chunkId: designChunk.id, embeddings: designThinkingQuery, userId },
      { chunkId: codeChunk.id, embeddings: codeEmbedding, userId },
    ]);

    const matches = await chunkModel.semanticSearch({
      embedding: designThinkingQuery2,
      fileIds: [fileId],
      query: 'design thinking',
    });

    // Both chunks come back, with the design-thinking chunk ranked first.
    expect(matches).toHaveLength(2);
    const [best, runnerUp] = matches;
    expect(best.id).toBe(designChunk.id);
    expect(runnerUp.id).toBe(codeChunk.id);
    expect(best.similarity).toBeGreaterThan(runnerUp.similarity);
  });

  // Search scenario without any file ids
  it('should perform semantic search without fileIds', async () => {
    const [designChunk, codeChunk] = await serverDB
      .insert(chunks)
      .values([
        { text: 'Test Chunk 1', userId },
        { text: 'Test Chunk 2', userId },
      ])
      .returning();

    // No fileChunks rows are created here: the chunks are not linked to a file.
    await serverDB.insert(embeddings).values([
      { chunkId: designChunk.id, embeddings: designThinkingQuery, userId },
      { chunkId: codeChunk.id, embeddings: codeEmbedding, userId },
    ]);

    const matches = await chunkModel.semanticSearch({
      embedding: designThinkingQuery2,
      fileIds: undefined,
      query: 'design thinking',
    });

    expect(matches).toBeDefined();
    expect(matches).toHaveLength(2);
  });

  // Empty result scenario
  it('should return empty array when no matches found', async () => {
    const matches = await chunkModel.semanticSearch({
      embedding: designThinkingQuery,
      fileIds: ['non-existent-file'],
      query: 'no matches',
    });

    expect(matches).toHaveLength(0);
  });
});
// bulkCreateUnstructuredChunks: inserting several unstructured chunks in one call.
describe('bulkCreateUnstructuredChunks', () => {
  it('should create multiple unstructured chunks', async () => {
    // The unstructured chunks point at this chunk through `compositeId`.
    const [compositeChunk] = await serverDB
      .insert(chunks)
      .values([{ text: 'Chunk 1', userId, index: 0 }])
      .returning();

    const unstructuredInputs = ['Unstructured Chunk 1', 'Unstructured Chunk 2'].map((text) => ({
      text,
      userId,
      fileId: '1',
      parentId: '1',
      compositeId: compositeChunk.id,
    }));

    await chunkModel.bulkCreateUnstructuredChunks(unstructuredInputs);

    const storedRows = await serverDB.query.unstructuredChunks.findMany({
      where: eq(unstructuredChunks.userId, userId),
    });

    expect(storedRows).toHaveLength(2);
    expect(storedRows[0]).toMatchObject(unstructuredInputs[0]);
    expect(storedRows[1]).toMatchObject(unstructuredInputs[1]);
  });
});
// findByFileId: listing the chunks linked to a file.
describe('findByFileId', () => {
  it('should find chunks by file id with pagination', async () => {
    const fileId = '1';

    const insertedChunks = await serverDB
      .insert(chunks)
      .values([
        { text: 'Chunk 1', userId, index: 0 },
        { text: 'Chunk 2', userId, index: 1 },
        { text: 'Chunk 3', userId, index: 2 },
      ])
      .returning();
    const [first, second, third] = insertedChunks;

    await serverDB.insert(fileChunks).values([
      { fileId, chunkId: first.id, userId },
      { fileId, chunkId: second.id, userId },
      { fileId, chunkId: third.id, userId },
    ]);

    // Second argument is 0, i.e. the first page is requested.
    const page = await chunkModel.findByFileId(fileId, 0);

    expect(page).toHaveLength(3);
    // Rows are expected back ordered by their `index` value.
    expect(page[0].index).toBe(0);
    expect(page[1].index).toBe(1);
    expect(page[2].index).toBe(2);
  });
});
// getChunksTextByFileId: reading the text of every chunk linked to a file.
describe('getChunksTextByFileId', () => {
  it('should get chunks text by file id', async () => {
    const fileId = '1';

    const [first, second] = await serverDB
      .insert(chunks)
      .values([
        { text: 'Chunk 1', userId },
        { text: 'Chunk 2', userId },
      ])
      .returning();

    await serverDB.insert(fileChunks).values([
      { fileId, chunkId: first.id, userId },
      { fileId, chunkId: second.id, userId },
    ]);

    const texts = await chunkModel.getChunksTextByFileId(fileId);

    expect(texts).toHaveLength(2);
    expect(texts[0].text).toBe('Chunk 1');
    expect(texts[1].text).toBe('Chunk 2');
  });
});
// countByFileIds: per-file chunk counts for a list of file ids.
describe('countByFileIds', () => {
  it('should count chunks by file ids', async () => {
    const fileIds = ['1', '2'];

    const [first, second, third] = await serverDB
      .insert(chunks)
      .values([
        { text: 'Chunk 1', userId, index: 0 },
        { text: 'Chunk 2', userId, index: 1 },
        { text: 'Chunk 3', userId, index: 2 },
      ])
      .returning();

    // File '1' gets two chunks, file '2' gets one.
    await serverDB.insert(fileChunks).values([
      { fileId: '1', chunkId: first.id, userId },
      { fileId: '1', chunkId: second.id, userId },
      { fileId: '2', chunkId: third.id, userId },
    ]);

    const counts = await chunkModel.countByFileIds(fileIds);

    expect(counts).toHaveLength(2);
    const countForFileOne = counts.find((entry) => entry.id === '1')?.count;
    const countForFileTwo = counts.find((entry) => entry.id === '2')?.count;
    expect(countForFileOne).toBe(2);
    expect(countForFileTwo).toBe(1);
  });

  it('should return empty array for empty file ids', async () => {
    const counts = await chunkModel.countByFileIds([]);

    expect(counts).toHaveLength(0);
  });
});
// countByFileId: number of chunks linked to a single file.
describe('countByFileId', () => {
  it('should count chunks by file id', async () => {
    const fileId = '1';

    // Exactly two chunks are inserted, so exactly two are destructured.
    const [chunk1, chunk2] = await serverDB
      .insert(chunks)
      .values([
        { text: 'Chunk 1', userId, index: 0 },
        { text: 'Chunk 2', userId, index: 1 },
      ])
      .returning();

    await serverDB.insert(fileChunks).values([
      { fileId, chunkId: chunk1.id, userId },
      { fileId, chunkId: chunk2.id, userId },
    ]);

    const result = await chunkModel.countByFileId(fileId);

    expect(result).toBe(2);
  });

  it('should return 0 for non-existent file id', async () => {
    const result = await chunkModel.countByFileId('non-existent');

    expect(result).toBe(0);
  });
});
// semanticSearchForChat: similarity search restricted to the given files.
describe('semanticSearchForChat', () => {
  it('should perform semantic search for chat and return results', async () => {
    const fileId = '1';

    const [designChunk, codeChunk] = await serverDB
      .insert(chunks)
      .values([
        { text: 'Test Chunk 1', userId },
        { text: 'Test Chunk 2', userId },
      ])
      .returning();

    await serverDB.insert(fileChunks).values([
      { fileId, chunkId: designChunk.id, userId },
      { fileId, chunkId: codeChunk.id, userId },
    ]);

    // The first chunk gets the design-thinking fixture, the second the code fixture.
    await serverDB.insert(embeddings).values([
      { chunkId: designChunk.id, embeddings: designThinkingQuery, userId },
      { chunkId: codeChunk.id, embeddings: codeEmbedding, userId },
    ]);

    const matches = await chunkModel.semanticSearchForChat({
      embedding: designThinkingQuery2,
      fileIds: [fileId],
      query: 'design thinking',
    });

    // Both chunks come back, with the design-thinking chunk ranked first.
    expect(matches).toHaveLength(2);
    const [best, runnerUp] = matches;
    expect(best.id).toBe(designChunk.id);
    expect(runnerUp.id).toBe(codeChunk.id);
    expect(best.similarity).toBeGreaterThan(runnerUp.similarity);
  });
});
// mapChunkText is reached through bracket access, i.e. it is not part of the
// model's public typed surface.
describe('mapChunkText', () => {
  it('should map chunk text correctly for non-Table type', () => {
    const textChunk = {
      text: 'Normal text',
      type: 'Text',
      metadata: {},
    };

    const mapped = chunkModel['mapChunkText'](textChunk);

    expect(mapped).toBe('Normal text');
  });

  it('should map chunk text correctly for Table type', () => {
    const tableChunk = {
      text: 'Table text',
      type: 'Table',
      metadata: {
        text_as_html: '<table>...</table>',
      },
    };

    const mapped = chunkModel['mapChunkText'](tableChunk);

    // The expected value is a multi-line template literal: its continuation
    // lines must stay at column 0 so no indentation leaks into the string.
    expect(mapped).toBe(`Table text
content in Table html is below:
<table>...</table>
`);
  });

  it('should handle null text', () => {
    const emptyChunk = {
      text: null,
      type: 'Text',
      metadata: {},
    };

    const mapped = chunkModel['mapChunkText'](emptyChunk);

    expect(mapped).toBeNull();
  });

  it('should handle missing metadata for Table type', () => {
    const tableWithoutHtml = {
      text: 'Table text',
      type: 'Table',
      metadata: {},
    };

    const mapped = chunkModel['mapChunkText'](tableWithoutHtml);

    expect(mapped).toContain('Table text');
    expect(mapped).toContain('content in Table html is below:');
    // metadata.text_as_html is undefined, so the literal text "undefined" appears.
    expect(mapped).toContain('undefined');
  });
});
// findById: looking up a single chunk by its id.
describe('findById', () => {
  it('should find a chunk by id', async () => {
    // Create a test chunk
    const [chunk] = await serverDB
      .insert(chunks)
      .values({ text: 'Test Chunk', userId })
      .returning();

    const result = await chunkModel.findById(chunk.id);

    expect(result).toBeDefined();
    expect(result?.id).toBe(chunk.id);
    expect(result?.text).toBe('Test Chunk');
  });

  // The title now matches the assertion: a miss is asserted as `undefined`, not `null`.
  it('should return undefined for non-existent id', async () => {
    // A freshly generated uuid that was never inserted.
    const result = await chunkModel.findById(uuid());

    expect(result).toBeUndefined();
  });
});
// Edge cases for semanticSearchForChat: empty file list and the result cap.
describe('semanticSearchForChat', () => {
  // Empty file id list scenario
  it('should return empty array when fileIds is empty', async () => {
    const matches = await chunkModel.semanticSearchForChat({
      embedding: designThinkingQuery,
      fileIds: [],
      query: 'test',
    });

    expect(matches).toHaveLength(0);
  });

  // Result limit scenario
  it('should limit results to 15 items', async () => {
    const fileId = '1';

    // Create 24 chunks, more than the expected cap of 15.
    const chunkInputs = Array.from({ length: 24 }, (_, i) => ({
      text: `Test Chunk ${i}`,
      userId,
    }));
    const insertedChunks = await serverDB.insert(chunks).values(chunkInputs).returning();

    // Link every chunk to the same file.
    const fileLinks = insertedChunks.map((chunk) => ({
      fileId,
      chunkId: chunk.id,
      userId,
    }));
    await serverDB.insert(fileChunks).values(fileLinks);

    // Give every chunk the same embedding fixture.
    const embeddingRows = insertedChunks.map((chunk) => ({
      chunkId: chunk.id,
      embeddings: designThinkingQuery,
      userId,
    }));
    await serverDB.insert(embeddings).values(embeddingRows);

    const matches = await chunkModel.semanticSearchForChat({
      embedding: designThinkingQuery2,
      fileIds: [fileId],
      query: 'test',
    });

    expect(matches).toHaveLength(15);
  });
});
});