/*
 * @ever_cheng/memory-task-mcp
 * Version:
 * Memory and task management MCP Server
 * 101 lines (100 loc) • 2.34 kB
 * TypeScript
 */
/**
* Chunking Service for MemTask
*
* Intelligent text chunking for better semantic search and embedding generation.
* Supports multiple chunking strategies optimized for multilingual-e5-large model.
*/
import { Memory } from './types';
/**
* Memory Chunk Interface
*
* Shape of a single chunk produced by the chunking service for a Memory.
* Only the field names and types are established by this declaration; the
* notes on individual fields below are inferred from naming and should be
* confirmed against the implementation.
*/
export interface MemoryChunk {
/** Identifier of this chunk. NOTE(review): format/uniqueness scope not visible here. */
id: string;
/** Presumably the id of the Memory this chunk was derived from - confirm. */
memoryId: string;
/** Position of this chunk among its siblings. NOTE(review): zero- vs one-based is not visible here. */
chunkIndex: number;
/** Presumably the number of chunks the source Memory was split into - confirm. */
totalChunks: number;
/** Text carried by this chunk. */
content: string;
/** Context string attached to the chunk. NOTE(review): how it is built is not visible in this declaration. */
context: string;
/** Bookkeeping data attached to the chunk. */
metadata: {
/** Creation timestamp as a string. NOTE(review): format and whether it is the Memory's or the chunk's are not visible here. */
created_at: string;
/** Tags associated with the chunk; presumably copied from the source Memory - confirm. */
tags: string[];
/** Presumably the length of the full source text before chunking - confirm units. */
originalLength: number;
/** Presumably the length of this chunk's content - confirm units. */
chunkLength: number;
};
/** Optional embedding vector; absent until one has been assigned. */
embedding?: number[];
}
/**
* Chunking Configuration
*
* Tunable parameters for the chunking service. This declaration fixes only
* the field names and types; units (characters vs. tokens) and exact
* enforcement rules are not visible here and are marked for confirmation.
*/
export interface ChunkingConfig {
/** Upper bound on chunk size. NOTE(review): units not visible here - confirm. */
maxChunkSize: number;
/** Presumably the amount of text shared between adjacent chunks - confirm units. */
overlapSize: number;
/** Lower bound on chunk size; presumably smaller chunks are filtered out - confirm. */
minChunkSize: number;
/** Presumably selects boundary-preserving (semantic) chunking over fixed-length chunking - confirm. */
preserveBoundaries: boolean;
}
/**
* Default chunking configuration optimized for multilingual-e5-large
*
* The concrete values are defined in the implementation and are not visible
* in this declaration.
*/
export declare const DEFAULT_CHUNKING_CONFIG: ChunkingConfig;
/**
* Chunking Service Class
*
* Declares the public and private surface of the service that splits
* Memory objects into MemoryChunk records. Only signatures are visible in
* this declaration; behavioral notes below are translated from the original
* comments or hedged where they rest on naming alone.
*/
export declare class ChunkingService {
/** Configuration held by the instance; its type is not exposed in this declaration. */
private config;
/**
* Creates a chunking service.
*
* @param config - Optional configuration. NOTE(review): presumably falls
* back to DEFAULT_CHUNKING_CONFIG when omitted - confirm.
*/
constructor(config?: ChunkingConfig);
/**
* Intelligent chunking - creates chunks for a Memory.
*
* @param memory - The Memory to split.
* @returns A promise resolving to the chunks produced for that Memory.
*/
chunkMemory(memory: Memory): Promise<MemoryChunk[]>;
/**
* Semantic-boundary chunking - keeps paragraphs and sentences intact.
*/
private semanticChunking;
/**
* Simple fixed-length chunking.
*/
private simpleChunking;
/**
* Splits text into sentences.
*/
private splitIntoSentences;
/**
* Filters out chunks that are too small.
*/
private filterSmallChunks;
/**
* Builds context information.
*/
private buildContext;
/**
* Gets the context of the previous chunk.
*/
private getPreviousContext;
/**
* Gets the context of the next chunk.
*/
private getNextContext;
/**
* Batch-processes multiple memories.
*
* @param memories - The Memory objects to chunk.
* @returns A promise resolving to a single array of chunks.
* NOTE(review): ordering and whether chunks from all memories are
* concatenated is not visible here - confirm.
*/
batchChunkMemories(memories: Memory[]): Promise<MemoryChunk[]>;
/**
* Updates the configuration.
*
* @param newConfig - Subset of configuration fields to change.
* NOTE(review): presumably merged over the current configuration - confirm.
*/
updateConfig(newConfig: Partial<ChunkingConfig>): void;
/**
* Gets the current configuration.
*
* @returns The configuration in use. NOTE(review): whether this is a copy
* or the live object is not visible here.
*/
getConfig(): ChunkingConfig;
/**
* Statistics.
*
* @param chunks - The chunks to summarize.
* @returns Aggregate figures for the given chunks. NOTE(review): the exact
* formulas (in particular how compressionRatio is defined and how an empty
* input is handled) are not visible in this declaration - confirm.
*/
getChunkingStats(chunks: MemoryChunk[]): {
totalChunks: number;
avgChunkLength: number;
maxChunkLength: number;
minChunkLength: number;
totalOriginalLength: number;
compressionRatio: number;
};
}