@lobehub/chat
Version:
Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.
126 lines (99 loc) • 3.07 kB
text/typescript
import {
SupportedTextSplitterLanguage,
SupportedTextSplitterLanguages,
} from 'langchain/text_splitter';
import { LANGCHAIN_SUPPORT_TEXT_LIST } from '@/libs/langchain/file';
import { LangChainLoaderType } from '@/libs/langchain/types';
import { CodeLoader } from './code';
import { CsVLoader } from './csv';
import { DocxLoader } from './docx';
import { EPubLoader } from './epub';
import { LatexLoader } from './latex';
import { MarkdownLoader } from './markdown';
import { PdfLoader } from './pdf';
import { PPTXLoader } from './pptx';
import { TextLoader } from './txt';
class LangChainError extends Error {
constructor(message: string) {
super(message);
this.name = 'LangChainChunkingError';
}
}
export class ChunkingLoader {
partitionContent = async (filename: string, content: Uint8Array) => {
try {
const fileBlob = new Blob([Buffer.from(content)]);
const txt = this.uint8ArrayToString(content);
const type = this.getType(filename?.toLowerCase());
switch (type) {
case 'code': {
const ext = filename.split('.').pop();
return await CodeLoader(txt, ext!);
}
case 'ppt': {
return await PPTXLoader(fileBlob);
}
case 'latex': {
return await LatexLoader(txt);
}
case 'pdf': {
return await PdfLoader(fileBlob);
}
case 'markdown': {
return await MarkdownLoader(txt);
}
case 'doc': {
return await DocxLoader(fileBlob);
}
case 'text': {
return await TextLoader(txt);
}
case 'csv': {
return await CsVLoader(fileBlob);
}
case 'epub': {
return await EPubLoader(content);
}
default: {
throw new Error(
`Unsupported file type [${type}], please check your file is supported, or create report issue here: https://github.com/lobehub/lobe-chat/discussions/3550`,
);
}
}
} catch (e) {
throw new LangChainError((e as Error).message);
}
};
private getType = (filename: string): LangChainLoaderType | undefined => {
if (filename.endsWith('pptx')) {
return 'ppt';
}
if (filename.endsWith('docx') || filename.endsWith('doc')) {
return 'doc';
}
if (filename.endsWith('pdf')) {
return 'pdf';
}
if (filename.endsWith('tex')) {
return 'latex';
}
if (filename.endsWith('md') || filename.endsWith('mdx')) {
return 'markdown';
}
if (filename.endsWith('csv')) {
return 'csv';
}
if (filename.endsWith('epub')) {
return 'epub';
}
const ext = filename.split('.').pop();
if (ext && SupportedTextSplitterLanguages.includes(ext as SupportedTextSplitterLanguage)) {
return 'code';
}
if (ext && LANGCHAIN_SUPPORT_TEXT_LIST.includes(ext)) return 'text';
};
private uint8ArrayToString(uint8Array: Uint8Array) {
const decoder = new TextDecoder();
return decoder.decode(uint8Array);
}
}