UNPKG

@langchain/community

Version:
1 lines 1.94 kB
{"version":3,"file":"pptx.cjs","names":["BufferLoader","Document"],"sources":["../../../src/document_loaders/fs/pptx.ts"],"sourcesContent":["import { parseOffice } from \"officeparser\";\nimport { Document } from \"@langchain/core/documents\";\nimport { BufferLoader } from \"@langchain/classic/document_loaders/fs/buffer\";\n\n/**\n * A class that extends the `BufferLoader` class. It represents a document\n * loader that loads documents from PPTX files.\n */\nexport class PPTXLoader extends BufferLoader {\n constructor(filePathOrBlob: string | Blob) {\n super(filePathOrBlob);\n }\n\n /**\n * A method that takes a `raw` buffer and `metadata` as parameters and\n * returns a promise that resolves to an array of `Document` instances. It\n * uses the `parseOffice` function from the `officeparser` module to extract\n * the raw text content from the buffer. If the extracted powerpoint content is\n * empty, it returns an empty array. Otherwise, it creates a new\n * `Document` instance with the extracted powerpoint content and the provided\n * metadata, and returns it as an array.\n * @param raw The buffer to be parsed.\n * @param metadata The metadata of the document.\n * @returns A promise that resolves to an array of `Document` instances.\n */\n public async parse(\n raw: Buffer,\n metadata: Document[\"metadata\"]\n ): Promise<Document[]> {\n const ast = await parseOffice(raw, { outputErrorToConsole: true });\n const pptx = ast.toText();\n\n if (!pptx) return [];\n\n return [\n new Document({\n pageContent: pptx,\n metadata,\n }),\n ];\n }\n}\n"],"mappings":";;;;;;;;;;;AAQA,IAAa,aAAb,cAAgCA,8CAAAA,aAAa;CAC3C,YAAY,gBAA+B;AACzC,QAAM,eAAe;;;;;;;;;;;;;;CAevB,MAAa,MACX,KACA,UACqB;EAErB,MAAM,QADM,OAAA,GAAA,aAAA,aAAkB,KAAK,EAAE,sBAAsB,MAAM,CAAC,EACjD,QAAQ;AAEzB,MAAI,CAAC,KAAM,QAAO,EAAE;AAEpB,SAAO,CACL,IAAIC,0BAAAA,SAAS;GACX,aAAa;GACb;GACD,CAAC,CACH"}