UNPKG

@langchain/community

Version:
1 lines 5.89 kB
{"version":3,"file":"s3.cjs","names":["BaseDocumentLoader","fsDefault","UnstructuredLoaderDefault","UnstructuredLoader","path","os","S3Client","GetObjectCommand","Readable"],"sources":["../../../src/document_loaders/web/s3.ts"],"sourcesContent":["import * as fsDefault from \"node:fs\";\nimport * as path from \"node:path\";\nimport * as os from \"node:os\";\nimport { Readable } from \"node:stream\";\nimport { S3Client, GetObjectCommand, S3ClientConfig } from \"@aws-sdk/client-s3\";\nimport { BaseDocumentLoader } from \"@langchain/core/document_loaders/base\";\nimport { UnstructuredLoader as UnstructuredLoaderDefault } from \"../fs/unstructured.js\";\n\n/**\n * Represents the parameters for the S3Loader class. It includes\n * properties such as the S3 bucket, key, unstructured API URL,\n * unstructured API key, S3 configuration, file system module, and\n * UnstructuredLoader module.\n */\nexport interface S3LoaderParams {\n bucket: string;\n key: string;\n unstructuredAPIURL: string;\n unstructuredAPIKey: string;\n s3Config?: S3ClientConfig;\n fs?: typeof fsDefault;\n UnstructuredLoader?: typeof UnstructuredLoaderDefault;\n}\n\n/**\n * A class that extends the BaseDocumentLoader class. It represents a\n * document loader for loading files from an S3 bucket.\n * @example\n * ```typescript\n * const loader = new S3Loader({\n * bucket: \"my-document-bucket-123\",\n * key: \"AccountingOverview.pdf\",\n * s3Config: {\n * region: \"us-east-1\",\n * credentials: {\n * accessKeyId: \"<YourAccessKeyId>\",\n * secretAccessKey: \"<YourSecretAccessKey>\",\n * },\n * },\n * unstructuredAPIURL: \"<YourUnstructuredAPIURL>\",\n * unstructuredAPIKey: \"<YourUnstructuredAPIKey>\",\n * });\n * const docs = await loader.load();\n * ```\n */\nexport class S3Loader extends BaseDocumentLoader {\n private bucket: string;\n\n private key: string;\n\n private unstructuredAPIURL: string;\n\n private unstructuredAPIKey: string;\n\n private s3Config: S3ClientConfig;\n\n private _fs: typeof fsDefault;\n\n private _UnstructuredLoader: typeof UnstructuredLoaderDefault;\n\n constructor({\n bucket,\n key,\n unstructuredAPIURL,\n unstructuredAPIKey,\n s3Config = {},\n fs = fsDefault,\n UnstructuredLoader = UnstructuredLoaderDefault,\n }: S3LoaderParams) {\n super();\n this.bucket = bucket;\n this.key = key;\n this.unstructuredAPIURL = unstructuredAPIURL;\n this.unstructuredAPIKey = unstructuredAPIKey;\n this.s3Config = s3Config;\n this._fs = fs;\n this._UnstructuredLoader = UnstructuredLoader;\n }\n\n /**\n * Loads the file from the S3 bucket, saves it to a temporary directory,\n * and then uses the UnstructuredLoader to load the file as a document.\n * @returns An array of Document objects representing the loaded documents.\n */\n public async load() {\n const tempDir = this._fs.mkdtempSync(\n path.join(os.tmpdir(), \"s3fileloader-\")\n );\n\n const filePath = path.join(tempDir, this.key);\n\n try {\n const s3Client = new S3Client(this.s3Config);\n\n const getObjectCommand = new GetObjectCommand({\n Bucket: this.bucket,\n Key: this.key,\n });\n\n const response = await s3Client.send(getObjectCommand);\n\n const objectData = await new Promise<Buffer>((resolve, reject) => {\n const chunks: Buffer[] = [];\n\n // eslint-disable-next-line no-instanceof/no-instanceof\n if (response.Body instanceof Readable) {\n response.Body.on(\"data\", (chunk: Buffer) => chunks.push(chunk));\n response.Body.on(\"end\", () => resolve(Buffer.concat(chunks)));\n response.Body.on(\"error\", reject);\n } else {\n reject(new Error(\"Response body is not a readable stream.\"));\n }\n });\n\n this._fs.mkdirSync(path.dirname(filePath), { recursive: true });\n\n this._fs.writeFileSync(filePath, objectData);\n // oxlint-disable-next-line typescript/no-explicit-any\n } catch (e: any) {\n throw new Error(\n `Failed to download file ${this.key} from S3 bucket ${this.bucket}: ${e.message}`\n );\n }\n\n try {\n const options = {\n apiUrl: this.unstructuredAPIURL,\n apiKey: this.unstructuredAPIKey,\n };\n\n const unstructuredLoader = new this._UnstructuredLoader(\n filePath,\n options\n );\n\n const docs = await unstructuredLoader.load();\n\n return docs;\n } catch {\n throw new Error(\n `Failed to load file ${filePath} using unstructured loader.`\n );\n }\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AA6CA,IAAa,WAAb,cAA8BA,sCAAAA,mBAAmB;CAC/C;CAEA;CAEA;CAEA;CAEA;CAEA;CAEA;CAEA,YAAY,EACV,QACA,KACA,oBACA,oBACA,WAAW,EAAE,EACb,KAAKC,SACL,oBAAA,uBAAqBC,yCAAAA,sBACJ;AACjB,SAAO;AACP,OAAK,SAAS;AACd,OAAK,MAAM;AACX,OAAK,qBAAqB;AAC1B,OAAK,qBAAqB;AAC1B,OAAK,WAAW;AAChB,OAAK,MAAM;AACX,OAAK,sBAAsBC;;;;;;;CAQ7B,MAAa,OAAO;EAClB,MAAM,UAAU,KAAK,IAAI,YACvBC,UAAK,KAAKC,QAAG,QAAQ,EAAE,gBAAgB,CACxC;EAED,MAAM,WAAWD,UAAK,KAAK,SAAS,KAAK,IAAI;AAE7C,MAAI;GACF,MAAM,WAAW,IAAIE,mBAAAA,SAAS,KAAK,SAAS;GAE5C,MAAM,mBAAmB,IAAIC,mBAAAA,iBAAiB;IAC5C,QAAQ,KAAK;IACb,KAAK,KAAK;IACX,CAAC;GAEF,MAAM,WAAW,MAAM,SAAS,KAAK,iBAAiB;GAEtD,MAAM,aAAa,MAAM,IAAI,SAAiB,SAAS,WAAW;IAChE,MAAM,SAAmB,EAAE;AAG3B,QAAI,SAAS,gBAAgBC,YAAAA,UAAU;AACrC,cAAS,KAAK,GAAG,SAAS,UAAkB,OAAO,KAAK,MAAM,CAAC;AAC/D,cAAS,KAAK,GAAG,aAAa,QAAQ,OAAO,OAAO,OAAO,CAAC,CAAC;AAC7D,cAAS,KAAK,GAAG,SAAS,OAAO;UAEjC,wBAAO,IAAI,MAAM,0CAA0C,CAAC;KAE9D;AAEF,QAAK,IAAI,UAAUJ,UAAK,QAAQ,SAAS,EAAE,EAAE,WAAW,MAAM,CAAC;AAE/D,QAAK,IAAI,cAAc,UAAU,WAAW;WAErC,GAAQ;AACf,SAAM,IAAI,MACR,2BAA2B,KAAK,IAAI,kBAAkB,KAAK,OAAO,IAAI,EAAE,UACzE;;AAGH,MAAI;GACF,MAAM,UAAU;IACd,QAAQ,KAAK;IACb,QAAQ,KAAK;IACd;AASD,UAFa,MALc,IAAI,KAAK,oBAClC,UACA,QACD,CAEqC,MAAM;UAGtC;AACN,SAAM,IAAI,MACR,uBAAuB,SAAS,6BACjC"}