UNPKG

mongodb-rag-core

Version:

Common elements used by MongoDB Chatbot Framework components.

104 lines 4.68 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.getRelevantFilesAsStrings = exports.getRelevantFilePathsInDir = exports.getRepoLocally = exports.makeRandomTmp = exports.makeGitDataSource = void 0; const simple_git_1 = __importDefault(require("simple-git")); const fs_1 = __importDefault(require("fs")); const path_1 = __importDefault(require("path")); const os_1 = __importDefault(require("os")); const rimraf_1 = require("rimraf"); const arrayFilters_1 = require("../arrayFilters"); const logger_1 = require("../logger"); /** Loads and processes files from a Git repo (can be hosted anywhere). */ function makeGitDataSource({ name, handlePage, filter, sourceType, metadata, repoUri, repoOptions, }) { return { name, fetchPages: async () => { const randomTmpDir = makeRandomTmp(name); try { logger_1.logger.info(`Created ${randomTmpDir} for ${repoUri}`); await getRepoLocally({ repoPath: repoUri, localPath: randomTmpDir, options: { "--depth": 1, ...(repoOptions ?? {}) }, }); logger_1.logger.info(`Cloned ${repoUri} to ${randomTmpDir}`); const pathsAndContents = await getRelevantFilesAsStrings({ directoryPath: randomTmpDir, filter(path) { // pathInRepo is leading slash + path within the repo const pathInRepo = path.replace(randomTmpDir, ""); return filter(pathInRepo); }, }); const pagesPromises = Object.entries(pathsAndContents).map(async ([path, content]) => handlePage(path, content)); return (0, arrayFilters_1.filterDefined)((0, arrayFilters_1.filterFulfilled)(await Promise.allSettled(pagesPromises)).map(({ value }) => value)) .flat(1) .map((page) => ({ ...page, sourceName: name, sourceType: sourceType ?? page.sourceType, metadata: metadata || page.metadata ? { ...(metadata ?? {}), ...(page.metadata ?? {}) } : undefined, })); } finally { (0, rimraf_1.rimrafSync)(randomTmpDir); logger_1.logger.info(`Deleted ${randomTmpDir}`); } }, }; } exports.makeGitDataSource = makeGitDataSource; // ---------------- // Helper functions // ---------------- /** @param prefix - prefix for the temporary directory name */ function makeRandomTmp(prefix) { // Get the system's default temporary directory const tmpDir = os_1.default.tmpdir(); // Create a unique temporary directory and get its path const randomTmpDir = fs_1.default.mkdtempSync(path_1.default.resolve(tmpDir, prefix)); return randomTmpDir; } exports.makeRandomTmp = makeRandomTmp; async function getRepoLocally({ repoPath, localPath, options, }) { const git = (0, simple_git_1.default)(); logger_1.logger.info(`Started cloning ${repoPath} to ${localPath} with options ${JSON.stringify(options)}`); await git.clone(repoPath, localPath, options); logger_1.logger.info(`Successfully cloned ${repoPath} to ${localPath} with options ${JSON.stringify(options)}`); } exports.getRepoLocally = getRepoLocally; function getRelevantFilePathsInDir(directoryPath, filter, fileList = []) { const items = fs_1.default.readdirSync(directoryPath); items.forEach((item) => { const itemPath = path_1.default.resolve(directoryPath, item); const itemStat = fs_1.default.statSync(itemPath); if (itemStat.isDirectory()) { getRelevantFilePathsInDir(itemPath, filter, fileList); } else if (filter(itemPath)) { fileList.push(itemPath); } }); return fileList; } exports.getRelevantFilePathsInDir = getRelevantFilePathsInDir; async function getRelevantFilesAsStrings({ directoryPath, filter, }) { const paths = getRelevantFilePathsInDir(directoryPath, filter); const pathsAndContents = {}; paths.forEach((path) => { const content = fs_1.default.readFileSync(path, "utf8"); pathsAndContents[path.replace(directoryPath, "")] = content; }); return pathsAndContents; } exports.getRelevantFilesAsStrings = getRelevantFilesAsStrings; //# sourceMappingURL=GitDataSource.js.map