mongodb-rag-core
Version:
Common elements used by MongoDB Chatbot Framework components.
104 lines • 4.68 kB
JavaScript
;
// CommonJS/ES-module interop shim: reuse an already-installed helper when one
// is present on `this`, otherwise wrap plain CommonJS exports so callers can
// always read the module through `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Modules flagged as ES modules already carry their own `default` export.
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.getRelevantFilesAsStrings = exports.getRelevantFilePathsInDir = exports.getRepoLocally = exports.makeRandomTmp = exports.makeGitDataSource = void 0;
const simple_git_1 = __importDefault(require("simple-git"));
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const os_1 = __importDefault(require("os"));
const rimraf_1 = require("rimraf");
const arrayFilters_1 = require("../arrayFilters");
const logger_1 = require("../logger");
/**
  Builds a data source that loads and processes files from a Git repo (can be
  hosted anywhere).

  Every call to `fetchPages` clones `repoUri` into a fresh temporary directory
  (with `--depth 1` unless `repoOptions` overrides it), hands each file accepted
  by `filter` to `handlePage`, stamps the resulting pages with the source-level
  `name`, `sourceType` and `metadata`, and finally removes the temporary
  directory again, whether or not the run succeeded.
 */
function makeGitDataSource({ name, handlePage, filter, sourceType, metadata, repoUri, repoOptions, }) {
    // Applies the source-level fields to one page. A source-level `sourceType`
    // takes precedence over the page's own; for `metadata` the two objects are
    // merged and the page's keys take precedence.
    const withSourceFields = (page) => {
        const stamped = {
            ...page,
            sourceName: name,
            sourceType: sourceType ?? page.sourceType,
        };
        if (metadata || page.metadata) {
            stamped.metadata = { ...(metadata ?? {}), ...(page.metadata ?? {}) };
        }
        else {
            stamped.metadata = undefined;
        }
        return stamped;
    };
    const fetchPages = async () => {
        const cloneDir = makeRandomTmp(name);
        try {
            logger_1.logger.info(`Created ${cloneDir} for ${repoUri}`);
            const cloneOptions = { "--depth": 1, ...(repoOptions ?? {}) };
            await getRepoLocally({
                repoPath: repoUri,
                localPath: cloneDir,
                options: cloneOptions,
            });
            logger_1.logger.info(`Cloned ${repoUri} to ${cloneDir}`);
            const contentsByPath = await getRelevantFilesAsStrings({
                directoryPath: cloneDir,
                // The caller's filter receives a leading slash + the path within the repo
                filter: (absolutePath) => filter(absolutePath.replace(cloneDir, "")),
            });
            // The async wrapper turns a synchronous throw from handlePage into a rejection.
            const pending = Object.entries(contentsByPath).map(async ([pagePath, content]) => handlePage(pagePath, content));
            const settled = await Promise.allSettled(pending);
            // NOTE(review): both helpers live in ../arrayFilters; judging by their
            // names and the `{ value }` access they presumably keep only fulfilled
            // results and drop undefined ones — confirm against that module.
            const { filterDefined, filterFulfilled } = arrayFilters_1;
            const handled = filterFulfilled(settled).map(({ value }) => value);
            // handlePage may return a single page or an array of pages, hence flat(1).
            return filterDefined(handled).flat(1).map((page) => withSourceFields(page));
        }
        finally {
            (0, rimraf_1.rimrafSync)(cloneDir);
            logger_1.logger.info(`Deleted ${cloneDir}`);
        }
    };
    return { name, fetchPages };
}
exports.makeGitDataSource = makeGitDataSource;
// ----------------
// Helper functions
// ----------------
/**
  Creates a uniquely named directory directly inside the system's temporary
  directory and returns its path.

  The prefix is treated as a plain name, never as a path: any run of path
  separators in it is replaced with "-" so that a value such as "org/repo" or
  "/abs/name" cannot point outside the temporary directory or at a parent
  directory that does not exist. A missing prefix is treated as empty.

  @param prefix - prefix for the temporary directory name
  @returns absolute path of the newly created directory
 */
function makeRandomTmp(prefix) {
    // Get the system's default temporary directory, with exactly one trailing separator
    const tmpRoot = path_1.default.join(path_1.default.resolve(os_1.default.tmpdir()), path_1.default.sep);
    // Flatten separators so the prefix is always a single path segment
    const safePrefix = String(prefix ?? "").replace(/[\\/]+/g, "-");
    // Concatenate instead of path.resolve/path.join: those normalize ".." or an
    // empty prefix away, which would create the directory outside tmpRoot.
    const randomTmpDir = fs_1.default.mkdtempSync(`${tmpRoot}${safePrefix}`);
    return randomTmpDir;
}
exports.makeRandomTmp = makeRandomTmp;
/**
  Clones a Git repository to a local directory, logging before and after.

  @param repoPath - URI or path of the repository to clone
  @param localPath - directory to clone into
  @param options - passed through unchanged to the git client's `clone`
  @returns a promise that resolves (with no value) once the clone finished;
  a failing clone rejects and the "Successfully cloned" line is not logged
 */
async function getRepoLocally({ repoPath, localPath, options, }) {
    const { default: createGitClient } = simple_git_1;
    const gitClient = createGitClient();
    const { logger } = logger_1;
    logger.info(`Started cloning ${repoPath} to ${localPath} with options ${JSON.stringify(options)}`);
    await gitClient.clone(repoPath, localPath, options);
    logger.info(`Successfully cloned ${repoPath} to ${localPath} with options ${JSON.stringify(options)}`);
}
exports.getRepoLocally = getRepoLocally;
/**
  Recursively walks a directory and collects the absolute paths of all
  non-directory entries accepted by `filter`.

  Symbolic links are followed: a link to a directory is descended into and a
  link to a file is offered to `filter`. A dangling link (target missing) is
  skipped instead of aborting the whole walk.

  @param directoryPath - directory to walk
  @param filter - predicate called with the absolute path of each non-directory entry
  @param fileList - accumulator that matching paths are appended to
  @returns the same `fileList` array that was passed in (or a new one)
 */
function getRelevantFilePathsInDir(directoryPath, filter, fileList = []) {
    // withFileTypes gives the entry type without an extra stat call per entry
    const entries = fs_1.default.readdirSync(directoryPath, { withFileTypes: true });
    for (const entry of entries) {
        const itemPath = path_1.default.resolve(directoryPath, entry.name);
        let isDirectory = entry.isDirectory();
        if (entry.isSymbolicLink()) {
            // The Dirent describes the link itself; stat the target so links keep
            // being followed.
            try {
                isDirectory = fs_1.default.statSync(itemPath).isDirectory();
            }
            catch (error) {
                if (error?.code === "ENOENT") {
                    // Dangling link: nothing to read, so leave it out.
                    continue;
                }
                throw error;
            }
        }
        if (isDirectory) {
            getRelevantFilePathsInDir(itemPath, filter, fileList);
        }
        else if (filter(itemPath)) {
            fileList.push(itemPath);
        }
    }
    return fileList;
}
exports.getRelevantFilePathsInDir = getRelevantFilePathsInDir;
/**
  Reads every file under `directoryPath` that `filter` accepts and returns the
  contents keyed by location inside that directory.

  Keys are a leading path separator followed by the file's path relative to
  `directoryPath`. The directory is resolved to an absolute path first, so the
  keys have the same shape whether `directoryPath` is relative, absolute, or
  written with a trailing separator.

  @param directoryPath - directory to read from
  @param filter - predicate called with each candidate file's absolute path
  @returns object mapping in-directory path to the file's UTF-8 contents
 */
async function getRelevantFilesAsStrings({ directoryPath, filter, }) {
    const rootPath = path_1.default.resolve(directoryPath);
    const paths = getRelevantFilePathsInDir(directoryPath, filter);
    const pathsAndContents = {};
    for (const filePath of paths) {
        // path.relative strips the root structurally; a textual replace of
        // `directoryPath` misses whenever the argument is not already normalized.
        const pathInDir = `${path_1.default.sep}${path_1.default.relative(rootPath, filePath)}`;
        pathsAndContents[pathInDir] = fs_1.default.readFileSync(filePath, "utf8");
    }
    return pathsAndContents;
}
exports.getRelevantFilesAsStrings = getRelevantFilesAsStrings;
//# sourceMappingURL=GitDataSource.js.map