mongodb-rag-core
Version:
Common elements used by MongoDB Chatbot Framework components.
69 lines • 2.67 kB
JavaScript
;
// CommonJS/ES-module interop helper: reuse an already-installed helper on
// `this` when one exists, otherwise define it here.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // A module flagged as an ES module is passed through untouched.
    if (mod && mod.__esModule) {
        return mod;
    }
    // Anything else is wrapped so that callers can always read `.default`.
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.pageBlobUrl = exports.makeCodeOnGithubTextDataSource = void 0;
const contentStore_1 = require("../contentStore");
const GitHubDataSource_1 = require("./GitHubDataSource");
const path_1 = __importDefault(require("path"));
/**
  Builds a data source that loads source code files from a GitHub repo.

  The work is delegated to `makeGitHubDataSource`. This wrapper contributes:
  - a built-in list of ignore patterns, placed ahead of any patterns supplied
    in `repoLoaderOptions.ignoreFiles`;
  - a `handleDocumentInRepo` callback that turns each loaded document into a
    page whose `format` (also stored as `metadata.programmingLanguage`) is
    derived from the file extension and whose `url` is the file's blob URL.
 */
const makeCodeOnGithubTextDataSource = async ({ name, repoUrl, repoLoaderOptions, filter, sourceType, metadata, }) => {
    const { makeGitHubDataSource } = GitHubDataSource_1;
    const callerIgnoreFiles = repoLoaderOptions?.ignoreFiles ?? [];
    const ignoreFiles = [
        /LICENSE/,
        /CONTRIBUTING/,
        /\.git/, // Ignores .git/, .gitignore, .github/, etc.
        /\.dockerignore/,
        /\.gcloudignore/,
        /\.editorconfig/,
        /\.vscode/,
        ...callerIgnoreFiles,
    ];
    // Caller options are copied first so the merged ignore list overrides
    // any `ignoreFiles` key they carry.
    const mergedLoaderOptions = { ...(repoLoaderOptions ?? {}), ignoreFiles };
    return makeGitHubDataSource({
        name,
        repoUrl,
        filter,
        repoLoaderOptions: mergedLoaderOptions,
        async handleDocumentInRepo(document) {
            const { pageFormat } = contentStore_1;
            const filePath = document.metadata.source;
            const format = pageFormat(getFileExtension(filePath));
            // NOTE(review): the URL branch falls back to "master" when no
            // branch option is given; confirm this matches the branch the
            // loader actually reads from.
            const url = pageBlobUrl({
                repoUrl,
                branch: repoLoaderOptions?.branch ?? "master",
                filePath,
            });
            return {
                body: document.pageContent,
                format,
                sourceName: name,
                sourceType,
                url,
                metadata: { ...(metadata ?? {}), programmingLanguage: format },
            };
        },
    });
};
exports.makeCodeOnGithubTextDataSource = makeCodeOnGithubTextDataSource;
/**
  Returns the extension of the final path segment, without the leading dot.

  Falls back to "txt" when the file name has no extension (for example
  "Makefile", or a name ending in a dot).

  @param filePath - Slash-separated path of the file.
  @returns The text after the last "." of the file name, or "txt".
 */
function getFileExtension(filePath) {
    // "/" is excluded from the captured class so that a dot in a directory
    // name (e.g. "src.v2/Makefile") is never mistaken for the start of an
    // extension.
    const match = filePath.match(/\.([^./]+)$/);
    return match ? match[1] : "txt";
}
/**
  Builds the "blob" URL of a file on a given branch of a repository:
  `<origin>/<repo path>/blob/<branch>/<file path>`.

  @param args.repoUrl - Absolute URL of the repository. Only its origin and
    path are used.
  @param args.branch - Branch name placed after the "blob" segment.
  @param args.filePath - Path of the file as a string or an array of path
    segments. When undefined, the URL of the branch itself is returned.
  @returns The resulting URL as a string.
  @throws TypeError when `args.repoUrl` cannot be parsed as an absolute URL.
 */
function pageBlobUrl(args) {
    const { origin, pathname: repoUrlPath } = new URL(args.repoUrl);
    const { filePath } = args;
    let fileSegments;
    if (filePath === undefined) {
        fileSegments = [""];
    }
    else if (Array.isArray(filePath)) {
        fileSegments = filePath;
    }
    else {
        fileSegments = [filePath];
    }
    const joinedPath = path_1.default.posix.join(repoUrlPath, "blob", args.branch, ...fileSegments);
    // "?" and "#" can appear in file and branch names (e.g. a "C#" directory),
    // but the URL parser would treat them as the start of the query or
    // fragment. Percent-encode them so they remain part of the path.
    const urlPath = joinedPath.replace(/[?#]/g, (ch) => encodeURIComponent(ch));
    return new URL(urlPath, origin).toString();
}
exports.pageBlobUrl = pageBlobUrl;
//# sourceMappingURL=CodeOnGithubTextDataSource.js.map