mongodb-rag-core
Version:
Common elements used by MongoDB Chatbot Framework components.
77 lines • 3 kB
JavaScript
;
// Interop helper: reuse an `__importDefault` already present on `this`, otherwise
// fall back to a local implementation. Values flagged with `__esModule` are
// returned as-is; anything else is wrapped so it is reachable via `.default`.
var __importDefault = (this && this.__importDefault) || function (loadedModule) {
    if (loadedModule && loadedModule.__esModule) {
        return loadedModule;
    }
    return { "default": loadedModule };
};
// Flag this CommonJS module with `__esModule` so interop helpers treat it as a transpiled ES module.
Object.defineProperty(exports, "__esModule", { value: true });
// Pre-declare the export as undefined; the real value is assigned once the function has been defined.
exports.getChangedPages = void 0;
// `deep-equal` is loaded through the interop helper so it can be called as `deep_equal_1.default`.
const deep_equal_1 = __importDefault(require("deep-equal"));
const logger_1 = require("../logger");
/**
  Given sets of old and new pages, returns the pages that need to be created,
  updated, or deleted.

  Pages are matched by `url`. If several pages in one input share a URL, only
  the last of them is considered.

  Every returned page is a shallow copy of the input page with `updated` set to
  the current time and `action` set to the name of the list it is returned in.
  Deleted entries are copies of the old page; created and updated entries are
  copies of the new page.

  The three lists are disjoint:
  - `deleted`: old pages not already marked "deleted" whose URL is absent from
    the new set.
  - `created`: new pages whose URL is absent from the old set, or whose old
    page was marked "deleted" (i.e. the page is being restored).
  - `updated`: new pages whose old page is not marked "deleted" and whose
    comparable fields (see `comparablePartialPage`) differ.

  @param oldPages - pages currently in the store
  @param newPages - pages from the data source
  @param sourceName - data source name, used only in log messages
  @returns promise resolving to `{ deleted, created, updated }`
 */
const getChangedPages = async ({ oldPages: oldPagesIn, newPages: newPagesIn, sourceName, }) => {
    const oldPages = new Map(oldPagesIn.map((page) => [page.url, page]));
    const newPages = new Map(newPagesIn.map((page) => [page.url, page]));
    logger_1.logger.info(`After de-duplication based on page URL, there are ${oldPages.size} pages currently in the store and ${newPages.size} pages from the data source ${sourceName} to be processed.`);
    // Perform set difference to find deleted, created, and changed pages.
    logger_1.logger.info(`Comparing pages currently in the store against pages from the data source ${sourceName} to determine which pages need to be created, updated, or deleted...`);
    // Copy a page, stamping it with the current time and the given action.
    const withAction = (page, action) => ({
        ...page,
        updated: new Date(),
        action,
    });
    // deleted = elements in old but not in new. Pages already marked deleted
    // in the old set do not need to be deleted again.
    const deleted = [];
    for (const [url, page] of oldPages) {
        if (page.action !== "deleted" && !newPages.has(url)) {
            deleted.push(withAction(page, "deleted"));
        }
    }
    // Classify every new page exactly once so that a restored page cannot be
    // reported as both created and updated.
    const created = [];
    const updated = [];
    for (const [url, page] of newPages) {
        const oldPage = oldPages.get(url);
        // created = elements in new but not in old, or formerly deleted (restore)
        if (oldPage === undefined || oldPage.action === "deleted") {
            created.push(withAction(page, "created"));
            continue;
        }
        // updated = elements in both old and new with the same url but different content
        if (!(0, deep_equal_1.default)(comparablePartialPage(oldPage), comparablePartialPage(page))) {
            updated.push(withAction(page, "updated"));
        }
    }
    return { deleted, created, updated };
};
// Publish the function on the CommonJS exports object.
exports.getChangedPages = getChangedPages;
/**
  Build a reduced copy of a page that keeps only the fields used for change
  comparison: `url`, `sourceName`, `body`, `format`, and `metadata`. Every
  other property of the page is left out.
 */
const comparablePartialPage = (page) => {
    const { url, sourceName, body, format, metadata } = page;
    return { url, sourceName, body, format, metadata };
};
//# sourceMappingURL=getChangedPages.js.map