UNPKG

mongodb-rag-core

Version:

Common elements used by MongoDB Chatbot Framework components.

77 lines 3 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.getChangedPages = void 0;
const deep_equal_1 = __importDefault(require("deep-equal"));
const logger_1 = require("../logger");
/**
  Given sets of old and new pages, returns the pages that need to be created,
  updated, or deleted.

  Pages are keyed by `url`. If either input contains several pages with the
  same `url`, only the last one is kept before comparison.

  Every returned page is a shallow copy of its source page with `updated` set
  to the current date and `action` set to "created", "updated", or "deleted".
  Each URL appears in at most one of the three returned lists.

  @param {object} args
  @param {Array<object>} args.oldPages - Pages currently in the store. Each is
    read for `url` and `action`, plus the fields used for content comparison.
  @param {Array<object>} args.newPages - Pages from the data source.
  @param {string} args.sourceName - Data source name; used in log messages only.
  @returns {Promise<{deleted: object[], created: object[], updated: object[]}>}
 */
const getChangedPages = async ({ oldPages: oldPagesIn, newPages: newPagesIn, sourceName, }) => {
    // Index both sets by URL. Map construction keeps the last entry per key,
    // which de-duplicates pages that share a URL.
    const oldPages = new Map(oldPagesIn.map((page) => [page.url, page]));
    const newPages = new Map(newPagesIn.map((page) => [page.url, page]));
    logger_1.logger.info(`After de-duplication based on page URL, there are ${oldPages.size} pages currently in the store and ${newPages.size} pages from the data source ${sourceName} to be processed.`);
    // Perform set difference to find deleted, created, and changed pages.
    logger_1.logger.info(`Comparing pages currently in the store against pages from the data source ${sourceName} to determine which pages need to be created, updated, or deleted...`);
    // deleted = elements in old but not in new
    const deleted = [...oldPages]
        .filter(([url, { action }]) => {
            if (action === "deleted") {
                // If already marked deleted in the old set, no need to delete again
                return false;
            }
            // If it does not exist in the new set, it was deleted
            return !newPages.has(url);
        })
        .map(([, page]) => ({
            ...page,
            updated: new Date(),
            action: "deleted",
        }));
    // created = elements in new but not in old
    const created = [...newPages]
        .filter(([url]) => {
            const oldPage = oldPages.get(url);
            if (oldPage === undefined) {
                return true;
            }
            // Was it formerly deleted? If so, restore
            return oldPage.action === "deleted";
        })
        .map(([, page]) => ({
            ...page,
            updated: new Date(),
            action: "created",
        }));
    // updated = elements in both old and new (that have the same url, but different content)
    const updated = [...newPages]
        .filter(([url, page]) => {
            const oldPage = oldPages.get(url);
            if (!oldPage) {
                return false;
            }
            if (oldPage.action === "deleted") {
                // A formerly deleted page is restored through `created`, which
                // already carries the new content. Reporting it here as well
                // would return the same URL twice with conflicting actions.
                return false;
            }
            // Filter out pages that haven't changed
            return !(0, deep_equal_1.default)(comparablePartialPage(oldPage), comparablePartialPage(page));
        })
        .map(([, page]) => ({
            ...page,
            updated: new Date(),
            action: "updated",
        }));
    return { deleted, created, updated };
};
exports.getChangedPages = getChangedPages;
/**
  Create a page object with any fields irrelevant to comparison stripped out.

  Only `url`, `sourceName`, `body`, `format`, and `metadata` are kept, so
  differences in other fields (such as `updated` or `action`) never cause a
  page to be reported as changed.
 */
const comparablePartialPage = ({ url, sourceName, body, format, metadata, }) => ({
    url,
    sourceName,
    body,
    format,
    metadata,
});
//# sourceMappingURL=getChangedPages.js.map