mongodb-rag-core
Version:
Common elements used by MongoDB Chatbot Framework components.
22 lines • 1.29 kB
TypeScript
import { Page, PageMetadata } from "../contentStore";
/**
Options accepted by `handleHtmlDocument()`.

The callbacks that take a `Document` operate on the DOM parsed from the
HTML content; `postProcessMarkdown` operates on the generated Markdown.
*/
export type HandleHtmlPageFuncOptions = {
/** Returns an array of DOM elements to be removed from the parsed document. */
removeElements: (domDoc: Document) => Element[];
/** Construct the `Page.url` from page path. */
pathToPageUrl: (path: string) => string;
/** `Page.metadata` passed from config. Included in all documents. */
metadata?: PageMetadata;
/**
Extract metadata from page DOM. Added to the `Page.metadata` field.
If a key in the result of `extractMetadata()` is the same as a key in `metadata`,
the `extractMetadata()` key will override it.
*/
extractMetadata?: (domDoc: Document) => Record<string, unknown>;
/**
Extract `Page.title` from the parsed page DOM.
Note that the callback receives only the document, not the page path.
May return `undefined` (presumably when no title can be determined;
confirm against the implementation).
*/
extractTitle?: (domDoc: Document) => string | undefined;
/**
Transform Markdown once it's been generated.
Asynchronous: the returned promise resolves to the transformed Markdown.
*/
postProcessMarkdown?: (markdown: string) => Promise<string>;
};
/**
Build a `Page` (without its `sourceName` field) from an HTML document.

NOTE(review): the implementation is not visible in this declaration file.
Judging by the option docs, the HTML is parsed into a DOM, elements are
removed, and Markdown is generated; confirm details against the source.

@typeParam SourceType - String type used to parameterize the resulting `Page`.
@param path - Page path; passed to `options.pathToPageUrl` to construct `Page.url`.
@param content - The document content (presumably the raw HTML string; confirm).
@param options - Callbacks and metadata controlling how the page is built.
@returns A promise resolving to the page, omitting `sourceName`.
*/
export declare function handleHtmlDocument<SourceType extends string = string>(path: string, content: string, options: HandleHtmlPageFuncOptions): Promise<Omit<Page<SourceType>, "sourceName">>;
/**
Derive a string from a parsed HTML document, or `undefined`.

The signature matches `HandleHtmlPageFuncOptions.extractTitle`, so it can be
supplied as that option.

NOTE(review): going by the name, this presumably returns the text of the
document's `<h1>` element and `undefined` when there is none; confirm
against the implementation, which is not visible here.

@param domDoc - The parsed HTML document to inspect.
@returns The extracted string, or `undefined`.
*/
export declare function extractHtmlH1(domDoc: Document): string | undefined;
//# sourceMappingURL=handleHtmlDocument.d.ts.map