mongodb-rag-core
Version:
Common elements used by MongoDB Chatbot Framework components.
39 lines • 1.39 kB
TypeScript
/// <reference types="node" />
import { PageMetadata } from "../contentStore";
import { DataSource } from "./DataSource";
/**
Asynchronously gets the Buffer object for a PDF, given its URL.
NOTE(review): how the PDF is retrieved is left to the implementation — not specified by this type.
*/
export type GetPdfBufferFunc = (url: string) => Promise<Buffer>;
/**
Gets the title from the parsed PDF markdown contents.
May return `undefined` — presumably when no title can be derived; confirm against implementations.
*/
export type GetTitleFromPdfFunc = (pdfMdContent: string) => string | undefined;
/**
  Arguments accepted by `makePdfToMarkdownDataSource`.

  `SourceType` is the string type of the optional `sourceType` field
  and defaults to `string`.
 */
export type MakePdfToMarkdownDataSourceArgs<SourceType extends string = string> = {
  /**
    Name of the data source.
   */
  name: string;

  /**
    Page URLs of the PDFs.
   */
  urls: string[];

  /**
    Function that gets the Buffer object for a PDF.
   */
  getPdfBuffer: GetPdfBufferFunc;

  /**
    Optional function that converts the raw URL to a Page URL.
    TODO - We should use this to normalize the URL after EAI-1029 (#825) is merged.
   */
  transformPageUrl?: (url: string) => string;

  /**
    Optional function that gets the title from the parsed PDF markdown contents.
   */
  getTitleFromContent?: GetTitleFromPdfFunc;

  /**
    Optional source type to be included in pages.
   */
  sourceType?: SourceType;

  /**
    Optional metadata to be included in all pages.
   */
  metadata?: PageMetadata;
};
/**
  Makes a data source that loads PDFs and converts their content to pages.

  @param args - Configuration for the data source; see
  `MakePdfToMarkdownDataSourceArgs` for the meaning of each field.
  @returns The resulting `DataSource`.
 */
export declare function makePdfToMarkdownDataSource<
  SourceType extends string = string
>({
  name,
  urls,
  getPdfBuffer,
  transformPageUrl,
  getTitleFromContent,
  sourceType,
  metadata,
}: MakePdfToMarkdownDataSourceArgs<SourceType>): DataSource;
//# sourceMappingURL=PdfToMdDataSource.d.ts.map