UNPKG

@promptbook/remote-client

Version:

Promptbook: Create persistent AI agents that turn your company's scattered knowledge into action

45 lines (44 loc) 2.08 kB
import type { ExecutionTools } from '../../execution/ExecutionTools';
import type { KnowledgePiecePreparedJson } from '../../pipeline/PipelineJson/KnowledgePieceJson';
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
import type { Converter } from '../_common/Converter';
import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
import type { Scraper } from '../_common/Scraper';
import type { ScraperSourceHandler } from '../_common/Scraper';
import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';
/**
 * Integration of Markitdown by Microsoft into Promptbook.
 *
 * Declared as both a `Converter` (see `$convert`) and a `Scraper` (see `scrape`).
 *
 * @see https://github.com/microsoft/markitdown
 * @see `documentationUrl` for more details
 *
 * @public exported from `@promptbook/markitdown`
 * @public exported from `@promptbook/pdf`
 */
export declare class MarkitdownScraper implements Converter, Scraper {
    /**
     * Execution tools kept by the scraper.
     *
     * NOTE(review): presumably the `tools` argument passed to the constructor - confirm against the implementation.
     */
    private readonly tools;
    /**
     * Preparation and scraping options kept by the scraper.
     *
     * NOTE(review): presumably the `options` argument passed to the constructor - confirm against the implementation.
     */
    private readonly options;
    /**
     * Metadata of the scraper which includes title, mime types, etc.
     */
    get metadata(): ScraperAndConverterMetadata;
    /**
     * Markdown scraper is used internally
     */
    private readonly markdownScraper;
    /**
     * Markitdown instance used internally.
     *
     * NOTE(review): the previous comment here was a verbatim copy of the one on `markdownScraper`;
     * the field type is not visible in this declaration, so confirm what exactly is stored here.
     */
    private readonly markitdown;
    /**
     * Creates the scraper.
     *
     * @param tools - Subset of `ExecutionTools` limited to the `fs`, `llm` and `executables` members
     * @param options - Options for preparing and scraping
     */
    constructor(tools: Pick<ExecutionTools, 'fs' | 'llm' | 'executables'>, options: PrepareAndScrapeOptions);
    /**
     * Converts the document to a `.md` file and returns the intermediate source.
     *
     * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
     *
     * @param source - Handler of the source document to convert
     * @returns Promise of the intermediate source
     */
    $convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource>;
    /**
     * Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
     *
     * @param source - Handler of the source document to scrape
     * @returns Promise of a read-only array of knowledge pieces (without `sources` and `preparationIds`), or of `null`
     */
    scrape(source: ScraperSourceHandler): Promise<ReadonlyArray<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
}