UNPKG

@promptbook/remote-client

Version:

Promptbook: Turn your company's scattered knowledge into AI ready books

42 lines (41 loc) 1.97 kB
import type { ExecutionTools } from '../../execution/ExecutionTools';
import type { KnowledgePiecePreparedJson } from '../../pipeline/PipelineJson/KnowledgePieceJson';
import type { PrepareAndScrapeOptions } from '../../prepare/PrepareAndScrapeOptions';
import type { Converter } from '../_common/Converter';
import type { ScraperAndConverterMetadata } from '../_common/register/ScraperAndConverterMetadata';
import type { Scraper, ScraperSourceHandler } from '../_common/Scraper';
import type { ScraperIntermediateSource } from '../_common/ScraperIntermediateSource';

/**
 * Scraper for `.pdf` files
 *
 * Fulfils both the `Converter` contract (via `$convert`) and the `Scraper` contract (via `scrape`).
 *
 * @see `documentationUrl` for more details
 * @public exported from `@promptbook/pdf`
 */
export declare class PdfScraper implements Converter, Scraper {
    // Note: Private members carry no type information in this declaration output
    private readonly tools;
    private readonly options;

    /**
     * Metadata describing this scraper (title, mime types, etc.)
     */
    get metadata(): ScraperAndConverterMetadata;

    /**
     * Internally used markdown scraper
     */
    private readonly markdownScraper;

    /**
     * @param tools execution tools; the signature requires only the `llm` member of `ExecutionTools`
     * @param options options for preparing and scraping
     */
    constructor(tools: Pick<ExecutionTools, 'llm'>, options: PrepareAndScrapeOptions);

    /**
     * Converts the `.pdf` file to a `.md` file
     *
     * @param source handler of the `.pdf` source to convert
     * @returns promise of the intermediate source produced by the conversion
     */
    $convert(source: ScraperSourceHandler): Promise<ScraperIntermediateSource>;

    /**
     * Scrapes the `.pdf` file
     *
     * @param source handler of the `.pdf` source to scrape
     * @returns promise of the knowledge pieces (without `sources` and `preparationIds`) or `null` if the source can't be scraped
     */
    scrape(
        source: ScraperSourceHandler,
    ): Promise<ReadonlyArray<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
}

/**
 * TODO: [👣] Converted pdf documents can act as cached items - there is no need to run conversion each time
 * TODO: [🪂] Do it in parallel 11:11
 * Note: No need to aggregate usage here, it is done by intercepting the llmTools
 * Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
 */