@promptbook/azure-openai

Promptbook: Run AI apps in plain human language across multiple models and platforms

import type { Promisable } from 'type-fest';
import type { KnowledgePiecePreparedJson } from '../../pipeline/PipelineJson/KnowledgePieceJson';
import type { string_filename } from '../../types/typeAliases';
import type { string_knowledge_source_link } from '../../types/typeAliases';
import type { string_mime_type } from '../../types/typeAliases';
import type { string_url } from '../../types/typeAliases';
import type { ScraperAndConverterMetadata } from './register/ScraperAndConverterMetadata';

/**
 * Interface defining the requirements for scraper implementations.
 * Scrapers are responsible for extracting structured content from various knowledge sources.
 */
export type Scraper = {
    /**
     * Metadata of the scraper which includes title, mime types, etc.
     */
    readonly metadata: ScraperAndConverterMetadata;

    /**
     * Scrapes the markdown file and returns the knowledge pieces or `null` if it can't scrape it
     */
    scrape(
        source: ScraperSourceHandler,
    ): Promisable<ReadonlyArray<Omit<KnowledgePiecePreparedJson, 'sources' | 'preparationIds'>> | null>;
};

/**
 * Handler for accessing and processing content from diverse knowledge sources.
 * Provides standardized methods to interact with files, URLs and raw text sources.
 */
export type ScraperSourceHandler = {
    /**
     * The source of the knowledge
     */
    readonly source: string_knowledge_source_link;

    /**
     * The path to the file, if it is a file
     *
     * Note: Typically one of the `filename` or `url` is set and the other is `null`
     */
    readonly filename: string_filename | null;

    /**
     * The URL, if it is online
     *
     * Note: Typically one of the `filename` or `url` is set and the other is `null`
     */
    readonly url: string_url | null;

    /**
     * Mime type of the source
     */
    readonly mimeType: string_mime_type;

    /**
     * Get the content as parsed JSON
     */
    asJson(): Promisable<unknown>;

    /**
     * Get the content as a utf-8 string
     */
    asText(): Promisable<string>;
};

/**
 * TODO: [🧠] Maybe split `ScraperSourceHandler` into `ScraperWebsiteSourceHandler` + `ScraperFileSourceHandler`
 * TODO: [🥽] Add ` asBlob(): Promisable<Blob>;` or asFile
 * TODO: [🔼] Export via types
 */
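As a minimal sketch (not part of the package), a file-backed `ScraperSourceHandler` could be constructed like the example below. The file path is hypothetical, and it is assumed here that the `string_*` aliases accept plain string literals; only the members declared in the type above are used.

import { readFile } from 'fs/promises';

// Hypothetical local markdown file used as the knowledge source
const path = './docs/introduction.md';

const fileSourceHandler: ScraperSourceHandler = {
    source: path, // assumed: `string_knowledge_source_link` accepts a plain string
    filename: path, // a file source, so `filename` is set...
    url: null, // ...and `url` stays `null`
    mimeType: 'text/markdown',
    // Only meaningful for JSON-like sources; parses the raw file content
    asJson: async () => JSON.parse(await readFile(path, 'utf-8')),
    // Returns the raw utf-8 content of the file
    asText: () => readFile(path, 'utf-8'),
};

A `Scraper` implementation would receive such a handler in `scrape()`, typically checking `mimeType` against its supported types before calling `asText()` or `asJson()`, and returning `null` when it cannot handle the source.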