UNPKG

docuglean-ocr

Version:

An SDK for intelligent document processing using state-of-the-art AI models.

83 lines (82 loc) 1.88 kB
import { z } from 'zod';

/** Provider names accepted by the `provider` field of {@link OCRConfig} and {@link ExtractConfig}. */
export type Provider = 'openai' | 'mistral' | 'gemini';

/**
 * Checks an OCR or extraction configuration object.
 *
 * Only the signature is declared here. Because the return type is `void`,
 * failures are presumably reported by throwing — confirm against the
 * implementation.
 *
 * @param config - The OCR or extraction configuration to check.
 */
export declare const validateConfig: (config: OCRConfig | ExtractConfig) => void;

/**
 * One image entry inside a {@link MistralOCRPage}.
 *
 * The four corner fields are nullable. They look like a bounding box
 * (top-left / bottom-right); the coordinate units are not visible from this
 * file — TODO confirm.
 */
interface MistralOCRImage {
  id: string;
  topLeftX: number | null;
  topLeftY: number | null;
  bottomRightX: number | null;
  bottomRightY: number | null;
  /** Optional and nullable; presumably the base64-encoded image data — confirm. */
  imageBase64?: string | null;
}

/** One page of a {@link MistralOCRResponse}. */
interface MistralOCRPage {
  index: number;
  markdown: string;
  images: MistralOCRImage[];
  /** Page dimensions, or `null` when none are provided. */
  dimensions: {
    dpi: number;
    height: number;
    width: number;
  } | null;
}

/** Response shape for the `mistral` provider: a list of pages. */
export interface MistralOCRResponse {
  pages: MistralOCRPage[];
}

/** Response shape for the `openai` provider: text plus token usage counters. */
export interface OpenAIOCRResponse {
  text: string;
  usage: {
    prompt_tokens: number;
    completion_tokens: number;
    total_tokens: number;
  };
}

/** Response shape for the `gemini` provider: text plus the model name used. */
export interface GeminiOCRResponse {
  text: string;
  model_used: string;
}

/** Minimal page shape: an index and its markdown content. */
export interface OCRPage {
  index: number;
  markdown: string;
}

/**
 * Configuration for an OCR request.
 *
 * `filePath` and `apiKey` are required; everything else is optional. Defaults
 * for the optional fields are not visible from this declaration file.
 */
export interface OCRConfig {
  filePath: string;
  provider?: Provider;
  model?: string;
  apiKey: string;
  /** Per-provider settings, keyed by provider name. */
  options?: {
    mistral?: {
      includeImageBase64?: boolean;
    };
    openai?: {
      maxTokens?: number;
    };
    gemini?: {
      temperature?: number;
      topP?: number;
      topK?: number;
    };
  };
  prompt?: string;
}

/**
 * Configuration for a structured-extraction request.
 *
 * `filePath` and `apiKey` are required; everything else is optional.
 */
export interface ExtractConfig {
  filePath: string;
  apiKey: string;
  provider?: Provider;
  model?: string;
  prompt?: string;
  /** Zod schema; presumably describes the expected structured output — confirm. */
  responseFormat?: z.ZodType<any>;
  systemPrompt?: string;
}

/** Open-ended record used as the constraint for structured extraction output. */
export interface BaseStructuredOutput {
  [key: string]: any;
}

/**
 * Result of a structured extraction: the raw string alongside its parsed form.
 *
 * @typeParam T - Shape of the parsed output.
 */
export interface StructuredExtractionResult<T extends BaseStructuredOutput> {
  raw: string;
  parsed: T;
}

/**
 * Result of an OCR call.
 *
 * NOTE(review): `rawResponse` is typed as {@link MistralOCRResponse} even
 * though other provider response types are declared in this file, and
 * `images` is `any[]` — confirm whether both are intentional.
 */
export interface OCRResult {
  markdown: string;
  images: any[];
  rawResponse: MistralOCRResponse;
}

export {};