docuglean-ocr
Version:
An SDK for intelligent document processing using state-of-the-art AI models.
83 lines (82 loc) • 1.88 kB
TypeScript
import { z } from 'zod';
/**
 * String-literal union of the provider identifiers accepted by the
 * `provider` field of the configuration interfaces in this file.
 */
export type Provider =
  | 'openai'
  | 'mistral'
  | 'gemini';
/**
 * Ambient declaration of the configuration validator.
 *
 * Accepts either an {@link OCRConfig} or an {@link ExtractConfig} and
 * returns nothing.
 *
 * NOTE(review): the implementation is not part of this declaration file;
 * presumably it throws when the configuration is invalid — confirm against
 * the implementation before relying on that.
 *
 * @param config - The OCR or extraction configuration to check.
 */
export declare const validateConfig: (
  config: OCRConfig | ExtractConfig
) => void;
/**
 * One image entry inside a {@link MistralOCRPage}.
 *
 * NOTE(review): the four coordinate fields look like the corners of a
 * bounding box on the page; their units and origin are not visible in this
 * file — confirm against the Mistral OCR API documentation.
 */
interface MistralOCRImage {
  /** Identifier of the image. */
  id: string;

  /** X coordinate of the top-left corner, or `null`. */
  topLeftX: number | null;
  /** Y coordinate of the top-left corner, or `null`. */
  topLeftY: number | null;

  /** X coordinate of the bottom-right corner, or `null`. */
  bottomRightX: number | null;
  /** Y coordinate of the bottom-right corner, or `null`. */
  bottomRightY: number | null;

  /** Optional image payload as a string; may be omitted or `null`. */
  imageBase64?: string | null;
}
/**
 * A single page entry of a {@link MistralOCRResponse}.
 */
interface MistralOCRPage {
  /** Index of the page (NOTE(review): zero- or one-based is not visible here). */
  index: number;

  /** Markdown text of the page. */
  markdown: string;

  /** Image entries associated with the page. */
  images: Array<MistralOCRImage>;

  /** Page dimensions, or `null` when none are provided. */
  dimensions: null | {
    dpi: number;
    height: number;
    width: number;
  };
}
/**
 * Shape of an OCR response for the `mistral` provider: a list of pages.
 */
export interface MistralOCRResponse {
  /** Page entries of the response. */
  pages: Array<MistralOCRPage>;
}
/**
 * Shape of an OCR response for the `openai` provider: the text plus
 * token-usage counters.
 */
export interface OpenAIOCRResponse {
  /** Text content of the response. */
  text: string;

  /** Token counters reported alongside the text. */
  usage: {
    /** Number of prompt tokens. */
    prompt_tokens: number;
    /** Number of completion tokens. */
    completion_tokens: number;
    /** Total token count. */
    total_tokens: number;
  };
}
/**
 * Shape of an OCR response for the `gemini` provider.
 */
export interface GeminiOCRResponse {
  /** Text content of the response. */
  text: string;

  /** Name of the model that was used. */
  model_used: string;
}
/**
 * Minimal page record: a page index together with its markdown text.
 */
export interface OCRPage {
  /** Index of the page (NOTE(review): zero- or one-based is not visible here). */
  index: number;

  /** Markdown text of the page. */
  markdown: string;
}
/**
 * Configuration object for an OCR request.
 *
 * Only `filePath` and `apiKey` are required; everything else is optional.
 * NOTE(review): defaults for omitted fields are decided by the
 * implementation, which is not visible in this declaration file.
 */
export interface OCRConfig {
  /** Location of the document to process. */
  filePath: string;

  /** Which provider to use. */
  provider?: Provider;

  /** Model name to use with the provider. */
  model?: string;

  /** API key for the provider. */
  apiKey: string;

  /**
   * Provider-specific settings, grouped under a key per provider.
   * NOTE(review): presumably only the group matching `provider` is read —
   * confirm against the implementation.
   */
  options?: {
    /** Settings for the `mistral` provider. */
    mistral?: {
      includeImageBase64?: boolean;
    };

    /** Settings for the `openai` provider. */
    openai?: {
      maxTokens?: number;
    };

    /** Settings for the `gemini` provider. */
    gemini?: {
      temperature?: number;
      topP?: number;
      topK?: number;
    };
  };

  /** Optional prompt text. */
  prompt?: string;
}
/**
 * Configuration object for a structured-extraction request.
 *
 * Only `filePath` and `apiKey` are required; everything else is optional.
 * NOTE(review): defaults for omitted fields are decided by the
 * implementation, which is not visible in this declaration file.
 */
export interface ExtractConfig {
  /** Location of the document to process. */
  filePath: string;

  /** API key for the provider. */
  apiKey: string;

  /** Which provider to use. */
  provider?: Provider;

  /** Model name to use with the provider. */
  model?: string;

  /** Optional prompt text. */
  prompt?: string;

  /**
   * Optional zod schema.
   * NOTE(review): presumably describes the expected structured output —
   * confirm against the implementation.
   */
  responseFormat?: z.ZodType<any>;

  /** Optional system prompt text. */
  systemPrompt?: string;
}
/**
 * Open-ended record type: any string key mapped to any value.
 *
 * Used in this file as the constraint on the type parameter of
 * `StructuredExtractionResult`.
 */
export interface BaseStructuredOutput {
  /** Arbitrary field; values are deliberately untyped (`any`). */
  [field: string]: any;
}
/**
 * Result of a structured extraction: a raw string together with a typed
 * value.
 *
 * @typeParam T - Type of the parsed value; must extend `BaseStructuredOutput`.
 */
export interface StructuredExtractionResult<T extends BaseStructuredOutput> {
  /** Raw string (NOTE(review): presumably the unparsed model output — confirm). */
  raw: string;

  /** Parsed value of type `T`. */
  parsed: T;
}
/**
 * Combined OCR result: markdown text, a list of images, and a raw response.
 *
 * NOTE(review): `rawResponse` is typed as the Mistral response shape
 * specifically, and `images` is untyped (`any`); whether other providers
 * produce this result type is not visible in this file.
 */
export interface OCRResult {
  /** Markdown text of the result. */
  markdown: string;

  /** Images of the result; element type is deliberately `any`. */
  images: Array<any>;

  /** The raw response, typed as `MistralOCRResponse`. */
  rawResponse: MistralOCRResponse;
}
export {};