UNPKG

@knowcode/convert-to-markdown

Version:

Convert Excel, PDF, and Word documents to clean, AI-ready formats like Markdown and JSON

198 lines (172 loc) 4.82 kB
/**
 * Type declarations for the "Convert to Markdown" package.
 *
 * Describes the option and result shapes of the Excel, PDF and Word
 * converters, the `ConvertToMarkdown` facade class, the per-format
 * namespaces and the shared utility helpers.
 */

/** Options accepted by every converter entry point. */
export interface ConversionOptions {
  /** Name of the original file; used for metadata. */
  filename?: string;

  /** Prefix used to filter sheets (Excel only). */
  sheetPrefix?: string;
}

/** Per-sheet figures reported by the Excel-to-JSON conversion. */
export interface ExcelSheet {
  name: string;
  rowCount: number;
  columnCount: number;
  cellCount: number;
  nonEmptyCells: number;
  /** Typed as either a string or a number; the exact format is not specified here. */
  emptyRate: string | number;
}

/** Workbook-level statistics attached to an {@link ExcelJsonResult}. */
export interface ExcelJsonStatistics {
  fileName: string;
  /** File size expressed as a numeric byte count plus string KB / MB values. */
  fileSize: {
    bytes: number;
    KB: string;
    MB: string;
  };
  /** One entry per sheet. */
  sheets: Array<ExcelSheet>;
  totalSheets: number;
  totalCells: number;
  totalNonEmptyCells: number;
  /** Typed as either a string or a number; the exact format is not specified here. */
  overallEmptyRate: string | number;
  estimatedTokens: number;
}

/** Result of converting an Excel file to JSON. */
export interface ExcelJsonResult {
  /** The converted data, serialised as a JSON string. */
  content: string;

  /** Statistics describing the conversion. */
  statistics: ExcelJsonStatistics;
}

/**
 * Statistics shared by the Markdown-producing converters.
 *
 * The index signature allows arbitrary extra keys alongside the named ones.
 */
export interface MarkdownStatistics {
  sizeInBytes: number;
  sizeInKB: string;
  sizeInMB: string;
  numberOfLines: number;
  numberOfParagraphs: number;
  numberOfHeadings: number;
  estimatedTokens: number;
  [key: string]: any;
}

/** Result of converting an Excel file to Markdown. */
export interface ExcelMarkdownResult {
  /** The generated Markdown document. */
  document: string;

  /** Common Markdown statistics extended with Excel-specific figures. */
  stats: MarkdownStatistics & {
    numberOfSheets: number;
    /** Row and column counts keyed by a string (presumably the sheet name — confirm). */
    sheets: Record<string, { rowCount: number; columnCount: number }>;
    numberOfTables: number;
    estimatedTokensPerSheet: number;
  };
}

/** Result of converting a PDF file to Markdown. */
export interface PdfMarkdownResult {
  /** The generated Markdown document. */
  document: string;

  /** Common Markdown statistics extended with PDF-specific figures. */
  stats: MarkdownStatistics & {
    numberOfPages: number;
    numberOfTables: number;
    /** Note: declared as a string, unlike the numeric per-sheet estimate for Excel. */
    estimatedTokensPerLine: string;
    /** Free-form PDF information; its keys are not described by these typings. */
    pdfInfo: Record<string, any>;
  };
}

/** Statistics attached to a {@link WordHtmlResult}. */
export interface WordHtmlStatistics {
  /** File size expressed as a numeric byte count plus string KB / MB values. */
  fileSize: {
    bytes: number;
    KB: string;
    MB: string;
  };
  /** Untyped list of messages; element shape is not described by these typings. */
  messages: Array<any>;
  htmlLength: number;
}

/** Result of converting a Word document to HTML. */
export interface WordHtmlResult {
  /** The generated HTML. */
  html: string;

  /** Statistics describing the conversion. */
  statistics: WordHtmlStatistics;
}

/** Result of converting a Word document to Markdown. */
export interface WordMarkdownResult {
  /** The generated Markdown document. */
  markdown: string;

  /**
   * Common Markdown statistics extended with Word-specific figures.
   * `numberOfHeadings` is repeated here although `MarkdownStatistics`
   * already declares it with the same type.
   */
  statistics: MarkdownStatistics & {
    numberOfHeadings: number;
    numberOfLists: number;
    numberOfLinks: number;
    numberOfTables: number;
    /** Untyped list of conversion messages. */
    conversionMessages: Array<any>;
  };
}

/**
 * Facade exposing every converter as a static, promise-returning method.
 *
 * Each method takes the input as a `Buffer` or a `string`
 * (NOTE(review): the meaning of a string input is not stated by these
 * typings — confirm against the implementation) plus optional
 * {@link ConversionOptions}.
 */
export class ConvertToMarkdown {
  /** Converts an Excel file to JSON. */
  static excelToJson(
    input: Buffer | string,
    options?: ConversionOptions
  ): Promise<ExcelJsonResult>;

  /** Converts an Excel file to Markdown. */
  static excelToMarkdown(
    input: Buffer | string,
    options?: ConversionOptions
  ): Promise<ExcelMarkdownResult>;

  /** Converts a PDF file to Markdown. */
  static pdfToMarkdown(
    input: Buffer | string,
    options?: ConversionOptions
  ): Promise<PdfMarkdownResult>;

  /** Converts a Word document to HTML. */
  static wordToHtml(
    input: Buffer | string,
    options?: ConversionOptions
  ): Promise<WordHtmlResult>;

  /** Converts a Word document to Markdown. */
  static wordToMarkdown(
    input: Buffer | string,
    options?: ConversionOptions
  ): Promise<WordMarkdownResult>;
}

/**
 * Direct converter functions, grouped by source format.
 * Unlike the class methods, these accept a `Buffer` only.
 */
export namespace excel {
  export function toJson(
    buffer: Buffer,
    options?: ConversionOptions
  ): Promise<ExcelJsonResult>;

  export function toMarkdown(
    buffer: Buffer,
    options?: ConversionOptions
  ): Promise<ExcelMarkdownResult>;
}

/** Direct PDF converter functions. */
export namespace pdf {
  export function toMarkdown(
    buffer: Buffer,
    options?: ConversionOptions
  ): Promise<PdfMarkdownResult>;
}

/** Direct Word converter functions. */
export namespace word {
  export function toHtml(
    buffer: Buffer,
    options?: ConversionOptions
  ): Promise<WordHtmlResult>;

  export function toMarkdown(
    buffer: Buffer,
    options?: ConversionOptions
  ): Promise<WordMarkdownResult>;
}

/** Utility functions. */
export namespace utils {
  export function estimateTokens(text: string): number;

  export function cleanTextForMarkdown(text: string): string;

  export function dataToMarkdownTable(
    data: Array<any>,
    headers?: Array<string>
  ): string;

  export function isRowEmpty(row: any): boolean;

  export function cleanRowData(row: any): any;

  export function createMetadataHeader(metadata: Record<string, any>): string;

  export function calculateStats(
    content: string,
    additionalStats?: Record<string, any>
  ): MarkdownStatistics;
}

export default ConvertToMarkdown;