@r-huijts/opentk-mcp
Version:
MCP server for Dutch parliamentary data access via OpenTK
32 lines (31 loc) • 1.16 kB
TypeScript
/**
* Utility for extracting text from various document formats
* Using established libraries for better reliability
*/
/**
* Extracts text from a PDF document using pdf-parse library
* @param data The PDF document as an ArrayBuffer (the declared parameter type
*   is ArrayBuffer, not a Node.js Buffer)
* @returns A promise that resolves to the extracted text content
*/
export declare function extractTextFromPdf(data: ArrayBuffer): Promise<string>;
/**
* Extracts text from a DOCX document using mammoth library
* @param data The DOCX document as an ArrayBuffer
* @returns A promise that resolves to the extracted text content
*/
export declare function extractTextFromDocx(data: ArrayBuffer): Promise<string>;
/**
* Summarizes the extracted text to a reasonable length
*
* The result carries pagination fields alongside the text so a caller can
* request the following portion by passing `nextOffset` back as `offset`.
* NOTE(review): the exact truncation rule is not visible from this
* declaration — confirm against the implementation.
*
* @param text The full extracted text
* @param maxLength Maximum length of the summary (default: 8000 characters)
* @param offset Starting position for extraction (default: 0)
* @returns Object containing the summarized text and pagination info
*/
export declare function summarizeText(text: string, maxLength?: number, offset?: number): {
/** The summarized portion of the input text. */
text: string;
/** Whether the returned text was cut short — presumably true when more text remains; TODO confirm. */
isTruncated: boolean;
/** Total length — presumably of the full input text, in characters; TODO confirm. */
totalLength: number;
/** Offset this result starts from — presumably echoes the `offset` argument; TODO confirm. */
currentOffset: number;
/** Offset to request next, or null — presumably null when nothing remains; TODO confirm. */
nextOffset: number | null;
/** Length of the text not yet returned — presumably counted after this portion; TODO confirm. */
remainingLength: number;
};