UNPKG

nano-ai-pdf

Version:

This package helps you summarize PDFs using Gemini Nano on the edge or in the browser, making it compliance-safe, faster, and free.

81 lines 3.86 kB
/**
 * Runs a generator function as a promise-returning coroutine.
 *
 * Each value the generator yields is wrapped in `P` (defaults to `Promise`);
 * its fulfilment value is sent back in through `generator.next`, and its
 * rejection reason is thrown back in through `generator.throw`. The returned
 * promise resolves with the generator's final value and rejects with any
 * error that escapes the generator.
 *
 * An already-defined `this.__awaiter` is reused when present.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
import pdfToText from "react-pdftotext";
import { PdfSummarizerError } from "./types";

/**
 * Normalizes whitespace in extracted text while keeping paragraph breaks.
 *
 * Paragraphs are the spans separated by one or more blank lines. Inside each
 * paragraph every run of whitespace (including single newlines) becomes one
 * space; paragraphs that end up empty are dropped, and the rest are joined
 * with exactly one blank line.
 *
 * @param rawText - Text to normalize
 * @returns The normalized text (empty string when the input is only whitespace)
 */
function normalizeExtractedText(rawText) {
    // Split on blank lines BEFORE collapsing whitespace: collapsing first
    // would turn every newline into a space and erase the paragraph breaks.
    return rawText
        .split(/\n\s*\n/)
        .map((paragraph) => paragraph.replace(/\s+/g, " ").trim())
        .filter((paragraph) => paragraph.length > 0)
        .join("\n\n");
}

/**
 * Extracts text content from a PDF file
 *
 * The raw text returned by `pdfToText` is whitespace-normalized (paragraph
 * breaks are kept as a single blank line) and then checked against
 * `minTextLength`. Progress and failures are logged to the console.
 *
 * @param file - The PDF file to extract text from
 * @param minTextLength - Minimum required text length (default: 100)
 * @returns Promise resolving to the extracted text
 * @throws PdfSummarizerError with code "EXTRACTION_EMPTY" when extraction
 *   yields no string content, "INSUFFICIENT_TEXT" when the cleaned text is
 *   shorter than `minTextLength`, or "EXTRACTION_FAILED" for any other error
 */
export function extractTextFromPdf(file_1) {
    return __awaiter(this, arguments, void 0, function* (file, minTextLength = 100) {
        try {
            console.log(`📖 Starting PDF text extraction for: ${file.name}`);
            console.log(`📄 File size: ${(file.size / 1024 / 1024).toFixed(2)} MB`);
            // Extract text using react-pdftotext
            const rawText = yield pdfToText(file);
            if (!rawText || typeof rawText !== "string") {
                throw new PdfSummarizerError("PDF text extraction returned no content", "EXTRACTION_EMPTY");
            }
            // Clean and normalize the extracted text
            const cleanedText = normalizeExtractedText(rawText);
            console.log(`✅ Text extraction completed: ${cleanedText.length} characters`);
            // Validate minimum text length
            if (cleanedText.length < minTextLength) {
                throw new PdfSummarizerError(`PDF contains insufficient text content (${cleanedText.length} chars, minimum ${minTextLength} required)`, "INSUFFICIENT_TEXT");
            }
            return cleanedText;
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : "Unknown error";
            console.error(`❌ PDF text extraction failed: ${errorMessage}`);
            // Errors raised above already carry a specific code; pass them through.
            if (error instanceof PdfSummarizerError) {
                throw error;
            }
            throw new PdfSummarizerError(`Failed to extract text from PDF: ${errorMessage}`, "EXTRACTION_FAILED");
        }
    });
}

/**
 * Validates a PDF file before processing
 *
 * Checks run in this order: presence, MIME type, maximum size, non-empty.
 *
 * @param file - The file to validate
 * @param maxFileSize - Maximum allowed file size in bytes (default: 10 * 1024 * 1024)
 * @throws PdfSummarizerError with code "NO_FILE", "INVALID_FILE_TYPE",
 *   "FILE_TOO_LARGE" or "EMPTY_FILE" if validation fails
 */
export function validatePdfFile(file, maxFileSize = 10 * 1024 * 1024) {
    // Check if file exists. This must come before any property access so a
    // missing file reports NO_FILE instead of a raw TypeError.
    if (!file) {
        throw new PdfSummarizerError("No file provided", "NO_FILE");
    }
    console.log(`🔍 Validating PDF file: ${file.name}`);
    // Check file type
    if (file.type !== "application/pdf") {
        throw new PdfSummarizerError(`Invalid file type: ${file.type}. Only PDF files are supported.`, "INVALID_FILE_TYPE");
    }
    // Check file size
    if (file.size > maxFileSize) {
        throw new PdfSummarizerError(`File size (${(file.size / 1024 / 1024).toFixed(2)} MB) exceeds maximum allowed size (${(maxFileSize / 1024 / 1024).toFixed(2)} MB)`, "FILE_TOO_LARGE");
    }
    // Check for empty file
    if (file.size === 0) {
        throw new PdfSummarizerError("PDF file is empty", "EMPTY_FILE");
    }
    console.log(`✅ PDF file validation passed`);
}
//# sourceMappingURL=pdf-extractor.js.map