UNPKG

pdf-tax-reader-cl

Version:

PDF scraping library for Chilean tax documents. Extract emitter name, economic activities, and address from structured PDF documents like 'CARPETA TRIBUTARIA ELECTRÓNICA PARA SOLICITAR CRÉDITOS'

102 lines (89 loc) 2.81 kB
/**
 * PDF Tax Scraping Library for Chilean Tax Documents
 *
 * Type declarations for a library that extracts specific data from Chilean
 * tax PDF documents like
 * "CARPETA TRIBUTARIA ELECTRÓNICA PARA SOLICITAR CRÉDITOS".
 *
 * Only the public signatures are declared here; the implementations live
 * elsewhere in the package.
 */

/** Data extracted from a single tax document. */
export interface ExtractedTaxData {
  /** Name of the document emitter/company, or `null`. */
  emitterName: string | null;
  /** List of economic activities. */
  economicActivities: string[];
  /** Registered address, or `null`. */
  address: string | null;
}

/** Outcome of checking extracted data for completeness. */
export interface ValidationResult {
  /** Whether the data is valid. */
  isValid: boolean;
  /** List of missing fields. */
  missingFields: string[];
}

/** Per-file outcome produced when processing several PDFs. */
export interface ProcessingResult {
  /** Original filename. */
  filename: string;
  /** Extracted data (the declared alternative to `error`). */
  data?: ExtractedTaxData;
  /** Error message if processing failed. */
  error?: string;
}

/**
 * Extract tax data from a PDF file.
 *
 * @param pdfPath - Path to the PDF file.
 * @returns Promise with the extracted tax data.
 * @throws Error if the file is invalid, not found, or not a tax document.
 */
export declare function extractTaxData(pdfPath: string): Promise<ExtractedTaxData>;

/**
 * Process multiple PDF files in a directory.
 *
 * @param directoryPath - Path to the directory containing PDF files.
 * @returns Promise with an array of processing results.
 */
export declare function processMultiplePDFs(directoryPath: string): Promise<ProcessingResult[]>;

/**
 * Save data to a JSON file.
 *
 * `data` is typed as `unknown` rather than `any`: every value is still
 * accepted from callers, but `any` no longer leaks out of the public API.
 *
 * @param data - Data to save.
 * @param outputPath - Output file path.
 */
export declare function saveToJSON(data: unknown, outputPath: string): void;

/**
 * Extract the emitter name from PDF text.
 *
 * @param text - PDF text content.
 * @returns Emitter name or `null`.
 */
export declare function extractEmitterName(text: string): string | null;

/**
 * Extract economic activities from PDF text.
 *
 * @param text - PDF text content.
 * @returns Array of economic activities.
 */
export declare function extractEconomicActivities(text: string): string[];

/**
 * Extract the address from PDF text.
 *
 * @param text - PDF text content.
 * @returns Address or `null`.
 */
export declare function extractAddress(text: string): string | null;

/**
 * Validate if a file is a valid PDF.
 *
 * @param dataBuffer - File buffer. `Buffer` is referenced as a global type,
 *   so consumers presumably need the Node.js typings available.
 * @returns `true` if valid PDF, `false` otherwise.
 */
export declare function isValidPDF(dataBuffer: Buffer): boolean;

/**
 * Validate the file extension.
 *
 * @param filePath - Path to the file.
 * @returns `true` if the file has a `.pdf` extension.
 */
export declare function hasValidExtension(filePath: string): boolean;

/**
 * Check if the document appears to be a Chilean tax document.
 *
 * @param text - PDF text content.
 * @returns `true` if it appears to be a tax document.
 */
export declare function isTaxDocument(text: string): boolean;

/**
 * Validate extracted data completeness.
 *
 * @param data - Extracted data.
 * @returns Validation result.
 */
export declare function validateExtractedData(data: ExtractedTaxData): ValidationResult;