@pratiksha90/financial-data-extractors
Version:
Utilities for extracting financial data from various economic calendar websites
94 lines (93 loc) • 2.45 kB
TypeScript
import { TableMetadata } from '../types';
/**
* Table data structure definitions
*/
/**
* A single extracted table cell.
*
* Only the shape is declared here; how each field is populated is decided by the
* implementation, which is not part of this declaration file.
*/
export interface TableCellData {
/** Cell value as a string, number, boolean, or `null`. Presumably the typed form of `rawText` — confirm against the implementation. */
value: string | number | boolean | null;
/** Text of the cell as a string. Presumably the text before type conversion — confirm against the implementation. */
rawText: string;
/** Optional row span of the cell. Presumably mirrors the HTML `rowspan` attribute — TODO confirm. */
rowspan?: number;
/** Optional column span of the cell. Presumably mirrors the HTML `colspan` attribute — TODO confirm. */
colspan?: number;
}
/**
* One table row, represented as a dictionary of cells addressed by string key.
*
* NOTE(review): the keys are presumably the table's header names — verify against the
* code that builds rows. The index signature accepts any string, so a lookup with an
* unknown key is not rejected at compile time.
*/
export interface TableRowData {
[key: string]: TableCellData;
}
/**
* A complete extracted table: header labels, row objects, and associated metadata.
*/
export interface TableData {
/** Header labels of the table, as strings. */
headers: string[];
/** Rows of the table; each row maps string keys to cells. */
rows: TableRowData[];
/** Metadata for the table; `TableMetadata` is declared in `../types` and its shape is not visible from this file. */
metadata: TableMetadata;
}
/**
* The TableHandler class processes HTML tables and extracts structured data.
*
* This is an ambient declaration (`export declare class`): it describes the type
* surface only and contains no method bodies. Private members are declared by name
* without signatures, so their parameters and return types cannot be determined from
* this file; the comments on them are the descriptions shipped with the declaration.
*/
export declare class TableHandler {
/**
* Extract all tables from a document
* @param document The document to extract tables from
* @returns Array of table data
*/
extractAllTables(document: Document): TableData[];
/**
* Check if the document appears to be an economic calendar
*/
private checkIfEconomicCalendar;
/**
* Extract data from a specific table element
*/
private extractTableData;
/**
* Make headers unique by adding a suffix to duplicates
*/
private makeHeadersUnique;
/**
* Create a 2D grid representation of the table to handle rowspan/colspan
*/
private createTableGrid;
/**
* Convert the grid to row objects using headers
*/
private convertGridToRows;
/**
* Helper method to find the maximum number of columns in any row
*/
private getMaxColumns;
/**
* Clean and normalize text from a cell
*/
private cleanText;
/**
* Convert cell value to appropriate type
*/
private convertCellValue;
/**
* Try to find and extract tables from non-standard or pseudo-table structures
*/
private extractPseudoTables;
/**
* Process tables for economic calendar format
* @param tables The table data to process
* @returns Typed as `any`; the shape of the result is not described by this declaration.
* NOTE(review): callers get no compile-time checking on the result — consider
* publishing a concrete result type once the implementation's output is confirmed.
*/
processEconomicCalendarData(tables: TableData[]): any;
/**
* Map table headers to standard economic calendar fields
*/
private mapCalendarHeaders;
/**
* Extract a date from a row if present
*/
private extractDateFromRow;
/**
* Check if a string looks like a date
*/
private isDateString;
/**
* Extract event data from a row
*/
private extractEventData;
/**
* Convert table data to a simplified format suitable for JSON export
* @param tableData The table to simplify
* @returns Typed as `any`; the simplified shape is not described by this declaration.
*/
simplifyTableData(tableData: TableData): any;
/**
* Convert all tables to a simplified format
* @param tables The tables to simplify
* @returns Array typed as `any[]`. Presumably one simplified entry per input table —
* confirm against the implementation.
*/
simplifyAllTables(tables: TableData[]): any[];
}