UNPKG

@pratiksha90/financial-data-extractors

Version:

Utilities for extracting financial data from various economic calendar websites

121 lines (120 loc) 6.72 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.isTradingEconomicsCalendar = isTradingEconomicsCalendar; exports.extractTradingEconomicsData = extractTradingEconomicsData; /** * Check if the current page is Trading Economics calendar */ function isTradingEconomicsCalendar(url) { return url.includes('tradingeconomics.com/calendar'); } /** * Helper to safely extract text content from an element or its child */ function getText(element, selector) { var _a, _b; if (!element) return ''; if (selector) { const child = element.querySelector(selector); return child ? ((_a = child.textContent) === null || _a === void 0 ? void 0 : _a.trim()) || '' : ''; } return ((_b = element.textContent) === null || _b === void 0 ? void 0 : _b.trim()) || ''; } /** * Extract calendar events directly from DOM - Trading Economics specific */ /** * Extract calendar events directly from DOM - Trading Economics specific */ function extractCalendarEventsFromDOM(options) { const { document } = options; const events = []; try { // Find all table rows with data-url attribute (these are the event rows) const rows = document.querySelectorAll('tr[data-url]'); console.log(`Found ${rows.length} total event rows`); if (rows.length === 0) { console.error("No event rows found"); return []; } // Process each row rows.forEach((row, index) => { var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p, _q, _r, _s, _t, _u, _v, _w, _x, _y, _z, _0; try { // Extract the data-url attribute for the event URL const dataUrl = row.getAttribute('data-url'); // Find the date by looking at the preceding thead element let currentDateElement = (_a = row.parentElement) === null || _a === void 0 ? void 0 : _a.previousElementSibling; while (currentDateElement && currentDateElement.tagName !== 'THEAD') { currentDateElement = currentDateElement.previousElementSibling; } const dateHeader = currentDateElement === null || currentDateElement === void 0 ? void 0 : currentDateElement.querySelector('th[colspan="3"]'); const currentDate = ((_b = dateHeader === null || dateHeader === void 0 ? void 0 : dateHeader.textContent) === null || _b === void 0 ? void 0 : _b.trim()) || 'Unknown Date'; // Get all cells in the row const cells = row.querySelectorAll('td'); // Debug log console.log(`Row ${index}, Cells: ${cells.length}, Date: ${currentDate}`); if (cells.length < 5) { console.log(`Skipping row with insufficient cells: ${cells.length}`); return; } // Log cell contents for debugging for (let i = 0; i < cells.length; i++) { console.log(`Cell ${i} content: ${(_c = cells[i].textContent) === null || _c === void 0 ? void 0 : _c.trim()}`); } // Extract basic information from cells const timeText = ((_e = (_d = cells[0]) === null || _d === void 0 ? void 0 : _d.textContent) === null || _e === void 0 ? void 0 : _e.trim()) || ''; const countryCode = ((_h = (_g = (_f = cells[1]) === null || _f === void 0 ? void 0 : _f.querySelector('.calendar-iso')) === null || _g === void 0 ? void 0 : _g.textContent) === null || _h === void 0 ? void 0 : _h.trim()) || ''; // Get event name and reference from cell 2 const eventName = ((_l = (_k = (_j = cells[2]) === null || _j === void 0 ? void 0 : _j.querySelector('a.calendar-event')) === null || _k === void 0 ? void 0 : _k.textContent) === null || _l === void 0 ? void 0 : _l.trim()) || ((_p = (_o = (_m = cells[2]) === null || _m === void 0 ? void 0 : _m.querySelector('span:not(.calendar-reference)')) === null || _o === void 0 ? void 0 : _o.textContent) === null || _p === void 0 ? void 0 : _p.trim()) || ''; const reference = ((_s = (_r = (_q = cells[2]) === null || _q === void 0 ? void 0 : _q.querySelector('span.calendar-reference')) === null || _r === void 0 ? void 0 : _r.textContent) === null || _s === void 0 ? void 0 : _s.trim()) || ''; // Get values from cells 3-6 const actual = ((_u = (_t = cells[3]) === null || _t === void 0 ? void 0 : _t.textContent) === null || _u === void 0 ? void 0 : _u.trim().replace(/\s+/g, ' ')) || ''; const previous = ((_w = (_v = cells[4]) === null || _v === void 0 ? void 0 : _v.textContent) === null || _w === void 0 ? void 0 : _w.trim().replace(/\s+/g, ' ')) || ''; const consensus = cells.length > 5 ? ((_y = (_x = cells[5]) === null || _x === void 0 ? void 0 : _x.textContent) === null || _y === void 0 ? void 0 : _y.trim().replace(/\s+/g, ' ')) || '' : ''; const forecast = cells.length > 6 ? ((_0 = (_z = cells[6]) === null || _z === void 0 ? void 0 : _z.textContent) === null || _0 === void 0 ? void 0 : _0.trim().replace(/\s+/g, ' ')) || '' : ''; // Create event object const event = { date: currentDate, time: timeText, country: countryCode, event: eventName, reference: reference, actual: actual, previous: previous, consensus: consensus, forecast: forecast, url: dataUrl ? `https://tradingeconomics.com${dataUrl}` : undefined }; events.push(event); } catch (rowError) { console.error(`Error processing row ${index}:`, rowError); } }); console.log(`Successfully extracted ${events.length} events`); return events; } catch (e) { console.error('Error extracting Trading Economics data:', e); return []; } } function extractTradingEconomicsData(options) { const { document } = options; const url = document.URL || ''; console.log("Starting Trading Economics calendar extraction..."); // Extract events directly from DOM const events = extractCalendarEventsFromDOM(options); return { metadata: { title: "Trading Economics Calendar", source: url, extractedAt: new Date().toISOString(), totalEvents: events.length }, events: events }; }