@pratiksha90/financial-data-extractors
Version:
Utilities for extracting financial data from various economic calendar websites
121 lines (120 loc) • 6.72 kB
JavaScript
;
// Set the `__esModule` flag on the CommonJS exports object (the conventional
// marker transpiled ES modules carry so importers can apply ES-module interop).
Object.defineProperty(exports, "__esModule", { value: true });
// Public API of this module. The functions are declared further down; function
// declarations are hoisted, so they can be assigned here before their source text.
exports.isTradingEconomicsCalendar = isTradingEconomicsCalendar;
exports.extractTradingEconomicsData = extractTradingEconomicsData;
/**
 * Check whether a URL refers to the Trading Economics calendar.
 *
 * The check is a plain substring match on 'tradingeconomics.com/calendar',
 * so query strings, fragments and sub-paths of the calendar also match.
 *
 * @param {*} url - The URL to test. Strings are matched as-is; other non-null
 *   values (for example `URL` or `Location` objects) are converted with
 *   `String()` first.
 * @returns {boolean} `true` when the URL contains the calendar path; `false`
 *   otherwise, including when `url` is `null` or `undefined`.
 */
function isTradingEconomicsCalendar(url) {
    // A missing URL cannot be the calendar page; previously this threw a TypeError.
    if (url === null || url === undefined) {
        return false;
    }
    // String() lets callers pass `window.location` or a URL instance directly.
    return String(url).includes('tradingeconomics.com/calendar');
}
/**
 * Read trimmed text from an element, or from its first descendant matching
 * `selector` when a selector is supplied.
 *
 * Returns '' when the element is missing, when the selector matches nothing,
 * or when the chosen node has no text content.
 */
function getText(element, selector) {
    if (!element) {
        return '';
    }
    // With a selector the text comes from the matched child, otherwise from the element itself.
    const target = selector ? element.querySelector(selector) : element;
    if (!target) {
        return '';
    }
    const rawText = target.textContent;
    if (rawText === null || rawText === undefined) {
        return '';
    }
    return rawText.trim() || '';
}
/**
 * Trimmed `textContent` of a node; '' when the node or its text is missing.
 */
function readTrimmedText(node) {
    if (node === null || node === undefined) {
        return '';
    }
    const text = node.textContent;
    return text === null || text === undefined ? '' : text.trim();
}
/**
 * Trimmed text of the first descendant of `cell` matching `selector`; '' when
 * the cell or the descendant is missing.
 */
function readChildText(cell, selector) {
    if (cell === null || cell === undefined) {
        return '';
    }
    return readTrimmedText(cell.querySelector(selector));
}
/**
 * Trimmed cell text with every run of whitespace collapsed to a single space.
 */
function readCellValue(cell) {
    return readTrimmedText(cell).replace(/\s+/g, ' ');
}
/**
 * Date label for an event row: walks backwards from the row's parent through
 * its previous siblings to the nearest THEAD and reads its `th[colspan="3"]`.
 * Returns 'Unknown Date' when no such header (or header text) is found.
 */
function findDateLabelForRow(row) {
    const section = row.parentElement;
    let candidate = section ? section.previousElementSibling : null;
    // Compare case-insensitively: tagName is upper-case only in HTML documents.
    while (candidate && String(candidate.tagName).toUpperCase() !== 'THEAD') {
        candidate = candidate.previousElementSibling;
    }
    const header = candidate ? candidate.querySelector('th[colspan="3"]') : null;
    return readTrimmedText(header) || 'Unknown Date';
}
/**
 * Turn a row's `data-url` value into an absolute Trading Economics URL.
 * Returns `undefined` when the attribute is missing or empty.
 */
function buildEventUrl(dataUrl) {
    const origin = 'https://tradingeconomics.com';
    if (!dataUrl) {
        return undefined;
    }
    // Already absolute: prefixing the origin would produce a malformed URL.
    if (/^https?:\/\//i.test(dataUrl)) {
        return dataUrl;
    }
    // Make sure exactly one slash separates the origin from the path.
    return dataUrl.startsWith('/') ? `${origin}${dataUrl}` : `${origin}/${dataUrl}`;
}
/**
 * Extract calendar events directly from the DOM - Trading Economics specific.
 *
 * Every `tr[data-url]` row with at least five `td` cells becomes one event.
 * Cells are read positionally: 0 time, 1 country (`.calendar-iso`), 2 event
 * name and reference, 3 actual, 4 previous, 5 consensus, 6 forecast (the last
 * two default to '' when the row has fewer cells).
 *
 * @param {{document: Document}} options - Object carrying the document to scan.
 * @returns {Array<Object>} Events with `date`, `time`, `country`, `event`,
 *   `reference`, `actual`, `previous`, `consensus`, `forecast` and `url`
 *   (`undefined` when the row has no `data-url` value). Returns [] when no
 *   rows are found or when scanning the document throws; a failure in a
 *   single row is logged and that row is skipped.
 */
function extractCalendarEventsFromDOM(options) {
    const { document } = options;
    const events = [];
    try {
        // Find all table rows with data-url attribute (these are the event rows)
        const rows = document.querySelectorAll('tr[data-url]');
        console.log(`Found ${rows.length} total event rows`);
        if (rows.length === 0) {
            console.error("No event rows found");
            return [];
        }
        rows.forEach((row, index) => {
            // Each row is isolated so one malformed row does not discard the rest.
            try {
                const dataUrl = row.getAttribute('data-url');
                const currentDate = findDateLabelForRow(row);
                const cells = row.querySelectorAll('td');
                // Debug log
                console.log(`Row ${index}, Cells: ${cells.length}, Date: ${currentDate}`);
                if (cells.length < 5) {
                    console.log(`Skipping row with insufficient cells: ${cells.length}`);
                    return;
                }
                // Log cell contents for debugging
                for (let i = 0; i < cells.length; i++) {
                    const cellText = cells[i].textContent;
                    console.log(`Cell ${i} content: ${cellText === null || cellText === undefined ? undefined : cellText.trim()}`);
                }
                const eventCell = cells[2];
                // Prefer the event link; fall back to the first non-reference span.
                const eventName = readChildText(eventCell, 'a.calendar-event') ||
                    readChildText(eventCell, 'span:not(.calendar-reference)');
                events.push({
                    date: currentDate,
                    time: readTrimmedText(cells[0]),
                    country: readChildText(cells[1], '.calendar-iso'),
                    event: eventName,
                    reference: readChildText(eventCell, 'span.calendar-reference'),
                    actual: readCellValue(cells[3]),
                    previous: readCellValue(cells[4]),
                    consensus: cells.length > 5 ? readCellValue(cells[5]) : '',
                    forecast: cells.length > 6 ? readCellValue(cells[6]) : '',
                    url: buildEventUrl(dataUrl)
                });
            }
            catch (rowError) {
                console.error(`Error processing row ${index}:`, rowError);
            }
        });
        console.log(`Successfully extracted ${events.length} events`);
        return events;
    }
    catch (e) {
        console.error('Error extracting Trading Economics data:', e);
        return [];
    }
}
/**
 * Extract the Trading Economics calendar from a document and wrap the events
 * in a result object with descriptive metadata.
 *
 * @param {{document: Document}} options - Object carrying the document to scan;
 *   passed through unchanged to the DOM extraction step.
 * @returns {{metadata: Object, events: Array}} `metadata` holds a fixed title,
 *   the document URL ('' when absent), the extraction time as an ISO string
 *   and the event count; `events` is the extracted list.
 */
function extractTradingEconomicsData(options) {
    const pageDocument = options.document;
    const sourceUrl = pageDocument.URL || '';
    console.log("Starting Trading Economics calendar extraction...");
    // Extract events directly from DOM
    const extractedEvents = extractCalendarEventsFromDOM(options);
    // The timestamp is taken after extraction has finished.
    const metadata = {
        title: "Trading Economics Calendar",
        source: sourceUrl,
        extractedAt: new Date().toISOString(),
        totalEvents: extractedEvents.length
    };
    return { metadata: metadata, events: extractedEvents };
}