// html-table-to-dataframe
// Version:
// Convert HTML tables to data-frames
// 116 lines (115 loc) • 5.3 kB
// JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.InteractiveDataFrame = void 0;
const base_frame_1 = require("./base-frame");
// Selector for the interactive controls searched for inside each body cell.
const BODY_CONTROL_SELECTOR = 'textarea, input, button, a, mat-select, mat-icon, mat-slide-toggle, select';
// Footer variant: identical to the body selector except that anchors (`a`) are not matched.
const FOOTER_CONTROL_SELECTOR = 'textarea, input, button, mat-select, mat-icon, mat-slide-toggle, select';
// Maps a control's tagName to the locator type reported for it. Tags that are
// not listed are passed on as '' and end up as 'unknown' in extractLocatorID.
const CONTROL_TYPE_BY_TAG = new Map([
    ['SELECT', 'select'],
    ['TEXTAREA', 'textarea'],
    ['INPUT', 'input'],
    ['A', 'link'],
]);
/**
 * Builds one locator per <td> of a row.
 *
 * For each cell the first descendant matching `controlSelector` is described;
 * when the cell holds no such control, the cell itself is described with type
 * 'unknown'.
 *
 * @param frame - Object providing `extractLocatorID(element, type)`.
 * @param row - Element whose <td> descendants are inspected.
 * @param controlSelector - CSS selector used to find the control inside a cell.
 * @returns An array with one extractLocatorID result per <td>.
 */
function collectRowLocators(frame, row, controlSelector) {
    return Array.from(row.querySelectorAll('td')).map((cell) => {
        const control = cell.querySelector(controlSelector);
        if (!control) {
            // Cell without a recognised control: describe the cell itself.
            return frame.extractLocatorID(cell, 'unknown');
        }
        return frame.extractLocatorID(control, CONTROL_TYPE_BY_TAG.get(control.tagName) || '');
    });
}
/**
 * Data frame whose cells are locator descriptions (attributes + control type)
 * of the interactive elements found in an HTML table.
 *
 * Relies on `document`, `options`, `buildData`, `validateHeaders` and
 * `generateHeaders`, none of which are defined in this class (presumably
 * inherited from BaseDataFrame — confirm against ./base-frame).
 */
class InteractiveDataFrame extends base_frame_1.BaseDataFrame {
    /**
     * Builds the frame from footer rows.
     *
     * Headers are the trimmed text of the <th> elements inside `table tfoot`;
     * when there are none, a copy of `options.header` is used instead. Rows are
     * the elements matched by `options.locatorId` in the whole document. When
     * `options.footer` or `options.locatorId` is not set, this falls back to
     * `build()`.
     *
     * @returns The result of `buildData(footerCells, footerHeaders)`, or of `build()`.
     * @throws {Error} When the table has no <tfoot>, or when the footer path is
     *   taken and `options.header` is not set.
     */
    buildFooter() {
        const tfoot = this.document.querySelector('table tfoot');
        if (!tfoot) {
            throw new Error('No <tfoot> element found in the table, but footer option is enabled.');
        }
        const options = this.options;
        // Footer headers come from the <th> cells of the <tfoot>.
        let footerHeaders = Array.from(tfoot.querySelectorAll('th')).map((th) => (th.textContent || '').trim());
        // No <th> in the footer: fall back to a copy of options.header.
        if (footerHeaders.length === 0 && options != null && options.header) {
            footerHeaders = [...options.header];
        }
        if (options == null || !options.footer || !options.locatorId) {
            return this.build();
        }
        // Fail fast, before walking the DOM.
        // NOTE(review): this checks options.header even when <th> headers were
        // found in the footer — confirm that is intended.
        if (!options.header) {
            throw new Error('No Headers Provided!');
        }
        const footerCells = Array.from(this.document.querySelectorAll(options.locatorId))
            .map((row) => collectRowLocators(this, row, FOOTER_CONTROL_SELECTOR));
        return this.buildData(footerCells, footerHeaders);
    }
    /**
     * Builds the frame from the rows of the first <tbody> in the document.
     *
     * Uses `options.header` (after `validateHeaders`) when it is a non-empty
     * list; otherwise headers come from `generateHeaders()`.
     *
     * @returns The result of `buildData(rows, headers)`.
     * @throws {Error} When the document has no <tbody>.
     */
    build() {
        // Only rows inside the <tbody> are considered, not the whole document.
        const tbody = this.document.querySelector('tbody');
        if (!tbody) {
            throw new Error('No <tbody> element found in the HTML.');
        }
        const options = this.options;
        let headers;
        if (options != null && options.header && options.header.length > 0) {
            headers = options.header;
            this.validateHeaders(headers);
        }
        else {
            // No headers supplied: let generateHeaders() produce them.
            headers = this.generateHeaders();
        }
        const rows = Array.from(tbody.querySelectorAll('tr'))
            .map((row) => collectRowLocators(this, row, BODY_CONTROL_SELECTOR));
        return this.buildData(rows, headers);
    }
    /**
     * Extracts all attributes from a DOM element.
     *
     * @param element - The DOM element from which to extract the attributes.
     * @param type - The type of the element (e.g., 'select', 'input', 'textarea').
     *   A falsy value is reported as 'unknown'.
     * @returns A LocatorID object containing all attributes as key-value pairs and the element type.
     */
    extractLocatorID(element, type) {
        const attributes = {};
        for (const attr of Array.from(element.attributes)) {
            attributes[attr.name] = attr.value;
        }
        return {
            attributes,
            type: type || 'unknown',
        };
    }
}
exports.InteractiveDataFrame = InteractiveDataFrame;