parse-html-table
Version:
A lightweight library for parsing HTML tables into structured data using TypeScript.
55 lines (49 loc) • 1.56 kB
JavaScript
;
const cheerio = require('cheerio');
/**
 * Unwraps a module namespace to its `default` export when it has one.
 *
 * @param {*} e - Value returned by `require()`.
 * @returns {*} `e.default` when `e` is a truthy object with a `default`
 *   property (own or inherited); otherwise `e` itself, unchanged.
 */
function _interopDefaultCompat(e) {
  const hasDefaultExport = Boolean(e) && typeof e === 'object' && 'default' in e;
  if (hasDefaultExport) {
    return e.default;
  }
  return e;
}
// Normalized cheerio entry point: the module's `default` export when the
// required value is an object that has one, otherwise the required value itself.
const cheerio__default = /*#__PURE__*/_interopDefaultCompat(cheerio);
/**
 * Creates a parser that extracts `{ headers, rows }` from HTML table markup.
 *
 * Usage is two-step: the returned function takes the HTML and loads it with
 * cheerio, then hands back a zero-argument `parseTable` function. Calling
 * `parseTable()` performs the extraction:
 *
 *   const result = createTableParser()(html)();
 *
 * - `headers`: the trimmed text of every `<th>` in the loaded document, in
 *   document order, converted to camelCase ("First Name" -> "firstName").
 * - `rows`: one plain object per `tbody tr` that contains at least one
 *   non-blank `<td>`. Each non-blank cell is stored under the header at the
 *   same index as the cell's position among the row's `<td>` elements; blank
 *   cells are omitted. A cell with no header at its index is stored under
 *   `column<N>` (N is the 1-based cell position) rather than under the literal
 *   key "undefined", so several surplus cells no longer overwrite each other.
 *
 * @returns {function(string): function(): {headers: string[], rows: Object[]}}
 *   Function that loads the given HTML and returns the `parseTable` thunk.
 */
function createTableParser() {
  // Lower-cases the first word and capitalizes each following word.
  const toCamelCase = (text) =>
    text
      .split(/\s+/)
      .map((word, index) =>
        index === 0
          ? word.toLowerCase()
          : word.charAt(0).toUpperCase() + word.slice(1).toLowerCase()
      )
      .join("");

  return (html) => {
    const $ = cheerio__default.load(html);

    // Collects the camelCased text of every <th> in the document.
    const parseHeaders = () =>
      $("th")
        .map((_, el) => toCamelCase($(el).text().trim()))
        .get();

    // Builds one object per body row, skipping rows without any cell text.
    const parseRows = (headers) =>
      $("tbody tr")
        .map((_, row) => {
          const rowData = {};
          let hasContent = false;
          $(row)
            .find("td")
            .each((index, cell) => {
              const cellText = $(cell).text().trim();
              if (cellText === "") {
                return;
              }
              const header = headers[index];
              // Rows can be wider than the header list; give surplus cells a
              // positional key so they stay distinct from one another.
              const key = header === undefined ? `column${index + 1}` : header;
              rowData[key] = cellText;
              hasContent = true;
            });
          return hasContent ? rowData : null;
        })
        .get()
        .filter((rowData) => rowData !== null);

    const parseTable = () => {
      const headers = parseHeaders();
      const rows = parseRows(headers);
      return {
        headers,
        rows
      };
    };
    return parseTable;
  };
}
exports.createTableParser = createTableParser;