parse-html-table
Version:
A lightweight library for parsing HTML tables into structured data using TypeScript.
49 lines (46 loc) • 1.33 kB
JavaScript
import cheerio from 'cheerio';
/**
 * Creates a parser that turns the markup of an HTML table into structured data.
 *
 * Usage is two-step: the factory returns a function that loads the markup with
 * cheerio, and that function returns a `parseTable` thunk which performs the
 * actual extraction each time it is invoked:
 *
 *   const result = createTableParser()(html)();
 *
 * @returns {(html: string) => () => { headers: string[], rows: Object[] }}
 *   A function that accepts HTML and returns the `parseTable` thunk.
 *   - `headers` holds the text of every `<th>` element, converted to camelCase.
 *   - `rows` holds one object per `<tbody> <tr>`, keyed by the header at the
 *     same column index. Blank cells are omitted from their row, and rows that
 *     end up with no values are dropped.
 */
function createTableParser() {
  // Turns header text such as "First Name" into a camelCase key ("firstName").
  const toCamelCase = (text) =>
    text
      .trim()
      .split(/\s+/)
      .map((word, index) =>
        index === 0
          ? word.toLowerCase()
          : `${word.charAt(0).toUpperCase()}${word.slice(1).toLowerCase()}`
      )
      .join("");

  return (html) => {
    const $ = cheerio.load(html);

    // Collects the camelCased text of every <th> in document order.
    const parseHeaders = () =>
      $("th")
        .get()
        .map((el) => toCamelCase($(el).text()));

    // Builds one object per body row, keyed by the header at each cell's index.
    const parseRows = (headers) =>
      $("tbody tr")
        .get()
        .map((row) => {
          const rowData = {};
          let hasData = false;
          $(row)
            .find("td")
            .each((index, cell) => {
              const key = headers[index];
              const cellText = $(cell).text().trim();
              // A cell beyond the last header has no key to live under; storing
              // it would create a bogus "undefined" property, so it is skipped.
              if (key === undefined || cellText === "") {
                return;
              }
              rowData[key] = cellText;
              hasData = true;
            });
          return hasData ? rowData : null;
        })
        .filter((rowData) => rowData !== null);

    const parseTable = () => {
      const headers = parseHeaders();
      const rows = parseRows(headers);
      return {
        headers,
        rows,
      };
    };

    // The thunk itself is returned (not its result); callers invoke it to parse.
    return parseTable;
  };
}
export { createTableParser };