/*
 * @felisdiligens/md-table-tools
 * Version: (not specified)
 * MultiMarkdown table tools
 * 1,235 lines (1,225 loc) • 84.8 kB
 * JavaScript
 */
'use strict';
var stringWidth = require('string-width');
var cheerio = require('cheerio');
var htmlparser2 = require('htmlparser2');
var TurndownService = require('turndown');
/**
 * Makes a required module readable through `.default`.
 * Objects flagged with `__esModule` are returned untouched; anything else is wrapped as `{ default: e }`.
 */
function _interopDefault(e) {
    if (e && e.__esModule) {
        return e;
    }
    return { default: e };
}
/**
 * Builds a frozen, prototype-less namespace object for a required module.
 * Objects flagged with `__esModule` are returned as-is. Otherwise every own enumerable key
 * except `default` is exposed through a getter (so later changes on `e` stay visible),
 * and `default` points at `e` itself.
 */
function _interopNamespace(e) {
    if (e && e.__esModule) {
        return e;
    }
    const namespace = Object.create(null);
    if (e) {
        for (const key of Object.keys(e)) {
            if (key === 'default') {
                continue;
            }
            const descriptor = Object.getOwnPropertyDescriptor(e, key);
            // Re-use an existing accessor; plain data properties get a live getter instead.
            const exposed = descriptor.get ? descriptor : {
                enumerable: true,
                get: () => e[key]
            };
            Object.defineProperty(namespace, key, exposed);
        }
    }
    namespace.default = e;
    return Object.freeze(namespace);
}
// Interop views of the modules required above: the `__default` bindings are produced by
// _interopDefault (export reachable via `.default`), the `__namespace` bindings by _interopNamespace.
var stringWidth__default = /*#__PURE__*/_interopDefault(stringWidth);
var cheerio__namespace = /*#__PURE__*/_interopNamespace(cheerio);
var htmlparser2__namespace = /*#__PURE__*/_interopNamespace(htmlparser2);
var TurndownService__default = /*#__PURE__*/_interopDefault(TurndownService);
/** Describes how text is aligned in a column. The `default` member carries the value "start". */
exports.TextAlignment = void 0;
(function (TextAlignment) {
    const members = [
        ["left", "left"],
        ["center", "center"],
        ["right", "right"],
        ["default", "start"],
    ];
    for (const [key, value] of members) {
        TextAlignment[key] = value;
    }
})(exports.TextAlignment || (exports.TextAlignment = {}));
/** Describes how a cell is merged with a neighboring cell (numeric enum with reverse name lookup). */
exports.TableCellMerge = void 0;
(function (TableCellMerge) {
    ["above", "left", "none"].forEach((memberName, ordinal) => {
        TableCellMerge[memberName] = ordinal;
        TableCellMerge[ordinal] = memberName;
    });
})(exports.TableCellMerge || (exports.TableCellMerge = {}));
/** Describes where the table caption is placed. */
exports.TableCaptionPosition = void 0;
(function (TableCaptionPosition) {
    for (const side of ["top", "bottom"]) {
        TableCaptionPosition[side] = side;
    }
})(exports.TableCaptionPosition || (exports.TableCaptionPosition = {}));
/** Error thrown when a row or column would be moved to a position outside of the table. */
class IndexOutOfBoundsError extends Error {
    /** @param msg Human-readable description of the failure. */
    constructor(msg) {
        super(msg);
        // Restore the prototype chain explicitly so `instanceof IndexOutOfBoundsError` holds.
        const { prototype } = IndexOutOfBoundsError;
        Object.setPrototypeOf(this, prototype);
    }
}
/** Caption of a table: its text, an optional label and where it is placed. */
class TableCaption {
    text;
    label;
    position;
    /**
     * @param text Caption text.
     * @param label Optional explicit label.
     * @param position One of exports.TableCaptionPosition (defaults to `top`).
     */
    constructor(text = "", label = "", position = exports.TableCaptionPosition.top) {
        Object.assign(this, { text, label, position });
    }
    /**
     * Returns the label used to reference the table.
     * A non-blank explicit label wins (trimmed, whitespace replaced by "-");
     * otherwise the label is derived from the caption text (trimmed, lower-cased,
     * reduced to ASCII letters and digits).
     */
    getLabel() {
        const explicitLabel = typeof this.label === 'string' ? this.label.trim() : "";
        if (explicitLabel !== "") {
            return explicitLabel.replace(/\s/g, "-");
        }
        // "If there is no label, then the caption acts as the label"
        const dashed = this.text.trim().toLowerCase().replace(/\s/g, "-");
        return dashed.replace(/[^a-zA-Z0-9]/g, "");
    }
}
/** A single cell, linked to the table, row and column it belongs to. */
class TableCell {
    text = "";
    table;
    row;
    column;
    merged = exports.TableCellMerge.none;
    isHeader = false;
    textAlign = exports.TextAlignment.default;
    /**
     * @param table The owning table.
     * @param row The row this cell sits in.
     * @param column The column this cell sits in.
     */
    constructor(table, row, column) {
        this.table = table;
        this.row = row;
        this.column = column;
    }
    /** True when either the cell itself or its row is flagged as header. */
    isHeaderCell() {
        return this.row.isHeader || this.isHeader;
    }
    /** The cell's own alignment, falling back to the column's alignment when the cell uses the default. */
    getTextAlignment() {
        const usesColumnAlignment = this.textAlign == exports.TextAlignment.default;
        return usesColumnAlignment ? this.column.textAlign : this.textAlign;
    }
    /** Replaces the cell text. */
    setText(text) {
        this.text = text;
    }
    /** Counts this cell plus the directly following cells in the same row that are merged to the left. */
    getColspan() {
        if (this.merged == exports.TableCellMerge.left)
            return 1;
        const firstNeighbour = this.table.indexOfColumn(this.column) + 1;
        if (firstNeighbour > this.table.columnCount())
            return 1;
        const rowCells = this.table.getCellsInRow(this.row);
        let span = 1;
        let cursor = firstNeighbour;
        while (cursor < this.table.columnCount() && rowCells[cursor].merged == exports.TableCellMerge.left) {
            span++;
            cursor++;
        }
        return span;
    }
    /** Counts this cell plus the directly following cells in the same column that are merged with the cell above. */
    getRowspan() {
        if (this.merged == exports.TableCellMerge.above)
            return 1;
        const firstNeighbour = this.table.indexOfRow(this.row) + 1;
        if (firstNeighbour > this.table.rowCount())
            return 1;
        const columnCells = this.table.getCellsInColumn(this.column);
        let span = 1;
        let cursor = firstNeighbour;
        while (cursor < this.table.rowCount() && columnCells[cursor].merged == exports.TableCellMerge.above) {
            span++;
            cursor++;
        }
        return span;
    }
}
/** A table row together with the cells it currently holds. */
class TableRow {
    index;
    isHeader;
    isMultiline;
    startsNewSection;
    cells;
    /**
     * @param index Position of the row within the table.
     * @param isHeader Whether the row is a header row.
     * @param isMultiline Only pertains to MultiMarkdown multiline feature. Ignored by other parsers/renderers. See Table.mergeMultilineRows()
     * @param startsNewSection Whether the row begins a new table section.
     */
    constructor(index = 0, isHeader = false, isMultiline = false, startsNewSection = false) {
        Object.assign(this, { index, isHeader, isMultiline, startsNewSection, cells: [] });
    }
    /**
     * Refreshes the row's cell list: when its length differs from the table's column count the list
     * is rebuilt from the table's cells; in every case it is ordered by column index.
     */
    updateCells(table) {
        const isStale = table.columnCount() != this.cells.length;
        if (isStale) {
            this.cells = table.getCells().filter((candidate) => candidate.row == this);
        }
        this.cells.sort((left, right) => left.column.index - right.column.index);
    }
    /** Returns the cell at the given position (negative positions count from the end), or undefined. */
    getCell(index) {
        return this.cells.at(index);
    }
    /** Returns the row's cell list. */
    getCells() {
        return this.cells;
    }
}
/** A table column together with the cells it currently holds. */
class TableColumn {
    index;
    textAlign;
    wrappable;
    cells;
    /**
     * @param index Position of the column within the table.
     * @param textAlign One of exports.TextAlignment (defaults to `default`).
     * @param wrappable Stored on the column as given.
     */
    constructor(index = 0, textAlign = exports.TextAlignment.default, wrappable = false) {
        Object.assign(this, { index, textAlign, wrappable, cells: [] });
    }
    /**
     * Refreshes the column's cell list: when its length differs from the table's row count the list
     * is rebuilt from the table's cells; in every case it is ordered by row index.
     */
    updateCells(table) {
        const isStale = table.rowCount() != this.cells.length;
        if (isStale) {
            this.cells = table.getCells().filter((candidate) => candidate.column == this);
        }
        this.cells.sort((upper, lower) => upper.row.index - lower.row.index);
    }
    /** Returns the cell at the given position (negative positions count from the end), or undefined. */
    getCell(index) {
        return this.cells.at(index);
    }
    /** Returns the column's cell list. */
    getCells() {
        return this.cells;
    }
}
/**
 * In-memory model of a table: rows, columns and the cells at their intersections,
 * plus an optional caption and the text surrounding the table.
 *
 * Structural changes (adding, removing or moving rows/columns) do not create or re-sort cells;
 * call update() afterwards.
 */
class Table {
    cells;
    rows;
    columns;
    caption;
    /** Text before the table */
    beforeTable;
    /** Text after the table */
    afterTable;
    /**
     * @param rowNum Number of empty rows to create.
     * @param colNum Number of empty columns to create.
     */
    constructor(rowNum = 0, colNum = 0) {
        this.cells = [];
        this.rows = Array.from({ length: rowNum }, (_, i) => new TableRow(i));
        this.columns = Array.from({ length: colNum }, (_, i) => new TableColumn(i));
        this.caption = null;
        this.beforeTable = "";
        this.afterTable = "";
    }
    /**
     * Adds a TableRow to the table.
     * @param index Insert row at index. -1 means it's appended.
     * @param row (optional)
     * @returns The added row.
     */
    addRow(index = -1, row = new TableRow()) {
        if (index < 0) {
            row.index = this.rows.push(row) - 1;
        }
        else {
            row.index = index;
            this.rows.splice(index, 0, row);
        }
        return row;
    }
    /**
     * Adds a TableColumn to the table.
     * @param index Insert column at index. -1 means it's appended.
     * @param col (optional)
     * @returns The added column.
     */
    addColumn(index = -1, col = new TableColumn()) {
        if (index < 0) {
            // push() returns the new length, so the appended column sits at length - 1.
            col.index = this.columns.push(col) - 1;
        }
        else {
            col.index = index;
            this.columns.splice(index, 0, col);
        }
        return col;
    }
    /** Gets the row at index. Negative index counts back from the end. Returns undefined if out-of-bounds. */
    getRow(index) {
        return this.rows.at(index);
    }
    /** Gets the index of the row. -1 if it hasn't been found. */
    indexOfRow(row) {
        return this.rows.indexOf(row);
    }
    /** Gets the column at index. Negative index counts back from the end. Returns undefined if out-of-bounds. */
    getColumn(index) {
        return this.columns.at(index);
    }
    /** Gets the index of the column. -1 if it hasn't been found. */
    indexOfColumn(col) {
        return this.columns.indexOf(col);
    }
    /**
     * Removes the given column. Also removes all cells within the column.
     * @param col Either index or object reference.
     */
    removeColumn(col) {
        let colObj = typeof col === "number" ? this.columns.at(col) : col;
        let columnCells = this.getCellsInColumn(colObj);
        this.cells = this.cells.filter(cell => !columnCells.includes(cell));
        this.columns = this.columns.filter(c => c != colObj);
    }
    /**
     * Removes the given row. Also removes all cells within the row.
     * @param row Either index or object reference.
     */
    removeRow(row) {
        let rowObj = typeof row === "number" ? this.rows.at(row) : row;
        let rowCells = this.getCellsInRow(rowObj);
        this.cells = this.cells.filter(cell => !rowCells.includes(cell));
        this.rows = this.rows.filter(r => r != rowObj);
    }
    /**
     * Moves the given column to the new index.
     * @param col Either index or object reference.
     * @param newIndex The new index of the given column.
     * @throws {IndexOutOfBoundsError} Can't move column outside of table (also thrown when the column isn't part of this table).
     */
    moveColumn(col, newIndex) {
        const colObj = typeof col === "number" ? this.columns.at(col) : col;
        // Look the position up instead of trusting colObj.index, which is only refreshed by update().
        const currentIndex = this.columns.indexOf(colObj);
        if (colObj === undefined || currentIndex < 0 || newIndex >= this.columnCount() || newIndex < 0)
            throw new IndexOutOfBoundsError("(IndexOutOfBoundsError) Can't move column outside of table.");
        this.columns.splice(currentIndex, 1);
        this.columns.splice(newIndex, 0, colObj);
        // Moving shifts the neighbours as well, so re-number every column.
        this.columns.forEach((column, position) => { column.index = position; });
    }
    /**
     * Moves the given row to the new index.
     * @param row Either index or object reference.
     * @param newIndex The new index of the given row.
     * @throws {IndexOutOfBoundsError} Can't move row outside of table (also thrown when the row isn't part of this table).
     */
    moveRow(row, newIndex) {
        const rowObj = typeof row === "number" ? this.rows.at(row) : row;
        // Look the position up instead of trusting rowObj.index, which is only refreshed by update().
        const currentIndex = this.rows.indexOf(rowObj);
        if (rowObj === undefined || currentIndex < 0 || newIndex >= this.rowCount() || newIndex < 0)
            throw new IndexOutOfBoundsError("(IndexOutOfBoundsError) Can't move row outside of table.");
        this.rows.splice(currentIndex, 1);
        this.rows.splice(newIndex, 0, rowObj);
        // Moving shifts the neighbours as well, so re-number every row.
        this.rows.forEach((tableRow, position) => { tableRow.index = position; });
    }
    /** Returns a list of all rows that are headers. */
    getHeaderRows() {
        return this.rows.filter(r => r.isHeader);
    }
    /** Returns a list of all rows that aren't headers. */
    getNormalRows() {
        return this.rows.filter(r => !r.isHeader);
    }
    /** Returns all rows in the table, from top to bottom, including header rows. */
    getRows() {
        return this.rows;
    }
    /** Returns all columns in the table, from left to right. */
    getColumns() {
        return this.columns;
    }
    /** Returns all cells in the table. Isn't necessarily sorted! */
    getCells() {
        return this.cells;
    }
    /**
     * Returns all cells within the given row.
     * See also: {@link TableRow.getCells()}
     * @param row Either index or object reference.
     */
    getCellsInRow(row) {
        return (typeof row === "number" ? this.rows[row] : row).cells;
    }
    /**
     * Returns all cells within the given column.
     * See also: {@link TableColumn.getCells()}
     * @param column Either index or object reference.
     */
    getCellsInColumn(column) {
        return (typeof column === "number" ? this.columns[column] : column).cells;
    }
    /**
     * Returns the cell at row and column.
     * If no cell is shared by the row and the column, a new one is created and added.
     */
    getCellByObjs(rowObj, columnObj) {
        // Intersection of row / column:
        for (const cell of rowObj.cells) {
            if (columnObj.cells.includes(cell))
                return cell;
        }
        let newCell = new TableCell(this, rowObj, columnObj);
        this.addCell(newCell);
        return newCell;
    }
    /**
     * Returns the cell at row and column.
     * If the cell doesn't already exist, it will be created.
     * @param row Either index or object reference.
     * @param column Either index or object reference.
     * @returns The cell at row and column.
     */
    getCell(row, column) {
        return this.getCellByObjs(typeof row === "number" ? this.rows.at(row) : row, typeof column === "number" ? this.columns.at(column) : column);
    }
    /**
     * Adds the cell to the Table and the cell's respective TableRow and TableColumn.
     * (Be careful not to add a cell with row/column that already exist. Otherwise, the added cell will be overshadowed and not be used.)
     */
    addCell(cell) {
        this.cells.push(cell);
        cell.row.cells.push(cell);
        cell.column.cells.push(cell);
    }
    /** Returns the total amount of rows in the table, including the header rows. */
    rowCount() {
        return this.rows.length;
    }
    /** Returns the total amount of columns in the table. */
    columnCount() {
        return this.columns.length;
    }
    /**
     * → Ensures that all table cells exist.
     * → Updates indices and sorts the cells within rows and columns.
     * → Tries to find invalid configurations and sanitize them.
     *
     * Call this method after altering the table.
     * @returns This table, for chaining.
     */
    update() {
        // Iterate over the entire table:
        let columnObj;
        let rowObj;
        for (let colIndex = 0; colIndex < this.columns.length; colIndex++) {
            // Update the column's index:
            columnObj = this.columns[colIndex];
            columnObj.index = colIndex;
            for (let rowIndex = 0; rowIndex < this.rows.length; rowIndex++) {
                // Update the row's index:
                rowObj = this.rows[rowIndex];
                rowObj.index = rowIndex;
                // Use "getCellByObjs" to ensure that the cell gets created, if it doesn't exist already:
                this.getCellByObjs(rowObj, columnObj);
            }
        }
        // Update the column's 'cells' array:
        for (const column of this.columns)
            column.updateCells(this);
        // Update the row's 'cells' array:
        for (const row of this.rows)
            row.updateCells(this);
        this.sanitize();
        return this;
    }
    /**
     * Tries to find invalid configurations and sanitize them.
     * @returns This table, for chaining.
     */
    sanitize() {
        if (this.getNormalRows().length > 0) {
            // Cannot merge cell above if in first row:
            for (const cell of this.getCellsInRow(this.getNormalRows()[0])) {
                if (cell.merged == exports.TableCellMerge.above)
                    cell.merged = exports.TableCellMerge.none;
            }
            this.getNormalRows()[0].startsNewSection = false;
        }
        for (const cell of this.cells) {
            // Cannot merge cell left if in first column:
            if (cell.column == this.columns[0] && cell.merged == exports.TableCellMerge.left)
                cell.merged = exports.TableCellMerge.none;
            // Cannot merge cell above if in first row:
            if ((cell.row == this.rows[0] || cell.row.startsNewSection) && cell.merged == exports.TableCellMerge.above)
                cell.merged = exports.TableCellMerge.none;
        }
        return this;
    }
    /**
     * Merges multiline rows (from MultiMarkdown feature) into "normal" rows.
     * This will destroy MultiMarkdown formatting! Only use when rendering into different formats.
     * @returns This table, for chaining.
     */
    mergeMultilineRows() {
        let newRows = [];
        let merging = false;
        let actualRowIndex = 0;
        this.getRows().forEach((row) => {
            if (merging) {
                // Append this row's text to the row that started the multiline group:
                row.getCells().forEach((cell, index) => {
                    const parentCell = newRows[actualRowIndex - 1].getCell(index);
                    parentCell.setText(parentCell.text + "\n" + cell.text);
                });
            }
            else {
                row.index = actualRowIndex;
                newRows.push(row);
                actualRowIndex++;
            }
            if (!merging && row.isMultiline) {
                merging = true;
            }
            else if (merging && !row.isMultiline) {
                merging = false;
            }
            row.isMultiline = false;
        });
        this.rows = newRows;
        // Drop the cells of the rows that were merged away; otherwise they linger in the
        // table's and the columns' cell lists and shift the cells of the rows below them.
        const keptRows = new Set(newRows);
        this.cells = this.cells.filter(cell => keptRows.has(cell.row));
        for (const column of this.columns)
            column.cells = column.cells.filter(cell => keptRows.has(cell.row));
        this.update();
        return this;
    }
}
/*
 * Due to the nature of CSV tables, some data will be lost when converting MMD (or HTML) to CSV.
 *
 * CSV file specifications and implementation:
 * https://www.rfc-editor.org/rfc/rfc4180
 * http://super-csv.github.io/super-csv/csv_specification.html
 */
/** Parses CSV text into a Table. */
class CSVTableParser {
    separator;
    quote;
    assumeFirstLineIsHeader;
    /**
     * @param separator Character that separates fields.
     * @param quote Character that encloses quoted fields; a doubled quote inside a quoted field yields one literal quote.
     * @param assumeFirstLineIsHeader Whether the first line is flagged as header row.
     */
    constructor(separator = ",", quote = "\"", assumeFirstLineIsHeader = true) {
        this.separator = separator;
        this.quote = quote;
        this.assumeFirstLineIsHeader = assumeFirstLineIsHeader;
    }
    /**
     * Parses the CSV string character by character.
     * Rows may have differing field counts: extra fields add columns, and missing cells
     * are created by the final Table.update() call.
     * @param table The CSV text.
     * @returns The parsed (and updated) table.
     */
    parse(table) {
        // Normalize line endings and make sure the last record is terminated:
        let csv = table.replace(/\r?\n/g, "\n");
        if (!csv.endsWith("\n"))
            csv += "\n";
        const parsedTable = new Table();
        let tableRow = parsedTable.addRow();
        tableRow.isHeader = this.assumeFirstLineIsHeader;
        let cellContent = "";
        let colIndex = 0;
        let isQuoted = false;
        let lastChar = null;
        for (const char of csv) {
            // Separator or newline outside of quotes ends the current field:
            if ((char == this.separator || char == "\n") && !isQuoted) {
                // Re-use the column at this position, or add one when this row is wider than all rows before it:
                const tableColumn = parsedTable.getColumn(colIndex) ?? parsedTable.addColumn();
                const tableCell = new TableCell(parsedTable, tableRow, tableColumn);
                tableCell.setText(cellContent);
                parsedTable.addCell(tableCell);
                cellContent = "";
                colIndex++;
                if (char == "\n") {
                    // Start the next row:
                    tableRow = parsedTable.addRow();
                    colIndex = 0;
                }
            }
            else if (char == this.quote) {
                // A quote directly after a closing quote is an escaped quote character:
                if (!isQuoted && lastChar == this.quote) {
                    cellContent += this.quote;
                }
                isQuoted = !isQuoted;
            }
            else {
                cellContent += char;
            }
            lastChar = char;
        }
        // The trailing newline always opens one row too many:
        parsedTable.removeRow(tableRow);
        // Same as the other parsers: fix indices and create missing cells.
        return parsedTable.update();
    }
}
/** Selects how CSVTableRenderer treats special characters (numeric enum with reverse name lookup). */
exports.CSVTableRendererMode = void 0;
(function (CSVTableRendererMode) {
    ["OmitSpecialCharacters", "EscapeWithQuotes", "AlwaysUseQuotes"].forEach((memberName, ordinal) => {
        CSVTableRendererMode[memberName] = ordinal;
        CSVTableRendererMode[ordinal] = memberName;
    });
})(exports.CSVTableRendererMode || (exports.CSVTableRendererMode = {}));
/** Renders a Table as CSV text. */
class CSVTableRenderer {
    separator;
    quote;
    lineBreak;
    mode;
    /**
     * @param separator String placed between fields.
     * @param quote String used to enclose fields; occurrences inside a quoted field are doubled.
     * @param lineBreak String placed between rows.
     * @param mode One of exports.CSVTableRendererMode (defaults to `EscapeWithQuotes`).
     */
    constructor(separator = ",", quote = "\"", lineBreak = "\r\n", mode = exports.CSVTableRendererMode.EscapeWithQuotes) {
        this.separator = separator;
        this.quote = quote;
        this.lineBreak = lineBreak;
        this.mode = mode;
    }
    /**
     * Renders every row of the table as one CSV line.
     * "Special" text is the separator, the quote and line breaks (`\r\n` or `\n`).
     * @param table The table to render.
     * @returns The CSV text (rows joined by `lineBreak`, no trailing line break).
     */
    render(table) {
        // Separator and quote are user supplied, so neutralize regex metacharacters before
        // building the pattern (e.g. a "^" separator must not negate a character class).
        const escapeForRegex = (literal) => String(literal).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        const specialPattern = [this.separator, this.quote]
            .filter(literal => literal !== "")
            .map(escapeForRegex)
            .concat(["\\r\\n", "\\n"])
            .join("|");
        const specialCharactersRegex = new RegExp(`(${specialPattern})`);
        const specialCharactersRegexGlobal = new RegExp(`(${specialPattern})`, "g");
        const doubledQuote = this.quote.repeat(2);
        // split/join treats the quote literally, both as search text and as replacement.
        const enclose = (text) => `${this.quote}${text.split(this.quote).join(doubledQuote)}${this.quote}`;
        const csv = [];
        for (const row of table.getRows()) {
            const renderedRow = [];
            for (const cell of table.getCellsInRow(row)) {
                switch (this.mode) {
                    case exports.CSVTableRendererMode.AlwaysUseQuotes:
                        renderedRow.push(enclose(cell.text));
                        break;
                    case exports.CSVTableRendererMode.EscapeWithQuotes:
                        renderedRow.push(specialCharactersRegex.test(cell.text) ? enclose(cell.text) : cell.text);
                        break;
                    case exports.CSVTableRendererMode.OmitSpecialCharacters:
                        renderedRow.push(cell.text.replace(specialCharactersRegexGlobal, ""));
                        break;
                }
            }
            csv.push(renderedRow.join(this.separator));
        }
        return csv.join(this.lineBreak);
    }
}
/** Error thrown by the table parsers when the input cannot be parsed. */
class ParsingError extends Error {
    /** @param msg Human-readable description of the failure. */
    constructor(msg) {
        super(msg);
        // Restore the prototype chain explicitly so `instanceof ParsingError` holds.
        const { prototype } = ParsingError;
        Object.setPrototypeOf(this, prototype);
    }
}
/*
 GitHub Flavored Markdown tables.
 Specification: https://github.github.com/gfm/#tables-extension-
 */
/** Matches a normalized table line: a leading pipe, at least one character, a trailing pipe. */
const rowRegex$2 = /^\|(.+)\|$/;
/** Matches a normalized delimiter line such as `|:--|:-:|--:|`. */
const delimiterRowRegex = /^\|(\s*:?\-+:?\s*\|)+$/;
/** States of the line-by-line GFM table parser (numeric enum with reverse name lookup). */
var ParsingState$2;
(function (ParsingState) {
    const stateNames = ["BeforeTable", "HeaderRow", "DelimiterRow", "DataRows", "AfterTable"];
    stateNames.forEach((stateName, ordinal) => {
        ParsingState[stateName] = ordinal;
        ParsingState[ordinal] = stateName;
    });
})(ParsingState$2 || (ParsingState$2 = {}));
/** Parses a GitHub Flavored Markdown table (plus the text around it) into a Table. */
class GitHubFlavoredMarkdownTableParser {
    /**
     * Parses the text line by line: lines before the first table line are collected as
     * `beforeTable`, then a header row, a delimiter row and data rows are read; the first
     * blank or blockquote line after the table starts ends it and everything from there on
     * is collected as `afterTable`.
     * @param table Markdown text containing the table.
     * @returns The parsed (and updated) table.
     * @throws {ParsingError} On invalid rows, a missing header or delimiter row, or a delimiter row that doesn't fit the header.
     */
    parse(table) {
        let parsedTable = new Table();
        let state = ParsingState$2.BeforeTable;
        let hasDelimiterRow = false;
        let beforeTable = [];
        let afterTable = [];
        // Now parse line by line:
        for (let line of table.split("\n")) {
            /*
                Determine parsing state and prepare:
            */
            const trimmedLine = line.trim();
            // Empty lines and blockquotes are never part of a table:
            const breaksTable = trimmedLine === "" || trimmedLine.startsWith("> ");
            if (state == ParsingState$2.BeforeTable) {
                // Check if we are in the table:
                if (!breaksTable && line.match(/[^|\\`]\|/g))
                    state = ParsingState$2.HeaderRow;
            }
            else if (breaksTable) {
                // The table is broken at the first empty line, or beginning of another block-level structure.
                // (Only once the table has started - such lines in front of the table belong to `beforeTable`.)
                state = ParsingState$2.AfterTable;
            }
            // If not inside table:
            if (state == ParsingState$2.BeforeTable) {
                beforeTable.push(line);
                continue; // Skip the rest
            }
            else if (state == ParsingState$2.AfterTable) {
                afterTable.push(line);
                continue; // Skip the rest
            }
            // Format table line (make sure it starts and ends with an unescaped pipe):
            line = line.trim();
            if (!line.startsWith("|"))
                line = "|" + line;
            if (!line.endsWith("|") ||
                (line.charAt(line.length - 3) != "\\" && line.endsWith("\\|"))) // Check if last pipe is escaped ('\|')
                line = line + "|";
            if (!line.match(rowRegex$2))
                throw new ParsingError(`Invalid row: ${line}`);
            // Is delimiter row too early?
            if (state == ParsingState$2.HeaderRow && line.match(delimiterRowRegex)) {
                throw new ParsingError("Header row missing.");
            }
            /*
                Parse line depending on parsing state:
            */
            if (state == ParsingState$2.HeaderRow || state == ParsingState$2.DataRows) {
                let tableRow = new TableRow();
                tableRow.isHeader = state == ParsingState$2.HeaderRow;
                parsedTable.addRow(-1, tableRow);
                // Parse each character:
                let cellContent = "";
                let colIndex = 0;
                let slashEscaped = false;
                let fenceEscaped = false;
                for (let char of line.substring(1, line.length)) {
                    if (!slashEscaped && !fenceEscaped && char == "|") {
                        // Ignore excess cells (only the header row may create columns):
                        if (state == ParsingState$2.HeaderRow || colIndex < parsedTable.columnCount()) {
                            let tableColumn = parsedTable.getColumn(colIndex);
                            if (!tableColumn)
                                tableColumn = parsedTable.addColumn();
                            let cell = new TableCell(parsedTable, tableRow, tableColumn);
                            parsedTable.addCell(cell);
                            cell.setText(cellContent
                                .trim()
                                .replace(/(<[bB][rR]\s*\/?>)/g, "\n"));
                        }
                        cellContent = "";
                        colIndex++;
                    }
                    else if (!slashEscaped && char == "\\") {
                        slashEscaped = true;
                    }
                    else {
                        // Pipes between backticks don't separate cells:
                        if (!slashEscaped && char == "\`")
                            fenceEscaped = !fenceEscaped;
                        // The backslash of an escape sequence is kept in the cell text:
                        if (slashEscaped)
                            cellContent += "\\";
                        cellContent += char;
                        slashEscaped = false;
                    }
                }
                // Insert empty cells if missing:
                for (; colIndex < parsedTable.columnCount(); colIndex++) {
                    let cell = new TableCell(parsedTable, tableRow, parsedTable.getColumn(colIndex));
                    parsedTable.addCell(cell);
                }
                // If the header row has been parsed, parse the delimiter row next:
                if (state == ParsingState$2.HeaderRow)
                    state = ParsingState$2.DelimiterRow;
            }
            else if (state == ParsingState$2.DelimiterRow) {
                if (!line.match(delimiterRowRegex))
                    throw new ParsingError("Invalid delimiter row");
                hasDelimiterRow = true;
                let colIndex = 0;
                let alignment = exports.TextAlignment.default;
                let separator = false;
                for (let char of line.substring(1, line.length)) {
                    if (char == "|") {
                        let tableColumn = parsedTable.getColumn(colIndex);
                        if (!tableColumn)
                            throw new ParsingError("Header row doesn't match the delimiter row in the number of cells.");
                        tableColumn.textAlign = alignment;
                        alignment = exports.TextAlignment.default;
                        separator = false;
                        colIndex++;
                    }
                    else if (char == ":") {
                        // A colon before the dashes means left; one after them means right, or center if both are present:
                        if (!separator) {
                            alignment = exports.TextAlignment.left;
                        }
                        else {
                            if (alignment == exports.TextAlignment.left)
                                alignment = exports.TextAlignment.center;
                            else
                                alignment = exports.TextAlignment.right;
                        }
                    }
                    else if (char == "-") {
                        separator = true;
                        if (alignment == exports.TextAlignment.right)
                            throw new ParsingError("Invalid delimiter row (minus sign after colon)");
                    }
                    else if (!char.match(/\s/g)) {
                        throw new ParsingError(`Unexpected character in delimiter row: '${char}'`);
                    }
                }
                if (colIndex < parsedTable.columnCount()) {
                    throw new ParsingError("Header row doesn't match the delimiter row in the number of cells.");
                }
                // Once the delimiter row has been parsed, parse the data rows next:
                state = ParsingState$2.DataRows;
            }
            else {
                throw new ParsingError(`Not implemented ParsingState: ${state}`);
            }
        }
        if (!hasDelimiterRow)
            throw new ParsingError("No delimiter row found.");
        parsedTable.beforeTable = beforeTable.join("\n");
        parsedTable.afterTable = afterTable.join("\n");
        return parsedTable.update();
    }
}
class GitHubFlavoredMarkdownTableRenderer {
prettify;
renderOutsideTable;
constructor(prettify = true, renderOutsideTable = true) {
this.prettify = prettify;
this.renderOutsideTable = renderOutsideTable;
}
render(table) {
const headerRow = table.getHeaderRows()[0];
const dataRows = table.getNormalRows();
const columnWidths = this.prettify ? this.determineColumnWidths(table) : null;
let result = [];
if (this.renderOutsideTable && table.beforeTable.trim() !== "")
result.push(table.beforeTable);
// Header row:
result.push(this.renderRow(table, headerRow, columnWidths));
// Delimiter row:
result.push(this.renderDelimiterRow(table, columnWidths));
// Data rows:
for (const row of dataRows)
result.push(this.renderRow(table, row, columnWidths));
if (this.renderOutsideTable && table.afterTable.trim() !== "")
result.push(table.afterTable);
return result.join("\n");
}
renderDelimiterRow(table, columnWidths) {
let result = [];
table.getColumns().forEach((col, i) => {
let width = this.prettify ? columnWidths[i] : null;
switch (col.textAlign) {
case exports.TextAlignment.left:
result.push(this.prettify ? `:${"-".repeat(width + 1)}` : ":-");
break;
case exports.TextAlignment.center:
result.push(this.prettify ? `:${"-".repeat(width)}:` : ":-:");
break;
case exports.TextAlignment.right:
result.push(this.prettify ? `${"-".repeat(width + 1)}:` : "-:");
break;
case exports.TextAlignment.default:
default:
result.push(this.prettify ? "-".repeat(width + 2) : "-");
break;
}
});
if (this.prettify)
return `|${result.join("|")}|`;
else
return result.join("|");
}
renderRow(table, row, columnWidths) {
let result = [];
row.getCells().forEach((cell, i) => {
result.push(this.renderCell(cell, this.prettify ? columnWidths[i] : null));
if (!this.prettify && i == row.getCells().length - 1 && cell.text.trim() == "")
result.push("");
});
if (this.prettify)
return `|${result.join("|")}|`;
else
return result.join("|");
}
renderCell(cell, cellWidth = -1) {
let text = cell.text.replace(/\r?\n/g, "<br>");
if (!this.prettify) {
return text;
}
const textLength = stringWidth__default.default(text);
switch (cell.column.textAlign) {
case exports.TextAlignment.center:
return `${" ".repeat(Math.max(0, Math.floor((cellWidth - textLength) / 2)))} ${text} ${" ".repeat(Math.max(0, Math.ceil((cellWidth - textLength) / 2)))}`;
case exports.TextAlignment.right:
return `${" ".repeat(Math.max(0, cellWidth - textLength))} ${text} `;
case exports.TextAlignment.left:
case exports.TextAlignment.default:
default:
return ` ${text} ${" ".repeat(Math.max(0, cellWidth - textLength))}`;
}
}
determineColumnWidth(table, column) {
let width = 0;
for (const cell of table.getCellsInColumn(column)) {
const cellTextLength = stringWidth__default.default(cell.text.replace(/\r?\n/g, "<br>"));
width = Math.max(cellTextLength, width);
}
return width;
}
determineColumnWidths(table) {
return table.getColumns().map(column => this.determineColumnWidth(table, column));
}
}
/**
 * Strips control and invisible characters from a string and turns a few special spaces
 * (U+00A0, U+2002, U+2003, U+2009) into regular spaces. Tab (U+0009) is removed,
 * while line feed (U+000A) and carriage return (U+000D) are kept.
 *
 * See: https://www.utf8-chartable.de/unicode-utf8-table.pl
 *      https://stackoverflow.com/a/13836410
 */
function removeInvisibleCharacters(str) {
    // Applied in order: [pattern, replacement]
    const substitutions = [
        [/[\u0000-\u0009\u000B\u000C\u000E-\u001F\u007F-\u009F]/g, ""], // Control characters
        [/[\u00AD\u2007\u200C\u2028-\u202F\u2060-\u206F\uFEFF]/g, ""], // Invisible characters, such as the soft hyphen or "Zero Width Non-Joiner"
        [/\u00A0/g, " "],
        [/\u2002/g, " "],
        [/\u2003/g, " "],
        [/\u2009/g, " "],
    ];
    let cleaned = str;
    for (const [pattern, replacement] of substitutions) {
        cleaned = cleaned.replace(pattern, replacement);
    }
    return cleaned;
}
/**
 * Creates a TurndownService instance with this library's preferred Markdown style:
 * ATX headings, fenced code blocks, "-" bullets, inlined links, plus a strikethrough rule.
 * Table-related tags are registered for removal.
 * (Callers who prefer other settings can pass their own TurndownService to the parsers instead.)
 */
function getTurndownService() {
    const options = {
        headingStyle: "atx",
        hr: "---",
        bulletListMarker: "-",
        codeBlockStyle: "fenced",
        fence: "```",
        emDelimiter: "*",
        strongDelimiter: "**",
        linkStyle: "inlined",
        linkReferenceStyle: "full",
    };
    const service = new TurndownService__default.default(options);
    // <del> and <s> become ~~strikethrough~~:
    service.addRule("strikethrough", {
        filter: ["del", "s"],
        replacement: (content) => "~~" + content + "~~",
    });
    // Blockquote rule (disabled):
    /*turndownService.addRule('blockquote', {
    filter: ['blockquote'],
    replacement: function (content) {
    return '> ' + content;
    }
    });*/
    // Filter table tags, one remove() call per tag:
    let chain = service;
    for (const tagName of ["table", "tbody", "thead", "tr", "td", "th"]) {
        chain = chain.remove(tagName);
    }
    return service;
}
/** Escapes every pipe character in the string with a backslash (`|` becomes `\|`). */
function escapeMarkdown(mdStr) {
    const escapedPipe = "\\|";
    return mdStr.replace(/\|/g, () => escapedPipe);
}
/**
 * Converts a small subset of Markdown to HTML using regular expressions:
 * images, links, code, strikethrough, bold/italic and backslash escapes.
 * The input is NOT HTML-escaped; markup already contained in it is passed through.
 * @param markdown The Markdown text.
 * @param inline When true (default), line breaks become `<br>`. When false, the text is treated
 *               as block content: "> " lines are wrapped in `<blockquote>`, other text in `<p>`,
 *               blank lines separate paragraphs and single line breaks become spaces.
 * @returns The HTML text.
 */
function mdToHtml(markdown, inline = true) {
    let html = markdown.trim(); // escape(markdown);
    // Blockquote:
    if (!inline) {
        const lines = [];
        let quoted = false;
        for (const line of html.split(/\r?\n/)) {
            if (line.startsWith("> ")) {
                if (!quoted)
                    lines.push("<blockquote>");
                quoted = true;
                lines.push(`<p>${mdToHtml(line.substring(2))}</p>`);
            }
            else {
                if (quoted)
                    lines.push("</blockquote>");
                quoted = false;
                lines.push(line);
            }
        }
        // Close a blockquote that runs until the end of the text (once, after all lines were read):
        if (quoted)
            lines.push("</blockquote>");
        html = lines.join("\n");
        if (!html.startsWith("<blockquote>"))
            html = `<p>${html}</p>`;
    }
    // Image:
    html = html.replace(/!\[([^\[\]]+)\]\(([^\(\)]+)\)/g, "<img src=\"$2\" alt=\"$1\">");
    // Links:
    html = html.replace(/\[([^\[\]]+)\]\(([^\(\)]+)\)/g, "<a href=\"$2\">$1</a>");
    // Block code:
    html = html.replace(/```[a-z]*?\n(.*?)\n```/g, "<code>$1</code>");
    // Inline code:
    html = html.replace(/`(.*?)`/g, "<code>$1</code>");
    // Strikethrough:
    html = html.replace(/~~(.*?)~~/g, "<del>$1</del>");
    // Oblique:
    html = html.replace(/___(.*?)___/g, "<em><strong>$1</strong></em>");
    html = html.replace(/\*\*\*(.*?)\*\*\*/g, "<em><strong>$1</strong></em>");
    // Bold:
    html = html.replace(/__(.*?)__/g, "<strong>$1</strong>");
    html = html.replace(/\*\*(.*?)\*\*/g, "<strong>$1</strong>");
    // Italic:
    html = html.replace(/_(.*?)_/g, "<em>$1</em>");
    html = html.replace(/\*(.*?)\*/g, "<em>$1</em>");
    // Escaped characters:
    html = html.replace(/\\([#\.\|\*_\s`\[\]\-])/g, "$1");
    // Newlines:
    if (inline)
        html = html.replace(/\r?\n/g, "<br>");
    else
        // Blank lines split paragraphs; remaining line breaks become spaces unless they sit next to a paragraph tag.
        html = html.replace(/(\r?\n){2}/g, "</p>\n<p>").replace(/(?<!<\/p>)\r?\n(?!<p>)/g, " ");
    // Remove unnecessary whitespace:
    html = html.replace(/[ \t]{2,}/g, " ");
    return html;
}
/**
 * Converts HTML to Markdown by delegating to the given service's `turndown` method.
 * @param html The HTML text.
 * @param turndownService Object providing `turndown(html)`.
 * @returns Whatever `turndown` returns.
 */
function htmlToMd(html, turndownService) {
    const markdown = turndownService.turndown(html);
    return markdown;
}
/**
 * Maps a TextAlignment value to a CSS `text-align` declaration.
 * Anything other than left, right or center yields "text-align: start".
 */
function textAlignToCSS(textAlign) {
    const { left, right, center } = exports.TextAlignment;
    if (textAlign === left) {
        return "text-align: left";
    }
    if (textAlign === right) {
        return "text-align: right";
    }
    if (textAlign === center) {
        return "text-align: center";
    }
    return "text-align: start";
}
/**
 * Reads the `text-align` value from the element via `element.css("text-align")` and maps it
 * (case-insensitively) to a TextAlignment. Missing or unrecognized values map to the default.
 */
function cssToTextAlign(element) {
    const declared = element.css("text-align"); // Might be 'undefined'
    if (!declared) {
        return exports.TextAlignment.default;
    }
    const keyword = declared.toLowerCase();
    if (keyword === "left") {
        return exports.TextAlignment.left;
    }
    if (keyword === "center") {
        return exports.TextAlignment.center;
    }
    if (keyword === "right") {
        return exports.TextAlignment.right;
    }
    return exports.TextAlignment.default;
}
/** Selects how HTMLTableParser handles markup (numeric enum with reverse name lookup). */
exports.HTMLTableParserMode = void 0;
(function (HTMLTableParserMode) {
    const memberNames = [
        /** uses only text (`Cheerio.text()`) */
        "StripHTMLElements",
        /** uses the HTML code (`Cheerio.html()`) without any converting */
        "PreserveHTMLElements",
        /** uses the HTML code (`Cheerio.html()`) and converts to Markdown using Turndown if possible (default) */
        "ConvertHTMLElements",
    ];
    memberNames.forEach((memberName, ordinal) => {
        HTMLTableParserMode[memberName] = ordinal;
        HTMLTableParserMode[ordinal] = memberName;
    });
})(exports.HTMLTableParserMode || (exports.HTMLTableParserMode = {}));
class HTMLTableParser {
mode;
turndownService;
constructor(mode = exports.HTMLTableParserMode.ConvertHTMLElements, turndownService = getTurndownService()) {
this.mode = mode;
this.turndownService = turndownService;
}
parse(table) {
/*
Parse the html string and find our <table> tag to start:
*/
const dom = htmlparser2__namespace.parseDocument(table, {
xmlMode: false,
lowerCaseTags: true,
lowerCaseAttributeNames: true,
decodeEntities: true,
});
const $ = cheerio__namespace.load(dom);
const $tables = $("table");
if ($tables.length === 0) {
throw new ParsingError("Couldn't find <table> tag in DOM.");
}
const $table = $($tables[0]);
/*
Converting table to Markdown:
*/
let parsedTable = new Table();
let hasSections = false;
let tableTextAlign = cssToTextAlign($table);
// Get everything before <table>:
let m = table.match(/((.|\n)*)<\s*[tT][aA][bB][lL][eE][^<>]*>/m);
if (m) {
parsedTable.beforeTable = htmlToMd(m[1], this.turndownService);
}
// Get everything after </table>:
m = table.match(/<\/\s*[tT][aA][bB][lL][eE]\s*>((.|\n)*)/m);
if (m) {
parsedTable.afterTable = htmlToMd(m[1], this.turndownService);
}
// Parse <thead> tag in <table>:
let $theads = $table.find("thead");
if ($theads.length != 0) {
let sectionTextAlign = cssToTextAlign($theads);
this.parseSection($, parsedTable, $theads.find("tr"), (sectionTextAlign != exports.TextAlignment.default ? sectionTextAlign : tableTextAlign), true);
hasSections = true;
}
// Parse <tbody> tags in <table>:
const self = this;
let $tbodies = $table.find("tbody");
if ($tbodies.length > 0) {
$tbodies.each((i, element) => {
const domTBody = $(element);
let sectionTextAlign = cssToTextAlign(domTBody);
self.parseSection($, parsedTable, domTBody.find("tr"), (sectionTextAlign != exports.TextAlignment.default ? sectionTextAlign : tableTextAlign), false, $theads == null, i > 0);
});
hasSections = true;
}
// No <thead> or <tbody> tags?
if (!hasSections) {
// Parse table that doesn't have thead or tbody tags as one section with no header:
this.parseSection($, parsedTable, $table.find("tr"), tableTextAlign, false, true, false);
}
// Parse <caption> tag in <table>:
let $captions = $table.find("caption");
if ($captions.length != 0) {
const $caption = $($captions);
let caption = new TableCaption();
caption.text = htmlToMd($caption.html() ? $caption.html() : "", this.turndownService).replace(/(\r?\n)/g, "").trim(); // domCaption.innerText.replace(/(\r?\n|\[|\])/g, "").trim();
if ($caption.attr('id') && caption.getLabel() != $caption.attr('id'))
caption.label = $caption.attr('id').replace(/(\r?\n|\[|\])/g, "").trim();
switch ($caption.css("caption-side") && $caption.css("caption-side").toLowerCase()) {
case "bottom":
caption.position = exports.TableCaptionPosition.bottom;
break;
case "top":
default:
caption.position = exports.TableCaptionPosition.top;
}
parsedTable.caption = caption;
}
return parsedTable.update();
}
parseSection($, table, $rows, defaultTextAlign, isHeader = false, allowHeaderDetection = false, firstRowStartsNewSection = false) {
// HTML skips "ghost" cells that are overshadowed by other cells that have a rowspan > 1.
// We'll memorize them:
let rowspanGhostCells = [];
// Remember how many rows we already have:
let rowOffset = table.rowCount();
// Iterate over each row (<tr>) of the HTML table:
$rows.each((domRowIndex, element) => {
let rowIndex = domRowIndex + rowOffset;
let row = table.getRow(rowIndex);
if (!row)
row = table.addRow();
row.isHeader = isHeader;
if (domRowIndex == 0)
row.startsNewSection = firstRowStartsNewSection;
// Memorize an offset (colspan):
let colOffset = 0;
// Iterate over each cell (<td> or <th>) of the HTML table row:
const $row = $(element);
let $cells = $row.find("td, th");
let allCellsAreTH = true;
$cells.each((domColIndex, element) => {
const $cell = $(element);
// Get the TableColumn of our Table object, taking the memorized rowspans and colOffset into account:
let colIndex = domColIndex + colOffset;
while (rowspanGhostCells.filter(ghost => ghost.row == rowIndex && ghost.col == colIndex).length > 0) {
colIndex = domColIndex + ++colOffset;
}
let column = table.getColumn(colIndex);
if (!column)
column = table.addColumn();
// Add cell to our Table object:
let cellContent = this.parseCell($cell);
let textAlign = cssToTextAlign($cell);
let wrappable = $cell.hasClass("extend");
textAlign = textAlign != exports.TextAlignment.default ? textAlign : defaultTextAlign;
let cellIsHeader = $cell.prop("tagName").toLowerCase() == "th";
allCellsAreTH = allCellsAreTH && cellIsHeader;
let cell = new TableCell(table, row, column);
cell.setText(cellContent);
cell.textAlign = textAlign;
cell.isHeader = cellIsHeader;
if (column.textAlign == exports.TextAlignment.default) {
column.textAlign = textAlign;
}
column.wrappable = wrappable;
table.addCell(cell);
// Take "colspan" into account:
let colspan = $cell.prop("colspan");
if (colspan > 1) {
// Add empty cells to our Table object:
for (let i = 1; i < colspan; i++) {
let nextColumn = table.getColumn(colIndex + i);
if (!nextColumn)
nextColumn = table.addColumn();
let mergedCell = table.getCell(row, nextColumn);
mergedCell.merged = exports.TableCellMerge.left;
}
// Add colspan to colOffset:
colOffset += colspan - 1;
}
// Take "rowspan" into account:
let rowspan = $cell.prop("rowspan");
if (rowspan > 1) {
// Add empty cells to our Table object:
for (let i = 1; i < rowspan; i++) {
let nextRow = table.getRow(rowIndex + i);
if (!nextRow)
nextRow = table.addRow();
nextRow.isHeader = isHeader;
let mergedCell = table.getCell(nextRow, column);
mergedCell.merged = exports.TableCellMerge.above;
// Memorize "ghost" cells:
rowspanGhostCells.push({
"row": rowIndex + i,
"col": colIndex
});
}
}
});
// Detect headers:
if (allowHeaderDetection && !isHeader) {
row.isHeader = allCellsAreTH;
}
});
}
parseCell($cell) {
switch (this.mode) {
case exports.HTMLTableParserMode.PreserveHTMLElements:
return removeInvisibleCharacters(escapeMarkdown($cell.html()));
case exports.HTMLTableParserMode.StripHTMLElements:
return removeInvisibleCharacters(escapeMarkdown($cell.text()));
case exports.HTMLTableParserMode.ConvertHTMLElements:
default:
return removeInvisibleCharacters(escapeMarkdown(htmlToMd($cell.html(), this.turndownService)));
}
}
}
class HTMLTableRenderer {
prettify;
indent;
renderOutsideTable;
constructor(prettify = true, indent = " ", renderOutsideTable = true) {
this.prettify = prettify;
this.indent = indent;
this.renderOutsideTable = renderOutsideTable;
}
render(table) {
let result = [];
if (this.renderOutsideTable && table.beforeTable.trim() !== "")
result.push(mdToHtml(table.beforeTable, false));
result.push("<table>");
let headerRows = table.getHeaderRows();
let normalRows = table.getNormalRows();
if (headerRows.length > 0) {
result.push(this.indentString("<thead>", 1));
for (const row of headerRows)
result.push(...this.renderRow(table, row));
result.push(this.indentString("</thead>", 1));
}
if (normalRows.length > 0) {
result.push(this.indentString("<tbody>", 1));
for (const row of normalRows) {
if (row.startsNewSection)
result.push(this.indentString("</tbody>", 1), this.indentString("<tbody>", 1));
result.push(...this.renderRow(table, row));
}
result.push(this.indentString("</tbody>", 1));
}
if (table.caption && table.caption.text.length > 0)
result.push(this.indentString(`<caption id="${table.caption.getLabel()}" style="caption-side: ${table.caption.position}">${mdToHtml(table.caption.text)}</caption>`, 1));
result.push("</table>");
if (this.renderOutsideTable && table.afterTable.trim() !== "")
result.push(mdToHtml(table.afterTable, false));
return result.join(this.prettify ? "\n" : "");
}
renderRow(table, row) {
let result = [];
result.push(this.indentString("<tr>", 2));
for (let cell of table.getCellsInRow(row)) {
let renderedCell = this.indentString(this.renderCell(cell), 3);
if (renderedCell.trim() !== "")
result.push(renderedCell);
}
result.push(this.indentString("</tr>", 2));
return result;
}
renderCell(cell) {
let colspan = cell.getColspan();
let rowspan = cell.getRowspan();
if (cell.merged == exports.TableCellMerge.none) {
let cellProps = (colspan > 1 ? ` colspan="${colspan}"` : "") +
(rowspan > 1 ? ` rowspan="${rowspan}"` : "") +
(cell.getTextAlignment() != exports.TextAlignment.default ? ` style="${textAlignToCSS(cell.getTextAlignment())}"` : "") + // ` align="${cell.getTextAlignment()}"`
(cell.column.wrappable ? `