UNPKG

@felisdiligens/md-table-tools

Version:

MultiMarkdown table tools

1,235 lines (1,225 loc) 84.8 kB
'use strict'; var stringWidth = require('string-width'); var cheerio = require('cheerio'); var htmlparser2 = require('htmlparser2'); var TurndownService = require('turndown'); function _interopDefault (e) { return e && e.__esModule ? e : { default: e }; } function _interopNamespace(e) { if (e && e.__esModule) return e; var n = Object.create(null); if (e) { Object.keys(e).forEach(function (k) { if (k !== 'default') { var d = Object.getOwnPropertyDescriptor(e, k); Object.defineProperty(n, k, d.get ? d : { enumerable: true, get: function () { return e[k]; } }); } }); } n.default = e; return Object.freeze(n); } var stringWidth__default = /*#__PURE__*/_interopDefault(stringWidth); var cheerio__namespace = /*#__PURE__*/_interopNamespace(cheerio); var htmlparser2__namespace = /*#__PURE__*/_interopNamespace(htmlparser2); var TurndownService__default = /*#__PURE__*/_interopDefault(TurndownService); /** indicates how text is aligned in a column */ exports.TextAlignment = void 0; (function (TextAlignment) { TextAlignment["left"] = "left"; TextAlignment["center"] = "center"; TextAlignment["right"] = "right"; TextAlignment["default"] = "start"; })(exports.TextAlignment || (exports.TextAlignment = {})); /** indicates how a cell is merged with a neighboring cell */ exports.TableCellMerge = void 0; (function (TableCellMerge) { TableCellMerge[TableCellMerge["above"] = 0] = "above"; TableCellMerge[TableCellMerge["left"] = 1] = "left"; TableCellMerge[TableCellMerge["none"] = 2] = "none"; })(exports.TableCellMerge || (exports.TableCellMerge = {})); /** indicates the placement of the table caption */ exports.TableCaptionPosition = void 0; (function (TableCaptionPosition) { TableCaptionPosition["top"] = "top"; TableCaptionPosition["bottom"] = "bottom"; })(exports.TableCaptionPosition || (exports.TableCaptionPosition = {})); class IndexOutOfBoundsError extends Error { constructor(msg) { super(msg); // Set the prototype explicitly. Object.setPrototypeOf(this, IndexOutOfBoundsError.prototype); } } class TableCaption { text; label; position; constructor(text = "", label = "", position = exports.TableCaptionPosition.top) { this.text = text; this.label = label; this.position = position; } getLabel() { // "If you have a caption, you can also have a label, allowing you to create anchors pointing to the table. If there is no label, then the caption acts as the label" if (typeof this.label === 'string' && this.label.trim() !== "") return this.label.trim().replace(/\s/g, "-"); return this.text.trim().toLowerCase().replace(/\s/g, "-").replace(/[^a-zA-Z0-9]/g, ""); } } class TableCell { text; table; row; column; merged; isHeader; textAlign; constructor(table, row, column) { this.text = ""; this.table = table; this.row = row; this.column = column; this.merged = exports.TableCellMerge.none; this.isHeader = false; this.textAlign = exports.TextAlignment.default; } isHeaderCell() { return this.row.isHeader || this.isHeader; } getTextAlignment() { if (this.textAlign != exports.TextAlignment.default) return this.textAlign; return this.column.textAlign; } setText(text) { this.text = text; } getColspan() { if (this.merged != exports.TableCellMerge.left) { let col = this.table.indexOfColumn(this.column) + 1; if (col > this.table.columnCount()) return 1; let colspan = 1; let cells = this.table.getCellsInRow(this.row); for (; col < this.table.columnCount(); col++) { if (cells[col].merged == exports.TableCellMerge.left) colspan++; else break; } return colspan; } return 1; } getRowspan() { if (this.merged != exports.TableCellMerge.above) { let row = this.table.indexOfRow(this.row) + 1; if (row > this.table.rowCount()) return 1; let rowspan = 1; let cells = this.table.getCellsInColumn(this.column); for (; row < this.table.rowCount(); row++) { if (cells[row].merged == exports.TableCellMerge.above) rowspan++; else break; } return rowspan; } return 1; } } class TableRow { index; isHeader; isMultiline; startsNewSection; cells; constructor(index = 0, isHeader = false, /** Only pertains to MultiMarkdown multiline feature. Ignored by other parsers/renderers. See Table.mergeMultilineRows() */ isMultiline = false, startsNewSection = false) { this.index = index; this.isHeader = isHeader; this.isMultiline = isMultiline; this.startsNewSection = startsNewSection; this.cells = []; } updateCells(table) { if (table.columnCount() != this.cells.length) this.cells = table.getCells().filter(cell => cell.row == this); this.cells = this.cells.sort((a, b) => a.column.index - b.column.index); } getCell(index) { return this.cells.at(index); } getCells() { return this.cells; } } class TableColumn { index; textAlign; wrappable; cells; constructor(index = 0, textAlign = exports.TextAlignment.default, wrappable = false) { this.index = index; this.textAlign = textAlign; this.wrappable = wrappable; this.cells = []; } updateCells(table) { if (table.rowCount() != this.cells.length) this.cells = table.getCells().filter(cell => cell.column == this); this.cells = this.cells.sort((a, b) => a.row.index - b.row.index); } getCell(index) { return this.cells.at(index); } getCells() { return this.cells; } } class Table { cells; rows; columns; caption; /** Text before the table */ beforeTable; /** Text after the table */ afterTable; constructor(rowNum = 0, colNum = 0) { this.cells = []; this.rows = Array.from({ length: rowNum }, (_, i) => new TableRow(i)); this.columns = Array.from({ length: colNum }, (_, i) => new TableColumn(i)); this.caption = null; this.beforeTable = ""; this.afterTable = ""; } /** * Adds a TableRow to the table. * @param index Insert row at index. -1 means it's appended. * @param row (optional) * @returns The added row. */ addRow(index = -1, row = new TableRow()) { if (index < 0) { row.index = this.rows.push(row) - 1; } else { row.index = index; this.rows.splice(index, 0, row); } return row; } /** * Adds a TableColumn to the table. * @param index Insert column at index. -1 means it's appended. * @param col (optional) * @returns The added column. */ addColumn(index = -1, col = new TableColumn()) { if (index < 0) { col.index = this.columns.push(col); } else { col.index = index; this.columns.splice(index, 0, col); } return col; } /** Gets the row at index. Negative index counts back from the end. Returns undefined if out-of-bounds. */ getRow(index) { return this.rows.at(index); } /** Gets the index of the row. -1 if it hasn't been found. */ indexOfRow(row) { return this.rows.indexOf(row); } /** Gets the column at index. Negative index counts back from the end. Returns undefined if out-of-bounds. */ getColumn(index) { return this.columns.at(index); } /** Gets the index of the column. -1 if it hasn't been found. */ indexOfColumn(col) { return this.columns.indexOf(col); } /** * Removes the given column. Also removes all cells within the column. * @param col Either index or object reference. */ removeColumn(col) { let colObj = typeof col === "number" ? this.columns.at(col) : col; let columnCells = this.getCellsInColumn(colObj); this.cells = this.cells.filter(cell => !columnCells.includes(cell)); this.columns = this.columns.filter(c => c != colObj); } /** * Removes the given row. Also removes all cells within the row. * @param row Either index or object reference. */ removeRow(row) { let rowObj = typeof row === "number" ? this.rows.at(row) : row; let rowCells = this.getCellsInRow(rowObj); this.cells = this.cells.filter(cell => !rowCells.includes(cell)); this.rows = this.rows.filter(r => r != rowObj); } /** * Moves the given column to the new index. * @param col Either index or object reference. * @param newIndex The new index of the given column. * @throws {IndexOutOfBoundsError} Can't move column outside of table. */ moveColumn(col, newIndex) { let colObj = typeof col === "number" ? this.columns.at(col) : col; if (colObj === undefined || newIndex >= this.columnCount() || newIndex < 0) throw new IndexOutOfBoundsError("(IndexOutOfBoundsError) Can't move column outside of table."); this.columns.splice(colObj.index, 1); this.columns.splice(newIndex, 0, colObj); colObj.index = newIndex; } /** * Moves the given row to the new index. * @param row Either index or object reference. * @param newIndex The new index of the given row. * @throws {IndexOutOfBoundsError} Can't move row outside of table. */ moveRow(row, newIndex) { let rowObj = typeof row === "number" ? this.rows.at(row) : row; if (rowObj === undefined || newIndex >= this.rowCount() || newIndex < 0) throw new IndexOutOfBoundsError("(IndexOutOfBoundsError) Can't move row outside of table."); this.rows.splice(rowObj.index, 1); this.rows.splice(newIndex, 0, rowObj); rowObj.index = newIndex; } /** Returns a list of all rows that are headers. */ getHeaderRows() { return this.rows.filter(r => r.isHeader); } /** Returns a list of all rows that aren't headers. */ getNormalRows() { return this.rows.filter(r => !r.isHeader); } /** Retruns all rows in the table, from top to bottom, including header rows. */ getRows() { return this.rows; } /** Returns all columns in the table, from left to right. */ getColumns() { return this.columns; } /** Returns all cells in the table. Isn't necessarily sorted! */ getCells() { return this.cells; } /** * Returns all cells within the given row. * See also: {@link TableRow.getCells()} * @param row Either index or object reference. */ getCellsInRow(row) { return (typeof row === "number" ? this.rows[row] : row).cells; } /** * Returns all cells within the given column. * See also: {@link TableColumn.getCells()} * @param column Either index or object reference. */ getCellsInColumn(column) { return (typeof column === "number" ? this.columns[column] : column).cells; } /** Returns the cell at row and column. */ getCellByObjs(rowObj, columnObj) { // Intersection of row / column: for (const cell of rowObj.cells) { if (columnObj.cells.includes(cell)) return cell; } let newCell = new TableCell(this, rowObj, columnObj); this.addCell(newCell); return newCell; } /** * Returns the cell at row and column. * If the cell doesn't already exist, it will be created. * @param row Either index or object reference. * @param column Either index or object reference. * @returns The cell at row and column. */ getCell(row, column) { return this.getCellByObjs(typeof row === "number" ? this.rows.at(row) : row, typeof column === "number" ? this.columns.at(column) : column); } /** * Adds the cell to the Table and the cell's respective TableRow and TableColumn. * (Be careful not to add a cell with row/column that already exist. Otherwise, the added cell will be overshadowed and not be used.) */ addCell(cell) { this.cells.push(cell); cell.row.cells.push(cell); cell.column.cells.push(cell); } /** Returns the total amount of rows in the table, including the header rows. */ rowCount() { return this.rows.length; } /** Returns the total amount of columns in the table. */ columnCount() { return this.columns.length; } /** * → Ensures that all table cells exist. * → Updates indices and sorts the cells within rows and columns. * → Tries to find invalid configurations and sanitize them. * * Call this method after altering the table. */ update() { // Iterate over the entire table: let columnObj; let rowObj; for (let colIndex = 0; colIndex < this.columns.length; colIndex++) { // Update the column's index: columnObj = this.columns[colIndex]; columnObj.index = colIndex; for (let rowIndex = 0; rowIndex < this.rows.length; rowIndex++) { // Update the row's index: rowObj = this.rows[rowIndex]; rowObj.index = rowIndex; // Use "getCellByObjs" to ensure that the cell gets created, if it doesn't exist already: this.getCellByObjs(rowObj, columnObj); } } // Update the column's 'cells' array: for (const column of this.columns) column.updateCells(this); // Update the row's 'cells' array: for (const row of this.rows) row.updateCells(this); this.sanitize(); return this; } /** Tries to find invalid configurations and sanitize them. */ sanitize() { if (this.getNormalRows().length > 0) { // Cannot merge cell above if in first row: for (const cell of this.getCellsInRow(this.getNormalRows()[0])) { if (cell.merged == exports.TableCellMerge.above) cell.merged = exports.TableCellMerge.none; } this.getNormalRows()[0].startsNewSection = false; } for (const cell of this.cells) { // Cannot merge cell left if in first column: if (cell.column == this.columns[0] && cell.merged == exports.TableCellMerge.left) cell.merged = exports.TableCellMerge.none; // Cannot merge cell above if in first row: if ((cell.row == this.rows[0] || cell.row.startsNewSection) && cell.merged == exports.TableCellMerge.above) cell.merged = exports.TableCellMerge.none; } return this; } /** * Merges multiline rows (from MultiMarkdown feature) into "normal" rows. * This will destroy MultiMarkdown formatting! Only use when rendering into different formats. */ mergeMultilineRows() { let newRows = []; let merging = false; let actualRowIndex = 0; this.getRows().forEach((row, index) => { if (merging) { row.getCells().forEach((cell, index) => { const parentCell = newRows[actualRowIndex - 1].getCell(index); parentCell.setText(parentCell.text + "\n" + cell.text); }); } else { row.index = actualRowIndex; newRows.push(row); actualRowIndex++; } if (!merging && row.isMultiline) { merging = true; } else if (merging && !row.isMultiline) { merging = false; } row.isMultiline = false; }); this.rows = newRows; this.update(); return this; } } /* * Due to the nature of CSV tables, some data will be lost when converting MMD (or HTML) to CSV. * * CSV file specifications and implementation: * https://www.rfc-editor.org/rfc/rfc4180 * http://super-csv.github.io/super-csv/csv_specification.html */ class CSVTableParser { separator; quote; assumeFirstLineIsHeader; constructor(separator = ",", quote = "\"", assumeFirstLineIsHeader = true) { this.separator = separator; this.quote = quote; this.assumeFirstLineIsHeader = assumeFirstLineIsHeader; } parse(table) { /* Prepare csv string: */ let csv = table.replace(/\r?\n/g, "\n"); if (!csv.endsWith("\n")) csv += "\n"; /* Parse csv string: */ let parsedTable = new Table(); let tableRow = parsedTable.addRow(); tableRow.isHeader = this.assumeFirstLineIsHeader; let cellContent = ""; let rowIndex = 0; let colIndex = 0; let isQuoted = false; let lastChar = null; for (const char of csv) { // Comma or newline: if ((char == this.separator || char == "\n") && !isQuoted) { // Get column: let tableColumn; if (rowIndex == 0) tableColumn = parsedTable.addColumn(); else tableColumn = parsedTable.getColumn(colIndex); // Set table cell content: let tableCell = new TableCell(parsedTable, tableRow, tableColumn); tableCell.setText(cellContent); parsedTable.addCell(tableCell); //parsedTable.getCellByObjs(tableRow, tableColumn).setText(cellContent); cellContent = ""; colIndex++; // If it's a newline: if (char == "\n") { // Add a new row to the table: tableRow = parsedTable.addRow(); rowIndex++; colIndex = 0; } } else if (char == this.quote) { if (!isQuoted && lastChar == this.quote) { cellContent += this.quote; } isQuoted = !isQuoted; } else { cellContent += char; } lastChar = char; } // Remove unused row: parsedTable.removeRow(tableRow); return parsedTable; } } /** changes the output of CSVTableRenderer */ exports.CSVTableRendererMode = void 0; (function (CSVTableRendererMode) { CSVTableRendererMode[CSVTableRendererMode["OmitSpecialCharacters"] = 0] = "OmitSpecialCharacters"; CSVTableRendererMode[CSVTableRendererMode["EscapeWithQuotes"] = 1] = "EscapeWithQuotes"; CSVTableRendererMode[CSVTableRendererMode["AlwaysUseQuotes"] = 2] = "AlwaysUseQuotes"; })(exports.CSVTableRendererMode || (exports.CSVTableRendererMode = {})); class CSVTableRenderer { separator; quote; lineBreak; mode; constructor(separator = ",", quote = "\"", lineBreak = "\r\n", mode = exports.CSVTableRendererMode.EscapeWithQuotes) { this.separator = separator; this.quote = quote; this.lineBreak = lineBreak; this.mode = mode; } render(table) { let specialCharactersRegex = new RegExp(`([${this.separator}${this.quote}]|\r\n|\n)`); let specialCharactersRegexGlobal = new RegExp(`([${this.separator}${this.quote}]|\r\n|\n)`, "g"); let quoteRegex = new RegExp(this.quote, "g"); let csv = []; for (const row of table.getRows()) { let renderedRow = []; for (const cell of table.getCellsInRow(row)) { switch (this.mode) { case exports.CSVTableRendererMode.AlwaysUseQuotes: renderedRow.push(`${this.quote}${cell.text.replace(quoteRegex, this.quote.repeat(2))}${this.quote}`); break; case exports.CSVTableRendererMode.EscapeWithQuotes: if (specialCharactersRegex.test(cell.text)) { renderedRow.push(`${this.quote}${cell.text.replace(quoteRegex, this.quote.repeat(2))}${this.quote}`); } else { renderedRow.push(cell.text); } break; case exports.CSVTableRendererMode.OmitSpecialCharacters: renderedRow.push(cell.text.replace(specialCharactersRegexGlobal, "")); break; } } csv.push(renderedRow.join(this.separator)); } return csv.join(this.lineBreak); } } class ParsingError extends Error { constructor(msg) { super(msg); // Set the prototype explicitly. Object.setPrototypeOf(this, ParsingError.prototype); } } /* Specification: https://github.github.com/gfm/#tables-extension- */ const rowRegex$2 = /^\|(.+)\|$/; const delimiterRowRegex = /^\|(\s*:?\-+:?\s*\|)+$/; var ParsingState$2; (function (ParsingState) { ParsingState[ParsingState["BeforeTable"] = 0] = "BeforeTable"; ParsingState[ParsingState["HeaderRow"] = 1] = "HeaderRow"; ParsingState[ParsingState["DelimiterRow"] = 2] = "DelimiterRow"; ParsingState[ParsingState["DataRows"] = 3] = "DataRows"; ParsingState[ParsingState["AfterTable"] = 4] = "AfterTable"; })(ParsingState$2 || (ParsingState$2 = {})); class GitHubFlavoredMarkdownTableParser { parse(table) { let parsedTable = new Table(); let state = ParsingState$2.BeforeTable; let hasDelimiterRow = false; let beforeTable = []; let afterTable = []; // Now parse line by line: for (let line of table.split("\n")) { /* Determine parsing state and prepare: */ // Check if we are in the table: if (state == ParsingState$2.BeforeTable && line.match(/[^|\\`]\|/g)) { state = ParsingState$2.HeaderRow; } // The table is broken at the first empty line, or beginning of another block-level structure: if (line.trim() === "" || line.trim().startsWith("> ")) { state = ParsingState$2.AfterTable; } // If not inside table: if (state == ParsingState$2.BeforeTable) { beforeTable.push(line); continue; // Skip the rest } else if (state == ParsingState$2.AfterTable) { afterTable.push(line); continue; // Skip the rest } // Format table line: line = line.trim(); if (!line.startsWith("|")) line = "|" + line; if (!line.endsWith("|") || (line.charAt(line.length - 3) != "\\" && line.endsWith("\\|"))) // Check if last pipe is escaped ('\|') line = line + "|"; if (!line.match(rowRegex$2)) throw new ParsingError(`Invalid row: ${line}`); // Is delimiter row too early? if (state == ParsingState$2.HeaderRow && line.match(delimiterRowRegex)) { throw new ParsingError("Header row missing."); } /* Parse line depending on parsing state: */ if (state == ParsingState$2.HeaderRow || state == ParsingState$2.DataRows) { let tableRow = new TableRow(); tableRow.isHeader = state == ParsingState$2.HeaderRow; parsedTable.addRow(-1, tableRow); // Parse each character: let cellContent = ""; let colIndex = 0; let slashEscaped = false; let fenceEscaped = false; for (let char of line.substring(1, line.length)) { if (!slashEscaped && !fenceEscaped && char == "|") { // Ignore excess cells: if (state == ParsingState$2.HeaderRow || colIndex < parsedTable.columnCount()) { let tableColumn = parsedTable.getColumn(colIndex); if (!tableColumn) tableColumn = parsedTable.addColumn(); let cell = new TableCell(parsedTable, tableRow, tableColumn); parsedTable.addCell(cell); cell.setText(cellContent .trim() .replace(/(<[bB][rR]\s*\/?>)/g, "\n")); } cellContent = ""; colIndex++; } else if (!slashEscaped && char == "\\") { slashEscaped = true; } else { if (!slashEscaped && char == "\`") fenceEscaped = !fenceEscaped; if (slashEscaped) cellContent += "\\"; cellContent += char; slashEscaped = false; } } // Insert empty cells if missing: for (; colIndex < parsedTable.columnCount(); colIndex++) { let cell = new TableCell(parsedTable, tableRow, parsedTable.getColumn(colIndex)); parsedTable.addCell(cell); } // If the header row has been parsed, parse the delimiter row next: if (state == ParsingState$2.HeaderRow) state = ParsingState$2.DelimiterRow; } else if (state == ParsingState$2.DelimiterRow) { if (!line.match(delimiterRowRegex)) throw new ParsingError("Invalid delimiter row"); hasDelimiterRow = true; let colIndex = 0; let alignment = exports.TextAlignment.default; let separator = false; for (let char of line.substring(1, line.length)) { if (char == "|") { let tableColumn = parsedTable.getColumn(colIndex); if (!tableColumn) throw new ParsingError("Header row doesn't match the delimiter row in the number of cells."); tableColumn.textAlign = alignment; alignment = exports.TextAlignment.default; separator = false; colIndex++; } else if (char == ":") { if (!separator) { alignment = exports.TextAlignment.left; } else { if (alignment == exports.TextAlignment.left) alignment = exports.TextAlignment.center; else alignment = exports.TextAlignment.right; } } else if (char == "-") { separator = true; if (alignment == exports.TextAlignment.right) throw new ParsingError("Invalid delimiter row (minus sign after colon)"); } else if (!char.match(/\s/g)) { throw new ParsingError(`Unexpected character in delimiter row: '${char}'`); } } if (colIndex < parsedTable.columnCount()) { throw new ParsingError("Header row doesn't match the delimiter row in the number of cells."); } // Once the delimiter row has been parsed, parse the data rows next: state = ParsingState$2.DataRows; } else { throw new ParsingError(`Not implemented ParsingState: ${state}`); } } if (!hasDelimiterRow) throw new ParsingError("No delimiter row found."); parsedTable.beforeTable = beforeTable.join("\n"); parsedTable.afterTable = afterTable.join("\n"); return parsedTable.update(); } } class GitHubFlavoredMarkdownTableRenderer { prettify; renderOutsideTable; constructor(prettify = true, renderOutsideTable = true) { this.prettify = prettify; this.renderOutsideTable = renderOutsideTable; } render(table) { const headerRow = table.getHeaderRows()[0]; const dataRows = table.getNormalRows(); const columnWidths = this.prettify ? this.determineColumnWidths(table) : null; let result = []; if (this.renderOutsideTable && table.beforeTable.trim() !== "") result.push(table.beforeTable); // Header row: result.push(this.renderRow(table, headerRow, columnWidths)); // Delimiter row: result.push(this.renderDelimiterRow(table, columnWidths)); // Data rows: for (const row of dataRows) result.push(this.renderRow(table, row, columnWidths)); if (this.renderOutsideTable && table.afterTable.trim() !== "") result.push(table.afterTable); return result.join("\n"); } renderDelimiterRow(table, columnWidths) { let result = []; table.getColumns().forEach((col, i) => { let width = this.prettify ? columnWidths[i] : null; switch (col.textAlign) { case exports.TextAlignment.left: result.push(this.prettify ? `:${"-".repeat(width + 1)}` : ":-"); break; case exports.TextAlignment.center: result.push(this.prettify ? `:${"-".repeat(width)}:` : ":-:"); break; case exports.TextAlignment.right: result.push(this.prettify ? `${"-".repeat(width + 1)}:` : "-:"); break; case exports.TextAlignment.default: default: result.push(this.prettify ? "-".repeat(width + 2) : "-"); break; } }); if (this.prettify) return `|${result.join("|")}|`; else return result.join("|"); } renderRow(table, row, columnWidths) { let result = []; row.getCells().forEach((cell, i) => { result.push(this.renderCell(cell, this.prettify ? columnWidths[i] : null)); if (!this.prettify && i == row.getCells().length - 1 && cell.text.trim() == "") result.push(""); }); if (this.prettify) return `|${result.join("|")}|`; else return result.join("|"); } renderCell(cell, cellWidth = -1) { let text = cell.text.replace(/\r?\n/g, "<br>"); if (!this.prettify) { return text; } const textLength = stringWidth__default.default(text); switch (cell.column.textAlign) { case exports.TextAlignment.center: return `${" ".repeat(Math.max(0, Math.floor((cellWidth - textLength) / 2)))} ${text} ${" ".repeat(Math.max(0, Math.ceil((cellWidth - textLength) / 2)))}`; case exports.TextAlignment.right: return `${" ".repeat(Math.max(0, cellWidth - textLength))} ${text} `; case exports.TextAlignment.left: case exports.TextAlignment.default: default: return ` ${text} ${" ".repeat(Math.max(0, cellWidth - textLength))}`; } } determineColumnWidth(table, column) { let width = 0; for (const cell of table.getCellsInColumn(column)) { const cellTextLength = stringWidth__default.default(cell.text.replace(/\r?\n/g, "<br>")); width = Math.max(cellTextLength, width); } return width; } determineColumnWidths(table) { return table.getColumns().map(column => this.determineColumnWidth(table, column)); } } function removeInvisibleCharacters(str) { // See: https://www.utf8-chartable.de/unicode-utf8-table.pl // https://stackoverflow.com/a/13836410 // /[^\u0000-\u007E]/g // /\u00AD/g - soft hyphen // /[^\u0020-\u007E\u00A1-\u00AC\u00AE-\u00FF]/g return str .replace(/[\u0000-\u0009\u000B\u000C\u000E-\u001F\u007F-\u009F]/g, "") // Control characters .replace(/[\u00AD\u2007\u200C\u2028-\u202F\u2060-\u206F\uFEFF]/g, "") // Invisible characters, such as &shy; or "Zero Width Non-Joiner" .replace(/\u00A0/g, "&nbsp;") .replace(/\u2002/g, "&ensp;") .replace(/\u2003/g, "&emsp;") .replace(/\u2009/g, "&thinsp;"); //.replace(/[\u0378\u0379\u0380-\u0383\u038B\u038D\u03A2\u0530\u0557\u0558\u058B\u058C\u0590\u05C8-\u05CF\u05EB-\u05EE\u05F5-\u05FF\u070E\u074B\u074C\u07B2-\u07BF\u07FB\u07FC\u082E\u082F\u083F]/g, ""); // Weird characters } /** * Returns a TurndownService object configured for my own taste... * (of course, if you don't like it, you can configure it to fit your needs) */ function getTurndownService() { const turndownService = new TurndownService__default.default({ headingStyle: "atx", hr: "---", bulletListMarker: "-", codeBlockStyle: "fenced", fence: "```", emDelimiter: "*", strongDelimiter: "**", linkStyle: "inlined", linkReferenceStyle: "full", }); // Add strikethrough: turndownService.addRule("strikethrough", { filter: ["del", "s"], replacement: function (content) { return "~~" + content + "~~"; }, }); // Add blockquote: /*turndownService.addRule('blockquote', { filter: ['blockquote'], replacement: function (content) { return '> ' + content; } });*/ // Filter table tags: turndownService .remove("table") .remove("tbody") .remove("thead") .remove("tr") .remove("td") .remove("th"); return turndownService; } function escapeMarkdown(mdStr) { return mdStr.replace(/\|/g, "\\|"); } function mdToHtml(markdown, inline = true) { let html = markdown.trim(); // escape(markdown); // Blockquote: if (!inline) { let lines = []; let quoted = false; for (let line of html.split(/\r?\n/)) { if (line.startsWith("> ")) { if (!quoted) lines.push("<blockquote>"); quoted = true; lines.push(`<p>${mdToHtml(line.substring(2))}</p>`); } else { if (quoted) lines.push("</blockquote>"); quoted = false; lines.push(line); } if (quoted) lines.push("</blockquote>"); } html = lines.join("\n"); if (!html.startsWith("<blockquote>")) html = `<p>${html}</p>`; } // Image: html = html.replace(/!\[([^\[\]]+)\]\(([^\(\)]+)\)/g, "<img src=\"$2\" alt=\"$1\">"); // Links: html = html.replace(/\[([^\[\]]+)\]\(([^\(\)]+)\)/g, "<a href=\"$2\">$1</a>"); // Block code: html = html.replace(/```[a-z]*?\n(.*?)\n```/g, "<code>$1</code>"); // Inline code: html = html.replace(/`(.*?)`/g, "<code>$1</code>"); // Strikethrough: html = html.replace(/~~(.*?)~~/g, "<del>$1</del>"); // Oblique: html = html.replace(/___(.*?)___/g, "<em><strong>$1</strong></em>"); html = html.replace(/\*\*\*(.*?)\*\*\*/g, "<em><strong>$1</strong></em>"); // Bold: html = html.replace(/__(.*?)__/g, "<strong>$1</strong>"); html = html.replace(/\*\*(.*?)\*\*/g, "<strong>$1</strong>"); // Italic: html = html.replace(/_(.*?)_/g, "<em>$1</em>"); html = html.replace(/\*(.*?)\*/g, "<em>$1</em>"); // Escaped characters: html = html.replace(/\\([#\.\|\*_\s`\[\]\-])/g, "$1"); // Newlines: if (inline) html = html.replace(/\r?\n/g, "<br>"); else html = html.replace(/(\r?\n){2}/g, "</p>\n<p>").replace(/(?<!\<\\p\>)\r?\n(?!\<p\>)/g, " "); // Remove unnecessary whitespace: html = html.replace(/[ \t]{2,}/g, " "); return html; } function htmlToMd(html, turndownService) { return turndownService.turndown(html); } function textAlignToCSS(textAlign) { switch (textAlign) { case exports.TextAlignment.left: return "text-align: left"; case exports.TextAlignment.right: return "text-align: right"; case exports.TextAlignment.center: return "text-align: center"; case exports.TextAlignment.default: default: return "text-align: start"; } } function cssToTextAlign(element) { if (!element.css("text-align")) // Might return 'undefined' return exports.TextAlignment.default; switch (element.css("text-align").toLowerCase()) { case "left": return exports.TextAlignment.left; case "center": return exports.TextAlignment.center; case "right": return exports.TextAlignment.right; default: return exports.TextAlignment.default; } } /** changes the behavior of HTMLTableParser */ exports.HTMLTableParserMode = void 0; (function (HTMLTableParserMode) { /** uses only text (`Cheerio.text()`) */ HTMLTableParserMode[HTMLTableParserMode["StripHTMLElements"] = 0] = "StripHTMLElements"; /** uses the HTML code (`Cheerio.html()`) without any converting */ HTMLTableParserMode[HTMLTableParserMode["PreserveHTMLElements"] = 1] = "PreserveHTMLElements"; /** uses the HTML code (`Cheerio.html()`) and converts to Markdown using Turndown if possible (default) */ HTMLTableParserMode[HTMLTableParserMode["ConvertHTMLElements"] = 2] = "ConvertHTMLElements"; })(exports.HTMLTableParserMode || (exports.HTMLTableParserMode = {})); class HTMLTableParser { mode; turndownService; constructor(mode = exports.HTMLTableParserMode.ConvertHTMLElements, turndownService = getTurndownService()) { this.mode = mode; this.turndownService = turndownService; } parse(table) { /* Parse the html string and find our <table> tag to start: */ const dom = htmlparser2__namespace.parseDocument(table, { xmlMode: false, lowerCaseTags: true, lowerCaseAttributeNames: true, decodeEntities: true, }); const $ = cheerio__namespace.load(dom); const $tables = $("table"); if ($tables.length === 0) { throw new ParsingError("Couldn't find <table> tag in DOM."); } const $table = $($tables[0]); /* Converting table to Markdown: */ let parsedTable = new Table(); let hasSections = false; let tableTextAlign = cssToTextAlign($table); // Get everything before <table>: let m = table.match(/((.|\n)*)<\s*[tT][aA][bB][lL][eE][^<>]*>/m); if (m) { parsedTable.beforeTable = htmlToMd(m[1], this.turndownService); } // Get everything after </table>: m = table.match(/<\/\s*[tT][aA][bB][lL][eE]\s*>((.|\n)*)/m); if (m) { parsedTable.afterTable = htmlToMd(m[1], this.turndownService); } // Parse <thead> tag in <table>: let $theads = $table.find("thead"); if ($theads.length != 0) { let sectionTextAlign = cssToTextAlign($theads); this.parseSection($, parsedTable, $theads.find("tr"), (sectionTextAlign != exports.TextAlignment.default ? sectionTextAlign : tableTextAlign), true); hasSections = true; } // Parse <tbody> tags in <table>: const self = this; let $tbodies = $table.find("tbody"); if ($tbodies.length > 0) { $tbodies.each((i, element) => { const domTBody = $(element); let sectionTextAlign = cssToTextAlign(domTBody); self.parseSection($, parsedTable, domTBody.find("tr"), (sectionTextAlign != exports.TextAlignment.default ? sectionTextAlign : tableTextAlign), false, $theads == null, i > 0); }); hasSections = true; } // No <thead> or <tbody> tags? if (!hasSections) { // Parse table that doesn't have thead or tbody tags as one section with no header: this.parseSection($, parsedTable, $table.find("tr"), tableTextAlign, false, true, false); } // Parse <caption> tag in <table>: let $captions = $table.find("caption"); if ($captions.length != 0) { const $caption = $($captions); let caption = new TableCaption(); caption.text = htmlToMd($caption.html() ? $caption.html() : "", this.turndownService).replace(/(\r?\n)/g, "").trim(); // domCaption.innerText.replace(/(\r?\n|\[|\])/g, "").trim(); if ($caption.attr('id') && caption.getLabel() != $caption.attr('id')) caption.label = $caption.attr('id').replace(/(\r?\n|\[|\])/g, "").trim(); switch ($caption.css("caption-side") && $caption.css("caption-side").toLowerCase()) { case "bottom": caption.position = exports.TableCaptionPosition.bottom; break; case "top": default: caption.position = exports.TableCaptionPosition.top; } parsedTable.caption = caption; } return parsedTable.update(); } parseSection($, table, $rows, defaultTextAlign, isHeader = false, allowHeaderDetection = false, firstRowStartsNewSection = false) { // HTML skips "ghost" cells that are overshadowed by other cells that have a rowspan > 1. // We'll memorize them: let rowspanGhostCells = []; // Remember how many rows we already have: let rowOffset = table.rowCount(); // Iterate over each row (<tr>) of the HTML table: $rows.each((domRowIndex, element) => { let rowIndex = domRowIndex + rowOffset; let row = table.getRow(rowIndex); if (!row) row = table.addRow(); row.isHeader = isHeader; if (domRowIndex == 0) row.startsNewSection = firstRowStartsNewSection; // Memorize an offset (colspan): let colOffset = 0; // Iterate over each cell (<td> or <th>) of the HTML table row: const $row = $(element); let $cells = $row.find("td, th"); let allCellsAreTH = true; $cells.each((domColIndex, element) => { const $cell = $(element); // Get the TableColumn of our Table object, taking the memorized rowspans and colOffset into account: let colIndex = domColIndex + colOffset; while (rowspanGhostCells.filter(ghost => ghost.row == rowIndex && ghost.col == colIndex).length > 0) { colIndex = domColIndex + ++colOffset; } let column = table.getColumn(colIndex); if (!column) column = table.addColumn(); // Add cell to our Table object: let cellContent = this.parseCell($cell); let textAlign = cssToTextAlign($cell); let wrappable = $cell.hasClass("extend"); textAlign = textAlign != exports.TextAlignment.default ? textAlign : defaultTextAlign; let cellIsHeader = $cell.prop("tagName").toLowerCase() == "th"; allCellsAreTH = allCellsAreTH && cellIsHeader; let cell = new TableCell(table, row, column); cell.setText(cellContent); cell.textAlign = textAlign; cell.isHeader = cellIsHeader; if (column.textAlign == exports.TextAlignment.default) { column.textAlign = textAlign; } column.wrappable = wrappable; table.addCell(cell); // Take "colspan" into account: let colspan = $cell.prop("colspan"); if (colspan > 1) { // Add empty cells to our Table object: for (let i = 1; i < colspan; i++) { let nextColumn = table.getColumn(colIndex + i); if (!nextColumn) nextColumn = table.addColumn(); let mergedCell = table.getCell(row, nextColumn); mergedCell.merged = exports.TableCellMerge.left; } // Add colspan to colOffset: colOffset += colspan - 1; } // Take "rowspan" into account: let rowspan = $cell.prop("rowspan"); if (rowspan > 1) { // Add empty cells to our Table object: for (let i = 1; i < rowspan; i++) { let nextRow = table.getRow(rowIndex + i); if (!nextRow) nextRow = table.addRow(); nextRow.isHeader = isHeader; let mergedCell = table.getCell(nextRow, column); mergedCell.merged = exports.TableCellMerge.above; // Memorize "ghost" cells: rowspanGhostCells.push({ "row": rowIndex + i, "col": colIndex }); } } }); // Detect headers: if (allowHeaderDetection && !isHeader) { row.isHeader = allCellsAreTH; } }); } parseCell($cell) { switch (this.mode) { case exports.HTMLTableParserMode.PreserveHTMLElements: return removeInvisibleCharacters(escapeMarkdown($cell.html())); case exports.HTMLTableParserMode.StripHTMLElements: return removeInvisibleCharacters(escapeMarkdown($cell.text())); case exports.HTMLTableParserMode.ConvertHTMLElements: default: return removeInvisibleCharacters(escapeMarkdown(htmlToMd($cell.html(), this.turndownService))); } } } class HTMLTableRenderer { prettify; indent; renderOutsideTable; constructor(prettify = true, indent = " ", renderOutsideTable = true) { this.prettify = prettify; this.indent = indent; this.renderOutsideTable = renderOutsideTable; } render(table) { let result = []; if (this.renderOutsideTable && table.beforeTable.trim() !== "") result.push(mdToHtml(table.beforeTable, false)); result.push("<table>"); let headerRows = table.getHeaderRows(); let normalRows = table.getNormalRows(); if (headerRows.length > 0) { result.push(this.indentString("<thead>", 1)); for (const row of headerRows) result.push(...this.renderRow(table, row)); result.push(this.indentString("</thead>", 1)); } if (normalRows.length > 0) { result.push(this.indentString("<tbody>", 1)); for (const row of normalRows) { if (row.startsNewSection) result.push(this.indentString("</tbody>", 1), this.indentString("<tbody>", 1)); result.push(...this.renderRow(table, row)); } result.push(this.indentString("</tbody>", 1)); } if (table.caption && table.caption.text.length > 0) result.push(this.indentString(`<caption id="${table.caption.getLabel()}" style="caption-side: ${table.caption.position}">${mdToHtml(table.caption.text)}</caption>`, 1)); result.push("</table>"); if (this.renderOutsideTable && table.afterTable.trim() !== "") result.push(mdToHtml(table.afterTable, false)); return result.join(this.prettify ? "\n" : ""); } renderRow(table, row) { let result = []; result.push(this.indentString("<tr>", 2)); for (let cell of table.getCellsInRow(row)) { let renderedCell = this.indentString(this.renderCell(cell), 3); if (renderedCell.trim() !== "") result.push(renderedCell); } result.push(this.indentString("</tr>", 2)); return result; } renderCell(cell) { let colspan = cell.getColspan(); let rowspan = cell.getRowspan(); if (cell.merged == exports.TableCellMerge.none) { let cellProps = (colspan > 1 ? ` colspan="${colspan}"` : "") + (rowspan > 1 ? ` rowspan="${rowspan}"` : "") + (cell.getTextAlignment() != exports.TextAlignment.default ? ` style="${textAlignToCSS(cell.getTextAlignment())}"` : "") + // ` align="${cell.getTextAlignment()}"` (cell.column.wrappable ? `