UNPKG

@accordproject/markdown-common

Version:
385 lines (316 loc) • 11.7 kB
/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ 'use strict'; const commonmark = require('commonmark'); const sax = require('sax'); const { ModelManager, Factory, Serializer } = require('@accordproject/concerto-core'); const Stack = require('./Stack'); const ToMarkdownStringVisitor = require('./ToMarkdownStringVisitor'); const { DOMParser } = require('xmldom'); const { NS_PREFIX_CommonMarkModel, CommonMarkModel } = require('./externalModels/CommonMarkModel.js'); /** * Parses markdown using the commonmark parser into the * intermediate representation: a JSON object that adheres to * the 'org.accordproject.commonmark' Concerto model. */ class CommonMarkTransformer { /** * Construct the parser. * @param {object} [options] configuration options * @param {boolean} [options.trimText] trims all text nodes * @param {boolean} [options.enableSourceLocation] if true then location information is returned * @param {boolean} [options.noIndex] do not index ordered list (i.e., use 1. everywhere) * @param {boolean} [options.tagInfo] Construct tags for HTML elements */ constructor(options) { this.options = options; const modelManager = new ModelManager(); modelManager.addModelFile(CommonMarkModel, 'commonmark.cto'); const factory = new Factory(modelManager); this.serializer = new Serializer(factory, modelManager); } /** * Is it a leaf node? * @param {*} json - the JS Object for the AST * @return {boolean} whether it's a leaf node */ static isLeafNode(json) { return json.$class === NS_PREFIX_CommonMarkModel + 'Text' || json.$class === NS_PREFIX_CommonMarkModel + 'CodeBlock' || json.$class === NS_PREFIX_CommonMarkModel + 'HtmlInline' || json.$class === NS_PREFIX_CommonMarkModel + 'HtmlBlock' || json.$class === NS_PREFIX_CommonMarkModel + 'Code'; } /** * Is it a HTML node? (html blocks or html inlines) * @param {*} json - the JS Object for the AST * @return {boolean} whether it's a leaf node */ static isHtmlNode(json) { return json.$class === NS_PREFIX_CommonMarkModel + 'HtmlInline' || json.$class === NS_PREFIX_CommonMarkModel + 'HtmlBlock'; } /** * Is it a Code Block node? * @param {*} json the JS Object for the AST * @return {boolean} whether it's a leaf node */ static isCodeBlockNode(json) { return json.$class === NS_PREFIX_CommonMarkModel + 'CodeBlock'; } /** * Removing escapes * @param {string} input - escaped * @return {string} unescaped */ static unescapeCodeBlock(input) { return input.replace(/\\`/g, '`'); } /** * Converts a CommonMark DOM to a markdown string * @param {*} input - CommonMark DOM (in JSON or as a Concerto object) * @returns {string} the markdown string */ toMarkdown(input) { if (!input.getType) { input = this.serializer.fromJSON(input); } const parameters = {}; parameters.result = ''; parameters.first = true; parameters.stack = []; const visitor = new ToMarkdownStringVisitor(this.options); input.accept(visitor, parameters); return parameters.result.trim(); } /** * Converts *the children of the node* to a CommonMark DOM to a markdown string * @param {*} input - CommonMark DOM (in JSON or as a Concerto object) * @returns {string} the markdown string */ toMarkdownChildren(input) { if (!input.getType) { input = this.serializer.fromJSON(input); } const parameters = {}; parameters.result = ''; parameters.first = true; parameters.stack = []; const visitor = new ToMarkdownStringVisitor(this.options); const result = ToMarkdownStringVisitor.visitChildren(visitor, input, parameters); // console.log('RESULT!' + result.trim()); return result.trim(); } /** * Converts a markdown string into a Concerto DOM object. * * @param {string} markdown the string to parse * @param {string} [format] the format of the object to return. Defaults to 'concerto. * Pass 'json' to return the JSON object, skipping Concerto validation * @returns {*} a Concerto object (DOM) for the markdown content */ fromMarkdown(markdown) { let format = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'concerto'; let stack = new Stack(); const that = this; const parser = sax.parser(true, { position: true }); parser.onerror = function (e) { throw e; }; parser.ontext = function (t) { if (that.options && that.options.trimText) { t = t.trim(); } const head = stack.peek(); if (t.length > 0 && head) { if (CommonMarkTransformer.isLeafNode(head)) { head.text = CommonMarkTransformer.isCodeBlockNode(head) ? CommonMarkTransformer.unescapeCodeBlock(t) : t; } if (CommonMarkTransformer.isHtmlNode(head) || CommonMarkTransformer.isCodeBlockNode(head)) { const maybeHtmlText = CommonMarkTransformer.isHtmlNode(head) ? head.text : head.info; const tagInfo = that.options && that.options.tagInfo ? CommonMarkTransformer.parseHtmlBlock(maybeHtmlText) : null; if (tagInfo) { head.tag = tagInfo; } } } }; parser.onopentag = function (node) { const newNode = {}; newNode.$class = CommonMarkTransformer.toClass(node.name); if (that.options && that.options.enableSourceLocation) { newNode.line = parser.line; newNode.column = parser.column; newNode.position = parser.position; newNode.startTagPosition = parser.startTagPosition; } // hoist the attributes into the parent object Object.keys(node.attributes).forEach(key => { newNode[key] = node.attributes[key]; }); const head = stack.peek(); if (head) { if (!head.nodes) { head.nodes = []; } stack.push(newNode); } else { stack.push(newNode, false); } }; parser.onclosetag = function (name) { // ensure the document node is left on the stack // so that we can retrieve it as the result if (name !== 'document') { const json = stack.peek(); // console.log(JSON.stringify(json, null, 4)); json.nodes = CommonMarkTransformer.mergeAdjacentTextNodes(json.nodes); json.nodes = CommonMarkTransformer.mergeAdjacentHtmlNodes(json.nodes, that.options); stack.pop(); } }; const reader = new commonmark.Parser(); const writer = new commonmark.XmlRenderer(); const parsed = reader.parse(markdown); const xml = writer.render(parsed); // console.log('====== XML ======='); // console.log(xml); parser.write(xml).close(); // console.log('====== JSON ======='); const json = stack.peek(); // console.log(JSON.stringify(json, null, 4)); // validate the object using the model if (format === 'concerto') { return this.serializer.fromJSON(json); } else { const validJson = this.serializer.fromJSON(json); return this.serializer.toJSON(validJson); } } /** * Merge adjacent text nodes in a list of nodes * @param {[*]} nodes a list of nodes * @returns {*} a new list of nodes with redundant text nodes removed */ static mergeAdjacentTextNodes(nodes) { if (nodes) { const result = []; for (let n = 0; n < nodes.length; n++) { const cur = nodes[n]; const next = n + 1 < nodes.length ? nodes[n + 1] : null; if (next && cur.$class === NS_PREFIX_CommonMarkModel + 'Text' && next.$class === NS_PREFIX_CommonMarkModel + 'Text') { next.text = cur.text + next.text; // Fold text in next node, skip current node } else { result.push(cur); } } return result; } else { return nodes; } } /** * Merge adjacent Html nodes in a list of nodes * @param {[*]} nodes - a list of nodes * @param {*} options - options * @returns {*} a new list of nodes with open/closed Html nodes merged */ static mergeAdjacentHtmlNodes(nodes, options) { if (nodes) { const result = []; for (let n = 0; n < nodes.length; n++) { const cur = nodes[n]; const next = n + 1 < nodes.length ? nodes[n + 1] : null; if (next && cur.$class === NS_PREFIX_CommonMarkModel + 'HtmlInline' && next.$class === NS_PREFIX_CommonMarkModel + 'HtmlInline' && cur.tag && next.text === "</".concat(cur.tag.tagName, ">")) { next.text = cur.text + next.text; // Fold text in next node, skip current node next.tag = options && options.tagInfo ? CommonMarkTransformer.parseHtmlBlock(next.text) : null; } else { result.push(cur); } } return result; } else { return nodes; } } /** * Retrieve the serializer used by the parser * * @returns {*} a serializer capable of dealing with the Concerto * object returns by parse */ getSerializer() { return this.serializer; } /** * * @param {string} string the string to capitalize * @returns {string} the string capitalized */ static capitalizeFirstLetter(string) { return string.charAt(0).toUpperCase() + string.slice(1); } /** * * @param {string} name the name of the commonmark type * @returns {string} the concerto type name */ static toClass(name) { const camelCased = name.replace(/_([a-z])/g, function (g) { return g[1].toUpperCase(); }); return NS_PREFIX_CommonMarkModel + CommonMarkTransformer.capitalizeFirstLetter(camelCased); } /** * Parses an HTML block and extracts the attributes, tag name and tag contents. * Note that this will return null for strings like this: </foo> * @param {string} string - the HTML block to parse * @return {Object} - a tag object that holds the data for the html block */ static parseHtmlBlock(string) { try { const doc = new DOMParser().parseFromString(string, 'text/html'); const item = doc.childNodes[0]; const attributes = item.attributes; const attributeObject = {}; let attributeString = ''; for (let i = 0; i < attributes.length; i += 1) { attributeString += "".concat(attributes[i].name, " = \"").concat(attributes[i].value, "\" "); attributeObject[attributes[i].name] = attributes[i].value; } const tag = {}; tag.$class = NS_PREFIX_CommonMarkModel + 'TagInfo'; tag.tagName = item.tagName.toLowerCase(); tag.attributeString = attributeString; tag.attributes = []; for (const attName in attributeObject) { if (Object.prototype.hasOwnProperty.call(attributeObject, attName)) { const attValue = attributeObject[attName]; tag.attributes.push({ $class: NS_PREFIX_CommonMarkModel + 'Attribute', name: attName, value: attValue }); } } tag.content = item.textContent; tag.closed = string.endsWith('/>'); return tag; } catch (err) { // no children, so we return null return null; } } } module.exports = CommonMarkTransformer;