UNPKG

@adobe/helix-pipeline

Version:

(formerly known as Hypermedia Pipeline)

324 lines (301 loc) 12.1 kB
/*
 * Copyright 2018 Adobe. All rights reserved.
 * This file is licensed to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may obtain a copy
 * of the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under
 * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
 * OF ANY KIND, either express or implied. See the License for the specific language
 * governing permissions and limitations under the License.
 */

const yaml = require('js-yaml');
const { cloneDeep } = require('lodash');
const {
  pipe, identity, type, size, join, map, zipLeast2, list,
  lookahead, filter, enumerate, range, reject,
} = require('ferrum');

const { assign } = Object;
const production = require('../utils/is-production');

/**
 * Given an mdast tree and its text representation, this finds all
 * the frontmatter in it.
 *
 * Frontmatter looks like this:
 *
 * ```
 * ---
 * Foo: bar
 * ---
 * ```
 *
 * The frontmatter is delimited by a `thematicBreak` made from three
 * dashes in the mdast and contains a yaml-encoded payload.
 *
 * Not every yaml payload and not every thematicBreak is accepted; in order
 * to avoid false positives, the following restrictions apply:
 *
 * - There must be the start or end of the document or an empty line (`\n\n`)
 *   before and after the frontmatter block
 * - There may be no empty line within the frontmatter block; not even lines
 *   containing only whitespace
 * - The yaml must yield an object (as in key-value pairs); strings, numbers or
 *   arrays are not considered to be frontmatter in this context
 * - The thematic break must be made up of three dashes
 *
 * For frontmatter blocks at the beginning of the document, the rules are a bit
 * looser, as there is less risk of confusion:
 *
 * - There must be end of the document or an empty line (`\n\n`) after the
 *   frontmatter block
 * - The yaml must yield an object (as in key-value pairs); strings, numbers or
 *   arrays are not considered to be frontmatter in this context
 * - The thematic break must be made up of three dashes
 *
 * Note that most of the information required to assess these properties
 * is not contained in the mdast itself, which is why this algorithm requires
 * access to the original markdown string. (The mdast is an Abstract Syntax Tree,
 * the proper tool for a task like this would be a Concrete Syntax Tree, but we have
 * no such thing...).
 *
 * Note that converting the mdast to a markdown string will not do, since
 * the generated markdown will be much different.
 *
 * # Future directions
 *
 * This function is likely to change in the following ways:
 *
 * - Relaxed restrictions on what constitutes frontmatter.
 * - The ability to specify custom formats; by specifying the name
 *   of the format explicitly we reduce ambiguity to pretty much zero
 *   and can allow for far more complex frontmatter formats.
 *
 * ```
 * ---json
 * {"foo": 42}
 * ---
 * ```
 *
 * # Ambiguous Frontmatter
 *
 * Normally, when one of the conditions above is violated
 * (e.g. the block contains an empty line, the payload is an array
 * instead of an object, or the opening fence is never closed), a warning
 * will be emitted instead of the frontmatter being actually parsed & returned.
 * This warning may be processed by the caller in any way; e.g. by
 * printing a warning on the console or by throwing an error...
 *
 * In order to avoid ambiguous cases, the format described above
 * should be used for valid frontmatter; in order to use horizontal
 * rules unambiguously, the markdown author should either use symbols
 * other than dash to mark horizontal rules, or leave at least one empty
 * line before and after the three dashes.
 *
 * Both ways are guaranteed to be interpreted as horizontal rules and
 * never yield warnings.
 *
 * @param {Mdast} mdast The object containing the mdast, including the root node
 *   and position information!
 * @param {string} str The original markdown source the mdast was parsed from
 * @returns {Iterable} Returns an iterable where each element represents either a
 *   block frontmatter or a warning issued.
 *   The order of warnings/frontmatter blocks is the same as in the input document.
 *
 *   Blocks of frontmatter use the following format:
 *
 *   ```
 *   {
 *     type: 'frontmatter',
 *     payload: {...},
 *     start: Number,
 *     end: Number
 *   }
 *   ```
 *
 *   `start` and `end` represent the index of the mdast node
 *   that starts/ends the frontmatter block.
 *   Just replace all those nodes with an appropriate frontmatter
 *   node containing the payload to actually insert the frontmatter
 *   into the mdast.
 *
 *   Note that a fence does not necessarily coincide with a whole mdast
 *   node: a setext heading node, for instance, covers both the heading
 *   text and the `---` line below it. Only the trailing `---` line of
 *   such a node acts as the fence; any text before it is treated as part
 *   of the yaml payload.
 *
 *   Warnings use the following format:
 *
 *   ```
 *   {
 *     type: 'warning',
 *     warning: String,
 *     source: String, // Source code of the frontmatter block
 *     start: Number, // Node index as in 'frontmatter' type
 *     end: Number, // May be null if the fence is the last in the markdown
 *     cause: Error, // The error that caused the problem if any
 *   }
 *   ```
 */
const findFrontmatter = (mdast, str) => {
  // We do a lot of stuff with regexps here
  const hspace = '[^\\S\\n\\r]'; // Horizontal space
  const re = (x) => new RegExp(x);

  // Access the markdown source of a markdown ast element
  const start = (idx) => mdast.children[idx].position.start.offset;
  const end = (idx) => mdast.children[idx].position.end.offset;
  const nodeStr = (idx) => str.slice(start(idx), end(idx));

  // Identifying MDAST nodes which are potential fences (but later
  // still need to be checked more thoroughly)
  const isHead2 = (n) => n.type === 'heading' && n.depth === 2;
  const isHr = (n) => n.type === 'thematicBreak';
  const isPotential = (n) => isHead2(n) || isHr(n);

  // Classifiers for fences: Lets us distinguish actual fences
  // from headers and horizontal rules which should not be altered:
  const decentHead = (n) => n.after && isHead2(n.nod);
  const decentHr = (n) => n.after && n.before;
  const toIgnore = (n) => !n || decentHead(n) || decentHr(n);

  // Turns each [fence, nextFence] pair into either a warning or a
  // frontmatter descriptor. `last` is null for the final fence in the
  // document, since it has no partner.
  const procWarnings = map(([fst, last]) => {
    const src = str.slice(fst.offStart, last === null ? undefined : last.offEnd);

    const warn = (cause, prosa) => ({
      type: 'warning',
      warning: prosa,
      source: src,
      fst,
      last,
      start: fst.idx,
      end: last && last.idx,
      cause,
    });

    if (!fst.before) {
      return warn(null,
        'Found ambigous frontmatter fence: No empty line before the block! '
        + 'Make sure your mid-document YAML blocks contain no empty lines '
        + 'and your horizontal rules have an empty line before AND after them.');
    } else if (last && !last.after && fst.idx > 0) {
      return warn(null,
        'Found ambigous frontmatter fence: No empty line after the block! '
        + 'Make sure your mid-document YAML blocks contain no empty lines '
        + 'and your horizontal rules have an empty line before AND after them.');
    } else if (src.match(re(`\\n${hspace}*\\n`)) && fst.idx > 0) {
      return warn(null,
        'Found ambigous frontmatter fence: Block contains empty line! '
        + 'Make sure your mid-document YAML blocks contain no empty lines '
        + 'and your horizontal rules have an empty line before AND after them.');
    }

    const txt = str.slice(fst.offEnd, !last ? undefined : last.offStart);
    let data;
    try {
      data = yaml.safeLoad(txt);
    } catch (e) {
      return warn(e, `Exception ocurred while parsing yaml: ${e}`);
    }

    const dataType = type(data);
    if (dataType !== Object) {
      // `type()` yields a constructor (or null/undefined); interpolating it
      // directly would print the function source, so use a readable name.
      let typeName;
      if (dataType) {
        typeName = dataType.name;
      } else {
        typeName = data === null ? 'null' : typeof data;
      }
      return warn(null,
        'Found ambigous frontmatter block: Block contains valid yaml, but '
        + `it's data type is ${typeName} instead of Object. `
        + 'Make sure your yaml blocks contain only key-value pairs at the root level!');
    }

    if (!last) {
      // An opening fence that is never closed cannot delimit a block, even
      // if the remainder of the document happens to be a valid yaml object.
      // The prefix deliberately matches the other fence warnings (including
      // their spelling), since warnings are recognised by that prefix below.
      return warn(null,
        'Found ambigous frontmatter fence: No closing fence found! '
        + 'Make sure your YAML blocks are terminated by a line made up of three dashes.');
    }

    return {
      type: 'frontmatter',
      payload: data,
      start: fst.idx,
      end: last.idx,
    };
  });

  // Preprocessing
  return pipe(
    enumerate(mdast.children),
    // Find any potential frontmatter starts/ends in the mdast
    /* eslint-disable-next-line no-unused-vars */
    filter(([idx, nod]) => isPotential(nod)),
    // Filter out dom nodes based on their actual text content;
    // this filters out HRs made from other characters or setext
    // headings with more than three dashes...
    //
    // And: Perform some more sophisticated feature extraction on the nodes
    map(([idx, nod]) => {
      const mat = nodeStr(idx).match(re(`(?<=^|\\n)---${hspace}*\\n?$`));
      if (!mat) {
        return null;
      }

      // Offset of the actual separator line (this may deviate from the
      // start offset of the node, e.g. for setext headings)
      const offStart = mat.index + start(idx);
      const offEnd = offStart + size(mat[0]);

      // Is there a new line or EOF before/after the separator?
      const before = Boolean(str.slice(0, offStart).match(re(`(^|(^|\\n)${hspace}*\\n)$`)));
      const after = Boolean(str.slice(offEnd).match(re(`^(${hspace}*(\\n${hspace}*(\\n|$))|$)`)));

      return {
        idx, nod, offStart, offEnd, before, after,
      };
    }),
    filter(identity),
    // Pair up two fences each; we even do this if there is only a single
    // fence (even though by definition that could never form a frontmatter
    // block) in order to warn about ambiguous nodes
    lookahead(1, null),
    // Filter out pairs in which both the start and the end is definitely
    // a setext heading or <hr>
    reject(([fst, last]) => toIgnore(fst) && toIgnore(last)),
    // Decide which blocks to ignore, which deserve warnings and which
    // are actual frontmatter
    procWarnings,
    filter(identity),
    // Filter out false positive warnings for pseudo frontmatter blocks
    // before actual frontmatter (warning gets invalidated by the fact
    // that it DIRECTLY PRECEDES an actual frontmatter block)
    lookahead(1, null),
    reject(([val, next]) => true
      && val.type === 'warning'
      && val.warning.startsWith('Found ambigous frontmatter')
      && next
      && next.type === 'frontmatter'
      && val.end === next.start),
    /* eslint-disable-next-line no-unused-vars */
    map(([val, next]) => val),
  );
};

/**
 * Error type used to wrap the warnings produced by `findFrontmatter`
 * before they are handed to the logger.
 */
class FrontmatterParsingError extends Error {}

/**
 * Pipeline step: replaces every frontmatter block found in
 * `content.mdast` with a single node of type `yaml` carrying the parsed
 * payload, and logs every warning reported by `findFrontmatter`.
 *
 * `content.mdast.children` is modified in place. Nothing is done (and
 * nothing is logged) when `content.mdast` or `content.body` is missing.
 *
 * @param {object} context The pipeline context; only `content.mdast` and
 *   `content.body` (the original markdown) are read.
 * @param {object} action The pipeline action; only `logger` is used
 *   (its `warn` and `debug` methods).
 * @returns {undefined}
 */
const parseFrontmatter = ({ content = {} }, { logger }) => {
  const { mdast, body } = content;
  if (!mdast || !body) {
    return;
  }

  // We splice the mdast. The indices reported by findFrontmatter refer
  // to the unmodified tree, so `off` tracks how far later nodes have
  // shifted because of earlier replacements.
  let off = 0;

  for (const block of list(findFrontmatter(mdast, body))) {
    if (block.type === 'frontmatter') {
      // Replace all the ast nodes making up a frontmatter block
      // with the respective frontmatter block
      const dat = {
        type: 'yaml',
        payload: block.payload,
        position: {
          start: cloneDeep(mdast.children[block.start + off].position.start),
          end: cloneDeep(mdast.children[block.end + off].position.end),
          indent: [],
        },
      };
      const cnt = block.end - block.start + 1;
      mdast.children.splice(block.start + off, cnt, dat);
      off += -cnt + 1; // cnt removed, 1 inserted
    } else {
      const { warning, source, start } = block;
      const fst = mdast.children[start + off];

      // This also needs to account for setext headings
      // hence the usage of end here, instead of using start
      const { line } = fst.position.end;

      // Source file pretty printing with line numbers
      const sourceref = pipe(
        source.split('\n'),
        zipLeast2(range(line, Infinity)),
        map(([l, no]) => ` ${no} | ${l} `),
        join('\n'),
      );

      const err = new FrontmatterParsingError(`${warning}\n${sourceref}`);
      // Outside of production the problem is surfaced as a warning; it is
      // additionally always reported at debug level.
      if (!production()) {
        logger.warn(err);
      }
      logger.debug(err);
    }
  }
};

assign(parseFrontmatter, { findFrontmatter, FrontmatterParsingError });

module.exports = parseFrontmatter;