UNPKG

micromark-extension-caml

Version:

Micromark syntax extension for caml (semantic) attributes.

659 lines (634 loc) 18.7 kB
// micromark-extension-caml v0.0.5-mm - https://github.com/wikibonsai/remark-caml.git
import { codes } from 'micromark-util-symbol/codes';
import { types } from 'micromark-util-symbol/types';
import { markdownLineEnding, markdownLineEndingOrSpace, markdownSpace } from 'micromark-util-character';
import { factorySpace } from 'micromark-factory-space';
import { RGX, resolve } from 'caml-mkdn';
import { ok } from 'uvu/assert';

/**
 * Character codes that may start a caml attribute key when no ':' prefix is used.
 * Every value in this table is registered as a flow hook by `syntaxCaml()`.
 *
 * modelled from: https://github.com/micromark/micromark/blob/main/packages/micromark-util-symbol/codes.js
 */
const keyUsableCharCodes = Object.freeze({
  space: 32,
  quotationMark: 34, // `"`
  numberSign: 35, // `#`
  dollarSign: 36, // `$`
  percentSign: 37, // `%`
  ampersand: 38, // `&`
  apostrophe: 39, // `'`
  leftParenthesis: 40, // `(`
  rightParenthesis: 41, // `)`
  asterisk: 42, // `*`
  plusSign: 43, // `+`
  comma: 44, // `,`
  dash: 45, // `-`
  dot: 46, // `.`
  slash: 47, // `/`
  digit0: 48, // `0`
  digit1: 49, // `1`
  digit2: 50, // `2`
  digit3: 51, // `3`
  digit4: 52, // `4`
  digit5: 53, // `5`
  digit6: 54, // `6`
  digit7: 55, // `7`
  digit8: 56, // `8`
  digit9: 57, // `9`
  semicolon: 59, // `;`
  lessThan: 60, // `<`
  equalsTo: 61, // `=`
  greaterThan: 62, // `>`
  questionMark: 63, // `?`
  atSign: 64, // `@`
  uppercaseA: 65, // `A`
  uppercaseB: 66, // `B`
  uppercaseC: 67, // `C`
  uppercaseD: 68, // `D`
  uppercaseE: 69, // `E`
  uppercaseF: 70, // `F`
  uppercaseG: 71, // `G`
  uppercaseH: 72, // `H`
  uppercaseI: 73, // `I`
  uppercaseJ: 74, // `J`
  uppercaseK: 75, // `K`
  uppercaseL: 76, // `L`
  uppercaseM: 77, // `M`
  uppercaseN: 78, // `N`
  uppercaseO: 79, // `O`
  uppercaseP: 80, // `P`
  uppercaseQ: 81, // `Q`
  uppercaseR: 82, // `R`
  uppercaseS: 83, // `S`
  uppercaseT: 84, // `T`
  uppercaseU: 85, // `U`
  uppercaseV: 86, // `V`
  uppercaseW: 87, // `W`
  uppercaseX: 88, // `X`
  uppercaseY: 89, // `Y`
  uppercaseZ: 90, // `Z`
  backslash: 92, // `\`
  underscore: 95, // `_`
  graveAccent: 96, // `` ` ``
  lowercaseA: 97, // `a`
  lowercaseB: 98, // `b`
  lowercaseC: 99, // `c`
  lowercaseD: 100, // `d`
  lowercaseE: 101, // `e`
  lowercaseF: 102, // `f`
  lowercaseG: 103, // `g`
  lowercaseH: 104, // `h`
  lowercaseI: 105, // `i`
  lowercaseJ: 106, // `j`
  lowercaseK: 107, // `k`
  lowercaseL: 108, // `l`
  lowercaseM: 109, // `m`
  lowercaseN: 110, // `n`
  lowercaseO: 111, // `o`
  lowercaseP: 112, // `p`
  lowercaseQ: 113, // `q`
  lowercaseR: 114, // `r`
  lowercaseS: 115, // `s`
  lowercaseT: 116, // `t`
  lowercaseU: 117, // `u`
  lowercaseV: 118, // `v`
  lowercaseW: 119, // `w`
  lowercaseX: 120, // `x`
  lowercaseY: 121, // `y`
  lowercaseZ: 122, // `z`
  leftCurlyBrace: 123, // `{`
  rightCurlyBrace: 125, // `}`
  tilde: 126, // `~`
});

/**
 * Whether `code` is one of the markdown bullet characters `*`, `-` or `+`.
 *
 * modelled from: https://github.com/micromark/micromark/blob/main/packages/micromark-util-character/dev/index.js
 *
 * @param {number | null} code - character code handed over by the tokenizer
 * @returns {boolean}
 */
function markdownBullet(code) {
  return code === codes.asterisk || code === codes.dash || code === codes.plusSign;
}

/** Token type names emitted by the caml tokenizer (each name maps to itself). */
const CamlToken = Object.freeze({
  camlAttr: 'camlAttr',
  camlKeyPrfxMarker: 'camlKeyPrfxMarker',
  camlKeyTxt: 'camlKeyTxt',
  camlKeyMarker: 'camlKeyMarker',
  listLineEnding: 'listLineEnding',
  camlValTxt: 'camlValTxt',
  camlListBullet: 'camlListBullet',
  camlListComma: 'camlListComma',
});

const keyPrefixMarker = ':';
const keyMarker = '::';
// only listBulletMarker.length is used -- markdownBullet() is what tests for -*+
const listBulletMarker = '- ';

/**
 * Build the micromark syntax extension for caml attributes.
 *
 * The same flow construct is registered for ':' (prefixed keys) and for every
 * character code listed in `keyUsableCharCodes` (un-prefixed keys).
 *
 * @param {object} [opts] - currently unused
 * @returns {{flow: Object}} extension object with one construct per hooked character code
 */
// eslint-disable-next-line @typescript-eslint/no-unused-vars
function syntaxCaml(opts) {
  const flow = {};
  // hooks: w/ prefix first, then w/out prefix
  const hooks = [codes.colon, ...Object.values(keyUsableCharCodes)];
  for (const code of hooks) {
    if (code !== null) {
      flow[code] = {
        name: 'caml',
        tokenize: tokenizeCaml,
        concrete: true,
        resolveAll: resolveAttrs,
      };
    }
  }
  // assemble extension
  return { flow };

  // construct functions

  /**
   * Rename caml token types to the attr-* names that the html extension listens for.
   * Tokens whose type is not listed below are left untouched.
   */
  function resolveAttrs(events, context) {
    const renamed = new Map([
      [CamlToken.camlAttr, 'attrBox'],
      [CamlToken.camlKeyTxt, 'attrKey'],
      [CamlToken.camlValTxt, 'attrVal'],
    ]);
    for (const event of events) {
      const token = event[1];
      const target = renamed.get(token.type);
      if (target !== undefined) {
        token.type = target;
      }
    }
    return events;
  }

  /**
   * Tokenizer for one caml attribute:
   *   [:]key::value
   *   [:]key::value, value, ...
   *   [:]key::<line ending> followed by "- value" lines
   */
  function tokenizeCaml(effects, ok, nok) {
    // skip '@typescript-eslint/no-this-alias'
    // eslint-disable-next-line
    const self = this;
    // in-
    let inListComma;
    // is-
    // 'is-' signifies the kind of list the current construct might be.
    // if values continue on the same line, it can only be a comma-separated list
    // if values continue on the next line, it can only be a mkdn-separated list
    // then, if no valid value is found, it is either a single variant (from comma) or
    // an invalid value
    let isListComma;
    let isListMkdn;
    // has-
    let hasKey;
    let hasValue;
    // consumed-
    let consumedKeyWhiteSpace;
    // cursors
    let cursorKeyPrefixMarker = 0;
    let cursorKeyMarker = 0;
    // state below is reset for each value (for list attrs)
    let cursorListBulletMarker = 0;
    // true while the currently open value token has not consumed any character yet
    let valueIsEmpty = true;
    // escape comma-separation when in double or single quotes
    let inDoubleQuote = false;
    let inSingleQuote = false;

    return start;

    // each function is given an 'end', 'invalid', and 'continue' comment
    // to help guide the eye more quickly to the desired line...:
    //
    // 'end'     : is the condition by which to kick out of this function.
    // 'invalid' : are checks on whether or not to kick out and invalidate the current token ('nok')
    // 'continue': continue on to the next function ('ok')

    function start(code) {
      // 'attr' must start at the start of a line
      // from: https://github.com/micromark/micromark-extension-frontmatter/blob/main/dev/lib/syntax.js#L75
      if (self.now().column !== 1) {
        return nok(code);
      }
      effects.enter(CamlToken.camlAttr);
      // w/ prefix
      if (code === codes.colon) {
        effects.enter(CamlToken.camlKeyPrfxMarker);
        return consumeKeyPrefixMarker(code);
      }
      // w/out prefix
      if (code !== null && RGX.VALID_CHARS.KEY.test(String.fromCharCode(code))) {
        effects.enter(CamlToken.camlKeyTxt);
        return consumeKeyTxt(code);
      }
      // invalid
      return nok(code);
    }

    function consumeKeyPrefixMarker(code) {
      // end
      if (cursorKeyPrefixMarker === keyPrefixMarker.length) {
        effects.exit(CamlToken.camlKeyPrfxMarker);
        effects.enter(CamlToken.camlKeyTxt);
        return consumeKeyTxt(code);
      }
      // invalid
      if (code !== keyPrefixMarker.charCodeAt(cursorKeyPrefixMarker)) {
        return nok(code);
      }
      // continue
      effects.consume(code);
      cursorKeyPrefixMarker++;
      return consumeKeyPrefixMarker;
    }

    function consumeKeyTxt(code) {
      // end
      if (code === keyMarker.charCodeAt(cursorKeyMarker)) {
        if (!hasKey) {
          return nok(code);
        }
        effects.exit(CamlToken.camlKeyTxt);
        effects.enter(CamlToken.camlKeyMarker);
        return consumeKeyMarker(code);
      }
      // invalid
      if (markdownLineEnding(code) || code === codes.eof) {
        return nok(code);
      }
      if (!RGX.VALID_CHARS.KEY.test(String.fromCharCode(code))) {
        return nok(code);
      }
      // continue
      if (!markdownLineEndingOrSpace(code)) {
        hasKey = true;
      }
      effects.consume(code);
      return consumeKeyTxt;
    }

    function consumeKeyMarker(code) {
      // end
      if (cursorKeyMarker === keyMarker.length) {
        effects.exit(CamlToken.camlKeyMarker);
        return forkKind(code);
      }
      // invalid
      if (code !== keyMarker.charCodeAt(cursorKeyMarker)) {
        return nok(code);
      }
      // continue
      effects.consume(code);
      cursorKeyMarker++;
      return consumeKeyMarker;
    }

    function forkKind(code) {
      // at most one whitespace is allowed for padding between
      // the key marker '::' and the first value
      // todo:
      //   https://github.com/micromark/micromark-extension-gfm-footnote/blob/main/dev/lib/syntax.js#L405
      //   return factorySpace(effects, done, 'gfmFootnoteDefinitionWhitespace')
      //   - padding?      https://github.com/micromark/micromark-extension-math/blob/main/dev/lib/math-text.js#L169
      //   - factorySpace? https://github.com/micromark/micromark-extension-math/blob/main/dev/lib/math-flow.js#L162
      if (markdownSpace(code)) {
        if (consumedKeyWhiteSpace) {
          return nok(code);
        }
        consumedKeyWhiteSpace = true;
        effects.enter(types.whitespace);
        effects.consume(code);
        effects.exit(types.whitespace);
        return forkKind;
      }
      // mkdn
      if (markdownLineEnding(code)) {
        isListMkdn = true;
        return listMkdnItemStart(code);
      }
      // single / first comma
      if (code !== null && RGX.VALID_CHARS.KEY.test(String.fromCharCode(code))) {
        isListComma = true;
        return enterValue(code);
      }
      return nok(code);
    }

    // ref: https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-fenced.js#L125
    function listMkdnItemStart(code) {
      // continue
      if (code === codes.eof) {
        return done(code);
      }
      if (markdownLineEnding(code)) {
        // only continue the list when the next line (after optional indent) starts with a bullet
        return effects.attempt(
          { partial: true, tokenize: consumeMkdnListLineEnding },
          consumeListBullet,
          done
        )(code);
      }
      // invalid
      if (!markdownBullet(code)) {
        return nok(code);
      }
      // end -- consumeListBullet() opens the bullet token itself, so that it is
      // entered exactly once no matter which path leads there
      return consumeListBullet(code);
    }

    function consumeListBullet(code) {
      // end: exactly one bullet char and one space have been consumed
      if (cursorListBulletMarker === listBulletMarker.length) {
        effects.exit(CamlToken.camlListBullet);
        cursorListBulletMarker = 0;
        return enterValue(code);
      }
      // bullet: -*+ (must come first)
      if (cursorListBulletMarker === 0) {
        if (!markdownBullet(code)) {
          return nok(code);
        }
        effects.enter(CamlToken.camlListBullet);
        effects.consume(code);
        cursorListBulletMarker++;
        return consumeListBullet;
      }
      // single space (must directly follow the bullet); anything else -- a second
      // bullet char, a line ending, eof -- invalidates instead of leaving the
      // token unbalanced or returning without consuming
      if (!markdownSpace(code)) {
        return nok(code);
      }
      effects.consume(code);
      cursorListBulletMarker++;
      return consumeListBullet;
    }

    function consumeListComma(code) {
      // continue
      if (markdownSpace(code)) {
        return factorySpace(effects, consumeListComma, types.whitespace)(code);
      }
      if (code === codes.comma) {
        inListComma = true;
        effects.enter(CamlToken.camlListComma);
        effects.consume(code);
        effects.exit(CamlToken.camlListComma);
        return consumeListComma;
      }
      // end
      if (hasValue && (markdownLineEnding(code) || code === codes.eof)) {
        return done(code);
      }
      if (code !== null && RGX.VALID_CHARS.KEY.test(String.fromCharCode(code))) {
        // a further value is only allowed after a comma
        if (!inListComma) {
          return nok(code);
        }
        return enterValue(code);
      }
      return nok(code);
    }

    // open a fresh value token and hand the current code to consumeValue()
    function enterValue(code) {
      valueIsEmpty = true;
      effects.enter(CamlToken.camlValTxt);
      return consumeValue(code);
    }

    function consumeValue(code) {
      // end: an unquoted comma, a line ending, or the end of the document
      const atUnquotedComma = !inDoubleQuote && !inSingleQuote && code === codes.comma;
      if (atUnquotedComma || markdownLineEnding(code) || code === codes.eof) {
        // never close a value token that holds no characters
        if (!hasValue || valueIsEmpty) {
          return nok(code);
        }
        effects.exit(CamlToken.camlValTxt);
        // fork or end
        // list-comma-separated
        if (isListComma) {
          return consumeListComma(code);
        }
        // list-mkdn-separated
        if (isListMkdn) {
          return listMkdnItemStart(code);
        }
      }
      // invalid
      if (code === null || !RGX.VALID_CHARS.VAL.test(String.fromCharCode(code))) {
        return nok(code);
      }
      // continue
      if (!markdownLineEndingOrSpace(code)) {
        hasValue = true;
        // double quote
        if (codes.quotationMark === code) {
          inDoubleQuote = !inDoubleQuote;
        }
        // single quote
        if (codes.apostrophe === code) {
          inSingleQuote = !inSingleQuote;
        }
      }
      effects.consume(code);
      valueIsEmpty = false;
      return consumeValue;
    }

    // fin(ish)
    function done(code) {
      if (!markdownLineEnding(code) && code !== codes.eof) {
        return nok(code);
      }
      if (!hasKey || !hasValue) {
        return nok(code);
      }
      effects.exit(CamlToken.camlAttr);
      return ok(code);
    }
  }

  // partial tokenizers

  // succeeds (without consuming) when the current code is a bullet char
  function bulletLookahead(effects, ok, nok) {
    return start;

    function start(code) {
      return markdownBullet(code) ? ok(code) : nok(code);
    }
  }

  // consumes one line ending plus any line prefix whitespace, then checks
  // that a bullet follows
  function consumeMkdnListLineEnding(effects, ok, nok) {
    return start;

    function start(code) {
      if (!markdownLineEnding(code)) {
        return nok(code);
      }
      effects.enter(CamlToken.listLineEnding);
      effects.consume(code);
      effects.exit(CamlToken.listLineEnding);
      return prefix;
    }

    function prefix(code) {
      const checkBullet = effects.check({ partial: true, tokenize: bulletLookahead }, ok, nok);
      return factorySpace(effects, checkBullet, types.linePrefix)(code);
    }
  }
}

/**
 * Build the micromark html extension for caml attributes.
 *
 * Collects keys and their resolved values per attribute box into the compile
 * context data ('attrStack' / 'curKey'); the handlers here write no output.
 *
 * @returns {{enter: Object, exit: Object}} html extension handlers
 */
function htmlCaml() {
  // note: enter/exit keys should match a token name
  return {
    enter: {
      attrBox: enterAttrBox,
    },
    exit: {
      attrKey: exitAttrKey,
      attrVal: exitAttrVal,
      attrBox: exitAttrBox,
    },
  };

  function enterAttrBox() {
    // attrs
    let stack = this.getData('attrStack');
    if (!stack) {
      stack = [];
      this.setData('attrStack', stack);
    }
    stack.push({});
    // current key
    if (!this.getData('curKey')) {
      this.setData('curKey', '');
    }
  }

  function exitAttrKey(token) {
    const key = this.sliceSerialize(token);
    const current = top(this.getData('attrStack'));
    if (!Object.keys(current).includes(key)) {
      current[key] = [];
    }
    this.setData('curKey', key);
  }

  function exitAttrVal(token) {
    const value = this.sliceSerialize(token);
    const current = top(this.getData('attrStack'));
    const resolvedVal = resolve(value);
    const curKey = this.getData('curKey');
    current[curKey].push(resolvedVal);
  }

  // note: leaving this here to "close the token"
  function exitAttrBox() {
    const attrs = this.getData('attrStack').pop();
    ok(attrs !== undefined, 'in exitAttrBox(): problem with \'attrs\'');
  }

  // util
  function top(stack) {
    return stack[stack.length - 1];
  }
}

export { htmlCaml, syntaxCaml };
//# sourceMappingURL=index.js.map