/*
 * micromark-extension-caml
 * Version:
 * Micromark syntax extension for caml (semantic) attributes.
 * 659 lines (634 loc) • 18.7 kB
 * JavaScript
 */
// micromark-extension-caml v0.0.5-mm - https://github.com/wikibonsai/remark-caml.git
import { codes } from 'micromark-util-symbol/codes';
import { types } from 'micromark-util-symbol/types';
import { markdownLineEnding, markdownLineEndingOrSpace, markdownSpace } from 'micromark-util-character';
import { factorySpace } from 'micromark-factory-space';
import { RGX, resolve } from 'caml-mkdn';
import { ok } from 'uvu/assert';
// modelled from: https://github.com/micromark/micromark/blob/main/packages/micromark-util-symbol/codes.js
var keyUsableCharCodes = {
space: 32,
quotationMark: 34,
// `"`
numberSign: 35,
// `#`
dollarSign: 36,
// `$`
percentSign: 37,
// `%`
ampersand: 38,
// `&`
apostrophe: 39,
// `'`
leftParenthesis: 40,
// `(`
rightParenthesis: 41,
// `)`
asterisk: 42,
// `*`
plusSign: 43,
// `+`
comma: 44,
// `,`
dash: 45,
// `-`
dot: 46,
// `.`
slash: 47,
// `/`
digit0: 48,
// `0`
digit1: 49,
// `1`
digit2: 50,
// `2`
digit3: 51,
// `3`
digit4: 52,
// `4`
digit5: 53,
// `5`
digit6: 54,
// `6`
digit7: 55,
// `7`
digit8: 56,
// `8`
digit9: 57,
// `9`
semicolon: 59,
// `;`
lessThan: 60,
// `<`
equalsTo: 61,
// `=`
greaterThan: 62,
// `>`
questionMark: 63,
// `?`
atSign: 64,
// `@`
uppercaseA: 65,
// `A`
uppercaseB: 66,
// `B`
uppercaseC: 67,
// `C`
uppercaseD: 68,
// `D`
uppercaseE: 69,
// `E`
uppercaseF: 70,
// `F`
uppercaseG: 71,
// `G`
uppercaseH: 72,
// `H`
uppercaseI: 73,
// `I`
uppercaseJ: 74,
// `J`
uppercaseK: 75,
// `K`
uppercaseL: 76,
// `L`
uppercaseM: 77,
// `M`
uppercaseN: 78,
// `N`
uppercaseO: 79,
// `O`
uppercaseP: 80,
// `P`
uppercaseQ: 81,
// `Q`
uppercaseR: 82,
// `R`
uppercaseS: 83,
// `S`
uppercaseT: 84,
// `T`
uppercaseU: 85,
// `U`
uppercaseV: 86,
// `V`
uppercaseW: 87,
// `W`
uppercaseX: 88,
// `X`
uppercaseY: 89,
// `Y`
uppercaseZ: 90,
// `Z`
backslash: 92,
// `\`
underscore: 95,
// `_`
graveAccent: 96,
// `` ` ``
lowercaseA: 97,
// `a`
lowercaseB: 98,
// `b`
lowercaseC: 99,
// `c`
lowercaseD: 100,
// `d`
lowercaseE: 101,
// `e`
lowercaseF: 102,
// `f`
lowercaseG: 103,
// `g`
lowercaseH: 104,
// `h`
lowercaseI: 105,
// `i`
lowercaseJ: 106,
// `j`
lowercaseK: 107,
// `k`
lowercaseL: 108,
// `l`
lowercaseM: 109,
// `m`
lowercaseN: 110,
// `n`
lowercaseO: 111,
// `o`
lowercaseP: 112,
// `p`
lowercaseQ: 113,
// `q`
lowercaseR: 114,
// `r`
lowercaseS: 115,
// `s`
lowercaseT: 116,
// `t`
lowercaseU: 117,
// `u`
lowercaseV: 118,
// `v`
lowercaseW: 119,
// `w`
lowercaseX: 120,
// `x`
lowercaseY: 121,
// `y`
lowercaseZ: 122,
// `z`
leftCurlyBrace: 123,
// `{`
rightCurlyBrace: 125,
// `}`
tilde: 126 // `~`
};
// modelled from: https://github.com/micromark/micromark/blob/main/packages/micromark-util-character/dev/index.js
function markdownBullet(code) {
return code === codes.asterisk || code === codes.dash || code === codes.plusSign;
}
// "const enums are inlined at compile time"
// https://www.typescriptlang.org/docs/handbook/enums.html#const-enums
var CamlToken;
(function (CamlToken) {
CamlToken["camlAttr"] = "camlAttr";
CamlToken["camlKeyPrfxMarker"] = "camlKeyPrfxMarker";
CamlToken["camlKeyTxt"] = "camlKeyTxt";
CamlToken["camlKeyMarker"] = "camlKeyMarker";
CamlToken["listLineEnding"] = "listLineEnding";
CamlToken["camlValTxt"] = "camlValTxt";
CamlToken["camlListBullet"] = "camlListBullet";
CamlToken["camlListComma"] = "camlListComma";
})(CamlToken || (CamlToken = {}));
function _createForOfIteratorHelper(o, allowArrayLike) { var it = typeof Symbol !== "undefined" && o[Symbol.iterator] || o["@@iterator"]; if (!it) { if (Array.isArray(o) || (it = _unsupportedIterableToArray(o)) || allowArrayLike && o && typeof o.length === "number") { if (it) o = it; var i = 0; var F = function F() {}; return { s: F, n: function n() { if (i >= o.length) return { done: true }; return { done: false, value: o[i++] }; }, e: function e(_e) { throw _e; }, f: F }; } throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); } var normalCompletion = true, didErr = false, err; return { s: function s() { it = it.call(o); }, n: function n() { var step = it.next(); normalCompletion = step.done; return step; }, e: function e(_e2) { didErr = true; err = _e2; }, f: function f() { try { if (!normalCompletion && it.return != null) it.return(); } finally { if (didErr) throw err; } } }; }
function _unsupportedIterableToArray(o, minLen) { if (!o) return; if (typeof o === "string") return _arrayLikeToArray(o, minLen); var n = Object.prototype.toString.call(o).slice(8, -1); if (n === "Object" && o.constructor) n = o.constructor.name; if (n === "Map" || n === "Set") return Array.from(o); if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); }
/**
 * Compiler-generated helper: copy the first `len` indexed entries of an
 * array-like value into a new array.
 *
 * @param arr Array-like source (anything with `length` and index access).
 * @param len Number of entries to copy; when `null`/`undefined` or larger
 *   than `arr.length`, the whole source is copied.
 * @returns A new array holding the copied entries.
 */
function _arrayLikeToArray(arr, len) {
  var count = len == null || len > arr.length ? arr.length : len;
  var copy = new Array(count);
  var idx = 0;
  while (idx < count) {
    copy[idx] = arr[idx];
    idx += 1;
  }
  return copy;
}
// Marker strings of the caml attribute syntax.
var keyPrefixMarker = ':', // optional marker in front of a key
  keyMarker = '::', // marker between a key and its value(s)
  // only `listBulletMarker.length` is used -- the bullet character itself
  // has to be tested with markdownBullet(), since it may be any of -*+
  listBulletMarker = '- ';
// eslint-disable-next-line @typescript-eslint/no-unused-vars
var syntaxCaml = function syntaxCaml(opts) {
var flow = {};
/* eslint-disable indent */
// hooks // w/ prefix
var hooks = [].concat([codes.colon])
// w/out prefix
.concat(Object.values(keyUsableCharCodes));
/* eslint-enable indent */
// todo: any way to make this...nicer?
var _iterator = _createForOfIteratorHelper(hooks),
_step;
try {
for (_iterator.s(); !(_step = _iterator.n()).done;) {
var code = _step.value;
if (code !== null) {
flow[code] = {
name: 'caml',
tokenize: tokenizeCaml,
concrete: true,
resolveAll: resolveAttrs
};
}
}
// assemble extension
} catch (err) {
_iterator.e(err);
} finally {
_iterator.f();
}
return {
flow: flow
};
// construct functions
function resolveAttrs(events, context) {
// current index
var index = -1;
while (++index < events.length) {
// convert wikiattr token types to caml-friendly token types
if (events[index][1].type.indexOf('caml') === 0) {
switch (events[index][1].type) {
// caml primitives
case CamlToken.camlAttr:
if (events[index][0] === 'enter') {
events[index][1].type = 'attrBox';
}
if (events[index][0] === 'exit') {
events[index][1].type = 'attrBox';
}
break;
case CamlToken.camlKeyTxt:
events[index][1].type = 'attrKey';
break;
case CamlToken.camlValTxt:
events[index][1].type = 'attrVal';
break;
}
}
}
return events;
}
function tokenizeCaml(effects, ok, nok) {
// skip '@typescript-eslint/no-this-alias'
// eslint-disable-next-line
var self = this;
// in-
var inListComma;
// is-
// 'is-' signifies the kind of list the current construct might be.
// if values continue on the same line, it can only be a comma-separated list
// if values continue on the next line, it can only be a mkdn-separated list
// then, if no valid value is found, it is either a single variant (from comma) or
// an invalid value
var isListComma;
var isListMkdn;
// has-
var hasKey;
var hasValue;
// consumed-
var consumedKeyWhiteSpace;
var consumedBulletWhiteSpace;
// cursors
var cursorKeyPrefixMarker = 0;
var cursorKeyMarker = 0;
// cursors below need to be reset for each value (for list attrs)
var cursorListBulletMarker = 0;
// escape comma-separation when in double or single quotes
var inDoubleQuote = false;
var inSingleQuote = false;
return start;
// each function is given an 'end', 'invalid', and 'continue' comment
// to help guide the eye more quickly to the desired line...:
//
// 'end' : is the condition by which to kick out of this function.
// 'invalid' : are checks on whether or not to kick out and invalidate the current token ('nok')
// 'continue': continue on to the next function ('ok')
function start(code) {
// 'attr' must start at the start of a line
// from: https://github.com/micromark/micromark-extension-frontmatter/blob/main/dev/lib/syntax.js#L75
var position = self.now();
var lineFirstChar = position.column === 1;
if (!lineFirstChar) {
return nok(code);
}
effects.enter(CamlToken.camlAttr);
// w/ prefix
if (code === codes.colon) {
effects.enter(CamlToken.camlKeyPrfxMarker);
return consumeKeyPrefixMarker(code);
}
// w/out prefix
if (code !== null && RGX.VALID_CHARS.KEY.test(String.fromCharCode(code))) {
effects.enter(CamlToken.camlKeyTxt);
return consumeKeyTxt(code);
}
// invalid
return nok(code);
}
function consumeKeyPrefixMarker(code) {
// end
if (cursorKeyPrefixMarker === keyPrefixMarker.length) {
effects.exit(CamlToken.camlKeyPrfxMarker);
effects.enter(CamlToken.camlKeyTxt);
return consumeKeyTxt(code);
}
// invalid
if (code !== keyPrefixMarker.charCodeAt(cursorKeyPrefixMarker)) {
return nok(code);
}
// continue...
effects.consume(code);
cursorKeyPrefixMarker++;
return consumeKeyPrefixMarker;
}
function consumeKeyTxt(code) {
// end
if (code === keyMarker.charCodeAt(cursorKeyMarker)) {
if (!hasKey) return nok(code);
effects.exit(CamlToken.camlKeyTxt);
effects.enter(CamlToken.camlKeyMarker);
return consumeKeyMarker(code);
}
// invalid
if (markdownLineEnding(code) || code === codes.eof) {
return nok(code);
}
if (!RGX.VALID_CHARS.KEY.test(String.fromCharCode(code))) {
return nok(code);
}
// continue
if (!markdownLineEndingOrSpace(code)) {
hasKey = true;
}
effects.consume(code);
return consumeKeyTxt;
}
function consumeKeyMarker(code) {
// end
if (cursorKeyMarker === keyMarker.length) {
effects.exit(CamlToken.camlKeyMarker);
return forkKind(code);
}
// invalid
if (code !== keyMarker.charCodeAt(cursorKeyMarker)) {
return nok(code);
}
// continue
effects.consume(code);
cursorKeyMarker++;
return consumeKeyMarker;
}
function forkKind(code) {
// one whitespace is allowed for padding between
// attrtype marker '::' and left marker '[['
// todo:
// https://github.com/micromark/micromark-extension-gfm-footnote/blob/main/dev/lib/syntax.js#L405
// return factorySpace(effects, done, 'gfmFootnoteDefinitionWhitespace')
// - padding? https://github.com/micromark/micromark-extension-math/blob/main/dev/lib/math-text.js#L169
// - factorySpace? https://github.com/micromark/micromark-extension-math/blob/main/dev/lib/math-flow.js#L162
if (markdownSpace(code)) {
if (consumedKeyWhiteSpace) {
return nok(code);
}
consumedKeyWhiteSpace = true;
effects.enter(types.whitespace);
effects.consume(code);
effects.exit(types.whitespace);
return forkKind;
}
// mkdn
if (markdownLineEnding(code)) {
isListMkdn = true;
return listMkdnItemStart(code);
}
// single / first comma
if (code !== null && RGX.VALID_CHARS.KEY.test(String.fromCharCode(code))) {
isListComma = true;
effects.enter(CamlToken.camlValTxt);
return consumeValue(code);
}
return nok(code);
}
// ref: https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-fenced.js#L125
function listMkdnItemStart(code) {
// continue
if (code === codes.eof) {
return done(code);
}
if (markdownLineEnding(code)) {
return effects.attempt({
partial: true,
tokenize: consumeMkdnListLineEnding
}, consumeListBullet, done)(code);
}
// invalid
if (!markdownBullet(code)) {
return nok(code);
}
// end
effects.enter(CamlToken.camlListBullet);
return consumeListBullet(code);
}
function consumeListBullet(code) {
var bulletAndSpace = 2;
// end
if (cursorListBulletMarker === bulletAndSpace) {
effects.exit(CamlToken.camlListBullet);
consumedBulletWhiteSpace = false;
cursorListBulletMarker = 0;
effects.enter(CamlToken.camlValTxt);
return consumeValue(code);
}
// invalid
if (cursorListBulletMarker < listBulletMarker.length && !markdownLineEnding(code) && !markdownBullet(code) && !markdownSpace(code)) {
// end if hasValue
return nok(code);
}
// bullet: -*+
if (markdownBullet(code)) {
effects.enter(CamlToken.camlListBullet);
effects.consume(code);
cursorListBulletMarker++;
}
// single space
if (markdownSpace(code) && !consumedBulletWhiteSpace) {
consumedBulletWhiteSpace = true;
cursorListBulletMarker++;
effects.consume(code);
}
return consumeListBullet;
}
function consumeListComma(code) {
// continue
if (markdownSpace(code)) {
return factorySpace(effects, consumeListComma, types.whitespace)(code);
}
if (code === codes.comma) {
inListComma = true;
effects.enter(CamlToken.camlListComma);
effects.consume(code);
effects.exit(CamlToken.camlListComma);
return consumeListComma;
}
// end
if (hasValue && (markdownLineEnding(code) || code === codes.eof)) {
return done(code);
}
if (code !== null && RGX.VALID_CHARS.KEY.test(String.fromCharCode(code))) {
if (!inListComma) {
return nok(code);
}
effects.enter(CamlToken.camlValTxt);
return consumeValue(code);
}
return nok(code);
}
function consumeValue(code) {
// end
if (!inDoubleQuote && !inSingleQuote && code === codes.comma || markdownLineEnding(code)) {
if (!hasValue) {
return nok(code);
}
effects.exit(CamlToken.camlValTxt);
// fork or end
// list-comma-separated
if (isListComma) {
return consumeListComma(code);
}
// list-mkdn-separated
if (isListMkdn) {
return listMkdnItemStart(code);
}
}
// invalid
if (code === null || !RGX.VALID_CHARS.VAL.test(String.fromCharCode(code))) {
return nok(code);
}
// continue
if (!markdownLineEndingOrSpace(code)) {
hasValue = true;
// double quote
if (codes.quotationMark === code) {
inDoubleQuote = !inDoubleQuote;
}
// single quote
if (codes.apostrophe === code) {
inSingleQuote = !inSingleQuote;
}
}
effects.consume(code);
return consumeValue;
}
// fin(ish)
function done(code) {
if (!markdownLineEnding(code) && code !== codes.eof) {
return nok(code);
}
if (!hasKey || !hasValue) {
return nok(code);
}
effects.exit(CamlToken.camlAttr);
return ok(code);
}
}
// partial tokenizers
function bulletLookahead(effects, ok, nok) {
return start;
// function start (code: Code): State | void {
// self._gfmTableDynamicInterruptHack = true;
// effects.check(
// self.parser.constructs.flow,
// function (code) {
// self._gfmTableDynamicInterruptHack = false;
// return nok(code);
// },
// function (code) {
// self._gfmTableDynamicInterruptHack = false;
// return isBullet(code);
// }
// )(code);
// };
function start(code) {
if (markdownBullet(code)) {
return ok(code);
}
return nok(code);
}
}
function consumeMkdnListLineEnding(effects, ok, nok) {
return start;
function start(code) {
if (!markdownLineEnding(code)) {
return nok(code);
}
effects.enter(CamlToken.listLineEnding);
effects.consume(code);
effects.exit(CamlToken.listLineEnding);
return prefix;
}
function prefix(code) {
return factorySpace(effects, effects.check({
partial: true,
tokenize: bulletLookahead
},
// isBullet,
ok, nok), types.linePrefix)(code);
}
// function isBullet(code: Code) {
// if (markdownBullet(code)) {
// return ok(code);
// }
// return nok(code);
// }
}
};
function htmlCaml() {
// note: enter/exit keys should match a token name
return {
enter: {
attrBox: enterAttrBox
},
exit: {
attrKey: exitAttrKey,
attrVal: exitAttrVal,
attrBox: exitAttrBox
}
};
function enterAttrBox() {
// attrs
var stack = this.getData('attrStack');
if (!stack) this.setData('attrStack', stack = []);
stack.push({});
// current key
var curKey = this.getData('curKey');
if (!curKey) this.setData('curKey', '');
}
function exitAttrKey(token) {
var key = this.sliceSerialize(token);
var stack = this.getData('attrStack');
var current = top(stack);
if (!Object.keys(current).includes(key)) {
current[key] = [];
}
this.setData('curKey', key);
}
function exitAttrVal(token) {
var value = this.sliceSerialize(token);
var stack = this.getData('attrStack');
var current = top(stack);
var resolvedVal = resolve(value);
var curKey = this.getData('curKey');
current[curKey].push(resolvedVal);
}
// note: leaving this here to "close the token"
function exitAttrBox() {
var attrs = this.getData('attrStack').pop();
ok(attrs !== undefined, 'in exitAttrBox(): problem with \'attrs\'');
}
// util
function top(stack) {
return stack[stack.length - 1];
}
}
export { htmlCaml, syntaxCaml };
//# sourceMappingURL=index.js.map