@mdfriday/foundry
Version:
The core engine of MDFriday. Convert Markdown and shortcodes into fully themed static sites – Hugo-style, powered by TypeScript.
1,290 lines • 46.3 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.pageLexer = exports.Iterator = void 0;
exports.NewIterator = NewIterator;
exports.ParseBytes = ParseBytes;
exports.ParseBytesMain = ParseBytesMain;
exports.HasShortcode = HasShortcode;
exports.newPageLexer = newPageLexer;
exports.lexIntroSection = lexIntroSection;
exports.lexMainSection = lexMainSection;
const item_1 = require("./item");
// Sentinel returned by pageLexer.next() once the input is exhausted.
const eof = -1;
// Numeric enum (TypeScript-compiled, with reverse name lookup) naming the
// kinds of shortcode parameters: none, named or positional.
var ParamState;
(function (ParamState) {
ParamState[ParamState["none"] = 0] = "none";
ParamState[ParamState["named"] = 1] = "named";
ParamState[ParamState["positional"] = 2] = "positional";
})(ParamState || (ParamState = {}));
// Page syntax: byte sequences the lexer searches for in the input.
const byteOrderMark = 0xFEFF; // Unicode BOM character (U+FEFF)
const byteOrderMarkUTF8 = new Uint8Array([0xEF, 0xBB, 0xBF]); // UTF-8 BOM bytes
const summaryDivider = new TextEncoder().encode("<!--more-->");
const summaryDividerOrg = new TextEncoder().encode("# more");
const delimTOML = new TextEncoder().encode("+++");
const delimYAML = new TextEncoder().encode("---");
const delimOrg = new TextEncoder().encode("#+");
const leftDelimSc = new TextEncoder().encode("{{");
const leftDelimScNoMarkup = new TextEncoder().encode("{{<");
const rightDelimScNoMarkup = new TextEncoder().encode(">}}");
const leftDelimScWithMarkup = new TextEncoder().encode("{{%");
const rightDelimScWithMarkup = new TextEncoder().encode("%}}");
const leftComment = new TextEncoder().encode("/*"); // comments in this context are used to mark shortcodes as "not really a shortcode"
const rightComment = new TextEncoder().encode("*/");
// Inline shortcodes have the form {{< myshortcode.inline >}}
const inlineIdentifier = new TextEncoder().encode("inline ");
/**
 * Collection of section handlers driven by the main-section lexer state.
 *
 * `skip()` asks every still-active handler how far ahead its next match is and
 * returns the nearest one; `lex()` lets the handlers try to consume the input
 * at the current position.
 */
class sectionHandlers {
    /**
     * @param l The lexer these handlers operate on.
     */
    constructor(l) {
        this.l = l;
        // Set once no handler can match any more; short-circuits skip()/lex().
        this.skipAll = false;
        this.handlers = [];
        // Offsets reported by the handlers during the most recent skip().
        this.skipIndexes = [];
    }

    /**
     * @returns The smallest offset reported by an active handler, or -1 when
     *          none of them reports a match (which also sets `skipAll`).
     */
    skip() {
        if (this.skipAll) {
            return -1;
        }
        this.skipIndexes = [];
        const offsets = this.skipIndexes;
        for (const candidate of this.handlers) {
            if (!candidate.skipAll) {
                const offset = candidate.skip();
                if (offset !== -1) {
                    offsets.push(offset);
                }
            }
        }
        if (offsets.length === 0) {
            this.skipAll = true;
            return -1;
        }
        return minIndex(...offsets);
    }

    /**
     * Emits any pending text, then gives each active handler a chance to lex
     * at the current position.
     *
     * @param origin State function to fall back to.
     * @returns null when everything is skipped, the state chosen by the first
     *          handler that handled the input (or returned a null state), or
     *          `origin` after stepping one position forward when nobody did.
     */
    lex(origin) {
        if (this.skipAll) {
            return null;
        }
        const lexer = this.l;
        if (lexer.pos > lexer.start) {
            lexer.emit(item_1.ItemType.tText);
        }
        for (let i = 0; i < this.handlers.length; i += 1) {
            const candidate = this.handlers[i];
            if (!candidate.skipAll) {
                const [state, done] = candidate.lexFunc(origin, candidate.l);
                if (done || state === null) {
                    return state;
                }
            }
        }
        // Not handled by any of the above: step over one position.
        lexer.pos += 1;
        return origin;
    }
}
/**
 * One section handler: a "how far to the next match" probe paired with the
 * function that lexes the match.
 */
class sectionHandler {
    /**
     * @param l        Lexer passed to both callbacks.
     * @param skipFunc (l) => offset of the next match, or -1 if there is none.
     * @param lexFunc  (origin, l) => [nextState, handled].
     */
    constructor(l, skipFunc, lexFunc) {
        Object.assign(this, { l, skipAll: false, skipFunc, lexFunc });
    }

    /**
     * Runs the probe unless this handler is already exhausted.
     *
     * @returns The probe's offset; -1 marks the handler as exhausted for good.
     */
    skip() {
        if (!this.skipAll) {
            const offset = this.skipFunc(this.l);
            if (offset === -1) {
                this.skipAll = true;
            }
            return offset;
        }
        return -1;
    }
}
/**
 * Builds the handler set used by the main-section state: one handler for
 * shortcodes and one for the summary divider.
 *
 * @param l The lexer the handlers operate on.
 * @returns A sectionHandlers instance holding both handlers.
 */
function createSectionHandlers(l) {
    const registry = new sectionHandlers(l);

    // --- shortcode handler -------------------------------------------------
    const findShortcode = (lexer) => lexer.index(leftDelimSc);
    const lexShortcode = (origin, lexer) => {
        if (!lexer.isShortCodeStart()) {
            return [origin, false];
        }
        const scState = lexer.lexerShortcodeState;
        if (scState.isInline) {
            // If we're inside an inline shortcode, the only valid shortcode markup is
            // the markup which closes it.
            const afterDelim = lexer.input.slice(lexer.pos + 3);
            const slashAt = indexNonWhiteSpace(afterDelim, 0x2F); // '/'
            if (slashAt !== lexer.input.length - 1) {
                const tail = new TextDecoder().decode(afterDelim.slice(slashAt + 1)).trim();
                if (slashAt === -1 || !tail.startsWith(scState.currShortcodeName + " ")) {
                    return [lexer.errorf("inline shortcodes do not support nesting"), true];
                }
            }
        }
        // Remember which delimiter pair opened this shortcode.
        const types = item_1.ItemType;
        if (lexer.hasPrefix(leftDelimScWithMarkup)) {
            scState.currLeftDelimItem = types.tLeftDelimScWithMarkup;
            scState.currRightDelimItem = types.tRightDelimScWithMarkup;
        }
        else {
            scState.currLeftDelimItem = types.tLeftDelimScNoMarkup;
            scState.currRightDelimItem = types.tRightDelimScNoMarkup;
        }
        return [lexer.lexShortcodeLeftDelim, true];
    };
    const shortcodes = new sectionHandler(l, findShortcode, lexShortcode);

    // --- summary divider handler -------------------------------------------
    const findDivider = (lexer) => {
        const searchable = !lexer.summaryDividerChecked && lexer.summaryDivider;
        return searchable ? lexer.index(lexer.summaryDivider) : -1;
    };
    const lexDivider = (origin, lexer) => {
        const atDivider = lexer.summaryDivider && lexer.hasPrefix(lexer.summaryDivider);
        if (!atDivider) {
            return [origin, false];
        }
        lexer.summaryDividerChecked = true;
        lexer.pos += lexer.summaryDivider.length;
        // This makes it a little easier to reason about later.
        lexer.consumeSpace();
        lexer.emit(item_1.ItemType.TypeLeadSummaryDivider);
        return [origin, true];
    };
    const divider = new sectionHandler(l, findDivider, lexDivider);

    registry.handlers = [shortcodes, divider];
    registry.skipIndexes = new Array(registry.handlers.length);
    return registry;
}
/** Reports whether r is a horizontal blank: space (0x20) or tab (0x09). */
function isSpace(r) {
    switch (r) {
        case 0x20: // ' '
        case 0x09: // '\t'
            return true;
        default:
            return false;
    }
}
/** Reports whether r is '-' (0x2D) or satisfies isAlphaNumeric. */
function isAlphaNumericOrHyphen(r) {
    const HYPHEN = 0x2D; // '-'
    if (r === HYPHEN) {
        return true;
    }
    return isAlphaNumeric(r);
}
/** Reports whether r is a line terminator: line feed (0x0A) or carriage return (0x0D). */
function isEndOfLine(r) {
    const LF = 0x0A; // '\n'
    const CR = 0x0D; // '\r'
    return r === LF || r === CR;
}
/** Reports whether r is an ASCII letter, an ASCII digit or an underscore. */
function isAlphaNumeric(r) {
    if (r === 0x5F) { // '_'
        return true;
    }
    const upper = r >= 0x41 && r <= 0x5A; // A-Z
    const lower = r >= 0x61 && r <= 0x7A; // a-z
    const digit = r >= 0x30 && r <= 0x39; // 0-9
    return upper || lower || digit;
}
/**
 * Returns the smallest non-negative value among the arguments, or -1 when
 * there is none (negative values mean "not found" and are ignored).
 */
function minIndex(...indices) {
    return indices.reduce((best, candidate) => {
        if (candidate < 0) {
            return best;
        }
        if (best === -1) {
            return candidate;
        }
        return candidate < best ? candidate : best;
    }, -1);
}
/**
 * Looks at the first element of s that is not a space or tab (per isSpace).
 *
 * @returns Its index when that element equals `char`; -1 when it differs or
 *          when s contains nothing but blanks.
 */
function indexNonWhiteSpace(s, char) {
    let cursor = 0;
    while (cursor < s.length && isSpace(s[cursor])) {
        cursor += 1;
    }
    if (cursor < s.length && s[cursor] === char) {
        return cursor;
    }
    return -1;
}
/**
 * Cursor over the list of items produced by the lexer.
 *
 * `lastPos` is the index of the most recently consumed item and starts at -1,
 * so the first call to Next() yields items[0].
 */
class Iterator {
    /**
     * @param items Array of lexer items to iterate over.
     */
    constructor(items) {
        this.items = items;
        this.lastPos = -1;
    }

    /**
     * Builds the error item handed out whenever the cursor is past the end.
     * A fresh item is created on every call.
     */
    noMoreTokensItem() {
        const errorItem = new item_1.Item();
        errorItem.Type = item_1.ItemType.tError;
        errorItem.Err = new Error("no more tokens");
        return errorItem;
    }

    /** Consumes and returns the next item. */
    Next() {
        this.lastPos++;
        return this.Current();
    }

    /**
     * Repeatably returns the current item, or a tError item ("no more
     * tokens") once the cursor has moved past the last item.
     */
    Current() {
        if (this.lastPos >= this.items.length) {
            return this.noMoreTokensItem();
        }
        return this.items[this.lastPos];
    }

    /**
     * Backs up one token.
     *
     * @throws {Error} when nothing has been consumed yet.
     */
    Backup() {
        if (this.lastPos < 0) {
            throw new Error("need to go forward before going back");
        }
        this.lastPos--;
    }

    /** Returns the index of the most recently consumed item (-1 initially). */
    Pos() {
        return this.lastPos;
    }

    /** Reports whether the next item exists and is neither an error nor EOF. */
    IsValueNext() {
        const upcoming = this.Peek();
        return upcoming.Type !== item_1.ItemType.tError && upcoming.Type !== item_1.ItemType.tEOF;
    }

    /**
     * Looks at, but does not consume, the next item; repeated calls return
     * the same item.
     *
     * Past the end of the list this returns a tError item, exactly like
     * Next() would, instead of `undefined`. Returning `undefined` made
     * IsValueNext() throw a TypeError at the end of the stream and on an
     * empty item list.
     */
    Peek() {
        const upcomingIndex = this.lastPos + 1;
        if (upcomingIndex >= this.items.length) {
            return this.noMoreTokensItem();
        }
        return this.items[upcomingIndex];
    }

    /**
     * Feeds the upcoming items to walkFn, in order and without consuming
     * them, until walkFn returns a falsy value or the items run out.
     */
    PeekWalk(walkFn) {
        for (let i = this.lastPos + 1; i < this.items.length; i++) {
            if (!walkFn(this.items[i])) {
                break;
            }
        }
    }

    /**
     * Convenience method that consumes up to `cnt` items but stops in front
     * of the first error or EOF item.
     */
    Consume(cnt) {
        for (let i = 0; i < cnt; i++) {
            const token = this.Next();
            if (token.Type === item_1.ItemType.tError || token.Type === item_1.ItemType.tEOF) {
                this.Backup();
                break;
            }
        }
    }

    /**
     * Returns the 1-based line number of the current item, computed by
     * counting line feeds in `source` before the item's `low` offset.
     * Used for logging.
     *
     * @param source The bytes the items were lexed from.
     */
    LineNumber(source) {
        const LF = 0x0A; // '\n'
        const preceding = source.slice(0, this.Current().low);
        let lineFeeds = 0;
        for (let i = 0; i < preceding.length; i++) {
            if (preceding[i] === LF) {
                lineFeeds++;
            }
        }
        return lineFeeds + 1;
    }
}
exports.Iterator = Iterator;
/**
 * NewIterator wraps the given items in a fresh Iterator.
 *
 * @param items Array of lexer items.
 */
function NewIterator(items) {
    const iterator = new Iterator(items);
    return iterator;
}
class pageLexer {
constructor(input, stateStart, cfg) {
this.input = input;
this.stateStart = stateStart;
this.state = null;
this.pos = 0;
this.start = 0;
this.width = 0;
this.cfg = cfg;
this.summaryDivider = null;
this.summaryDividerChecked = false;
this.err = null;
this.items = [];
this.inFrontMatter = false;
this.parenDepth = 0;
this.lexerShortcodeState = {
currLeftDelimItem: item_1.ItemType.tLeftDelimScNoMarkup,
currRightDelimItem: item_1.ItemType.tRightDelimScNoMarkup,
isInline: false,
currShortcodeName: '',
closingState: 0,
elementStepNum: 0,
paramElements: 0,
paramState: ParamState.none,
openShortcodes: {}
};
// Bind methods that need 'this' context
this.lexSummaryDivider = this.lexSummaryDivider.bind(this);
this.lexMainSection = this.lexMainSection.bind(this);
this.lexIdentifierInShortcode = this.lexIdentifierInShortcode.bind(this);
this.lexEndOfShortcode = this.lexEndOfShortcode.bind(this);
this.lexShortcodeLeftDelim = this.lexShortcodeLeftDelim.bind(this);
this.lexShortcodeRightDelim = this.lexShortcodeRightDelim.bind(this);
this.lexShortcodeParam = this.lexShortcodeParam.bind(this);
this.lexShortcodeValue = this.lexShortcodeValue.bind(this);
this.lexShortcodeValueQuoted = this.lexShortcodeValueQuoted.bind(this);
this.lexShortcodeValueUnquoted = this.lexShortcodeValueUnquoted.bind(this);
this.lexInsideShortcode = this.lexInsideShortcode.bind(this);
this.lexDone = this.lexDone.bind(this);
this.sectionHandlers = createSectionHandlers(this);
}
// Implement the Result interface
Iterator() {
return NewIterator(this.items);
}
Input() {
return this.input;
}
// main loop
run() {
for (this.state = this.stateStart; this.state !== null;) {
this.state = this.state(this);
}
return this;
}
// next returns the next rune in the input.
next() {
if (this.pos >= this.input.length) {
this.width = 0;
return eof;
}
const r = this.input[this.pos];
this.width = 1;
this.pos += this.width;
return r;
}
// peek, but no consume
peek() {
const r = this.next();
this.backup();
return r;
}
// steps back one
backup() {
this.pos -= this.width;
}
append(item) {
if (item.Pos() < this.input.length) {
if (item.Type === item_1.ItemType.TypeIgnore && this.input[item.Pos()] === 0xEF) {
item.firstByte = 0xEF; // BOM mark's first byte
}
else {
item.firstByte = this.input[item.Pos()];
}
}
this.items.push(item);
}
// sends an item back to the client.
emit(t) {
// 使用 defer 模式来确保在函数结束时设置 start
const defer = () => {
this.start = this.pos;
};
if (t === item_1.ItemType.tText) {
// Identify any trailing whitespace/intendation.
// We currently only care about the last one.
for (let i = this.pos - 1; i >= this.start; i--) {
const b = this.input[i];
if (b !== 0x20 && b !== 0x09 && b !== 0x0D && b !== 0x0A) { // ' ', '\t', '\r', '\n'
break;
}
if (i === this.start && b !== 0x0A) { // '\n'
const item = new item_1.Item();
item.Type = item_1.ItemType.tIndentation;
item.low = this.start;
item.high = this.pos;
this.append(item);
defer();
return;
}
else if (b === 0x0A && i < this.pos - 1) {
const textItem = new item_1.Item();
textItem.Type = t;
textItem.low = this.start;
textItem.high = i + 1;
this.append(textItem);
const indentItem = new item_1.Item();
indentItem.Type = item_1.ItemType.tIndentation;
indentItem.low = i + 1;
indentItem.high = this.pos;
this.append(indentItem);
defer();
return;
}
else if (b === 0x0A && i === this.pos - 1) {
break;
}
}
}
// 创建并添加 item
const item = new item_1.Item();
item.Type = t;
item.low = this.start;
item.high = this.pos;
// Set firstByte if we have content
if (item.low < this.input.length) {
if (t === item_1.ItemType.TypeIgnore && this.input[item.low] === 0xEF) {
item.firstByte = 0xEF; // BOM mark's first byte
}
else {
item.firstByte = this.input[item.low];
}
}
this.append(item);
defer();
}
// sends a string item back to the client.
emitString(t) {
const item = new item_1.Item();
item.Type = t;
item['low'] = this.start;
item['high'] = this.pos;
item['isString'] = true;
if (this.pos > this.start) {
item.firstByte = this.input[this.start];
}
this.items.push(item);
this.start = this.pos;
}
isEOF() {
return this.pos >= this.input.length;
}
// special case, do not send '\\' back to client
ignoreEscapesAndEmit(t, isString) {
let i = this.start;
let k = i;
const segments = [];
while (i < this.pos) {
const r = this.input[i];
const w = 1; // In TypeScript we handle one byte at a time
if (r === 0x5C) { // '\\'
if (i > k) {
segments.push({ Low: k, High: i });
}
k = i + w;
}
i += w;
}
if (k < this.pos) {
segments.push({ Low: k, High: this.pos });
}
if (segments.length > 0) {
const item = new item_1.Item();
item.Type = t;
item['segments'] = segments;
if (segments[0].High > segments[0].Low) {
item.firstByte = this.input[segments[0].Low];
}
this.items.push(item);
this.start = this.pos;
}
this.start = this.pos;
}
// gets the current value (for debugging and error handling)
current() {
return this.input.slice(this.start, this.pos);
}
// ignore current element
ignore() {
this.start = this.pos;
}
// nil terminates the parser
errorf(format, ...args) {
const err = new Error(format.replace(/%[a-z]/g, () => String(args.shift())));
const item = new item_1.Item();
item.Type = item_1.ItemType.tError;
item.Err = err;
item.low = this.start;
item.high = this.pos;
this.append(item);
return null;
}
consumeCRLF() {
let consumed = false;
const r = this.peek();
if (r === 0x0D) { // '\r'
this.next();
if (this.peek() === 0x0A) { // '\n'
this.next();
consumed = true;
}
}
else if (r === 0x0A) { // '\n'
this.next();
consumed = true;
}
return consumed;
}
consumeToSpace() {
while (true) {
const r = this.next();
if (r === eof || isSpace(r)) {
this.backup();
return;
}
}
}
consumeSpace() {
for (;;) {
const r = this.next();
if (r === eof || !this.isUnicodeSpace(r)) {
this.backup();
return;
}
}
}
// Helper function to match Golang's unicode.IsSpace behavior
isUnicodeSpace(r) {
// This matches Golang's unicode.IsSpace implementation
return r === 0x20 || // Space
r === 0x09 || // Tab
r === 0x0A || // Line Feed
r === 0x0C || // Form Feed
r === 0x0D || // Carriage Return
r === 0x85 || // Next Line
r === 0xA0 || // No-Break Space
r === 0x2000 || // En Quad
r === 0x2001 || // Em Quad
r === 0x2002 || // En Space
r === 0x2003 || // Em Space
r === 0x2004 || // Three-Per-Em Space
r === 0x2005 || // Four-Per-Em Space
r === 0x2006 || // Six-Per-Em Space
r === 0x2007 || // Figure Space
r === 0x2008 || // Punctuation Space
r === 0x2009 || // Thin Space
r === 0x200A || // Hair Space
r === 0x2028 || // Line Separator
r === 0x2029 || // Paragraph Separator
r === 0x202F || // Narrow No-Break Space
r === 0x205F || // Medium Mathematical Space
r === 0x3000; // Ideographic Space
}
index(sep) {
const input = this.input.slice(this.pos);
const sepLen = sep.length;
const inputLen = input.length;
// If the separator is longer than the remaining input, it can't be found
if (sepLen > inputLen) {
return -1;
}
// Search for the separator
outer: for (let i = 0; i <= inputLen - sepLen; i++) {
for (let j = 0; j < sepLen; j++) {
if (input[i + j] !== sep[j]) {
continue outer;
}
}
return i;
}
return -1;
}
hasPrefix(prefix) {
if (this.pos + prefix.length > this.input.length) {
return false;
}
for (let i = 0; i < prefix.length; i++) {
if (this.input[this.pos + i] !== prefix[i]) {
return false;
}
}
return true;
}
isShortCodeStart() {
return this.hasPrefix(leftDelimScWithMarkup) || this.hasPrefix(leftDelimScNoMarkup);
}
// Handle YAML or TOML front matter.
lexFrontMatterSection(tp, delimr, name, delim) {
for (let i = 0; i < 2; i++) {
if (this.next() !== delimr) {
return this.errorf(`invalid ${name} delimiter`);
}
}
// Let front matter start at line 1
let wasEndOfLine = this.consumeCRLF();
// We don't care about the delimiters.
this.ignore();
let r;
for (;;) {
if (!wasEndOfLine) {
r = this.next();
if (r === eof) {
return this.errorf(`EOF looking for end ${name} front matter delimiter`);
}
}
if (wasEndOfLine || isEndOfLine(r)) {
if (this.hasPrefix(delim)) {
this.emit(tp);
this.pos += 3;
this.consumeCRLF();
this.ignore();
break;
}
}
wasEndOfLine = false;
}
return () => this.lexMainSection();
}
currentLeftShortcodeDelimItem() {
return this.lexerShortcodeState.currLeftDelimItem;
}
currentRightShortcodeDelimItem() {
return this.lexerShortcodeState.currRightDelimItem;
}
currentLeftShortcodeDelim() {
if (this.lexerShortcodeState.currLeftDelimItem === item_1.ItemType.tLeftDelimScWithMarkup) {
return leftDelimScWithMarkup;
}
return leftDelimScNoMarkup;
}
currentRightShortcodeDelim() {
if (this.lexerShortcodeState.currRightDelimItem === item_1.ItemType.tRightDelimScWithMarkup) {
return rightDelimScWithMarkup;
}
return rightDelimScNoMarkup;
}
// lexIdentifier scans an alphanumeric identifier.
lexIdentifier() {
for (;;) {
const r = this.next();
if (isAlphaNumericOrHyphen(r)) {
continue;
}
this.backup();
const word = new TextDecoder().decode(this.input.slice(this.start, this.pos));
if (word.length === 0) {
return null;
}
this.emit(item_1.ItemType.tScParam);
return null;
}
}
// lexMainSection is the default state.
lexMainSection() {
if (this.isEOF()) {
return () => this.lexDone();
}
// Fast forward as far as possible.
const skip = this.sectionHandlers.skip();
if (skip === -1) {
this.pos = this.input.length;
return () => this.lexDone();
}
else if (skip > 0) {
this.pos += skip;
}
const next = this.sectionHandlers.lex(this.lexMainSection);
if (next !== null) {
return next;
}
this.pos = this.input.length;
return this.lexDone;
}
lexInlineShortcodeContent() {
for (;;) {
if (this.pos >= this.input.length) {
return this.errorf("unclosed inline shortcode");
}
if (this.hasPrefix(leftDelimScWithMarkup) || this.hasPrefix(leftDelimScNoMarkup)) {
return this.errorf("inline shortcodes do not support nesting");
}
if (this.hasPrefix(this.currentRightShortcodeDelim())) {
this.backup();
if (this.pos > this.start) {
this.emit(item_1.ItemType.tText);
}
return () => this.lexShortcodeRightDelim();
}
this.next();
}
}
// lexIdentifierInShortcode scans an alphanumeric inside shortcode
lexIdentifierInShortcode() {
let lookForEnd = false;
// 使用 for(;;) 来模拟 Go 的 Loop 标签
for (;;) {
const r = this.next();
switch (true) {
case isAlphaNumericOrHyphen(r):
// 与 Go 版本一样,这里什么都不做,继续循环
break;
case r === 0x2F: // '/'
// 与 Go 版本一样,允许命名空间中的斜杠
break;
case r === 0x2E: // '.'
this.lexerShortcodeState.isInline = this.hasPrefix(inlineIdentifier);
if (!this.lexerShortcodeState.isInline) {
return this.errorf("period in shortcode name only allowed for inline identifiers");
}
break;
default:
this.backup();
const word = new TextDecoder().decode(this.input.slice(this.start, this.pos));
// 完全按照 Go 版本的逻辑顺序处理
if (this.lexerShortcodeState.closingState > 0 && !this.lexerShortcodeState.openShortcodes[word]) {
return this.errorf(`closing tag for shortcode '${word}' does not match start tag`);
}
else if (this.lexerShortcodeState.closingState > 0) {
this.lexerShortcodeState.openShortcodes[word] = false;
lookForEnd = true;
}
// 重置和设置状态
this.lexerShortcodeState.closingState = 0;
this.lexerShortcodeState.currShortcodeName = word;
this.lexerShortcodeState.openShortcodes[word] = true;
this.lexerShortcodeState.elementStepNum++;
// 发出对应的 token
if (this.lexerShortcodeState.isInline) {
this.emit(item_1.ItemType.tScNameInline);
}
else {
this.emit(item_1.ItemType.tScName);
}
// 跳出循环
if (lookForEnd) {
return () => this.lexEndOfShortcode();
}
return this.lexInsideShortcode;
}
}
}
// lexEndOfShortcode scans until it finds the end of shortcode
lexEndOfShortcode() {
this.lexerShortcodeState.isInline = false;
if (this.hasPrefix(this.currentRightShortcodeDelim())) {
return () => this.lexShortcodeRightDelim();
}
const r = this.next();
switch (true) {
case isSpace(r):
this.ignore();
break;
default:
return this.errorf("unclosed shortcode");
}
return () => this.lexEndOfShortcode();
}
// lexShortcodeLeftDelim scans the left delimiter.
lexShortcodeLeftDelim() {
this.pos += this.currentLeftShortcodeDelim().length;
if (this.isComment()) {
return () => this.lexComment();
}
this.emit(this.lexerShortcodeState.currLeftDelimItem);
this.lexerShortcodeState.elementStepNum = 0;
this.lexerShortcodeState.paramElements = 0;
this.lexerShortcodeState.paramState = ParamState.none;
this.start = this.pos;
return this.lexInsideShortcode;
}
// peekString returns the next n bytes as a string without advancing the pos.
peekString(s) {
const n = s instanceof Uint8Array ? s.length : s.length;
if (this.pos + n > this.input.length) {
return null;
}
const peek = this.input.slice(this.pos, this.pos + n);
if (s instanceof Uint8Array) {
// Compare byte arrays
for (let i = 0; i < n; i++) {
if (peek[i] !== s[i]) {
return null;
}
}
return new TextDecoder().decode(peek);
}
else {
// Compare with string
const str = new TextDecoder().decode(peek);
if (str === s) {
return str;
}
return null;
}
}
// lexSummaryDivider scans the summary divider.
lexSummaryDivider() {
if (!this.hasPrefix(summaryDivider)) {
return null;
}
// If we have text before the divider on the same line, emit it first
if (this.start < this.pos) {
const item = new item_1.Item();
item.Type = item_1.ItemType.tText;
item.low = this.start;
item.high = this.pos;
if (item.low < this.input.length) {
item.firstByte = this.input[item.low];
}
this.append(item);
}
// Emit the divider
const divider = new item_1.Item();
divider.Type = item_1.ItemType.TypeLeadSummaryDivider;
divider.low = this.pos;
divider.high = this.pos + summaryDivider.length;
if (divider.low < this.input.length) {
divider.firstByte = this.input[divider.low];
}
this.append(divider);
this.pos += summaryDivider.length;
this.start = this.pos;
// Continue with the main section lexer
return () => this.lexMainSection();
}
// handleBOM checks for and handles a UTF-8 BOM sequence.
// Returns true if a BOM was found and handled, false otherwise.
handleBOM() {
// Check for UTF-8 BOM sequence (EF BB BF)
if (this.pos + 3 <= this.input.length) {
const possibleBOM = this.input.slice(this.pos, this.pos + 3);
if (possibleBOM[0] === 0xEF && possibleBOM[1] === 0xBB && possibleBOM[2] === 0xBF) {
// Emit the entire BOM sequence as a single token
const item = new item_1.Item();
item.Type = item_1.ItemType.TypeIgnore;
item.low = this.pos;
item.high = this.pos + 3;
item.firstByte = 0xEF;
this.append(item);
this.pos += 3;
this.start = this.pos;
return true;
}
}
return false;
}
// lexIntroSection scans until it finds a front matter or a non-space.
lexIntroSection() {
this.summaryDivider = summaryDivider;
// First check for BOM at the start of the file
if (this.pos === 0 && this.handleBOM()) {
// todo
}
LOOP: for (;;) {
const r = this.next();
if (r === eof) {
break;
}
switch (true) {
case r === 0x2B: // '+'
// Check for complete TOML delimiter '+++'
if (this.hasPrefix(delimTOML.slice(1))) {
return () => this.lexFrontMatterSection(item_1.ItemType.TypeFrontMatterTOML, r, "TOML", delimTOML);
}
else {
// Not a TOML delimiter, backup and continue as regular content
this.backup();
break LOOP;
}
case r === 0x2D: // '-'
// Check for complete YAML delimiter '---'
if (this.hasPrefix(delimYAML.slice(1))) {
return () => this.lexFrontMatterSection(item_1.ItemType.TypeFrontMatterYAML, r, "YAML", delimYAML);
}
else {
// Not a YAML delimiter, backup and continue as regular content
this.backup();
break LOOP;
}
case r === 0x7B: // '{'
return lexFrontMatterJSON;
case r === 0x23: // '#'
return lexFrontMatterOrgMode;
case r === byteOrderMark:
this.emit(item_1.ItemType.TypeIgnore);
break;
case !isSpace(r) && !isEndOfLine(r):
break LOOP;
}
}
// Now move on to the shortcodes.
return () => this.lexMainSection();
}
// lexInsideShortcode scans the content inside a shortcode
lexInsideShortcode() {
if (this.hasPrefix(this.currentRightShortcodeDelim())) {
return this.lexShortcodeRightDelim;
}
const r = this.next();
if (r === eof) {
// eol is allowed inside shortcodes; this may go to end of document before it fails
return this.errorf("unclosed shortcode action");
}
if (isSpace(r) || isEndOfLine(r)) {
this.ignore();
}
else if (r === 0x3D) { // '='
this.consumeSpace();
this.ignore();
const peek = this.peek();
if (peek === 0x22 || peek === 0x5C) { // '"' or '\\'
return this.lexShortcodeQuotedParamVal(peek !== 0x5C, item_1.ItemType.tScParamVal);
}
else if (peek === 0x60) { // '`'
return this.lexShortCodeParamRawStringVal(item_1.ItemType.tScParamVal);
}
return this.lexShortcodeParamVal;
}
else if (r === 0x2F) { // '/'
if (this.lexerShortcodeState.currShortcodeName === "") {
return this.errorf("got closing shortcode, but none is open");
}
this.lexerShortcodeState.closingState++;
this.lexerShortcodeState.isInline = false;
this.emit(item_1.ItemType.tScClose);
}
else if (r === 0x5C) { // '\\'
this.ignore();
if (this.peek() === 0x22 || this.peek() === 0x60) { // '"' or '`'
return this.lexShortcodeParam(true);
}
}
else if (this.lexerShortcodeState.elementStepNum > 0 && (isAlphaNumericOrHyphen(r) || r === 0x22 || r === 0x60)) { // '"' or '`'
// positional params can have quotes
this.backup();
return this.lexShortcodeParam(false);
}
else if (isAlphaNumeric(r)) {
this.backup();
return this.lexIdentifierInShortcode;
}
else {
return this.errorf(`unrecognized character in shortcode action: U+${r.toString(16).toUpperCase()} '${String.fromCharCode(r)}'. Note: Parameters with non-alphanumeric args must be quoted`);
}
return this.lexInsideShortcode;
}
// lexShortcodeParam scans a shortcode parameter.
lexShortcodeParam(escapedQuoteStart) {
let first = true;
let nextEq = false;
let r;
for (;;) {
r = this.next();
if (first) {
if (r === 0x22 || (r === 0x60 && !escapedQuoteStart)) { // '"' or '`'
// a positional param with quotes
if (this.lexerShortcodeState.paramElements === 2) {
return this.errorf("got quoted positional parameter. Cannot mix named and positional parameters");
}
this.lexerShortcodeState.paramElements = 1;
this.backup();
if (r === 0x22) { // '"'
return () => this.lexShortcodeQuotedParamVal(!escapedQuoteStart, item_1.ItemType.tScParam);
}
return () => this.lexShortCodeParamRawStringVal(item_1.ItemType.tScParam);
}
else if (r === 0x60 && escapedQuoteStart) { // '`'
return this.errorf("unrecognized escape character");
}
first = false;
}
else if (r === 0x3D) { // '='
// a named param
this.backup();
nextEq = true;
break;
}
if (!isAlphaNumericOrHyphen(r) && r !== 0x2E) { // '.' for floats
this.backup();
break;
}
}
if (this.lexerShortcodeState.paramElements === 0) {
this.lexerShortcodeState.paramElements++;
if (nextEq) {
this.lexerShortcodeState.paramElements++;
}
}
else {
if (nextEq && this.lexerShortcodeState.paramElements === 1) {
return this.errorf(`got named parameter '${new TextDecoder().decode(this.current())}'. Cannot mix named and positional parameters`);
}
else if (!nextEq && this.lexerShortcodeState.paramElements === 2) {
return this.errorf(`got positional parameter '${new TextDecoder().decode(this.current())}'. Cannot mix named and positional parameters`);
}
}
this.emit(item_1.ItemType.tScParam);
return this.lexInsideShortcode;
}
// lexShortcodeParamVal scans a shortcode parameter value.
lexShortcodeParamVal() {
const r = this.next();
if (isSpace(r)) {
this.ignore();
return () => this.lexShortcodeParamVal();
}
this.backup();
switch (r) {
case 0x22: // '"'
this.next();
this.ignore();
return () => this.lexShortcodeQuotedParamVal(false, item_1.ItemType.tScParamVal);
case 0x60: // '`'
this.next();
this.ignore();
return () => this.lexShortCodeParamRawStringVal(item_1.ItemType.tScParamVal);
default:
return () => this.lexShortcodeValueUnquoted();
}
}
// lexShortcodeValueUnquoted scans an unquoted shortcode parameter value.
lexShortcodeValueUnquoted() {
for (;;) {
const r = this.next();
if (r === eof || isSpace(r) || this.isRightShortcodeDelim()) {
this.backup();
if (this.pos > this.start) {
this.emit(item_1.ItemType.tScParamVal);
}
return this.lexInsideShortcode;
}
}
}
// lexShortcodeQuotedParamVal scans a quoted shortcode parameter value.
lexShortcodeQuotedParamVal(escapedQuotedValuesAllowed, typ) {
let openQuoteFound = false;
let escapedInnerQuoteFound = false;
let escapedQuoteState = 0;
LOOP: for (;;) {
const r = this.next();
switch (true) {
case r === 0x5C: // '\'
if (this.peek() === 0x22) { // '"'
if (openQuoteFound && !escapedQuotedValuesAllowed) {
this.backup();
break LOOP;
}
else if (openQuoteFound) {
// the coming quote is inside
escapedInnerQuoteFound = true;
escapedQuoteState = 1;
}
}
else if (this.peek() === 0x60) { // '`'
return this.errorf("unrecognized escape character");
}
break;
case r === eof:
case r === 0x0A: // '\n'
return this.errorf(`unterminated quoted string in shortcode parameter-argument: '${new TextDecoder().decode(this.current())}'`);
case r === 0x22: // '"'
if (escapedQuoteState === 0) {
if (openQuoteFound) {
this.backup();
break LOOP;
}
else {
openQuoteFound = true;
this.ignore();
}
}
else {
escapedQuoteState = 0;
}
break;
}
}
if (escapedInnerQuoteFound) {
this.ignoreEscapesAndEmit(typ, true);
}
else {
this.emitString(typ);
}
const r = this.next();
if (r === 0x5C) { // '\'
if (this.peek() === 0x22) { // '"'
// ignore the escaped closing quote
this.ignore();
this.next();
this.ignore();
}
}
else if (r === 0x22) { // '"'
// ignore closing quote
this.ignore();
}
else {
// handled by next state
this.backup();
}
return this.lexInsideShortcode;
}
// lexShortCodeParamRawStringVal scans a raw string shortcode parameter value.
lexShortCodeParamRawStringVal(typ) {
let openBacktickFound = false;
LOOP: for (;;) {
const r = this.next();
switch (r) {
case 0x60: // '`'
if (openBacktickFound) {
this.backup();
break LOOP;
}
else {
openBacktickFound = true;
this.ignore();
}
break;
case eof:
return this.errorf(`unterminated raw string in shortcode parameter-argument: '${new TextDecoder().decode(this.input.slice(this.start, this.pos))}'`);
}
}
this.emitString(typ);
this.next();
this.ignore();
return this.lexInsideShortcode;
}
lexDone() {
// Done!
if (this.pos > this.start) {
this.emit(item_1.ItemType.tText);
}
this.emit(item_1.ItemType.tEOF);
return null;
}
lexShortcodeRightDelim() {
this.pos += this.currentRightShortcodeDelim().length;
this.emit(this.lexerShortcodeState.currRightDelimItem);
this.lexerShortcodeState.closingState = 0;
return () => this.lexMainSection();
}
lexShortcodeValue() {
const r = this.next();
switch (r) {
case eof:
return this.errorf("unterminated quoted string in shortcode parameter");
case 0x20: // ' '
case 0x09: // '\t'
this.ignore();
return () => this.lexShortcodeValue();
case 0x22: // '"'
this.ignore();
return () => this.lexShortcodeValueQuoted();
default:
this.backup();
return () => this.lexShortcodeValueUnquoted();
}
}
lexShortcodeValueQuoted() {
for (;;) {
const r = this.next();
switch (r) {
case eof:
return this.errorf("unterminated quoted string in shortcode parameter");
case 0x5C: // '\'
const peek = this.peek();
if (peek === 0x22) { // '"'
this.next();
}
break;
case 0x22: // '"'
this.backup();
this.emitString(item_1.ItemType.tScParamVal);
this.next();
this.ignore();
return this.lexInsideShortcode;
}
}
}
isRightShortcodeDelim() {
return this.hasPrefix(this.currentRightShortcodeDelim());
}
// lexComment scans a shortcode comment.
lexComment() {
const posRightComment = this.index(new Uint8Array([...rightComment, ...this.currentRightShortcodeDelim()]));
if (posRightComment <= 1) {
return this.errorf("comment must be closed");
}
// we emit all as text, except the comment markers
this.emit(item_1.ItemType.tText);
this.pos += leftComment.length;
this.ignore();
this.pos += posRightComment - leftComment.length;
this.emit(item_1.ItemType.tText);
this.pos += rightComment.length;
this.ignore();
this.pos += this.currentRightShortcodeDelim().length;
this.emit(item_1.ItemType.tText);
return this.lexMainSection;
}
// isComment checks if we're at the start of a comment
isComment() {
if (this.pos + 2 > this.input.length) {
return false;
}
return this.input[this.pos] === 0x2F && this.input[this.pos + 1] === 0x2A; // /*
}
}
exports.pageLexer = pageLexer;
/**
 * State function for JSON front matter. The caller has just consumed the
 * opening '{'; scanning continues until the braces outside of string
 * literals balance out, then the whole object is emitted.
 *
 * @param l The lexer.
 * @returns The main-section state, or null after an EOF error item.
 */
function lexFrontMatterJSON(l) {
    // Include the left delimiter
    l.backup();
    let depth = 0;
    let insideString = false;
    do {
        const ch = l.next();
        if (ch === eof) {
            return l.errorf("unexpected EOF parsing JSON front matter");
        }
        if (ch === 0x5C) { // '\\'
            // This may be an escaped quote. Make sure it's not marked as a
            // real one.
            l.next();
        }
        else if (ch === 0x22) { // '"'
            insideString = !insideString;
        }
        else if (!insideString) {
            if (ch === 0x7B) { // '{'
                depth += 1;
            }
            else if (ch === 0x7D) { // '}'
                depth -= 1;
            }
        }
    } while (depth !== 0);
    l.consumeCRLF();
    l.emit(item_1.ItemType.TypeFrontMatterJSON);
    return () => l.lexMainSection();
}
/**
 * State function for Org mode front matter: a run of lines starting with
 * "#+". The caller has just consumed a '#'. Switches the lexer to the Org
 * summary divider in every case.
 *
 * @param l The lexer.
 * @returns The main-section state.
 */
function lexFrontMatterOrgMode(l) {
    l.summaryDivider = summaryDividerOrg;
    l.backup();
    if (l.hasPrefix(delimOrg)) {
        // Read lines until we no longer see a #+ prefix
        let ch = l.next();
        while (ch !== eof && !(ch === 0x0A && !l.hasPrefix(delimOrg))) { // '\n'
            ch = l.next();
        }
        l.emit(item_1.ItemType.TypeFrontMatterORG);
    }
    return () => l.lexMainSection();
}
/**
 * ParseBytes lexes the page in b, starting with the intro section (front
 * matter detection), according to the given config.
 *
 * @returns A two-element array: the lexed items and the lexer's `err` field.
 */
function ParseBytes(b, cfg) {
    const { items, err } = parseBytes(b, cfg, lexIntroSection);
    return [items, err];
}
/**
 * ParseBytesMain lexes b starting directly with the main section, i.e.
 * without looking for front matter.
 *
 * @returns A two-element array: the lexed items and the lexer's `err` field.
 */
function ParseBytesMain(b, cfg) {
    const { items, err } = parseBytes(b, cfg, lexMainSection);
    return [items, err];
}
/**
 * Creates a lexer for b that begins in state `start`, runs it to completion
 * and returns whatever run() returns.
 */
function parseBytes(b, cfg, start) {
    return newPageLexer(b, start, cfg).run();
}
/**
 * HasShortcode reports whether s contains a shortcode opening delimiter,
 * either "{{<" or "{{%".
 */
function HasShortcode(s) {
    const openers = ["{{<", "{{%"];
    return openers.some((opener) => s.includes(opener));
}
/**
 * Creates a pageLexer for `input` that will begin in `stateStart`.
 *
 * Note: the input position is normally 0 (start), but can be set if the
 * position of the first shortcode is known.
 */
function newPageLexer(input, stateStart, cfg) {
    const lexer = new pageLexer(input, stateStart, cfg);
    return lexer;
}
/**
 * Free-function form of the intro-section state, usable as a lexer start
 * state: delegates to the lexer's own lexIntroSection().
 */
function lexIntroSection(l) {
    const nextState = l.lexIntroSection();
    return nextState;
}
/**
 * Free-function form of the main-section state, usable as a lexer start
 * state: delegates to the lexer's own lexMainSection().
 */
function lexMainSection(l) {
    const nextState = l.lexMainSection();
    return nextState;
}
//# sourceMappingURL=pagelexer.js.map