/**
 * @npmstuff/argdown-core
 * A pluggable parser for the Argdown argumentation syntax.
 */
"use strict";
import * as chevrotain from "chevrotain";
import last from "lodash.last";
import partialRight from "lodash.partialright";
import { TokenNames } from "./TokenNames";
import { arrayIsEmpty } from "./utils";
const createToken = chevrotain.createToken;
const createTokenInstance = chevrotain.createTokenInstance;
const tokenMatcher = chevrotain.tokenMatcher;
// Mutable lexer state, reset by init() before each tokenize() run:
// - indentStack tracks the currently open indentation levels (measured in
//   characters) so Indent/Dedent tokens can be emitted Python-style;
// - rangesStack tracks the currently open bold/italic ranges so start and
//   end delimiters pair up correctly.
let indentStack = [];
let rangesStack = [];
// All token types defined by this module, in definition order.
export const tokenList = [];
// Resets the lexer state; indentation level 0 is always on the stack.
const init = () => {
    indentStack = [0];
    rangesStack = [];
};
// Returns the line the lexer is currently on, derived from the last matched
// token. If that token ended with a line break (Newline or Emptyline), the
// current position is already on the following line, hence the increment.
// (Consistency fix: use the local `tokenMatcher` alias like the rest of the
// file instead of `chevrotain.tokenMatcher`.)
const getCurrentLine = (tokens) => {
    if (arrayIsEmpty(tokens))
        return 1;
    const lastToken = last(tokens);
    let currentLine = lastToken ? lastToken.endLine : 1;
    if (lastToken &&
        (tokenMatcher(lastToken, Emptyline) ||
            tokenMatcher(lastToken, Newline))) {
        currentLine++;
    }
    return currentLine;
};
// Returns the end offset of the last matched token, or 0 when no token has
// been matched yet.
const getCurrentEndOffset = (tokens) => {
    if (arrayIsEmpty(tokens)) {
        return 0;
    }
    const latest = last(tokens);
    if (!latest) {
        return 0;
    }
    return latest.endOffset;
};
// True if the given token exists and is a Newline token; a missing token
// (null/undefined) counts as "not a newline".
const lastTokenIsNewline = (lastToken) => {
    return lastToken != null && tokenMatcher(lastToken, Newline);
};
// Closes every indentation level still open (except the base level 0) by
// pushing one zero-width Dedent token per level onto matchedTokens.
const emitRemainingDedentTokens = (matchedTokens) => {
    // Already at the outermost level: nothing to close.
    if (indentStack.length <= 1) {
        return;
    }
    const lastToken = last(matchedTokens);
    // All synthetic Dedents share a zero-width position right after the
    // last matched token.
    const offset = getCurrentEndOffset(matchedTokens);
    const line = getCurrentLine(matchedTokens);
    const column = lastToken && lastToken.endColumn ? lastToken.endColumn : 0;
    for (let depth = indentStack.length; depth > 1; depth--) {
        indentStack.pop();
        matchedTokens.push(createTokenInstance(Dedent, "", offset, offset, line, line, column, column));
    }
};
// Compares the current line's indentation (indentStr) against the top of
// indentStack and emits a synthetic Indent token (deeper) or one Dedent per
// closed level (shallower). Indentation levels are measured in characters.
const emitIndentOrDedent = (matchedTokens, indentStr) => {
    const currIndentLevel = indentStr.length;
    let lastIndentLevel = last(indentStack) || 0;
    // Synthetic tokens carry an empty image; their position spans the
    // indentation whitespace on the current line.
    const image = "";
    const startOffset = getCurrentEndOffset(matchedTokens) + 1;
    const endOffset = startOffset + indentStr.length - 1;
    const startLine = getCurrentLine(matchedTokens);
    const endLine = startLine;
    const startColumn = 1;
    const endColumn = startColumn + indentStr.length - 1;
    if (currIndentLevel > lastIndentLevel) {
        indentStack.push(currIndentLevel);
        let indentToken = createTokenInstance(Indent, image, startOffset, endOffset, startLine, endLine, startColumn, endColumn);
        matchedTokens.push(indentToken);
    }
    else if (currIndentLevel < lastIndentLevel) {
        // Pop every level deeper than the new indentation, emitting one
        // Dedent per popped level. Level 0 always stays on the stack.
        while (indentStack.length > 1 && currIndentLevel < lastIndentLevel) {
            indentStack.pop();
            lastIndentLevel = last(indentStack) || 0;
            let dedentToken = createTokenInstance(Dedent, image, startOffset, endOffset, startLine, endLine, startColumn, endColumn);
            matchedTokens.push(dedentToken);
        }
    }
};
// Custom chevrotain matcher for relation symbols (e.g. "+>", "<-").
// Relations are only recognized at the start of a line; on success the
// captured indentation is fed to emitIndentOrDedent before returning.
const matchRelation = (text, offset, tokens, _groups, pattern) => {
    const lastToken = last(tokens);
    const atLineStart = arrayIsEmpty(tokens) ||
        lastTokenIsNewline(lastToken) ||
        (lastToken && tokenMatcher(lastToken, Emptyline));
    if (!atLineStart) {
        return null;
    }
    const match = pattern.exec(text.substr(offset || 0));
    // Relation patterns capture exactly two groups: indentation + symbol.
    if (match === null || match.length !== 3) {
        return null;
    }
    emitIndentOrDedent(tokens, match[1]);
    return match;
};
// Relation matchers: group 1 captures the leading indentation, group 2 the
// relation symbol. Fix: the indentation class was [' '\t], and quote
// characters inside a character class are literals, so an apostrophe was
// accidentally accepted as indentation; the class is now [ \t].
const matchIncomingSupport = partialRight(matchRelation, /^([ \t]*)(\+>)/);
const matchIncomingAttack = partialRight(matchRelation, /^([ \t]*)(->)/);
const matchOutgoingSupport = partialRight(matchRelation, /^([ \t]*)(<?\+)/);
const matchOutgoingAttack = partialRight(matchRelation, /^([ \t]*)(<?-)/);
const matchContradiction = partialRight(matchRelation, /^([ \t]*)(><)/);
const matchIncomingUndercut = partialRight(matchRelation, /^([ \t]*)(_>)/);
const matchOutgoingUndercut = partialRight(matchRelation, /^([ \t]*)(<_|(?:_(?=\s)))/);
// Relation token definitions. Every pattern may begin with indentation, so
// " " and "\t" are always hinted; in addition, each character that can begin
// the relation symbol itself must appear in start_chars_hint, otherwise
// chevrotain's custom-pattern lookup may never attempt the matcher for
// unindented input.
export const IncomingSupport = createToken({
    name: TokenNames.INCOMING_SUPPORT,
    pattern: matchIncomingSupport,
    line_breaks: true,
    label: "+> (Incoming Support)",
    start_chars_hint: [" ", "\t", "+"]
});
tokenList.push(IncomingSupport);
export const IncomingAttack = createToken({
    name: TokenNames.INCOMING_ATTACK,
    pattern: matchIncomingAttack,
    line_breaks: true,
    label: "-> (Incoming Attack)",
    start_chars_hint: [" ", "\t", "-"]
});
tokenList.push(IncomingAttack);
export const OutgoingSupport = createToken({
    name: TokenNames.OUTGOING_SUPPORT,
    pattern: matchOutgoingSupport,
    line_breaks: true,
    label: "<+ (Outgoing Support)",
    // Fix: "<" is optional in the pattern (<?\+), so a bare "+" can start
    // this token and must be hinted too.
    start_chars_hint: [" ", "\t", "<", "+"]
});
tokenList.push(OutgoingSupport);
export const OutgoingAttack = createToken({
    name: TokenNames.OUTGOING_ATTACK,
    pattern: matchOutgoingAttack,
    line_breaks: true,
    label: "<- (Outgoing Attack)",
    // Fix: "<" is optional in the pattern (<?-), so a bare "-" must be hinted.
    start_chars_hint: [" ", "\t", "<", "-"]
});
tokenList.push(OutgoingAttack);
export const Contradiction = createToken({
    name: TokenNames.CONTRADICTION,
    pattern: matchContradiction,
    line_breaks: true,
    label: ">< (Contradiction)",
    start_chars_hint: [" ", "\t", ">"]
});
tokenList.push(Contradiction);
export const IncomingUndercut = createToken({
    name: TokenNames.INCOMING_UNDERCUT,
    pattern: matchIncomingUndercut,
    line_breaks: true,
    label: "_> (Incoming Undercut)",
    start_chars_hint: [" ", "\t", "_"]
});
tokenList.push(IncomingUndercut);
export const OutgoingUndercut = createToken({
    name: TokenNames.OUTGOING_UNDERCUT,
    pattern: matchOutgoingUndercut,
    line_breaks: true,
    label: "<_ (Outgoing Undercut)",
    // Fix: the pattern also matches a bare "_" followed by whitespace
    // (_(?=\s)), so "_" must be hinted.
    start_chars_hint: [" ", "\t", "<", "_"]
});
tokenList.push(OutgoingUndercut);
// Two dashes after optional indentation open an inference block. Fix: the
// indentation class was [' '\t], in which the quotes are literal characters,
// so an apostrophe was accidentally treated as indentation; it is now [ \t].
const inferenceStartPattern = /^[ \t]*-{2}/;
// Custom matcher: an inference may only start at the beginning of the input
// or directly after a linebreak; on success all open indentation levels are
// closed first.
const matchInferenceStart = (text, offset, tokens) => {
    let remainingText = text.substr(offset || 0);
    const lastToken = last(tokens);
    let afterNewline = lastTokenIsNewline(lastToken);
    if (arrayIsEmpty(tokens) || afterNewline) {
        const match = inferenceStartPattern.exec(remainingText);
        if (match != null) {
            emitRemainingDedentTokens(tokens);
            return match;
        }
    }
    return null;
};
// "--" opens an inference block and switches the lexer into inference_mode.
export const InferenceStart = createToken({
    name: TokenNames.INFERENCE_START,
    pattern: matchInferenceStart,
    push_mode: "inference_mode",
    line_breaks: true,
    label: "-- (Inference Start)",
    start_chars_hint: [" ", "\t", "-"]
});
tokenList.push(InferenceStart);
// YAML front matter fenced by three or more "=" on each side (lazy body).
export const FrontMatter = createToken({
    name: TokenNames.FRONT_MATTER,
    pattern: /===+[\s\S]*?===+/,
    label: "Front Matter (YAML)"
});
tokenList.push(FrontMatter);
// Inline YAML metadata in curly braces. The lookaheads try to keep the lazy
// match from closing on a "}" that is followed by list/nesting context.
// NOTE(review): this regex is intricate — verify against the test suite
// before changing it.
export const Data = createToken({
    name: TokenNames.DATA,
    pattern: /{((?!}\s[^\,}])(.|\n))*}(?!\s*(\,|}))/,
    label: "Meta Data (YAML)"
});
tokenList.push(Data);
// "," separates entries inside an inference block (inference_mode only).
export const ListDelimiter = createToken({
    name: TokenNames.LIST_DELIMITER,
    pattern: /,/,
    label: ","
});
tokenList.push(ListDelimiter);
// Two or more dashes close an inference block and pop inference_mode.
export const InferenceEnd = createToken({
    name: TokenNames.INFERENCE_END,
    pattern: /-{2,}/,
    pop_mode: true,
    label: "-- (Inference End)"
});
tokenList.push(InferenceEnd);
// Custom chevrotain matcher for ordered/unordered list items. List items are
// only recognized at the start of a line; the captured indentation drives
// Indent/Dedent emission.
const matchListItem = (text, offset, tokens, _groups, pattern) => {
    const lastToken = last(tokens);
    const canStartHere = arrayIsEmpty(tokens) ||
        lastTokenIsNewline(lastToken) ||
        (lastToken && tokenMatcher(lastToken, Emptyline));
    if (!canStartHere) {
        return null;
    }
    const match = pattern.exec(text.substr(offset || 0));
    if (match === null) {
        return null;
    }
    // The list marker itself occupies one extra column, so the effective
    // indentation is the captured whitespace plus one character.
    emitIndentOrDedent(tokens, match[1] + " ");
    return match;
};
// List item patterns. Fix: the indentation class was [' '\t]; quotes inside
// a character class are literal, so an apostrophe was accidentally accepted
// as indentation. The class is now [ \t]. The (?=\s) lookahead requires
// whitespace after the marker.
const orderedListItemPattern = /^([ \t]*)\d+\.(?=\s)/;
const matchOrderedListItem = partialRight(matchListItem, orderedListItemPattern);
export const OrderedListItem = createToken({
    name: TokenNames.ORDERED_LIST_ITEM,
    pattern: matchOrderedListItem,
    line_breaks: true,
    label: "{Indentation}{number}. (Ordered List Item)",
    start_chars_hint: [" ", "\t"]
});
tokenList.push(OrderedListItem);
const unorderedListItemPattern = /^([ \t]*)\*(?=\s)/;
const matchUnorderedListItem = partialRight(matchListItem, unorderedListItemPattern);
export const UnorderedListItem = createToken({
    name: TokenNames.UNORDERED_LIST_ITEM,
    pattern: matchUnorderedListItem,
    line_breaks: true,
    label: "{Indentation}* (Unordered List Item)",
    start_chars_hint: [" ", "\t"]
});
tokenList.push(UnorderedListItem);
const emptylinePattern = /^(?:[ \t]*(?:\r\n|\n)){2,}/;
// Custom matcher for an empty line (two or more consecutive linebreaks,
// each optionally preceded by spaces/tabs).
const matchEmptyline = (text, offset, tokens) => {
    const lastToken = last(tokens);
    // Never emit two Emptyline tokens in a row.
    if (lastToken && tokenMatcher(lastToken, Emptyline)) {
        return null;
    }
    const remaining = text.substr(offset || 0);
    const match = emptylinePattern.exec(remaining);
    if (match === null) {
        return null;
    }
    // Unless the empty lines run to the very end of the input, close all
    // open indentation levels before the next block starts.
    if (match[0].length < remaining.length) {
        emitRemainingDedentTokens(tokens);
    }
    return match;
};
// An empty line: two or more consecutive linebreaks (see matchEmptyline).
export const Emptyline = createToken({
    name: TokenNames.EMPTYLINE,
    pattern: matchEmptyline,
    line_breaks: true,
    label: "{linebreak}{linebreak} (Empty Line)",
    start_chars_hint: ["\r", "\n"]
});
tokenList.push(Emptyline);
// Indent/Dedent are never matched from the input directly (Lexer.NA);
// instances are created manually by emitIndentOrDedent and
// emitRemainingDedentTokens.
export const Indent = createToken({
    name: TokenNames.INDENT,
    pattern: chevrotain.Lexer.NA
});
tokenList.push(Indent);
export const Dedent = createToken({
    name: TokenNames.DEDENT,
    pattern: chevrotain.Lexer.NA
});
tokenList.push(Dedent);
// "[Title]:" — defines a statement title.
export const StatementDefinition = createToken({
    name: TokenNames.STATEMENT_DEFINITION,
    pattern: /\[.+?\]\:/,
    label: "[Statement Title]: (Statement Definition)"
});
tokenList.push(StatementDefinition);
// "[Title]" — references a statement. The first character inside the
// brackets must not be "-"; presumably this avoids a clash with other
// bracketed constructs — confirm before changing.
export const StatementReference = createToken({
    name: TokenNames.STATEMENT_REFERENCE,
    pattern: /\[[^-].*?\]/,
    label: "[Statement Title] (Statement Reference)"
});
tokenList.push(StatementReference);
// "@[Title]" — mentions a statement without creating a relation; may consume
// one trailing space/tab.
export const StatementMention = createToken({
    name: TokenNames.STATEMENT_MENTION,
    pattern: /\@\[.+?\][ \t]?/,
    label: "@[Statement Title] (Statement Mention)"
});
tokenList.push(StatementMention);
// "(1)"-style statement numbers at line start. Fix: the indentation class
// was [' '\t]; quotes in a character class are literal, so an apostrophe was
// accidentally accepted as indentation — now [ \t]. Also replaced a stray
// `var` with `const` for consistency with the rest of the file.
const statementNumberPattern = /^[ \t]*\(\d+\)/;
const matchStatementNumber = (text, offset, tokens) => {
    const remainingText = text.substr(offset || 0);
    const lastToken = last(tokens);
    const afterNewline = lastTokenIsNewline(lastToken);
    const afterEmptyline = lastToken && tokenMatcher(lastToken, Emptyline);
    // Statement numbers are only recognized at the start of a line.
    if (arrayIsEmpty(tokens) || afterEmptyline || afterNewline) {
        const match = statementNumberPattern.exec(remainingText);
        if (match !== null) {
            // A statement number closes all open indentation levels.
            emitRemainingDedentTokens(tokens);
            return match;
        }
    }
    return null;
};
// "(n)" — numbered premise/conclusion inside an argument reconstruction.
export const StatementNumber = createToken({
    name: TokenNames.STATEMENT_NUMBER,
    pattern: matchStatementNumber,
    line_breaks: true,
    label: "(Number) (Statement Number)",
    start_chars_hint: [" ", "\t", "("]
});
tokenList.push(StatementNumber);
// "<Title>:" — defines an argument title.
export const ArgumentDefinition = createToken({
    name: TokenNames.ARGUMENT_DEFINITION,
    pattern: /\<.+?\>\:/,
    label: "<Argument Title>: (Argument Definition)"
});
tokenList.push(ArgumentDefinition);
// "<Title>" — references an argument; the first character inside the angle
// brackets must not be "-" (mirrors StatementReference).
export const ArgumentReference = createToken({
    name: TokenNames.ARGUMENT_REFERENCE,
    pattern: /\<[^-].*?\>/,
    label: "<Argument Title> (Argument Reference)"
});
tokenList.push(ArgumentReference);
// "@<Title>" — mentions an argument; may consume one trailing space/tab.
export const ArgumentMention = createToken({
    name: TokenNames.ARGUMENT_MENTION,
    pattern: /\@\<.+?\>[ \t]?/,
    label: "@<Argument Title> (Argument Mention)"
});
tokenList.push(ArgumentMention);
const headingPattern = /^(#+)(?: )/;
// Matches "#", "##", … headings (must be followed by a space), but only at
// the very start of the input or directly after a linebreak / empty line.
const matchHeadingStart = (text, offset, tokens) => {
    const lastToken = last(tokens);
    const atLineStart = lastToken
        ? tokenMatcher(lastToken, Emptyline) || tokenMatcher(lastToken, Newline)
        : true;
    if (!atLineStart) {
        return null;
    }
    return headingPattern.exec(text.substr(offset || 0));
};
// "#"/"##"/… at line start, followed by a space (see matchHeadingStart).
export const HeadingStart = createToken({
    name: TokenNames.HEADING_START,
    pattern: matchHeadingStart,
    label: "# (Heading Start)",
    line_breaks: false,
    start_chars_hint: ["#"]
});
tokenList.push(HeadingStart);
// Custom matcher for an opening bold/italic delimiter. On success the range
// type is remembered on rangesStack so the matching end token can pair with it.
const matchBoldOrItalicStart = (text, offset, _tokens, _groups, pattern, rangeType) => {
    const match = pattern.exec(text.substr(offset || 0));
    if (match === null) {
        return null;
    }
    rangesStack.push(rangeType);
    return match;
};
// Custom matcher for a closing bold/italic delimiter. Only closes if the most
// recently opened range is of the same kind (ranges must be properly nested).
const matchBoldOrItalicEnd = (text, offset, tokens, groups, pattern, rangeType) => {
    let lastRange = last(rangesStack);
    if (lastRange != rangeType)
        return null;
    // Skipped tokens (e.g. comments) interrupt a range: if the last skipped
    // token ends after the last matched token, do not close the range here.
    let skipped = groups ? groups[chevrotain.Lexer.SKIPPED] : null;
    let lastSkipped = last(skipped);
    let lastMatched = last(tokens);
    if (!lastMatched ||
        (lastSkipped && lastSkipped.endOffset > lastMatched.endOffset)) {
        return null;
    }
    let remainingText = text.substr(offset || 0);
    let match = pattern.exec(remainingText);
    if (match != null) {
        // The range is closed; forget it.
        rangesStack.pop();
        return match;
    }
    return null;
};
// Bold/italic delimiter matchers. A start delimiter must not be followed by
// whitespace; an end delimiter must be followed by a space/tab or (via
// lookahead) a linebreak, closing bracket/brace, punctuation, another
// delimiter, or the end of input.
const matchAsteriskBoldStart = partialRight(matchBoldOrItalicStart, /^\*\*(?!\s)/, "AsteriskBold");
const matchAsteriskBoldEnd = partialRight(matchBoldOrItalicEnd, /^\*\*(?:[ \t]|(?=\n|\r|\)|\}|\_|\.|,|!|\?|;|:|-|\*|$))/, "AsteriskBold");
const matchUnderscoreBoldStart = partialRight(matchBoldOrItalicStart, /^__(?!\s)/, "UnderscoreBold");
const matchUnderscoreBoldEnd = partialRight(matchBoldOrItalicEnd, /^__(?:[ \t]|(?=\n|\r|\)|\}|\_|\.|,|!|\?|;|:|-|\*|$))/, "UnderscoreBold");
const matchAsteriskItalicStart = partialRight(matchBoldOrItalicStart, /^\*(?!\s)/, "AsteriskItalic");
const matchAsteriskItalicEnd = partialRight(matchBoldOrItalicEnd, /^\*(?:[ \t]|(?=\n|\r|\)|\}|\_|\.|,|!|\?|;|:|-|\*|$))/, "AsteriskItalic");
const matchUnderscoreItalicStart = partialRight(matchBoldOrItalicStart, /^\_(?!\s)/, "UnderscoreItalic");
const matchUnderscoreItalicEnd = partialRight(matchBoldOrItalicEnd, /^\_(?:[ \t]|(?=\n|\r|\)|\}|\_|\.|,|!|\?|;|:|-|\*|$))/, "UnderscoreItalic");
// Token definitions for the bold/italic range delimiters. Note that in
// lexerConfig the *End tokens are listed before the *Start tokens, so closing
// an open range takes precedence over opening a new one.
export const AsteriskBoldStart = createToken({
    name: TokenNames.ASTERISK_BOLD_START,
    pattern: matchAsteriskBoldStart,
    label: "** (Bold Start)",
    line_breaks: false,
    start_chars_hint: ["*"]
});
tokenList.push(AsteriskBoldStart);
export const AsteriskBoldEnd = createToken({
    name: TokenNames.ASTERISK_BOLD_END,
    pattern: matchAsteriskBoldEnd,
    label: "** (Bold End)",
    line_breaks: false,
    start_chars_hint: ["*"]
});
tokenList.push(AsteriskBoldEnd);
export const UnderscoreBoldStart = createToken({
    name: TokenNames.UNDERSCORE_BOLD_START,
    pattern: matchUnderscoreBoldStart,
    label: "__ (Bold Start)",
    line_breaks: false,
    start_chars_hint: ["_"]
});
tokenList.push(UnderscoreBoldStart);
export const UnderscoreBoldEnd = createToken({
    name: TokenNames.UNDERSCORE_BOLD_END,
    pattern: matchUnderscoreBoldEnd,
    label: "__ (Bold End)",
    line_breaks: false,
    start_chars_hint: ["_"]
});
tokenList.push(UnderscoreBoldEnd);
export const AsteriskItalicStart = createToken({
    name: TokenNames.ASTERISK_ITALIC_START,
    pattern: matchAsteriskItalicStart,
    label: "* (Italic Start)",
    line_breaks: false,
    start_chars_hint: ["*"]
});
tokenList.push(AsteriskItalicStart);
export const AsteriskItalicEnd = createToken({
    name: TokenNames.ASTERISK_ITALIC_END,
    pattern: matchAsteriskItalicEnd,
    label: "* (Italic End)",
    line_breaks: false,
    start_chars_hint: ["*"]
});
tokenList.push(AsteriskItalicEnd);
export const UnderscoreItalicStart = createToken({
    name: TokenNames.UNDERSCORE_ITALIC_START,
    pattern: matchUnderscoreItalicStart,
    label: "_ (Italic Start)",
    line_breaks: false,
    start_chars_hint: ["_"]
});
tokenList.push(UnderscoreItalicStart);
export const UnderscoreItalicEnd = createToken({
    name: TokenNames.UNDERSCORE_ITALIC_END,
    pattern: matchUnderscoreItalicEnd,
    label: "_ (Italic End)",
    line_breaks: false,
    start_chars_hint: ["_"]
});
tokenList.push(UnderscoreItalicEnd);
// Sticky (/y) comment patterns: HTML comments, /* */ blocks and // line
// comments. NOTE(review): a "//" comment at the very end of the input is not
// matched because the lookahead requires a following linebreak — confirm
// this is intended.
const commentPattern = /(?:<!--(?:.|\n|\r)*?-->)|(?:\/\*(?:.|\n|\r)*?\*\/)|(?:\/\/.*?(?=\r\n|\n|\r))/y;
// Fix: the trailing-linebreak group used to be ((?:[ \t]*\r\n|\n|\r)*), in
// which the alternation bound tighter than intended, so spaces before a
// plain "\n" (e.g. "  \n") were never consumed. The whitespace prefix now
// applies to every linebreak variant, mirroring the one-linebreak pattern
// below.
const commentWithTrailingLinebreaksPattern = /(?:(?:<!--(?:.|\n|\r)*?-->)|(?:\/\*(?:.|\n|\r)*?\*\/)|(?:\/\/.*?(?=\r\n|\n|\r)))(?:[ \t]*(?:\r\n|\n|\r))*/y;
const commentWithOneTrailingLinebreakPattern = /(?:(?:<!--(?:.|\n|\r)*?-->)|(?:\/\*(?:.|\n|\r)*?\*\/)|(?:\/\/.*?(?=\r\n|\n|\r)))(?:[ \t]*(?:\r\n|\n|\r)(?!([ \t]*(\r\n|\n|\r))))?/y;
// Chooses among the three patterns by context: after an Emptyline a comment
// may swallow all trailing blank lines (preserving the "empty line" state);
// after a Newline it may swallow at most one linebreak (so it cannot hide an
// empty line); otherwise no trailing linebreaks are consumed. lastIndex must
// be reset before each exec because the patterns are sticky.
const matchComment = (text, offset, tokens) => {
    const lastToken = last(tokens);
    if (lastToken && tokenMatcher(lastToken, Emptyline)) {
        commentWithTrailingLinebreaksPattern.lastIndex = offset || 0;
        return commentWithTrailingLinebreaksPattern.exec(text);
    }
    else if (lastToken && tokenMatcher(lastToken, Newline)) {
        commentWithOneTrailingLinebreakPattern.lastIndex = offset || 0;
        return commentWithOneTrailingLinebreakPattern.exec(text);
    }
    else {
        commentPattern.lastIndex = offset || 0;
        return commentPattern.exec(text);
    }
};
// Comments are lexed but placed in the SKIPPED group, so they never reach
// the parser.
export const Comment = createToken({
    name: TokenNames.COMMENT,
    pattern: matchComment,
    group: chevrotain.Lexer.SKIPPED,
    start_chars_hint: ["/", "<"],
    label: "// or /**/ or <!-- --> (Comment)",
    line_breaks: true
});
tokenList.push(Comment);
// Markdown-style link: [Title](Url), optionally consuming one trailing
// space/tab.
export const Link = createToken({
    name: TokenNames.LINK,
    pattern: /\[[^\]]+?\]\([^\)]+?\)[ \t]?/,
    label: "[Title](Url) (Link)"
});
tokenList.push(Link);
// "#tag-text" or "#(tag text)". Fix: the character class contained the typo
// "A-z", which (spanning char codes 65-122) also matched "[", "\", "]",
// "^", "_" and "`"; corrected to "A-Z". NOTE(review): underscores in bare
// tags were previously accepted only via this typo — confirm no documents
// rely on that.
export const Tag = createToken({
    name: TokenNames.TAG,
    pattern: /#(?:\([^\)]+\)|[a-zA-Z0-9-\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+)[ \t]?/,
    label: "#tag-text or #(tag text) (Tag)"
});
tokenList.push(Tag);
// A single linebreak, optionally preceded by trailing spaces/tabs.
export const Newline = createToken({
    name: TokenNames.NEWLINE,
    pattern: /[ \t]*(?:\r\n|\n|\r)/,
    line_breaks: true,
    label: "{linebreak} (New Line)"
});
tokenList.push(Newline);
// Runs of spaces/tabs are skipped entirely.
export const Spaces = createToken({
    name: TokenNames.SPACES,
    pattern: /( |\t)+/,
    group: chevrotain.Lexer.SKIPPED
});
tokenList.push(Spaces);
// Backslash-escaped character, optionally consuming trailing spaces.
export const EscapedChar = createToken({
    name: TokenNames.ESCAPED_CHAR,
    pattern: /\\.(?: )*/,
    label: "\\{character} (Escaped Character)"
});
tokenList.push(EscapedChar);
// ".name." or ":name:" special-character syntax (e.g. emoji shortcodes),
// optionally consuming one trailing space/tab.
export const SpecialChar = createToken({
    name: TokenNames.SPECIAL_CHAR,
    pattern: /(?:\.[^\s]+?\.[ \t]?)|(?:\:[^\s]+?\:[ \t]?)/,
    label: ".{name}. or :{name}: (Special Character)"
});
tokenList.push(SpecialChar);
// Any run of text containing none of the Argdown control characters.
export const Freestyle = createToken({
    name: TokenNames.FREESTYLE,
    pattern: /[^\\\@\#\*\_\[\]\,\.\:\;\<\/\>\-\r\n\(\)\{\}]+/,
    line_breaks: true,
    label: "Text Content"
});
tokenList.push(Freestyle);
// A single control character that no more specific token consumed; treated
// as plain text, optionally with one trailing space/tab.
export const UnusedControlChar = createToken({
    name: TokenNames.UNUSED_CONTROL_CHAR,
    pattern: /[\@\#\*\_\[\]\,\.\:\;\<\/\>\-\(\)\{\}][ \t]?/,
    label: "Text Content (Control Characters)"
});
tokenList.push(UnusedControlChar);
// Re-export chevrotain's EOF token for consumers of this module.
export const EOF = chevrotain.EOF;
// Lexer mode configuration. default_mode handles the main Argdown syntax;
// InferenceStart pushes inference_mode, where only comments, YAML data,
// list delimiters and plain text are recognized until InferenceEnd pops
// back. Order matters: earlier tokens take precedence (e.g. Comment before
// everything else, and the bold/italic *End tokens before their *Start
// counterparts so closing an open range wins).
const lexerConfig = {
    modes: {
        default_mode: [
            Comment,
            FrontMatter,
            Data,
            EscapedChar,
            SpecialChar,
            Emptyline,
            Newline,
            Dedent,
            Indent,
            InferenceStart,
            IncomingSupport,
            IncomingAttack,
            OutgoingSupport,
            OutgoingAttack,
            Contradiction,
            IncomingUndercut,
            OutgoingUndercut,
            HeadingStart,
            StatementNumber,
            OrderedListItem,
            UnorderedListItem,
            AsteriskBoldEnd,
            UnderscoreBoldEnd,
            AsteriskItalicEnd,
            UnderscoreItalicEnd,
            AsteriskBoldStart,
            UnderscoreBoldStart,
            AsteriskItalicStart,
            UnderscoreItalicStart,
            Link,
            Tag,
            StatementDefinition,
            StatementReference,
            StatementMention,
            ArgumentDefinition,
            ArgumentReference,
            ArgumentMention,
            Spaces,
            Freestyle,
            UnusedControlChar
        ],
        inference_mode: [
            Comment,
            Newline,
            EscapedChar,
            SpecialChar,
            InferenceEnd,
            Data,
            ListDelimiter,
            Spaces,
            Freestyle,
            UnusedControlChar
        ]
    },
    defaultMode: "default_mode"
};
// The singleton chevrotain lexer instance used by tokenize().
const lexer = new chevrotain.Lexer(lexerConfig);
// Tokenizes an Argdown document. Resets the module-level lexer state, runs
// the chevrotain lexer, drops a trailing Emptyline (it carries no
// information for the parser) and closes any indentation levels still open
// at the end of input. Throws an Error describing all lexing errors.
export const tokenize = (text) => {
    init();
    const lexResult = lexer.tokenize(text);
    if (lexResult.errors && lexResult.errors.length > 0) {
        // Surface chevrotain's diagnostics instead of a generic message.
        const details = lexResult.errors
            .map((e) => `(${e.line}:${e.column}) ${e.message}`)
            .join("; ");
        throw new Error(`Lexing errors detected: ${details}`);
    }
    const lastToken = last(lexResult.tokens);
    if (lastToken && tokenMatcher(lastToken, Emptyline)) {
        lexResult.tokens.pop();
    }
    emitRemainingDedentTokens(lexResult.tokens);
    return lexResult;
};
//# sourceMappingURL=lexer.js.map