UNPKG

@npmstuff/argdown-core

Version:

A pluggable parser for the Argdown argumentation syntax

606 lines 21.4 kB
"use strict";
import * as chevrotain from "chevrotain";
import last from "lodash.last";
import partialRight from "lodash.partialright";
import { TokenNames } from "./TokenNames";
import { arrayIsEmpty } from "./utils";

const createToken = chevrotain.createToken;
const createTokenInstance = chevrotain.createTokenInstance;
const tokenMatcher = chevrotain.tokenMatcher;

// Indentation widths of the currently open indentation levels (reset to [0] by init()).
let indentStack = [];
// Range types of the bold/italic ranges that were started but not yet ended.
let rangesStack = [];
// Every token type created by this module, in creation order.
export const tokenList = [];

/**
 * Resets the module-level lexer state (indentation and bold/italic stacks).
 */
const init = () => {
  indentStack = [0];
  rangesStack = [];
};

/**
 * Returns the line on which the next token starts.
 *
 * @param {Array} tokens - tokens matched so far
 * @returns {number} 1 if nothing was matched yet; otherwise the end line of
 *   the last token, plus one if that token is a Newline or Emptyline.
 */
const getCurrentLine = (tokens) => {
  if (arrayIsEmpty(tokens)) return 1;
  const lastToken = last(tokens);
  let currentLine = lastToken ? lastToken.endLine : 1;
  if (lastToken && (tokenMatcher(lastToken, Emptyline) || tokenMatcher(lastToken, Newline))) {
    currentLine++;
  }
  return currentLine;
};

/**
 * Returns the end offset of the last matched token, or 0 if there is none.
 *
 * @param {Array} tokens - tokens matched so far
 * @returns {number}
 */
const getCurrentEndOffset = (tokens) => {
  if (arrayIsEmpty(tokens)) return 0;
  const lastToken = last(tokens);
  return lastToken ? lastToken.endOffset : 0;
};

/**
 * Tells whether the given token is a Newline token.
 *
 * @param {object|undefined} lastToken - token to inspect (may be missing)
 * @returns {boolean}
 */
const lastTokenIsNewline = (lastToken) => {
  if (lastToken === undefined || lastToken === null) return false;
  return tokenMatcher(lastToken, Newline);
};

/**
 * Closes every open indentation level: appends one zero-length Dedent token
 * per level to `matchedTokens` and pops the level from `indentStack`.
 * The Dedent tokens are positioned at the end of the last matched token.
 *
 * @param {Array} matchedTokens - tokens matched so far (mutated)
 */
const emitRemainingDedentTokens = (matchedTokens) => {
  if (indentStack.length <= 1) {
    return;
  }
  const lastToken = last(matchedTokens);
  const startOffset = getCurrentEndOffset(matchedTokens);
  const endOffset = startOffset;
  const startLine = getCurrentLine(matchedTokens);
  const endLine = startLine;
  const startColumn = lastToken && lastToken.endColumn ? lastToken.endColumn : 0;
  const endColumn = startColumn;
  while (indentStack.length > 1) {
    matchedTokens.push(
      createTokenInstance(Dedent, "", startOffset, endOffset, startLine, endLine, startColumn, endColumn)
    );
    indentStack.pop();
  }
};

/**
 * Compares the width of `indentStr` with the innermost open indentation level
 * and appends the resulting Indent token (deeper) or Dedent tokens (shallower)
 * to `matchedTokens`. Equal width emits nothing.
 *
 * @param {Array} matchedTokens - tokens matched so far (mutated)
 * @param {string} indentStr - the indentation found at the start of the line
 */
const emitIndentOrDedent = (matchedTokens, indentStr) => {
  const currIndentLevel = indentStr.length;
  let lastIndentLevel = last(indentStack) || 0;
  const image = "";
  const startOffset = getCurrentEndOffset(matchedTokens) + 1;
  const endOffset = startOffset + indentStr.length - 1;
  const startLine = getCurrentLine(matchedTokens);
  const endLine = startLine;
  const startColumn = 1;
  const endColumn = startColumn + indentStr.length - 1;
  if (currIndentLevel > lastIndentLevel) {
    indentStack.push(currIndentLevel);
    const indentToken = createTokenInstance(
      Indent, image, startOffset, endOffset, startLine, endLine, startColumn, endColumn
    );
    matchedTokens.push(indentToken);
  } else if (currIndentLevel < lastIndentLevel) {
    // One Dedent per level that is deeper than the current line; the base level is never popped.
    while (indentStack.length > 1 && currIndentLevel < lastIndentLevel) {
      indentStack.pop();
      lastIndentLevel = last(indentStack) || 0;
      const dedentToken = createTokenInstance(
        Dedent, image, startOffset, endOffset, startLine, endLine, startColumn, endColumn
      );
      matchedTokens.push(dedentToken);
    }
  }
};

/**
 * Custom pattern shared by all relation tokens. A relation is only recognised
 * at the start of a line, i.e. when no token was matched yet or the last token
 * is a Newline or Emptyline. On a match the leading indentation (capture group
 * 1) is turned into Indent/Dedent tokens before the match is returned.
 *
 * @param {string} text - complete input
 * @param {number} offset - current lexer position in `text`
 * @param {Array} tokens - tokens matched so far (may receive Indent/Dedent tokens)
 * @param {object} _groups - unused
 * @param {RegExp} pattern - `^`-anchored pattern with two capture groups (indentation, symbol)
 * @returns {RegExpExecArray|null}
 */
const matchRelation = (text, offset, tokens, _groups, pattern) => {
  const remainingText = text.slice(offset || 0);
  const lastToken = last(tokens);
  const afterNewline = lastTokenIsNewline(lastToken);
  const afterEmptyline = lastToken && tokenMatcher(lastToken, Emptyline);
  if (arrayIsEmpty(tokens) || afterEmptyline || afterNewline) {
    const match = pattern.exec(remainingText);
    if (match !== null && match.length === 3) {
      const indentStr = match[1];
      emitIndentOrDedent(tokens, indentStr);
      return match;
    }
  }
  return null;
};

// Indentation is spaces and tabs only. The class used to be written [' '\t],
// which also accepted a literal apostrophe as an indentation character.
const matchIncomingSupport = partialRight(matchRelation, /^([ \t]*)(\+>)/);
const matchIncomingAttack = partialRight(matchRelation, /^([ \t]*)(->)/);
const matchOutgoingSupport = partialRight(matchRelation, /^([ \t]*)(<?\+)/);
const matchOutgoingAttack = partialRight(matchRelation, /^([ \t]*)(<?-)/);
const matchContradiction = partialRight(matchRelation, /^([ \t]*)(><)/);
const matchIncomingUndercut = partialRight(matchRelation, /^([ \t]*)(_>)/);
const matchOutgoingUndercut = partialRight(matchRelation, /^([ \t]*)(<_|(?:_(?=\s)))/);

export const IncomingSupport = createToken({
  name: TokenNames.INCOMING_SUPPORT,
  pattern: matchIncomingSupport,
  line_breaks: true,
  label: "+> (Incoming Support)",
  start_chars_hint: [" ", "\t", "+"]
});
tokenList.push(IncomingSupport);

export const IncomingAttack = createToken({
  name: TokenNames.INCOMING_ATTACK,
  pattern: matchIncomingAttack,
  line_breaks: true,
  label: "-> (Incoming Attack)",
  start_chars_hint: [" ", "\t", "-"]
});
tokenList.push(IncomingAttack);

// NOTE(review): several start_chars_hint arrays in this file omit characters
// their pattern can begin with (here "+"; likewise "-" / "_" for the other
// outgoing relations, digits and "*" for list items, " " / "\t" for Emptyline).
// Left unchanged - confirm whether chevrotain's first-character optimization is
// active for this lexer before relying on these hints.
export const OutgoingSupport = createToken({
  name: TokenNames.OUTGOING_SUPPORT,
  pattern: matchOutgoingSupport,
  line_breaks: true,
  label: "<+ (Outgoing Support)",
  start_chars_hint: [" ", "\t", "<"]
});
tokenList.push(OutgoingSupport);

export const OutgoingAttack = createToken({
  name: TokenNames.OUTGOING_ATTACK,
  pattern: matchOutgoingAttack,
  line_breaks: true,
  label: "<- (Outgoing Attack)",
  start_chars_hint: [" ", "\t", "<"]
});
tokenList.push(OutgoingAttack);

export const Contradiction = createToken({
  name: TokenNames.CONTRADICTION,
  pattern: matchContradiction,
  line_breaks: true,
  label: ">< (Contradiction)",
  start_chars_hint: [" ", "\t", ">"]
});
tokenList.push(Contradiction);

export const IncomingUndercut = createToken({
  name: TokenNames.INCOMING_UNDERCUT,
  pattern: matchIncomingUndercut,
  line_breaks: true,
  label: "_> (Incoming Undercut)",
  start_chars_hint: [" ", "\t", "_"]
});
tokenList.push(IncomingUndercut);

export const OutgoingUndercut = createToken({
  name: TokenNames.OUTGOING_UNDERCUT,
  pattern: matchOutgoingUndercut,
  line_breaks: true,
  label: "<_ (Outgoing Undercut)",
  start_chars_hint: [" ", "\t", "<"]
});
tokenList.push(OutgoingUndercut);

const inferenceStartPattern = /^[ \t]*-{2}/;

/**
 * Custom pattern for the start of an inference ("--"). Only recognised when no
 * token was matched yet or directly after a Newline. On a match all open
 * indentation levels are closed first.
 *
 * @param {string} text - complete input
 * @param {number} offset - current lexer position in `text`
 * @param {Array} tokens - tokens matched so far (may receive Dedent tokens)
 * @returns {RegExpExecArray|null}
 */
const matchInferenceStart = (text, offset, tokens) => {
  const remainingText = text.slice(offset || 0);
  const lastToken = last(tokens);
  const afterNewline = lastTokenIsNewline(lastToken);
  if (arrayIsEmpty(tokens) || afterNewline) {
    const match = inferenceStartPattern.exec(remainingText);
    if (match !== null) {
      emitRemainingDedentTokens(tokens);
      return match;
    }
  }
  return null;
};

export const InferenceStart = createToken({
  name: TokenNames.INFERENCE_START,
  pattern: matchInferenceStart,
  push_mode: "inference_mode",
  line_breaks: true,
  label: "-- (Inference Start)",
  start_chars_hint: [" ", "\t", "-"]
});
tokenList.push(InferenceStart);

export const FrontMatter = createToken({
  name: TokenNames.FRONT_MATTER,
  pattern: /===+[\s\S]*?===+/,
  label: "Front Matter (YAML)"
});
tokenList.push(FrontMatter);

export const Data = createToken({
  name: TokenNames.DATA,
  pattern: /{((?!}\s[^\,}])(.|\n))*}(?!\s*(\,|}))/,
  label: "Meta Data (YAML)"
});
tokenList.push(Data);

export const ListDelimiter = createToken({
  name: TokenNames.LIST_DELIMITER,
  pattern: /,/,
  label: ","
});
tokenList.push(ListDelimiter);

export const InferenceEnd = createToken({
  name: TokenNames.INFERENCE_END,
  pattern: /-{2,}/,
  pop_mode: true,
  label: "-- (Inference End)"
});
tokenList.push(InferenceEnd);

/**
 * Custom pattern shared by ordered and unordered list items. Only recognised
 * at the start of a line (no token yet, or after a Newline or Emptyline).
 *
 * @param {string} text - complete input
 * @param {number} offset - current lexer position in `text`
 * @param {Array} tokens - tokens matched so far (may receive Indent/Dedent tokens)
 * @param {object} _groups - unused
 * @param {RegExp} pattern - `^`-anchored pattern whose first group captures the indentation
 * @returns {RegExpExecArray|null}
 */
const matchListItem = (text, offset, tokens, _groups, pattern) => {
  const remainingText = text.slice(offset || 0);
  const lastToken = last(tokens);
  const afterNewline = lastTokenIsNewline(lastToken);
  const afterEmptyline = lastToken && tokenMatcher(lastToken, Emptyline);
  if (arrayIsEmpty(tokens) || afterEmptyline || afterNewline) {
    const match = pattern.exec(remainingText);
    if (match !== null) {
      // One extra column, so that an unindented item is still deeper than the base level 0.
      const indentStr = match[1] + " ";
      emitIndentOrDedent(tokens, indentStr);
      return match;
    }
  }
  return null;
};

const orderedListItemPattern = /^([ \t]*)\d+\.(?=\s)/;
const matchOrderedListItem = partialRight(matchListItem, orderedListItemPattern);
export const OrderedListItem = createToken({
  name: TokenNames.ORDERED_LIST_ITEM,
  pattern: matchOrderedListItem,
  line_breaks: true,
  label: "{Indentation}{number}. (Ordered List Item)",
  start_chars_hint: [" ", "\t"]
});
tokenList.push(OrderedListItem);

const unorderedListItemPattern = /^([ \t]*)\*(?=\s)/;
const matchUnorderedListItem = partialRight(matchListItem, unorderedListItemPattern);
export const UnorderedListItem = createToken({
  name: TokenNames.UNORDERED_LIST_ITEM,
  pattern: matchUnorderedListItem,
  line_breaks: true,
  label: "{Indentation}* (Unordered List Item)",
  start_chars_hint: [" ", "\t"]
});
tokenList.push(UnorderedListItem);

const emptylinePattern = /^(?:[ \t]*(?:\r\n|\n)){2,}/;

/**
 * Custom pattern for a run of two or more line breaks. Never matches directly
 * after another Emptyline token. If text follows the empty lines, all open
 * indentation levels are closed before the match is returned.
 *
 * @param {string} text - complete input
 * @param {number} offset - current lexer position in `text`
 * @param {Array} tokens - tokens matched so far (may receive Dedent tokens)
 * @returns {RegExpExecArray|null}
 */
const matchEmptyline = (text, offset, tokens) => {
  const remainingText = text.slice(offset || 0);
  const lastToken = last(tokens);
  if (lastToken && tokenMatcher(lastToken, Emptyline)) return null;
  const match = emptylinePattern.exec(remainingText);
  if (match !== null) {
    // At the very end of the input the dedents are left to the caller of the lexer.
    if (match[0].length < remainingText.length) {
      emitRemainingDedentTokens(tokens);
    }
    return match;
  }
  return null;
};

export const Emptyline = createToken({
  name: TokenNames.EMPTYLINE,
  pattern: matchEmptyline,
  line_breaks: true,
  label: "{linebreak}{linebreak} (Empty Line)",
  start_chars_hint: ["\r", "\n"]
});
tokenList.push(Emptyline);

// Indent and Dedent have no pattern of their own; instances are created by the helpers above.
export const Indent = createToken({
  name: TokenNames.INDENT,
  pattern: chevrotain.Lexer.NA
});
tokenList.push(Indent);

export const Dedent = createToken({
  name: TokenNames.DEDENT,
  pattern: chevrotain.Lexer.NA
});
tokenList.push(Dedent);

export const StatementDefinition = createToken({
  name: TokenNames.STATEMENT_DEFINITION,
  pattern: /\[.+?\]\:/,
  label: "[Statement Title]: (Statement Definition)"
});
tokenList.push(StatementDefinition);

export const StatementReference = createToken({
  name: TokenNames.STATEMENT_REFERENCE,
  pattern: /\[[^-].*?\]/,
  label: "[Statement Title] (Statement Reference)"
});
tokenList.push(StatementReference);

export const StatementMention = createToken({
  name: TokenNames.STATEMENT_MENTION,
  pattern: /\@\[.+?\][ \t]?/,
  label: "@[Statement Title] (Statement Mention)"
});
tokenList.push(StatementMention);

const statementNumberPattern = /^[ \t]*\(\d+\)/;

/**
 * Custom pattern for a statement number such as "(1)". Only recognised at the
 * start of a line (no token yet, or after a Newline or Emptyline). On a match
 * all open indentation levels are closed first.
 *
 * @param {string} text - complete input
 * @param {number} offset - current lexer position in `text`
 * @param {Array} tokens - tokens matched so far (may receive Dedent tokens)
 * @returns {RegExpExecArray|null}
 */
const matchStatementNumber = (text, offset, tokens) => {
  const remainingText = text.slice(offset || 0);
  const lastToken = last(tokens);
  const afterNewline = lastTokenIsNewline(lastToken);
  const afterEmptyline = lastToken && tokenMatcher(lastToken, Emptyline);
  if (arrayIsEmpty(tokens) || afterEmptyline || afterNewline) {
    const match = statementNumberPattern.exec(remainingText);
    if (match !== null) {
      emitRemainingDedentTokens(tokens);
      return match;
    }
  }
  return null;
};

export const StatementNumber = createToken({
  name: TokenNames.STATEMENT_NUMBER,
  pattern: matchStatementNumber,
  line_breaks: true,
  label: "(Number) (Statement Number)",
  start_chars_hint: [" ", "\t", "("]
});
tokenList.push(StatementNumber);

export const ArgumentDefinition = createToken({
  name: TokenNames.ARGUMENT_DEFINITION,
  pattern: /\<.+?\>\:/,
  label: "<Argument Title>: (Argument Definition)"
});
tokenList.push(ArgumentDefinition);

export const ArgumentReference = createToken({
  name: TokenNames.ARGUMENT_REFERENCE,
  pattern: /\<[^-].*?\>/,
  label: "<Argument Title> (Argument Reference)"
});
tokenList.push(ArgumentReference);

export const ArgumentMention = createToken({
  name: TokenNames.ARGUMENT_MENTION,
  pattern: /\@\<.+?\>[ \t]?/,
  label: "@<Argument Title> (Argument Mention)"
});
tokenList.push(ArgumentMention);

const headingPattern = /^(#+)(?: )/;

/**
 * Custom pattern for the start of a heading: one or more "#" followed by a
 * space. Only recognised when no token was matched yet or the last token is a
 * Newline or Emptyline.
 *
 * @param {string} text - complete input
 * @param {number} offset - current lexer position in `text`
 * @param {Array} tokens - tokens matched so far
 * @returns {RegExpExecArray|null}
 */
const matchHeadingStart = (text, offset, tokens) => {
  const remainingText = text.slice(offset || 0);
  const lastToken = last(tokens);
  const afterLinebreak =
    lastToken && (tokenMatcher(lastToken, Emptyline) || tokenMatcher(lastToken, Newline));
  if (!lastToken || afterLinebreak) {
    const match = headingPattern.exec(remainingText);
    if (match) {
      return match;
    }
  }
  return null;
};

export const HeadingStart = createToken({
  name: TokenNames.HEADING_START,
  pattern: matchHeadingStart,
  label: "# (Heading Start)",
  line_breaks: false,
  start_chars_hint: ["#"]
});
tokenList.push(HeadingStart);
/**
 * Custom pattern shared by all bold/italic start tokens. When `pattern`
 * matches at the current position, `rangeType` is pushed onto `rangesStack`
 * so that the corresponding end token can be recognised later.
 *
 * @param {string} text - complete input
 * @param {number} offset - current lexer position in `text`
 * @param {Array} _tokens - unused
 * @param {object} _groups - unused
 * @param {RegExp} pattern - `^`-anchored pattern of the opening marker
 * @param {string} rangeType - name of the range that is opened
 * @returns {RegExpExecArray|null}
 */
const matchBoldOrItalicStart = (text, offset, _tokens, _groups, pattern, rangeType) => {
  const found = pattern.exec(text.slice(offset || 0));
  if (found === null) {
    return null;
  }
  rangesStack.push(rangeType);
  return found;
};

/**
 * Custom pattern shared by all bold/italic end tokens. A closing marker is
 * only accepted when
 *  - the innermost open range is of type `rangeType`,
 *  - at least one token was matched before it, and
 *  - no skipped token ends after that last matched token.
 * On a match the range is popped from `rangesStack`.
 *
 * @param {string} text - complete input
 * @param {number} offset - current lexer position in `text`
 * @param {Array} tokens - tokens matched so far
 * @param {object} groups - token groups of the lexer; the skipped group is consulted if present
 * @param {RegExp} pattern - `^`-anchored pattern of the closing marker
 * @param {string} rangeType - name of the range that is closed
 * @returns {RegExpExecArray|null}
 */
const matchBoldOrItalicEnd = (text, offset, tokens, groups, pattern, rangeType) => {
  if (last(rangesStack) !== rangeType) {
    return null;
  }
  const skippedTokens = groups ? groups[chevrotain.Lexer.SKIPPED] : null;
  const previousSkipped = last(skippedTokens);
  const previousToken = last(tokens);
  if (!previousToken) {
    return null;
  }
  if (previousSkipped && previousSkipped.endOffset > previousToken.endOffset) {
    return null;
  }
  const found = pattern.exec(text.slice(offset || 0));
  if (found === null) {
    return null;
  }
  rangesStack.pop();
  return found;
};

// Start markers must not be followed by whitespace; end markers must be
// followed by a blank, a line break, punctuation or the end of the input.
const matchAsteriskBoldStart = partialRight(
  matchBoldOrItalicStart,
  /^\*\*(?!\s)/,
  "AsteriskBold"
);
const matchAsteriskBoldEnd = partialRight(
  matchBoldOrItalicEnd,
  /^\*\*(?:[ \t]|(?=\n|\r|\)|\}|\_|\.|,|!|\?|;|:|-|\*|$))/,
  "AsteriskBold"
);
const matchUnderscoreBoldStart = partialRight(
  matchBoldOrItalicStart,
  /^__(?!\s)/,
  "UnderscoreBold"
);
const matchUnderscoreBoldEnd = partialRight(
  matchBoldOrItalicEnd,
  /^__(?:[ \t]|(?=\n|\r|\)|\}|\_|\.|,|!|\?|;|:|-|\*|$))/,
  "UnderscoreBold"
);
const matchAsteriskItalicStart = partialRight(
  matchBoldOrItalicStart,
  /^\*(?!\s)/,
  "AsteriskItalic"
);
const matchAsteriskItalicEnd = partialRight(
  matchBoldOrItalicEnd,
  /^\*(?:[ \t]|(?=\n|\r|\)|\}|\_|\.|,|!|\?|;|:|-|\*|$))/,
  "AsteriskItalic"
);
const matchUnderscoreItalicStart = partialRight(
  matchBoldOrItalicStart,
  /^\_(?!\s)/,
  "UnderscoreItalic"
);
const matchUnderscoreItalicEnd = partialRight(
  matchBoldOrItalicEnd,
  /^\_(?:[ \t]|(?=\n|\r|\)|\}|\_|\.|,|!|\?|;|:|-|\*|$))/,
  "UnderscoreItalic"
);

export const AsteriskBoldStart = createToken({
  name: TokenNames.ASTERISK_BOLD_START,
  pattern: matchAsteriskBoldStart,
  label: "** (Bold Start)",
  line_breaks: false,
  start_chars_hint: ["*"]
});
tokenList.push(AsteriskBoldStart);

export const AsteriskBoldEnd = createToken({
  name: TokenNames.ASTERISK_BOLD_END,
  pattern: matchAsteriskBoldEnd,
  label: "** (Bold End)",
  line_breaks: false,
  start_chars_hint: ["*"]
});
tokenList.push(AsteriskBoldEnd);

export const UnderscoreBoldStart = createToken({
  name: TokenNames.UNDERSCORE_BOLD_START,
  pattern: matchUnderscoreBoldStart,
  label: "__ (Bold Start)",
  line_breaks: false,
  start_chars_hint: ["_"]
});
tokenList.push(UnderscoreBoldStart);

export const UnderscoreBoldEnd = createToken({
  name: TokenNames.UNDERSCORE_BOLD_END,
  pattern: matchUnderscoreBoldEnd,
  label: "__ (Bold End)",
  line_breaks: false,
  start_chars_hint: ["_"]
});
tokenList.push(UnderscoreBoldEnd);

export const AsteriskItalicStart = createToken({
  name: TokenNames.ASTERISK_ITALIC_START,
  pattern: matchAsteriskItalicStart,
  label: "* (Italic Start)",
  line_breaks: false,
  start_chars_hint: ["*"]
});
tokenList.push(AsteriskItalicStart);

export const AsteriskItalicEnd = createToken({
  name: TokenNames.ASTERISK_ITALIC_END,
  pattern: matchAsteriskItalicEnd,
  label: "* (Italic End)",
  line_breaks: false,
  start_chars_hint: ["*"]
});
tokenList.push(AsteriskItalicEnd);

export const UnderscoreItalicStart = createToken({
  name: TokenNames.UNDERSCORE_ITALIC_START,
  pattern: matchUnderscoreItalicStart,
  label: "_ (Italic Start)",
  line_breaks: false,
  start_chars_hint: ["_"]
});
tokenList.push(UnderscoreItalicStart);

export const UnderscoreItalicEnd = createToken({
  name: TokenNames.UNDERSCORE_ITALIC_END,
  pattern: matchUnderscoreItalicEnd,
  label: "_ (Italic End)",
  line_breaks: false,
  start_chars_hint: ["_"]
});
tokenList.push(UnderscoreItalicEnd);

// Three comment forms: <!-- ... -->, /* ... */ and // up to the end of the line.
// The line-comment branch also accepts the end of the input ("$"), so that a
// "//" comment on the last line is recognised without a trailing line break.
// All three patterns are sticky; matchComment sets lastIndex before every use.
const commentPattern =
  /(?:<!--(?:.|\n|\r)*?-->)|(?:\/\*(?:.|\n|\r)*?\*\/)|(?:\/\/.*?(?=\r\n|\n|\r|$))/y;
// Comment plus every line break that follows it. The optional blanks apply to
// each kind of line break (previously they were only allowed before "\r\n").
const commentWithTrailingLinebreaksPattern =
  /(?:(?:<!--(?:.|\n|\r)*?-->)|(?:\/\*(?:.|\n|\r)*?\*\/)|(?:\/\/.*?(?=\r\n|\n|\r|$)))(?:[ \t]*(?:\r\n|\n|\r))*/y;
// Comment plus at most one following line break, and only if that line break
// is not itself followed by a blank line.
const commentWithOneTrailingLinebreakPattern =
  /(?:(?:<!--(?:.|\n|\r)*?-->)|(?:\/\*(?:.|\n|\r)*?\*\/)|(?:\/\/.*?(?=\r\n|\n|\r|$)))(?:[ \t]*(?:\r\n|\n|\r)(?!([ \t]*(\r\n|\n|\r))))?/y;

/**
 * Custom pattern for comments. How many trailing line breaks are consumed
 * together with the comment depends on the previous token:
 *  - after an Emptyline: all of them,
 *  - after a Newline: at most one (never one that starts a blank line),
 *  - otherwise: none.
 *
 * @param {string} text - complete input
 * @param {number} offset - current lexer position in `text`
 * @param {Array} tokens - tokens matched so far
 * @returns {RegExpExecArray|null}
 */
const matchComment = (text, offset, tokens) => {
  const lastToken = last(tokens);
  let pattern = commentPattern;
  if (lastToken && tokenMatcher(lastToken, Emptyline)) {
    pattern = commentWithTrailingLinebreaksPattern;
  } else if (lastToken && tokenMatcher(lastToken, Newline)) {
    pattern = commentWithOneTrailingLinebreakPattern;
  }
  // Sticky regexes are stateful: always position them explicitly.
  pattern.lastIndex = offset || 0;
  return pattern.exec(text);
};

export const Comment = createToken({
  name: TokenNames.COMMENT,
  pattern: matchComment,
  group: chevrotain.Lexer.SKIPPED,
  start_chars_hint: ["/", "<"],
  label: "// or /**/ or <!-- --> (Comment)",
  line_breaks: true
});
tokenList.push(Comment);

export const Link = createToken({
  name: TokenNames.LINK,
  pattern: /\[[^\]]+?\]\([^\)]+?\)[ \t]?/,
  label: "[Title](Url) (Link)"
});
tokenList.push(Link);

// Tag text: ASCII letters, digits, "_" and "-", plus the listed non-ASCII ranges.
// The letter range used to be written "A-z", which also let "[", "\", "]", "^"
// and "`" become part of a tag.
export const Tag = createToken({
  name: TokenNames.TAG,
  pattern: /#(?:\([^\)]+\)|[a-zA-Z0-9_\-\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF]+)[ \t]?/,
  label: "#tag-text or #(tag text) (Tag)"
});
tokenList.push(Tag);

export const Newline = createToken({
  name: TokenNames.NEWLINE,
  pattern: /[ \t]*(?:\r\n|\n|\r)/,
  line_breaks: true,
  label: "{linebreak} (New Line)"
});
tokenList.push(Newline);

export const Spaces = createToken({
  name: TokenNames.SPACES,
  pattern: /( |\t)+/,
  group: chevrotain.Lexer.SKIPPED
});
tokenList.push(Spaces);

export const EscapedChar = createToken({
  name: TokenNames.ESCAPED_CHAR,
  pattern: /\\.(?: )*/,
  label: "\\{character} (Escaped Character)"
});
tokenList.push(EscapedChar);

export const SpecialChar = createToken({
  name: TokenNames.SPECIAL_CHAR,
  pattern: /(?:\.[^\s]+?\.[ \t]?)|(?:\:[^\s]+?\:[ \t]?)/,
  label: ".{name}. or :{name}: (Special Character)"
});
tokenList.push(SpecialChar);

export const Freestyle = createToken({
  name: TokenNames.FREESTYLE,
  pattern: /[^\\\@\#\*\_\[\]\,\.\:\;\<\/\>\-\r\n\(\)\{\}]+/,
  line_breaks: true,
  label: "Text Content"
});
tokenList.push(Freestyle);

export const UnusedControlChar = createToken({
  name: TokenNames.UNUSED_CONTROL_CHAR,
  pattern: /[\@\#\*\_\[\]\,\.\:\;\<\/\>\-\(\)\{\}][ \t]?/,
  label: "Text Content (Control Characters)"
});
tokenList.push(UnusedControlChar);

export const EOF = chevrotain.EOF;

// The order of the token types inside each mode is significant and must not be changed casually.
const lexerConfig = {
  modes: {
    default_mode: [
      Comment,
      FrontMatter,
      Data,
      EscapedChar,
      SpecialChar,
      Emptyline,
      Newline,
      Dedent,
      Indent,
      InferenceStart,
      IncomingSupport,
      IncomingAttack,
      OutgoingSupport,
      OutgoingAttack,
      Contradiction,
      IncomingUndercut,
      OutgoingUndercut,
      HeadingStart,
      StatementNumber,
      OrderedListItem,
      UnorderedListItem,
      AsteriskBoldEnd,
      UnderscoreBoldEnd,
      AsteriskItalicEnd,
      UnderscoreItalicEnd,
      AsteriskBoldStart,
      UnderscoreBoldStart,
      AsteriskItalicStart,
      UnderscoreItalicStart,
      Link,
      Tag,
      StatementDefinition,
      StatementReference,
      StatementMention,
      ArgumentDefinition,
      ArgumentReference,
      ArgumentMention,
      Spaces,
      Freestyle,
      UnusedControlChar
    ],
    // Entered by InferenceStart (push_mode) and left by InferenceEnd (pop_mode).
    inference_mode: [
      Comment,
      Newline,
      EscapedChar,
      SpecialChar,
      InferenceEnd,
      Data,
      ListDelimiter,
      Spaces,
      Freestyle,
      UnusedControlChar
    ]
  },
  defaultMode: "default_mode"
};

const lexer = new chevrotain.Lexer(lexerConfig);

/**
 * Tokenizes Argdown source text.
 *
 * Resets the module-level lexer state, runs the lexer, drops a trailing
 * Emptyline token and closes all indentation levels that are still open by
 * appending Dedent tokens.
 *
 * @param {string} text - the Argdown source
 * @returns {object} the lexing result of the chevrotain lexer; its `tokens`
 *   array is adjusted in place as described above
 * @throws {Error} if the lexer reports errors; the message lists the
 *   individual lexer error messages
 */
export const tokenize = (text) => {
  init();
  const lexResult = lexer.tokenize(text);
  if (lexResult.errors && lexResult.errors.length > 0) {
    // Keep the historical message as prefix, but tell the caller what went wrong.
    const details = lexResult.errors.map((error) => error.message).join("; ");
    throw new Error(`sad sad panda lexing errors detected: ${details}`);
  }
  const lastToken = last(lexResult.tokens);
  if (lastToken && tokenMatcher(lastToken, Emptyline)) {
    lexResult.tokens.pop();
  }
  emitRemainingDedentTokens(lexResult.tokens);
  return lexResult;
};
//# sourceMappingURL=lexer.js.map