UNPKG

uniorg-parse

Version:

uniorg plugin to parse org-mode

1,172 lines (1,171 loc) 76.2 kB
import { VFile } from 'vfile';
import { u } from 'unist-builder';
import { getOrgEntity } from './entities.js';
import { restrictionFor, greaterElements, unescapeCodeInString, escapeRegExp, OrgRegexUtils, } from './utils.js';
import { defaultOptions } from './parse-options.js';
import { Reader } from './reader.js';
/*
(defun rasen/org-debug ()
  "Show org AST for the current buffer."
  (interactive)
  (let ((document (org-element-parse-buffer)))
    (with-current-buffer-window "*org-elements*" nil nil
      (emacs-lisp-mode)
      (pp (cddr document)))))
*/
/**
 * `ParseMode` determines what Elements are expected/allowed in the given context.
 *
 * By default, all Elements except `Headline`, `Planning`, `PropertyDrawer`, `ListItem`, `TableRow`, and
 * `NodeProperty` are supported.
 *
 * If the documentation of the mode says "allows"—the specified elements are supported in addition to the
 * default elements.
 *
 * If the documentation of the mode says "expecting"—only those elements are allowed (default elements are
 * not).
 */
var ParseMode;
(function (ParseMode) {
  /** Initial parsing mode. Allows property-drawer. */
  ParseMode[ParseMode["TopComment"] = 0] = "TopComment";
  /** First inside section. Expecting a headline only. */
  ParseMode[ParseMode["Headline"] = 1] = "Headline";
  /** Right after headline. Allows planning and property-drawer. */
  ParseMode[ParseMode["Planning"] = 2] = "Planning";
  /** After planning or top-comment. Allows property-drawer. */
  ParseMode[ParseMode["PropertyDrawer"] = 3] = "PropertyDrawer";
  /** Inside a property-drawer. Expecting node-property only. */
  ParseMode[ParseMode["NodeProperty"] = 4] = "NodeProperty";
  /** Inside a list. Expecting list-item only. */
  ParseMode[ParseMode["ListItem"] = 5] = "ListItem";
  /** Inside a table. Expecting table-row only. */
  ParseMode[ParseMode["TableRow"] = 6] = "TableRow";
  /** Default parsing mode.
   */
  ParseMode[ParseMode["Default"] = 7] = "Default";
})(ParseMode || (ParseMode = {}));
/**
 * Parses `file` into an `org-data` tree.
 *
 * @param file - anything accepted by the `VFile` constructor.
 * @param options - parser options; merged over `defaultOptions` by `Parser`.
 * @returns the node produced by `Parser#parse`.
 */
export function parse(file, options) {
  return new Parser(new VFile(file), options).parse();
}
class Parser {
  /**
   * @param file - VFile handed to the `Reader`.
   * @param options - overrides applied on top of `defaultOptions`.
   */
  constructor(file, options = {}) {
    this.r = new Reader(file);
    this.options = { ...defaultOptions, ...options };
    this.re = new OrgRegexUtils(this.options);
  }
  /**
   * Adds position information to node data when trackPosition is enabled.
   *
   * Returns `data` untouched when `options.trackPosition` is falsy or when the
   * reader cannot produce a position for the given offsets; otherwise returns
   * a shallow copy of `data` with a `position` field.
   */
  addPosition(data, startOffset, endOffset) {
    if (!this.options.trackPosition) {
      return data;
    }
    const position = this.r.positionFromOffsets(startOffset, endOffset);
    if (!position) {
      return data;
    }
    return {
      ...data,
      position,
    };
  }
  /**
   * Entry point: skips leading empty lines, parses all elements starting in
   * `ParseMode.TopComment` and wraps them in an `org-data` node whose contents
   * span offset 0 to the reader's end offset.
   */
  parse() {
    this.parseEmptyLines();
    const startOffset = 0;
    const children = this.parseElements(ParseMode.TopComment);
    const endOffset = this.r.endOffset();
    return u('org-data', this.addPosition({ contentsBegin: startOffset, contentsEnd: endOffset }, startOffset, endOffset), children);
  }
  // General parsing structure
  /**
   * Parses consecutive elements until the reader reports end of input.
   *
   * For every element that exposes `contentsBegin`/`contentsEnd`, the reader is
   * narrowed to that range and the children are parsed and appended: nested
   * elements for types listed in `greaterElements`, objects otherwise.
   *
   * @param mode - current `ParseMode`; updated after each element via `Parser.nextMode`.
   * @param structure - forwarded to `parseElement`.
   * @returns the parsed elements.
   * @throws Error when an iteration starts at the same offset as the previous one.
   */
  parseElements(mode, structure) {
    const elements = [];
    let prevOffset = -1;
    while (!this.r.eof()) {
      const offset = this.r.offset();
      if (offset === prevOffset) {
        // Guard against an infinite loop: parseElement must always consume input.
        console.log('elements:', elements, 'rest:', JSON.stringify(this.r.rest()));
        throw new Error('no progress (elements), if you see this, please report an issue to https://github.com/rasendubi/uniorg/issues');
      }
      prevOffset = offset;
      const element = this.parseElement(mode, structure);
      const type = element.type;
      // @ts-expect-error contentsBegin is not defined for "literals"
      const cbeg = element.contentsBegin;
      // @ts-expect-error contentsBegin is not defined for "literals"
      const cend = element.contentsEnd;
      if (cbeg === undefined || cend === undefined) {
        // do nothing
      }
      else if (greaterElements.has(type)) {
        this.r.narrow(cbeg, cend);
        // Lists hand their pre-computed structure down to the nested parse.
        appendChildren(element, this.parseElements(Parser.nextMode(mode, type, true), element.type === 'plain-list' || element.type === 'list-item' ?
// (continuation of parseElements: second argument of the nested parseElements call)
          // @ts-expect-error Property 'structure' does not exist on type 'OrgData'
          element.structure : undefined));
        this.r.widen();
        // Delete structure from lists. It's only here to facilitate
        // parsing and should not be exposed to the user.
        // @ts-expect-error Property 'structure' does not exist on type 'OrgData'
        if (element.structure) {
          // @ts-expect-error Property 'structure' does not exist on type 'OrgData'
          delete element.structure;
        }
      }
      else {
        this.r.narrow(cbeg, cend);
        appendChildren(element, this.parseObjects(restrictionFor(element.type)));
        this.r.widen();
      }
      elements.push(element);
      mode = Parser.nextMode(mode, type, false);
    }
    return elements;
  }
  /**
   * Computes the parse mode that follows an element.
   *
   * @param mode - mode in which the element of `type` was parsed.
   * @param type - type of the element just parsed.
   * @param parent - true to get the mode for the element's children, false to
   *   get the mode for its next sibling.
   * @returns a `ParseMode` value; `ParseMode.Default` when no rule applies.
   */
  static nextMode(mode, type, parent) {
    if (parent) {
      if (type === 'section')
        return ParseMode.Headline;
      if (type === 'inlinetask')
        return ParseMode.Headline;
      if (type === 'plain-list')
        return ParseMode.ListItem;
      if (type === 'property-drawer')
        return ParseMode.NodeProperty;
      if (type === 'table')
        return ParseMode.TableRow;
    }
    else {
      if (mode === ParseMode.TopComment && type === 'comment')
        return ParseMode.PropertyDrawer;
      if (mode === ParseMode.Headline)
        return ParseMode.Planning;
      if (mode === ParseMode.Planning && type === 'planning')
        return ParseMode.PropertyDrawer;
      if (mode === ParseMode.ListItem)
        return ParseMode.ListItem;
      if (mode === ParseMode.TableRow)
        return ParseMode.TableRow;
      if (mode === ParseMode.NodeProperty)
        return ParseMode.NodeProperty;
    }
    return ParseMode.Default;
  }
  /**
   * Parses a single element at the current reader position.
   *
   * The "expecting" modes dispatch unconditionally; otherwise the element kind
   * is decided by what the reader is looking at, with paragraph as the fallback.
   *
   * @param mode - current `ParseMode`.
   * @param structure - list structure forwarded to `parseListItem`/`parseList`;
   *   computed with `parseListStructure` when a list starts and none was given.
   * @returns the parsed element node.
   */
  parseElement(mode, structure) {
    // List Item.
    if (mode === ParseMode.ListItem)
      return this.parseListItem(structure);
    // Table Row.
    if (mode === ParseMode.TableRow)
      return this.parseTableRow();
    // Node Property.
    if (mode === ParseMode.NodeProperty)
      return this.parseNodeProperty();
    // Headline.
    if (mode === ParseMode.Headline)
      return this.parseHeadline();
    // Section.
    if (this.atHeading())
      return this.parseSection();
    const isBeginningOfLine = this.r.offset() === 0 ||
      this.r.substring(this.r.offset() - 1, this.r.offset()) === '\n';
    // Comments.
    if (isBeginningOfLine && this.r.lookingAt(/^[ \t]*#(?: |$)/m)) {
      return this.parseComment();
    }
    // Planning.
    if (mode === ParseMode.Planning &&
      // TODO: check previous line is headline
      this.r.lookingAt(/^[ \t]*(CLOSED:|DEADLINE:|SCHEDULED:)/)) {
      return this.parsePlanning();
    }
    // Property drawer.
    if ((mode === ParseMode.Planning ||
      // && TODO: check previous line is headline
      ((mode === ParseMode.PropertyDrawer ||
        mode === ParseMode.TopComment) &&
        !this.r.lookingAt(/\s*$/m))) &&
      this.r.lookingAt(/^[ \t]*:PROPERTIES:[ \t]*\n(?:[ \t]*:\S+:(?: .*)?[ \t]*\n)*?[ \t]*:END:[ \t]*$/im)) {
      return this.parsePropertyDrawer();
    }
    // When not at beginning of line, point is at the beginning of an
    // item or a footnote definition: next item is always a paragraph.
    if (!isBeginningOfLine) {
      return this.parseParagraph({});
    }
    // Clock.
    if (this.r.lookingAt(/^[ \t]*CLOCK:/)) {
      return this.parseClock();
    }
    // TODO: Inlinetask.
    // From there, elements can have affiliated keywords.
    const affiliated = this.parseAffiliatedKeywords();
    // LaTeX Environment.
    if (this.r.lookingAt(latexBeginEnvironmentRe)) {
      return this.parseLatexEnvironment(affiliated);
    }
    // Drawer.
    if (this.r.lookingAt(drawerRe)) {
      return this.parseDrawer(affiliated);
    }
    // Fixed width
    if (this.r.lookingAt(/[ \t]*:( |$)/m)) {
      return this.parseFixedWidth(affiliated);
    }
    // Inline Comments, Blocks, Babel Calls, Dynamic Blocks and
    // Keywords.
    {
      const offset = this.r.offset();
      if (this.r.advance(this.r.lookingAt(/^[ \t]*#\+/))) {
        const blockM = this.r.lookingAt(/^begin_(\S+)/i);
        if (blockM) {
          this.r.resetOffset(offset); // reset so that parse*Block can match starting #+
          const blockType = blockM[1].toLowerCase();
          switch (blockType) {
            case 'center':
              return this.parseBlock('center-block', 'center', affiliated);
            case 'comment':
              return this.parseCommentBlock(affiliated);
            case 'example':
              return this.parseExampleBlock(affiliated);
            case 'export':
              return this.parseExportBlock(affiliated);
            case 'quote':
              return this.parseBlock('quote-block', 'quote', affiliated);
            case 'src':
              return this.parseSrcBlock(affiliated);
            case 'verse':
              return this.parseBlock('verse-block', 'verse', affiliated);
            default:
              return this.parseSpecialBlock(affiliated);
          }
        }
        // TODO: parse babel-call
        // TODO: parse dynamic-block
        if (this.r.lookingAt(/^\S+:/)) {
          this.r.resetOffset(offset); // reset, so that parseKeyword can match starting #+
          return this.parseKeyword(affiliated);
        }
        // fallback: parse as paragraph
        this.r.resetOffset(offset);
        return this.parseParagraph(affiliated);
      }
    }
    // Footnote Definition.
    if (this.r.lookingAt(footnoteDefinitionRe)) {
      return this.parseFootnoteDefinition(affiliated);
    }
    // Horizontal Rule.
    if (this.r.lookingAt(/^[ \t]*-{5,}[ \t]*$/m)) {
      return this.parseHorizontalRule(affiliated);
    }
    // Diary Sexp.
    if (this.r.lookingAt(/^%%\(/)) {
      return this.parseDiarySexp(affiliated);
    }
    // Table.
    const ruleRe = /[ \t]*\+(-+\+)+[ \t]*$/m;
    if (this.r.lookingAt(/^[ \t]*\|/)) {
      return this.parseTable(affiliated);
    }
    else if (this.r.lookingAt(ruleRe)) {
      // There is no strict definition of a table.el table. Try to
      // prevent false positive while being quick.
      const offset = this.r.offset();
      this.r.advance(this.r.line());
      const nextLineOffset = this.r.offset();
      const firstNonTable = this.r.match(/^[ \t]*($|[^|])/m)?.index ??
        null;
      this.r.advance(firstNonTable);
      // Accept only if at least one more line was consumed and the reader
      // is looking at a rule line again.
      const isTable = this.r.offset() > nextLineOffset && this.r.lookingAt(ruleRe);
      this.r.resetOffset(offset);
      if (isTable) {
        return this.parseTable(affiliated);
      }
      // fallthrough
    }
    // List.
    if (this.r.lookingAt(this.re.listItemRe())) {
      if (structure === undefined) {
        // Look ahead to compute the list structure, then rewind.
        const offset = this.r.offset();
        structure = this.parseListStructure();
        this.r.resetOffset(offset);
      }
      return this.parseList(structure, affiliated);
    }
    // Default element: Paragraph.
    return this.parseParagraph(affiliated);
  }
  /**
   * Parses the objects found in the reader's current range.
   *
   * Text between recognised objects is emitted as `text` nodes. Objects that
   * expose `contentsBegin`/`contentsEnd` get their children parsed recursively
   * with the restriction returned by `restrictionFor(o.type)`.
   *
   * @param restriction - set of object types allowed here (queried with `.has`).
   * @returns the list of parsed objects and text nodes.
   * @throws Error when `parseObject` returns a result without advancing the reader.
   */
  parseObjects(restriction) {
    const objects = [];
    // offset where previously parsed object ends.
    let prevEnd = this.r.offset();
    while (!this.r.eof()) {
      const prevOffset = this.r.offset();
      const mobject = this.parseObject(restriction);
      if (!mobject)
        break;
      // Handle parseObject returning result without advancing the
      // cursor. This is always a programming error and leads to
      // infinite loop here.
      if (this.r.offset() === prevOffset) {
        throw new Error(`no progress (parseObject). If you see this, please report an issue to https://github.com/rasendubi/uniorg/issues with the following information: ${JSON.stringify(mobject)}, text: ${JSON.stringify(this.r.rest())}, objects: ${JSON.stringify(objects, null, 2)}`);
      }
      const [objectBegin, o] = mobject;
      if (objectBegin !== prevEnd) {
        // parse text before object
        const value = this.r.substring(prevEnd, objectBegin);
        objects.push(u('text', this.addPosition({ value }, prevEnd, objectBegin)));
      }
      // @ts-expect-error contentsBegin is not defined for "literals"
      const cbeg = o.contentsBegin;
      // @ts-expect-error contentsBegin is not defined for "literals"
      const cend = o.contentsEnd;
      if (cbeg !== undefined && cend !== undefined) {
        this.r.narrow(cbeg, cend);
        appendChildren(o, this.parseObjects(restrictionFor(o.type)));
        this.r.widen();
      }
      objects.push(o);
      prevEnd = this.r.offset();
    }
    this.r.resetOffset(prevEnd);
    // handle text after the last object
    const text = this.r.rest();
    this.r.advance(text.length);
    // Trailing text is only emitted when it is not whitespace-only.
    if (text.trim().length) {
// (continuation of parseObjects: emit the text that follows the last object)
      objects.push(u('text', this.addPosition({ value: text }, prevEnd, this.r.offset())));
    }
    return objects;
  }
  /**
   * Finds and parses the next object allowed by `restriction`.
   *
   * The restricted, position-dependent objects (table cells and the parts of a
   * citation) are tried first. Several of them are removed from `restriction`
   * once tried, so the set passed in is mutated.
   *
   * @param restriction - set of allowed object types (`has`/`delete` are used).
   * @returns `[beginOffset, object]`, or `null` when no further object is found.
   * @throws Error when `tryParseObject` returns an object without advancing the reader.
   */
  parseObject(restriction) {
    // table-cell is only allowed inside table-row and always succeed.
    if (restriction.has('table-cell')) {
      return [this.r.offset(), this.parseTableCell()];
    }
    // citation-common-prefix is only allowed inside citation as a
    // first element. We remove citation-common-prefix from the
    // restriction as soon as we tried to parse it once.
    if (restriction.has('citation-common-prefix')) {
      restriction.delete('citation-common-prefix');
      const begin = this.r.offset();
      const prefix = this.parseCitationCommonPrefix();
      if (prefix) {
        return [begin, prefix];
      }
      // fall-through
    }
    // citation-reference is only allowed inside citation
    if (restriction.has('citation-reference')) {
      const offset = this.r.offset();
      const ref = this.parseCitationReference();
      if (ref) {
        return [offset, ref];
      }
      // fall-through
    }
    // citation-common-suffix is only allowed inside citation. It is
    // only attempted once no citation-reference could be parsed at this
    // position. We remove citation-common-suffix from the restriction
    // as soon as we tried to parse it once.
    if (restriction.has('citation-common-suffix')) {
      restriction.delete('citation-common-suffix');
      const begin = this.r.offset();
      const suffix = this.parseCitationCommonSuffix();
      if (suffix) {
        return [begin, suffix];
      }
      // fall-through
    }
    // citation-prefix is only allowed as the first element inside
    // citation-reference. We remove it from restriction as soon as we
    // tried to parse it.
    if (restriction.has('citation-prefix')) {
      restriction.delete('citation-prefix');
      const offset = this.r.offset();
      const prefix = this.parseCitationPrefix();
      if (prefix) {
        return [offset, prefix];
      }
      // fall-through
    }
    // citation-key can only occur once inside citation-reference
    if (restriction.has('citation-key')) {
      restriction.delete('citation-key');
      const offset = this.r.offset();
      const key = this.parseCitationKey();
      if (key) {
        return [offset, key];
      }
      // fall-through
    }
    // citation-suffix can only occur once inside citation-reference
    if (restriction.has('citation-suffix')) {
      restriction.delete('citation-suffix');
      const offset = this.r.offset();
      const suffix = this.parseCitationSuffix();
      if (suffix) {
        return [offset, suffix];
      }
      // fall-through
    }
    // 1. Search for pattern that probably starts an object.
    // 2. Try to parse object at that position.
    // 3. If not a valid object, advance by one char and repeat.
    const objectRe = this.re.objectRe();
    while (!this.r.eof()) {
      const m = this.r.match(objectRe);
      if (!m)
        return null;
      this.r.advance(m.index);
      const begin = this.r.offset();
      const o = this.tryParseObject(restriction);
      if (o) {
        if (begin === this.r.offset()) {
          throw new Error('no progress (tryParseObject). If you see this, please report an issue to https://github.com/rasendubi/uniorg/issues.');
        }
        return [begin, o];
      }
      this.r.resetOffset(begin);
      // Matching objectRegexp does not guarantee that we've found a
      // valid object (e.g., italic without closing /). Advance cursor
      // by one char and try searching for the next object.
this.r.advance(1); } return null; } tryParseObject(restriction) { const c = this.r.peek(2); switch (c[0]) { case '^': if (restriction.has('superscript')) { return this.parseSuperscript(); } break; case '_': const offset = this.r.offset(); const subscript = restriction.has('subscript') && this.parseSubscript(); if (subscript) { return subscript; } this.r.resetOffset(offset); if (restriction.has('underline')) { return this.parseUnderline(); } break; case '*': if (restriction.has('bold')) { return this.parseBold(); } break; case '/': if (restriction.has('italic')) { return this.parseItalic(); } break; case '~': if (restriction.has('code')) { return this.parseCode(); } break; case '=': if (restriction.has('verbatim')) { return this.parseVerbatim(); } break; case '+': if (restriction.has('strike-through')) { return this.parseStrikeThrough(); } break; case '@': if (restriction.has('export-snippet')) { return this.parseExportSnippet(); } break; case '$': if (restriction.has('latex-fragment')) { return this.parseLatexFragment(); } break; case '<': if (c[1] === '<') { // TODO: radio target / target } else { const offset = this.r.offset(); const ts = restriction.has('timestamp') && this.parseTimestamp(); if (ts) return ts; this.r.resetOffset(offset); const link = restriction.has('link') && this.parseLink(); if (link) return link; this.r.resetOffset(offset); } break; case '\\': if (c[1] === '\\') { if (restriction.has('line-break')) { return this.parseLineBreak(); } } else { const offset = this.r.offset(); const entity = restriction.has('entity') && this.parseEntity(); if (entity) return entity; this.r.resetOffset(offset); const fragment = restriction.has('latex-fragment') && this.parseLatexFragment(); if (fragment) return fragment; this.r.resetOffset(offset); } break; case '[': if (c[1] === '[') { // normal link if (restriction.has('link')) { return this.parseLink(); } } else if (c[1] === 'f') { if (restriction.has('footnote-reference')) { return 
this.parseFootnoteReference(); } } else if (c[1] === 'c') { if (restriction.has('citation')) { return this.parseCitation(); } } else { const offset = this.r.offset(); const ts = restriction.has('timestamp') && this.parseTimestamp(); if (ts) return ts; this.r.resetOffset(offset); const cookie = restriction.has('statistics-cookie') && this.parseStatisticsCookie(); if (cookie) return cookie; this.r.resetOffset(offset); } break; default: // This is probably a plain link. if (restriction.has('link')) { return this.parseLink(); } } return null; } // Elements parsers parseSection() { const contentsBegin = this.r.offset(); const m = this.r.forceLookingAt(/^(\*+)[ \t]/m); const level = m[1].length; this.r.advance(this.r.line()); const endOfSubtree = this.r.match(new RegExp(`^\\*{1,${level}}[ \\t]`, 'm')); const contentsEnd = endOfSubtree ? this.r.offset() + endOfSubtree.index : this.r.endOffset(); this.r.resetOffset(contentsEnd); return u('section', this.addPosition({ contentsBegin, contentsEnd }, contentsBegin, contentsEnd), []); } parseHeadline() { const begin = this.r.offset(); this.r.advance(this.r.line()); this.r.narrow(begin, this.r.offset()); const stars = this.r.advance(this.r.forceLookingAt(/^(\*+)[ \t]+/)); const level = stars[1].length; const todoM = this.r.advance(this.r.lookingAt(new RegExp('^' + this.options.todoKeywords.join('|')))); const todoKeyword = todoM?.[0] ?? null; this.r.advance(this.r.lookingAt(/^[ \t]*/)); const priorityM = this.r.advance(this.r.lookingAt(/^\[#.\]/)); const priority = priorityM?.[0][2] ?? null; this.r.advance(this.r.lookingAt(/^[ \t]*/)); const commented = !!this.r.advance(this.r.lookingAt(/^COMMENT/)); this.r.advance(this.r.lookingAt(/^[ \t]*/)); const titleStart = this.r.offset(); const tagsM = this.r.lookingAt(/^(.*?)[ \t]+:([\p{L}\p{N}_@#%:]+):[ \t]*$/mu); const tags = tagsM?.[2].split(':') ?? []; const titleEnd = tagsM ? 
titleStart + tagsM.index + tagsM[1].length : titleStart + this.r.forceLookingAt(/.*/)[0].length; const rawValue = this.r.substring(titleStart, titleEnd); const contentsBegin = titleStart; const contentsEnd = titleEnd; // Reset line restriction. this.r.widen(); this.parseEmptyLines(); return u('headline', this.addPosition({ level, todoKeyword, priority, commented, rawValue, tags, contentsBegin, contentsEnd, }, begin, titleEnd), []); } parsePlanning() { this.r.narrow(this.r.offset(), this.r.offset() + this.r.line().length); this.r.advance(this.r.match(/^[ \t]*/)); const begin = this.r.offset(); let scheduled = null; let deadline = null; let closed = null; while (true) { const m = this.r.match(/\b(SCHEDULED:|DEADLINE:|CLOSED:) *[\[<]([^\]>]+)[\]>]/); if (!m) break; this.r.advance(m.index + m[1].length); this.r.advance(this.r.match(/^[ \t]*/)); const keyword = m[1]; const time = this.parseTimestamp(); if (keyword === 'SCHEDULED:') scheduled = time; if (keyword === 'DEADLINE:') deadline = time; if (keyword === 'CLOSED:') closed = time; } const end = this.r.offset(); this.r.widen(); this.r.advance(this.r.line()); this.parseEmptyLines(); return u('planning', this.addPosition({ scheduled, deadline, closed }, begin, end)); } parsePropertyDrawer() { const begin = this.r.offset(); this.r.advance(this.r.line()); const contentsBegin = this.r.offset(); const endM = this.r.forceMatch(/^[ \t]*:END:[ \t]*$/im); this.r.advance(endM.index); const contentsEnd = this.r.offset(); this.r.advance(this.r.line()); const end = this.r.offset(); this.parseEmptyLines(); return u('property-drawer', this.addPosition({ contentsBegin, contentsEnd }, begin, end), []); } parseBlock(type, pattern, affiliated) { const endM = this.r.match(new RegExp(`^[ \\t]*#\\+end_${pattern}[ \\t]*$`, 'im')); if (!endM) { // Incomplete block: parse it as a paragraph. 
// (continuation of parseBlock: fallback for a block without a closing line)
      return this.parseParagraph(affiliated);
    }
    const begin = this.r.offset();
    // Contents start after the `#+begin_` line and stop at the `#+end_` line.
    const contentsBegin = begin + this.r.line().length;
    const contentsEnd = begin + endM.index;
    this.r.resetOffset(contentsEnd);
    this.r.advance(this.r.line());
    const end = this.r.offset();
    this.parseEmptyLines();
    return u(type, this.addPosition({ affiliated, contentsBegin, contentsEnd }, begin, end), []);
  }
  /**
   * Parses a run of consecutive `# ` comment lines into one `comment` node.
   *
   * The value is the concatenation of the lines with the leading `#` marker
   * removed and without a trailing newline.
   */
  parseComment() {
    let valueLines = [];
    this.r.advance(this.r.forceLookingAt(/^[ \t]*# ?/));
    const start = this.r.offset();
    valueLines.push(this.r.advance(this.r.line()));
    while (true) {
      const m = this.r.advance(this.r.lookingAt(/^[ \t]*#( |$)/m));
      if (!m)
        break;
      valueLines.push(this.r.advance(this.r.line()));
    }
    let end = this.r.offset();
    // Do not include the final newline in the reported range.
    if (this.r.substring(end - 1, end) === '\n') {
      end -= 1;
    }
    let value = valueLines.join('');
    if (value[value.length - 1] === '\n') {
      value = value.substring(0, value.length - 1);
    }
    return u('comment', this.addPosition({ value }, start, end));
  }
  /**
   * Parses consecutive `: text` lines into one `fixed-width` node whose value
   * is the line contents joined with '\n'.
   *
   * @param affiliated - affiliated keywords collected before the element.
   */
  parseFixedWidth(affiliated) {
    let valueLines = [];
    const begin = this.r.offset();
    while (true) {
      const m = this.r.lookingAt(/^[ \t]*: ?(.*)$/m);
      if (!m)
        break;
      this.r.advance(this.r.line());
      valueLines.push(m[1]);
    }
    const value = valueLines.join('\n');
    let end = this.r.offset();
    // Do not include the final newline in the reported range.
    if (this.r.substring(end - 1, end) === '\n') {
      end -= 1;
    }
    return u('fixed-width', this.addPosition({ affiliated, value }, begin, end));
  }
  /**
   * Parses a `#+begin_comment` block. Reuses `parseBlock` to locate the block,
   * then replaces the contents range and children with the raw text `value`.
   *
   * @param affiliated - affiliated keywords collected before the block.
   * @returns a `comment-block` node, or the paragraph `parseBlock` fell back to.
   */
  parseCommentBlock(affiliated) {
    const comment = this.parseBlock('comment-block', 'comment', affiliated);
    if (comment.type !== 'comment-block') {
      // parsed as paragraph
      return comment;
    }
    const { type: _, contentsBegin, contentsEnd, children, ...rest } = comment;
    const value = this.r.substring(contentsBegin, contentsEnd);
    return u('comment-block', { ...rest, value, affiliated });
  }
  /**
   * Parses a `#+begin_src` block.
   *
   * `language`, `switches` and `parameters` come from the header line;
   * `value` is the block body passed through `unescapeCodeInString`.
   *
   * @param affiliated - affiliated keywords collected before the block.
   * @returns a `src-block` node, or a paragraph when the block is not closed.
   */
  parseSrcBlock(affiliated) {
    const endM = this.r.match(/^[ \t]*#\+end_src[ \t]*$/im);
    if (!endM) {
      // Incomplete block: parse it as a paragraph.
      return this.parseParagraph(affiliated);
    }
    const headerM = this.r.forceMatch(/^[ \t]*#\+begin_src(?: +(?<language>\S+))?(?<switches>(?: +(?:-(?:l ".+"|[ikr])|[-+]n(?: *[0-9]+)?))+)?(?<parameters>.*)[ \t]*$/im);
    const { language, switches, parameters } = headerM.groups;
    const begin = this.r.offset();
    const contentsBegin = begin + this.r.line().length;
    const contentsEnd = begin + endM.index;
    const value = unescapeCodeInString(this.r.substring(contentsBegin, contentsEnd));
    this.r.resetOffset(contentsEnd);
    this.r.advance(this.r.line());
    // The node ends at the end of the `#+end_src` match.
    const end = begin + endM.index + endM[0].length;
    this.parseEmptyLines();
    return u('src-block', this.addPosition({
      affiliated,
      language,
      switches: switches?.trim() ?? null,
      // using || to convert empty strings to null as well
      parameters: parameters.trim() || null,
      value,
    }, begin, end));
  }
  /**
   * Parses a `#+begin_example` block. Reuses `parseBlock` to locate the block,
   * then replaces the contents range and children with the raw text `value`.
   *
   * @param affiliated - affiliated keywords collected before the block.
   * @returns an `example-block` node, or the paragraph `parseBlock` fell back to.
   */
  parseExampleBlock(affiliated) {
    // TODO: parse switches
    const block = this.parseBlock('example-block', 'example', affiliated);
    if (block.type !== 'example-block') {
      // parsed as paragraph
      return block;
    }
    const { type: _, contentsBegin, contentsEnd, children, ...rest } = block;
    const value = this.r.substring(contentsBegin, contentsEnd);
    return u('example-block', { ...rest, value, affiliated });
  }
  /**
   * Parses a `#+begin_export` block.
   *
   * `backend` is the single word following `#+begin_export` (null when the
   * header regexp finds none); `value` is the block body passed through
   * `unescapeCodeInString`.
   *
   * @param affiliated - affiliated keywords collected before the block.
   * @returns an `export-block` node, or a paragraph when the block is not closed.
   */
  parseExportBlock(affiliated) {
    const endM = this.r.match(/^[ \t]*#\+end_export[ \t]*$/im);
    if (!endM) {
      // Incomplete block: parse it as a paragraph.
      return this.parseParagraph(affiliated);
    }
    const headerM = this.r.match(/^[ \t]*#\+begin_export(?:[ \t]+(\S+))?[ \t]*$/im);
    const backend = headerM?.[1] ??
// (continuation of parseExportBlock: default for `backend` when the header names none)
      null;
    const begin = this.r.offset();
    const contentsBegin = begin + this.r.line().length;
    const contentsEnd = begin + endM.index;
    const value = unescapeCodeInString(this.r.substring(contentsBegin, contentsEnd));
    this.r.resetOffset(contentsEnd);
    this.r.advance(this.r.line());
    this.parseEmptyLines();
    // The node ends at the end of the `#+end_export` match.
    const end = begin + endM.index + endM[0].length;
    return u('export-block', this.addPosition({ affiliated, backend, value }, begin, end));
  }
  /**
   * Parses a `#+begin_<name>` block with a name that has no dedicated parser.
   *
   * The block name is escaped before it is interpolated into the regular
   * expression that locates the matching `#+end_<name>` line.
   *
   * @param affiliated - affiliated keywords collected before the block.
   * @returns a `special-block` node, or a paragraph (after recording an
   *   'incomplete block' message on the reader) when the block is not closed.
   */
  parseSpecialBlock(affiliated) {
    const blockType = this.r.forceLookingAt(/[ \t]*#\+begin_(\S+)/i)[1];
    const endM = this.r.match(new RegExp(`^[ \\t]*#\\+end_${escapeRegExp(blockType)}[ \\t]*$`, 'im'));
    if (!endM) {
      this.r.message('incomplete block', this.r.offset(), 'uniorg');
      // Incomplete block: parse it as a paragraph.
      return this.parseParagraph(affiliated);
    }
    const begin = this.r.offset();
    const contentsBegin = begin + this.r.line().length;
    const contentsEnd = begin + endM.index;
    this.r.resetOffset(contentsEnd);
    this.r.advance(this.r.line());
    this.parseEmptyLines();
    const end = begin + endM.index + endM[0].length;
    return u('special-block', this.addPosition({ affiliated, blockType, contentsBegin, contentsEnd }, begin, end), []);
  }
  /**
   * Collects the affiliated keywords that precede an element.
   *
   * Keyword names are upper-cased and mapped through
   * `keywordTranslationTable`. Keywords in `multipleKeywords` and all
   * `ATTR_*` keywords accumulate into arrays; others keep the last value.
   *
   * @returns an object keyed by keyword; `{}` (with the reader rewound to
   *   where it started) when the keywords are followed by a blank line.
   */
  parseAffiliatedKeywords() {
    const offset = this.r.offset();
    const result = {};
    while (!this.r.eof()) {
      const keywordM = this.r.lookingAt(affiliatedRe);
      if (!keywordM)
        break;
      const rawKeyword = (keywordM.groups.dualKeyword ??
        keywordM.groups.regularKeyword ??
        keywordM.groups.attributeKeyword).toUpperCase();
      const keyword = keywordTranslationTable[rawKeyword] ?? rawKeyword;
      // true if keyword should have its value parsed
      const isParsed = parsedKeywords.has(keyword);
      this.r.advance(keywordM);
      // Restrict the reader to the remainder of the keyword line.
      this.r.narrow(this.r.offset(), this.r.offset() + this.r.line().length);
      const mainValue = isParsed
        ? this.parseObjects(restrictionFor('keyword'))
        : this.r.rest().trim();
      this.r.widen();
      this.r.advance(this.r.line());
      const isDual = dualKeywords.has(keyword);
      const dualValue = isDual
        ? (keywordM.groups.dualValue ??
          null)
        : null;
      // Dual keywords with a secondary value are stored as [main, secondary].
      const value = dualValue === null ? mainValue : [mainValue, dualValue];
      if (multipleKeywords.has(keyword) ||
        // Attributes can always appear on multiple lines.
        keyword.match(/^ATTR_/)) {
        result[keyword] = result[keyword] || [];
        result[keyword].push(value);
      }
      else {
        result[keyword] = value;
      }
    }
    // If affiliated keywords are orphaned: move back to first one.
    // They will be parsed as a paragraph.
    if (this.r.lookingAt(/^[ \t]*$/m)) {
      this.r.resetOffset(offset);
      return {};
    }
    return result;
  }
  /**
   * Parses a `#+KEY: value` line into a `keyword` node. The key is
   * upper-cased and the value is trimmed.
   *
   * @param affiliated - affiliated keywords collected before the element.
   */
  parseKeyword(affiliated) {
    const m = this.r.forceLookingAt(/[ \t]*#\+(\S+):(.*)/);
    const key = m[1].toUpperCase();
    const value = m[2].trim();
    const begin = this.r.offset();
    this.r.advance(this.r.line());
    const end = this.r.offset();
    this.parseEmptyLines();
    return u('keyword', this.addPosition({ affiliated, key, value }, begin, end));
  }
  /**
   * Parses a LaTeX environment; `value` is the raw text from the opening
   * match to the end of the closing match.
   *
   * @param affiliated - affiliated keywords collected before the element.
   * @returns a `latex-environment` node, or a paragraph when no matching end
   *   is found.
   */
  parseLatexEnvironment(affiliated) {
    const beginOffset = this.r.offset();
    const beginM = this.r.advance(this.r.forceLookingAt(latexBeginEnvironmentRe));
    const name = beginM[1];
    const endM = this.r.match(latexEndEnvironmentRe(name));
    if (!endM) {
      // Incomplete latex environment: parse it as a paragraph.
      this.r.resetOffset(beginOffset);
      return this.parseParagraph(affiliated);
    }
    this.r.advance(endM);
    const endOffset = this.r.offset();
    this.parseEmptyLines();
    const value = this.r.substring(beginOffset, endOffset);
    return u('latex-environment', this.addPosition({ affiliated, value }, beginOffset, endOffset));
  }
  /**
   * Parses a drawer (`:NAME:` ... `:END:`) whose contents are parsed as
   * elements.
   *
   * @param affiliated - affiliated keywords collected before the element.
   * @returns a `drawer` node, or a paragraph (after recording an
   *   'incomplete drawer' message on the reader) when `:END:` is missing.
   */
  parseDrawer(affiliated) {
    const start = this.r.offset();
    const endM = this.r.match(/^[ \t]*:END:[ \t]*$/im);
    if (!endM) {
      this.r.message('incomplete drawer', this.r.offset(), 'uniorg');
      // Incomplete drawer: parse it as a paragraph.
return this.parseParagraph(affiliated); } const end = start + endM.index + endM[0].length; const contentsEnd = this.r.offset() + endM.index; const name = this.r.forceLookingAt(drawerRe)[1]; this.r.advance(this.r.line()); const contentsBegin = this.r.offset(); this.r.resetOffset(contentsEnd); this.r.advance(this.r.line()); this.parseEmptyLines(); return u('drawer', this.addPosition({ affiliated, name, contentsBegin, contentsEnd }, start, end), []); } parseClock() { const start = this.r.offset(); this.r.advance(this.r.forceMatch(/^[ \t]*CLOCK:[ \t]*/)); const value = this.parseTimestamp(); this.r.advance(this.r.match(/^[ \t]+=>[ \t]*/)); const durationM = this.r.advance(this.r.lookingAt(/^(\S+)[ \t]*$/m)); const duration = durationM ? durationM[1] : null; const status = duration ? 'closed' : 'running'; const end = this.r.offset(); this.parseEmptyLines(); return u('clock', this.addPosition({ value, duration, status }, start, end)); } parseNodeProperty() { const start = this.r.offset(); const propertyRe = /^[ \t]*:(?<key>\S+):(?:(?<value1>$)|[ \t]+(?<value2>.*?))[ \t]*$/m; const m = this.r.forceLookingAt(propertyRe); const key = m.groups['key']; const value = m.groups['value1'] ?? m.groups['value2']; const end = this.r.offset() + m.index + m[0].length; this.r.advance(this.r.line()); return u('node-property', this.addPosition({ key, value }, start, end)); } parseParagraph(affiliated) { const begin = this.r.offset(); const contentsBegin = begin; this.r.advance(this.r.line()); let next = null; while ((next = this.r.match(this.re.paragraphSeparateRe()))) { this.r.advance(next.index); // A matching `paragraphSeparateRe` is not necessarily the end // of the paragraph. In particular, drawers, blocks or LaTeX // environments opening lines must be closed. Moreover keywords // with a secondary value must belong to "dual keywords". 
const blockBeginM = this.r.lookingAt(/[ \t]*#\+begin_(\S+)/i); if (blockBeginM) { const blockEndM = this.r.match(new RegExp(`^[ \\t]*#\\+end_${blockBeginM[1]}[ \\t]*$`, 'im')); if (!blockEndM) { this.r.advance(this.r.line()); continue; } break; } const drawerM = this.r.lookingAt(drawerRe); if (drawerM) { const endM = this.r.match(/^[ \t]*:END:[ \t]*$/im); if (!endM) { this.r.advance(this.r.line()); continue; } break; } const latexEnvironmentM = this.r.lookingAt(latexBeginEnvironmentRe); if (latexEnvironmentM) { const name = latexEnvironmentM[1]; const endM = this.r.match(latexEndEnvironmentRe(name)); if (!endM) { this.r.advance(this.r.line()); continue; } break; } const dualKeywordM = this.r.lookingAt(/[ \t]*#\+(\S+)\[.*\]:/); if (dualKeywordM) { if (!dualKeywords.has(dualKeywordM[1].toLowerCase())) { this.r.advance(this.r.line()); continue; } break; } // Everything else unambigously ends paragraph. break; } const contentsEnd = next ? this.r.offset() : this.r.endOffset(); const end = contentsEnd; this.r.resetOffset(contentsEnd); this.parseEmptyLines(); return u('paragraph', this.addPosition({ affiliated, contentsBegin, contentsEnd }, begin, end), []); } parseFootnoteDefinition(affiliated) { const start = this.r.offset(); const m = this.r.forceLookingAt(footnoteDefinitionRe); const label = m[1]; const begin = this.r.offset(); this.r.advance(this.r.line()); const endM = this.r.match(footnoteDefinitionSeparatorRe); this.r.advance(endM?.index); let contentsEnd = endM ? this.r.offset() : this.r.endOffset(); if (endM && endM[0][0] === '[') { // At a new footnote definition, make sure we end before any // affiliated keyword above. let lines = this.r.substring(begin, this.r.offset()).split('\n'); // drop first line because this is the line definition starts, // drop last line because it is empty. 
lines = lines.slice(1, lines.length - 1); while (lines.length) { const line = lines.pop(); if (line.match(affiliatedRe)?.index === 0) { // -1 to compensate for \n this.r.advance(-line.length - 1); } else { break; } } contentsEnd = this.r.offset(); } const end = contentsEnd; this.r.narrow(begin, contentsEnd); this.r.advance(this.r.forceMatch(/\][ \r\t\n]*/m)); const contentsBegin = this.r.offset(); this.r.widen(); this.r.resetOffset(contentsEnd); this.parseEmptyLines(); return u('footnote-definition', this.addPosition({ affiliated, label, contentsBegin, contentsEnd }, start, end), []); } parseHorizontalRule(affiliated) { const start = this.r.offset(); this.r.advance(this.r.line()); const end = this.r.offset(); this.parseEmptyLines(); return u('horizontal-rule', this.addPosition({ affiliated }, start, end)); } parseDiarySexp(affiliated) { const start = this.r.offset(); const value = this.r.forceLookingAt(/^(%%\(.*)[ \t]*$/m)[1]; this.r.advance(this.r.line()); const end = this.r.offset(); this.parseEmptyLines(); return u('diary-sexp', this.addPosition({ affiliated, value }, start, end)); } parseTable(affiliated) { const start = this.r.offset(); const contentsBegin = this.r.offset(); const tableType = this.r.lookingAt(/^[ \t]*\|/) ? 'org' : 'table.el'; const endRe = new RegExp(`^[ \\t]*($|[^| \\t${tableType === 'org' ? '' : '+'}])`, 'm'); const endM = this.r.match(endRe); const contentsEnd = endM ? 
contentsBegin + endM.index : this.r.endOffset(); this.r.resetOffset(contentsEnd); let tblfm = ''; while (true) { const tblfmM = this.r.lookingAt(/^[ \t]*#\+TBLFM: +(.*?)[ \t]*$/m); if (!tblfmM) break; tblfm = tblfm + tblfmM[1]; this.r.advance(this.r.line()); } const end = this.r.offset(); this.parseEmptyLines(); if (tableType === 'org') { return u('table', this.addPosition({ tableType, tblfm, contentsBegin, contentsEnd }, start, end), []); } else { return u('table', this.addPosition({ affiliated, tableType, tblfm, value: this.r.substring(contentsBegin, contentsEnd), }, start, end)); } } parseTableRow() { const start = this.r.offset(); const rowType = this.r.lookingAt(/^[ \t]*\|-/) ? 'rule' : 'standard'; this.r.advance(this.r.forceMatch(/\|/)); const contentsBegin = this.r.offset(); this.r.advance(this.r.forceMatch(/^.*?[ \t]*$/m)); const end = this.r.offset(); // A table rule has no contents. In that case, ensure // contentsBegin matches contentsEnd. const contentsEnd = rowType === 'rule' ? contentsBegin : this.r.offset(); this.r.advance(this.r.line()); return u('table-row', this.addPosition({ rowType, contentsBegin, contentsEnd }, start, end), []); } parseTableCell() { const start = this.r.offset(); this.r.advance(this.r.forceLookingAt(/^[ \t]*/)); const contentsBegin = this.r.offset(); const m = this.r.advance(this.r.forceLookingAt(/(.*?)[ \t]*(?:\||$)/m)); const contentsEnd = contentsBegin + m[1].length; const end = contentsBegin + m[0].length; return u('table-cell', this.addPosition({ contentsBegin, contentsEnd }, start, end), []); } parseList(structure, affiliated) { const start = this.r.offset(); const contentsBegin = this.r.offset(); const item = structure.find((x) => x.begin === contentsBegin); if (!item) { throw new Error(`parseList: cannot find item. contentsBegin: ${contentsBegin}, structure: ${JSON.stringify(structure, null, 2)}`); } const indent = item.indent; const listType = item.tag ? 'descriptive' : '-+*'.includes(item.bullet[0]) ? 
'unordered' : 'ordered'; let pos = item.end; while (true) { const next = structure.find((x) => x.begin === pos && x.indent === indent); if (!next) break; pos = next.end; } const contentsEnd = pos; this.r.resetOffset(contentsEnd); const end = this.r.offset(); return u('plain-list', this.addPosition({ affiliated, indent, listType, contentsBegin, contentsEnd, // Exposing structure here is temporary as it gets removed in parseElements(). It is only exposed so // that parseElements() can pick it up and use it for parsing list items. structure, }, start, end), []); } parseListItem(structure) { const start = this.r.offset(); const m = this.r.advance(this.r.forceMatch(this.re.fullListItemRe())); const bullet = m.groups.bullet; const counter = m.groups.counter ?? null; const checkbox = m.groups.checkbox === '[ ]' ? 'off' : m.groups.checkbox?.toLowerCase() === '[x]' ? 'on' : m.groups.checkbox === '[-]' ? 'trans' : null; const item = structure.find((x) => x.begin === start); const contentsBegin = this.r.offset(); const contentsEnd = item.end; this.r.resetOffset(contentsEnd); const end = this.r.offset(); return u('list-item', this.addPosition({ indent: item.indent, bullet, counter, checkbox, contentsBegin, contentsEnd, structure, }, start, end), item.tag ? 
[item.tag] : []); } parseListStructure() { const items = []; const struct = []; while (true) { if (this.r.eof() || this.r.match(this.re.listEndRe())?.index === 0) { break; } const m = this.r.match(this.re.listItemRe()); if (m) { const indent = (m.groups.indent1?.length || 0) + (m.groups.indent2?.length || 0); // end previous siblings while (items.length && items[items.length - 1].indent >= indent) { const item = items.pop(); item.end = this.r.offset(); struct.push(item); } const fullM = this.r.forceMatch(this.re.fullListItemRe()); const { bullet, counter, checkbox } = fullM.groups; if (indent === 0 && bullet.startsWith('*')) { // *-bullets cannot start at 0 indent—this is headline, not a bullet break; } // js doesn't have a way to get start offset of a selected // group, so we add lengths of all groups before it. let tag = null; if (fullM.groups.tag !== undefined) { const tagStartOffset = this.r.offset() + (fullM.groups.indent?.length ?? 0) + (fullM.groups.bullet?.length ?? 0) + (fullM.groups.counter_group?.length ?? 0) + (fullM.groups.checkbox_group?.length ?? 0); const tagStopOffset = tagStartOffset + fullM.groups.tag.length; this.r.narrow(tagStartOffset, tagStopOffset); tag = u('list-item-tag', {}, this.parseObjects(restrictionFor('list-item'))); this.r.widen(); } const item = { begin: this.r.offset(), indent, bullet, counter: counter ?? null, checkbox: checkbox ?? null, tag, // will be overwritten later end: this.r.offset(), }; items.push(item); this.r.advance(this.r.line()); } else if (this.r.match(/^[ \t]*\n/)) { // skip blank lines this.r.advance(this.r.line()); } else { // At some text line. Check if it ends any previous item. const indent = this.r.forceLookingAt(/^[ \t]*/)[0].length; while (items.length && items[items.length - 1].indent >= indent) { const item = items.pop();