uniorg-parse
Version:
uniorg plugin to parse org-mode
1,172 lines (1,171 loc) • 76.2 kB
JavaScript
import { VFile } from 'vfile';
import { u } from 'unist-builder';
import { getOrgEntity } from './entities.js';
import { restrictionFor, greaterElements, unescapeCodeInString, escapeRegExp, OrgRegexUtils, } from './utils.js';
import { defaultOptions } from './parse-options.js';
import { Reader } from './reader.js';
/*
(defun rasen/org-debug ()
"Show org AST for the current buffer."
(interactive)
(let ((document (org-element-parse-buffer)))
(with-current-buffer-window "*org-elements*" nil nil
(emacs-lisp-mode)
(pp (cddr document)))))
*/
/**
 * `ParseMode` determines which Elements are expected/allowed in the given context.
 *
 * By default, all Elements except `Headline`, `Planning`, `PropertyDrawer`, `ListItem`, `TableRow`, and
 * `NodeProperty` are supported.
 *
 * If the documentation of a mode says "allows", the listed elements are supported in addition to the
 * default elements.
 *
 * If the documentation of a mode says "expecting", only those elements are allowed (default elements are
 * not).
 */
var ParseMode;
(function (modes) {
    // The index of a name in this list is the numeric value of the mode.
    const names = [
        // Initial parsing mode. Allows property-drawer.
        'TopComment',
        // First inside section. Expecting a headline only.
        'Headline',
        // Right after headline. Allows planning and property-drawer.
        'Planning',
        // After planning or top-comment. Allows property-drawer.
        'PropertyDrawer',
        // Inside a property-drawer. Expecting node-property only.
        'NodeProperty',
        // Inside a list. Expecting list-item only.
        'ListItem',
        // Inside a table. Expecting table-row only.
        'TableRow',
        // Default parsing mode.
        'Default',
    ];
    names.forEach((name, value) => {
        // Two-way mapping: name -> value and value -> name.
        modes[name] = value;
        modes[value] = name;
    });
})(ParseMode || (ParseMode = {}));
/**
 * Module entry point: wraps `file` in a `VFile`, builds a `Parser` for it and runs the parse.
 *
 * @param file value handed to the `VFile` constructor.
 * @param options parser options; forwarded to the `Parser` constructor.
 * @returns the value of `Parser#parse()` (the root `org-data` node).
 */
export function parse(file, options) {
    const parser = new Parser(new VFile(file), options);
    return parser.parse();
}
class Parser {
constructor(file, options = {}) {
this.r = new Reader(file);
this.options = { ...defaultOptions, ...options };
this.re = new OrgRegexUtils(this.options);
}
/**
* Adds position information to node data when trackPosition is enabled
*/
addPosition(data, startOffset, endOffset) {
if (!this.options.trackPosition) {
return data;
}
const position = this.r.positionFromOffsets(startOffset, endOffset);
if (!position) {
return data;
}
return {
...data,
position,
};
}
parse() {
this.parseEmptyLines();
const startOffset = 0;
const children = this.parseElements(ParseMode.TopComment);
const endOffset = this.r.endOffset();
return u('org-data', this.addPosition({ contentsBegin: startOffset, contentsEnd: endOffset }, startOffset, endOffset), children);
}
// General parsing structure
parseElements(mode, structure) {
const elements = [];
let prevOffset = -1;
while (!this.r.eof()) {
const offset = this.r.offset();
if (offset === prevOffset) {
console.log('elements:', elements, 'rest:', JSON.stringify(this.r.rest()));
throw new Error('no progress (elements), if you see this, please report an issue to https://github.com/rasendubi/uniorg/issues');
}
prevOffset = offset;
const element = this.parseElement(mode, structure);
const type = element.type;
// @ts-expect-error contentsBegin is not defined for "literals"
const cbeg = element.contentsBegin;
// @ts-expect-error contentsBegin is not defined for "literals"
const cend = element.contentsEnd;
if (cbeg === undefined || cend === undefined) {
// do nothing
}
else if (greaterElements.has(type)) {
this.r.narrow(cbeg, cend);
appendChildren(element, this.parseElements(Parser.nextMode(mode, type, true), element.type === 'plain-list' || element.type === 'list-item'
? // @ts-expect-error Property 'structure' does not exist on type 'OrgData'
element.structure
: undefined));
this.r.widen();
// Delete structure from lists. It's only here to facilitate
// parsing and should not be exposed to the user.
// @ts-expect-error Property 'structure' does not exist on type 'OrgData'
if (element.structure) {
// @ts-expect-error Property 'structure' does not exist on type 'OrgData'
delete element.structure;
}
}
else {
this.r.narrow(cbeg, cend);
appendChildren(element, this.parseObjects(restrictionFor(element.type)));
this.r.widen();
}
elements.push(element);
mode = Parser.nextMode(mode, type, false);
}
return elements;
}
static nextMode(mode, type, parent) {
if (parent) {
if (type === 'section')
return ParseMode.Headline;
if (type === 'inlinetask')
return ParseMode.Headline;
if (type === 'plain-list')
return ParseMode.ListItem;
if (type === 'property-drawer')
return ParseMode.NodeProperty;
if (type === 'table')
return ParseMode.TableRow;
}
else {
if (mode === ParseMode.TopComment && type === 'comment')
return ParseMode.PropertyDrawer;
if (mode === ParseMode.Headline)
return ParseMode.Planning;
if (mode === ParseMode.Planning && type === 'planning')
return ParseMode.PropertyDrawer;
if (mode === ParseMode.ListItem)
return ParseMode.ListItem;
if (mode === ParseMode.TableRow)
return ParseMode.TableRow;
if (mode === ParseMode.NodeProperty)
return ParseMode.NodeProperty;
}
return ParseMode.Default;
}
/**
 * Parses a single element at the current reader offset by dispatching to the matching
 * `parse*` method.
 *
 * Modes that expect exactly one element type (list item, table row, node property, headline)
 * are dispatched first. Otherwise the text at the current offset is tested against each
 * element syntax in the order below, with paragraph as the fallback.
 *
 * @param mode current `ParseMode`.
 * @param structure list structure, if already computed by an enclosing list; computed here via
 *   `parseListStructure()` when a list starts and no structure was passed in.
 * @returns the parsed element node.
 */
parseElement(mode, structure) {
// List Item.
if (mode === ParseMode.ListItem)
return this.parseListItem(structure);
// Table Row.
if (mode === ParseMode.TableRow)
return this.parseTableRow();
// Node Property.
if (mode === ParseMode.NodeProperty)
return this.parseNodeProperty();
// Headline.
if (mode === ParseMode.Headline)
return this.parseHeadline();
// Section.
if (this.atHeading())
return this.parseSection();
// True at the very first offset or right after a newline character.
const isBeginningOfLine = this.r.offset() === 0 ||
this.r.substring(this.r.offset() - 1, this.r.offset()) === '\n';
// Comments.
if (isBeginningOfLine && this.r.lookingAt(/^[ \t]*#(?: |$)/m)) {
return this.parseComment();
}
// Planning.
if (mode === ParseMode.Planning &&
// TODO: check previous line is headline
this.r.lookingAt(/^[ \t]*(CLOSED:|DEADLINE:|SCHEDULED:)/)) {
return this.parsePlanning();
}
// Property drawer: allowed in Planning mode, or in PropertyDrawer/TopComment mode when the
// current line is not blank. The whole drawer (through :END:) must match.
if ((mode === ParseMode.Planning ||
// && TODO: check previous line is headline
((mode === ParseMode.PropertyDrawer || mode === ParseMode.TopComment) &&
!this.r.lookingAt(/\s*$/m))) &&
this.r.lookingAt(/^[ \t]*:PROPERTIES:[ \t]*\n(?:[ \t]*:\S+:(?: .*)?[ \t]*\n)*?[ \t]*:END:[ \t]*$/im)) {
return this.parsePropertyDrawer();
}
// When not at beginning of line, point is at the beginning of an
// item or a footnote definition: next item is always a paragraph.
if (!isBeginningOfLine) {
return this.parseParagraph({});
}
// Clock.
if (this.r.lookingAt(/^[ \t]*CLOCK:/)) {
return this.parseClock();
}
// TODO: Inlinetask.
// From there, elements can have affiliated keywords.
const affiliated = this.parseAffiliatedKeywords();
// LaTeX Environment.
if (this.r.lookingAt(latexBeginEnvironmentRe)) {
return this.parseLatexEnvironment(affiliated);
}
// Drawer.
if (this.r.lookingAt(drawerRe)) {
return this.parseDrawer(affiliated);
}
// Fixed width
if (this.r.lookingAt(/[ \t]*:( |$)/m)) {
return this.parseFixedWidth(affiliated);
}
// Inline Comments, Blocks, Babel Calls, Dynamic Blocks and
// Keywords.
{
// Remember where the line starts: the reader is advanced past "#+" only to inspect what
// follows, and is reset before delegating.
const offset = this.r.offset();
if (this.r.advance(this.r.lookingAt(/^[ \t]*#\+/))) {
const blockM = this.r.lookingAt(/^begin_(\S+)/i);
if (blockM) {
this.r.resetOffset(offset); // reset so that parse*Block can match starting #+
const blockType = blockM[1].toLowerCase();
switch (blockType) {
case 'center':
return this.parseBlock('center-block', 'center', affiliated);
case 'comment':
return this.parseCommentBlock(affiliated);
case 'example':
return this.parseExampleBlock(affiliated);
case 'export':
return this.parseExportBlock(affiliated);
case 'quote':
return this.parseBlock('quote-block', 'quote', affiliated);
case 'src':
return this.parseSrcBlock(affiliated);
case 'verse':
return this.parseBlock('verse-block', 'verse', affiliated);
default:
return this.parseSpecialBlock(affiliated);
}
}
// TODO: parse babel-call
// TODO: parse dynamic-block
if (this.r.lookingAt(/^\S+:/)) {
this.r.resetOffset(offset); // reset, so that parseKeyword can match starting #+
return this.parseKeyword(affiliated);
}
// fallback: parse as paragraph
this.r.resetOffset(offset);
return this.parseParagraph(affiliated);
}
}
// Footnote Definition.
if (this.r.lookingAt(footnoteDefinitionRe)) {
return this.parseFootnoteDefinition(affiliated);
}
// Horizontal Rule.
if (this.r.lookingAt(/^[ \t]*-{5,}[ \t]*$/m)) {
return this.parseHorizontalRule(affiliated);
}
// Diary Sexp.
if (this.r.lookingAt(/^%%\(/)) {
return this.parseDiarySexp(affiliated);
}
// Table.
const ruleRe = /[ \t]*\+(-+\+)+[ \t]*$/m;
if (this.r.lookingAt(/^[ \t]*\|/)) {
return this.parseTable(affiliated);
}
else if (this.r.lookingAt(ruleRe)) {
// There is no strict definition of a table.el table. Try to
// prevent false positive while being quick.
// Heuristic: skip the rule line and the lines up to the first one that does not start
// with "|"; accept only if that made progress and the reader is at another rule line.
const offset = this.r.offset();
this.r.advance(this.r.line());
const nextLineOffset = this.r.offset();
const firstNonTable = this.r.match(/^[ \t]*($|[^|])/m)?.index ?? null;
this.r.advance(firstNonTable);
const isTable = this.r.offset() > nextLineOffset && this.r.lookingAt(ruleRe);
this.r.resetOffset(offset);
if (isTable) {
return this.parseTable(affiliated);
}
// fallthrough
}
// List.
if (this.r.lookingAt(this.re.listItemRe())) {
if (structure === undefined) {
// Computing the structure moves the reader; restore the offset before parsing the list.
const offset = this.r.offset();
structure = this.parseListStructure();
this.r.resetOffset(offset);
}
return this.parseList(structure, affiliated);
}
// Default element: Paragraph.
return this.parseParagraph(affiliated);
}
parseObjects(restriction) {
const objects = [];
// offset where previously parsed object ends.
let prevEnd = this.r.offset();
while (!this.r.eof()) {
const prevOffset = this.r.offset();
const mobject = this.parseObject(restriction);
if (!mobject)
break;
// Handle parseObject returning result without advancing the
// cursor. This is always a programming error and leads to
// infinite loop here.
if (this.r.offset() === prevOffset) {
throw new Error(`no progress (parseObject). If you see this, please report an issue to https://github.com/rasendubi/uniorg/issues with the following information: ${JSON.stringify(mobject)}, text: ${JSON.stringify(this.r.rest())}, objects: ${JSON.stringify(objects, null, 2)}`);
}
const [objectBegin, o] = mobject;
if (objectBegin !== prevEnd) {
// parse text before object
const value = this.r.substring(prevEnd, objectBegin);
objects.push(u('text', this.addPosition({ value }, prevEnd, objectBegin)));
}
// @ts-expect-error contentsBegin is not defined for "literals"
const cbeg = o.contentsBegin;
// @ts-expect-error contentsBegin is not defined for "literals"
const cend = o.contentsEnd;
if (cbeg !== undefined && cend !== undefined) {
this.r.narrow(cbeg, cend);
appendChildren(o, this.parseObjects(restrictionFor(o.type)));
this.r.widen();
}
objects.push(o);
prevEnd = this.r.offset();
}
this.r.resetOffset(prevEnd);
// handle text after the last object
const text = this.r.rest();
this.r.advance(text.length);
if (text.trim().length) {
objects.push(u('text', this.addPosition({ value: text }, prevEnd, this.r.offset())));
}
return objects;
}
parseObject(restriction) {
// table-cell is only allowed inside table-row and always succeed.
if (restriction.has('table-cell')) {
return [this.r.offset(), this.parseTableCell()];
}
// citation-common-prefix is only allowed inside citation as a
// first element. We remove citation-common-prefix from the
// restriction as soon as we tried to parse it once.
if (restriction.has('citation-common-prefix')) {
restriction.delete('citation-common-prefix');
const begin = this.r.offset();
const prefix = this.parseCitationCommonPrefix();
if (prefix) {
return [begin, prefix];
}
// fall-through
}
// citation-reference is only allowed inside citation
if (restriction.has('citation-reference')) {
const offset = this.r.offset();
const ref = this.parseCitationReference();
if (ref) {
return [offset, ref];
}
// fall-through
}
// citation-common-prefix is only allowed inside citation as a
// first element. We remove citation-common-prefix from the
// restriction as soon as we tried to parse it once.
if (restriction.has('citation-common-suffix')) {
restriction.delete('citation-common-suffix');
const begin = this.r.offset();
const suffix = this.parseCitationCommonSuffix();
if (suffix) {
return [begin, suffix];
}
// fall-through
}
// citation-prefix is only allowed as the first element inside
// citation-reference. We remove it from restriction as soon as we
// tried to parse it.
if (restriction.has('citation-prefix')) {
restriction.delete('citation-prefix');
const offset = this.r.offset();
const prefix = this.parseCitationPrefix();
if (prefix) {
return [offset, prefix];
}
// fall-through
}
// citation-key can only occur once inside citation-reference
if (restriction.has('citation-key')) {
restriction.delete('citation-key');
const offset = this.r.offset();
const key = this.parseCitationKey();
if (key) {
return [offset, key];
}
// fall-through
}
// citation-suffix can only occur once inside citation-reference
if (restriction.has('citation-suffix')) {
restriction.delete('citation-suffix');
const offset = this.r.offset();
const suffix = this.parseCitationSuffix();
if (suffix) {
return [offset, suffix];
}
// fall-through
}
// 1. Search for pattern that probably starts an object.
// 2. Try to parse object at that position.
// 3. If not a valid object, advance by one char and repeat.
const objectRe = this.re.objectRe();
while (!this.r.eof()) {
const m = this.r.match(objectRe);
if (!m)
return null;
this.r.advance(m.index);
const begin = this.r.offset();
const o = this.tryParseObject(restriction);
if (o) {
if (begin === this.r.offset()) {
throw new Error('no progress (tryParseObject). If you see this, please report an issue to https://github.com/rasendubi/uniorg/issues.');
}
return [begin, o];
}
this.r.resetOffset(begin);
// Matching objectRegexp does not guarantee that we've found a
// valid object (e.g., italic without closing /). Advance cursor
// by one char and try searching for the next object.
this.r.advance(1);
}
return null;
}
tryParseObject(restriction) {
const c = this.r.peek(2);
switch (c[0]) {
case '^':
if (restriction.has('superscript')) {
return this.parseSuperscript();
}
break;
case '_':
const offset = this.r.offset();
const subscript = restriction.has('subscript') && this.parseSubscript();
if (subscript) {
return subscript;
}
this.r.resetOffset(offset);
if (restriction.has('underline')) {
return this.parseUnderline();
}
break;
case '*':
if (restriction.has('bold')) {
return this.parseBold();
}
break;
case '/':
if (restriction.has('italic')) {
return this.parseItalic();
}
break;
case '~':
if (restriction.has('code')) {
return this.parseCode();
}
break;
case '=':
if (restriction.has('verbatim')) {
return this.parseVerbatim();
}
break;
case '+':
if (restriction.has('strike-through')) {
return this.parseStrikeThrough();
}
break;
case '@':
if (restriction.has('export-snippet')) {
return this.parseExportSnippet();
}
break;
case '$':
if (restriction.has('latex-fragment')) {
return this.parseLatexFragment();
}
break;
case '<':
if (c[1] === '<') {
// TODO: radio target / target
}
else {
const offset = this.r.offset();
const ts = restriction.has('timestamp') && this.parseTimestamp();
if (ts)
return ts;
this.r.resetOffset(offset);
const link = restriction.has('link') && this.parseLink();
if (link)
return link;
this.r.resetOffset(offset);
}
break;
case '\\':
if (c[1] === '\\') {
if (restriction.has('line-break')) {
return this.parseLineBreak();
}
}
else {
const offset = this.r.offset();
const entity = restriction.has('entity') && this.parseEntity();
if (entity)
return entity;
this.r.resetOffset(offset);
const fragment = restriction.has('latex-fragment') && this.parseLatexFragment();
if (fragment)
return fragment;
this.r.resetOffset(offset);
}
break;
case '[':
if (c[1] === '[') {
// normal link
if (restriction.has('link')) {
return this.parseLink();
}
}
else if (c[1] === 'f') {
if (restriction.has('footnote-reference')) {
return this.parseFootnoteReference();
}
}
else if (c[1] === 'c') {
if (restriction.has('citation')) {
return this.parseCitation();
}
}
else {
const offset = this.r.offset();
const ts = restriction.has('timestamp') && this.parseTimestamp();
if (ts)
return ts;
this.r.resetOffset(offset);
const cookie = restriction.has('statistics-cookie') &&
this.parseStatisticsCookie();
if (cookie)
return cookie;
this.r.resetOffset(offset);
}
break;
default:
// This is probably a plain link.
if (restriction.has('link')) {
return this.parseLink();
}
}
return null;
}
// Elements parsers
parseSection() {
const contentsBegin = this.r.offset();
const m = this.r.forceLookingAt(/^(\*+)[ \t]/m);
const level = m[1].length;
this.r.advance(this.r.line());
const endOfSubtree = this.r.match(new RegExp(`^\\*{1,${level}}[ \\t]`, 'm'));
const contentsEnd = endOfSubtree
? this.r.offset() + endOfSubtree.index
: this.r.endOffset();
this.r.resetOffset(contentsEnd);
return u('section', this.addPosition({ contentsBegin, contentsEnd }, contentsBegin, contentsEnd), []);
}
parseHeadline() {
const begin = this.r.offset();
this.r.advance(this.r.line());
this.r.narrow(begin, this.r.offset());
const stars = this.r.advance(this.r.forceLookingAt(/^(\*+)[ \t]+/));
const level = stars[1].length;
const todoM = this.r.advance(this.r.lookingAt(new RegExp('^' + this.options.todoKeywords.join('|'))));
const todoKeyword = todoM?.[0] ?? null;
this.r.advance(this.r.lookingAt(/^[ \t]*/));
const priorityM = this.r.advance(this.r.lookingAt(/^\[#.\]/));
const priority = priorityM?.[0][2] ?? null;
this.r.advance(this.r.lookingAt(/^[ \t]*/));
const commented = !!this.r.advance(this.r.lookingAt(/^COMMENT/));
this.r.advance(this.r.lookingAt(/^[ \t]*/));
const titleStart = this.r.offset();
const tagsM = this.r.lookingAt(/^(.*?)[ \t]+:([\p{L}\p{N}_@#%:]+):[ \t]*$/mu);
const tags = tagsM?.[2].split(':') ?? [];
const titleEnd = tagsM
? titleStart + tagsM.index + tagsM[1].length
: titleStart + this.r.forceLookingAt(/.*/)[0].length;
const rawValue = this.r.substring(titleStart, titleEnd);
const contentsBegin = titleStart;
const contentsEnd = titleEnd;
// Reset line restriction.
this.r.widen();
this.parseEmptyLines();
return u('headline', this.addPosition({
level,
todoKeyword,
priority,
commented,
rawValue,
tags,
contentsBegin,
contentsEnd,
}, begin, titleEnd), []);
}
parsePlanning() {
this.r.narrow(this.r.offset(), this.r.offset() + this.r.line().length);
this.r.advance(this.r.match(/^[ \t]*/));
const begin = this.r.offset();
let scheduled = null;
let deadline = null;
let closed = null;
while (true) {
const m = this.r.match(/\b(SCHEDULED:|DEADLINE:|CLOSED:) *[\[<]([^\]>]+)[\]>]/);
if (!m)
break;
this.r.advance(m.index + m[1].length);
this.r.advance(this.r.match(/^[ \t]*/));
const keyword = m[1];
const time = this.parseTimestamp();
if (keyword === 'SCHEDULED:')
scheduled = time;
if (keyword === 'DEADLINE:')
deadline = time;
if (keyword === 'CLOSED:')
closed = time;
}
const end = this.r.offset();
this.r.widen();
this.r.advance(this.r.line());
this.parseEmptyLines();
return u('planning', this.addPosition({ scheduled, deadline, closed }, begin, end));
}
parsePropertyDrawer() {
const begin = this.r.offset();
this.r.advance(this.r.line());
const contentsBegin = this.r.offset();
const endM = this.r.forceMatch(/^[ \t]*:END:[ \t]*$/im);
this.r.advance(endM.index);
const contentsEnd = this.r.offset();
this.r.advance(this.r.line());
const end = this.r.offset();
this.parseEmptyLines();
return u('property-drawer', this.addPosition({ contentsBegin, contentsEnd }, begin, end), []);
}
parseBlock(type, pattern, affiliated) {
const endM = this.r.match(new RegExp(`^[ \\t]*#\\+end_${pattern}[ \\t]*$`, 'im'));
if (!endM) {
// Incomplete block: parse it as a paragraph.
return this.parseParagraph(affiliated);
}
const begin = this.r.offset();
const contentsBegin = begin + this.r.line().length;
const contentsEnd = begin + endM.index;
this.r.resetOffset(contentsEnd);
this.r.advance(this.r.line());
const end = this.r.offset();
this.parseEmptyLines();
return u(type, this.addPosition({ affiliated, contentsBegin, contentsEnd }, begin, end), []);
}
parseComment() {
let valueLines = [];
this.r.advance(this.r.forceLookingAt(/^[ \t]*# ?/));
const start = this.r.offset();
valueLines.push(this.r.advance(this.r.line()));
while (true) {
const m = this.r.advance(this.r.lookingAt(/^[ \t]*#( |$)/m));
if (!m)
break;
valueLines.push(this.r.advance(this.r.line()));
}
let end = this.r.offset();
if (this.r.substring(end - 1, end) === '\n') {
end -= 1;
}
let value = valueLines.join('');
if (value[value.length - 1] === '\n') {
value = value.substring(0, value.length - 1);
}
return u('comment', this.addPosition({ value }, start, end));
}
parseFixedWidth(affiliated) {
let valueLines = [];
const begin = this.r.offset();
while (true) {
const m = this.r.lookingAt(/^[ \t]*: ?(.*)$/m);
if (!m)
break;
this.r.advance(this.r.line());
valueLines.push(m[1]);
}
const value = valueLines.join('\n');
let end = this.r.offset();
if (this.r.substring(end - 1, end) === '\n') {
end -= 1;
}
return u('fixed-width', this.addPosition({ affiliated, value }, begin, end));
}
parseCommentBlock(affiliated) {
const comment = this.parseBlock('comment-block', 'comment', affiliated);
if (comment.type !== 'comment-block') {
// parsed as paragraph
return comment;
}
const { type: _, contentsBegin, contentsEnd, children, ...rest } = comment;
const value = this.r.substring(contentsBegin, contentsEnd);
return u('comment-block', { ...rest, value, affiliated });
}
parseSrcBlock(affiliated) {
const endM = this.r.match(/^[ \t]*#\+end_src[ \t]*$/im);
if (!endM) {
// Incomplete block: parse it as a paragraph.
return this.parseParagraph(affiliated);
}
const headerM = this.r.forceMatch(/^[ \t]*#\+begin_src(?: +(?<language>\S+))?(?<switches>(?: +(?:-(?:l ".+"|[ikr])|[-+]n(?: *[0-9]+)?))+)?(?<parameters>.*)[ \t]*$/im);
const { language, switches, parameters } = headerM.groups;
const begin = this.r.offset();
const contentsBegin = begin + this.r.line().length;
const contentsEnd = begin + endM.index;
const value = unescapeCodeInString(this.r.substring(contentsBegin, contentsEnd));
this.r.resetOffset(contentsEnd);
this.r.advance(this.r.line());
const end = begin + endM.index + endM[0].length;
this.parseEmptyLines();
return u('src-block', this.addPosition({
affiliated,
language,
switches: switches?.trim() ?? null,
// using || to convert empty strings to null as well
parameters: parameters.trim() || null,
value,
}, begin, end));
}
parseExampleBlock(affiliated) {
// TODO: parse switches
const block = this.parseBlock('example-block', 'example', affiliated);
if (block.type !== 'example-block') {
// parsed as paragraph
return block;
}
const { type: _, contentsBegin, contentsEnd, children, ...rest } = block;
const value = this.r.substring(contentsBegin, contentsEnd);
return u('example-block', { ...rest, value, affiliated });
}
parseExportBlock(affiliated) {
const endM = this.r.match(/^[ \t]*#\+end_export[ \t]*$/im);
if (!endM) {
// Incomplete block: parse it as a paragraph.
return this.parseParagraph(affiliated);
}
const headerM = this.r.match(/^[ \t]*#\+begin_export(?:[ \t]+(\S+))?[ \t]*$/im);
const backend = headerM?.[1] ?? null;
const begin = this.r.offset();
const contentsBegin = begin + this.r.line().length;
const contentsEnd = begin + endM.index;
const value = unescapeCodeInString(this.r.substring(contentsBegin, contentsEnd));
this.r.resetOffset(contentsEnd);
this.r.advance(this.r.line());
this.parseEmptyLines();
const end = begin + endM.index + endM[0].length;
return u('export-block', this.addPosition({ affiliated, backend, value }, begin, end));
}
parseSpecialBlock(affiliated) {
const blockType = this.r.forceLookingAt(/[ \t]*#\+begin_(\S+)/i)[1];
const endM = this.r.match(new RegExp(`^[ \\t]*#\\+end_${escapeRegExp(blockType)}[ \\t]*$`, 'im'));
if (!endM) {
this.r.message('incomplete block', this.r.offset(), 'uniorg');
// Incomplete block: parse it as a paragraph.
return this.parseParagraph(affiliated);
}
const begin = this.r.offset();
const contentsBegin = begin + this.r.line().length;
const contentsEnd = begin + endM.index;
this.r.resetOffset(contentsEnd);
this.r.advance(this.r.line());
this.parseEmptyLines();
const end = begin + endM.index + endM[0].length;
return u('special-block', this.addPosition({ affiliated, blockType, contentsBegin, contentsEnd }, begin, end), []);
}
parseAffiliatedKeywords() {
const offset = this.r.offset();
const result = {};
while (!this.r.eof()) {
const keywordM = this.r.lookingAt(affiliatedRe);
if (!keywordM)
break;
const rawKeyword = (keywordM.groups.dualKeyword ??
keywordM.groups.regularKeyword ??
keywordM.groups.attributeKeyword).toUpperCase();
const keyword = keywordTranslationTable[rawKeyword] ?? rawKeyword;
// true if keyword should have its value parsed
const isParsed = parsedKeywords.has(keyword);
this.r.advance(keywordM);
this.r.narrow(this.r.offset(), this.r.offset() + this.r.line().length);
const mainValue = isParsed
? this.parseObjects(restrictionFor('keyword'))
: this.r.rest().trim();
this.r.widen();
this.r.advance(this.r.line());
const isDual = dualKeywords.has(keyword);
const dualValue = isDual ? (keywordM.groups.dualValue ?? null) : null;
const value = dualValue === null ? mainValue : [mainValue, dualValue];
if (multipleKeywords.has(keyword) ||
// Attributes can always appear on multiple lines.
keyword.match(/^ATTR_/)) {
result[keyword] = result[keyword] || [];
result[keyword].push(value);
}
else {
result[keyword] = value;
}
}
// If affiliated keywords are orphaned: move back to first one.
// They will be parsed as a paragraph.
if (this.r.lookingAt(/^[ \t]*$/m)) {
this.r.resetOffset(offset);
return {};
}
return result;
}
parseKeyword(affiliated) {
const m = this.r.forceLookingAt(/[ \t]*#\+(\S+):(.*)/);
const key = m[1].toUpperCase();
const value = m[2].trim();
const begin = this.r.offset();
this.r.advance(this.r.line());
const end = this.r.offset();
this.parseEmptyLines();
return u('keyword', this.addPosition({ affiliated, key, value }, begin, end));
}
parseLatexEnvironment(affiliated) {
const beginOffset = this.r.offset();
const beginM = this.r.advance(this.r.forceLookingAt(latexBeginEnvironmentRe));
const name = beginM[1];
const endM = this.r.match(latexEndEnvironmentRe(name));
if (!endM) {
// Incomplete latex environment: parse it as a paragraph.
this.r.resetOffset(beginOffset);
return this.parseParagraph(affiliated);
}
this.r.advance(endM);
const endOffset = this.r.offset();
this.parseEmptyLines();
const value = this.r.substring(beginOffset, endOffset);
return u('latex-environment', this.addPosition({ affiliated, value }, beginOffset, endOffset));
}
parseDrawer(affiliated) {
const start = this.r.offset();
const endM = this.r.match(/^[ \t]*:END:[ \t]*$/im);
if (!endM) {
this.r.message('incomplete drawer', this.r.offset(), 'uniorg');
// Incomplete drawer: parse it as a paragraph.
return this.parseParagraph(affiliated);
}
const end = start + endM.index + endM[0].length;
const contentsEnd = this.r.offset() + endM.index;
const name = this.r.forceLookingAt(drawerRe)[1];
this.r.advance(this.r.line());
const contentsBegin = this.r.offset();
this.r.resetOffset(contentsEnd);
this.r.advance(this.r.line());
this.parseEmptyLines();
return u('drawer', this.addPosition({ affiliated, name, contentsBegin, contentsEnd }, start, end), []);
}
parseClock() {
const start = this.r.offset();
this.r.advance(this.r.forceMatch(/^[ \t]*CLOCK:[ \t]*/));
const value = this.parseTimestamp();
this.r.advance(this.r.match(/^[ \t]+=>[ \t]*/));
const durationM = this.r.advance(this.r.lookingAt(/^(\S+)[ \t]*$/m));
const duration = durationM ? durationM[1] : null;
const status = duration ? 'closed' : 'running';
const end = this.r.offset();
this.parseEmptyLines();
return u('clock', this.addPosition({ value, duration, status }, start, end));
}
parseNodeProperty() {
const start = this.r.offset();
const propertyRe = /^[ \t]*:(?<key>\S+):(?:(?<value1>$)|[ \t]+(?<value2>.*?))[ \t]*$/m;
const m = this.r.forceLookingAt(propertyRe);
const key = m.groups['key'];
const value = m.groups['value1'] ?? m.groups['value2'];
const end = this.r.offset() + m.index + m[0].length;
this.r.advance(this.r.line());
return u('node-property', this.addPosition({ key, value }, start, end));
}
parseParagraph(affiliated) {
const begin = this.r.offset();
const contentsBegin = begin;
this.r.advance(this.r.line());
let next = null;
while ((next = this.r.match(this.re.paragraphSeparateRe()))) {
this.r.advance(next.index);
// A matching `paragraphSeparateRe` is not necessarily the end
// of the paragraph. In particular, drawers, blocks or LaTeX
// environments opening lines must be closed. Moreover keywords
// with a secondary value must belong to "dual keywords".
const blockBeginM = this.r.lookingAt(/[ \t]*#\+begin_(\S+)/i);
if (blockBeginM) {
const blockEndM = this.r.match(new RegExp(`^[ \\t]*#\\+end_${blockBeginM[1]}[ \\t]*$`, 'im'));
if (!blockEndM) {
this.r.advance(this.r.line());
continue;
}
break;
}
const drawerM = this.r.lookingAt(drawerRe);
if (drawerM) {
const endM = this.r.match(/^[ \t]*:END:[ \t]*$/im);
if (!endM) {
this.r.advance(this.r.line());
continue;
}
break;
}
const latexEnvironmentM = this.r.lookingAt(latexBeginEnvironmentRe);
if (latexEnvironmentM) {
const name = latexEnvironmentM[1];
const endM = this.r.match(latexEndEnvironmentRe(name));
if (!endM) {
this.r.advance(this.r.line());
continue;
}
break;
}
const dualKeywordM = this.r.lookingAt(/[ \t]*#\+(\S+)\[.*\]:/);
if (dualKeywordM) {
if (!dualKeywords.has(dualKeywordM[1].toLowerCase())) {
this.r.advance(this.r.line());
continue;
}
break;
}
// Everything else unambigously ends paragraph.
break;
}
const contentsEnd = next ? this.r.offset() : this.r.endOffset();
const end = contentsEnd;
this.r.resetOffset(contentsEnd);
this.parseEmptyLines();
return u('paragraph', this.addPosition({ affiliated, contentsBegin, contentsEnd }, begin, end), []);
}
/**
 * Parses a footnote definition starting at the cursor.
 *
 * The definition runs until the first match of `footnoteDefinitionSeparatorRe` after its first
 * line, or to the end of the reader's range when there is none. Contents begin after the
 * first `]` and any whitespace that follows it.
 *
 * @param affiliated affiliated keywords collected for this element.
 * @returns a `footnote-definition` node with an empty children array.
 */
parseFootnoteDefinition(affiliated) {
const start = this.r.offset();
const m = this.r.forceLookingAt(footnoteDefinitionRe);
const label = m[1];
// Same offset as `start`: the reader has not moved yet.
const begin = this.r.offset();
this.r.advance(this.r.line());
const endM = this.r.match(footnoteDefinitionSeparatorRe);
// Move to the separator; endM?.index is undefined when no separator was found.
this.r.advance(endM?.index);
let contentsEnd = endM ? this.r.offset() : this.r.endOffset();
// The separator text starts with "[", i.e. another footnote definition follows.
if (endM && endM[0][0] === '[') {
// At a new footnote definition, make sure we end before any
// affiliated keyword above.
let lines = this.r.substring(begin, this.r.offset()).split('\n');
// drop first line because this is the line definition starts,
// drop last line because it is empty.
lines = lines.slice(1, lines.length - 1);
// Walk backwards over trailing lines that are affiliated keywords, moving the reader back
// over each of them.
while (lines.length) {
const line = lines.pop();
if (line.match(affiliatedRe)?.index === 0) {
// -1 to compensate for \n
this.r.advance(-line.length - 1);
}
else {
break;
}
}
contentsEnd = this.r.offset();
}
const end = contentsEnd;
// Locate the start of the contents within the definition's own range.
this.r.narrow(begin, contentsEnd);
this.r.advance(this.r.forceMatch(/\][ \r\t\n]*/m));
const contentsBegin = this.r.offset();
this.r.widen();
this.r.resetOffset(contentsEnd);
this.parseEmptyLines();
return u('footnote-definition', this.addPosition({ affiliated, label, contentsBegin, contentsEnd }, start, end), []);
}
parseHorizontalRule(affiliated) {
const start = this.r.offset();
this.r.advance(this.r.line());
const end = this.r.offset();
this.parseEmptyLines();
return u('horizontal-rule', this.addPosition({ affiliated }, start, end));
}
parseDiarySexp(affiliated) {
const start = this.r.offset();
const value = this.r.forceLookingAt(/^(%%\(.*)[ \t]*$/m)[1];
this.r.advance(this.r.line());
const end = this.r.offset();
this.parseEmptyLines();
return u('diary-sexp', this.addPosition({ affiliated, value }, start, end));
}
parseTable(affiliated) {
const start = this.r.offset();
const contentsBegin = this.r.offset();
const tableType = this.r.lookingAt(/^[ \t]*\|/)
? 'org'
: 'table.el';
const endRe = new RegExp(`^[ \\t]*($|[^| \\t${tableType === 'org' ? '' : '+'}])`, 'm');
const endM = this.r.match(endRe);
const contentsEnd = endM ? contentsBegin + endM.index : this.r.endOffset();
this.r.resetOffset(contentsEnd);
let tblfm = '';
while (true) {
const tblfmM = this.r.lookingAt(/^[ \t]*#\+TBLFM: +(.*?)[ \t]*$/m);
if (!tblfmM)
break;
tblfm = tblfm + tblfmM[1];
this.r.advance(this.r.line());
}
const end = this.r.offset();
this.parseEmptyLines();
if (tableType === 'org') {
return u('table', this.addPosition({ tableType, tblfm, contentsBegin, contentsEnd }, start, end), []);
}
else {
return u('table', this.addPosition({
affiliated,
tableType,
tblfm,
value: this.r.substring(contentsBegin, contentsEnd),
}, start, end));
}
}
parseTableRow() {
const start = this.r.offset();
const rowType = this.r.lookingAt(/^[ \t]*\|-/)
? 'rule'
: 'standard';
this.r.advance(this.r.forceMatch(/\|/));
const contentsBegin = this.r.offset();
this.r.advance(this.r.forceMatch(/^.*?[ \t]*$/m));
const end = this.r.offset();
// A table rule has no contents. In that case, ensure
// contentsBegin matches contentsEnd.
const contentsEnd = rowType === 'rule' ? contentsBegin : this.r.offset();
this.r.advance(this.r.line());
return u('table-row', this.addPosition({ rowType, contentsBegin, contentsEnd }, start, end), []);
}
parseTableCell() {
const start = this.r.offset();
this.r.advance(this.r.forceLookingAt(/^[ \t]*/));
const contentsBegin = this.r.offset();
const m = this.r.advance(this.r.forceLookingAt(/(.*?)[ \t]*(?:\||$)/m));
const contentsEnd = contentsBegin + m[1].length;
const end = contentsBegin + m[0].length;
return u('table-cell', this.addPosition({ contentsBegin, contentsEnd }, start, end), []);
}
parseList(structure, affiliated) {
const start = this.r.offset();
const contentsBegin = this.r.offset();
const item = structure.find((x) => x.begin === contentsBegin);
if (!item) {
throw new Error(`parseList: cannot find item. contentsBegin: ${contentsBegin}, structure: ${JSON.stringify(structure, null, 2)}`);
}
const indent = item.indent;
const listType = item.tag
? 'descriptive'
: '-+*'.includes(item.bullet[0])
? 'unordered'
: 'ordered';
let pos = item.end;
while (true) {
const next = structure.find((x) => x.begin === pos && x.indent === indent);
if (!next)
break;
pos = next.end;
}
const contentsEnd = pos;
this.r.resetOffset(contentsEnd);
const end = this.r.offset();
return u('plain-list', this.addPosition({
affiliated,
indent,
listType,
contentsBegin,
contentsEnd,
// Exposing structure here is temporary as it gets removed in parseElements(). It is only exposed so
// that parseElements() can pick it up and use it for parsing list items.
structure,
}, start, end), []);
}
parseListItem(structure) {
const start = this.r.offset();
const m = this.r.advance(this.r.forceMatch(this.re.fullListItemRe()));
const bullet = m.groups.bullet;
const counter = m.groups.counter ?? null;
const checkbox = m.groups.checkbox === '[ ]'
? 'off'
: m.groups.checkbox?.toLowerCase() === '[x]'
? 'on'
: m.groups.checkbox === '[-]'
? 'trans'
: null;
const item = structure.find((x) => x.begin === start);
const contentsBegin = this.r.offset();
const contentsEnd = item.end;
this.r.resetOffset(contentsEnd);
const end = this.r.offset();
return u('list-item', this.addPosition({
indent: item.indent,
bullet,
counter,
checkbox,
contentsBegin,
contentsEnd,
structure,
}, start, end), item.tag ? [item.tag] : []);
}
parseListStructure() {
const items = [];
const struct = [];
while (true) {
if (this.r.eof() || this.r.match(this.re.listEndRe())?.index === 0) {
break;
}
const m = this.r.match(this.re.listItemRe());
if (m) {
const indent = (m.groups.indent1?.length || 0) + (m.groups.indent2?.length || 0);
// end previous siblings
while (items.length && items[items.length - 1].indent >= indent) {
const item = items.pop();
item.end = this.r.offset();
struct.push(item);
}
const fullM = this.r.forceMatch(this.re.fullListItemRe());
const { bullet, counter, checkbox } = fullM.groups;
if (indent === 0 && bullet.startsWith('*')) {
// *-bullets cannot start at 0 indent—this is headline, not a bullet
break;
}
// js doesn't have a way to get start offset of a selected
// group, so we add lengths of all groups before it.
let tag = null;
if (fullM.groups.tag !== undefined) {
const tagStartOffset = this.r.offset() +
(fullM.groups.indent?.length ?? 0) +
(fullM.groups.bullet?.length ?? 0) +
(fullM.groups.counter_group?.length ?? 0) +
(fullM.groups.checkbox_group?.length ?? 0);
const tagStopOffset = tagStartOffset + fullM.groups.tag.length;
this.r.narrow(tagStartOffset, tagStopOffset);
tag = u('list-item-tag', {}, this.parseObjects(restrictionFor('list-item')));
this.r.widen();
}
const item = {
begin: this.r.offset(),
indent,
bullet,
counter: counter ?? null,
checkbox: checkbox ?? null,
tag,
// will be overwritten later
end: this.r.offset(),
};
items.push(item);
this.r.advance(this.r.line());
}
else if (this.r.match(/^[ \t]*\n/)) {
// skip blank lines
this.r.advance(this.r.line());
}
else {
// At some text line. Check if it ends any previous item.
const indent = this.r.forceLookingAt(/^[ \t]*/)[0].length;
while (items.length && items[items.length - 1].indent >= indent) {
const item = items.pop();