// aubade
// Version:
// filesystem-based content processor
// 212 lines (211 loc) • 6.81 kB
// JavaScript
import * as block from './registry/block.js';
import * as inline from './registry/inline.js';
import * as modifier from './registry/modifier.js';
import * as parent from './registry/parent.js';
/**
 * Block-level dispatch table keyed by a single character. `compose` looks up
 * the first unread character of the input here and tries the listed rules in
 * order; it always appends `parent.paragraph` after them, so a character that
 * has no entry goes straight to the paragraph rule.
 */
const dispatch = new Map([
['<', [parent.html]],
['`', [block.code]],
['#', [parent.heading]],
['>', [parent.quote]],
// for `-` and `*`, `block.linebreak` is tried before `block.list`
['-', [block.linebreak, block.list]],
['*', [block.linebreak, block.list]],
['_', [block.linebreak]],
// a leading backslash is routed directly to the paragraph rule
['\\', [parent.paragraph]],
]);
/**
 * Inline rules that `annotate` hands to `match`, in priority order: `match`
 * returns the first truthy token, so an earlier entry wins over a later one
 * whenever both could apply at the same position.
 */
const tidbits = [
inline.escape,
inline.comment,
inline.code,
inline.autolink,
inline.image,
inline.link,
modifier.strong,
modifier.emphasis,
modifier.strike,
// listed last, after every more specific rule — presumably the catch-all; confirm in registry/inline.js
inline.text,
];
/**
 * Character predicates exposed to every rule through the context.
 *
 * Each predicate answers whether the given string contains at least one
 * character of the class, so it is normally called with a single character.
 */
const is = {
  /**
   * Whether a delimiter run sitting between `before` and `after` may open
   * emphasis: it must not be followed by whitespace, and if it is followed by
   * punctuation it must be preceded by whitespace or punctuation.
   *
   * @param {string} before character immediately preceding the run
   * @param {string} after character immediately following the run
   * @returns {boolean}
   */
  'left-flanking'(before, after) {
    if (is.whitespace(after)) return false;
    return !is.punctuation(after) || is.whitespace(before) || is.punctuation(before);
  },

  /**
   * Mirror image of `left-flanking`: whether the run may close emphasis.
   *
   * @param {string} before character immediately preceding the run
   * @param {string} after character immediately following the run
   * @returns {boolean}
   */
  'right-flanking'(before, after) {
    if (is.whitespace(before)) return false;
    return !is.punctuation(before) || is.whitespace(after) || is.punctuation(after);
  },

  /**
   * Unicode letter, Unicode number, or underscore.
   *
   * @param {string | null | undefined} char
   * @returns {boolean}
   */
  alphanumeric(char) {
    // `RegExp#test` stringifies its argument, so a missing character (for
    // example reading at the end of the input) would be tested as the text
    // "undefined"/"null" and wrongly count as letters.
    if (char == null) return false;
    return /[\p{L}\p{N}_]/u.test(char);
  },

  /**
   * Unicode punctuation (`P*`) or symbol (`S*`) category.
   *
   * @param {string} char
   * @returns {boolean}
   */
  punctuation(char) {
    return /[\p{P}\p{S}]/u.test(char);
  },

  /**
   * Unicode space separator (`Zs`) or anything matched by `\s`.
   *
   * @param {string} char
   * @returns {boolean}
   */
  whitespace(char) {
    return /[\p{Zs}\s]/u.test(char);
  },
};
/**
 * Build the context object handed to every rule: a cursor over `source`, the
 * shared `is` predicates, a thin wrapper around `stack`, and the `compose` /
 * `annotate` entry points.
 *
 * @param {string} source text the cursor walks over
 * @param {Array<{ type: string }>} stack array shared with the caller; the wrapper mutates it in place
 * @returns {{ cursor: object, is: object, stack: object, compose: Function, annotate: Function }}
 */
function contextualize(source, stack) {
  // offset of the next unread character in `source`
  let position = 0;

  const cursor = {
    /** current offset into `source`; assignable so callers can rewind or jump */
    get index() {
      return position;
    },
    set index(value) {
      position = value;
    },

    /**
     * Scan from the cursor to the end of `source`, calling `update(offset)` on
     * every occurrence of `delimiter`, and remember the last occurrence for
     * which it returned a truthy value. Returns the text between the cursor
     * and that occurrence and moves the cursor onto it; returns '' without
     * moving when no occurrence was accepted.
     */
    consume(delimiter, update) {
      const width = delimiter.length;
      let accepted = -1;
      for (let at = position; at < source.length && at + width <= source.length; at++) {
        const candidate = width === 1 ? source[at] : source.slice(at, at + width);
        if (candidate === delimiter && update(at)) accepted = at;
      }
      if (accepted === -1) return '';

      const consumed = source.slice(position, accepted);
      position = accepted;
      return consumed;
    },

    /** advance past `text` when the input continues with it; report whether it did */
    eat(text) {
      if (text.length === 1) {
        if (source[position] !== text) return false;
        position++;
        return true;
      }
      const upcoming = source.slice(position, position + text.length);
      if (upcoming !== text) return false;
      position += text.length;
      return true;
    },

    /**
     * Return the next `length` characters and advance past them. A single
     * character read past the end yields `undefined` and still advances;
     * longer reads are clamped to what is left.
     */
    read(length) {
      if (length === 1) {
        const char = source[position];
        position++;
        return char;
      }
      const chunk = source.slice(position, position + length);
      position += chunk.length;
      return chunk;
    },

    /**
     * Move the cursor to the first match of `pattern` in the unread input and
     * return the text that was skipped; returns '' without moving when there
     * is no match.
     */
    locate(pattern) {
      const origin = position;
      const found = pattern.exec(source.slice(origin));
      if (!found) return '';
      position = origin + found.index;
      return source.slice(origin, position);
    },

    /**
     * Look ahead without moving. For a string: return it when the input
     * continues with it, '' otherwise. For a regular expression: return the
     * text in front of its first match, '' when there is no match.
     */
    peek(pattern) {
      if (typeof pattern !== 'string') {
        const found = pattern.exec(source.slice(position));
        return found ? source.slice(position, position + found.index) : '';
      }
      const upcoming =
        pattern.length === 1 ? source[position] : source.slice(position, position + pattern.length);
      return upcoming === pattern ? pattern : '';
    },

    /** character at a relative offset `n` from the cursor */
    see(n) {
      if (n === 0) return source[position];
      const index = position + n;
      // treat out-of-bounds as whitespace
      const outside = (n < 0 && index < 0) || index >= source.length;
      return outside ? ' ' : source[index];
    },

    /** skip over any run of `\s` characters at the cursor */
    trim() {
      while (position < source.length) {
        if (!/\s/.test(source[position])) break;
        position++;
      }
    },
  };

  const frames = {
    /** topmost token, or `undefined` when the stack is empty */
    peek() {
      return stack[stack.length - 1];
    },
    /** push `token` and hand it back so callers can `return stack.push(token)` */
    push(token) {
      stack.push(token);
      return token;
    },
    pop() {
      return stack.pop();
    },
    /** first token (searching from the bottom) of `type` that satisfies `predicate` */
    find(type, predicate = () => true) {
      return stack.find((candidate) => candidate.type === type && predicate(candidate));
    },
    /** take `token` out of the stack wherever it sits; `undefined` when absent */
    remove(token) {
      const at = stack.indexOf(token);
      return at === -1 ? undefined : stack.splice(at, 1)[0];
    },
  };

  return { cursor, is, stack: frames, compose, annotate };
}
/**
 * Create the root document from the source.
 *
 * Runs two passes: first the block rules are applied repeatedly to the
 * trimmed input to build the top-level tokens, then every top-level
 * `parent:*` token that still carries a non-empty `text` has that text
 * replaced by the inline tokens produced by `annotate`.
 *
 * @param {string} source raw document text
 * @returns {{ type: ':document', children: object[] }} the document root
 */
export function compose(source) {
  const root = { type: ':document', children: [] };
  const input = source.trim();
  const tree = root.children;
  const stack = [];

  let index = 0;
  while (index < input.length) {
    // every step gets a fresh cursor over the unread input; the stack is shared
    const context = contextualize(input.slice(index), stack);

    if (context.cursor.eat('\n')) {
      // A newline at the very start of the unread input closes the paragraphs
      // and quotes that are open on top of the stack. The top is re-read after
      // each pop so that the token underneath them is left in place.
      let top = stack[stack.length - 1];
      while (top?.type === 'parent:paragraph' || top?.type === 'parent:quote') {
        stack.pop();
        top = stack[stack.length - 1];
      }
    }

    const start = input[index + context.cursor.index];
    // rules registered for the leading character, then the paragraph fallback
    const rules = [...(dispatch.get(start) ?? []), parent.paragraph];
    const token = match({ ...context, rules });
    // a rule may hand back the token it is extending; only new tokens are appended
    if (token && token !== tree[tree.length - 1]) tree.push(token);

    // progress depends on a rule (or the newline above) having moved the cursor
    index += context.cursor.index;
  }

  // the block pass is over; nothing below reads the stack
  stack.length = 0;

  for (const node of tree) {
    const inlineable =
      node.type.startsWith('parent:') && 'children' in node && 'text' in node && node.text;
    if (!inlineable) continue;

    node.children = annotate(node.text);
    // @ts-expect-error - why does it need to be optional?
    delete node.text; // cleanup text after inline parsing
  }

  return root;
}
/**
 * Construct inline tokens from the source.
 *
 * Applies the `tidbits` rules at the current offset until the whole string has
 * been walked, collecting each newly produced token.
 *
 * @param {string} source inline text to tokenize
 * @returns {object[]} inline tokens in source order
 */
export function annotate(source) {
  const tree = [];
  const stack = [];

  for (let index = 0; index < source.length; ) {
    const previous = tree[tree.length - 1];
    // drop the top of the stack once it is no longer the most recent token
    if (previous !== stack[stack.length - 1]) stack.pop();

    const context = contextualize(source, stack);
    context.cursor.index = index;

    const token = match({ ...context, rules: tidbits });
    const fresh = token && token !== previous;
    if (fresh) tree.push(token);

    // resume from wherever the rules left the cursor
    index = context.cursor.index;
  }

  return tree;
}
/**
 * Try each rule in order against the same starting position and return the
 * first truthy token. The cursor is rewound after every rule that yields
 * nothing, so a failed attempt never leaks its position into the next one.
 *
 * @param {{ rules: Function[], cursor: { index: number } }} options the rules plus the context passed to each of them
 * @returns {object | null} the produced token, or `null` when no rule matched
 */
function match({ rules, ...context }) {
  const origin = context.cursor.index;

  for (const attempt of rules) {
    const produced = attempt(context);
    if (!produced) {
      context.cursor.index = origin;
      continue;
    }
    return produced;
  }

  return null;
}