prosemirror-model
Version:
ProseMirror's document model
414 lines (371 loc) • 14.1 kB
text/typescript
import {Fragment} from "./fragment"
import {NodeType} from "./schema"
type MatchEdge = {type: NodeType, next: ContentMatch}
/// Instances of this class represent a match state of a node type's
/// [content expression](#model.NodeSpec.content), and can be used to
/// find out whether further content matches here, and whether a given
/// position is a valid end of the node.
export class ContentMatch {
/// @internal
readonly next: MatchEdge[] = []
/// @internal
readonly wrapCache: (NodeType | readonly NodeType[] | null)[] = []
/// @internal
constructor(
/// True when this match state represents a valid end of the node.
readonly validEnd: boolean
) {}
/// @internal
static parse(string: string, nodeTypes: {readonly [name: string]: NodeType}): ContentMatch {
let stream = new TokenStream(string, nodeTypes)
if (stream.next == null) return ContentMatch.empty
let expr = parseExpr(stream)
if (stream.next) stream.err("Unexpected trailing text")
let match = dfa(nfa(expr))
checkForDeadEnds(match, stream)
return match
}
/// Match a node type, returning a match after that node if
/// successful.
matchType(type: NodeType): ContentMatch | null {
for (let i = 0; i < this.next.length; i++)
if (this.next[i].type == type) return this.next[i].next
return null
}
/// Try to match a fragment. Returns the resulting match when
/// successful.
matchFragment(frag: Fragment, start = 0, end = frag.childCount): ContentMatch | null {
let cur: ContentMatch | null = this
for (let i = start; cur && i < end; i++)
cur = cur.matchType(frag.child(i).type)
return cur
}
/// @internal
get inlineContent() {
return this.next.length != 0 && this.next[0].type.isInline
}
/// Get the first matching node type at this match position that can
/// be generated.
get defaultType(): NodeType | null {
for (let i = 0; i < this.next.length; i++) {
let {type} = this.next[i]
if (!(type.isText || type.hasRequiredAttrs())) return type
}
return null
}
/// @internal
compatible(other: ContentMatch) {
for (let i = 0; i < this.next.length; i++)
for (let j = 0; j < other.next.length; j++)
if (this.next[i].type == other.next[j].type) return true
return false
}
/// Try to match the given fragment, and if that fails, see if it can
/// be made to match by inserting nodes in front of it. When
/// successful, return a fragment of inserted nodes (which may be
/// empty if nothing had to be inserted). When `toEnd` is true, only
/// return a fragment if the resulting match goes to the end of the
/// content expression.
fillBefore(after: Fragment, toEnd = false, startIndex = 0): Fragment | null {
let seen: ContentMatch[] = [this]
function search(match: ContentMatch, types: readonly NodeType[]): Fragment | null {
let finished = match.matchFragment(after, startIndex)
if (finished && (!toEnd || finished.validEnd))
return Fragment.from(types.map(tp => tp.createAndFill()!))
for (let i = 0; i < match.next.length; i++) {
let {type, next} = match.next[i]
if (!(type.isText || type.hasRequiredAttrs()) && seen.indexOf(next) == -1) {
seen.push(next)
let found = search(next, types.concat(type))
if (found) return found
}
}
return null
}
return search(this, [])
}
/// Find a set of wrapping node types that would allow a node of the
/// given type to appear at this position. The result may be empty
/// (when it fits directly) and will be null when no such wrapping
/// exists.
findWrapping(target: NodeType): readonly NodeType[] | null {
for (let i = 0; i < this.wrapCache.length; i += 2)
if (this.wrapCache[i] == target) return this.wrapCache[i + 1] as (readonly NodeType[] | null)
let computed = this.computeWrapping(target)
this.wrapCache.push(target, computed)
return computed
}
/// @internal
computeWrapping(target: NodeType): readonly NodeType[] | null {
type Active = {match: ContentMatch, type: NodeType | null, via: Active | null}
let seen = Object.create(null), active: Active[] = [{match: this, type: null, via: null}]
while (active.length) {
let current = active.shift()!, match = current.match
if (match.matchType(target)) {
let result: NodeType[] = []
for (let obj: Active = current; obj.type; obj = obj.via!)
result.push(obj.type)
return result.reverse()
}
for (let i = 0; i < match.next.length; i++) {
let {type, next} = match.next[i]
if (!type.isLeaf && !type.hasRequiredAttrs() && !(type.name in seen) && (!current.type || next.validEnd)) {
active.push({match: type.contentMatch, type, via: current})
seen[type.name] = true
}
}
}
return null
}
/// The number of outgoing edges this node has in the finite
/// automaton that describes the content expression.
get edgeCount() {
return this.next.length
}
/// Get the _n_th outgoing edge from this node in the finite
/// automaton that describes the content expression.
edge(n: number): MatchEdge {
if (n >= this.next.length) throw new RangeError(`There's no ${n}th edge in this content match`)
return this.next[n]
}
/// @internal
toString() {
let seen: ContentMatch[] = []
function scan(m: ContentMatch) {
seen.push(m)
for (let i = 0; i < m.next.length; i++)
if (seen.indexOf(m.next[i].next) == -1) scan(m.next[i].next)
}
scan(this)
return seen.map((m, i) => {
let out = i + (m.validEnd ? "*" : " ") + " "
for (let i = 0; i < m.next.length; i++)
out += (i ? ", " : "") + m.next[i].type.name + "->" + seen.indexOf(m.next[i].next)
return out
}).join("\n")
}
/// @internal
static empty = new ContentMatch(true)
}
class TokenStream {
inline: boolean | null = null
pos = 0
tokens: string[]
constructor(
readonly string: string,
readonly nodeTypes: {readonly [name: string]: NodeType}
) {
this.tokens = string.split(/\s*(?=\b|\W|$)/)
if (this.tokens[this.tokens.length - 1] == "") this.tokens.pop()
if (this.tokens[0] == "") this.tokens.shift()
}
get next() { return this.tokens[this.pos] }
eat(tok: string) { return this.next == tok && (this.pos++ || true) }
err(str: string): never { throw new SyntaxError(str + " (in content expression '" + this.string + "')") }
}
type Expr =
{type: "choice", exprs: Expr[]} |
{type: "seq", exprs: Expr[]} |
{type: "plus", expr: Expr} |
{type: "star", expr: Expr} |
{type: "opt", expr: Expr} |
{type: "range", min: number, max: number, expr: Expr} |
{type: "name", value: NodeType}
function parseExpr(stream: TokenStream): Expr {
let exprs: Expr[] = []
do { exprs.push(parseExprSeq(stream)) }
while (stream.eat("|"))
return exprs.length == 1 ? exprs[0] : {type: "choice", exprs}
}
function parseExprSeq(stream: TokenStream): Expr {
let exprs: Expr[] = []
do { exprs.push(parseExprSubscript(stream)) }
while (stream.next && stream.next != ")" && stream.next != "|")
return exprs.length == 1 ? exprs[0] : {type: "seq", exprs}
}
function parseExprSubscript(stream: TokenStream): Expr {
let expr = parseExprAtom(stream)
for (;;) {
if (stream.eat("+"))
expr = {type: "plus", expr}
else if (stream.eat("*"))
expr = {type: "star", expr}
else if (stream.eat("?"))
expr = {type: "opt", expr}
else if (stream.eat("{"))
expr = parseExprRange(stream, expr)
else break
}
return expr
}
function parseNum(stream: TokenStream) {
if (/\D/.test(stream.next)) stream.err("Expected number, got '" + stream.next + "'")
let result = Number(stream.next)
stream.pos++
return result
}
function parseExprRange(stream: TokenStream, expr: Expr): Expr {
let min = parseNum(stream), max = min
if (stream.eat(",")) {
if (stream.next != "}") max = parseNum(stream)
else max = -1
}
if (!stream.eat("}")) stream.err("Unclosed braced range")
return {type: "range", min, max, expr}
}
function resolveName(stream: TokenStream, name: string): readonly NodeType[] {
let types = stream.nodeTypes, type = types[name]
if (type) return [type]
let result: NodeType[] = []
for (let typeName in types) {
let type = types[typeName]
if (type.isInGroup(name)) result.push(type)
}
if (result.length == 0) stream.err("No node type or group '" + name + "' found")
return result
}
function parseExprAtom(stream: TokenStream): Expr {
if (stream.eat("(")) {
let expr = parseExpr(stream)
if (!stream.eat(")")) stream.err("Missing closing paren")
return expr
} else if (!/\W/.test(stream.next)) {
let exprs = resolveName(stream, stream.next).map(type => {
if (stream.inline == null) stream.inline = type.isInline
else if (stream.inline != type.isInline) stream.err("Mixing inline and block content")
return {type: "name", value: type} as Expr
})
stream.pos++
return exprs.length == 1 ? exprs[0] : {type: "choice", exprs}
} else {
stream.err("Unexpected token '" + stream.next + "'")
}
}
// The code below helps compile a regular-expression-like language
// into a deterministic finite automaton. For a good introduction to
// these concepts, see https://swtch.com/~rsc/regexp/regexp1.html
type Edge = {term: NodeType | undefined, to: number | undefined}
// Construct an NFA from an expression as returned by the parser. The
// NFA is represented as an array of states, which are themselves
// arrays of edges, which are `{term, to}` objects. The first state is
// the entry state and the last node is the success state.
//
// Note that unlike typical NFAs, the edge ordering in this one is
// significant, in that it is used to contruct filler content when
// necessary.
function nfa(expr: Expr): Edge[][] {
let nfa: Edge[][] = [[]]
connect(compile(expr, 0), node())
return nfa
function node() { return nfa.push([]) - 1 }
function edge(from: number, to?: number, term?: NodeType) {
let edge = {term, to}
nfa[from].push(edge)
return edge
}
function connect(edges: Edge[], to: number) {
edges.forEach(edge => edge.to = to)
}
function compile(expr: Expr, from: number): Edge[] {
if (expr.type == "choice") {
return expr.exprs.reduce((out, expr) => out.concat(compile(expr, from)), [] as Edge[])
} else if (expr.type == "seq") {
for (let i = 0;; i++) {
let next = compile(expr.exprs[i], from)
if (i == expr.exprs.length - 1) return next
connect(next, from = node())
}
} else if (expr.type == "star") {
let loop = node()
edge(from, loop)
connect(compile(expr.expr, loop), loop)
return [edge(loop)]
} else if (expr.type == "plus") {
let loop = node()
connect(compile(expr.expr, from), loop)
connect(compile(expr.expr, loop), loop)
return [edge(loop)]
} else if (expr.type == "opt") {
return [edge(from)].concat(compile(expr.expr, from))
} else if (expr.type == "range") {
let cur = from
for (let i = 0; i < expr.min; i++) {
let next = node()
connect(compile(expr.expr, cur), next)
cur = next
}
if (expr.max == -1) {
connect(compile(expr.expr, cur), cur)
} else {
for (let i = expr.min; i < expr.max; i++) {
let next = node()
edge(cur, next)
connect(compile(expr.expr, cur), next)
cur = next
}
}
return [edge(cur)]
} else if (expr.type == "name") {
return [edge(from, undefined, expr.value)]
} else {
throw new Error("Unknown expr type")
}
}
}
function cmp(a: number, b: number) { return b - a }
// Get the set of nodes reachable by null edges from `node`. Omit
// nodes with only a single null-out-edge, since they may lead to
// needless duplicated nodes.
function nullFrom(nfa: Edge[][], node: number): readonly number[] {
let result: number[] = []
scan(node)
return result.sort(cmp)
function scan(node: number): void {
let edges = nfa[node]
if (edges.length == 1 && !edges[0].term) return scan(edges[0].to!)
result.push(node)
for (let i = 0; i < edges.length; i++) {
let {term, to} = edges[i]
if (!term && result.indexOf(to!) == -1) scan(to!)
}
}
}
// Compiles an NFA as produced by `nfa` into a DFA, modeled as a set
// of state objects (`ContentMatch` instances) with transitions
// between them.
function dfa(nfa: Edge[][]): ContentMatch {
let labeled = Object.create(null)
return explore(nullFrom(nfa, 0))
function explore(states: readonly number[]) {
let out: [NodeType, number[]][] = []
states.forEach(node => {
nfa[node].forEach(({term, to}) => {
if (!term) return
let set: number[] | undefined
for (let i = 0; i < out.length; i++) if (out[i][0] == term) set = out[i][1]
nullFrom(nfa, to!).forEach(node => {
if (!set) out.push([term, set = []])
if (set.indexOf(node) == -1) set.push(node)
})
})
})
let state = labeled[states.join(",")] = new ContentMatch(states.indexOf(nfa.length - 1) > -1)
for (let i = 0; i < out.length; i++) {
let states = out[i][1].sort(cmp)
state.next.push({type: out[i][0], next: labeled[states.join(",")] || explore(states)})
}
return state
}
}
function checkForDeadEnds(match: ContentMatch, stream: TokenStream) {
for (let i = 0, work = [match]; i < work.length; i++) {
let state = work[i], dead = !state.validEnd, nodes: string[] = []
for (let j = 0; j < state.next.length; j++) {
let {type, next} = state.next[j]
nodes.push(type.name)
if (dead && !(type.isText || type.hasRequiredAttrs())) dead = false
if (work.indexOf(next) == -1) work.push(next)
}
if (dead) stream.err("Only non-generatable nodes (" + nodes.join(", ") + ") in a required position (see https://prosemirror.net/docs/guide/#generatable)")
}
}