UNPKG

aws-ddk-core

Version:

The AWS DataOps Development Kit is an open source development framework for customers that build data workflows and modern data architecture on AWS.

959 lines (955 loc) 34.7 kB
'use strict'; var cst = require('./cst.js'); var lexer = require('./lexer.js'); function includesToken(list, type) { for (let i = 0; i < list.length; ++i) if (list[i].type === type) return true; return false; } function findNonEmptyIndex(list) { for (let i = 0; i < list.length; ++i) { switch (list[i].type) { case 'space': case 'comment': case 'newline': break; default: return i; } } return -1; } function isFlowToken(token) { switch (token?.type) { case 'alias': case 'scalar': case 'single-quoted-scalar': case 'double-quoted-scalar': case 'flow-collection': return true; default: return false; } } function getPrevProps(parent) { switch (parent.type) { case 'document': return parent.start; case 'block-map': { const it = parent.items[parent.items.length - 1]; return it.sep ?? it.start; } case 'block-seq': return parent.items[parent.items.length - 1].start; /* istanbul ignore next should not happen */ default: return []; } } /** Note: May modify input array */ function getFirstKeyStartProps(prev) { if (prev.length === 0) return []; let i = prev.length; loop: while (--i >= 0) { switch (prev[i].type) { case 'doc-start': case 'explicit-key-ind': case 'map-value-ind': case 'seq-item-ind': case 'newline': break loop; } } while (prev[++i]?.type === 'space') { /* loop */ } return prev.splice(i, prev.length); } function fixFlowSeqItems(fc) { if (fc.start.type === 'flow-seq-start') { for (const it of fc.items) { if (it.sep && !it.value && !includesToken(it.start, 'explicit-key-ind') && !includesToken(it.sep, 'map-value-ind')) { if (it.key) it.value = it.key; delete it.key; if (isFlowToken(it.value)) { if (it.value.end) Array.prototype.push.apply(it.value.end, it.sep); else it.value.end = it.sep; } else Array.prototype.push.apply(it.start, it.sep); delete it.sep; } } } } /** * A YAML concrete syntax tree (CST) parser * * ```ts * const src: string = ... * for (const token of new Parser().parse(src)) { * // token: Token * } * ``` * * To use the parser with a user-provided lexer: * * ```ts * function* parse(source: string, lexer: Lexer) { * const parser = new Parser() * for (const lexeme of lexer.lex(source)) * yield* parser.next(lexeme) * yield* parser.end() * } * * const src: string = ... * const lexer = new Lexer() * for (const token of parse(src, lexer)) { * // token: Token * } * ``` */ class Parser { /** * @param onNewLine - If defined, called separately with the start position of * each new line (in `parse()`, including the start of input). */ constructor(onNewLine) { /** If true, space and sequence indicators count as indentation */ this.atNewLine = true; /** If true, next token is a scalar value */ this.atScalar = false; /** Current indentation level */ this.indent = 0; /** Current offset since the start of parsing */ this.offset = 0; /** On the same line with a block map key */ this.onKeyLine = false; /** Top indicates the node that's currently being built */ this.stack = []; /** The source of the current token, set in parse() */ this.source = ''; /** The type of the current token, set in parse() */ this.type = ''; // Must be defined after `next()` this.lexer = new lexer.Lexer(); this.onNewLine = onNewLine; } /** * Parse `source` as a YAML stream. * If `incomplete`, a part of the last line may be left as a buffer for the next call. * * Errors are not thrown, but yielded as `{ type: 'error', message }` tokens. * * @returns A generator of tokens representing each directive, document, and other structure. */ *parse(source, incomplete = false) { if (this.onNewLine && this.offset === 0) this.onNewLine(0); for (const lexeme of this.lexer.lex(source, incomplete)) yield* this.next(lexeme); if (!incomplete) yield* this.end(); } /** * Advance the parser by the `source` of one lexical token. */ *next(source) { this.source = source; if (process.env.LOG_TOKENS) console.log('|', cst.prettyToken(source)); if (this.atScalar) { this.atScalar = false; yield* this.step(); this.offset += source.length; return; } const type = cst.tokenType(source); if (!type) { const message = `Not a YAML token: ${source}`; yield* this.pop({ type: 'error', offset: this.offset, message, source }); this.offset += source.length; } else if (type === 'scalar') { this.atNewLine = false; this.atScalar = true; this.type = 'scalar'; } else { this.type = type; yield* this.step(); switch (type) { case 'newline': this.atNewLine = true; this.indent = 0; if (this.onNewLine) this.onNewLine(this.offset + source.length); break; case 'space': if (this.atNewLine && source[0] === ' ') this.indent += source.length; break; case 'explicit-key-ind': case 'map-value-ind': case 'seq-item-ind': if (this.atNewLine) this.indent += source.length; break; case 'doc-mode': case 'flow-error-end': return; default: this.atNewLine = false; } this.offset += source.length; } } /** Call at end of input to push out any remaining constructions */ *end() { while (this.stack.length > 0) yield* this.pop(); } get sourceToken() { const st = { type: this.type, offset: this.offset, indent: this.indent, source: this.source }; return st; } *step() { const top = this.peek(1); if (this.type === 'doc-end' && (!top || top.type !== 'doc-end')) { while (this.stack.length > 0) yield* this.pop(); this.stack.push({ type: 'doc-end', offset: this.offset, source: this.source }); return; } if (!top) return yield* this.stream(); switch (top.type) { case 'document': return yield* this.document(top); case 'alias': case 'scalar': case 'single-quoted-scalar': case 'double-quoted-scalar': return yield* this.scalar(top); case 'block-scalar': return yield* this.blockScalar(top); case 'block-map': return yield* this.blockMap(top); case 'block-seq': return yield* this.blockSequence(top); case 'flow-collection': return yield* this.flowCollection(top); case 'doc-end': return yield* this.documentEnd(top); } /* istanbul ignore next should not happen */ yield* this.pop(); } peek(n) { return this.stack[this.stack.length - n]; } *pop(error) { const token = error ?? this.stack.pop(); /* istanbul ignore if should not happen */ if (!token) { const message = 'Tried to pop an empty stack'; yield { type: 'error', offset: this.offset, source: '', message }; } else if (this.stack.length === 0) { yield token; } else { const top = this.peek(1); if (token.type === 'block-scalar') { // Block scalars use their parent rather than header indent token.indent = 'indent' in top ? top.indent : 0; } else if (token.type === 'flow-collection' && top.type === 'document') { // Ignore all indent for top-level flow collections token.indent = 0; } if (token.type === 'flow-collection') fixFlowSeqItems(token); switch (top.type) { case 'document': top.value = token; break; case 'block-scalar': top.props.push(token); // error break; case 'block-map': { const it = top.items[top.items.length - 1]; if (it.value) { top.items.push({ start: [], key: token, sep: [] }); this.onKeyLine = true; return; } else if (it.sep) { it.value = token; } else { Object.assign(it, { key: token, sep: [] }); this.onKeyLine = !it.explicitKey; return; } break; } case 'block-seq': { const it = top.items[top.items.length - 1]; if (it.value) top.items.push({ start: [], value: token }); else it.value = token; break; } case 'flow-collection': { const it = top.items[top.items.length - 1]; if (!it || it.value) top.items.push({ start: [], key: token, sep: [] }); else if (it.sep) it.value = token; else Object.assign(it, { key: token, sep: [] }); return; } /* istanbul ignore next should not happen */ default: yield* this.pop(); yield* this.pop(token); } if ((top.type === 'document' || top.type === 'block-map' || top.type === 'block-seq') && (token.type === 'block-map' || token.type === 'block-seq')) { const last = token.items[token.items.length - 1]; if (last && !last.sep && !last.value && last.start.length > 0 && findNonEmptyIndex(last.start) === -1 && (token.indent === 0 || last.start.every(st => st.type !== 'comment' || st.indent < token.indent))) { if (top.type === 'document') top.end = last.start; else top.items.push({ start: last.start }); token.items.splice(-1, 1); } } } } *stream() { switch (this.type) { case 'directive-line': yield { type: 'directive', offset: this.offset, source: this.source }; return; case 'byte-order-mark': case 'space': case 'comment': case 'newline': yield this.sourceToken; return; case 'doc-mode': case 'doc-start': { const doc = { type: 'document', offset: this.offset, start: [] }; if (this.type === 'doc-start') doc.start.push(this.sourceToken); this.stack.push(doc); return; } } yield { type: 'error', offset: this.offset, message: `Unexpected ${this.type} token in YAML stream`, source: this.source }; } *document(doc) { if (doc.value) return yield* this.lineEnd(doc); switch (this.type) { case 'doc-start': { if (findNonEmptyIndex(doc.start) !== -1) { yield* this.pop(); yield* this.step(); } else doc.start.push(this.sourceToken); return; } case 'anchor': case 'tag': case 'space': case 'comment': case 'newline': doc.start.push(this.sourceToken); return; } const bv = this.startBlockValue(doc); if (bv) this.stack.push(bv); else { yield { type: 'error', offset: this.offset, message: `Unexpected ${this.type} token in YAML document`, source: this.source }; } } *scalar(scalar) { if (this.type === 'map-value-ind') { const prev = getPrevProps(this.peek(2)); const start = getFirstKeyStartProps(prev); let sep; if (scalar.end) { sep = scalar.end; sep.push(this.sourceToken); delete scalar.end; } else sep = [this.sourceToken]; const map = { type: 'block-map', offset: scalar.offset, indent: scalar.indent, items: [{ start, key: scalar, sep }] }; this.onKeyLine = true; this.stack[this.stack.length - 1] = map; } else yield* this.lineEnd(scalar); } *blockScalar(scalar) { switch (this.type) { case 'space': case 'comment': case 'newline': scalar.props.push(this.sourceToken); return; case 'scalar': scalar.source = this.source; // block-scalar source includes trailing newline this.atNewLine = true; this.indent = 0; if (this.onNewLine) { let nl = this.source.indexOf('\n') + 1; while (nl !== 0) { this.onNewLine(this.offset + nl); nl = this.source.indexOf('\n', nl) + 1; } } yield* this.pop(); break; /* istanbul ignore next should not happen */ default: yield* this.pop(); yield* this.step(); } } *blockMap(map) { const it = map.items[map.items.length - 1]; // it.sep is true-ish if pair already has key or : separator switch (this.type) { case 'newline': this.onKeyLine = false; if (it.value) { const end = 'end' in it.value ? it.value.end : undefined; const last = Array.isArray(end) ? end[end.length - 1] : undefined; if (last?.type === 'comment') end?.push(this.sourceToken); else map.items.push({ start: [this.sourceToken] }); } else if (it.sep) { it.sep.push(this.sourceToken); } else { it.start.push(this.sourceToken); } return; case 'space': case 'comment': if (it.value) { map.items.push({ start: [this.sourceToken] }); } else if (it.sep) { it.sep.push(this.sourceToken); } else { if (this.atIndentedComment(it.start, map.indent)) { const prev = map.items[map.items.length - 2]; const end = prev?.value?.end; if (Array.isArray(end)) { Array.prototype.push.apply(end, it.start); end.push(this.sourceToken); map.items.pop(); return; } } it.start.push(this.sourceToken); } return; } if (this.indent >= map.indent) { const atMapIndent = !this.onKeyLine && this.indent === map.indent; const atNextItem = atMapIndent && (it.sep || it.explicitKey) && this.type !== 'seq-item-ind'; // For empty nodes, assign newline-separated not indented empty tokens to following node let start = []; if (atNextItem && it.sep && !it.value) { const nl = []; for (let i = 0; i < it.sep.length; ++i) { const st = it.sep[i]; switch (st.type) { case 'newline': nl.push(i); break; case 'space': break; case 'comment': if (st.indent > map.indent) nl.length = 0; break; default: nl.length = 0; } } if (nl.length >= 2) start = it.sep.splice(nl[1]); } switch (this.type) { case 'anchor': case 'tag': if (atNextItem || it.value) { start.push(this.sourceToken); map.items.push({ start }); this.onKeyLine = true; } else if (it.sep) { it.sep.push(this.sourceToken); } else { it.start.push(this.sourceToken); } return; case 'explicit-key-ind': if (!it.sep && !it.explicitKey) { it.start.push(this.sourceToken); it.explicitKey = true; } else if (atNextItem || it.value) { start.push(this.sourceToken); map.items.push({ start, explicitKey: true }); } else { this.stack.push({ type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start: [this.sourceToken], explicitKey: true }] }); } this.onKeyLine = true; return; case 'map-value-ind': if (it.explicitKey) { if (!it.sep) { if (includesToken(it.start, 'newline')) { Object.assign(it, { key: null, sep: [this.sourceToken] }); } else { const start = getFirstKeyStartProps(it.start); this.stack.push({ type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start, key: null, sep: [this.sourceToken] }] }); } } else if (it.value) { map.items.push({ start: [], key: null, sep: [this.sourceToken] }); } else if (includesToken(it.sep, 'map-value-ind')) { this.stack.push({ type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start, key: null, sep: [this.sourceToken] }] }); } else if (isFlowToken(it.key) && !includesToken(it.sep, 'newline')) { const start = getFirstKeyStartProps(it.start); const key = it.key; const sep = it.sep; sep.push(this.sourceToken); // @ts-expect-error type guard is wrong here delete it.key; // @ts-expect-error type guard is wrong here delete it.sep; this.stack.push({ type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start, key, sep }] }); } else if (start.length > 0) { // Not actually at next item it.sep = it.sep.concat(start, this.sourceToken); } else { it.sep.push(this.sourceToken); } } else { if (!it.sep) { Object.assign(it, { key: null, sep: [this.sourceToken] }); } else if (it.value || atNextItem) { map.items.push({ start, key: null, sep: [this.sourceToken] }); } else if (includesToken(it.sep, 'map-value-ind')) { this.stack.push({ type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start: [], key: null, sep: [this.sourceToken] }] }); } else { it.sep.push(this.sourceToken); } } this.onKeyLine = true; return; case 'alias': case 'scalar': case 'single-quoted-scalar': case 'double-quoted-scalar': { const fs = this.flowScalar(this.type); if (atNextItem || it.value) { map.items.push({ start, key: fs, sep: [] }); this.onKeyLine = true; } else if (it.sep) { this.stack.push(fs); } else { Object.assign(it, { key: fs, sep: [] }); this.onKeyLine = true; } return; } default: { const bv = this.startBlockValue(map); if (bv) { if (atMapIndent && bv.type !== 'block-seq') { map.items.push({ start }); } this.stack.push(bv); return; } } } } yield* this.pop(); yield* this.step(); } *blockSequence(seq) { const it = seq.items[seq.items.length - 1]; switch (this.type) { case 'newline': if (it.value) { const end = 'end' in it.value ? it.value.end : undefined; const last = Array.isArray(end) ? end[end.length - 1] : undefined; if (last?.type === 'comment') end?.push(this.sourceToken); else seq.items.push({ start: [this.sourceToken] }); } else it.start.push(this.sourceToken); return; case 'space': case 'comment': if (it.value) seq.items.push({ start: [this.sourceToken] }); else { if (this.atIndentedComment(it.start, seq.indent)) { const prev = seq.items[seq.items.length - 2]; const end = prev?.value?.end; if (Array.isArray(end)) { Array.prototype.push.apply(end, it.start); end.push(this.sourceToken); seq.items.pop(); return; } } it.start.push(this.sourceToken); } return; case 'anchor': case 'tag': if (it.value || this.indent <= seq.indent) break; it.start.push(this.sourceToken); return; case 'seq-item-ind': if (this.indent !== seq.indent) break; if (it.value || includesToken(it.start, 'seq-item-ind')) seq.items.push({ start: [this.sourceToken] }); else it.start.push(this.sourceToken); return; } if (this.indent > seq.indent) { const bv = this.startBlockValue(seq); if (bv) { this.stack.push(bv); return; } } yield* this.pop(); yield* this.step(); } *flowCollection(fc) { const it = fc.items[fc.items.length - 1]; if (this.type === 'flow-error-end') { let top; do { yield* this.pop(); top = this.peek(1); } while (top && top.type === 'flow-collection'); } else if (fc.end.length === 0) { switch (this.type) { case 'comma': case 'explicit-key-ind': if (!it || it.sep) fc.items.push({ start: [this.sourceToken] }); else it.start.push(this.sourceToken); return; case 'map-value-ind': if (!it || it.value) fc.items.push({ start: [], key: null, sep: [this.sourceToken] }); else if (it.sep) it.sep.push(this.sourceToken); else Object.assign(it, { key: null, sep: [this.sourceToken] }); return; case 'space': case 'comment': case 'newline': case 'anchor': case 'tag': if (!it || it.value) fc.items.push({ start: [this.sourceToken] }); else if (it.sep) it.sep.push(this.sourceToken); else it.start.push(this.sourceToken); return; case 'alias': case 'scalar': case 'single-quoted-scalar': case 'double-quoted-scalar': { const fs = this.flowScalar(this.type); if (!it || it.value) fc.items.push({ start: [], key: fs, sep: [] }); else if (it.sep) this.stack.push(fs); else Object.assign(it, { key: fs, sep: [] }); return; } case 'flow-map-end': case 'flow-seq-end': fc.end.push(this.sourceToken); return; } const bv = this.startBlockValue(fc); /* istanbul ignore else should not happen */ if (bv) this.stack.push(bv); else { yield* this.pop(); yield* this.step(); } } else { const parent = this.peek(2); if (parent.type === 'block-map' && ((this.type === 'map-value-ind' && parent.indent === fc.indent) || (this.type === 'newline' && !parent.items[parent.items.length - 1].sep))) { yield* this.pop(); yield* this.step(); } else if (this.type === 'map-value-ind' && parent.type !== 'flow-collection') { const prev = getPrevProps(parent); const start = getFirstKeyStartProps(prev); fixFlowSeqItems(fc); const sep = fc.end.splice(1, fc.end.length); sep.push(this.sourceToken); const map = { type: 'block-map', offset: fc.offset, indent: fc.indent, items: [{ start, key: fc, sep }] }; this.onKeyLine = true; this.stack[this.stack.length - 1] = map; } else { yield* this.lineEnd(fc); } } } flowScalar(type) { if (this.onNewLine) { let nl = this.source.indexOf('\n') + 1; while (nl !== 0) { this.onNewLine(this.offset + nl); nl = this.source.indexOf('\n', nl) + 1; } } return { type, offset: this.offset, indent: this.indent, source: this.source }; } startBlockValue(parent) { switch (this.type) { case 'alias': case 'scalar': case 'single-quoted-scalar': case 'double-quoted-scalar': return this.flowScalar(this.type); case 'block-scalar-header': return { type: 'block-scalar', offset: this.offset, indent: this.indent, props: [this.sourceToken], source: '' }; case 'flow-map-start': case 'flow-seq-start': return { type: 'flow-collection', offset: this.offset, indent: this.indent, start: this.sourceToken, items: [], end: [] }; case 'seq-item-ind': return { type: 'block-seq', offset: this.offset, indent: this.indent, items: [{ start: [this.sourceToken] }] }; case 'explicit-key-ind': { this.onKeyLine = true; const prev = getPrevProps(parent); const start = getFirstKeyStartProps(prev); start.push(this.sourceToken); return { type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start, explicitKey: true }] }; } case 'map-value-ind': { this.onKeyLine = true; const prev = getPrevProps(parent); const start = getFirstKeyStartProps(prev); return { type: 'block-map', offset: this.offset, indent: this.indent, items: [{ start, key: null, sep: [this.sourceToken] }] }; } } return null; } atIndentedComment(start, indent) { if (this.type !== 'comment') return false; if (this.indent <= indent) return false; return start.every(st => st.type === 'newline' || st.type === 'space'); } *documentEnd(docEnd) { if (this.type !== 'doc-mode') { if (docEnd.end) docEnd.end.push(this.sourceToken); else docEnd.end = [this.sourceToken]; if (this.type === 'newline') yield* this.pop(); } } *lineEnd(token) { switch (this.type) { case 'comma': case 'doc-start': case 'doc-end': case 'flow-seq-end': case 'flow-map-end': case 'map-value-ind': yield* this.pop(); yield* this.step(); break; case 'newline': this.onKeyLine = false; // fallthrough case 'space': case 'comment': default: // all other values are errors if (token.end) token.end.push(this.sourceToken); else token.end = [this.sourceToken]; if (this.type === 'newline') yield* this.pop(); } } } exports.Parser = Parser;