UNPKG

rhombic

Version:

SQL parsing, lineage extraction and manipulation

615 lines 24.2 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.QueryStructureVisitor = exports.QueryRelation = exports.TableRelation = exports.Relation = exports.Column = exports.ROOT_QUERY_NAME = void 0; const AbstractParseTreeVisitor_1 = require("antlr4ts/tree/AbstractParseTreeVisitor"); const SqlBaseParser_1 = require("./SqlBaseParser"); const common_1 = __importDefault(require("./common")); const ROOT_QUERY_ID = "result_1"; exports.ROOT_QUERY_NAME = "[final result]"; class Column { constructor(id, label, range, data, isAssumed) { this.id = id; this.label = label; this.range = range; this.data = data; this.isAssumed = isAssumed; this.columnReferences = []; } } exports.Column = Column; /** * Base relation class representing any relation in query: query itself, subquery, source table, CTE */ class Relation { constructor(id, columns, parent, range) { this.id = id; this.columns = columns; this.parent = parent; this.range = range; } findColumn(columnName) { return this.columns.find(c => columnName.quoted ? c.label == columnName.name : c.label.localeCompare(columnName.name, undefined, { sensitivity: "accent" }) == 0); } resolveColumn(columnName) { const col = this.findColumn(columnName); return col !== undefined ? { tableId: this.id, columnId: col.id, isAssumed: false } : undefined; } } exports.Relation = Relation; /** * Relation representing source table. */ class TableRelation extends Relation { constructor(id, tablePrimary, columns, isFetched, parent, range, data) { super(id, columns, parent, range); this.tablePrimary = tablePrimary; this.isFetched = isFetched; this.data = data; } addAssumedColumn(columnName, range) { const column = new Column(`column_${this.columns.length + 1}`, columnName.name, range, undefined, true); this.columns.push(column); return { tableId: this.id, columnId: column.id, isAssumed: true }; } } exports.TableRelation = TableRelation; /** * Relation representing (sub-)query. */ class QueryRelation extends Relation { constructor(id, parent, range) { super(id, [], parent, range); // CTEs from this context this.ctes = new Map(); // relations for this context extracted from FROM this.relations = new Map(); // sequence generator for columns in this context this.columnIdSeq = 0; this.columnReferences = []; } findLocalRelation(tableName) { for (const rel of this.relations) { if (tableName.quoted) { if (rel[0] == tableName.name) return rel[1]; } else { if (tableName.name.localeCompare(rel[0], undefined, { sensitivity: "accent" }) == 0) return rel[1]; } } return undefined; } findRelation(tableName) { var _a, _b; return (_a = this.findLocalRelation(tableName)) !== null && _a !== void 0 ? _a : (_b = this.parent) === null || _b === void 0 ? void 0 : _b.findRelation(tableName); } findCTE(tableName) { var _a; for (const rel of this.ctes) { if (tableName.quoted) { if (rel[0] == tableName.name) return rel[1]; } else { if (tableName.name.localeCompare(rel[0], undefined, { sensitivity: "accent" }) == 0) return rel[1]; } } return (_a = this.parent) === null || _a === void 0 ? void 0 : _a.findCTE(tableName); } getCTENames() { const localCtes = Array.from(this.ctes.keys()); if (this.parent !== undefined) { return localCtes.concat(this.parent.getCTENames()); } else { return localCtes; } } resolveOrAssumeRelationColumn(columnName, range, tableName) { if (tableName !== undefined) { const rel = this.findRelation(tableName); const col = rel === null || rel === void 0 ? void 0 : rel.resolveColumn(columnName); if (col === undefined && rel != undefined && rel instanceof TableRelation) { return rel.addAssumedColumn(columnName, range); } return col; } else { const tables = []; for (const r of this.relations) { const rel = r[1]; const col = rel.resolveColumn(columnName); if (col) { return col; } if (rel instanceof TableRelation) { tables.push(rel); } } const assumed = tables.filter(t => !t.isFetched); if (assumed.length == 1) { return assumed[0].addAssumedColumn(columnName, range); } else if (tables.length == 1) { return tables[0].addAssumedColumn(columnName, range); } return undefined; } } getNextColumnId() { this.columnIdSeq++; return `column_${this.columnIdSeq}`; } } exports.QueryRelation = QueryRelation; /** * Visitor to extract query structure in the sense of relations, columns and used relations/columns. * This extraction builds a tree of contexts corresponding to (sub-)queries, resolves all column * references and for each (sub-)query and source table reports separate relation with #onRelation() * handler. For column/table used when building particular relation it reports the reference with * #onColumnReference() handler. */ class QueryStructureVisitor extends AbstractParseTreeVisitor_1.AbstractParseTreeVisitor { constructor(getTable, options) { super(); this.getTable = getTable; this.options = options; this.relationSeq = 0; this.currentRelation = new QueryRelation(this.getNextRelationId()); } getNextRelationId() { return `result_${this.relationSeq++}`; } rangeFromContext(ctx) { var _a; const stop = (_a = ctx.stop) !== null && _a !== void 0 ? _a : ctx.start; return { startLine: ctx.start.line, endLine: stop.line, startColumn: ctx.start.charPositionInLine, endColumn: stop.charPositionInLine + (stop.stopIndex - stop.startIndex + 1) }; } /** * Extracts table and column names from PrimaryExpressionContext (if possible). */ extractTableAndColumn(ctx) { if (ctx instanceof SqlBaseParser_1.ColumnReferenceContext) { return { column: common_1.default.stripQuote(ctx.identifier()) }; } else if (ctx instanceof SqlBaseParser_1.DereferenceContext) { const primary = ctx.primaryExpression(); if (primary instanceof SqlBaseParser_1.ColumnReferenceContext) { return { table: common_1.default.stripQuote(primary.identifier()), column: common_1.default.stripQuote(ctx.identifier()) }; } } return undefined; } asPrimaryExpression(ctx) { const boolExpr = ctx.booleanExpression(); if (boolExpr instanceof SqlBaseParser_1.PredicatedContext) { const valExpr = boolExpr.valueExpression(); if (valExpr instanceof SqlBaseParser_1.ValueExpressionDefaultContext) { return valExpr.primaryExpression(); } } return undefined; } /** * Derives column name from expression if possible. */ deriveColumnName(ctx) { var _a; const primExpr = this.asPrimaryExpression(ctx); if (primExpr) { return (_a = this.extractTableAndColumn(primExpr)) === null || _a === void 0 ? void 0 : _a.column.name; } return undefined; } /** * Called when relation is ready. * @param _relation * @param _alias * @returns */ onRelation(_relation, _alias) { return; } reportTableReferences() { for (const [alias, relation] of this.currentRelation.relations) { if (relation instanceof TableRelation) { this.onRelation(relation, alias !== relation.id ? alias : undefined); } } } /** * Called when column reference is ready. * @param _tableId * @param _columnId * @returns */ onColumnReference(_tableId, _columnId) { return; } /** * Determines whether expression is a star. */ isStar(ctx) { const boolExpr = ctx.booleanExpression(); if (boolExpr instanceof SqlBaseParser_1.PredicatedContext) { const valExpr = boolExpr.valueExpression(); if (valExpr instanceof SqlBaseParser_1.ValueExpressionDefaultContext) { const primaryExpr = valExpr.primaryExpression(); if (primaryExpr instanceof SqlBaseParser_1.StarContext) { return primaryExpr; } } } return undefined; } processStar(ctx) { const range = this.rangeFromContext(ctx); const qualifiedName = ctx.qualifiedName(); if (qualifiedName !== undefined) { // TODO support multipart table names const lastName = qualifiedName.identifier()[qualifiedName.identifier().length - 1]; const tableName = common_1.default.stripQuote(lastName); const rel = this.currentRelation.findLocalRelation(tableName); if (rel !== undefined) { this.addRelationColumns(rel, range); } } else { for (const r of this.currentRelation.relations) { this.addRelationColumns(r[1], range); } } return this.visitChildren(ctx); } addRelationColumns(rel, range) { rel.columns.forEach(c => { const columnId = this.currentRelation.getNextColumnId(); const col = new Column(columnId, c.label, range); this.currentRelation.columns.push(col); this.currentRelation.currentColumnId = columnId; this.onColumnReference(rel.id, c.id); this.currentRelation.currentColumnId = undefined; }); } processClause(clause, ctx) { this.currentRelation.currentClause = clause; const result = this.visitChildren(ctx); this.currentRelation.currentClause = undefined; return result; } // // Visitor method overrides // visitQuery(ctx) { var _a; this.currentRelation = new QueryRelation(this.getNextRelationId(), this.currentRelation, this.rangeFromContext(ctx)); const result = this.visitChildren(ctx); this.reportTableReferences(); // to be consumed later this.lastRelation = this.currentRelation; if (this.currentRelation.id == ROOT_QUERY_ID) this.onRelation(this.currentRelation, exports.ROOT_QUERY_NAME); this.currentRelation = (_a = this.currentRelation.parent) !== null && _a !== void 0 ? _a : new QueryRelation(this.getNextRelationId()); return result; } /** * Processing set operations. */ visitQueryTermDefault(ctx) { // reinit column seq as we will repeat the same columns in subsequent queries this.currentRelation.columnIdSeq = 0; // reports table references from previous queryTerm (if any) this.reportTableReferences(); // clear relations for each queryTermDefault because it's individual query this.currentRelation.relations = new Map(); return this.visitChildren(ctx); } visitRegularQuerySpecification(ctx) { var _a, _b, _c; // process FROM first to capture all available relations let result = (_b = (_a = ctx.fromClause()) === null || _a === void 0 ? void 0 : _a.accept(this)) !== null && _b !== void 0 ? _b : this.defaultResult(); // then process all remaining clauses (_c = ctx.children) === null || _c === void 0 ? void 0 : _c.forEach(c => { if (!(c instanceof SqlBaseParser_1.FromClauseContext)) { result = this.aggregateResult(result, c.accept(this)); } }); return result; } /** * Process JOIN ... USING (...) columns * @param ctx */ visitJoinCriteriaUsing(ctx) { const columns = ctx .identifierList() .identifierSeq() .errorCapturingIdentifier() .map(eci => common_1.default.stripQuote(eci.identifier())); // columns shall be searched in last from relation and all previous ones const size = this.currentRelation.relations.size; if (size >= 2) { let i = 0; const foundLeftCol = new Set(); this.currentRelation.relations.forEach(r => { if (i == size - 1) { // this is the last (right) relation columns.forEach(c => { const col = r.resolveColumn(c); if (col) this.onColumnReference(col.tableId, col.columnId); }); } else { columns.forEach((c, j) => { if (!foundLeftCol.has(j)) { const col = r.resolveColumn(c); if (col) this.onColumnReference(col.tableId, col.columnId); foundLeftCol.add(j); } }); } i++; }); } return this.visitChildren(ctx); } // processes table/CTE/correlated subquery references visitTableName(ctx) { const multipartTableName = ctx .multipartIdentifier() .errorCapturingIdentifier() .map(v => common_1.default.stripQuote(v.identifier())); const strictId = ctx.tableAlias().strictIdentifier(); const alias = (strictId !== undefined ? common_1.default.stripQuote(strictId) : multipartTableName[multipartTableName.length - 1]).name; if (multipartTableName.length == 1) { const cte = this.currentRelation.findCTE(multipartTableName[0]); if (cte !== undefined) { // found relation as CTE this.currentRelation.relations.set(alias, cte); return this.defaultResult(); } const rel = this.currentRelation.findRelation(multipartTableName[0]); if (rel !== undefined) { // found relation as correlated sq if (multipartTableName[0].name != alias) { this.currentRelation.relations.set(alias, rel); } return this.defaultResult(); } } const tablePrimary = common_1.default.tablePrimaryFromMultipart(multipartTableName.map(v => v.name)); const metadata = this.getTable(tablePrimary); const columns = (metadata === null || metadata === void 0 ? void 0 : metadata.columns.map(c => new Column(c.id, c.id, undefined, c.data))) || []; const relation = new TableRelation(this.getNextRelationId(), tablePrimary, columns, metadata !== undefined, this.currentRelation, this.rangeFromContext(ctx), metadata === null || metadata === void 0 ? void 0 : metadata.table.data); this.currentRelation.relations.set(alias, relation); return this.defaultResult(); } // processes CTE visitNamedQuery(ctx) { var _a; const result = this.visitChildren(ctx); // expecting query relation to be in stack const relation = this.lastRelation; if (relation !== undefined) { const identifier = ctx.errorCapturingIdentifier().identifier(); const alias = common_1.default.stripQuote(identifier).name; const columnAliases = (_a = ctx .identifierList()) === null || _a === void 0 ? void 0 : _a.identifierSeq().errorCapturingIdentifier().map(eci => common_1.default.stripQuote(eci.identifier()).name); if (columnAliases !== undefined) { relation.columns.forEach((c, i) => { var _a; c.label = (_a = columnAliases[i]) !== null && _a !== void 0 ? _a : c.label; }); } // Notify the callback before adding the cte as the cte is not yet available in scope. this.onRelation(relation, alias); this.currentRelation.ctes.set(alias, relation); return result; } else { throw new Error("Expecting CTE query relation to be in stack"); } } // processes subqueries visitAliasedQuery(ctx) { var _a; const result = this.visitChildren(ctx); // expecting query relation to be in stack const relation = this.lastRelation; if (relation !== undefined) { const strictId = ctx.tableAlias().strictIdentifier(); const alias = strictId !== undefined ? common_1.default.stripQuote(strictId).name : relation.id; const columnAliases = (_a = ctx .tableAlias() .identifierList()) === null || _a === void 0 ? void 0 : _a.identifierSeq().errorCapturingIdentifier().map(eci => common_1.default.stripQuote(eci.identifier()).name); if (columnAliases !== undefined) { relation.columns.forEach((c, i) => { var _a; c.label = (_a = columnAliases[i]) !== null && _a !== void 0 ? _a : c.label; }); } this.currentRelation.relations.set(alias, relation); this.onRelation(relation, alias); return result; } else { throw new Error("Expecting subquery relation to be in stack"); } } visitSelectClause(ctx) { return this.processClause("select", ctx); } visitFromClause(ctx) { return this.processClause("from", ctx); } visitWhereClause(ctx) { return this.processClause("where", ctx); } visitGroupByClause(ctx) { return this.processClause("group by", ctx); } visitHavingClause(ctx) { return this.processClause("having", ctx); } visitQueryOrganization(ctx) { return this.processClause("order by", ctx); } /** * Handle subquery in EXISTS. * @param ctx * @returns */ visitExists(ctx) { const result = this.visitChildren(ctx); const rel = this.lastRelation; if (rel !== undefined) { this.onRelation(rel); this.onColumnReference(rel.id); } return result; } /** * Handle subquery in IN predicate. * @param ctx * @returns */ visitPredicate(ctx) { const result = this.visitChildren(ctx); if (ctx.query() !== undefined) { const rel = this.lastRelation; if (rel !== undefined) { this.onRelation(rel); this.onColumnReference(rel.id); } } return result; } visitNamedExpression(ctx) { var _a; if (ctx.errorCapturingIdentifier() === undefined) { const star = this.isStar(ctx.expression()); if (star !== undefined) { return this.processStar(star); } } const columnId = this.currentRelation.getNextColumnId(); let column = this.currentRelation.columns.find(c => c.id == columnId); // column could have been already defined if we have set operation if (column === undefined) { const errCaptId = ctx.errorCapturingIdentifier(); const label = errCaptId !== undefined ? common_1.default.stripQuote(errCaptId.identifier()).name : (_a = this.deriveColumnName(ctx.expression())) !== null && _a !== void 0 ? _a : columnId; const range = this.rangeFromContext(ctx); column = new Column(columnId, label, range); this.currentRelation.columns.push(column); } this.currentRelation.currentColumnId = columnId; this.currentRelation.columnReferences = []; const result = this.visitChildren(ctx); column.columnReferences.push(...this.currentRelation.columnReferences); this.currentRelation.currentColumnId = undefined; return result; } visitSubqueryExpression(ctx) { const result = this.visitChildren(ctx); const rel = this.lastRelation; if (rel !== undefined) { this.onRelation(rel); this.onColumnReference(rel.id); } return result; } visitFunctionCall(ctx) { if (ctx.functionName().text.toLowerCase() == "count" && ctx.expression().length == 1 && this.isStar(ctx.expression()[0])) { for (const r of this.currentRelation.relations) { this.onColumnReference(r[1].id); } } return this.visitChildren(ctx); } visitSortItem(ctx) { var _a; if ((_a = this.options) === null || _a === void 0 ? void 0 : _a.positionalRefsEnabled) { const primExp = this.asPrimaryExpression(ctx.expression()); if (primExp instanceof SqlBaseParser_1.ConstantDefaultContext) { const constant = primExp.constant(); if (constant instanceof SqlBaseParser_1.NumericLiteralContext) { const idx = Number(constant.text) - 1; if (idx in this.currentRelation.columns) { const col = this.currentRelation.columns[idx]; col.columnReferences.forEach(cr => this.onColumnReference(cr.tableId, cr.columnId)); return this.defaultResult(); } } } } return this.visitChildren(ctx); } processColumnReference(ctx) { const tableCol = this.extractTableAndColumn(ctx); if (tableCol !== undefined) { if (tableCol.table === undefined && this.currentRelation.currentClause !== undefined && ["group by", "order by"].includes(this.currentRelation.currentClause)) { // check if it is self column reference const selfCol = this.currentRelation.findColumn(tableCol.column); if (selfCol) { selfCol.columnReferences.forEach(cr => this.onColumnReference(cr.tableId, cr.columnId)); return this.defaultResult(); } } const range = this.rangeFromContext(ctx); const col = this.currentRelation.resolveOrAssumeRelationColumn(tableCol.column, range, tableCol.table); if (col !== undefined) { this.currentRelation.columnReferences.push(col); this.onColumnReference(col.tableId, col.columnId); } return this.defaultResult(); } else { return this.visitChildren(ctx); } } visitColumnReference(ctx) { return this.processColumnReference(ctx); } visitDereference(ctx) { return this.processColumnReference(ctx); } } exports.QueryStructureVisitor = QueryStructureVisitor; //# sourceMappingURL=QueryStructureVisitor.js.map