/*
 * rhombic: SQL parsing, lineage extraction and manipulation.
 * Listing header: 615 lines, 24.2 kB, JavaScript (version not recorded).
 */
"use strict";
// Module interop helper: reuses a helper already present on the enclosing `this`
// when there is one; otherwise modules flagged with `__esModule` are passed
// through untouched and anything else is wrapped under a `default` key.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.QueryStructureVisitor = exports.QueryRelation = exports.TableRelation = exports.Relation = exports.Column = exports.ROOT_QUERY_NAME = void 0;
const AbstractParseTreeVisitor_1 = require("antlr4ts/tree/AbstractParseTreeVisitor");
const SqlBaseParser_1 = require("./SqlBaseParser");
const common_1 = __importDefault(require("./common"));
// Relation id that visitQuery() compares against to recognise the query it reports as the final result.
const ROOT_QUERY_ID = "result_1";
// Alias under which visitQuery() reports that query to onRelation().
exports.ROOT_QUERY_NAME = "[final result]";
/**
 * A single column of a relation.
 */
class Column {
    /**
     * @param id identifier of the column within its relation
     * @param label name the column is looked up by
     * @param range source range the column was taken from (may be undefined)
     * @param data opaque payload attached to the column (may be undefined)
     * @param isAssumed flag stored as given; TableRelation passes `true` for columns it assumes
     */
    constructor(id, label, range, data, isAssumed) {
        Object.assign(this, { id, label, range, data, isAssumed });
        // References collected for this column; filled in by the visitor.
        this.columnReferences = [];
    }
}
exports.Column = Column;
/**
 * Common base of every relation that can appear in a query: the query itself, a subquery,
 * a source table or a CTE.
 */
class Relation {
    constructor(id, columns, parent, range) {
        Object.assign(this, { id, columns, parent, range });
    }
    /**
     * Finds a column of this relation by label.
     * Quoted names must match the label exactly; unquoted names are compared with
     * `localeCompare` using "accent" sensitivity (so letter case is ignored).
     * @param columnName object with `name` and `quoted`
     * @returns the first matching column, or undefined
     */
    findColumn(columnName) {
        for (const candidate of this.columns) {
            if (columnName.quoted) {
                if (candidate.label == columnName.name) {
                    return candidate;
                }
            }
            else if (candidate.label.localeCompare(columnName.name, undefined, { sensitivity: "accent" }) == 0) {
                return candidate;
            }
        }
        return undefined;
    }
    /**
     * Resolves a column name to a reference into this relation.
     * @param columnName object with `name` and `quoted`
     * @returns `{ tableId, columnId, isAssumed: false }` when the column exists, otherwise undefined
     */
    resolveColumn(columnName) {
        const match = this.findColumn(columnName);
        if (match === undefined) {
            return undefined;
        }
        return {
            tableId: this.id,
            columnId: match.id,
            isAssumed: false
        };
    }
}
exports.Relation = Relation;
/**
 * Relation that stands for a source table.
 */
class TableRelation extends Relation {
    /**
     * @param id relation id
     * @param tablePrimary table identification, stored as given
     * @param columns columns known for the table
     * @param isFetched flag stored as given; the visitor passes whether table metadata was found
     * @param parent enclosing relation
     * @param range source range of the table reference
     * @param data opaque payload attached to the table
     */
    constructor(id, tablePrimary, columns, isFetched, parent, range, data) {
        super(id, columns, parent, range);
        Object.assign(this, { tablePrimary, isFetched, data });
    }
    /**
     * Appends a column that is not known for this table and marks it as assumed.
     * The new column id is derived from the current number of columns.
     * @param columnName object whose `name` becomes the column label
     * @param range source range of the reference that triggered the assumption
     * @returns `{ tableId, columnId, isAssumed: true }` pointing at the new column
     */
    addAssumedColumn(columnName, range) {
        const ordinal = this.columns.length + 1;
        const assumedColumn = new Column(`column_${ordinal}`, columnName.name, range, undefined, true);
        this.columns.push(assumedColumn);
        return { tableId: this.id, columnId: assumedColumn.id, isAssumed: true };
    }
}
exports.TableRelation = TableRelation;
/**
 * Relation that stands for a query or subquery.
 */
class QueryRelation extends Relation {
    constructor(id, parent, range) {
        super(id, [], parent, range);
        // CTEs declared in this context, keyed by name
        this.ctes = new Map();
        // relations taken from FROM in this context, keyed by alias
        this.relations = new Map();
        // sequence generator for column ids in this context
        this.columnIdSeq = 0;
        this.columnReferences = [];
    }
    /**
     * Looks a relation up among the relations of this context only.
     * Quoted names must equal the alias; unquoted names are compared with
     * `localeCompare` using "accent" sensitivity.
     * @returns the first matching relation, or undefined
     */
    findLocalRelation(tableName) {
        for (const [alias, relation] of this.relations) {
            const isMatch = tableName.quoted
                ? alias == tableName.name
                : tableName.name.localeCompare(alias, undefined, { sensitivity: "accent" }) == 0;
            if (isMatch) {
                return relation;
            }
        }
        return undefined;
    }
    /**
     * Looks a relation up in this context and, failing that, in the parent chain.
     */
    findRelation(tableName) {
        const local = this.findLocalRelation(tableName);
        if (local !== null && local !== undefined) {
            return local;
        }
        const outer = this.parent;
        if (outer === null || outer === undefined) {
            return undefined;
        }
        return outer.findRelation(tableName);
    }
    /**
     * Looks a CTE up in this context and, failing that, in the parent chain.
     * Name matching follows the same rules as findLocalRelation().
     */
    findCTE(tableName) {
        for (const [cteName, cte] of this.ctes) {
            const isMatch = tableName.quoted
                ? cteName == tableName.name
                : tableName.name.localeCompare(cteName, undefined, { sensitivity: "accent" }) == 0;
            if (isMatch) {
                return cte;
            }
        }
        const outer = this.parent;
        if (outer === null || outer === undefined) {
            return undefined;
        }
        return outer.findCTE(tableName);
    }
    /**
     * @returns names of the CTEs of this context followed by those of the parent chain
     */
    getCTENames() {
        const ownNames = Array.from(this.ctes.keys());
        return this.parent !== undefined
            ? ownNames.concat(this.parent.getCTENames())
            : ownNames;
    }
    /**
     * Resolves a column reference, adding an assumed column to a table when it cannot be found.
     *
     * With a table name: the relation is searched through findRelation(); if it is a
     * TableRelation lacking the column, the column is assumed on it.
     * Without a table name: the first local relation that has the column wins; otherwise the
     * column is assumed on the only table without fetched metadata, or else on the only table.
     * @returns column reference, or undefined when nothing could be resolved or assumed
     */
    resolveOrAssumeRelationColumn(columnName, range, tableName) {
        if (tableName !== undefined) {
            const target = this.findRelation(tableName);
            const resolved = target === null || target === undefined
                ? undefined
                : target.resolveColumn(columnName);
            if (resolved === undefined && target != undefined && target instanceof TableRelation) {
                return target.addAssumedColumn(columnName, range);
            }
            return resolved;
        }
        const tableCandidates = [];
        for (const candidate of this.relations.values()) {
            const resolved = candidate.resolveColumn(columnName);
            if (resolved) {
                return resolved;
            }
            if (candidate instanceof TableRelation) {
                tableCandidates.push(candidate);
            }
        }
        const unfetched = tableCandidates.filter(t => !t.isFetched);
        if (unfetched.length == 1) {
            return unfetched[0].addAssumedColumn(columnName, range);
        }
        if (tableCandidates.length == 1) {
            return tableCandidates[0].addAssumedColumn(columnName, range);
        }
        return undefined;
    }
    /**
     * Advances the column sequence and returns the id built from the new value.
     */
    getNextColumnId() {
        this.columnIdSeq += 1;
        return `column_${this.columnIdSeq}`;
    }
}
exports.QueryRelation = QueryRelation;
/**
 * Visitor that extracts the structure of a query: its relations, their columns and the
 * relations/columns they use. The extraction builds a tree of contexts, one per (sub-)query,
 * resolves all column references and reports every (sub-)query and source table as a separate
 * relation through the #onRelation() handler. Each column/table used while building a particular
 * relation is reported through the #onColumnReference() handler.
 */
class QueryStructureVisitor extends AbstractParseTreeVisitor_1.AbstractParseTreeVisitor {
constructor(getTable, options) {
super();
this.getTable = getTable;
this.options = options;
this.relationSeq = 0;
this.currentRelation = new QueryRelation(this.getNextRelationId());
}
getNextRelationId() {
return `result_${this.relationSeq++}`;
}
rangeFromContext(ctx) {
var _a;
const stop = (_a = ctx.stop) !== null && _a !== void 0 ? _a : ctx.start;
return {
startLine: ctx.start.line,
endLine: stop.line,
startColumn: ctx.start.charPositionInLine,
endColumn: stop.charPositionInLine + (stop.stopIndex - stop.startIndex + 1)
};
}
/**
* Extracts table and column names from PrimaryExpressionContext (if possible).
*/
extractTableAndColumn(ctx) {
if (ctx instanceof SqlBaseParser_1.ColumnReferenceContext) {
return { column: common_1.default.stripQuote(ctx.identifier()) };
}
else if (ctx instanceof SqlBaseParser_1.DereferenceContext) {
const primary = ctx.primaryExpression();
if (primary instanceof SqlBaseParser_1.ColumnReferenceContext) {
return {
table: common_1.default.stripQuote(primary.identifier()),
column: common_1.default.stripQuote(ctx.identifier())
};
}
}
return undefined;
}
asPrimaryExpression(ctx) {
const boolExpr = ctx.booleanExpression();
if (boolExpr instanceof SqlBaseParser_1.PredicatedContext) {
const valExpr = boolExpr.valueExpression();
if (valExpr instanceof SqlBaseParser_1.ValueExpressionDefaultContext) {
return valExpr.primaryExpression();
}
}
return undefined;
}
/**
* Derives column name from expression if possible.
*/
deriveColumnName(ctx) {
var _a;
const primExpr = this.asPrimaryExpression(ctx);
if (primExpr) {
return (_a = this.extractTableAndColumn(primExpr)) === null || _a === void 0 ? void 0 : _a.column.name;
}
return undefined;
}
/**
* Called when relation is ready.
* @param _relation
* @param _alias
* @returns
*/
onRelation(_relation, _alias) {
return;
}
reportTableReferences() {
for (const [alias, relation] of this.currentRelation.relations) {
if (relation instanceof TableRelation) {
this.onRelation(relation, alias !== relation.id ? alias : undefined);
}
}
}
/**
* Called when column reference is ready.
* @param _tableId
* @param _columnId
* @returns
*/
onColumnReference(_tableId, _columnId) {
return;
}
/**
* Determines whether expression is a star.
*/
isStar(ctx) {
const boolExpr = ctx.booleanExpression();
if (boolExpr instanceof SqlBaseParser_1.PredicatedContext) {
const valExpr = boolExpr.valueExpression();
if (valExpr instanceof SqlBaseParser_1.ValueExpressionDefaultContext) {
const primaryExpr = valExpr.primaryExpression();
if (primaryExpr instanceof SqlBaseParser_1.StarContext) {
return primaryExpr;
}
}
}
return undefined;
}
processStar(ctx) {
const range = this.rangeFromContext(ctx);
const qualifiedName = ctx.qualifiedName();
if (qualifiedName !== undefined) {
// TODO support multipart table names
const lastName = qualifiedName.identifier()[qualifiedName.identifier().length - 1];
const tableName = common_1.default.stripQuote(lastName);
const rel = this.currentRelation.findLocalRelation(tableName);
if (rel !== undefined) {
this.addRelationColumns(rel, range);
}
}
else {
for (const r of this.currentRelation.relations) {
this.addRelationColumns(r[1], range);
}
}
return this.visitChildren(ctx);
}
addRelationColumns(rel, range) {
rel.columns.forEach(c => {
const columnId = this.currentRelation.getNextColumnId();
const col = new Column(columnId, c.label, range);
this.currentRelation.columns.push(col);
this.currentRelation.currentColumnId = columnId;
this.onColumnReference(rel.id, c.id);
this.currentRelation.currentColumnId = undefined;
});
}
processClause(clause, ctx) {
this.currentRelation.currentClause = clause;
const result = this.visitChildren(ctx);
this.currentRelation.currentClause = undefined;
return result;
}
//
// Visitor method overrides
//
visitQuery(ctx) {
var _a;
this.currentRelation = new QueryRelation(this.getNextRelationId(), this.currentRelation, this.rangeFromContext(ctx));
const result = this.visitChildren(ctx);
this.reportTableReferences();
// to be consumed later
this.lastRelation = this.currentRelation;
if (this.currentRelation.id == ROOT_QUERY_ID)
this.onRelation(this.currentRelation, exports.ROOT_QUERY_NAME);
this.currentRelation = (_a = this.currentRelation.parent) !== null && _a !== void 0 ? _a : new QueryRelation(this.getNextRelationId());
return result;
}
/**
* Processing set operations.
*/
visitQueryTermDefault(ctx) {
// reinit column seq as we will repeat the same columns in subsequent queries
this.currentRelation.columnIdSeq = 0;
// reports table references from previous queryTerm (if any)
this.reportTableReferences();
// clear relations for each queryTermDefault because it's individual query
this.currentRelation.relations = new Map();
return this.visitChildren(ctx);
}
visitRegularQuerySpecification(ctx) {
var _a, _b, _c;
// process FROM first to capture all available relations
let result = (_b = (_a = ctx.fromClause()) === null || _a === void 0 ? void 0 : _a.accept(this)) !== null && _b !== void 0 ? _b : this.defaultResult();
// then process all remaining clauses
(_c = ctx.children) === null || _c === void 0 ? void 0 : _c.forEach(c => {
if (!(c instanceof SqlBaseParser_1.FromClauseContext)) {
result = this.aggregateResult(result, c.accept(this));
}
});
return result;
}
/**
* Process JOIN ... USING (...) columns
* @param ctx
*/
visitJoinCriteriaUsing(ctx) {
const columns = ctx
.identifierList()
.identifierSeq()
.errorCapturingIdentifier()
.map(eci => common_1.default.stripQuote(eci.identifier()));
// columns shall be searched in last from relation and all previous ones
const size = this.currentRelation.relations.size;
if (size >= 2) {
let i = 0;
const foundLeftCol = new Set();
this.currentRelation.relations.forEach(r => {
if (i == size - 1) {
// this is the last (right) relation
columns.forEach(c => {
const col = r.resolveColumn(c);
if (col)
this.onColumnReference(col.tableId, col.columnId);
});
}
else {
columns.forEach((c, j) => {
if (!foundLeftCol.has(j)) {
const col = r.resolveColumn(c);
if (col)
this.onColumnReference(col.tableId, col.columnId);
foundLeftCol.add(j);
}
});
}
i++;
});
}
return this.visitChildren(ctx);
}
// processes table/CTE/correlated subquery references
visitTableName(ctx) {
const multipartTableName = ctx
.multipartIdentifier()
.errorCapturingIdentifier()
.map(v => common_1.default.stripQuote(v.identifier()));
const strictId = ctx.tableAlias().strictIdentifier();
const alias = (strictId !== undefined
? common_1.default.stripQuote(strictId)
: multipartTableName[multipartTableName.length - 1]).name;
if (multipartTableName.length == 1) {
const cte = this.currentRelation.findCTE(multipartTableName[0]);
if (cte !== undefined) {
// found relation as CTE
this.currentRelation.relations.set(alias, cte);
return this.defaultResult();
}
const rel = this.currentRelation.findRelation(multipartTableName[0]);
if (rel !== undefined) {
// found relation as correlated sq
if (multipartTableName[0].name != alias) {
this.currentRelation.relations.set(alias, rel);
}
return this.defaultResult();
}
}
const tablePrimary = common_1.default.tablePrimaryFromMultipart(multipartTableName.map(v => v.name));
const metadata = this.getTable(tablePrimary);
const columns = (metadata === null || metadata === void 0 ? void 0 : metadata.columns.map(c => new Column(c.id, c.id, undefined, c.data))) || [];
const relation = new TableRelation(this.getNextRelationId(), tablePrimary, columns, metadata !== undefined, this.currentRelation, this.rangeFromContext(ctx), metadata === null || metadata === void 0 ? void 0 : metadata.table.data);
this.currentRelation.relations.set(alias, relation);
return this.defaultResult();
}
// processes CTE
visitNamedQuery(ctx) {
var _a;
const result = this.visitChildren(ctx);
// expecting query relation to be in stack
const relation = this.lastRelation;
if (relation !== undefined) {
const identifier = ctx.errorCapturingIdentifier().identifier();
const alias = common_1.default.stripQuote(identifier).name;
const columnAliases = (_a = ctx
.identifierList()) === null || _a === void 0 ? void 0 : _a.identifierSeq().errorCapturingIdentifier().map(eci => common_1.default.stripQuote(eci.identifier()).name);
if (columnAliases !== undefined) {
relation.columns.forEach((c, i) => {
var _a;
c.label = (_a = columnAliases[i]) !== null && _a !== void 0 ? _a : c.label;
});
}
// Notify the callback before adding the cte as the cte is not yet available in scope.
this.onRelation(relation, alias);
this.currentRelation.ctes.set(alias, relation);
return result;
}
else {
throw new Error("Expecting CTE query relation to be in stack");
}
}
// processes subqueries
visitAliasedQuery(ctx) {
var _a;
const result = this.visitChildren(ctx);
// expecting query relation to be in stack
const relation = this.lastRelation;
if (relation !== undefined) {
const strictId = ctx.tableAlias().strictIdentifier();
const alias = strictId !== undefined ? common_1.default.stripQuote(strictId).name : relation.id;
const columnAliases = (_a = ctx
.tableAlias()
.identifierList()) === null || _a === void 0 ? void 0 : _a.identifierSeq().errorCapturingIdentifier().map(eci => common_1.default.stripQuote(eci.identifier()).name);
if (columnAliases !== undefined) {
relation.columns.forEach((c, i) => {
var _a;
c.label = (_a = columnAliases[i]) !== null && _a !== void 0 ? _a : c.label;
});
}
this.currentRelation.relations.set(alias, relation);
this.onRelation(relation, alias);
return result;
}
else {
throw new Error("Expecting subquery relation to be in stack");
}
}
visitSelectClause(ctx) {
return this.processClause("select", ctx);
}
visitFromClause(ctx) {
return this.processClause("from", ctx);
}
visitWhereClause(ctx) {
return this.processClause("where", ctx);
}
visitGroupByClause(ctx) {
return this.processClause("group by", ctx);
}
visitHavingClause(ctx) {
return this.processClause("having", ctx);
}
visitQueryOrganization(ctx) {
return this.processClause("order by", ctx);
}
/**
* Handle subquery in EXISTS.
* @param ctx
* @returns
*/
visitExists(ctx) {
const result = this.visitChildren(ctx);
const rel = this.lastRelation;
if (rel !== undefined) {
this.onRelation(rel);
this.onColumnReference(rel.id);
}
return result;
}
/**
* Handle subquery in IN predicate.
* @param ctx
* @returns
*/
visitPredicate(ctx) {
const result = this.visitChildren(ctx);
if (ctx.query() !== undefined) {
const rel = this.lastRelation;
if (rel !== undefined) {
this.onRelation(rel);
this.onColumnReference(rel.id);
}
}
return result;
}
visitNamedExpression(ctx) {
var _a;
if (ctx.errorCapturingIdentifier() === undefined) {
const star = this.isStar(ctx.expression());
if (star !== undefined) {
return this.processStar(star);
}
}
const columnId = this.currentRelation.getNextColumnId();
let column = this.currentRelation.columns.find(c => c.id == columnId);
// column could have been already defined if we have set operation
if (column === undefined) {
const errCaptId = ctx.errorCapturingIdentifier();
const label = errCaptId !== undefined
? common_1.default.stripQuote(errCaptId.identifier()).name
: (_a = this.deriveColumnName(ctx.expression())) !== null && _a !== void 0 ? _a : columnId;
const range = this.rangeFromContext(ctx);
column = new Column(columnId, label, range);
this.currentRelation.columns.push(column);
}
this.currentRelation.currentColumnId = columnId;
this.currentRelation.columnReferences = [];
const result = this.visitChildren(ctx);
column.columnReferences.push(...this.currentRelation.columnReferences);
this.currentRelation.currentColumnId = undefined;
return result;
}
visitSubqueryExpression(ctx) {
const result = this.visitChildren(ctx);
const rel = this.lastRelation;
if (rel !== undefined) {
this.onRelation(rel);
this.onColumnReference(rel.id);
}
return result;
}
visitFunctionCall(ctx) {
if (ctx.functionName().text.toLowerCase() == "count" &&
ctx.expression().length == 1 &&
this.isStar(ctx.expression()[0])) {
for (const r of this.currentRelation.relations) {
this.onColumnReference(r[1].id);
}
}
return this.visitChildren(ctx);
}
visitSortItem(ctx) {
var _a;
if ((_a = this.options) === null || _a === void 0 ? void 0 : _a.positionalRefsEnabled) {
const primExp = this.asPrimaryExpression(ctx.expression());
if (primExp instanceof SqlBaseParser_1.ConstantDefaultContext) {
const constant = primExp.constant();
if (constant instanceof SqlBaseParser_1.NumericLiteralContext) {
const idx = Number(constant.text) - 1;
if (idx in this.currentRelation.columns) {
const col = this.currentRelation.columns[idx];
col.columnReferences.forEach(cr => this.onColumnReference(cr.tableId, cr.columnId));
return this.defaultResult();
}
}
}
}
return this.visitChildren(ctx);
}
processColumnReference(ctx) {
const tableCol = this.extractTableAndColumn(ctx);
if (tableCol !== undefined) {
if (tableCol.table === undefined &&
this.currentRelation.currentClause !== undefined &&
["group by", "order by"].includes(this.currentRelation.currentClause)) {
// check if it is self column reference
const selfCol = this.currentRelation.findColumn(tableCol.column);
if (selfCol) {
selfCol.columnReferences.forEach(cr => this.onColumnReference(cr.tableId, cr.columnId));
return this.defaultResult();
}
}
const range = this.rangeFromContext(ctx);
const col = this.currentRelation.resolveOrAssumeRelationColumn(tableCol.column, range, tableCol.table);
if (col !== undefined) {
this.currentRelation.columnReferences.push(col);
this.onColumnReference(col.tableId, col.columnId);
}
return this.defaultResult();
}
else {
return this.visitChildren(ctx);
}
}
visitColumnReference(ctx) {
return this.processColumnReference(ctx);
}
visitDereference(ctx) {
return this.processColumnReference(ctx);
}
}
exports.QueryStructureVisitor = QueryStructureVisitor;
//# sourceMappingURL=QueryStructureVisitor.js.map