rhombic
Version:
SQL parsing, lineage extraction and manipulation
121 lines • 5.58 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.SqlLineageParseTree = void 0;
const ExtractTablesVisitor_1 = require("./ExtractTablesVisitor");
const LineageVisitor_1 = require("./LineageVisitor");
/**
 * SQL parse tree with available operations.
 *
 * Holds a parsed statement plus an optional cursor marker and exposes two
 * operations on top of it: listing the tables a query may use and computing
 * column-level lineage.
 */
class SqlLineageParseTree {
    /**
     * Creates SQL parse tree from antlr StatementContext
     * @param tree StatementContext object which is the product of parsing SQL
     * @param cursor A representation of the cursor to look for in the query
     */
    constructor(tree, cursor) {
        this.tree = tree;
        this.cursor = cursor;
    }
    /**
     * Extracts and returns all potentially used tables. Note that this method does not perform
     * context analysis and thus can return not only external tables used but also references to
     * CTEs or subqueries defined inside the query itself. But it is guaranteed that all external
     * (to the query) tables will be returned.
     * This method is commonly used to analyse a query and pre-fetch metadata for the tables used.
     * @returns Tables used in query (whatever the tree's `accept` returns for the extraction visitor)
     */
    getUsedTables() {
        const visitor = new ExtractTablesVisitor_1.ExtractTablesVisitor(this.cursor);
        return this.tree.accept(visitor);
    }
    /**
     * Extracts column level lineage from SQL parse tree.
     * There are 2 principal modes that control lineage representation: "merged leaves" and
     * "tree" (default).
     * - In "tree" mode (default) all source tables are displayed with all their columns and
     *   mentioned as many times as they occur in the query.
     * - In "mergedLeaves" mode source tables are mentioned only once even if they are used multiple
     *   times in the query. Source table columns that are not used in the query are omitted from
     *   the lineage.
     *
     * Note: in "mergedLeaves" mode the table and edge objects produced by the lineage visitor are
     * adjusted in place (table `label` and `columns`, edge `source.tableId`). In both modes tables
     * that never appear as an edge source get `isTargetOnly = true`.
     *
     * @param getTable Function to get table metadata. It takes table identifier and returns some
     * table data plus the list of columns for this table. Columns are expected to be in particular
     * order as defined in this table's DDL. Defaults to a function that always returns `undefined`.
     * @param mergedLeaves Selects mode for the lineage generation ("tree" (default) when falsy,
     * "mergedLeaves" when truthy).
     * @param options Lineage generation options, forwarded to the lineage visitor:
     * - `positionalRefsEnabled` (`false` by default) controls whether to interpret numerical
     *   references inside ORDER BY as references to SELECT list expressions
     * @returns Calculated lineage as `{ nodes, edges }`.
     */
    getLineage(getTable = () => undefined, mergedLeaves, options) {
        const cursor = this.cursor;
        // A null cursor is treated like a missing one; otherwise the wrapper below would
        // throw on `cursor.removeFrom` the first time metadata is requested.
        const hasCursor = cursor !== undefined && cursor !== null;
        // With a cursor, identifiers go through `cursor.removeFrom` before the lookup
        // (presumably stripping the cursor marker - confirm against the cursor implementation).
        const fetchOp = hasCursor ? tp => getTable(cursor.removeFrom(tp)) : getTable;
        const visitor = new LineageVisitor_1.LineageVisitor(fetchOp, options);
        this.tree.accept(visitor);
        const tables = visitor.tables;
        const edges = visitor.edges;
        const cleanedTables = [];
        if (mergedLeaves) {
            // 1. Keep the first occurrence of every physical table; later occurrences are
            //    remembered as "duplicate id -> kept id" so edges can be re-pointed.
            const keptIdByDuplicateId = new Map();
            const keptIdByKey = new Map();
            tables.forEach(t => {
                if (t.tablePrimary === undefined) {
                    // No table primary: entry is not deduplicated, keep as is.
                    cleanedTables.push(t.table);
                    return;
                }
                // The serialized table primary is the identity used for deduplication.
                const key = JSON.stringify(t.tablePrimary);
                const keptId = keptIdByKey.get(key);
                if (keptId !== undefined) {
                    keptIdByDuplicateId.set(t.table.id, keptId);
                    return;
                }
                keptIdByKey.set(key, t.table.id);
                t.table.label = t.tablePrimary.tableName;
                cleanedTables.push(t.table);
            });
            // 2. Re-point edges at the kept tables and record which columns each table feeds.
            //    A Set per table gives O(1) membership checks in step 3 and ignores repeats.
            const usedColumnsByTable = new Map();
            edges.forEach(e => {
                const source = e.source;
                const remappedTableId = keptIdByDuplicateId.get(source.tableId);
                if (remappedTableId !== undefined) {
                    source.tableId = remappedTableId;
                }
                if (source.columnId === undefined) {
                    return;
                }
                let used = usedColumnsByTable.get(source.tableId);
                if (used === undefined) {
                    used = new Set();
                    usedColumnsByTable.set(source.tableId, used);
                }
                used.add(source.columnId);
            });
            // 3. For tables carrying `data`, drop the columns that no edge reads from.
            //    A table without any recorded column ends up with an empty column list.
            for (const t of cleanedTables) {
                if (t.data === undefined) {
                    continue;
                }
                const used = usedColumnsByTable.get(t.id);
                t.columns = t.columns.filter(c => used !== undefined && used.has(c.id));
            }
        }
        else {
            tables.forEach(t => cleanedTables.push(t.table));
        }
        // Tables that never act as an edge source are flagged as target-only.
        const sourceTables = new Set();
        edges.forEach(e => {
            sourceTables.add(e.source.tableId);
        });
        for (const t of cleanedTables) {
            if (!sourceTables.has(t.id)) {
                t.isTargetOnly = true;
            }
        }
        return {
            nodes: cleanedTables,
            edges
        };
    }
}
exports.SqlLineageParseTree = SqlLineageParseTree;
//# sourceMappingURL=SqlLineageParseTree.js.map