rawsql-ts
Version:
[beta] High-performance SQL parser and AST analyzer written in TypeScript. Provides fast parsing and advanced transformation capabilities.
523 lines • 24.7 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.SchemaCollector = exports.TableSchema = void 0;
const Clause_1 = require("../models/Clause");
const SimpleSelectQuery_1 = require("../models/SimpleSelectQuery");
const CTECollector_1 = require("./CTECollector");
const SelectableColumnCollector_1 = require("./SelectableColumnCollector");
const ValueComponent_1 = require("../models/ValueComponent");
const SelectQuery_1 = require("../models/SelectQuery");
/**
 * Plain value object pairing a table name with the column names collected for it.
 */
class TableSchema {
    /**
     * @param name Name of the table (or CTE) this schema entry describes.
     * @param columns Column names associated with that table.
     */
    constructor(name, columns) {
        // Assign both fields in one step; property order (name, columns) is preserved.
        Object.assign(this, { name, columns });
    }
}
exports.TableSchema = TableSchema;
/**
 * Collects schema information (table names and the columns referenced on each table)
 * from SelectQuery instances.
 *
 * Two entry points are offered:
 * - `collect` throws as soon as a column or wildcard cannot be attributed to a table.
 * - `analyze` records those problems in its result object instead of throwing them.
 *
 * @example
 * ```typescript
 * const collector = new SchemaCollector((table) => ['id', 'name']);
 * const query = SelectQueryParser.parse('SELECT id, name FROM users');
 * const schemas = collector.collect(query);
 * ```
 * Related tests: packages/core/tests/transformers/SchemaCollector.test.ts
 */
class SchemaCollector {
    /**
     * @param tableColumnResolver Optional callback invoked with a table or CTE name; it must
     *        return an array of column names (an empty array means "unknown"). It is also
     *        handed to the SelectableColumnCollector used for each query.
     * @param allowWildcardWithoutResolver When true, a wildcard that cannot be expanded is
     *        tolerated instead of being reported as an error.
     */
    constructor(tableColumnResolver = null, allowWildcardWithoutResolver = false) {
        this.tableColumnResolver = tableColumnResolver;
        this.allowWildcardWithoutResolver = allowWildcardWithoutResolver;
        this.tableSchemas = [];
        this.visitedNodes = new Set();
        this.commonTables = [];
        this.running = false;
        // State used by analyze()
        this.unresolvedColumns = [];
        this.analysisError = undefined;
        this.isAnalyzeMode = false;
        // Dispatch table keyed by component kind
        this.handlers = new Map();
        this.handlers.set(SimpleSelectQuery_1.SimpleSelectQuery.kind, (expr) => this.visitSimpleSelectQuery(expr));
        this.handlers.set(SelectQuery_1.BinarySelectQuery.kind, (expr) => this.visitBinarySelectQuery(expr));
    }
    /**
     * Collects schema information (table names and column names) from a SQL query structure.
     * Each table appears once in the result; tables and their columns are sorted
     * alphabetically so the output is deterministic.
     *
     * @param arg The SQL query structure to analyze (SimpleSelectQuery or BinarySelectQuery).
     * @returns The consolidated list of TableSchema entries.
     * @throws Error if `arg` is not a supported query, if a query with JOINs contains a column
     *         without a table name, or if a wildcard is used without a resolver while
     *         `allowWildcardWithoutResolver` is false.
     */
    collect(arg) {
        this.visit(arg);
        return this.tableSchemas;
    }
    /**
     * Analyzes schema information from a SQL query structure, recording column-resolution
     * problems (unqualified columns in JOIN queries, unresolvable wildcards, undefined CTE
     * columns) in the result instead of throwing them.
     *
     * Note: errors that are not resolution problems are not caught here. An unsupported
     * `arg`, or an exception raised by a collaborator, still propagates to the caller.
     *
     * @param arg The SQL query structure to analyze.
     * @returns Analysis result containing schemas, unresolved columns, the last recorded
     *          error message (if any) and a success flag.
     */
    analyze(arg) {
        this.isAnalyzeMode = true;
        try {
            this.visit(arg);
            return {
                success: this.unresolvedColumns.length === 0 && !this.analysisError,
                schemas: this.tableSchemas,
                unresolvedColumns: this.unresolvedColumns,
                error: this.analysisError
            };
        }
        finally {
            // Always leave analyze mode so a later collect() throws as usual
            this.isAnalyzeMode = false;
        }
    }
    /**
     * Main entry point for the visitor pattern.
     * A call made while a traversal is already running is treated as a nested visit and only
     * dispatches the node. A root call resets the collector state, gathers the CTEs, walks
     * the query and finally consolidates (deduplicates and sorts) the collected schemas.
     *
     * @param arg The SQL component to visit.
     * @throws Error on a root visit when `arg` is neither a SimpleSelectQuery nor a
     *         BinarySelectQuery (including null and undefined).
     */
    visit(arg) {
        // Nested visit: the root call owns state management
        if (this.running) {
            this.visitNode(arg);
            return;
        }
        this.reset();
        this.running = true;
        try {
            const isSelectQuery = arg instanceof SimpleSelectQuery_1.SimpleSelectQuery
                || arg instanceof SelectQuery_1.BinarySelectQuery;
            if (!isSelectQuery) {
                // Describe the argument without dereferencing null/undefined, so the caller
                // gets the intended message rather than an unrelated TypeError.
                let received;
                if (arg === null || arg === undefined) {
                    received = String(arg);
                }
                else if (arg.constructor) {
                    received = arg.constructor.name;
                }
                else {
                    received = typeof arg;
                }
                throw new Error(`Unsupported SQL component type for schema collection. Received: ${received}. Expected: SimpleSelectQuery or BinarySelectQuery.`);
            }
            // Collect Common Table Expressions (CTEs) up front so table references can be matched against them
            const cteCollector = new CTECollector_1.CTECollector();
            this.commonTables = cteCollector.collect(arg);
            this.visitNode(arg);
            this.consolidateTableSchemas();
        }
        finally {
            // Regardless of success or failure, clear the root visit flag
            this.running = false;
        }
    }
    /**
     * Internal visit method used for all nodes.
     * Dispatches to the handler registered for the node's kind; nodes that were already
     * visited, or that have no handler, are ignored.
     *
     * @param arg The SQL component to dispatch.
     */
    visitNode(arg) {
        // Guard against visiting the same node twice (prevents infinite recursion)
        if (this.visitedNodes.has(arg)) {
            return;
        }
        this.visitedNodes.add(arg);
        const handler = this.handlers.get(arg.getKind());
        if (handler) {
            handler(arg);
        }
        // No handler: that's ok - only specific components are of interest
    }
    /**
     * Resets the state of the collector for a new root visit.
     */
    reset() {
        this.tableSchemas = [];
        this.visitedNodes = new Set();
        this.commonTables = [];
        this.unresolvedColumns = [];
        this.analysisError = undefined;
    }
    /**
     * Consolidates table schemas by merging columns for tables with the same name.
     * Each table name appears only once in the final list, with all of its columns
     * combined and duplicates removed.
     *
     * Note: the resulting schemas and columns are sorted alphabetically to ensure deterministic ordering.
     */
    consolidateTableSchemas() {
        const columnsByTable = new Map();
        for (const schema of this.tableSchemas) {
            let merged = columnsByTable.get(schema.name);
            if (merged === undefined) {
                merged = new Set();
                columnsByTable.set(schema.name, merged);
            }
            for (const column of schema.columns) {
                merged.add(column);
            }
        }
        this.tableSchemas = Array.from(columnsByTable.entries())
            .sort(([nameA], [nameB]) => nameA.localeCompare(nameB)) // Sort by table name
            .map(([name, columns]) => new TableSchema(name, Array.from(columns).sort())); // Sort columns alphabetically
    }
    /**
     * Records schema information for a single FROM/JOIN source.
     * Table sources are matched against the collected CTEs first; subquery sources are
     * only traversed, and any other source type is ignored.
     *
     * @param source The source expression (datasource plus optional alias).
     * @param queryColumns `{ table, column }` pairs referenced by the enclosing query.
     * @param includeUnnamed Whether columns without a table qualifier belong to this source.
     */
    handleSourceExpression(source, queryColumns, includeUnnamed) {
        const datasource = source.datasource;
        if (datasource instanceof Clause_1.TableSource) {
            const tableName = datasource.getSourceName();
            const cte = this.commonTables.find((table) => table.getSourceAliasName() === tableName);
            if (cte !== undefined) {
                // Process the CTE query recursively
                cte.query.accept(this);
                // Also collect schema information for the CTE itself
                const aliasName = source.getAliasName();
                const cteAlias = aliasName !== null && aliasName !== undefined ? aliasName : tableName;
                this.processCTETableSchema(cte, cteAlias, queryColumns, includeUnnamed);
            }
            else {
                const aliasName = source.getAliasName();
                const tableAlias = aliasName !== null && aliasName !== undefined ? aliasName : tableName;
                this.processCollectTableSchema(tableName, tableAlias, queryColumns, includeUnnamed);
            }
        }
        else if (datasource instanceof Clause_1.SubQuerySource) {
            // Subqueries are derived: their schema comes from the inner query
            this.visitNode(datasource.query);
        }
        // Other source types (e.g. FunctionSource, ParenSource) do not represent table schemas; skip them
    }
    /**
     * Collects the column references of one simple SELECT and attributes them to the
     * sources of its FROM clause and JOINs. Queries without a FROM clause contribute nothing.
     *
     * @param query The SimpleSelectQuery to process.
     * @throws Error in collect mode when the query has JOINs and a column lacks a table name.
     */
    visitSimpleSelectQuery(query) {
        const fromClause = query.fromClause;
        // Treat a missing FROM clause (null or undefined) as "nothing to collect"
        if (fromClause === null || fromClause === undefined) {
            return;
        }
        const columnCollector = new SelectableColumnCollector_1.SelectableColumnCollector(this.tableColumnResolver, true, SelectableColumnCollector_1.DuplicateDetectionMode.FullName);
        let columnRefs = columnCollector.collect(query)
            .filter((column) => column.value instanceof ValueComponent_1.ColumnReference)
            .map((column) => column.value);
        // Only filter JOIN condition columns when allowWildcardWithoutResolver is true.
        // This preserves backward compatibility: the default keeps every referenced column.
        if (this.allowWildcardWithoutResolver) {
            const selectColumns = this.getSelectClauseColumns(query);
            columnRefs = columnRefs.filter((columnRef) => this.matchesSelectClause(columnRef, selectColumns));
        }
        const queryColumns = columnRefs.map((columnRef) => ({
            table: columnRef.getNamespace(),
            column: columnRef.column.name
        }));
        const joins = fromClause.joins;
        // With more than one source, an unqualified column cannot be attributed to a table
        if (joins !== null && joins !== undefined && joins.length > 0) {
            const columnsWithoutTable = queryColumns
                .filter((columnRef) => columnRef.table === "")
                .map((columnRef) => columnRef.column);
            if (columnsWithoutTable.length > 0) {
                const errorMessage = `Column reference(s) without table name found in query: ${columnsWithoutTable.join(', ')}`;
                if (!this.isAnalyzeMode) {
                    throw new Error(errorMessage);
                }
                // In analyze mode, record the problem and keep going
                this.unresolvedColumns.push(...columnsWithoutTable);
                this.analysisError = errorMessage;
            }
        }
        const processSource = (source, includeUnnamed) => {
            if (source.datasource instanceof Clause_1.TableSource) {
                this.handleSourceExpression(source, queryColumns, includeUnnamed);
            }
            else if (source.datasource instanceof Clause_1.SubQuerySource) {
                source.datasource.query.accept(this);
            }
        };
        // Main FROM source owns the unqualified columns; JOIN sources only get qualified ones
        processSource(fromClause.source, true);
        if (joins) {
            for (const join of joins) {
                processSource(join.source, false);
            }
        }
    }
    /**
     * Tells whether a collected column reference is covered by the SELECT clause, either
     * as an explicit column or through a wildcard (`*` or `table.*`).
     * Wildcard coverage is refused when wildcards are allowed without a resolver and no
     * resolver is configured, because the expansion cannot be known in that case.
     *
     * @param columnRef Column reference collected from the query.
     * @param selectColumns Entries produced by getSelectClauseColumns.
     * @returns True when the reference should be kept.
     */
    matchesSelectClause(columnRef, selectColumns) {
        const tableName = columnRef.getNamespace();
        const columnName = columnRef.column.name;
        return selectColumns.some((selectCol) => {
            if (!(selectCol.value instanceof ValueComponent_1.ColumnReference)) {
                return false;
            }
            const selectTableName = selectCol.value.getNamespace();
            const selectColumnName = selectCol.value.column.name;
            // Exact match for explicit columns
            if (selectTableName === tableName && selectColumnName === columnName) {
                return true;
            }
            if (selectColumnName !== "*") {
                return false;
            }
            if (this.allowWildcardWithoutResolver && this.tableColumnResolver === null) {
                return false;
            }
            // Full wildcard (*) matches all tables; table wildcard (table.*) matches its own table
            return selectTableName === "" || selectTableName === tableName;
        });
    }
    /**
     * Visits both operands of a binary (set-operation) select query.
     *
     * @param query The BinarySelectQuery to process.
     */
    visitBinarySelectQuery(query) {
        this.visitNode(query.left);
        this.visitNode(query.right);
    }
    /**
     * Extracts column references from the SELECT clause only.
     *
     * @param query The query whose SELECT items are inspected.
     * @returns `{ name, value }` entries for every SELECT item that is a plain column reference.
     */
    getSelectClauseColumns(query) {
        if (!query.selectClause) {
            return [];
        }
        const selectColumns = [];
        for (const item of query.selectClause.items) {
            if (item.value instanceof ValueComponent_1.ColumnReference) {
                selectColumns.push({ name: item.value.column.name, value: item.value });
            }
        }
        return selectColumns;
    }
    /**
     * Adds a schema entry for a physical table, made of the non-wildcard columns that
     * reference it (by alias, plus unqualified columns when `includeUnnamed` is set).
     * A wildcard without a resolver is an error unless `allowWildcardWithoutResolver` is set.
     *
     * @param tableName Name recorded in the schema entry.
     * @param tableAlias Qualifier used by column references to this table.
     * @param queryColumns `{ table, column }` pairs referenced by the enclosing query.
     * @param includeUnnamed Whether unqualified columns belong to this table.
     * @throws Error in collect mode when a wildcard cannot be resolved.
     */
    processCollectTableSchema(tableName, tableAlias, queryColumns, includeUnnamed = false) {
        const ownColumns = queryColumns
            .filter((columnRef) => columnRef.table === tableAlias || (includeUnnamed && columnRef.table === ""));
        if (this.tableColumnResolver === null && !this.allowWildcardWithoutResolver) {
            const wildcardRefs = ownColumns.filter((columnRef) => columnRef.column === "*");
            if (wildcardRefs.length > 0) {
                const errorMessage = tableName
                    ? `Wildcard (*) is used. A TableColumnResolver is required to resolve wildcards. Target table: ${tableName}`
                    : "Wildcard (*) is used. A TableColumnResolver is required to resolve wildcards.";
                if (!this.isAnalyzeMode) {
                    throw new Error(errorMessage);
                }
                // In analyze mode, record the error and the offending wildcards, then continue
                this.analysisError = errorMessage;
                this.unresolvedColumns.push(...wildcardRefs.map((columnRef) => columnRef.table ? `${columnRef.table}.*` : "*"));
            }
        }
        const tableColumns = ownColumns
            .filter((columnRef) => columnRef.column !== "*")
            .map((columnRef) => columnRef.column);
        this.tableSchemas.push(new TableSchema(tableName, tableColumns));
    }
    /**
     * Adds a schema entry for a CTE referenced as a table.
     * Wildcards are expanded from the resolver first, then from the CTE's own SELECT list.
     * In analyze mode, explicitly referenced columns are validated against the known columns.
     *
     * @param cte The CTE definition (its alias name and query are read).
     * @param cteAlias Qualifier used by column references to this CTE.
     * @param queryColumns `{ table, column }` pairs referenced by the enclosing query.
     * @param includeUnnamed Whether unqualified columns belong to this CTE.
     * @throws Error in collect mode when a wildcard on the CTE cannot be resolved.
     */
    processCTETableSchema(cte, cteAlias, queryColumns, includeUnnamed = false) {
        const cteName = cte.getSourceAliasName();
        // Columns the CTE exposes, derived from its SELECT clause (empty when unknown)
        const cteColumns = this.getCTEColumns(cte);
        const cteReferencedColumns = queryColumns
            .filter((columnRef) => columnRef.table === cteAlias || (includeUnnamed && columnRef.table === ""))
            .map((columnRef) => columnRef.column);
        if (cteReferencedColumns.includes("*")) {
            if (this.tableColumnResolver !== null) {
                // The resolver takes precedence over columns inferred from the CTE body
                const resolvedColumns = this.tableColumnResolver(cteName);
                if (resolvedColumns.length > 0) {
                    this.tableSchemas.push(new TableSchema(cteName, resolvedColumns));
                    return;
                }
            }
            if (cteColumns.length > 0) {
                this.tableSchemas.push(new TableSchema(cteName, cteColumns));
                return;
            }
            if (this.allowWildcardWithoutResolver) {
                // Wildcard tolerated, but its expansion is unknown
                this.tableSchemas.push(new TableSchema(cteName, []));
                return;
            }
            const errorMessage = `Wildcard (*) is used. A TableColumnResolver is required to resolve wildcards. Target table: ${cteName}`;
            if (!this.isAnalyzeMode) {
                throw new Error(errorMessage);
            }
            this.analysisError = errorMessage;
            this.unresolvedColumns.push(cteAlias ? `${cteAlias}.*` : "*");
            return;
        }
        // No wildcard at this point, so every referenced name is a specific column
        const tableColumns = cteReferencedColumns;
        if (this.isAnalyzeMode) {
            let availableColumns = cteColumns;
            if (this.tableColumnResolver) {
                const resolvedColumns = this.tableColumnResolver(cteName);
                if (resolvedColumns.length > 0) {
                    availableColumns = resolvedColumns;
                }
            }
            let invalidColumns = [];
            if (availableColumns.length > 0) {
                invalidColumns = tableColumns.filter((column) => !availableColumns.includes(column));
            }
            else if (!this.allowWildcardWithoutResolver) {
                // Nothing to validate against: every reference is unverifiable, hence reported.
                // When wildcards are allowed without a resolver, validation is skipped instead.
                invalidColumns = tableColumns;
            }
            if (invalidColumns.length > 0) {
                this.unresolvedColumns.push(...invalidColumns);
                // Keep the first recorded error; later undefined-column findings only extend the list
                if (!this.analysisError) {
                    this.analysisError = `Undefined column(s) found in CTE "${cteName}": ${invalidColumns.join(', ')}`;
                }
            }
        }
        this.tableSchemas.push(new TableSchema(cteName, tableColumns));
    }
    /**
     * Determines the column names a CTE exposes.
     * Simple selects are read from their SELECT items; other query shapes fall back to a
     * column collector.
     *
     * @param cte The CTE definition.
     * @returns The column names, or an empty array when they cannot be determined.
     */
    getCTEColumns(cte) {
        try {
            if (cte.query instanceof SimpleSelectQuery_1.SimpleSelectQuery && cte.query.selectClause) {
                return this.extractColumnsFromSelectItems(cte.query.selectClause.items, cte);
            }
            return this.extractColumnsUsingCollector(cte.query);
        }
        catch (error) {
            // Deliberate best effort: any failure while inspecting the CTE means "columns unknown"
            return [];
        }
    }
    /**
     * Builds the list of output column names from SELECT items.
     * Column references use their alias when present, wildcards are expanded, and other
     * expressions contribute only when they carry an alias.
     *
     * @param selectItems SELECT items of the CTE query.
     * @param cte The CTE that owns the items (used to expand unqualified wildcards).
     * @returns Unique column names, or an empty array if a wildcard could not be resolved.
     */
    extractColumnsFromSelectItems(selectItems, cte) {
        const columns = [];
        for (const item of selectItems) {
            if (item.value instanceof ValueComponent_1.ColumnReference) {
                if (item.value.column.name === "*") {
                    const wildcardColumns = this.resolveWildcardInCTE(item.value, cte);
                    if (wildcardColumns === null) {
                        return []; // Wildcard couldn't be resolved
                    }
                    columns.push(...wildcardColumns);
                }
                else {
                    const aliasName = item.identifier !== null && item.identifier !== undefined
                        ? item.identifier.name
                        : undefined;
                    columns.push(aliasName || item.value.column.name);
                }
            }
            else if (item.identifier) {
                columns.push(item.identifier.name);
            }
        }
        return this.removeDuplicates(columns);
    }
    /**
     * Expands a wildcard found in a CTE's SELECT list.
     *
     * @param columnRef The wildcard column reference.
     * @param cte The CTE containing the wildcard.
     * @returns The expanded column names, or null when they cannot be determined.
     */
    resolveWildcardInCTE(columnRef, cte) {
        const tableNamespace = columnRef.getNamespace();
        if (tableNamespace) {
            return this.resolveQualifiedWildcard(tableNamespace);
        }
        return this.resolveUnqualifiedWildcard(cte);
    }
    /**
     * Expands `namespace.*` when the namespace is the name of a known CTE.
     *
     * @param tableNamespace Qualifier of the wildcard.
     * @returns The referenced CTE's columns, or null when unknown.
     */
    resolveQualifiedWildcard(tableNamespace) {
        const referencedCTE = this.commonTables.find((cte) => cte.getSourceAliasName() === tableNamespace);
        if (referencedCTE) {
            const referencedColumns = this.getCTEColumns(referencedCTE);
            if (referencedColumns.length > 0) {
                return referencedColumns;
            }
        }
        return null;
    }
    /**
     * Expands a bare `*` using the main FROM source of the CTE query.
     * Only plain table sources are supported; subqueries and other sources yield null.
     *
     * @param cte The CTE containing the wildcard.
     * @returns The table's columns, or null when they cannot be determined.
     */
    resolveUnqualifiedWildcard(cte) {
        if (!(cte.query instanceof SimpleSelectQuery_1.SimpleSelectQuery) || !cte.query.fromClause) {
            return null;
        }
        const fromSource = cte.query.fromClause.source;
        if (fromSource.datasource instanceof Clause_1.TableSource) {
            return this.resolveTableWildcard(fromSource.datasource.table.name);
        }
        // Subqueries are too complex to resolve here; other source types have no table columns
        return null;
    }
    /**
     * Asks the resolver for the columns of a table.
     *
     * @param tableName Table name passed to the resolver.
     * @returns The resolved columns, or null when there is no resolver or it returns nothing.
     */
    resolveTableWildcard(tableName) {
        if (this.tableColumnResolver) {
            const resolvedColumns = this.tableColumnResolver(tableName);
            if (resolvedColumns.length > 0) {
                return resolvedColumns;
            }
        }
        // Unknown columns are reported as null whether or not wildcards are allowed without a resolver
        return null;
    }
    /**
     * Fallback for non-simple CTE queries: collects the column references of the query
     * and returns their unique names.
     *
     * @param query The CTE query.
     * @returns Unique column names in first-seen order.
     */
    extractColumnsUsingCollector(query) {
        const columnCollector = new SelectableColumnCollector_1.SelectableColumnCollector(null, true, SelectableColumnCollector_1.DuplicateDetectionMode.FullName);
        const names = columnCollector.collect(query)
            .filter((column) => column.value instanceof ValueComponent_1.ColumnReference)
            .map((column) => column.value.column.name);
        return this.removeDuplicates(names);
    }
    /**
     * Removes duplicate names while keeping the first occurrence of each.
     *
     * @param columns Column names, possibly with repeats.
     * @returns A new array of unique names in first-seen order.
     */
    removeDuplicates(columns) {
        // A Set iterates in insertion order, so first occurrences keep their position
        return Array.from(new Set(columns));
    }
}
exports.SchemaCollector = SchemaCollector;
//# sourceMappingURL=SchemaCollector.js.map