// rawsql-ts (beta)
// High-performance SQL parser and AST analyzer written in TypeScript.
// Provides fast parsing and advanced transformation capabilities.
Object.defineProperty(exports, "__esModule", { value: true });
exports.PostgresArrayEntityCteBuilder = void 0;
const Clause_1 = require("../models/Clause");
const SimpleSelectQuery_1 = require("../models/SimpleSelectQuery");
const ValueComponent_1 = require("../models/ValueComponent");
const SelectValueCollector_1 = require("./SelectValueCollector");
/**
 * Builds CTEs for array entities using depth-first processing and row compression.
 *
 * Core concepts:
 * - Column Compression: OBJECT relationships (user_id, user_name → user_json)
 * - Row Compression: ARRAY relationships (multiple rows → JSON array via GROUP BY)
 * - Depth-First: Process deepest arrays first for dependency ordering
 * - GROUP BY Exclusion: Exclude array-internal columns to prevent over-grouping
 */
class PostgresArrayEntityCteBuilder {
    /**
     * Builds CTEs for all array entities using depth-first processing.
     * Collects arrays by depth, processes deepest first, chains CTEs.
     *
     * @param ctesSoFar Array of CTEs built so far (not mutated; a copy is returned)
     * @param aliasOfCteToBuildUpon Alias of the CTE to build upon
     * @param allEntities Map of all entities in the mapping
     * @param mapping The JSON mapping configuration
     * @param columnMappings Optional mappings from object entity IDs to generated JSON column names
     * @returns Object containing updated CTEs and last CTE alias
     */
    buildArrayEntityCtes(ctesSoFar, aliasOfCteToBuildUpon, allEntities, mapping, columnMappings) {
        // Work on a copy so the caller's array is never mutated.
        const currentCtes = [...ctesSoFar];
        let currentCteAlias = aliasOfCteToBuildUpon;
        // Collect and sort array entities by depth
        const sortedArrayInfos = this.collectAndSortArrayEntities(mapping, allEntities);
        if (sortedArrayInfos.length === 0) {
            return { updatedCtes: currentCtes, lastCteAlias: currentCteAlias };
        }
        // Group array entities by depth level so each depth becomes a single CTE.
        const entitiesByDepth = this.groupEntitiesByDepth(sortedArrayInfos);
        // Process from deepest to shallowest so child aggregates already exist
        // in the chained CTE by the time a parent array references them.
        const depths = Array.from(entitiesByDepth.keys()).sort((a, b) => b - a);
        for (const depth of depths) {
            const infos = entitiesByDepth.get(depth);
            // Build one CTE covering all array entities at this depth
            const { cte, newCteAlias } = this.buildDepthCte(infos, currentCteAlias, currentCtes, depth, mapping, columnMappings);
            currentCtes.push(cte);
            currentCteAlias = newCteAlias;
        }
        return { updatedCtes: currentCtes, lastCteAlias: currentCteAlias };
    }
    /**
     * Collects array entities and calculates depth for dependency ordering.
     * Depth = distance from root. Deeper arrays processed first.
     *
     * @param mapping The JSON mapping configuration
     * @param allEntities Map of all entities in the mapping
     * @returns Array of array entity information with depths, sorted deepest first
     * @throws Error when an array entity or its parent is missing, or the parent has no columns
     */
    collectAndSortArrayEntities(mapping, allEntities) {
        const arrayEntityInfos = [];
        // Depth = number of parent hops to the root. The visited set guards
        // against malformed mappings whose parentId chain contains a cycle
        // (previously this recursed without a guard and could overflow the stack).
        const getDepth = (entityId, visited = new Set()) => {
            if (visited.has(entityId))
                return 0;
            visited.add(entityId);
            const entity = allEntities.get(entityId);
            if (!entity || entity.isRoot)
                return 0;
            if (!entity.parentId)
                return 1;
            return 1 + getDepth(entity.parentId, visited);
        };
        // Collect all array-type nested entities
        mapping.nestedEntities.forEach(ne => {
            if (ne.relationshipType !== "array") {
                return;
            }
            const currentArrayEntity = allEntities.get(ne.id);
            const parentEntity = allEntities.get(ne.parentId);
            if (!currentArrayEntity || !parentEntity) {
                throw new Error(`Configuration error: Array entity '${ne.id}' or its parent '${ne.parentId}' not found.`);
            }
            // Determine the linking column from parent entity.
            // This assumes the first column of the parent is a suitable key for linking.
            // More robust linking might require explicit configuration in the mapping.
            const parentSqlColumns = Object.values(parentEntity.columns);
            if (parentSqlColumns.length === 0) {
                throw new Error(`Configuration error: Parent entity '${parentEntity.name}' (ID: ${parentEntity.id}) must have at least one column defined to serve as a linking key for child array '${ne.name}'.`);
            }
            arrayEntityInfos.push({
                entity: currentArrayEntity,
                parentEntity: parentEntity,
                parentIdColumnSqlName: parentSqlColumns[0],
                depth: getDepth(ne.id)
            });
        });
        // Sort by depth, deepest arrays (higher depth number) processed first (bottom-up for arrays)
        arrayEntityInfos.sort((a, b) => b.depth - a.depth);
        return arrayEntityInfos;
    }
    /**
     * Groups array entities by depth level for batch processing.
     *
     * @param arrayInfos Array of array entity information with depths
     * @returns Map of depth level to entities at that depth
     */
    groupEntitiesByDepth(arrayInfos) {
        const entitiesByDepth = new Map();
        arrayInfos.forEach(info => {
            const depth = info.depth;
            if (!entitiesByDepth.has(depth)) {
                entitiesByDepth.set(depth, []);
            }
            entitiesByDepth.get(depth).push(info);
        });
        return entitiesByDepth;
    }
    /**
     * Builds CTE for specific depth level using row compression.
     * Uses GROUP BY to aggregate multiple rows into JSON arrays.
     * Excludes array-internal columns from GROUP BY to prevent over-grouping.
     *
     * @param infos Array entities at this depth level
     * @param currentCteAlias Alias of the CTE to build upon
     * @param currentCtes All CTEs built so far
     * @param depth Current depth level being processed
     * @param mapping JSON mapping configuration
     * @param columnMappings Optional mappings from object entity IDs to generated JSON column names
     * @returns The new CTE and its alias
     * @throws Error when the CTE named by currentCteAlias cannot be found
     */
    buildDepthCte(infos, currentCteAlias, currentCtes, depth, mapping, columnMappings) {
        // Columns that will be compressed into arrays at this depth. This
        // includes both direct columns and columns from entities nested
        // (to any depth) inside the array entities.
        const arrayColumns = new Set();
        infos.forEach(info => {
            // Add direct columns from the array entity
            Object.values(info.entity.columns).forEach(col => arrayColumns.add(col));
            // Also add columns from all nested entities within this array entity
            const collectNestedColumns = (parentEntityId) => {
                mapping.nestedEntities
                    .filter(nestedEntity => nestedEntity.parentId === parentEntityId)
                    .forEach(nestedEntity => {
                        Object.values(nestedEntity.columns).forEach(column => {
                            // Columns may be plain SQL names or { column } descriptors.
                            const columnName = typeof column === 'string' ? column : column.column;
                            arrayColumns.add(columnName);
                        });
                        // Recursively collect from deeper nested entities
                        collectNestedColumns(nestedEntity.id);
                    });
            };
            collectNestedColumns(info.entity.id);
        });
        // Locate the previous CTE so its SELECT list can be propagated.
        const prevCteEntry = currentCtes.find(c => c.aliasExpression.table.name === currentCteAlias);
        if (!prevCteEntry) {
            throw new Error(`CTE not found: ${currentCteAlias}`);
        }
        const prevSelects = new SelectValueCollector_1.SelectValueCollector(null, currentCtes).collect(prevCteEntry.query);
        // Build SELECT items: columns that are NOT being compressed (for GROUP BY)
        const groupByItems = [];
        const selectItems = [];
        // Collect array entity columns organized by depth for GROUP BY exclusion strategy
        const arrayEntityColumns = this.collectArrayEntityColumnsByDepth(mapping, depth);
        // JSON columns produced for object entities nested inside the arrays
        // being aggregated here — these are rolled into the array and must not
        // appear in SELECT/GROUP BY of this CTE.
        const arrayInternalObjectColumns = new Set();
        if (columnMappings) {
            infos.forEach(info => {
                // Find all object-type nested entities within this array entity
                mapping.nestedEntities
                    .filter(ne => ne.parentId === info.entity.id && ne.relationshipType === "object")
                    .forEach(objectEntity => {
                        // Find the corresponding JSON column mapping for this object entity
                        const columnMapping = columnMappings.find(cm => cm.entityId === objectEntity.id);
                        if (columnMapping) {
                            arrayInternalObjectColumns.add(columnMapping.generatedColumnName);
                        }
                    });
            });
        }
        // Process existing SELECT variables to determine which should be included in GROUP BY
        this.processSelectVariablesForGroupBy(prevSelects, arrayColumns, arrayEntityColumns, depth, selectItems, groupByItems, arrayInternalObjectColumns);
        // Add one JSON aggregation column per array entity at this depth
        for (const info of infos) {
            const agg = this.buildAggregationDetailsForArrayEntity(info.entity, mapping.nestedEntities, new Map(), // allEntities - not needed for array aggregation
            columnMappings);
            selectItems.push(new Clause_1.SelectItem(agg.jsonAgg, info.entity.propertyName));
        }
        // Create the new CTE
        const cteAlias = `${PostgresArrayEntityCteBuilder.CTE_ARRAY_PREFIX}${depth}`;
        const cteSelect = new SimpleSelectQuery_1.SimpleSelectQuery({
            selectClause: new Clause_1.SelectClause(selectItems),
            fromClause: new Clause_1.FromClause(new Clause_1.SourceExpression(new Clause_1.TableSource(null, new ValueComponent_1.IdentifierString(currentCteAlias)), null), null),
            groupByClause: groupByItems.length > 0 ? new Clause_1.GroupByClause(groupByItems) : null,
        });
        const cte = new Clause_1.CommonTable(cteSelect, new Clause_1.SourceAliasExpression(cteAlias, null), null);
        return { cte, newCteAlias: cteAlias };
    }
    /**
     * Creates jsonb_agg function for array entity.
     * Handles entity columns and nested child relationships.
     * Uses originalPropertyName to avoid sequential numbering.
     *
     * @param entity The array entity being processed
     * @param nestedEntities All nested entities from the mapping
     * @param allEntities Map of all entities (not used in current implementation)
     * @param columnMappings Mappings from object entity IDs to generated JSON column names
     * @returns Object containing the JSON aggregation function
     * @throws Error when column mappings are missing for an object-type child
     */
    buildAggregationDetailsForArrayEntity(entity, nestedEntities, allEntities, columnMappings) {
        // Build JSON object for array elements using JSONB functions
        const jsonBuildFunction = PostgresArrayEntityCteBuilder.JSON_FUNCTIONS.BUILD_OBJECT;
        const args = [];
        // Add the entity's own columns as key/value argument pairs
        Object.entries(entity.columns).forEach(([jsonKey, sqlColumn]) => {
            args.push(new ValueComponent_1.LiteralValue(jsonKey, undefined, true));
            args.push(new ValueComponent_1.ColumnReference(null, new ValueComponent_1.IdentifierString(sqlColumn)));
        });
        // Find and process child entities (both object and array types)
        const childEntities = nestedEntities.filter((ne) => ne.parentId === entity.id);
        childEntities.forEach((childEntity) => {
            // Use originalPropertyName if available to avoid sequential numbering in final JSON
            const propertyNameForJson = childEntity.originalPropertyName || childEntity.propertyName;
            args.push(new ValueComponent_1.LiteralValue(propertyNameForJson, undefined, true));
            if (childEntity.relationshipType === "object") {
                // For object relationships, use pre-computed JSON column from column mappings
                if (!columnMappings) {
                    throw new Error(`❌ PostgresArrayEntityCteBuilder Error: Column mappings not provided\n` +
                        `\n` +
                        `🔍 Details:\n` +
                        `  - Entity ID: ${childEntity.id}\n` +
                        `  - Entity Name: ${childEntity.name || 'unknown'}\n` +
                        `  - Property Name: ${childEntity.propertyName}\n` +
                        `  - Relationship Type: ${childEntity.relationshipType}\n` +
                        `\n` +
                        `💡 Solution:\n` +
                        `  Column mappings are required for hybrid JSON column naming.\n` +
                        `  This error indicates that PostgresObjectEntityCteBuilder did not\n` +
                        `  pass column mappings to PostgresArrayEntityCteBuilder.\n` +
                        `\n` +
                        `🔧 Check:\n` +
                        `  1. Ensure PostgresJsonQueryBuilder.buildJsonWithCteStrategy() passes columnMappings\n` +
                        `  2. Verify PostgresObjectEntityCteBuilder.buildObjectEntityCtes() returns columnMappings\n` +
                        `  3. Check that Model-driven mapping conversion generates unique entity IDs`);
                }
                const mapping = columnMappings.find(m => m.entityId === childEntity.id);
                if (!mapping) {
                    const availableMappings = columnMappings.map(m => `${m.entityId} → ${m.generatedColumnName}`).join(', ');
                    throw new Error(`❌ PostgresArrayEntityCteBuilder Error: Column mapping not found\n` +
                        `\n` +
                        `🔍 Details:\n` +
                        `  - Looking for Entity ID: ${childEntity.id}\n` +
                        `  - Entity Name: ${childEntity.name || 'unknown'}\n` +
                        `  - Property Name: ${childEntity.propertyName}\n` +
                        `  - Relationship Type: ${childEntity.relationshipType}\n` +
                        `\n` +
                        `📋 Available Mappings:\n` +
                        `  ${availableMappings || 'None'}\n` +
                        `\n` +
                        `💡 Solution:\n` +
                        `  Entity IDs must match between mapping generation and usage.\n` +
                        `  This suggests a mismatch in entity ID generation or processing.\n` +
                        `\n` +
                        `🔧 Check:\n` +
                        `  1. Model-driven mapping conversion generates consistent entity IDs\n` +
                        `  2. PostgresObjectEntityCteBuilder processes all entities correctly\n` +
                        `  3. Entity hierarchy and parentId relationships are correct`);
                }
                args.push(new ValueComponent_1.ColumnReference(null, new ValueComponent_1.IdentifierString(mapping.generatedColumnName)));
            }
            else if (childEntity.relationshipType === "array") {
                // For array relationships, reference the already-aggregated column directly
                args.push(new ValueComponent_1.ColumnReference(null, new ValueComponent_1.IdentifierString(childEntity.propertyName)));
            }
        });
        // Create JSON object
        const jsonObject = new ValueComponent_1.FunctionCall(null, new ValueComponent_1.RawString(jsonBuildFunction), new ValueComponent_1.ValueList(args), null);
        // TODO: Implement FILTER clause support in the SQL AST so rows whose
        // primary key is NULL can be excluded (e.g. FILTER (WHERE pk IS NOT NULL)).
        // Until then a plain jsonb_agg is emitted and NULL entries are handled
        // in post-processing.
        const jsonAggFunction = PostgresArrayEntityCteBuilder.JSON_FUNCTIONS.AGGREGATE;
        const jsonAgg = new ValueComponent_1.FunctionCall(null, new ValueComponent_1.RawString(jsonAggFunction), new ValueComponent_1.ValueList([jsonObject]), null);
        return { jsonAgg };
    }
    /**
     * Collects array entity columns by depth for GROUP BY exclusion strategy.
     *
     * @param mapping The JSON mapping configuration containing all entities
     * @param currentDepth The current aggregation depth being processed
     * @returns A map where keys are depth levels and values are sets of column names
     */
    collectArrayEntityColumnsByDepth(mapping, currentDepth) {
        const arrayEntitiesByDepth = new Map();
        // Pre-initialize depth sets for current and a few deeper levels;
        // the bound keeps the loop finite regardless of mapping shape.
        const maxDepth = Math.max(currentDepth + 3, 5);
        for (let d = currentDepth; d <= maxDepth; d++) {
            arrayEntitiesByDepth.set(d, new Set());
        }
        // Process all array entities to collect their columns by depth
        mapping.nestedEntities
            .filter(entity => entity.relationshipType === 'array')
            .forEach(entity => {
                // Calculate entity depth in the hierarchy
                const entityDepth = this.calculateEntityDepth(entity, mapping);
                if (!arrayEntitiesByDepth.has(entityDepth)) {
                    arrayEntitiesByDepth.set(entityDepth, new Set());
                }
                // Add direct columns from the array entity
                this.addEntityColumnsToDepthSet(entity, entityDepth, arrayEntitiesByDepth);
                // Collect columns from all descendant entities recursively
                this.collectDescendantColumns(entity.id, entityDepth, mapping, arrayEntitiesByDepth);
            });
        return arrayEntitiesByDepth;
    }
    /**
     * Calculates entity depth by traversing up to root.
     *
     * @param entity The entity to calculate depth for
     * @param mapping The JSON mapping containing all entities
     * @returns The depth level (0 for root level, 1 for first level, etc.)
     */
    calculateEntityDepth(entity, mapping) {
        let entityDepth = 0;
        let currentEntity = entity;
        // Visited-set guard: a cyclic parentId chain would otherwise never
        // terminate this walk.
        const visited = new Set();
        while (currentEntity.parentId && currentEntity.parentId !== mapping.rootEntity.id) {
            if (visited.has(currentEntity.id)) {
                break;
            }
            visited.add(currentEntity.id);
            entityDepth++;
            const parent = mapping.nestedEntities.find(e => e.id === currentEntity.parentId);
            if (!parent) {
                // Fixed: the previous `|| currentEntity` fallback kept the same
                // entity when the parent lookup failed, so a dangling parentId
                // made this loop spin forever. Stop at the broken link instead.
                break;
            }
            currentEntity = parent;
        }
        return entityDepth;
    }
    /**
     * Adds entity columns to depth set.
     *
     * @param entity The entity whose columns should be added
     * @param depth The depth level to add columns to (must already exist in the map)
     * @param arrayEntitiesByDepth The map to update
     */
    addEntityColumnsToDepthSet(entity, depth, arrayEntitiesByDepth) {
        Object.values(entity.columns).forEach(column => {
            // Columns may be plain SQL names or { column } descriptors.
            const columnName = typeof column === 'string' ? column : column.column;
            arrayEntitiesByDepth.get(depth).add(columnName);
        });
    }
    /**
     * Recursively collects columns from descendant entities.
     *
     * @param parentEntityId The ID of the parent entity
     * @param targetDepth The depth level to assign collected columns to
     * @param mapping The JSON mapping containing all entities
     * @param arrayEntitiesByDepth The map to update with collected columns
     */
    collectDescendantColumns(parentEntityId, targetDepth, mapping, arrayEntitiesByDepth) {
        mapping.nestedEntities
            .filter(nestedEntity => nestedEntity.parentId === parentEntityId)
            .forEach(nestedEntity => {
                // Add all columns from this descendant to the target depth
                this.addEntityColumnsToDepthSet(nestedEntity, targetDepth, arrayEntitiesByDepth);
                // Recursively collect from deeper nested entities
                this.collectDescendantColumns(nestedEntity.id, targetDepth, mapping, arrayEntitiesByDepth);
            });
    }
    /**
     * Implements GROUP BY exclusion strategy for array aggregation.
     * Excludes current array columns and array-internal object JSON columns.
     *
     * @param prevSelects SELECT variables from the previous CTE
     * @param arrayColumns Columns that are being aggregated (should be excluded from GROUP BY)
     * @param arrayEntitiesByDepth Map of depth levels to their column sets
     * @param currentDepth The current aggregation depth being processed
     * @param selectItems Output array for SELECT items (mutated in place)
     * @param groupByItems Output array for GROUP BY items (mutated in place)
     * @param arrayInternalObjectColumns JSON columns from objects within arrays being processed
     */
    processSelectVariablesForGroupBy(prevSelects, arrayColumns, arrayEntitiesByDepth, currentDepth, selectItems, groupByItems, arrayInternalObjectColumns) {
        prevSelects.forEach(sv => {
            // Columns being aggregated at this depth never survive into SELECT.
            if (arrayColumns.has(sv.name)) {
                return;
            }
            // Skip JSON columns from objects within arrays being aggregated.
            if (arrayInternalObjectColumns && arrayInternalObjectColumns.has(sv.name)) {
                return;
            }
            if (!this.shouldIncludeColumnInGroupBy(sv.name, arrayEntitiesByDepth, currentDepth)) {
                return;
            }
            selectItems.push(new Clause_1.SelectItem(new ValueComponent_1.ColumnReference(null, new ValueComponent_1.IdentifierString(sv.name)), sv.name));
            // Exclude JSON columns from GROUP BY as PostgreSQL doesn't support
            // equality operators for the json type.
            if (!sv.name.endsWith('_json')) {
                groupByItems.push(new ValueComponent_1.ColumnReference(null, new ValueComponent_1.IdentifierString(sv.name)));
            }
        });
    }
    /**
     * Determines if column should be included in GROUP BY clause.
     * Applies depth-based filtering and special handling for JSON columns.
     *
     * @param columnName The name of the column to evaluate
     * @param arrayEntitiesByDepth Map of depth levels to their column sets
     * @param currentDepth The current aggregation depth
     * @returns True if the column should be included in GROUP BY, false otherwise
     */
    shouldIncludeColumnInGroupBy(columnName, arrayEntitiesByDepth, currentDepth) {
        // Columns belonging to array entities at the current depth or deeper
        // are being aggregated and must not appear in GROUP BY.
        for (const [entityDepth, columns] of arrayEntitiesByDepth.entries()) {
            if (entityDepth >= currentDepth && columns.has(columnName)) {
                return false;
            }
        }
        // Critical: JSON columns from objects within arrays being processed
        // must be excluded from GROUP BY as they are aggregated within the array.
        // Legacy handling applies only to entity_N_json style names.
        if (columnName.endsWith('_json') && columnName.startsWith('entity_')) {
            return this.shouldIncludeJsonColumn(columnName, currentDepth);
        }
        return true;
    }
    /**
     * Applies heuristics for entity JSON column inclusion in GROUP BY.
     * Uses entity numbering patterns to identify deeply nested entities.
     *
     * @param columnName The JSON column name (expected format: entity_N_json)
     * @param currentDepth The current aggregation depth
     * @returns True if the JSON column should be included, false otherwise
     */
    shouldIncludeJsonColumn(columnName, currentDepth) {
        const entityMatch = columnName.match(/entity_(\d+)_json/);
        if (!entityMatch) {
            return true;
        }
        // For depth > 0, exclude JSON columns from highly nested entities.
        // This heuristic assumes entities with higher numbers are more deeply
        // nested; entities numbered > 2 are typically inside arrays.
        if (currentDepth > 0) {
            return parseInt(entityMatch[1], 10) <= 2;
        }
        return true;
    }
}
// Shared naming constant: prefix for the per-depth array CTE aliases
// (e.g. cte_array_depth_2).
PostgresArrayEntityCteBuilder.CTE_ARRAY_PREFIX = 'cte_array_depth_';
// PostgreSQL JSONB function names used when building aggregation expressions.
PostgresArrayEntityCteBuilder.JSON_FUNCTIONS = {
    BUILD_OBJECT: 'jsonb_build_object',
    AGGREGATE: 'jsonb_agg'
};
exports.PostgresArrayEntityCteBuilder = PostgresArrayEntityCteBuilder;
//# sourceMappingURL=PostgresArrayEntityCteBuilder.js.map