@tanstack/db
Version:
A reactive client store for building super fast apps on sync
1 lines • 94.9 kB
Source Map (JSON)
{"version":3,"file":"index.cjs","sources":["../../../../src/query/compiler/index.ts"],"sourcesContent":["import {\n concat as concatOperator,\n distinct,\n filter,\n join as joinOperator,\n map,\n reduce,\n tap,\n} from '@tanstack/db-ivm'\nimport { optimizeQuery } from '../optimizer.js'\nimport {\n CollectionInputNotFoundError,\n DistinctRequiresSelectError,\n DuplicateAliasInSubqueryError,\n FnSelectWithGroupByError,\n HavingRequiresGroupByError,\n LimitOffsetRequireOrderByError,\n UnsupportedFromTypeError,\n} from '../../errors.js'\nimport { VIRTUAL_PROP_NAMES } from '../../virtual-props.js'\nimport {\n ConditionalSelect,\n IncludesSubquery,\n PropRef,\n Value as ValClass,\n getWhereExpression,\n isExpressionLike,\n} from '../ir.js'\nimport { ensureIndexForField } from '../../indexes/auto-index.js'\nimport { inArray } from '../builder/functions.js'\nimport {\n compileExpression,\n isCaseWhenConditionTrue,\n toBooleanPredicate,\n} from './evaluators.js'\nimport { processJoins } from './joins.js'\nimport { containsAggregate, processGroupBy } from './group-by.js'\nimport { getLazyLoadTargets } from './lazy-targets.js'\nimport { processOrderBy } from './order-by.js'\nimport { processSelect } from './select.js'\nimport type { CollectionSubscription } from '../../collection/subscription.js'\nimport type { OrderByOptimizationInfo } from './order-by.js'\nimport type {\n BasicExpression,\n CollectionRef,\n IncludesMaterialization,\n QueryIR,\n QueryRef,\n UnionAll,\n UnionFrom,\n} from '../ir.js'\nimport type { LazyCollectionCallbacks } from './joins.js'\nimport type { Collection } from '../../collection/index.js'\nimport type {\n KeyedStream,\n NamespacedAndKeyedStream,\n ResultStream,\n} from '../../types.js'\nimport type { QueryCache, QueryMapping, WindowOptions } from './types.js'\n\nexport type { WindowOptions } from './types.js'\n\n/** Symbol used to tag parent $selected with routing metadata for includes */\nexport const INCLUDES_ROUTING = Symbol(`includesRouting`)\nexport const FN_SELECT_STATE = Symbol(`fnSelectState`)\nconst SKIP_INCLUDE = Symbol(`skipInclude`)\n\ntype ConditionalSelectGuard = {\n condition: BasicExpression\n expected: boolean\n}\n\ntype SourceInclude = {\n sourceAlias: string\n include: IncludesCompilationResult\n}\n\ntype ProjectedSourceIncludePath = {\n path: Array<string>\n guards: Array<ConditionalSelectGuard>\n}\n\n/**\n * Result of compiling an includes subquery, including the child pipeline\n * and metadata needed to route child results to parent-scoped Collections.\n */\nexport interface IncludesCompilationResult {\n /** Filtered child pipeline (post inner-join with parent keys) */\n pipeline: ResultStream\n /** Result field name on parent (e.g., \"issues\") */\n fieldName: string\n /** Path where the included value is written in the parent result */\n resultPath: Array<string>\n /** Parent-side correlation ref (e.g., project.id) */\n correlationField: PropRef\n /** Child-side correlation ref (e.g., issue.projectId) */\n childCorrelationField: PropRef\n /** Whether the child query has an ORDER BY clause */\n hasOrderBy: boolean\n /** Full compilation result for the child query (for nested includes + alias tracking) */\n childCompilationResult: CompilationResult\n /** Parent-side projection refs for parent-referencing filters */\n parentProjection?: Array<PropRef>\n /** How the output layer materializes the child result on the parent row */\n materialization: IncludesMaterialization\n /** Internal field used to unwrap scalar child selects */\n scalarField?: string\n}\n\n/**\n * Result of query compilation including both the pipeline and source-specific WHERE clauses\n */\nexport interface CompilationResult {\n /** The ID of the main collection */\n collectionId: string\n\n /** The compiled query pipeline (D2 stream) */\n pipeline: ResultStream\n\n /** Map of source aliases to their WHERE clauses for index optimization */\n sourceWhereClauses: Map<string, BasicExpression<boolean>>\n\n /**\n * Maps each source alias to its collection ID. Enables per-alias subscriptions for self-joins.\n * Example: `{ employee: 'employees-col-id', manager: 'employees-col-id' }`\n */\n aliasToCollectionId: Record<string, string>\n\n /**\n * Flattened mapping from outer alias to innermost alias for subqueries.\n * Always provides one-hop lookups, never recursive chains.\n *\n * Example: `{ activeUser: 'user' }` when `.from({ activeUser: subquery })`\n * where the subquery uses `.from({ user: collection })`.\n *\n * For deeply nested subqueries, the mapping goes directly to the innermost alias:\n * `{ author: 'user' }` (not `{ author: 'activeUser' }`), so `aliasRemapping[alias]`\n * always resolves in a single lookup.\n *\n * Used to resolve subscriptions during lazy loading when join aliases differ from\n * the inner aliases where collection subscriptions were created.\n */\n aliasRemapping: Record<string, string>\n\n /** Child pipelines for includes subqueries */\n includes?: Array<IncludesCompilationResult>\n}\n\n/**\n * Compiles a query IR into a D2 pipeline\n * @param rawQuery The query IR to compile\n * @param inputs Mapping of source aliases to input streams (e.g., `{ employee: input1, manager: input2 }`)\n * @param collections Mapping of collection IDs to Collection instances\n * @param subscriptions Mapping of source aliases to CollectionSubscription instances\n * @param callbacks Mapping of source aliases to lazy loading callbacks\n * @param lazySources Set of source aliases that should load data lazily\n * @param optimizableOrderByCollections Map of collection IDs to order-by optimization info\n * @param cache Optional cache for compiled subqueries (used internally for recursion)\n * @param queryMapping Optional mapping from optimized queries to original queries\n * @returns A CompilationResult with the pipeline, source WHERE clauses, and alias metadata\n */\nexport function compileQuery(\n rawQuery: QueryIR,\n inputs: Record<string, KeyedStream>,\n collections: Record<string, Collection<any, any, any, any, any>>,\n subscriptions: Record<string, CollectionSubscription>,\n callbacks: Record<string, LazyCollectionCallbacks>,\n lazySources: Set<string>,\n optimizableOrderByCollections: Record<string, OrderByOptimizationInfo>,\n setWindowFn: (windowFn: (options: WindowOptions) => void) => void,\n cache: QueryCache = new WeakMap(),\n queryMapping: QueryMapping = new WeakMap(),\n // For includes: parent key stream to inner-join with this query's FROM\n parentKeyStream?: KeyedStream,\n childCorrelationField?: PropRef,\n): CompilationResult {\n // Check if the original raw query has already been compiled\n const cachedResult = cache.get(rawQuery)\n if (cachedResult) {\n return cachedResult\n }\n\n // Validate the raw query BEFORE optimization to check user's original structure.\n // This must happen before optimization because the optimizer may create internal\n // subqueries (e.g., for predicate pushdown) that reuse aliases, which is fine.\n validateQueryStructure(rawQuery)\n\n // Optimize the query before compilation\n const { optimizedQuery, sourceWhereClauses } = optimizeQuery(rawQuery)\n // Use a mutable binding so we can shallow-clone select before includes mutation\n let query = optimizedQuery\n\n // Create mapping from optimized query to original for caching\n queryMapping.set(query, rawQuery)\n mapNestedQueries(query, rawQuery, queryMapping)\n\n // Create a copy of the inputs map to avoid modifying the original\n const allInputs = { ...inputs }\n\n // Track alias to collection id relationships discovered during compilation.\n // This includes all user-declared aliases plus inner aliases from subqueries.\n const aliasToCollectionId: Record<string, string> = {}\n\n // Track alias remapping for subqueries (outer alias → inner alias)\n // e.g., when .join({ activeUser: subquery }) where subquery uses .from({ user: collection })\n // we store: aliasRemapping['activeUser'] = 'user'\n const aliasRemapping: Record<string, string> = {}\n\n // Create a map of source aliases to input streams.\n // Inputs MUST be keyed by alias (e.g., `{ employee: input1, manager: input2 }`),\n // not by collection ID. This enables per-alias subscriptions where different aliases\n // of the same collection (e.g., self-joins) maintain independent filtered streams.\n const sources: Record<string, KeyedStream> = {}\n\n // Process the FROM clause to get the source stream.\n const {\n alias: mainSource,\n collectionId: mainCollectionId,\n pipeline: initialPipeline,\n sources: fromSources,\n sourceIncludes,\n directIncludes,\n isUnionFrom,\n } = processFromClause(\n query.from,\n allInputs,\n collections,\n subscriptions,\n callbacks,\n lazySources,\n optimizableOrderByCollections,\n setWindowFn,\n cache,\n queryMapping,\n aliasToCollectionId,\n aliasRemapping,\n sourceWhereClauses,\n )\n Object.assign(sources, fromSources)\n\n // If this is an includes child query, inner-join the raw input with parent keys.\n // This filters the child collection to only rows matching parents in the result set.\n // The inner join happens BEFORE namespace wrapping / WHERE / SELECT / ORDER BY,\n // so the child pipeline only processes rows that match parents.\n let pipeline: NamespacedAndKeyedStream = initialPipeline\n if (!isUnionFrom && parentKeyStream && childCorrelationField) {\n const mainInput = sources[mainSource]!\n let filteredMainInput = mainInput\n // Re-key child input by correlation field: [correlationValue, [childKey, childRow]]\n const childFieldPath = childCorrelationField.path.slice(1) // remove alias prefix\n const childRekeyed = mainInput.pipe(\n map(([key, row]: [unknown, any]) => {\n const correlationValue = getNestedValue(row, childFieldPath)\n return [correlationValue, [key, row]] as [unknown, [unknown, any]]\n }),\n )\n\n // Inner join: only children whose correlation key exists in parent keys pass through\n const joined = childRekeyed.pipe(joinOperator(parentKeyStream, `inner`))\n\n // Extract: [correlationValue, [[childKey, childRow], parentContext]] → [childKey, childRow]\n // Tag the row with __correlationKey for output routing\n // If parentSide is non-null (parent context projected), attach as __parentContext\n filteredMainInput = joined.pipe(\n filter(([_correlationValue, [childSide]]: any) => {\n return childSide != null\n }),\n map(([correlationValue, [childSide, parentSide]]: any) => {\n const [childKey, childRow] = childSide\n const tagged: any = { ...childRow, __correlationKey: correlationValue }\n if (parentSide != null) {\n tagged.__parentContext = parentSide\n }\n const effectiveKey =\n parentSide != null\n ? `${String(childKey)}::${JSON.stringify(parentSide)}`\n : childKey\n return [effectiveKey, tagged]\n }),\n )\n\n // Update sources so the rest of the pipeline uses the filtered input\n sources[mainSource] = filteredMainInput\n\n pipeline = wrapInputWithAlias(filteredMainInput, mainSource)\n }\n\n // Process JOIN clauses if they exist\n if (query.join && query.join.length > 0) {\n pipeline = processJoins(\n pipeline,\n query.join,\n sources,\n mainCollectionId,\n mainSource,\n allInputs,\n cache,\n queryMapping,\n collections,\n subscriptions,\n callbacks,\n lazySources,\n optimizableOrderByCollections,\n setWindowFn,\n rawQuery,\n compileQuery,\n aliasToCollectionId,\n aliasRemapping,\n sourceWhereClauses,\n )\n }\n\n // Process the WHERE clause if it exists\n if (query.where && query.where.length > 0) {\n // Apply each WHERE condition as a filter (they are ANDed together)\n for (const where of query.where) {\n const whereExpression = getWhereExpression(where)\n const compiledWhere = compileExpression(whereExpression)\n pipeline = pipeline.pipe(\n filter(([_key, namespacedRow]) => {\n return toBooleanPredicate(compiledWhere(namespacedRow))\n }),\n )\n }\n }\n\n // Process functional WHERE clauses if they exist\n if (query.fnWhere && query.fnWhere.length > 0) {\n for (const fnWhere of query.fnWhere) {\n pipeline = pipeline.pipe(\n filter(([_key, namespacedRow]) => {\n return toBooleanPredicate(fnWhere(namespacedRow))\n }),\n )\n }\n }\n\n // Extract includes from SELECT, compile child pipelines, and replace with placeholders.\n // This must happen AFTER WHERE (so parent pipeline is filtered) but BEFORE processSelect\n // (so IncludesSubquery nodes are stripped before select compilation).\n const includesResults: Array<IncludesCompilationResult> = !query.select\n ? [...directIncludes]\n : []\n const includesRoutingFns: Array<{\n fieldName: string\n getRouting: (nsRow: any) => {\n correlationKey: unknown\n parentContext: Record<string, any> | null\n }\n }> = []\n for (const { sourceAlias, include } of sourceIncludes) {\n const projectedPaths =\n query.select != null\n ? findProjectedSourceIncludePaths(\n query.select,\n sourceAlias,\n include.resultPath,\n )\n : query.fnSelect\n ? []\n : [\n {\n path: [sourceAlias, ...include.resultPath],\n guards: [],\n },\n ]\n\n if (projectedPaths.length === 0) {\n continue\n }\n\n for (const { path: resultPath, guards } of projectedPaths) {\n const fieldName = getUniqueIncludesRoutingKey(\n `${sourceAlias}.${resultPath.join(`.`)}`,\n includesRoutingFns,\n )\n const compiledGuards = guards.map((guard) => ({\n condition: compileExpression(guard.condition),\n expected: guard.expected,\n }))\n includesResults.push({\n ...include,\n fieldName,\n resultPath,\n })\n\n includesRoutingFns.push({\n fieldName,\n getRouting: (nsRow: any) => {\n if (!matchesConditionalSelectGuards(compiledGuards, nsRow)) {\n return { correlationKey: null, parentContext: null }\n }\n return (\n nsRow[sourceAlias]?.[INCLUDES_ROUTING]?.[include.fieldName] ?? {\n correlationKey: null,\n parentContext: null,\n }\n )\n },\n })\n }\n }\n if (query.select && directIncludes.length > 0) {\n for (const include of directIncludes) {\n const projectedPaths = findProjectedResultIncludePaths(\n query.select,\n include.resultPath,\n )\n\n for (const { path: resultPath, guards } of projectedPaths) {\n const fieldName = getUniqueIncludesRoutingKey(\n resultPath.join(`.`),\n includesRoutingFns,\n )\n const compiledGuards = guards.map((guard) => ({\n condition: compileExpression(guard.condition),\n expected: guard.expected,\n }))\n\n includesResults.push({\n ...include,\n fieldName,\n resultPath,\n })\n\n includesRoutingFns.push({\n fieldName,\n getRouting: (nsRow: any) => {\n if (!matchesConditionalSelectGuards(compiledGuards, nsRow)) {\n return { correlationKey: null, parentContext: null }\n }\n return (\n nsRow[INCLUDES_ROUTING]?.[include.fieldName] ?? {\n correlationKey: null,\n parentContext: null,\n }\n )\n },\n })\n }\n }\n }\n if (query.select) {\n const includesEntries = extractIncludesFromSelect(query.select)\n if (includesEntries.length > 0) {\n query = { ...query, select: { ...query.select } }\n }\n for (const { key, path, subquery, guards } of includesEntries) {\n const fieldName = getUniqueIncludesRoutingKey(key, includesRoutingFns)\n // Branch parent pipeline: map to [correlationValue, parentContext]\n // When parentProjection exists, project referenced parent fields; otherwise null (zero overhead)\n const compiledCorrelation = compileExpression(subquery.correlationField)\n const compiledGuards = guards.map((guard) => ({\n condition: compileExpression(guard.condition),\n expected: guard.expected,\n }))\n let parentKeys: any\n if (subquery.parentProjection && subquery.parentProjection.length > 0) {\n const compiledProjections = subquery.parentProjection.map((ref) => ({\n alias: ref.path[0]!,\n field: ref.path.slice(1),\n compiled: compileExpression(ref),\n }))\n parentKeys = pipeline.pipe(\n map(([_key, nsRow]: any) => {\n if (!matchesConditionalSelectGuards(compiledGuards, nsRow)) {\n return [SKIP_INCLUDE, null] as any\n }\n const parentContext: Record<string, Record<string, any>> = {}\n for (const proj of compiledProjections) {\n if (!parentContext[proj.alias]) {\n parentContext[proj.alias] = {}\n }\n const value = proj.compiled(nsRow)\n // Set nested field in the alias namespace\n let target = parentContext[proj.alias]!\n for (let i = 0; i < proj.field.length - 1; i++) {\n if (!target[proj.field[i]!]) {\n target[proj.field[i]!] = {}\n }\n target = target[proj.field[i]!]\n }\n target[proj.field[proj.field.length - 1]!] = value\n }\n return [compiledCorrelation(nsRow), parentContext] as any\n }),\n )\n } else {\n parentKeys = pipeline.pipe(\n map(([_key, nsRow]: any) => {\n if (!matchesConditionalSelectGuards(compiledGuards, nsRow)) {\n return [SKIP_INCLUDE, null] as any\n }\n return [compiledCorrelation(nsRow), null] as any\n }),\n )\n }\n parentKeys = parentKeys.pipe(\n filter(([correlationValue]: any) => correlationValue !== SKIP_INCLUDE),\n )\n\n // Deduplicate: when multiple parents share the same correlation key (and\n // parentContext), clamp multiplicity to 1 so the inner join doesn't\n // produce duplicate child entries that cause incorrect deletions.\n parentKeys = parentKeys.pipe(\n reduce((values: Array<[any, number]>) =>\n values.map(([v, mult]) => [v, mult > 0 ? 1 : 0] as [any, number]),\n ),\n )\n\n // --- Includes lazy loading (mirrors join lazy loading in joins.ts) ---\n // Resolve the child correlation field to concrete collection targets so\n // subquery and union child sources can load by branch when it is safe.\n const childCorrelationAlias = subquery.childCorrelationField.path[0]!\n const directChildCollection =\n subquery.query.from.type === `collectionRef`\n ? subquery.query.from.collection\n : undefined\n const lazyTargets = getLazyLoadTargets(\n subquery.query,\n subquery.query.from,\n childCorrelationAlias,\n subquery.childCorrelationField,\n directChildCollection,\n aliasRemapping,\n )\n\n if (lazyTargets.length > 0) {\n // 1. Mark child source as lazy so CollectionSubscriber skips initial full load\n for (const target of lazyTargets) {\n lazySources.add(target.alias)\n }\n\n // 2. Ensure an index on the correlation field for efficient lookups\n for (const target of lazyTargets) {\n const targetFieldName = target.path[0]\n if (targetFieldName) {\n ensureIndexForField(targetFieldName, target.path, target.collection)\n }\n }\n\n // 3. Tap parent keys to intercept correlation values and request\n // matching child rows on-demand via the child's subscription\n parentKeys = parentKeys.pipe(\n tap((data: any) => {\n const joinKeys = [\n ...new Set(\n data\n .getInner()\n .map(\n ([[correlationValue]]: any) => correlationValue as unknown,\n )\n .filter((joinKey: unknown) => joinKey != null),\n ),\n ]\n\n if (joinKeys.length === 0) {\n return\n }\n\n for (const target of lazyTargets) {\n const lazySourceSubscription = subscriptions[target.alias]\n\n if (!lazySourceSubscription) {\n continue\n }\n\n if (lazySourceSubscription.hasLoadedInitialState()) {\n continue\n }\n\n const lazyJoinRef = new PropRef(target.path)\n lazySourceSubscription.requestSnapshot({\n where: inArray(lazyJoinRef, joinKeys),\n })\n }\n }),\n )\n }\n\n // If parent filters exist, append them to the child query's WHERE\n const childQuery =\n subquery.parentFilters && subquery.parentFilters.length > 0\n ? {\n ...subquery.query,\n where: [\n ...(subquery.query.where || []),\n ...subquery.parentFilters,\n ],\n }\n : subquery.query\n\n // Recursively compile child query WITH the parent key stream\n const childResult = compileQuery(\n childQuery,\n allInputs,\n collections,\n subscriptions,\n callbacks,\n lazySources,\n optimizableOrderByCollections,\n setWindowFn,\n cache,\n queryMapping,\n parentKeys,\n subquery.childCorrelationField,\n )\n\n // Merge child's alias metadata into parent's\n Object.assign(aliasToCollectionId, childResult.aliasToCollectionId)\n Object.assign(aliasRemapping, childResult.aliasRemapping)\n for (const [alias, whereClause] of childResult.sourceWhereClauses) {\n sourceWhereClauses.set(alias, whereClause)\n }\n\n includesResults.push({\n pipeline: childResult.pipeline,\n fieldName,\n resultPath: path,\n correlationField: subquery.correlationField,\n childCorrelationField: subquery.childCorrelationField,\n hasOrderBy: !!(\n subquery.query.orderBy && subquery.query.orderBy.length > 0\n ),\n childCompilationResult: childResult,\n parentProjection: subquery.parentProjection,\n materialization: subquery.materialization,\n scalarField: subquery.scalarField,\n })\n\n // Capture routing function for INCLUDES_ROUTING tagging\n if (subquery.parentProjection && subquery.parentProjection.length > 0) {\n const compiledProjs = subquery.parentProjection.map((ref) => ({\n alias: ref.path[0]!,\n field: ref.path.slice(1),\n compiled: compileExpression(ref),\n }))\n const compiledCorr = compiledCorrelation\n const compiledRoutingGuards = compiledGuards\n includesRoutingFns.push({\n fieldName,\n getRouting: (nsRow: any) => {\n if (!matchesConditionalSelectGuards(compiledRoutingGuards, nsRow)) {\n return { correlationKey: null, parentContext: null }\n }\n const parentContext: Record<string, Record<string, any>> = {}\n for (const proj of compiledProjs) {\n if (!parentContext[proj.alias]) {\n parentContext[proj.alias] = {}\n }\n const value = proj.compiled(nsRow)\n let target = parentContext[proj.alias]!\n for (let i = 0; i < proj.field.length - 1; i++) {\n if (!target[proj.field[i]!]) {\n target[proj.field[i]!] = {}\n }\n target = target[proj.field[i]!]\n }\n target[proj.field[proj.field.length - 1]!] = value\n }\n return { correlationKey: compiledCorr(nsRow), parentContext }\n },\n })\n } else {\n const compiledRoutingGuards = compiledGuards\n includesRoutingFns.push({\n fieldName,\n getRouting: (nsRow: any) => {\n if (!matchesConditionalSelectGuards(compiledRoutingGuards, nsRow)) {\n return { correlationKey: null, parentContext: null }\n }\n return {\n correlationKey: compiledCorrelation(nsRow),\n parentContext: null,\n }\n },\n })\n }\n\n // Replace includes entry in select with a null placeholder\n query = {\n ...query,\n select: replaceIncludesInSelect(query.select!, path),\n }\n }\n }\n\n if (\n query.distinct &&\n !query.fnSelect &&\n !query.select &&\n query.from.type !== `unionAll`\n ) {\n throw new DistinctRequiresSelectError()\n }\n\n if (query.fnSelect && query.groupBy && query.groupBy.length > 0) {\n throw new FnSelectWithGroupByError()\n }\n\n // Process the SELECT clause early - always create $selected\n // This eliminates duplication and allows for DISTINCT implementation\n if (query.fnSelect) {\n // Handle functional select - apply the function to transform the row\n pipeline = pipeline.pipe(\n map(([key, namespacedRow]) => {\n const selectResults = query.fnSelect!(namespacedRow)\n if (selectResults && typeof selectResults === `object`) {\n const routing = (namespacedRow as any)[INCLUDES_ROUTING]\n if (routing) {\n selectResults[INCLUDES_ROUTING] = routing\n }\n if (directIncludes.length > 0) {\n Object.defineProperty(selectResults, FN_SELECT_STATE, {\n value: {\n sourceRow: namespacedRow,\n fnSelect: query.fnSelect!,\n },\n enumerable: true,\n configurable: true,\n })\n }\n }\n return [\n key,\n {\n ...namespacedRow,\n $selected: selectResults,\n },\n ] as [string, typeof namespacedRow & { $selected: any }]\n }),\n )\n } else if (query.select) {\n pipeline = processSelect(pipeline, query.select, allInputs)\n } else {\n // If no SELECT clause, create $selected with the main table data\n pipeline = pipeline.pipe(\n map(([key, namespacedRow]) => {\n const selectResults =\n !isUnionFrom && !query.join && !query.groupBy\n ? namespacedRow[mainSource]\n : namespacedRow\n\n return [\n key,\n {\n ...namespacedRow,\n $selected: selectResults,\n },\n ] as [string, typeof namespacedRow & { $selected: any }]\n }),\n )\n }\n\n // Tag $selected with routing metadata for includes.\n // This lets collection-config-builder extract routing info (correlationKey + parentContext)\n // from parent results without depending on the user's select.\n if (includesRoutingFns.length > 0) {\n pipeline = pipeline.pipe(\n map(([key, namespacedRow]: any) => {\n const routing: Record<\n string,\n { correlationKey: unknown; parentContext: Record<string, any> | null }\n > = {}\n for (const { fieldName, getRouting } of includesRoutingFns) {\n routing[fieldName] = getRouting(namespacedRow)\n }\n namespacedRow.$selected[INCLUDES_ROUTING] = routing\n return [key, namespacedRow]\n }),\n )\n }\n\n // Process the GROUP BY clause if it exists.\n // When in includes mode (parentKeyStream), pass mainSource so that groupBy\n // preserves __correlationKey for per-parent aggregation.\n const groupByMainSource = parentKeyStream ? mainSource : undefined\n if (query.groupBy && query.groupBy.length > 0) {\n pipeline = processGroupBy(\n pipeline,\n query.groupBy,\n query.having,\n query.select,\n query.fnHaving,\n mainCollectionId,\n groupByMainSource,\n )\n } else if (query.select) {\n // Check if SELECT contains aggregates but no GROUP BY (implicit single-group aggregation)\n const hasAggregates = Object.values(query.select).some(\n (expr) => expr.type === `agg` || containsAggregate(expr),\n )\n if (hasAggregates) {\n // Handle implicit single-group aggregation\n pipeline = processGroupBy(\n pipeline,\n [], // Empty group by means single group\n query.having,\n query.select,\n query.fnHaving,\n mainCollectionId,\n groupByMainSource,\n )\n }\n }\n\n // Process the HAVING clause if it exists (only applies after GROUP BY)\n if (query.having && (!query.groupBy || query.groupBy.length === 0)) {\n // Check if we have aggregates in SELECT that would trigger implicit grouping\n const hasAggregates = query.select\n ? Object.values(query.select).some((expr) => expr.type === `agg`)\n : false\n\n if (!hasAggregates) {\n throw new HavingRequiresGroupByError()\n }\n }\n\n // Process functional HAVING clauses outside of GROUP BY (treat as additional WHERE filters)\n if (\n query.fnHaving &&\n query.fnHaving.length > 0 &&\n (!query.groupBy || query.groupBy.length === 0)\n ) {\n // If there's no GROUP BY but there are fnHaving clauses, apply them as filters\n for (const fnHaving of query.fnHaving) {\n pipeline = pipeline.pipe(\n filter(([_key, namespacedRow]) => {\n return fnHaving(namespacedRow)\n }),\n )\n }\n }\n\n // Process the DISTINCT clause if it exists\n if (query.distinct) {\n pipeline = pipeline.pipe(distinct(([_key, row]) => row.$selected))\n }\n\n // Process orderBy parameter if it exists\n if (query.orderBy && query.orderBy.length > 0) {\n // When in includes mode with limit/offset, use grouped ordering so that\n // the limit is applied per parent (per correlation key), not globally.\n const includesGroupKeyFn =\n parentKeyStream &&\n (query.limit !== undefined || query.offset !== undefined)\n ? (_key: unknown, row: unknown) => {\n const correlationKey = (row as any)?.[mainSource]?.__correlationKey\n const parentContext = (row as any)?.__parentContext\n if (parentContext != null) {\n return JSON.stringify([correlationKey, parentContext])\n }\n return correlationKey\n }\n : undefined\n\n const orderedPipeline = processOrderBy(\n rawQuery,\n pipeline,\n query.orderBy,\n query.select || {},\n collections[mainCollectionId]!,\n optimizableOrderByCollections,\n setWindowFn,\n query.limit,\n query.offset,\n includesGroupKeyFn,\n )\n\n // Final step: extract the $selected and include orderBy index\n const resultPipeline: ResultStream = orderedPipeline.pipe(\n map(([key, [row, orderByIndex]]) => {\n // Extract the final results from $selected and include orderBy index\n const raw = (row as any).$selected\n const finalResults = attachVirtualPropsToSelected(\n unwrapValue(raw),\n row as Record<string, any>,\n )\n // When in includes mode, embed the correlation key and parentContext\n if (parentKeyStream) {\n const correlationKey = (row as any)[mainSource]?.__correlationKey\n const parentContext = (row as any).__parentContext ?? null\n // Strip internal routing properties that may leak via spread selects\n delete finalResults.__correlationKey\n delete finalResults.__parentContext\n return [\n key,\n [finalResults, orderByIndex, correlationKey, parentContext],\n ] as any\n }\n return [key, [finalResults, orderByIndex]] as [unknown, [any, string]]\n }),\n ) as ResultStream\n\n const result = resultPipeline\n // Cache the result before returning (use original query as key)\n const compilationResult: CompilationResult = {\n collectionId: mainCollectionId,\n pipeline: result,\n sourceWhereClauses,\n aliasToCollectionId,\n aliasRemapping,\n includes: includesResults.length > 0 ? includesResults : undefined,\n }\n cache.set(rawQuery, compilationResult)\n\n return compilationResult\n } else if (query.limit !== undefined || query.offset !== undefined) {\n // If there's a limit or offset without orderBy, throw an error\n throw new LimitOffsetRequireOrderByError()\n }\n\n // Final step: extract the $selected and return tuple format (no orderBy)\n const resultPipeline: ResultStream = pipeline.pipe(\n map(([key, row]) => {\n // Extract the final results from $selected and return [key, [results, undefined]]\n const raw = (row as any).$selected\n const finalResults = attachVirtualPropsToSelected(\n unwrapValue(raw),\n row as Record<string, any>,\n )\n // When in includes mode, embed the correlation key and parentContext\n if (parentKeyStream) {\n const correlationKey = (row as any)[mainSource]?.__correlationKey\n const parentContext = (row as any).__parentContext ?? null\n // Strip internal routing properties that may leak via spread selects\n delete finalResults.__correlationKey\n delete finalResults.__parentContext\n return [\n key,\n [finalResults, undefined, correlationKey, parentContext],\n ] as any\n }\n return [key, [finalResults, undefined]] as [\n unknown,\n [any, string | undefined],\n ]\n }),\n )\n\n const result = resultPipeline\n // Cache the result before returning (use original query as key)\n const compilationResult: CompilationResult = {\n collectionId: mainCollectionId,\n pipeline: result,\n sourceWhereClauses,\n aliasToCollectionId,\n aliasRemapping,\n includes: includesResults.length > 0 ? includesResults : undefined,\n }\n cache.set(rawQuery, compilationResult)\n\n return compilationResult\n}\n\n/**\n * Collects aliases used for DIRECT collection references (not subqueries).\n * Used to validate that subqueries don't reuse parent query collection aliases.\n * Only direct CollectionRef aliases matter - QueryRef aliases don't cause conflicts.\n */\nfunction collectDirectCollectionAliases(query: QueryIR): Set<string> {\n const aliases = new Set<string>()\n\n // Collect FROM alias only if it's a direct collection reference\n for (const source of getFromSources(query.from)) {\n if (source.type === `collectionRef`) {\n aliases.add(source.alias)\n }\n }\n\n // Collect JOIN aliases only for direct collection references\n if (query.join) {\n for (const joinClause of query.join) {\n if (joinClause.from.type === `collectionRef`) {\n aliases.add(joinClause.from.alias)\n }\n }\n }\n\n return aliases\n}\n\n/**\n * Validates the structure of a query and its subqueries.\n * Checks that subqueries don't reuse collection aliases from parent queries.\n * This must be called on the RAW query before optimization.\n */\nfunction validateQueryStructure(\n query: QueryIR,\n parentCollectionAliases: Set<string> = new Set(),\n): void {\n // Collect direct collection aliases from this query level\n const currentLevelAliases = collectDirectCollectionAliases(query)\n\n // Check if any current alias conflicts with parent aliases\n for (const alias of currentLevelAliases) {\n if (parentCollectionAliases.has(alias)) {\n throw new DuplicateAliasInSubqueryError(\n alias,\n Array.from(parentCollectionAliases),\n )\n }\n }\n\n // Combine parent and current aliases for checking nested subqueries\n const combinedAliases = new Set([\n ...parentCollectionAliases,\n ...currentLevelAliases,\n ])\n\n // Recursively validate FROM subqueries\n if (query.from.type === `unionAll`) {\n for (const branch of query.from.queries) {\n validateQueryStructure(branch, combinedAliases)\n }\n } else {\n for (const source of getFromSources(query.from)) {\n if (source.type === `queryRef`) {\n validateQueryStructure(source.query, combinedAliases)\n }\n }\n }\n\n // Recursively validate JOIN subqueries\n if (query.join) {\n for (const joinClause of query.join) {\n if (joinClause.from.type === `queryRef`) {\n validateQueryStructure(joinClause.from.query, combinedAliases)\n }\n }\n }\n}\n\n/**\n * Processes the FROM clause, handling direct collection references and subqueries.\n * Populates `aliasToCollectionId` and `aliasRemapping` for per-alias subscription tracking.\n */\nfunction processFromClause(\n from: CollectionRef | QueryRef | UnionFrom | UnionAll,\n allInputs: Record<string, KeyedStream>,\n collections: Record<string, Collection>,\n subscriptions: Record<string, CollectionSubscription>,\n callbacks: Record<string, LazyCollectionCallbacks>,\n lazySources: Set<string>,\n optimizableOrderByCollections: Record<string, OrderByOptimizationInfo>,\n setWindowFn: (windowFn: (options: WindowOptions) => void) => void,\n cache: QueryCache,\n queryMapping: QueryMapping,\n aliasToCollectionId: Record<string, string>,\n aliasRemapping: Record<string, string>,\n sourceWhereClauses: Map<string, BasicExpression<boolean>>,\n): {\n alias: string\n pipeline: NamespacedAndKeyedStream\n collectionId: string\n sources: Record<string, KeyedStream>\n sourceIncludes: Array<SourceInclude>\n directIncludes: Array<IncludesCompilationResult>\n isUnionFrom: boolean\n} {\n if (from.type === `unionAll`) {\n return processUnionAll(\n from,\n allInputs,\n collections,\n subscriptions,\n callbacks,\n lazySources,\n optimizableOrderByCollections,\n setWindowFn,\n cache,\n queryMapping,\n aliasToCollectionId,\n aliasRemapping,\n sourceWhereClauses,\n )\n }\n\n if (from.type !== `unionFrom`) {\n const { alias, input, collectionId, sourceIncludes } = processFrom(\n from,\n allInputs,\n collections,\n subscriptions,\n callbacks,\n lazySources,\n optimizableOrderByCollections,\n setWindowFn,\n cache,\n queryMapping,\n aliasToCollectionId,\n aliasRemapping,\n sourceWhereClauses,\n )\n\n return {\n alias,\n pipeline: wrapInputWithAlias(input, alias),\n collectionId,\n sources: { [alias]: input },\n sourceIncludes,\n directIncludes: [],\n isUnionFrom: false,\n }\n }\n\n if (from.sources.length === 0) {\n throw new UnsupportedFromTypeError(`empty unionFrom`)\n }\n\n const sources: Record<string, KeyedStream> = {}\n const sourceIncludes: Array<SourceInclude> = []\n let pipeline: NamespacedAndKeyedStream | undefined\n let mainAlias = ``\n let mainCollectionId = ``\n\n for (const source of from.sources) {\n const {\n alias,\n input,\n collectionId,\n sourceIncludes: childSourceIncludes,\n } = processFrom(\n source,\n allInputs,\n collections,\n subscriptions,\n callbacks,\n lazySources,\n optimizableOrderByCollections,\n setWindowFn,\n cache,\n queryMapping,\n aliasToCollectionId,\n aliasRemapping,\n sourceWhereClauses,\n )\n\n if (!mainAlias) {\n mainAlias = alias\n mainCollectionId = collectionId\n }\n sources[alias] = input\n sourceIncludes.push(...childSourceIncludes)\n\n const branch = wrapInputWithAlias(input, alias).pipe(\n map(([key, row]) => {\n return [`${alias}:${encodeKeyForUnionBranch(key)}`, row] as [\n string,\n typeof row,\n ]\n }),\n )\n\n pipeline = pipeline ? pipeline.pipe(concatOperator(branch)) : branch\n }\n\n return {\n alias: mainAlias,\n pipeline: pipeline!,\n collectionId: mainCollectionId,\n sources,\n sourceIncludes,\n directIncludes: [],\n isUnionFrom: true,\n }\n}\n\nfunction processUnionAll(\n from: UnionAll,\n allInputs: Record<string, KeyedStream>,\n collections: Record<string, Collection>,\n subscriptions: Record<string, CollectionSubscription>,\n callbacks: Record<string, LazyCollectionCallbacks>,\n lazySources: Set<string>,\n optimizableOrderByCollections: Record<string, OrderByOptimizationInfo>,\n setWindowFn: (windowFn: (options: WindowOptions) => void) => void,\n cache: QueryCache,\n queryMapping: QueryMapping,\n aliasToCollectionId: Record<string, string>,\n aliasRemapping: Record<string, string>,\n sourceWhereClauses: Map<string, BasicExpression<boolean>>,\n): {\n alias: string\n pipeline: NamespacedAndKeyedStream\n collectionId: string\n sources: Record<string, KeyedStream>\n sourceIncludes: Array<SourceInclude>\n directIncludes: Array<IncludesCompilationResult>\n isUnionFrom: boolean\n} {\n if (from.queries.length === 0) {\n throw new UnsupportedFromTypeError(`empty unionAll`)\n }\n\n const sources: Record<string, KeyedStream> = {}\n const sourceIncludes: Array<SourceInclude> = []\n const directIncludes: Array<IncludesCompilationResult> = []\n let pipeline: NamespacedAndKeyedStream | undefined\n let mainCollectionId = ``\n const branchAliases = new Set<string>()\n\n for (let index = 0; index < from.queries.length; index++) {\n const branch = from.queries[index]!\n for (const source of getAllSources(branch)) {\n if (branchAliases.has(source.alias)) {\n throw new Error(\n `Duplicate source alias \"${source.alias}\" in unionAll query branches. ` +\n `Use distinct aliases in each branch before passing them to unionAll().`,\n )\n }\n branchAliases.add(source.alias)\n }\n const branchResult = compileQuery(\n branch,\n allInputs,\n collections,\n subscriptions,\n callbacks,\n lazySources,\n optimizableOrderByCollections,\n setWindowFn,\n cache,\n queryMapping,\n )\n\n if (!mainCollectionId) {\n mainCollectionId = branchResult.collectionId\n }\n Object.assign(aliasToCollectionId, branchResult.aliasToCollectionId)\n Object.assign(aliasRemapping, branchResult.aliasRemapping)\n directIncludes.push(...(branchResult.includes ?? []))\n Object.assign(sources, allInputs)\n for (const [alias, where] of branchResult.sourceWhereClauses) {\n sourceWhereClauses.set(alias, where)\n }\n\n const branchPipeline = branchResult.pipeline.pipe(\n map(([key, [row]]) => {\n return [`${index}:${encodeKeyForUnionBranch(key)}`, row] as [\n string,\n Record<string, any>,\n ]\n }),\n )\n\n pipeline = pipeline\n ? pipeline.pipe(concatOperator(branchPipeline))\n : branchPipeline\n }\n\n return {\n alias: ``,\n pipeline: pipeline!,\n collectionId: mainCollectionId,\n sources,\n sourceIncludes,\n directIncludes,\n isUnionFrom: true,\n }\n}\n\nfunction wrapInputWithAlias(\n input: KeyedStream,\n alias: string,\n): NamespacedAndKeyedStream {\n return input.pipe(\n map(([key, row]) => {\n // Initialize the record with a nested structure.\n // If __parentContext exists (from parent-referencing includes), merge parent\n // aliases into the namespaced row so WHERE can resolve parent refs.\n const { __parentContext, ...cleanRow } = row as any\n const nsRow: Record<string, any> = { [alias]: cleanRow }\n if (__parentContext) {\n Object.assign(nsRow, __parentContext)\n ;(nsRow as any).__parentContext = __parentContext\n }\n return [key, nsRow] as [unknown, Record<string, typeof row>]\n }),\n )\n}\n\nfunction encodeKeyForUnionBranch(key: unknown): string {\n if (typeof key === `string`) {\n return `string:${key}`\n }\n if (typeof key === `number`) {\n return `number:${String(key)}`\n }\n if (typeof key === `bigint`) {\n return `bigint:${String(key)}`\n }\n return `${typeof key}:${JSON.stringify(key)}`\n}\n\nfunction processFrom(\n from: CollectionRef | QueryRef,\n allInputs: Record<string, KeyedStream>,\n collections: Record<string, Collection>,\n subscriptions: Record<string, CollectionSubscription>,\n callbacks: Record<string, LazyCollectionCallbacks>,\n lazySources: Set<string>,\n optimizableOrderByCollections: Record<string, OrderByOptimizationInfo>,\n setWindowFn: (windowFn: (options: WindowOptions) => void) => void,\n cache: QueryCache,\n queryMapping: QueryMapping,\n aliasToCollectionId: Record<string, string>,\n aliasRemapping: Record<string, string>,\n sourceWhereClauses: Map<string, BasicExpression<boolean>>,\n): {\n alias: string\n input: KeyedStream\n collectionId: string\n sourceIncludes: Array<SourceInclude>\n} {\n switch (from.type) {\n case `collectionRef`: {\n const input = allInputs[from.alias]\n if (!input) {\n throw new CollectionInputNotFoundError(\n from.alias,\n from.collection.id,\n Object.keys(allInputs),\n )\n }\n aliasToCollectionId[from.alias] = from.collection.id\n return {\n alias: from.alias,\n input,\n collectionId: from.collection.id,\n sourceIncludes: [],\n }\n }\n case `queryRef`: {\n // Find the original query for caching purposes\n const originalQuery = queryMapping.get(from.query) || from.query\n\n // Recursively compile the sub-query with cache\n const subQueryResult = compileQuery(\n originalQuery,\n allInputs,\n collections,\n subscriptions,\n callbacks,\n lazySources,\n optimizableOrderByCollections,\n setWindowFn,\n cache,\n queryMapping,\n )\n\n // Pull up alias mappings from subquery to parent scope.\n // This includes both the innermost alias-to-collection mappings AND\n // any existing remappings from nested subquery levels.\n Object.assign(aliasToCollectionId, subQueryResult.aliasToCollectionId)\n Object.assign(aliasRemapping, subQueryResult.aliasRemapping)\n\n // Pull up source WHERE clauses from subquery to parent scope.\n // This enables loadSubset to receive the correct where clauses for subquery collections.\n //\n // IMPORTANT: Skip pull-up for optimizer-created subqueries. These are detected when:\n // 1. The outer alias (from.alias) matches the inner alias (from.query.from.alias)\n // 2. The subquery was found in queryMapping (it's a user-defined subquery, not optimizer-created)\n //\n // For optimizer-created subqueries, the parent already has the sourceWhereClauses\n // extracted from the original raw query, so pulling up would be redundant.\n // More importantly, pulling up for optimizer-created subqueries can cause issues\n // when the optimizer has restructured the query.\n const isUserDefinedSubquery = queryMapping.has(from.query)\n const subqueryFromAlias = getFirstFromAlias(from.query.from)\n const isOptimizerCreated =\n !isUserDefinedSubquery && from.alias === subqueryFromAlias\n\n if (!isOptimizerCreated) {\n for (const [alias, whereClause] of subQueryResult.sourceWhereClauses) {\n sourceWhereClauses.set(alias, whereClause)\n }\n }\n\n // Create a FLATTENED remapping from outer alias to innermost alias.\n // For nested subqueries, this ensures one-hop lookups (not recursive chains).\n //\n // Example with 3-level nesting:\n // Inner: .from({ user: usersCollection })\n // Middle: .from({ activeUser: innerSubquery }) → creates: activeUser → user\n // Outer: .from({ author: middleSubquery }) → creates: author → user (not author → activeUser)\n //\n // The key insight: We search through the PULLED-UP aliasToCollectionId (which contains\n // the innermost 'user' alias), so we always map directly to the deepest level.\n // This means aliasRemapping[alias] is always a single lookup, never recursive.\n // Needed for subscription resolution during lazy loading.\n const innerAlias = Object.keys(subQueryResult.aliasToCollectionId).find(\n (alias) =>\n subQueryResult.aliasToCollectionId[alias] ===\n subQueryResult.collectionId,\n )\n if (innerAlias && innerAlias !== from.alias) {\n aliasRemapping[from.alias] = innerAlias\n }\n\n // Extract the pipeline from the compilation result\n const subQueryInput = subQueryResult.pipeline\n\n // Subqueries may return [key, [value, orderByIndex]] (with ORDER BY) or [key, value] (without ORDER BY)\n // We need to extract just the value for use in parent queries\n const extractedInput = subQueryInput.pipe(\n map((data: any) => {\n const [key, [value, _orderByIndex]] = data\n // Unwrap Value expressions that might have leaked through as the entire row\n const unwrapped = unwrapValue(value)\n return [key, unwrapped] as [unknown, any]\n }),\n )\n\n return {\n alias: from.alias,\n input: extractedInput,\n collectionId: subQueryResult.collectionId,\n sourceIncludes:\n subQueryResult.includes?.map((include) => ({\n sourceAlias: from.alias,\n