@apollo/query-planner
import { assert, arrayEquals, baseType, CompositeType, Field, FieldSelection, FragmentElement, isAbstractType, isCompositeType, isListType, isObjectType, isNamedType, ListType, NonNullType, ObjectType, Operation, OperationPath, sameOperationPaths, Schema, SchemaRootKind, Selection, SelectionSet, selectionSetOf, Variable, VariableDefinition, VariableDefinitions, newDebugLogger, selectionOfElement, selectionSetOfElement, NamedFragments, operationToDocument, MapWithCachedArrays, FederationMetadata, federationMetadata, entitiesFieldName, concatOperationPaths, Directive, directiveApplicationsSubstraction, conditionalDirectivesInOperationPath, SetMultiMap, OperationElement, Concrete, DeferDirectiveArgs, setValues, MultiMap, typenameFieldName, mapKeys, operationPathToStringPath, mapValues, isInterfaceObjectType, isInterfaceType, Type, MutableSelectionSet, SelectionSetUpdates, AbstractType, isDefined, InterfaceType, FragmentSelection, typesCanBeMerged, Supergraph, sameType, isInputType, possibleRuntimeTypes, NamedType, VariableCollector, DEFAULT_MIN_USAGES_TO_OPTIMIZE, } from "@apollo/federation-internals"; import { advanceSimultaneousPathsWithOperation, Edge, emptyContext, ExcludedDestinations, QueryGraph, GraphPath, isPathContext, isRootPathTree, OpGraphPath, OpPathTree, OpRootPathTree, PathContext, PathTree, RootVertex, Vertex, isRootVertex, ExcludedConditions, advanceOptionsToString, ConditionResolution, unsatisfiedConditionsResolution, cachingConditionResolver, ConditionResolver, addConditionExclusion, SimultaneousPathsWithLazyIndirectPaths, simultaneousPathsToString, SimultaneousPaths, terminateWithNonRequestedTypenameField, getLocallySatisfiableKey, createInitialOptions, buildFederatedQueryGraph, FEDERATED_GRAPH_ROOT_SOURCE, NonLocalSelectionsState, NonLocalSelectionsMetadata, } from "@apollo/query-graphs"; import { stripIgnoredCharacters, print, OperationTypeNode, SelectionSetNode, Kind } from "graphql"; import { DeferredNode, FetchDataKeyRenamer, FetchDataRewrite } from "."; import { Conditions, conditionsOfSelectionSet, isConstantCondition, mergeConditions, removeConditionsFromSelectionSet, updatedConditions } from "./conditions"; import { enforceQueryPlannerConfigDefaults, QueryPlannerConfig, validateQueryPlannerConfig } from "./config"; import { generateAllPlansAndFindBest } from "./generateAllPlans"; import { QueryPlan, ResponsePath, SequenceNode, PlanNode, ParallelNode, FetchNode, SubscriptionNode, trimSelectionNodes } from "./QueryPlan"; import { validateRecursiveSelections } from './recursiveSelectionsLimit'; const debug = newDebugLogger('plan'); // Somewhat random string used to optimise handling __typename in some cases. See usage for details. The concrete value // has no particular significance. const SIBLING_TYPENAME_KEY = 'sibling_typename'; type CostFunction = FetchGroupProcessor<number, number>; /** * Constant used during query plan cost computation to account for the base cost of doing a fetch, that is the * fact any fetch imply some networking cost, request serialization/deserialization, validation, ... * * The number is a little bit arbitrary, but insofar as we roughly assign a cost of 1 to a single field queried * (see `selectionCost` method), this can be though of as saying that resolving a single field is in general * a tiny fraction of the actual cost of doing a subgraph fetch. */ const fetchCost = 1000; /** * Constant used during query plan cost computation as a multiplier to the cost of fetches made in sequences. 
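 *
 * As a rough, illustrative calculation (not normative; it assumes, per `selectionCost`, a cost of roughly 1 per
 * queried field): two fetch groups of 5 fields each cost about (1000 + 5) = 1005 apiece under `onFetchGroup`.
 * Run in parallel they sum to ~2010, while run as a two-stage sequence they come out near
 * 1005 + 100 * 1005 = 101505 under `sequenceCost` below, so sequential shapes are penalized by orders of
 * magnitude in favor of wider, shallower plans.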
* * This means that if 3 fetches are done in sequence, the cost of 1nd one is multiplied by this number, the * 2nd by twice this number, and the 3rd one by thrice this number. The goal is to heavily favor query plans * with the least amount of sequences, since this affect overall latency directly. The exact number is a tad * arbitrary however. */ const pipeliningCost = 100; /** * Computes the cost of a Plan. * * A plan is essentially some mix of sequences and parallels of fetches. And the plan cost * is about minimizing both: * 1. The expected total latency of executing the plan. Typically, doing 2 fetches in * parallel will most likely have much better latency then executing those exact same * fetches in sequence, and so the cost of the latter must be greater than that of * the former. * 2. The underlying use of resources. For instance, if we query 2 fields and we have * the choice between getting those 2 fields from a single subgraph in 1 fetch, or * get each from a different subgraph with 2 fetches in parallel, then we want to * favor the former as just doing a fetch in and of itself has a cost in terms of * resources consumed. * * Do note that at the moment, this cost is solely based on the "shape" of the plan and has * to make some conservative assumption regarding concrete runtime behaviour. In particular, * it assumes that: * - all fields have the same cost (all resolvers take the same time). * - that field cost is relative small compare to actually doing a subgraph fetch. That is, * it assumes that the networking and other query processing costs are much higher than * the cost of resolving a single field. Or to put it more concretely, it assumes that * a fetch of 5 fields is probably not too different from than of 2 fields. */ const defaultCostFunction: CostFunction = { /** * The cost of a fetch roughly proportional to how many fields it fetches (but see `selectionCost` for more details) * plus some constant "premium" to account for the fact than doing each fetch is costly (and that fetch cost often * dwarfted the actual cost of fields resolution). */ onFetchGroup: (group: FetchGroup) => (fetchCost + group.cost()), /** * We don't take conditions into account in costing for now as they don't really know anything on the condition * and this shouldn't really play a role in picking a plan over another. */ onConditions: (_: Conditions, value: number) => value, /** * We sum the cost of fetch groups in parallel. Note that if we were only concerned about expected latency, * we could instead take the `max` of the values, but as we also try to minimize general resource usage, we * want 2 parallel fetches with cost 1000 to be more costly than one with cost 1000 and one with cost 10, * so suming is a simple option. */ reduceParallel: (values: number[]) => parallelCost(values), /** * For sequences, we want to heavily favor "shorter" pipelines of fetches as this directly impact the * expected latency of the overall plan. * * To do so, each "stage" of a sequence/pipeline gets an additional multiplier on the intrinsic cost * of that stage. */ reduceSequence: (values: number[]) => sequenceCost(values), /** * This method exists so we can inject the necessary information for deferred block when * genuinely creating plan nodes. It's irrelevant to cost computation however and we just * return the cost of the block unchanged. 
*/ reduceDeferred(_: DeferredInfo, value: number): number { return value; }, /** * It is unfortunately a bit difficult to properly compute costs for defers because in theory * some of the deferred blocks (the costs in `deferredValues`) can be started _before_ the full * `nonDeferred` part finishes (more precisely, the "structure" of query plans express the fact * that there is a non-deferred part and other deferred parts, but the complete dependency of * when a deferred part can be start is expressed through the `FetchNode.id` field, and as * this cost function is currently mainly based on the "structure" of query plans, we don't * have easy access to this info). * * Anyway, the approximation we make here is that all the deferred starts strictly after the * non-deferred one, and that all the deferred parts can be done in parallel. */ reduceDefer(nonDeferred: number, _: SelectionSet, deferredValues: number[]): number { return sequenceCost([nonDeferred, parallelCost(deferredValues)]); }, }; function parallelCost(values: number[]): number { return sum(values); } function sequenceCost(stages: number[]): number { return stages.reduceRight((acc, stage, idx) => (acc + (Math.max(1, idx * pipeliningCost) * stage)), 0); } type ClosedPath<RV extends Vertex> = { paths: SimultaneousPaths<RV>, selection?: SelectionSet, } function closedPathToString(p: ClosedPath<any>): string { const pathStr = simultaneousPathsToString(p.paths); return p.selection ? `${pathStr} -> ${p.selection}` : pathStr; } function flattenClosedPath<RV extends Vertex>( p: ClosedPath<RV> ): { path: OpGraphPath<RV>, selection?: SelectionSet }[] { return p.paths.map((path) => ({ path, selection: p.selection})); } type ClosedBranch<RV extends Vertex> = ClosedPath<RV>[]; function allTailVertices(options: SimultaneousPathsWithLazyIndirectPaths<any>[]): Set<Vertex> { const vertices = new Set<Vertex>(); for (const option of options) { for (const path of option.paths) { vertices.add(path.tail); } } return vertices; } function selectionIsFullyLocalFromAllVertices( selection: SelectionSet, vertices: Set<Vertex>, inconsistentAbstractTypesRuntimes: Set<string>, ): boolean { let _useInconsistentAbstractTypes: boolean | undefined = undefined; const useInconsistentAbstractTypes = (): boolean => { if (_useInconsistentAbstractTypes === undefined) { _useInconsistentAbstractTypes = selection.some((elt) => elt.kind === 'FragmentElement' && !!elt.typeCondition && inconsistentAbstractTypesRuntimes.has(elt.typeCondition.name) ); } return _useInconsistentAbstractTypes; } for (const vertex of vertices) { // To guarantee that the selection is fully local from the provided vertex/type, we must have: // - no edge crossing subgraphs from that vertex. // - the type must be compositeType (mostly just ensuring the selection make sense). // - everything in the selection must be avaiable in the type (which `rebaseOn` essentially validates). // - the selection must not "type-cast" into any abstract type that has inconsistent runtimes acrosse subgraphs. The reason for the // later condition is that `selection` is originally a supergraph selection, but that we're looking to apply "as-is" to a subgraph. // But suppose it has a `... on I` where `I` is an interface. Then it's possible that `I` includes "more" types in the supergraph // than in the subgraph, and so we might have to type-explode it. If so, we cannot use the selection "as-is". 
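    // As a purely hypothetical illustration of that last point: suppose the supergraph declares `interface I`
    // implemented by `A` and `B`, but in the subgraph at `vertex` only `A` declares `implements I` (even though
    // `B` exists there). A supergraph selection `... on I { f }` applied "as-is" to that subgraph would not cover
    // `B` objects, so it would have to be type-exploded (e.g. into `... on A { f } ... on B { f }`) instead of
    // reused verbatim; the `inconsistentAbstractTypesRuntimes` check is what catches this situation.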
if (vertex.hasReachableCrossSubgraphEdges || !isCompositeType(vertex.type) || !selection.canRebaseOn(vertex.type) || useInconsistentAbstractTypes() ) { return false; } } return true; } /** * Given 2 paths that are 2 different options to reach the same query leaf field, checks if one can be shown * to be always "better" (more efficient/optimal) than the other one, and this regardless of any surrounding context (that * is regardless of what the rest of the query plan would be for any other query leaf field. * * Note that this method is used on final options of a given "query path", so all the heuristics done within `GraphPath` * to avoid unecessary option have already been applied (say, avoiding to consider a path that do 2 successives key jumps * when there is a 1 jump equivalent, ...), so this focus on what can be done based on the fact that the path considered * are "finished". * * @return -1 if `opt1` is known to be strictly better than `opt2`, 1 if it is `opt2` that is strictly better, and 0 if we * cannot really guarantee anything (at least "out of context"). */ export function compareOptionsComplexityOutOfContext<RV extends Vertex>(opt1: SimultaneousPaths<RV>, opt2: SimultaneousPaths<RV>): number { // Currently, we only every compare single-path options. We may find smart things to do for multi-path options later, // but unsure what currently. if (opt1.length === 1) { if (opt2.length === 1) { return compareSinglePathOptionsComplexityOutOfContext(opt1[0], opt2[0]); } else { return compareSingleVsMultiPathOptionsComplexityOutOfContext(opt1[0], opt2); } } else if (opt2.length === 1) { return -compareSingleVsMultiPathOptionsComplexityOutOfContext(opt2[0], opt1); } return 0; } function compareSinglePathOptionsComplexityOutOfContext<RV extends Vertex>(p1: OpGraphPath<RV>, p2: OpGraphPath<RV>): number { // Currently, this method only handle the case where we have something like: // - `p1`: <some prefix> -[t]-> T(A) -[u]-> U(A) -[x] -> Int(A) // - `p2`: <some prefix> -[t]-> T(A) -[key]-> T(B) -[u]-> U(B) -[x] -> Int(B) // That is, we have 2 choices that are identical up to the "end", when one stays in the subgraph (p1, which stays in A) // while the other use a key to another subgraph (p2, going to B). // // In such a case, whatever else the a query might be doing, it can never be "worst" // to use `p1` than to use `p2` because both will force the same "fetch group" up to the // end, but `p2` may force one more fetch that `p` does not. // Do note that we say "may" above, because the rest of the plan may well have a forced // choice like: // - `other`: <some prefix> -[t]-> T(A) -[key]-> T(B) -[u]-> U(B) -[y] -> Int(B) // in which case the plan will have the jump from A to B after `t` whether we use `p1` or // `p2`, but while in that particular case `p1` and `p2` are about comparable in term // of performance, `p1` is still not worst than `p2` (and in other situtation, `p1` may // genuinely be better). // // Note that this is in many ways just a generalization of a heuristic we use earlier for leaf field. That is, // we will never get as input to this method something like: // - `p1`: <some prefix> -[t]-> T(A) -[x] -> Int(A) // - `p2`: <some prefix> -[t]-> T(A) -[key]-> T(B) -[x] -> Int(B) // because when the code is asked for option for `x` after `<some prefix> -[t]-> T(A)`, it notices that `x` // is a leaf and is in `A`, so it doesn't ever look for alternative path. But this only work for direct // leaf of an entity. 
  // In the example at the beginning, field `u` makes this not work, because when we compute choices for `u`,
  // we don't yet know what comes after that, and so we have to take the option of going to subgraph `B` into
  // account (it may very well be that whatever comes after `u` is not in `A`, for instance).
  if (p1.tail.source !== p2.tail.source) {
    const { thisJumps: p1Jumps, thatJumps: p2Jumps } = p1.countSubgraphJumpsAfterLastCommonVertex(p2);
    // As described above, we want to know if one of the paths has no jumps at all (after the common prefix) while
    // the other does have some.
    if (p1Jumps === 0 && p2Jumps > 0) {
      return -1;
    } else if (p1Jumps > 0 && p2Jumps === 0) {
      return 1;
    } else {
      return 0;
    }
  }
  return 0;
}

function compareSingleVsMultiPathOptionsComplexityOutOfContext<RV extends Vertex>(p1: OpGraphPath<RV>, p2s: SimultaneousPaths<RV>): number {
  // This handles the same case as the single-path-only case, but compares the single path against each of the
  // options of the multi-path, and only "ignores" the multi-path if all its paths can be ignored.
  // Note that this happens less often than the single-path-only case, but with @provides on an interface, you can
  // have cases where on the one side you can get something entirely on the current graph, but the type-exploded case
  // has still been generated due to the leaf field not being the one just after the "provided" interface.
  for (const p2 of p2s) {
    // Note: not sure if it is possible for a branch of the multi-path option to subsume the single-path one in practice, but
    // if it does, we ignore it because it's not obvious that this is enough to get rid of `p1` (maybe `p1` is provably a bit
    // costlier than one of the paths of `p2s`, but `p2s` may have many paths and could still be collectively worse than `p1`).
    if (compareSinglePathOptionsComplexityOutOfContext(p1, p2) >= 0) {
      return 0;
    }
  }
  return -1;
}

class QueryPlanningTraversal<RV extends Vertex> {
  // The stack contains all states that aren't terminal.
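  //
  // Rough shape of the algorithm, as a summary of the code below: `findBestPlan()` repeatedly pops an open branch
  // (a selection plus the simultaneous-path options that can reach it) off that stack, advances every option with
  // `advanceSimultaneousPathsWithOperation`, pushes any sub-selections back as new open branches, and records
  // leaves as closed branches. Once the stack is empty, `computeBestPlanFromClosedBranches()` prunes options that
  // are provably no better than others and picks the cheapest combination as the best plan.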
private bestPlan: [FetchDependencyGraph, OpPathTree<RV>, number] | undefined; private readonly isTopLevel: boolean; private conditionResolver: ConditionResolver; private stack: [Selection, SimultaneousPathsWithLazyIndirectPaths<RV>[]][]; private readonly closedBranches: ClosedBranch<RV>[] = []; private readonly optionsLimit: number | null; private readonly typeConditionedFetching: boolean; constructor( readonly parameters: PlanningParameters<RV>, selectionSet: SelectionSet, readonly startFetchIdGen: number, readonly hasDefers: boolean, private readonly rootKind: SchemaRootKind, readonly costFunction: CostFunction, initialContext: PathContext, typeConditionedFetching: boolean, nonLocalSelectionsState: NonLocalSelectionsState | null, excludedDestinations: ExcludedDestinations = [], excludedConditions: ExcludedConditions = [], ) { const { root, federatedQueryGraph } = parameters; this.typeConditionedFetching = typeConditionedFetching || false; this.isTopLevel = isRootVertex(root); this.optionsLimit = parameters.config.debug?.pathsLimit; this.conditionResolver = cachingConditionResolver( (edge, context, excludedEdges, excludedConditions, extras) => this.resolveConditionPlan(edge, context, excludedEdges, excludedConditions, extras), ); const initialPath: OpGraphPath<RV> = GraphPath.create(federatedQueryGraph, root); const initialOptions = createInitialOptions( initialPath, initialContext, this.conditionResolver, excludedDestinations, excludedConditions, parameters.overrideConditions, ); this.stack = mapOptionsToSelections(selectionSet, initialOptions); if ( this.parameters.federatedQueryGraph.nonLocalSelectionsMetadata && nonLocalSelectionsState ) { if (this.parameters.federatedQueryGraph.nonLocalSelectionsMetadata .checkNonLocalSelectionsLimitExceededAtRoot( this.stack, nonLocalSelectionsState, this.parameters.supergraphSchema, this.parameters.inconsistentAbstractTypesRuntimes, this.parameters.overrideConditions, ) ) { throw Error(`Number of non-local selections exceeds limit of ${ NonLocalSelectionsMetadata.MAX_NON_LOCAL_SELECTIONS }`); } } } private debugStack() { if (this.isTopLevel && debug.enabled) { debug.group('Query planning open branches:'); for (const [selection, options] of this.stack) { debug.groupedValues(options, opt => `${simultaneousPathsToString(opt)}`, `${selection}:`); } debug.groupEnd(); } } findBestPlan(): [FetchDependencyGraph, OpPathTree<RV>, number] | undefined { while (this.stack.length > 0) { this.debugStack(); const [selection, options] = this.stack.pop()!; this.handleOpenBranch(selection, options); } this.computeBestPlanFromClosedBranches(); return this.bestPlan; } private recordClosedBranch(closed: ClosedBranch<RV>) { const maybeTrimmed = this.maybeEliminateStrictlyMoreCostlyPaths(closed); debug.log(() => `Closed branch has ${maybeTrimmed.length} options (eliminated ${closed.length - maybeTrimmed.length} that could be proved as unecessary)`); this.closedBranches.push(maybeTrimmed); } private handleOpenBranch(selection: Selection, options: SimultaneousPathsWithLazyIndirectPaths<RV>[]) { const operation = selection.element; debug.group(() => `Handling open branch: ${operation}`); let newOptions: SimultaneousPathsWithLazyIndirectPaths<RV>[] = []; for (const option of options) { const followupForOption = advanceSimultaneousPathsWithOperation( this.parameters.supergraphSchema, option, operation, this.parameters.overrideConditions, ); if (!followupForOption) { // There is no valid way to advance the current `operation` from this option, so this option is a dead branch // 
that cannot produce a valid query plan. So we simply ignore it and rely on other options. continue; } if (followupForOption.length === 0) { // This `operation` is valid from that option but is guarantee to yield no result (it's a type condition that, along // with prior condition, has no intersection). Given that (assuming the user do properly resolve all versions of a // given field the same way from all subgraphs) all options should return the same results, we know that operation // should return no result from all options (even if we can't provide it technically). // More concretely, this usually means the current operation is a type condition that has no intersection with the possible // current runtime types at this point, and this means whatever fields the type condition sub-selection selects, they // will never be part of the results. That said, we cannot completely ignore the type-condition/fragment or we'd end // up with the wrong results. Consider the example a sub-part of the query is : // { // foo { // ... on Bar { // field // } // } // } // and suppose that `... on Bar` can never match a concrete runtime type at this point. Because that's the only sub-selection // of `foo`, if we completely ignore it, we'll end up not querying this at all. Which means that, during execution, // we'd either return (for that sub-part of the query) `{ foo: null }` if `foo` happens to be nullable, or just `null` for // the whole sub-part otherwise. But what we *should* return (assuming foo doesn't actually return `null`) is `{ foo: {} }`. // Meaning, we have queried `foo` and it returned something, but it's simply not a `Bar` and so nothing was included. // Long story short, to avoid that situation, we replace the whole `... on Bar` section that can never match the runtime // type by simply getting the `__typename` of `foo`. This ensure we do query `foo` but don't end up including condiditions // that may not even make sense to the subgraph we're querying. // Do note that we'll only need that `__typename` if there is no other selections inside `foo`, and so we might include // it unecessarally in practice: it's a very minor inefficiency though. if (operation.kind === 'FragmentElement') { this.recordClosedBranch(options.map((o) => ({ paths: o.paths.map(p => terminateWithNonRequestedTypenameField(p, this.parameters.overrideConditions)) }))); } debug.groupEnd(() => `Terminating branch with no possible results`); return; } newOptions = newOptions.concat(followupForOption); if (this.optionsLimit && newOptions.length > this.optionsLimit) { throw new Error(`Too many options generated for ${selection}, reached the limit of ${this.optionsLimit}`); } } if (newOptions.length === 0) { // If we have no options, it means there is no way to build a plan for that branch, and // that means the whole query planning has no plan. // This should never happen for a top-level query planning (unless the supergraph has *not* been // validated), but can happen when computing sub-plans for a key condition. 
if (this.isTopLevel) { debug.groupEnd(() => `No valid options to advance ${selection} from ${advanceOptionsToString(options)}`); throw new Error(`Was not able to find any options for ${selection}: This shouldn't have happened.`); } else { // We clear both open branches and closed ones as a mean to terminate the plan computation with // no plan this.stack.splice(0, this.stack.length); this.closedBranches.splice(0, this.closedBranches.length); debug.groupEnd(() => `No possible plan for ${selection} from ${advanceOptionsToString(options)}; terminating condition`); return; } } if (selection.selectionSet) { const allTails = allTailVertices(newOptions); if (selectionIsFullyLocalFromAllVertices(selection.selectionSet, allTails, this.parameters.inconsistentAbstractTypesRuntimes) && !selection.hasDefer() ) { // We known the rest of the selection is local to whichever subgraph the current options are in, and so we're // going to keep that selection around and add it "as-is" to the `FetchNode` when needed, saving a bunch of // work (created `GraphPath`, merging `PathTree`, ...). However, as we're skipping the "normal path" for that // sub-selection, there is a few things that are handled in said "normal path" that we need to still handle. // More precisely: // - we have this "attachment" trick that removes requested `__typename` temporarily, so we should add it back. // - we still need to add the selection of `__typename` for abstract types. It is not really necessary for the // execution per-se, but if we don't do it, then we will not be able to reuse named fragments as often // as we should (we add `__typename` for abstract types on the "normal path" and so we add them too to // named fragments; as such, we need them here too). const selectionSet = addTypenameFieldForAbstractTypes(addBackTypenameInAttachments(selection.selectionSet)); this.recordClosedBranch(newOptions.map((opt) => ({ paths: opt.paths, selection: selectionSet }))); } else { for (const branch of mapOptionsToSelections(selection.selectionSet, newOptions)) { this.stack.push(branch); } } debug.groupEnd(); } else { this.recordClosedBranch(newOptions.map((opt) => ({ paths: opt.paths }))); debug.groupEnd(() => `Branch finished`); } } /** * This method is called on a closed branch, that is on all the possible options found * to get a particular leaf of the query being planned. And when there is more than one * option, it tries a last effort at checking an option can be shown to be less efficient * than another one _whatever the rest of the query plan is_ (that is, whatever the options * for any other leaf of the query are). * * In practice, this compare all pair of options and call the heuristics * of `compareOptionsComplexityOutOfContext` on them to see if one strictly * subsume the other (and if that's the case, the subsumed one is ignored). */ private maybeEliminateStrictlyMoreCostlyPaths(branch: ClosedBranch<RV>): ClosedBranch<RV> { if (branch.length <= 1) { return branch; } // Copying the branch because we're going to modify in place. const toHandle = branch.concat(); const keptOptions: ClosedPath<RV>[] = []; while (toHandle.length >= 2) { const first = toHandle[0]; let shouldKeepFirst = true; // We compare `first` to every other remaining. But we iterate from end to beginning // because we may remove in place some of those we iterate on and that makes it safe. 
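      // Purely illustrative walk-through: with options [o1, o2, o3], if comparing o1 to o3 returns -1 we drop o3;
      // if comparing o1 to o2 then returns 1 we drop o1 and break, leaving [o2], which the final length check
      // below keeps since nothing remains to compare it against. Only options that no other option strictly
      // subsumes survive.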
for (let i = toHandle.length - 1 ; i >= 1; i--) { const other = toHandle[i]; const cmp = compareOptionsComplexityOutOfContext(first.paths, other.paths); if (cmp < 0) { // This means that `first` is always better than `other`. So we eliminate `other`. toHandle.splice(i, 1); } else if (cmp > 0) { // This means that `first` is always worst than `other`. So we eliminate `first` ( // and we're done with this inner loop). toHandle.splice(0, 1); shouldKeepFirst = false; break; } } if (shouldKeepFirst) { // Means that we found no other option that make first unecessary. We mark first as kept, // and remove it from our working set (which we know it hasn't yet). keptOptions.push(first); toHandle.splice(0, 1); } } // We know toHandle has at most 1 element, but it may have one and we should keep it. if (toHandle.length > 0) { keptOptions.push(toHandle[0]); } return keptOptions; } private newDependencyGraph(): FetchDependencyGraph { const { supergraphSchema, federatedQueryGraph } = this.parameters; const rootType = this.isTopLevel && this.hasDefers ? supergraphSchema.schemaDefinition.rootType(this.rootKind) : undefined; return FetchDependencyGraph.create(supergraphSchema, federatedQueryGraph, this.startFetchIdGen, rootType, this.parameters.config.generateQueryFragments); } // Moves the first closed branch to after any branch having more options. // This method assumes that closed branches are sorted by decreasing number of options _except_ for the first element // which may be out of order, and this method restore that order. private reorderFirstBranch() { const firstBranch = this.closedBranches[0]; let i = 1; while (i < this.closedBranches.length && this.closedBranches[i].length > firstBranch.length) { i++; } // `i` is the smallest index of an element having the same number or less options than the first one, // so we switch that first branch with the element "before" `i` (which has more elements). this.closedBranches[0] = this.closedBranches[i - 1]; this.closedBranches[i - 1] = firstBranch; } private sortOptionsInClosedBranches() { this.closedBranches.forEach((branch) => branch.sort((p1, p2) => { const p1Jumps = Math.max(...p1.paths.map((p) => p.subgraphJumps())); const p2Jumps = Math.max(...p2.paths.map((p) => p.subgraphJumps())); return p1Jumps - p2Jumps; })); } private computeBestPlanFromClosedBranches() { if (this.closedBranches.length === 0) { return; } // We now sort the options within each branch, putting those with the least amount of subgraph jumps first. // The idea is that for each branch taken individually, the option with the least jumps is going to be // the most efficient, and while it is not always the case that the best plan is built for those // individual bests, they are still statistically more likely to be part of the best plan. So putting // them first has 2 benefits for the rest of this method: // 1. if we end up cutting some options of a branch below (due to having too many possible plans), // we'll cut the last option first (we `pop()`), so better cut what it the least likely to be good. // 2. when we finally generate the plan, we use the cost of previously computed plans to cut computation // early when possible (see `generateAllPlansAndFindBest`), so there is a premium in generating good // plans early (it cuts more computation), and putting those more-likely-to-be-good options first helps // this. 
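    // To give a feel for the option-pruning loop further below (illustrative numbers only): with closed branches
    // of sizes [4, 3, 2] there are 4 * 3 * 2 = 24 candidate plans. Popping one option off the 4-option branch
    // updates the count as 24 - 24 / 4 = 18 (i.e. 3 * 3 * 2), and this repeats until the count falls under
    // `config.debug.maxEvaluatedPlans` (or every branch is down to a single option).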
this.sortOptionsInClosedBranches(); // We're out of smart ideas for now, so we look at how many plans we'd have to generate, and if it's // "too much", we reduce it to something manageable by arbitrarilly throwing out options. This effectively // means that when a query has too many options, we give up on always finding the "best" // query plan in favor of an "ok" query plan. // TODO: currently, when we need to reduce options, we do so somewhat arbitrarilly. More // precisely, we reduce the branches with the most options first and then drop the last // option of the branch, repeating until we have a reasonable number of plans to consider. // The sorting we do about help making this slightly more likely to be a good choice, but // there is likely more "smarts" we could add to this. // We sort branches by those that have the most options first. this.closedBranches.sort((b1, b2) => b1.length > b2.length ? -1 : (b1.length < b2.length ? 1 : 0)); let planCount = possiblePlans(this.closedBranches); debug.log(() => `Query has ${planCount} possible plans`); let firstBranch = this.closedBranches[0]; const maxPlansToCompute = this.parameters.config.debug.maxEvaluatedPlans; while (planCount > maxPlansToCompute && firstBranch.length > 1) { // we remove the right-most option of the first branch, and them move that branch to it's new place. const prevSize = BigInt(firstBranch.length); firstBranch.pop(); planCount -= planCount / prevSize; this.reorderFirstBranch(); // Note that if firstBranch is our only branch, it's fine, we'll continue to remove options from // it (but that is beyond unlikely). firstBranch = this.closedBranches[0]; debug.log(() => `Reduced plans to consider to ${planCount} plans`); } // Note that if `!this.isTopLevel`, then this means we're resolving a sub-plan for an edge condition, and we // don't want to count those as "evaluated plans". if (this.parameters.statistics && this.isTopLevel) { this.parameters.statistics.evaluatedPlanCount += Number(planCount); } debug.log(() => `All branches:${this.closedBranches.map((opts, i) => `\n${i}:${opts.map((opt => `\n - ${closedPathToString(opt)}`))}`)}`); // Note that usually, we'll have a majority of branches with just one option. We can group them in // a PathTree first with no fuss. When then need to do a cartesian product between this created // tree an other branches however to build the possible plans and chose. let idxFirstOfLengthOne = 0; while (idxFirstOfLengthOne < this.closedBranches.length && this.closedBranches[idxFirstOfLengthOne].length > 1) { idxFirstOfLengthOne++; } let initialTree: OpPathTree<RV>; let initialDependencyGraph: FetchDependencyGraph; const { federatedQueryGraph, root } = this.parameters; if (idxFirstOfLengthOne === this.closedBranches.length) { initialTree = PathTree.createOp(federatedQueryGraph, root); initialDependencyGraph = this.newDependencyGraph(); } else { const singleChoiceBranches = this .closedBranches .slice(idxFirstOfLengthOne) .flat() .map((cp) => flattenClosedPath(cp)) .flat(); initialTree = PathTree.createFromOpPaths(federatedQueryGraph, root, singleChoiceBranches); initialDependencyGraph = this.updatedDependencyGraph(this.newDependencyGraph(), initialTree); if (idxFirstOfLengthOne === 0) { // Well, we have the only possible plan; it's also the best. 
this.bestPlan = [initialDependencyGraph, initialTree, this.cost(initialDependencyGraph)]; return; } } const otherTrees = this .closedBranches .slice(0, idxFirstOfLengthOne) .map(b => b.map(opt => PathTree.createFromOpPaths(federatedQueryGraph, root, flattenClosedPath(opt)))); const { best, cost} = generateAllPlansAndFindBest({ initial: { graph: initialDependencyGraph, tree: initialTree }, toAdd: otherTrees, addFct: (p, t) => { const updatedDependencyGraph = p.graph.clone(); this.updatedDependencyGraph(updatedDependencyGraph, t); const updatedTree = p.tree.merge(t); return { graph: updatedDependencyGraph, tree: updatedTree }; }, costFct: (p) => this.cost(p.graph), onPlan: (p, cost, prevCost) => { debug.log(() => { if (!prevCost) { return `Computed plan with cost ${cost}: ${p.tree}`; } else if (cost > prevCost) { return `Ignoring plan with cost ${cost} (a better plan with cost ${prevCost} exists): ${p.tree}` } else { return `Found better with cost ${cost} (previous had cost ${prevCost}: ${p.tree}`; } }); }, }); this.bestPlan = [best.graph, best.tree, cost]; } private cost(dependencyGraph: FetchDependencyGraph): number { const { main, deferred } = dependencyGraph.process(this.costFunction, this.rootKind); return deferred.length === 0 ? main : this.costFunction.reduceDefer(main, dependencyGraph.deferTracking.primarySelection!.get(), deferred); } private updatedDependencyGraph(dependencyGraph: FetchDependencyGraph, tree: OpPathTree<RV>): FetchDependencyGraph { return isRootPathTree(tree) ? computeRootFetchGroups(dependencyGraph, tree, this.rootKind, this.typeConditionedFetching) : computeNonRootFetchGroups(dependencyGraph, tree, this.rootKind, this.typeConditionedFetching); } private resolveConditionPlan(edge: Edge, context: PathContext, excludedDestinations: ExcludedDestinations, excludedConditions: ExcludedConditions, extraConditions?: SelectionSet): ConditionResolution { const bestPlan = new QueryPlanningTraversal( { ...this.parameters, root: edge.head, }, extraConditions ?? edge.conditions!, 0, false, 'query', this.costFunction, context, this.typeConditionedFetching, null, excludedDestinations, addConditionExclusion(excludedConditions, edge.conditions), ).findBestPlan(); // Note that we want to return 'null', not 'undefined', because it's the latter that means "I cannot resolve that // condition" within `advanceSimultaneousPathsWithOperation`. return bestPlan ? { satisfied: true, cost: bestPlan[2], pathTree: bestPlan[1] } : unsatisfiedConditionsResolution; } } /** * Used in `FetchDependencyGraph` to store, for a given group, information about one of its parent. * Namely, this structure stores: * 1. the actual parent group, and * 2. the path of the group for which this is a "parent relation" into said parent (`group`). This information * is maintained for the case where we want/need to merge groups into each other. One can roughly think of * this as similar to a `mergeAt`, but that is relative to the start of `group`. It can be `undefined`, which * either mean we don't know that path or that this simply doesn't make sense (there is case where a child `mergeAt` can * be shorter than its parent's, in which case the `path`, which is essentially `child-mergeAt - parent-mergeAt`, does * not make sense (or rather, it's negative, which we cannot represent)). Tl;dr, `undefined` for the `path` means that * should make no assumption and bail on any merging that uses said path. 
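 *
 * For example (illustrative): if a parent group merges at `["product"]` and a child group merges at
 * `["product", "reviews", "@"]`, the child's `path` into that parent corresponds roughly to the trailing
 * `reviews` part of the difference between the two `mergeAt`s. Conversely, when a child's `mergeAt` is shorter
 * than its parent's, that difference would be "negative", and `path` is left `undefined`.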
*/ type ParentRelation = { group: FetchGroup, path?: OperationPath, } const conditionsMemoizer = (selectionSet: SelectionSet) => ({ conditions: conditionsOfSelectionSet(selectionSet) }); class GroupInputs { readonly usedContexts = new Map<string, Type>; private readonly perType = new Map<string, MutableSelectionSet>(); onUpdateCallback?: () => void | undefined = undefined; constructor( readonly supergraphSchema: Schema, ) { } add(selection: Selection | SelectionSet) { assert(selection.parentType.schema() === this.supergraphSchema, 'Inputs selections must be based on the supergraph schema'); const typeName = selection.parentType.name; let typeSelection = this.perType.get(typeName); if (!typeSelection) { typeSelection = MutableSelectionSet.empty(selection.parentType); this.perType.set(typeName, typeSelection); } typeSelection.updates().add(selection); this.onUpdateCallback?.(); } addContext(context: string, type: Type) { this.usedContexts.set(context, type); } addAll(other: GroupInputs) { for (const otherSelection of other.perType.values()) { this.add(otherSelection.get()); } for (const [context, type] of other.usedContexts) { this.addContext(context, type); } } selectionSets(): SelectionSet[] { return mapValues(this.perType).map((s) => s.get()); } toSelectionSetNode(variablesDefinitions: VariableDefinitions, handledConditions: Conditions): SelectionSetNode { const selectionSets = mapValues(this.perType).map((s) => removeConditionsFromSelectionSet(s.get(), handledConditions)); // Making sure we're not generating something invalid. selectionSets.forEach((s) => s.validate(variablesDefinitions)); const selections = selectionSets.flatMap((sSet) => sSet.selections().map((s) => s.toSelectionNode())); return { kind: Kind.SELECTION_SET, selections, } } contains(other: GroupInputs): boolean { for (const [type, otherSelection] of other.perType) { const thisSelection = this.perType.get(type); if (!thisSelection || !thisSelection.get().contains(otherSelection.get())) { return false; } } if (this.usedContexts.size < other.usedContexts.size) { return false; } for (const [c,_] of other.usedContexts) { if (!this.usedContexts.has(c)) { return false; } } return true; } equals(other: GroupInputs): boolean { if (this.perType.size !== other.perType.size) { return false; } for (const [type, thisSelection] of this.perType) { const otherSelection = other.perType.get(type); if (!otherSelection || !thisSelection.get().equals(otherSelection.get())) { return false; } } if (this.usedContexts.size !== other.usedContexts.size) { return false; } for (const [c,_] of other.usedContexts) { if (!this.usedContexts.has(c)) { return false; } } return true; } clone(): GroupInputs { const cloned = new GroupInputs(this.supergraphSchema); for (const [type, selection] of this.perType.entries()) { cloned.perType.set(type, selection.clone()); } for (const [c,v] of this.usedContexts) { cloned.usedContexts.set(c,v); } return cloned; } toString(): string { const inputs = mapValues(this.perType); if (inputs.length === 0) { return '{}'; } if (inputs.length === 1) { return inputs[0].toString(); } return '[' + inputs.join(',') + ']'; } } /** * Represents a subgraph fetch of a query plan, and is a vertex of a `FetchDependencyGraph` (and as such provides links to * its parent and children in that dependency graph). 
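 *
 * As a hypothetical illustration: planning `{ me { name reviews { body } } }` over an "accounts" and a "reviews"
 * subgraph would typically produce a root group fetching `me { name }` (plus the entity key) from accounts, and a
 * child entity-fetch group against reviews whose `inputs` are the representations built from that key and whose
 * results are merged back at `mergeAt: ["me"]`; the parent/child link expresses that the reviews fetch can only
 * run after the accounts fetch has completed.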
*/ class FetchGroup { private readonly _parents: ParentRelation[] = []; private readonly _children: FetchGroup[] = []; private _id: string | undefined; // Set in some code-path to indicate that the selection of the group not be optimized away even if it "looks" useless. mustPreserveSelection: boolean = false; private constructor( readonly dependencyGraph: FetchDependencyGraph, public index: number, readonly subgraphName: string, readonly rootKind: SchemaRootKind, readonly parentType: CompositeType, readonly isEntityFetch: boolean, private _selection: MutableSelectionSet<{ conditions: Conditions}>, private _inputs?: GroupInputs, private _contextInputs?: FetchDataKeyRenamer[], readonly mergeAt?: ResponsePath, readonly deferRef?: string, // Some of the processing on the dependency graph checks for groups to the same subgraph and same mergeAt, and we use this // key for that. Having it here saves us from re-computing it more than once. readonly subgraphAndMergeAtKey?: string, private cachedCost?: number, private generateQueryFragments: boolean = false, // Cache used to save unecessary recomputation of the `isUseless` method. private isKnownUseful: boolean = false, private readonly inputRewrites: FetchDataRewrite[] = [], ) { if (this._inputs) { this._inputs.onUpdateCallback = () => { // We're trying to avoid the full recomputation of `isUseless` when we're already // shown that the group is known useful (if it is shown useless, the group is removed, // so we're not caching that result but it's ok). And `isUseless` basically checks if // `inputs.contains(selection)`, so if a group is shown useful, it means that there // is some selections not in the inputs, but as long as we add to selections (and we // never remove from selections; `MutableSelectionSet` don't have removing methods), // then this won't change. Only changing inputs may require some recomputation. this.isKnownUseful = false; } } } static create({ dependencyGraph, index, subgraphName, rootKind, parentType, hasInputs, mergeAt, deferRef, generateQueryFragments, }: { dependencyGraph: FetchDependencyGraph, index: number, subgraphName: string, rootKind: SchemaRootKind, parentType: CompositeType, hasInputs: boolean, mergeAt?: ResponsePath, deferRef?: string, generateQueryFragments: boolean, }): FetchGroup { // Sanity checks that the selection parent type belongs to the schema of the subgraph we're querying. assert(parentType.schema() === dependencyGraph.subgraphSchemas.get(subgraphName), `Expected parent type ${parentType} to belong to ${subgraphName}`); return new FetchGroup( dependencyGraph, index, subgraphName, rootKind, parentType, hasInputs, MutableSelectionSet.emptyWithMemoized(parentType, conditionsMemoizer), hasInputs ? new GroupInputs(dependencyGraph.supergraphSchema) : undefined, undefined, mergeAt, deferRef, hasInputs ? `${toValidGraphQLName(subgraphName)}-${mergeAt?.join('::') ?? ''}` : undefined, undefined, generateQueryFragments, ); } // Clones everything on the group itself, but not it's `_parents` or `_children` links. cloneShallow(newDependencyGraph: FetchDependencyGraph): FetchGroup { return new FetchGroup( newDependencyGraph, this.index, this.subgraphName, this.rootKind, this.parentType, this.isEntityFetch, this._selection.clone(), this._inputs?.clone(), this._contextInputs ? 
this._contextInputs.map((c) => ({ ...c})) : undefined, this.mergeAt, this.deferRef, this.subgraphAndMergeAtKey, this.cachedCost, this.generateQueryFragments, this.isKnownUseful, [...this.inputRewrites], ); } cost(): number { if (!this.cachedCost) { this.cachedCost = selectionCost(this.selection); } return this.cachedCost; } set id(id: string | undefined) { assert(!this._id, () => `The id for fetch group ${this} is already set`); this._id = id; } get id(): string | undefined { return this._id; } get isTopLevel(): boolean { return !this.mergeAt; } get selection(): SelectionSet { return this._selection.get(); } private selectionUpdates(): SelectionSetUpdates { this.cachedCost = undefined; return this._selection.updates(); } get inputs(): GroupInputs | undefined { return this._inputs; } addParents(parents: readonly ParentRelation[]) { for (const parent of parents) { this.addParent(parent); } } /** * Adds another group as a parent of this one (meaning that this fetch should happen after the provided one). */ addParent(parent: ParentRelation) { if (this.isChildOf(parent.group)) { // Due to how we handle the building of multiple query plans when there is choices, it's possible that we re-traverse // key edges we've already traversed before, and that can means hitting this condition. While we could try to filter // "already-children" before calling this method, it's easier to just make this a no-op. return; } assert(!parent.group.isParentOf(this), () => `Group ${parent.group} is a parent of ${this}, but the child relationship is broken`); assert(!parent.group.isChildOf(this), () => `Group ${parent.group} is a child of ${this}: adding it as parent would create a cycle`); this.dependencyGraph.onModification(); this._parents.push(parent); parent.group._children.push(this); } removeChild(child: FetchGroup) { if (!this.isParentOf(child)) { return; } this.dependencyGraph.onModification(); findAndRemoveInPlace((g) => g === child, this._children); findAndRemoveInPlace((p) => p.group === this, child._parents); } isParentOf(maybeChild: FetchGroup): boolean { return this._children.includes(maybeChild); } isChildOf(maybeParent: FetchGroup): boolean { return !!this.parentRelation(maybeParent); } isDescendantOf(maybeAncestor: FetchGroup): boolean { const children = Array.from(maybeAncestor.children()); while (children.length > 0) { const child = children.pop()!; if (child === this) { return true; } child.children().forEach((c) => children.push(c)); } return false; } /** * Returns whether this group is both a child of `maybeParent` but also if we can show that the * dependency between the group is "artificial" in the sense that this group inputs do not truly * depend on anything `maybeParent` fetches. */ isChildOfWithArtificialDependency(maybeParent: FetchGroup): boolean { const relation = this.parentRelation(maybeParent); // To be a child with an artificial dependency, it needs to be a child first, and the "path in parent" should be know. if (!relation || !relation.path) { return false; } // Then, if we have no inputs, we know we don't depend on anything from the parent no matter what. if (!this.inputs) { return true; } // If we do have inputs, t