import {
assert,
arrayEquals,
baseType,
CompositeType,
Field,
FieldSelection,
FragmentElement,
isAbstractType,
isCompositeType,
isListType,
isObjectType,
isNamedType,
ListType,
NonNullType,
ObjectType,
Operation,
OperationPath,
sameOperationPaths,
Schema,
SchemaRootKind,
Selection,
SelectionSet,
selectionSetOf,
Variable,
VariableDefinition,
VariableDefinitions,
newDebugLogger,
selectionOfElement,
selectionSetOfElement,
NamedFragments,
operationToDocument,
MapWithCachedArrays,
FederationMetadata,
federationMetadata,
entitiesFieldName,
concatOperationPaths,
Directive,
directiveApplicationsSubstraction,
conditionalDirectivesInOperationPath,
SetMultiMap,
OperationElement,
Concrete,
DeferDirectiveArgs,
setValues,
MultiMap,
typenameFieldName,
mapKeys,
operationPathToStringPath,
mapValues,
isInterfaceObjectType,
isInterfaceType,
Type,
MutableSelectionSet,
SelectionSetUpdates,
AbstractType,
isDefined,
InterfaceType,
FragmentSelection,
typesCanBeMerged,
Supergraph,
sameType,
isInputType,
possibleRuntimeTypes,
NamedType,
VariableCollector,
DEFAULT_MIN_USAGES_TO_OPTIMIZE,
} from "@apollo/federation-internals";
import {
advanceSimultaneousPathsWithOperation,
Edge,
emptyContext,
ExcludedDestinations,
QueryGraph,
GraphPath,
isPathContext,
isRootPathTree,
OpGraphPath,
OpPathTree,
OpRootPathTree,
PathContext,
PathTree,
RootVertex,
Vertex,
isRootVertex,
ExcludedConditions,
advanceOptionsToString,
ConditionResolution,
unsatisfiedConditionsResolution,
cachingConditionResolver,
ConditionResolver,
addConditionExclusion,
SimultaneousPathsWithLazyIndirectPaths,
simultaneousPathsToString,
SimultaneousPaths,
terminateWithNonRequestedTypenameField,
getLocallySatisfiableKey,
createInitialOptions,
buildFederatedQueryGraph,
FEDERATED_GRAPH_ROOT_SOURCE,
NonLocalSelectionsState,
NonLocalSelectionsMetadata,
} from "@apollo/query-graphs";
import { stripIgnoredCharacters, print, OperationTypeNode, SelectionSetNode, Kind } from "graphql";
import { DeferredNode, FetchDataKeyRenamer, FetchDataRewrite } from ".";
import { Conditions, conditionsOfSelectionSet, isConstantCondition, mergeConditions, removeConditionsFromSelectionSet, updatedConditions } from "./conditions";
import { enforceQueryPlannerConfigDefaults, QueryPlannerConfig, validateQueryPlannerConfig } from "./config";
import { generateAllPlansAndFindBest } from "./generateAllPlans";
import { QueryPlan, ResponsePath, SequenceNode, PlanNode, ParallelNode, FetchNode, SubscriptionNode, trimSelectionNodes } from "./QueryPlan";
import { validateRecursiveSelections } from './recursiveSelectionsLimit';
const debug = newDebugLogger('plan');
// Somewhat random string used to optimise handling __typename in some cases. See usage for details. The concrete value
// has no particular significance.
const SIBLING_TYPENAME_KEY = 'sibling_typename';
type CostFunction = FetchGroupProcessor<number, number>;
/**
* Constant used during query plan cost computation to account for the base cost of doing a fetch, that is the
* fact that any fetch implies some networking cost, request serialization/deserialization, validation, ...
*
* The number is a little bit arbitrary, but insofar as we roughly assign a cost of 1 to a single field queried
* (see `selectionCost` method), this can be thought of as saying that resolving a single field is in general
* a tiny fraction of the actual cost of doing a subgraph fetch.
*/
const fetchCost = 1000;
/**
* Constant used during query plan cost computation as a multiplier to the cost of fetches made in sequences.
*
* This means that if 3 fetches are done in sequence, the cost of the 2nd one is multiplied by this number and
* that of the 3rd one by twice this number (the 1st stage keeps its intrinsic cost). The goal is to heavily
* favor query plans with the least amount of sequences, since this affects overall latency directly. The exact
* number is a tad arbitrary however.
*/
const pipeliningCost = 100;
/**
* Computes the cost of a Plan.
*
* A plan is essentially some mix of sequences and parallels of fetches. And the plan cost
* is about minimizing both:
* 1. The expected total latency of executing the plan. Typically, doing 2 fetches in
* parallel will most likely have much better latency than executing those exact same
* fetches in sequence, and so the cost of the latter must be greater than that of
* the former.
* 2. The underlying use of resources. For instance, if we query 2 fields and we have
* the choice between getting those 2 fields from a single subgraph in 1 fetch, or
* get each from a different subgraph with 2 fetches in parallel, then we want to
* favor the former as just doing a fetch in and of itself has a cost in terms of
* resources consumed.
*
* Do note that at the moment, this cost is solely based on the "shape" of the plan and has
* to make some conservative assumptions regarding concrete runtime behaviour. In particular,
* it assumes that:
* - all fields have the same cost (all resolvers take the same time).
* - that field cost is relatively small compared to actually doing a subgraph fetch. That is,
* it assumes that the networking and other query processing costs are much higher than
* the cost of resolving a single field. Or to put it more concretely, it assumes that
* a fetch of 5 fields is probably not too different from that of 2 fields.
*/
const defaultCostFunction: CostFunction = {
/**
* The cost of a fetch is roughly proportional to how many fields it fetches (but see `selectionCost` for more details)
* plus some constant "premium" to account for the fact that doing each fetch is costly (and that fetch cost often
* dwarfs the actual cost of field resolution).
*/
onFetchGroup: (group: FetchGroup) => (fetchCost + group.cost()),
/**
* We don't take conditions into account in costing for now: the cost knows nothing about the condition itself,
* and this shouldn't really play a role in picking one plan over another.
*/
onConditions: (_: Conditions, value: number) => value,
/**
* We sum the cost of fetch groups in parallel. Note that if we were only concerned about expected latency,
* we could instead take the `max` of the values, but as we also try to minimize general resource usage, we
* want 2 parallel fetches with cost 1000 to be more costly than one with cost 1000 and one with cost 10,
* so summing is a simple option.
*/
reduceParallel: (values: number[]) => parallelCost(values),
/**
* For sequences, we want to heavily favor "shorter" pipelines of fetches as this directly impacts the
* expected latency of the overall plan.
*
* To do so, each "stage" of a sequence/pipeline gets an additional multiplier on the intrinsic cost
* of that stage.
*/
reduceSequence: (values: number[]) => sequenceCost(values),
/**
* This method exists so we can inject the necessary information for deferred blocks when
* genuinely creating plan nodes. It's irrelevant to cost computation however and we just
* return the cost of the block unchanged.
*/
reduceDeferred(_: DeferredInfo, value: number): number {
return value;
},
/**
* It is unfortunately a bit difficult to properly compute costs for defers because in theory
* some of the deferred blocks (the costs in `deferredValues`) can be started _before_ the full
* `nonDeferred` part finishes (more precisely, the "structure" of query plans expresses the fact
* that there is a non-deferred part and other deferred parts, but the complete dependency of
* when a deferred part can be started is expressed through the `FetchNode.id` field, and as
* this cost function is currently mainly based on the "structure" of query plans, we don't
* have easy access to this info).
*
* Anyway, the approximation we make here is that all the deferred parts start strictly after the
* non-deferred one, and that all the deferred parts can be done in parallel.
*/
reduceDefer(nonDeferred: number, _: SelectionSet, deferredValues: number[]): number {
return sequenceCost([nonDeferred, parallelCost(deferredValues)]);
},
};
function parallelCost(values: number[]): number {
return sum(values);
}
function sequenceCost(stages: number[]): number {
return stages.reduceRight((acc, stage, idx) => (acc + (Math.max(1, idx * pipeliningCost) * stage)), 0);
}
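// For illustration of how these constants interact (this snippet is not part of the planner logic itself): with
// three fetches of cost 1005 each (the 1000 `fetchCost` premium plus 5 fields), `parallelCost([1005, 1005, 1005])`
// is 3015, while `sequenceCost([1005, 1005, 1005])` is 1005 + 100 * 1005 + 200 * 1005 = 302505, and `reduceDefer`
// applied to a non-deferred part of cost 2000 with deferred parts of costs [1500, 1200] yields
// sequenceCost([2000, 2700]) = 2000 + 100 * 2700 = 272000. Plans with fewer sequential stages thus win by a wide
// margin.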
type ClosedPath<RV extends Vertex> = {
paths: SimultaneousPaths<RV>,
selection?: SelectionSet,
}
function closedPathToString(p: ClosedPath<any>): string {
const pathStr = simultaneousPathsToString(p.paths);
return p.selection ? `${pathStr} -> ${p.selection}` : pathStr;
}
function flattenClosedPath<RV extends Vertex>(
p: ClosedPath<RV>
): { path: OpGraphPath<RV>, selection?: SelectionSet }[] {
return p.paths.map((path) => ({ path, selection: p.selection}));
}
type ClosedBranch<RV extends Vertex> = ClosedPath<RV>[];
function allTailVertices(options: SimultaneousPathsWithLazyIndirectPaths<any>[]): Set<Vertex> {
const vertices = new Set<Vertex>();
for (const option of options) {
for (const path of option.paths) {
vertices.add(path.tail);
}
}
return vertices;
}
function selectionIsFullyLocalFromAllVertices(
selection: SelectionSet,
vertices: Set<Vertex>,
inconsistentAbstractTypesRuntimes: Set<string>,
): boolean {
let _useInconsistentAbstractTypes: boolean | undefined = undefined;
const useInconsistentAbstractTypes = (): boolean => {
if (_useInconsistentAbstractTypes === undefined) {
_useInconsistentAbstractTypes = selection.some((elt) =>
elt.kind === 'FragmentElement' && !!elt.typeCondition && inconsistentAbstractTypesRuntimes.has(elt.typeCondition.name)
);
}
return _useInconsistentAbstractTypes;
}
for (const vertex of vertices) {
// To guarantee that the selection is fully local from the provided vertex/type, we must have:
// - no edge crossing subgraphs from that vertex.
// - the type must be a composite type (mostly just ensuring the selection makes sense).
// - everything in the selection must be available in the type (which `rebaseOn` essentially validates).
// - the selection must not "type-cast" into any abstract type that has inconsistent runtimes across subgraphs. The reason for the
// latter condition is that `selection` is originally a supergraph selection, but we're looking to apply it "as-is" to a subgraph.
// But suppose it has a `... on I` where `I` is an interface. Then it's possible that `I` includes "more" types in the supergraph
// than in the subgraph, and so we might have to type-explode it. If so, we cannot use the selection "as-is".
if (vertex.hasReachableCrossSubgraphEdges
|| !isCompositeType(vertex.type)
|| !selection.canRebaseOn(vertex.type)
|| useInconsistentAbstractTypes()
) {
return false;
}
}
return true;
}
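// An example for `selectionIsFullyLocalFromAllVertices` (illustrative only): if every tail vertex is a type `T` of
// subgraph A, `T` has no reachable cross-subgraph edges, and a selection like `{ a b }` can be rebased on `T` as
// defined in A, then that selection is fully local and can later be attached to the fetch "as-is". Conversely, a
// selection containing `... on I`, where interface `I` has different runtime types in the supergraph than in A, is
// rejected since applying it unchanged could skip a necessary type-explosion.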
/**
* Given 2 paths that are 2 different options to reach the same query leaf field, checks if one can be shown
* to be always "better" (more efficient/optimal) than the other one, and this regardless of any surrounding context (that
* is regardless of what the rest of the query plan would be for any other query leaf field).
*
* Note that this method is used on final options of a given "query path", so all the heuristics done within `GraphPath`
* to avoid unnecessary options have already been applied (say, avoiding considering a path that does 2 successive key jumps
* when there is a 1-jump equivalent, ...), so this focuses on what can be done based on the fact that the paths considered
* are "finished".
*
* @return -1 if `opt1` is known to be strictly better than `opt2`, 1 if it is `opt2` that is strictly better, and 0 if we
* cannot really guarantee anything (at least "out of context").
*/
export function compareOptionsComplexityOutOfContext<RV extends Vertex>(opt1: SimultaneousPaths<RV>, opt2: SimultaneousPaths<RV>): number {
// Currently, we only ever compare single-path options. We may find smart things to do for multi-path options later,
// but it's unclear what exactly for now.
if (opt1.length === 1) {
if (opt2.length === 1) {
return compareSinglePathOptionsComplexityOutOfContext(opt1[0], opt2[0]);
} else {
return compareSingleVsMultiPathOptionsComplexityOutOfContext(opt1[0], opt2);
}
} else if (opt2.length === 1) {
return -compareSingleVsMultiPathOptionsComplexityOutOfContext(opt2[0], opt1);
}
return 0;
}
function compareSinglePathOptionsComplexityOutOfContext<RV extends Vertex>(p1: OpGraphPath<RV>, p2: OpGraphPath<RV>): number {
// Currently, this method only handles the case where we have something like:
// - `p1`: <some prefix> -[t]-> T(A) -[u]-> U(A) -[x] -> Int(A)
// - `p2`: <some prefix> -[t]-> T(A) -[key]-> T(B) -[u]-> U(B) -[x] -> Int(B)
// That is, we have 2 choices that are identical up to the "end", when one stays in the subgraph (p1, which stays in A)
// while the other uses a key to another subgraph (p2, going to B).
//
// In such a case, whatever else the query might be doing, it can never be "worse"
// to use `p1` than to use `p2` because both will force the same "fetch group" up to the
// end, but `p2` may force one more fetch than `p1` does.
// Do note that we say "may" above, because the rest of the plan may well have a forced
// choice like:
// - `other`: <some prefix> -[t]-> T(A) -[key]-> T(B) -[u]-> U(B) -[y] -> Int(B)
// in which case the plan will have the jump from A to B after `t` whether we use `p1` or
// `p2`, but while in that particular case `p1` and `p2` are about comparable in terms
// of performance, `p1` is still not worse than `p2` (and in other situations, `p1` may
// genuinely be better).
//
// Note that this is in many ways just a generalization of a heuristic we use earlier for leaf fields. That is,
// we will never get as input to this method something like:
// - `p1`: <some prefix> -[t]-> T(A) -[x] -> Int(A)
// - `p2`: <some prefix> -[t]-> T(A) -[key]-> T(B) -[x] -> Int(B)
// because when the code is asked for options for `x` after `<some prefix> -[t]-> T(A)`, it notices that `x`
// is a leaf and is in `A`, so it doesn't ever look for alternative paths. But this only works for direct
// leaves of an entity. In the example at the beginning, field `u` makes this not work, because when
// we compute choices for `u`, we don't yet know what comes after that, and so we have to take the option
// of going to subgraph `B` into account (it may very well be that whatever comes after `u` is not in `A` for
// instance).
if (p1.tail.source !== p2.tail.source) {
const { thisJumps: p1Jumps, thatJumps: p2Jumps } = p1.countSubgraphJumpsAfterLastCommonVertex(p2);
// As described above, we want to know if one of the paths has no jumps at all (after the common prefix) while
// the other does have some.
if (p1Jumps === 0 && p2Jumps > 0) {
return -1;
} else if (p1Jumps > 0 && p2Jumps === 0) {
return 1;
} else {
return 0;
}
}
return 0;
}
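// An example of the above: if, after their last common vertex, `p1` stays within subgraph A (0 jumps) while `p2`
// jumps to subgraph B (1 jump), `compareSinglePathOptionsComplexityOutOfContext` returns -1, meaning `p1` can never
// be worse than `p2` regardless of the rest of the query.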
function compareSingleVsMultiPathOptionsComplexityOutOfContext<RV extends Vertex>(p1: OpGraphPath<RV>, p2s: SimultaneousPaths<RV>): number {
// This handles the same case as the single-path only case, but compares the single path against
// each of the options of the multi-path, and only "ignores" the multi-path if all its paths can be ignored.
// Note that this happens less often than the single-path only case, but with @provides on an interface, you can
// have cases where on the one side you can get something entirely on the current graph, but the type-exploded case
// has still been generated due to the leaf field not being the one just after the "provided" interface.
for (const p2 of p2s) {
// Note: not sure if it is possible for a branch of the multi-path option to subsume the single-path one in practice, but
// if it does, we ignore it because it's not obvious that this is enough to get rid of `p1` (maybe `p1` is provably a bit
// costlier than one of the paths of `p2s`, but `p2s` may have many paths and could still be collectively worse than `p1`).
if (compareSinglePathOptionsComplexityOutOfContext(p1, p2) >= 0) {
return 0;
}
}
return -1;
}
class QueryPlanningTraversal<RV extends Vertex> {
// The stack contains all states that aren't terminal.
private bestPlan: [FetchDependencyGraph, OpPathTree<RV>, number] | undefined;
private readonly isTopLevel: boolean;
private conditionResolver: ConditionResolver;
private stack: [Selection, SimultaneousPathsWithLazyIndirectPaths<RV>[]][];
private readonly closedBranches: ClosedBranch<RV>[] = [];
private readonly optionsLimit: number | null;
private readonly typeConditionedFetching: boolean;
constructor(
readonly parameters: PlanningParameters<RV>,
selectionSet: SelectionSet,
readonly startFetchIdGen: number,
readonly hasDefers: boolean,
private readonly rootKind: SchemaRootKind,
readonly costFunction: CostFunction,
initialContext: PathContext,
typeConditionedFetching: boolean,
nonLocalSelectionsState: NonLocalSelectionsState | null,
excludedDestinations: ExcludedDestinations = [],
excludedConditions: ExcludedConditions = [],
) {
const { root, federatedQueryGraph } = parameters;
this.typeConditionedFetching = typeConditionedFetching || false;
this.isTopLevel = isRootVertex(root);
this.optionsLimit = parameters.config.debug?.pathsLimit;
this.conditionResolver = cachingConditionResolver(
(edge, context, excludedEdges, excludedConditions, extras) => this.resolveConditionPlan(edge, context, excludedEdges, excludedConditions, extras),
);
const initialPath: OpGraphPath<RV> = GraphPath.create(federatedQueryGraph, root);
const initialOptions = createInitialOptions(
initialPath,
initialContext,
this.conditionResolver,
excludedDestinations,
excludedConditions,
parameters.overrideConditions,
);
this.stack = mapOptionsToSelections(selectionSet, initialOptions);
if (
this.parameters.federatedQueryGraph.nonLocalSelectionsMetadata
&& nonLocalSelectionsState
) {
if (this.parameters.federatedQueryGraph.nonLocalSelectionsMetadata
.checkNonLocalSelectionsLimitExceededAtRoot(
this.stack,
nonLocalSelectionsState,
this.parameters.supergraphSchema,
this.parameters.inconsistentAbstractTypesRuntimes,
this.parameters.overrideConditions,
)
) {
throw Error(`Number of non-local selections exceeds limit of ${
NonLocalSelectionsMetadata.MAX_NON_LOCAL_SELECTIONS
}`);
}
}
}
private debugStack() {
if (this.isTopLevel && debug.enabled) {
debug.group('Query planning open branches:');
for (const [selection, options] of this.stack) {
debug.groupedValues(options, opt => `${simultaneousPathsToString(opt)}`, `${selection}:`);
}
debug.groupEnd();
}
}
findBestPlan(): [FetchDependencyGraph, OpPathTree<RV>, number] | undefined {
while (this.stack.length > 0) {
this.debugStack();
const [selection, options] = this.stack.pop()!;
this.handleOpenBranch(selection, options);
}
this.computeBestPlanFromClosedBranches();
return this.bestPlan;
}
private recordClosedBranch(closed: ClosedBranch<RV>) {
const maybeTrimmed = this.maybeEliminateStrictlyMoreCostlyPaths(closed);
debug.log(() => `Closed branch has ${maybeTrimmed.length} options (eliminated ${closed.length - maybeTrimmed.length} that could be proved as unnecessary)`);
this.closedBranches.push(maybeTrimmed);
}
private handleOpenBranch(selection: Selection, options: SimultaneousPathsWithLazyIndirectPaths<RV>[]) {
const operation = selection.element;
debug.group(() => `Handling open branch: ${operation}`);
let newOptions: SimultaneousPathsWithLazyIndirectPaths<RV>[] = [];
for (const option of options) {
const followupForOption = advanceSimultaneousPathsWithOperation(
this.parameters.supergraphSchema,
option,
operation,
this.parameters.overrideConditions,
);
if (!followupForOption) {
// There is no valid way to advance the current `operation` from this option, so this option is a dead branch
// that cannot produce a valid query plan. So we simply ignore it and rely on other options.
continue;
}
if (followupForOption.length === 0) {
// This `operation` is valid from that option but is guaranteed to yield no result (it's a type condition that, along
// with prior conditions, has no intersection). Given that (assuming the user does properly resolve all versions of a
// given field the same way from all subgraphs) all options should return the same results, we know that operation
// should return no result from all options (even if we can't prove it technically).
// More concretely, this usually means the current operation is a type condition that has no intersection with the possible
// current runtime types at this point, and this means whatever fields the type condition sub-selection selects, they
// will never be part of the results. That said, we cannot completely ignore the type-condition/fragment or we'd end
// up with the wrong results. Consider the example where a sub-part of the query is:
// {
// foo {
// ... on Bar {
// field
// }
// }
// }
// and suppose that `... on Bar` can never match a concrete runtime type at this point. Because that's the only sub-selection
// of `foo`, if we completely ignore it, we'll end up not querying this at all. Which means that, during execution,
// we'd either return (for that sub-part of the query) `{ foo: null }` if `foo` happens to be nullable, or just `null` for
// the whole sub-part otherwise. But what we *should* return (assuming foo doesn't actually return `null`) is `{ foo: {} }`.
// Meaning, we have queried `foo` and it returned something, but it's simply not a `Bar` and so nothing was included.
// Long story short, to avoid that situation, we replace the whole `... on Bar` section that can never match the runtime
// type by simply getting the `__typename` of `foo`. This ensures we do query `foo` but don't end up including conditions
// that may not even make sense to the subgraph we're querying.
// Do note that we'll only need that `__typename` if there are no other selections inside `foo`, and so we might include
// it unnecessarily in practice: it's a very minor inefficiency though.
if (operation.kind === 'FragmentElement') {
this.recordClosedBranch(options.map((o) => ({
paths: o.paths.map(p => terminateWithNonRequestedTypenameField(p, this.parameters.overrideConditions))
})));
}
debug.groupEnd(() => `Terminating branch with no possible results`);
return;
}
newOptions = newOptions.concat(followupForOption);
if (this.optionsLimit && newOptions.length > this.optionsLimit) {
throw new Error(`Too many options generated for ${selection}, reached the limit of ${this.optionsLimit}`);
}
}
if (newOptions.length === 0) {
// If we have no options, it means there is no way to build a plan for that branch, and
// that means the whole query planning has no plan.
// This should never happen for a top-level query planning (unless the supergraph has *not* been
// validated), but can happen when computing sub-plans for a key condition.
if (this.isTopLevel) {
debug.groupEnd(() => `No valid options to advance ${selection} from ${advanceOptionsToString(options)}`);
throw new Error(`Was not able to find any options for ${selection}: This shouldn't have happened.`);
} else {
// We clear both open branches and closed ones as a means to terminate the plan computation with
// no plan.
this.stack.splice(0, this.stack.length);
this.closedBranches.splice(0, this.closedBranches.length);
debug.groupEnd(() => `No possible plan for ${selection} from ${advanceOptionsToString(options)}; terminating condition`);
return;
}
}
if (selection.selectionSet) {
const allTails = allTailVertices(newOptions);
if (selectionIsFullyLocalFromAllVertices(selection.selectionSet, allTails, this.parameters.inconsistentAbstractTypesRuntimes)
&& !selection.hasDefer()
) {
// We know the rest of the selection is local to whichever subgraph the current options are in, and so we're
// going to keep that selection around and add it "as-is" to the `FetchNode` when needed, saving a bunch of
// work (creating `GraphPath`s, merging `PathTree`s, ...). However, as we're skipping the "normal path" for that
// sub-selection, there are a few things that are handled in said "normal path" that we still need to handle.
// More precisely:
// - we have this "attachment" trick that removes requested `__typename` temporarily, so we should add it back.
// - we still need to add the selection of `__typename` for abstract types. It is not really necessary for the
// execution per-se, but if we don't do it, then we will not be able to reuse named fragments as often
// as we should (we add `__typename` for abstract types on the "normal path" and so we add them too to
// named fragments; as such, we need them here too).
const selectionSet = addTypenameFieldForAbstractTypes(addBackTypenameInAttachments(selection.selectionSet));
this.recordClosedBranch(newOptions.map((opt) => ({ paths: opt.paths, selection: selectionSet })));
} else {
for (const branch of mapOptionsToSelections(selection.selectionSet, newOptions)) {
this.stack.push(branch);
}
}
debug.groupEnd();
} else {
this.recordClosedBranch(newOptions.map((opt) => ({ paths: opt.paths })));
debug.groupEnd(() => `Branch finished`);
}
}
/**
* This method is called on a closed branch, that is on all the possible options found
* to get a particular leaf of the query being planned. And when there is more than one
* option, it tries a last effort at checking whether an option can be shown to be less efficient
* than another one _whatever the rest of the query plan is_ (that is, whatever the options
* for any other leaf of the query are).
*
* In practice, this compares all pairs of options and calls the heuristics
* of `compareOptionsComplexityOutOfContext` on them to see if one strictly
* subsumes the other (and if that's the case, the subsumed one is ignored).
*/
private maybeEliminateStrictlyMoreCostlyPaths(branch: ClosedBranch<RV>): ClosedBranch<RV> {
if (branch.length <= 1) {
return branch;
}
// Copying the branch because we're going to modify it in place.
const toHandle = branch.concat();
const keptOptions: ClosedPath<RV>[] = [];
while (toHandle.length >= 2) {
const first = toHandle[0];
let shouldKeepFirst = true;
// We compare `first` to every other remaining option. But we iterate from end to beginning
// because we may remove, in place, some of those we iterate on, and iterating backward makes that safe.
for (let i = toHandle.length - 1 ; i >= 1; i--) {
const other = toHandle[i];
const cmp = compareOptionsComplexityOutOfContext(first.paths, other.paths);
if (cmp < 0) {
// This means that `first` is always better than `other`. So we eliminate `other`.
toHandle.splice(i, 1);
} else if (cmp > 0) {
// This means that `first` is always worse than `other`. So we eliminate `first` (
// and we're done with this inner loop).
toHandle.splice(0, 1);
shouldKeepFirst = false;
break;
}
}
if (shouldKeepFirst) {
// Means that we found no other option that makes `first` unnecessary. We mark `first` as kept,
// and remove it from our working set (which we know still contains it).
keptOptions.push(first);
toHandle.splice(0, 1);
}
}
// We know toHandle has at most 1 element, but it may have one and we should keep it.
if (toHandle.length > 0) {
keptOptions.push(toHandle[0]);
}
return keptOptions;
}
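// As a small worked example of `maybeEliminateStrictlyMoreCostlyPaths`: with options [o1, o2, o3] where
// `compareOptionsComplexityOutOfContext` returns -1 for (o1, o3) (o1 strictly better) and 0 for (o1, o2) (no
// conclusion), o3 is spliced out while comparing against o1, o1 is then kept, and o2, being the only remaining
// option, is kept too, yielding [o1, o2].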
private newDependencyGraph(): FetchDependencyGraph {
const { supergraphSchema, federatedQueryGraph } = this.parameters;
const rootType = this.isTopLevel && this.hasDefers ? supergraphSchema.schemaDefinition.rootType(this.rootKind) : undefined;
return FetchDependencyGraph.create(supergraphSchema, federatedQueryGraph, this.startFetchIdGen, rootType, this.parameters.config.generateQueryFragments);
}
// Moves the first closed branch to after any branch having more options.
// This method assumes that closed branches are sorted by decreasing number of options _except_ for the first element
// which may be out of order, and this method restores that order.
private reorderFirstBranch() {
const firstBranch = this.closedBranches[0];
let i = 1;
while (i < this.closedBranches.length && this.closedBranches[i].length > firstBranch.length) {
i++;
}
// `i` is the smallest index of an element having the same number of options as the first one or fewer,
// so we swap that first branch with the element "before" `i` (which has more options).
this.closedBranches[0] = this.closedBranches[i - 1];
this.closedBranches[i - 1] = firstBranch;
}
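// As a worked example of `reorderFirstBranch`: if the branches currently have [4, 5, 5, 3, 1] options (the first
// branch having just lost one option), the loop stops at `i === 3`, and swapping index 0 with index 2 restores
// the order to [5, 5, 4, 3, 1].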
private sortOptionsInClosedBranches() {
this.closedBranches.forEach((branch) => branch.sort((p1, p2) => {
const p1Jumps = Math.max(...p1.paths.map((p) => p.subgraphJumps()));
const p2Jumps = Math.max(...p2.paths.map((p) => p.subgraphJumps()));
return p1Jumps - p2Jumps;
}));
}
private computeBestPlanFromClosedBranches() {
if (this.closedBranches.length === 0) {
return;
}
// We now sort the options within each branch, putting those with the least amount of subgraph jumps first.
// The idea is that for each branch taken individually, the option with the least jumps is going to be
// the most efficient, and while it is not always the case that the best plan is built for those
// individual bests, they are still statistically more likely to be part of the best plan. So putting
// them first has 2 benefits for the rest of this method:
// 1. if we end up cutting some options of a branch below (due to having too many possible plans),
// we'll cut the last option first (we `pop()`), so it's better to cut what is the least likely to be good.
// 2. when we finally generate the plan, we use the cost of previously computed plans to cut computation
// early when possible (see `generateAllPlansAndFindBest`), so there is a premium in generating good
// plans early (it cuts more computation), and putting those more-likely-to-be-good options first helps
// this.
this.sortOptionsInClosedBranches();
// We're out of smart ideas for now, so we look at how many plans we'd have to generate, and if it's
// "too many", we reduce it to something manageable by arbitrarily throwing out options. This effectively
// means that when a query has too many options, we give up on always finding the "best"
// query plan in favor of an "ok" query plan.
// TODO: currently, when we need to reduce options, we do so somewhat arbitrarily. More
// precisely, we reduce the branches with the most options first and then drop the last
// option of the branch, repeating until we have a reasonable number of plans to consider.
// The sorting we do above helps make this slightly more likely to be a good choice, but
// there is likely more "smarts" we could add to this.
// We sort branches by those that have the most options first.
this.closedBranches.sort((b1, b2) => b1.length > b2.length ? -1 : (b1.length < b2.length ? 1 : 0));
let planCount = possiblePlans(this.closedBranches);
debug.log(() => `Query has ${planCount} possible plans`);
let firstBranch = this.closedBranches[0];
const maxPlansToCompute = this.parameters.config.debug.maxEvaluatedPlans;
while (planCount > maxPlansToCompute && firstBranch.length > 1) {
// We remove the right-most option of the first branch, and then move that branch to its new place.
const prevSize = BigInt(firstBranch.length);
firstBranch.pop();
planCount -= planCount / prevSize;
this.reorderFirstBranch();
// Note that if firstBranch is our only branch, it's fine, we'll continue to remove options from
// it (but that is beyond unlikely).
firstBranch = this.closedBranches[0];
debug.log(() => `Reduced plans to consider to ${planCount} plans`);
}
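// For instance, with branches of [3, 2, 2] options, `planCount` starts at 12; popping one option from the
// first branch (prevSize 3) subtracts 12 / 3 = 4, leaving 8 = 2 * 2 * 2 plans to consider.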
// Note that if `!this.isTopLevel`, then this means we're resolving a sub-plan for an edge condition, and we
// don't want to count those as "evaluated plans".
if (this.parameters.statistics && this.isTopLevel) {
this.parameters.statistics.evaluatedPlanCount += Number(planCount);
}
debug.log(() => `All branches:${this.closedBranches.map((opts, i) => `\n${i}:${opts.map((opt => `\n - ${closedPathToString(opt)}`))}`)}`);
// Note that usually, we'll have a majority of branches with just one option. We can group them in
// a PathTree first with no fuss. We then need to do a cartesian product between this created
// tree and the other branches to build the possible plans and choose among them.
let idxFirstOfLengthOne = 0;
while (idxFirstOfLengthOne < this.closedBranches.length && this.closedBranches[idxFirstOfLengthOne].length > 1) {
idxFirstOfLengthOne++;
}
let initialTree: OpPathTree<RV>;
let initialDependencyGraph: FetchDependencyGraph;
const { federatedQueryGraph, root } = this.parameters;
if (idxFirstOfLengthOne === this.closedBranches.length) {
initialTree = PathTree.createOp(federatedQueryGraph, root);
initialDependencyGraph = this.newDependencyGraph();
} else {
const singleChoiceBranches = this
.closedBranches
.slice(idxFirstOfLengthOne)
.flat()
.map((cp) => flattenClosedPath(cp))
.flat();
initialTree = PathTree.createFromOpPaths(federatedQueryGraph, root, singleChoiceBranches);
initialDependencyGraph = this.updatedDependencyGraph(this.newDependencyGraph(), initialTree);
if (idxFirstOfLengthOne === 0) {
// Well, we have the only possible plan; it's also the best.
this.bestPlan = [initialDependencyGraph, initialTree, this.cost(initialDependencyGraph)];
return;
}
}
const otherTrees = this
.closedBranches
.slice(0, idxFirstOfLengthOne)
.map(b => b.map(opt => PathTree.createFromOpPaths(federatedQueryGraph, root, flattenClosedPath(opt))));
const { best, cost} = generateAllPlansAndFindBest({
initial: { graph: initialDependencyGraph, tree: initialTree },
toAdd: otherTrees,
addFct: (p, t) => {
const updatedDependencyGraph = p.graph.clone();
this.updatedDependencyGraph(updatedDependencyGraph, t);
const updatedTree = p.tree.merge(t);
return { graph: updatedDependencyGraph, tree: updatedTree };
},
costFct: (p) => this.cost(p.graph),
onPlan: (p, cost, prevCost) => {
debug.log(() => {
if (!prevCost) {
return `Computed plan with cost ${cost}: ${p.tree}`;
} else if (cost > prevCost) {
return `Ignoring plan with cost ${cost} (a better plan with cost ${prevCost} exists): ${p.tree}`
} else {
return `Found better with cost ${cost} (previous had cost ${prevCost}): ${p.tree}`;
}
});
},
});
this.bestPlan = [best.graph, best.tree, cost];
}
private cost(dependencyGraph: FetchDependencyGraph): number {
const { main, deferred } = dependencyGraph.process(this.costFunction, this.rootKind);
return deferred.length === 0
? main
: this.costFunction.reduceDefer(main, dependencyGraph.deferTracking.primarySelection!.get(), deferred);
}
private updatedDependencyGraph(dependencyGraph: FetchDependencyGraph, tree: OpPathTree<RV>): FetchDependencyGraph {
return isRootPathTree(tree)
? computeRootFetchGroups(dependencyGraph, tree, this.rootKind, this.typeConditionedFetching)
: computeNonRootFetchGroups(dependencyGraph, tree, this.rootKind, this.typeConditionedFetching);
}
private resolveConditionPlan(edge: Edge, context: PathContext, excludedDestinations: ExcludedDestinations, excludedConditions: ExcludedConditions, extraConditions?: SelectionSet): ConditionResolution {
const bestPlan = new QueryPlanningTraversal(
{
...this.parameters,
root: edge.head,
},
extraConditions ?? edge.conditions!,
0,
false,
'query',
this.costFunction,
context,
this.typeConditionedFetching,
null,
excludedDestinations,
addConditionExclusion(excludedConditions, edge.conditions),
).findBestPlan();
// Note that we want to return 'null', not 'undefined', because it's the latter that means "I cannot resolve that
// condition" within `advanceSimultaneousPathsWithOperation`.
return bestPlan ? { satisfied: true, cost: bestPlan[2], pathTree: bestPlan[1] } : unsatisfiedConditionsResolution;
}
}
/**
* Used in `FetchDependencyGraph` to store, for a given group, information about one of its parents.
* Namely, this structure stores:
* 1. the actual parent group, and
* 2. the path of the group for which this is a "parent relation" into said parent (`group`). This information
* is maintained for the case where we want/need to merge groups into each other. One can roughly think of
* this as similar to a `mergeAt`, but that is relative to the start of `group`. It can be `undefined`, which
* either means we don't know that path or that this simply doesn't make sense (there are cases where a child `mergeAt` can
* be shorter than its parent's, in which case the `path`, which is essentially `child-mergeAt - parent-mergeAt`, does
* not make sense (or rather, it's negative, which we cannot represent)). Tl;dr, `undefined` for the `path` means that
* one should make no assumptions and bail on any merging that uses said path.
*/
type ParentRelation = {
group: FetchGroup,
path?: OperationPath,
}
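// For instance (hypothetical field names): if a parent group merges at ["user"] and this group merges at
// ["user", "friends"], then `path` is roughly the operation path selecting `friends` relative to the start of the
// parent group; if the child's `mergeAt` were shorter than the parent's, no such path exists and `path` stays `undefined`.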
const conditionsMemoizer = (selectionSet: SelectionSet) => ({ conditions: conditionsOfSelectionSet(selectionSet) });
class GroupInputs {
readonly usedContexts = new Map<string, Type>;
private readonly perType = new Map<string, MutableSelectionSet>();
onUpdateCallback?: () => void | undefined = undefined;
constructor(
readonly supergraphSchema: Schema,
) {
}
add(selection: Selection | SelectionSet) {
assert(selection.parentType.schema() === this.supergraphSchema, 'Inputs selections must be based on the supergraph schema');
const typeName = selection.parentType.name;
let typeSelection = this.perType.get(typeName);
if (!typeSelection) {
typeSelection = MutableSelectionSet.empty(selection.parentType);
this.perType.set(typeName, typeSelection);
}
typeSelection.updates().add(selection);
this.onUpdateCallback?.();
}
addContext(context: string, type: Type) {
this.usedContexts.set(context, type);
}
addAll(other: GroupInputs) {
for (const otherSelection of other.perType.values()) {
this.add(otherSelection.get());
}
for (const [context, type] of other.usedContexts) {
this.addContext(context, type);
}
}
selectionSets(): SelectionSet[] {
return mapValues(this.perType).map((s) => s.get());
}
toSelectionSetNode(variablesDefinitions: VariableDefinitions, handledConditions: Conditions): SelectionSetNode {
const selectionSets = mapValues(this.perType).map((s) => removeConditionsFromSelectionSet(s.get(), handledConditions));
// Making sure we're not generating something invalid.
selectionSets.forEach((s) => s.validate(variablesDefinitions));
const selections = selectionSets.flatMap((sSet) => sSet.selections().map((s) => s.toSelectionNode()));
return {
kind: Kind.SELECTION_SET,
selections,
}
}
contains(other: GroupInputs): boolean {
for (const [type, otherSelection] of other.perType) {
const thisSelection = this.perType.get(type);
if (!thisSelection || !thisSelection.get().contains(otherSelection.get())) {
return false;
}
}
if (this.usedContexts.size < other.usedContexts.size) {
return false;
}
for (const [c,_] of other.usedContexts) {
if (!this.usedContexts.has(c)) {
return false;
}
}
return true;
}
equals(other: GroupInputs): boolean {
if (this.perType.size !== other.perType.size) {
return false;
}
for (const [type, thisSelection] of this.perType) {
const otherSelection = other.perType.get(type);
if (!otherSelection || !thisSelection.get().equals(otherSelection.get())) {
return false;
}
}
if (this.usedContexts.size !== other.usedContexts.size) {
return false;
}
for (const [c,_] of other.usedContexts) {
if (!this.usedContexts.has(c)) {
return false;
}
}
return true;
}
clone(): GroupInputs {
const cloned = new GroupInputs(this.supergraphSchema);
for (const [type, selection] of this.perType.entries()) {
cloned.perType.set(type, selection.clone());
}
for (const [c,v] of this.usedContexts) {
cloned.usedContexts.set(c,v);
}
return cloned;
}
toString(): string {
const inputs = mapValues(this.perType);
if (inputs.length === 0) {
return '{}';
}
if (inputs.length === 1) {
return inputs[0].toString();
}
return '[' + inputs.join(',') + ']';
}
}
/**
* Represents a subgraph fetch of a query plan, and is a vertex of a `FetchDependencyGraph` (and as such provides links to
* its parents and children in that dependency graph).
*/
class FetchGroup {
private readonly _parents: ParentRelation[] = [];
private readonly _children: FetchGroup[] = [];
private _id: string | undefined;
// Set in some code-paths to indicate that the selection of the group should not be optimized away even if it "looks" useless.
mustPreserveSelection: boolean = false;
private constructor(
readonly dependencyGraph: FetchDependencyGraph,
public index: number,
readonly subgraphName: string,
readonly rootKind: SchemaRootKind,
readonly parentType: CompositeType,
readonly isEntityFetch: boolean,
private _selection: MutableSelectionSet<{ conditions: Conditions}>,
private _inputs?: GroupInputs,
private _contextInputs?: FetchDataKeyRenamer[],
readonly mergeAt?: ResponsePath,
readonly deferRef?: string,
// Some of the processing on the dependency graph checks for groups targeting the same subgraph and same mergeAt, and we use this
// key for that. Having it here saves us from re-computing it more than once.
readonly subgraphAndMergeAtKey?: string,
private cachedCost?: number,
private generateQueryFragments: boolean = false,
// Cache used to avoid unnecessary recomputation of the `isUseless` method.
private isKnownUseful: boolean = false,
private readonly inputRewrites: FetchDataRewrite[] = [],
) {
if (this._inputs) {
this._inputs.onUpdateCallback = () => {
// We're trying to avoid the full recomputation of `isUseless` when we've already
// shown that the group is useful (if it is shown useless, the group is removed,
// so we're not caching that result but it's ok). And `isUseless` basically checks if
// `inputs.contains(selection)`, so if a group is shown useful, it means that there
// are some selections not in the inputs, but as long as we only add to selections (and we
// never remove from selections; `MutableSelectionSet` doesn't have removal methods),
// then this won't change. Only changing inputs may require some recomputation.
this.isKnownUseful = false;
}
}
}
static create({
dependencyGraph,
index,
subgraphName,
rootKind,
parentType,
hasInputs,
mergeAt,
deferRef,
generateQueryFragments,
}: {
dependencyGraph: FetchDependencyGraph,
index: number,
subgraphName: string,
rootKind: SchemaRootKind,
parentType: CompositeType,
hasInputs: boolean,
mergeAt?: ResponsePath,
deferRef?: string,
generateQueryFragments: boolean,
}): FetchGroup {
// Sanity checks that the selection parent type belongs to the schema of the subgraph we're querying.
assert(parentType.schema() === dependencyGraph.subgraphSchemas.get(subgraphName), `Expected parent type ${parentType} to belong to ${subgraphName}`);
return new FetchGroup(
dependencyGraph,
index,
subgraphName,
rootKind,
parentType,
hasInputs,
MutableSelectionSet.emptyWithMemoized(parentType, conditionsMemoizer),
hasInputs ? new GroupInputs(dependencyGraph.supergraphSchema) : undefined,
undefined,
mergeAt,
deferRef,
hasInputs ? `${toValidGraphQLName(subgraphName)}-${mergeAt?.join('::') ?? ''}` : undefined,
undefined,
generateQueryFragments,
);
}
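// Note on the `subgraphAndMergeAtKey` computed above: for an entity fetch of subgraph "reviews" merging at
// ["products", "@", "reviews"], the key would be "reviews-products::@::reviews" (assuming the subgraph name is
// already a valid GraphQL name, so `toValidGraphQLName` leaves it unchanged).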
// Clones everything on the group itself, but not its `_parents` or `_children` links.
cloneShallow(newDependencyGraph: FetchDependencyGraph): FetchGroup {
return new FetchGroup(
newDependencyGraph,
this.index,
this.subgraphName,
this.rootKind,
this.parentType,
this.isEntityFetch,
this._selection.clone(),
this._inputs?.clone(),
this._contextInputs ? this._contextInputs.map((c) => ({ ...c})) : undefined,
this.mergeAt,
this.deferRef,
this.subgraphAndMergeAtKey,
this.cachedCost,
this.generateQueryFragments,
this.isKnownUseful,
[...this.inputRewrites],
);
}
cost(): number {
if (!this.cachedCost) {
this.cachedCost = selectionCost(this.selection);
}
return this.cachedCost;
}
set id(id: string | undefined) {
assert(!this._id, () => `The id for fetch group ${this} is already set`);
this._id = id;
}
get id(): string | undefined {
return this._id;
}
get isTopLevel(): boolean {
return !this.mergeAt;
}
get selection(): SelectionSet {
return this._selection.get();
}
private selectionUpdates(): SelectionSetUpdates {
this.cachedCost = undefined;
return this._selection.updates();
}
get inputs(): GroupInputs | undefined {
return this._inputs;
}
addParents(parents: readonly ParentRelation[]) {
for (const parent of parents) {
this.addParent(parent);
}
}
/**
* Adds another group as a parent of this one (meaning that this fetch should happen after the provided one).
*/
addParent(parent: ParentRelation) {
if (this.isChildOf(parent.group)) {
// Due to how we handle the building of multiple query plans when there are choices, it's possible that we re-traverse
// key edges we've already traversed before, and that can mean hitting this condition. While we could try to filter
// "already-children" before calling this method, it's easier to just make this a no-op.
return;
}
assert(!parent.group.isParentOf(this), () => `Group ${parent.group} is a parent of ${this}, but the child relationship is broken`);
assert(!parent.group.isChildOf(this), () => `Group ${parent.group} is a child of ${this}: adding it as parent would create a cycle`);
this.dependencyGraph.onModification();
this._parents.push(parent);
parent.group._children.push(this);
}
removeChild(child: FetchGroup) {
if (!this.isParentOf(child)) {
return;
}
this.dependencyGraph.onModification();
findAndRemoveInPlace((g) => g === child, this._children);
findAndRemoveInPlace((p) => p.group === this, child._parents);
}
isParentOf(maybeChild: FetchGroup): boolean {
return this._children.includes(maybeChild);
}
isChildOf(maybeParent: FetchGroup): boolean {
return !!this.parentRelation(maybeParent);
}
isDescendantOf(maybeAncestor: FetchGroup): boolean {
const children = Array.from(maybeAncestor.children());
while (children.length > 0) {
const child = children.pop()!;
if (child === this) {
return true;
}
child.children().forEach((c) => children.push(c));
}
return false;
}
/**
* Returns whether this group is both a child of `maybeParent` and one for which we can show that the
* dependency between the groups is "artificial", in the sense that this group's inputs do not truly
* depend on anything `maybeParent` fetches.
*/
isChildOfWithArtificialDependency(maybeParent: FetchGroup): boolean {
const relation = this.parentRelation(maybeParent);
// To be a child with an artificial dependency, it needs to be a child first, and the "path in parent" should be known.
if (!relation || !relation.path) {
return false;
}
// Then, if we have no inputs, we know we don't depend on anything from the parent no matter what.
if (!this.inputs) {
return true;
}
// If we do have inputs, t