// @azure/cosmos
// Version:
// Microsoft Azure Cosmos DB Service Node.js SDK for NOSQL API
// 520 lines (519 loc) • 21.7 kB
// JavaScript
// Bundler-emitted interop helpers: cached references to the Object built-ins they rely on.
var {
  defineProperty: __defProp,
  getOwnPropertyDescriptor: __getOwnPropDesc,
  getOwnPropertyNames: __getOwnPropNames
} = Object;
var { hasOwnProperty: __hasOwnProp } = Object.prototype;

/**
 * Defines one enumerable getter on `target` for every enumerable key of `all`.
 * Each value in `all` is used directly as the getter function.
 */
function __export(target, all) {
  for (var exportName in all) {
    __defProp(target, exportName, { get: all[exportName], enumerable: true });
  }
}

/**
 * Mirrors the own properties of `from` onto `to` as live getters.
 * Keys already present on `to`, and the key equal to `except`, are skipped.
 * Each getter's enumerability follows the source descriptor (enumerable when
 * no descriptor is found). Returns `to`.
 */
function __copyProps(to, from, except, desc) {
  var isObjectLike = (from && typeof from === "object") || typeof from === "function";
  if (!isObjectLike) {
    return to;
  }
  for (let key of __getOwnPropNames(from)) {
    if (key === except || __hasOwnProp.call(to, key)) {
      continue;
    }
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
}

/**
 * Builds a CommonJS-style exports object: a fresh object flagged with
 * `__esModule: true` that exposes every own property of `mod` through getters.
 */
function __toCommonJS(mod) {
  var flagged = __defProp({}, "__esModule", { value: true });
  return __copyProps(flagged, mod);
}
// Namespace object that collects this module's public bindings.
var hybridQueryExecutionContext_exports = {};
// Register each export as a getter so the binding is resolved on access
// rather than at this point in the file (both names are defined further down).
__export(hybridQueryExecutionContext_exports, {
HybridQueryExecutionContext: () => HybridQueryExecutionContext,
HybridQueryExecutionContextBaseStates: () => HybridQueryExecutionContextBaseStates
});
// Publish the namespace as the CommonJS export, flagged with `__esModule`.
module.exports = __toCommonJS(hybridQueryExecutionContext_exports);
var import_logger = require("@azure/logger");
var import_hybridSearchQueryResult = require("../request/hybridSearchQueryResult.js");
var import_GlobalStatisticsAggregator = require("./Aggregators/GlobalStatisticsAggregator.js");
var import_headerUtils = require("./headerUtils.js");
var import_parallelQueryExecutionContext = require("./parallelQueryExecutionContext.js");
var import_pipelinedQueryExecutionContext = require("./pipelinedQueryExecutionContext.js");
var import_QueryValidationHelper = require("./QueryValidationHelper.js");
/**
 * Lifecycle states of a hybrid query execution context.
 * Every member's value is identical to its key.
 */
var HybridQueryExecutionContextBaseStates = {
  uninitialized: "uninitialized",
  initialized: "initialized",
  draining: "draining",
  done: "done"
};
/**
 * Execution context for hybrid search queries.
 *
 * The context moves through four states:
 *  - "uninitialized": the global statistics query still has to run (only when
 *    `hybridSearchQueryInfo.requiresGlobalStatistics` is set);
 *  - "initialized": the component execution contexts exist and must be read;
 *  - "draining": the merged, ordered results sit in `buffer` and are paged out;
 *  - "done": nothing is left to return.
 *
 * With more than one component query, results are de-duplicated by `rid` and
 * ordered by a weighted reciprocal-rank score (`computeRRFScore`). With exactly
 * one component query the component's own order is kept.
 */
class HybridQueryExecutionContext {
  /** Context running the global statistics query; only set when statistics are required. */
  globalStatisticsExecutionContext;
  /** One execution context per component query. */
  componentsExecutionContext = [];
  /** Page size used by `drain`; taken from `options.maxItemCount`. */
  pageSize;
  /** Current lifecycle state (see class description). */
  state;
  /** Aggregator fed with every global statistics item; only set when statistics are required. */
  globalStatisticsAggregator;
  emitRawOrderByPayload = true;
  /** Final result payloads waiting to be handed to the caller. */
  buffer = [];
  DEFAULT_PAGE_SIZE = 10;
  TOTAL_WORD_COUNT_PLACEHOLDER = "documentdb-formattablehybridsearchquery-totalwordcount";
  HIT_COUNTS_ARRAY_PLACEHOLDER = "documentdb-formattablehybridsearchquery-hitcountsarray";
  TOTAL_DOCUMENT_COUNT_PLACEHOLDER = "documentdb-formattablehybridsearchquery-totaldocumentcount";
  // Constant for RRF score calculation
  RRF_CONSTANT = 60;
  logger = (0, import_logger.createClientLogger)("HybridQueryExecutionContext");
  /** Hybrid results collected from the component queries before ordering. */
  hybridSearchResult = [];
  /** Hybrid results keyed by `rid`; the first occurrence of a `rid` wins. */
  uniqueItems = new Map();
  /** True when exactly one component execution context was created. */
  isSingleComponent = false;

  /**
   * @param clientContext - passed through to the child execution contexts.
   * @param collectionLink - passed through to the child execution contexts.
   * @param query - query text, or a query spec whose `parameters` are reused
   *   for the rewritten queries.
   * @param options - feed options; reads `continuationToken`, `maxItemCount`
   *   and `enableQueryControl`.
   * @param partitionedQueryExecutionInfo - query plan; reads `hybridSearchQueryInfo`
   *   and `queryRanges`.
   * @param correlatedActivityId - passed through to the child execution contexts.
   * @param allPartitionsRanges - ranges used for the global statistics query.
   */
  constructor(clientContext, collectionLink, query, options, partitionedQueryExecutionInfo, correlatedActivityId, allPartitionsRanges) {
    this.clientContext = clientContext;
    this.collectionLink = collectionLink;
    this.query = query;
    this.options = options;
    this.partitionedQueryExecutionInfo = partitionedQueryExecutionInfo;
    this.correlatedActivityId = correlatedActivityId;
    this.allPartitionsRanges = allPartitionsRanges;
    (0, import_QueryValidationHelper.rejectContinuationTokenForUnsupportedQueries)(this.options.continuationToken, [
      import_QueryValidationHelper.QueryTypes.hybridSearch(true)
    ]);
    this.state = "uninitialized";
    this.pageSize = this.options.maxItemCount;
    if (this.pageSize === void 0) {
      this.pageSize = this.DEFAULT_PAGE_SIZE;
    }
    const hybridSearchQueryInfo = partitionedQueryExecutionInfo.hybridSearchQueryInfo;
    if (!hybridSearchQueryInfo.requiresGlobalStatistics) {
      // No statistics needed: the component contexts can be built right away.
      this.createComponentExecutionContexts();
      this.state = "initialized";
      return;
    }
    const globalStaticsQueryOptions = { maxItemCount: this.pageSize };
    this.globalStatisticsAggregator = new import_GlobalStatisticsAggregator.GlobalStatisticsAggregator();
    const globalStatisticsQuery = typeof this.query === "string" ? hybridSearchQueryInfo.globalStatisticsQuery : {
      query: hybridSearchQueryInfo.globalStatisticsQuery,
      parameters: this.query?.parameters ?? []
    };
    const globalStatisticsQueryExecutionInfo = {
      partitionedQueryExecutionInfoVersion: 1,
      queryInfo: {
        distinctType: "None",
        hasSelectValue: false,
        groupByAliasToAggregateType: {},
        rewrittenQuery: globalStatisticsQuery,
        hasNonStreamingOrderBy: false
      },
      queryRanges: this.allPartitionsRanges
    };
    this.globalStatisticsExecutionContext = new import_parallelQueryExecutionContext.ParallelQueryExecutionContext(
      this.clientContext,
      this.collectionLink,
      globalStatisticsQuery,
      globalStaticsQueryOptions,
      globalStatisticsQueryExecutionInfo,
      this.correlatedActivityId
    );
  }

  /**
   * Returns the next single result.
   * Keeps advancing the state machine until results are buffered or the
   * context leaves the "uninitialized"/"initialized" states.
   * @returns `{ result, headers }`; `result` is `undefined` when nothing is left.
   */
  async nextItem(diagnosticNode) {
    const nextItemRespHeaders = (0, import_headerUtils.getInitialHeader)();
    while ((this.state === "uninitialized" || this.state === "initialized") && this.buffer.length === 0) {
      await this.fetchMoreInternal(diagnosticNode, nextItemRespHeaders);
    }
    if (this.state === "draining" && this.buffer.length > 0) {
      return this.drainOne(nextItemRespHeaders);
    }
    return this.done(nextItemRespHeaders);
  }

  /** @returns whether another call to `fetchMore`/`nextItem` can still produce results. */
  hasMoreResults() {
    switch (this.state) {
      case "uninitialized":
      case "initialized":
        return true;
      case "draining":
        return this.buffer.length > 0;
      default:
        return false;
    }
  }

  /**
   * Advances the state machine by one step with a fresh header bag.
   * Steps taken in the "uninitialized" and "initialized" states return an
   * empty `result` array; pages are only returned while draining.
   */
  async fetchMore(diagnosticNode) {
    const fetchMoreRespHeaders = (0, import_headerUtils.getInitialHeader)();
    return this.fetchMoreInternal(diagnosticNode, fetchMoreRespHeaders);
  }

  /**
   * Performs the step that belongs to the current state.
   * @throws Error when `state` holds an unknown value.
   */
  async fetchMoreInternal(diagnosticNode, headers) {
    switch (this.state) {
      case "uninitialized":
        await this.initialize(diagnosticNode, headers);
        return { result: [], headers };
      case "initialized":
        await this.executeComponentQueries(diagnosticNode, headers);
        return { result: [], headers };
      case "draining":
        return this.drain(headers);
      case "done":
        return this.done(headers);
      default:
        throw new Error(`Invalid state: ${this.state}`);
    }
  }

  /**
   * Runs the global statistics query to completion, aggregates every returned
   * item, then builds the component execution contexts.
   * On failure the context is marked "done" and the error is rethrown.
   */
  async initialize(diagnosticNode, fetchMoreRespHeaders) {
    try {
      while (this.globalStatisticsExecutionContext.hasMoreResults()) {
        const result = await this.globalStatisticsExecutionContext.fetchMore(diagnosticNode);
        (0, import_headerUtils.mergeHeaders)(fetchMoreRespHeaders, result.headers);
        if (result && result.result) {
          // The statistics context wraps its items in a `buffer` property.
          for (const globalStatistics of result.result.buffer) {
            if (globalStatistics) {
              this.globalStatisticsAggregator.aggregate(globalStatistics);
            }
          }
        }
      }
    } catch (error) {
      this.state = "done";
      throw error;
    }
    this.createComponentExecutionContexts();
    this.state = "initialized";
  }

  /**
   * Fetches one page from a component context and merges its headers.
   * @returns the page's `result` (may be undefined).
   */
  async fetchComponentPage(componentExecutionContext, diagnosticNode, fetchMoreRespHeaders) {
    const response = await componentExecutionContext.fetchMore(diagnosticNode);
    (0, import_headerUtils.mergeHeaders)(fetchMoreRespHeaders, response.headers);
    return response.result;
  }

  /** Wraps every raw item of a page and records it in `uniqueItems` unless its `rid` is already known. */
  collectUniqueItems(resultData) {
    if (!resultData) {
      return;
    }
    resultData.forEach((item) => {
      const hybridItem = import_hybridSearchQueryResult.HybridSearchQueryResult.create(item);
      if (!this.uniqueItems.has(hybridItem.rid)) {
        this.uniqueItems.set(hybridItem.rid, hybridItem);
      }
    });
  }

  /**
   * Reads the component queries.
   * With `options.enableQueryControl` a single page of a single component is
   * fetched per call; otherwise every component is read to exhaustion.
   * Once all components are exhausted the results are ordered and buffered.
   * On failure the context is marked "done" and the error is rethrown.
   */
  async executeComponentQueries(diagnosticNode, fetchMoreRespHeaders) {
    if (this.isSingleComponent) {
      await this.drainSingleComponent(diagnosticNode, fetchMoreRespHeaders);
      return;
    }
    try {
      if (this.options.enableQueryControl) {
        const componentExecutionContext = this.componentsExecutionContext.pop();
        if (componentExecutionContext !== undefined && componentExecutionContext.hasMoreResults()) {
          const page = await this.fetchComponentPage(
            componentExecutionContext,
            diagnosticNode,
            fetchMoreRespHeaders
          );
          this.collectUniqueItems(page);
          // Put the context back only while it can still produce pages.
          if (componentExecutionContext.hasMoreResults()) {
            this.componentsExecutionContext.push(componentExecutionContext);
          }
        }
        if (this.componentsExecutionContext.length === 0) {
          this.processUniqueItems();
        }
        return;
      }
      for (const componentExecutionContext of this.componentsExecutionContext) {
        while (componentExecutionContext.hasMoreResults()) {
          const page = await this.fetchComponentPage(
            componentExecutionContext,
            diagnosticNode,
            fetchMoreRespHeaders
          );
          this.collectUniqueItems(page);
        }
      }
      this.processUniqueItems();
    } catch (error) {
      this.state = "done";
      throw error;
    }
  }

  /**
   * Moves the de-duplicated items into `buffer` (ordered by RRF score when
   * there is more than one), applies skip/take and switches to "draining".
   */
  processUniqueItems() {
    this.uniqueItems.forEach((item) => this.hybridSearchResult.push(item));
    // Ranking zero or one item is pointless; component weights are only
    // extracted (and validated) when there is something to order.
    const ordered = this.hybridSearchResult.length > 1 ? this.sortHybridSearchResultByRRFScore(this.hybridSearchResult, this.extractComponentWeights()) : this.hybridSearchResult;
    ordered.forEach((item) => this.buffer.push(item.data));
    // Skip/take also apply to the zero/one item case (e.g. skip 1 over one hit).
    this.applySkipAndTakeToBuffer();
    this.state = "draining";
  }

  /**
   * Applies `hybridSearchQueryInfo.skip` and `hybridSearchQueryInfo.take` to `buffer`.
   * A missing (`undefined`/`null`) value means "no skip"/"no limit"; a `take`
   * of zero or less empties the buffer.
   */
  applySkipAndTakeToBuffer() {
    const { skip, take } = this.partitionedQueryExecutionInfo.hybridSearchQueryInfo;
    if (skip > 0) {
      // slice() already yields [] when skip is at or beyond the end.
      this.buffer = this.buffer.slice(skip);
    }
    // Compare against null/undefined explicitly: a take of 0 is a real limit.
    if (take !== undefined && take !== null) {
      this.buffer = take <= 0 ? [] : this.buffer.slice(0, take);
    }
  }

  /**
   * Returns the next page of buffered results and marks the context "done"
   * once the buffer is empty.
   * A page size below 1 (or a non-numeric one) returns the whole buffer,
   * because slicing with such a value would never shrink the buffer.
   */
  async drain(fetchMoreRespHeaders) {
    try {
      if (this.buffer.length === 0) {
        this.state = "done";
        return this.done(fetchMoreRespHeaders);
      }
      const pageLength = this.pageSize >= 1 ? this.pageSize : this.buffer.length;
      const result = this.buffer.slice(0, pageLength);
      this.buffer = this.buffer.slice(pageLength);
      if (this.buffer.length === 0) {
        this.state = "done";
      }
      return {
        result,
        headers: fetchMoreRespHeaders
      };
    } catch (error) {
      this.state = "done";
      throw error;
    }
  }

  /** Returns the first buffered result and marks the context "done" once the buffer is empty. */
  async drainOne(nextItemRespHeaders) {
    try {
      if (this.buffer.length === 0) {
        this.state = "done";
        return this.done(nextItemRespHeaders);
      }
      const result = this.buffer.shift();
      if (this.buffer.length === 0) {
        this.state = "done";
      }
      return {
        result,
        headers: nextItemRespHeaders
      };
    } catch (error) {
      this.state = "done";
      throw error;
    }
  }

  /** Builds the "no more results" response: `result` is `undefined`. */
  done(fetchMoreRespHeaders) {
    return {
      result: void 0,
      headers: fetchMoreRespHeaders
    };
  }

  /**
   * Orders hybrid results by descending RRF score.
   *
   * For every component the items are ranked with that component's comparator
   * (equal scores share a rank, ranks start at 1); the per-component ranks are
   * then combined by `computeRRFScore`. Items with equal RRF scores keep their
   * input order. The input array is not modified.
   *
   * @param hybridSearchResult - items exposing `rid` and `componentScores`.
   * @param componentWeights - one `{ weight, comparator }` per component.
   * @returns a new array with the same items in ranked order.
   */
  sortHybridSearchResultByRRFScore(hybridSearchResult, componentWeights) {
    if (hybridSearchResult.length === 0) {
      return [];
    }
    // Index the rank records by rid so each lookup is O(1) instead of a scan.
    const rankEntryByRid = new Map();
    const rankEntries = hybridSearchResult.map((item) => {
      const entry = {
        item,
        ranks: new Array(item.componentScores.length).fill(0)
      };
      if (!rankEntryByRid.has(item.rid)) {
        rankEntryByRid.set(item.rid, entry);
      }
      return entry;
    });
    // Sort a copy so the caller's array keeps its order.
    const working = [...hybridSearchResult];
    const componentCount = hybridSearchResult[0].componentScores.length;
    for (let i = 0; i < componentCount; i++) {
      working.sort(
        (a, b) => componentWeights[i].comparator(a.componentScores[i], b.componentScores[i])
      );
      let rank = 1;
      for (let j = 0; j < working.length; j++) {
        if (j > 0 && working[j].componentScores[i] !== working[j - 1].componentScores[i]) {
          ++rank;
        }
        rankEntryByRid.get(working[j].rid).ranks[i] = rank;
      }
    }
    const scored = rankEntries.map((entry) => ({
      item: entry.item,
      rrfScore: this.computeRRFScore(entry.ranks, this.RRF_CONSTANT, componentWeights)
    }));
    scored.sort((a, b) => b.rrfScore - a.rrfScore);
    return scored.map((entry) => entry.item);
  }

  /**
   * Reads the only component query and buffers its results in the order the
   * component returned them (no RRF ordering), then applies skip/take.
   * With `options.enableQueryControl` one page is fetched per call.
   * On failure the context is marked "done" and the error is rethrown.
   */
  async drainSingleComponent(diagNode, fetchMoreRespHeaders) {
    if (this.componentsExecutionContext && this.componentsExecutionContext.length !== 1) {
      this.logger.error("drainSingleComponent called on multiple components");
      // Leaving the state untouched would make nextItem()/fetchMore() callers
      // loop forever without ever receiving a result.
      this.state = "done";
      return;
    }
    try {
      const componentExecutionContext = this.componentsExecutionContext[0];
      if (this.options.enableQueryControl) {
        if (componentExecutionContext.hasMoreResults()) {
          const page = await this.fetchComponentPage(componentExecutionContext, diagNode, fetchMoreRespHeaders);
          if (page) {
            page.forEach((item) => {
              this.hybridSearchResult.push(import_hybridSearchQueryResult.HybridSearchQueryResult.create(item));
            });
          }
        }
        if (!componentExecutionContext.hasMoreResults()) {
          this.hybridSearchResult.forEach((item) => this.buffer.push(item.data));
          this.applySkipAndTakeToBuffer();
          this.state = "draining";
        }
        return;
      }
      const hybridSearchResult = [];
      while (componentExecutionContext.hasMoreResults()) {
        const page = await this.fetchComponentPage(componentExecutionContext, diagNode, fetchMoreRespHeaders);
        if (page) {
          page.forEach((item) => {
            hybridSearchResult.push(import_hybridSearchQueryResult.HybridSearchQueryResult.create(item));
          });
        }
      }
      hybridSearchResult.forEach((item) => this.buffer.push(item.data));
      this.applySkipAndTakeToBuffer();
      this.state = "draining";
    } catch (error) {
      this.state = "done";
      throw error;
    }
  }

  /**
   * Creates one pipelined execution context per component query (with the
   * statistics placeholders filled in first when global statistics are
   * required) and records whether there is exactly one component.
   */
  createComponentExecutionContexts() {
    const hybridSearchQueryInfo = this.partitionedQueryExecutionInfo.hybridSearchQueryInfo;
    let queryInfos = hybridSearchQueryInfo.componentQueryInfos;
    if (hybridSearchQueryInfo.requiresGlobalStatistics) {
      queryInfos = this.processComponentQueries(
        hybridSearchQueryInfo.componentQueryInfos,
        this.globalStatisticsAggregator.getResult()
      );
    }
    for (const componentQueryInfo of queryInfos) {
      const componentPartitionExecutionInfo = {
        partitionedQueryExecutionInfoVersion: 1,
        queryInfo: componentQueryInfo,
        queryRanges: this.partitionedQueryExecutionInfo.queryRanges
      };
      const rewrittenSqlQuerySpec = typeof this.query === "string" ? componentQueryInfo.rewrittenQuery : {
        query: componentQueryInfo.rewrittenQuery,
        parameters: this.query?.parameters ?? []
      };
      const executionContext = new import_pipelinedQueryExecutionContext.PipelinedQueryExecutionContext(
        this.clientContext,
        this.collectionLink,
        rewrittenSqlQuerySpec,
        this.options,
        componentPartitionExecutionInfo,
        this.correlatedActivityId,
        this.emitRawOrderByPayload,
        /* supportsContinuationTokens */
        false
      );
      this.componentsExecutionContext.push(executionContext);
    }
    this.isSingleComponent = this.componentsExecutionContext.length === 1;
  }

  /**
   * Returns copies of the component query infos whose `rewrittenQuery` (and,
   * when the component has an order by, `orderByExpressions`) have the
   * statistics placeholders replaced.
   * @throws Error when a component with an order by lacks a non-streaming order by.
   */
  processComponentQueries(componentQueryInfos, globalStats) {
    const componentCount = componentQueryInfos.length;
    return componentQueryInfos.map((queryInfo) => {
      let rewrittenOrderByExpressions = queryInfo.orderByExpressions;
      if (queryInfo.orderBy && queryInfo.orderBy.length > 0) {
        if (!queryInfo.hasNonStreamingOrderBy) {
          throw new Error("The component query must have a non-streaming order by clause.");
        }
        rewrittenOrderByExpressions = queryInfo.orderByExpressions.map(
          (expr) => this.replacePlaceholdersWorkaroud(expr, globalStats, componentCount)
        );
      }
      return {
        ...queryInfo,
        rewrittenQuery: this.replacePlaceholdersWorkaroud(
          queryInfo.rewrittenQuery,
          globalStats,
          componentCount
        ),
        orderByExpressions: rewrittenOrderByExpressions
      };
    });
  }

  // This method is commented out currently, but we will switch back to using this
  // once the gateway has been redeployed with the fix for placeholder indexes
  // private replacePlaceholders(query: string, globalStats: GlobalStatistics): string {
  // // Replace total document count
  // query = query.replace(
  // new RegExp(`{${this.TOTAL_DOCUMENT_COUNT_PLACEHOLDER}}`, "g"),
  // globalStats.documentCount.toString(),
  // );
  // // Replace total word counts and hit counts from fullTextStatistics
  // globalStats.fullTextStatistics.forEach((stats, index) => {
  // // Replace total word counts
  // query = query.replace(
  // new RegExp(`{${this.TOTAL_WORD_COUNT_PLACEHOLDER}-${index}}`, "g"),
  // stats.totalWordCount.toString(),
  // );
  // // Replace hit counts
  // query = query.replace(
  // new RegExp(`{${this.HIT_COUNTS_ARRAY_PLACEHOLDER}-${index}}`, "g"),
  // `[${stats.hitCounts.join(",")}]`,
  // );
  // });
  // return query;
  // }

  /**
   * Substitutes the global statistics into a query text.
   *
   * Placeholder indexes found in the text are not used to index
   * `fullTextStatistics` directly: indexes `0..componentCount-1` are scanned
   * in order and the n-th index whose word-count placeholder occurs in the
   * text receives the n-th statistics entry.
   *
   * NOTE(review): a `documentCount` of 0 fails validation because of the
   * truthiness check - confirm whether statistics for an empty container
   * should be rejected.
   *
   * @throws Error when `globalStats` is malformed or has fewer statistics
   *   entries than the text needs.
   */
  replacePlaceholdersWorkaroud(query, globalStats, componentCount) {
    if (!globalStats || !globalStats.documentCount || !Array.isArray(globalStats.fullTextStatistics)) {
      throw new Error("GlobalStats validation failed");
    }
    // Placeholders are literal text; function replacers keep "$" in the
    // replacement from being interpreted.
    const documentCountText = globalStats.documentCount.toString();
    query = query.replaceAll(`{${this.TOTAL_DOCUMENT_COUNT_PLACEHOLDER}}`, () => documentCountText);
    let statisticsIndex = 0;
    for (let i = 0; i < componentCount; i++) {
      const wordCountPlaceholder = `{${this.TOTAL_WORD_COUNT_PLACEHOLDER}-${i}}`;
      const hitCountPlaceholder = `{${this.HIT_COUNTS_ARRAY_PLACEHOLDER}-${i}}`;
      if (!query.includes(wordCountPlaceholder)) {
        continue;
      }
      const stats = globalStats.fullTextStatistics[statisticsIndex];
      if (!stats) {
        throw new Error(
          `GlobalStats validation failed: missing full text statistics entry ${statisticsIndex}`
        );
      }
      const totalWordCountText = stats.totalWordCount.toString();
      const hitCountsText = `[${stats.hitCounts.join(",")}]`;
      query = query.replaceAll(wordCountPlaceholder, () => totalWordCountText);
      query = query.replaceAll(hitCountPlaceholder, () => hitCountsText);
      statisticsIndex++;
    }
    return query;
  }

  /**
   * Weighted reciprocal-rank score: the sum over components of
   * `weight * 1 / (k + rank)`.
   * @throws Error when `ranks` and `componentWeights` differ in length.
   */
  computeRRFScore = (ranks, k, componentWeights) => {
    if (ranks.length !== componentWeights.length) {
      throw new Error("Ranks and component weights length mismatch");
    }
    return ranks.reduce(
      (rrfScore, rank, i) => rrfScore + componentWeights[i].weight * (1 / (k + rank)),
      0
    );
  };

  /**
   * Builds one `{ weight, comparator }` per component query.
   * The weight comes from `hybridSearchQueryInfo.componentWeights` (1 when no
   * weights are supplied). The comparator orders scores ascending when the
   * component's first order by entry contains "Ascending", descending otherwise.
   * @throws Error when a component with an order by is not a non-streaming
   *   order by or does not have exactly one order by expression.
   */
  extractComponentWeights() {
    const { componentWeights, componentQueryInfos } = this.partitionedQueryExecutionInfo.hybridSearchQueryInfo;
    const useDefaultComponentWeight = !componentWeights || componentWeights.length === 0;
    const result = [];
    for (let index = 0; index < componentQueryInfos.length; ++index) {
      const queryInfo = componentQueryInfos[index];
      const hasOrderBy = queryInfo.orderBy && queryInfo.orderBy.length > 0;
      if (hasOrderBy) {
        if (!queryInfo.hasNonStreamingOrderBy) {
          throw new Error("The component query should have a non streaming order by");
        }
        if (!queryInfo.orderByExpressions || queryInfo.orderByExpressions.length !== 1) {
          throw new Error("The component query should have exactly one order by expression");
        }
      }
      const sortOrder = hasOrderBy && queryInfo.orderBy[0].includes("Ascending") ? 1 : -1;
      result.push({
        weight: useDefaultComponentWeight ? 1 : componentWeights[index],
        comparator: (x, y) => sortOrder * (x - y)
      });
    }
    return result;
  }
}
// Annotate the CommonJS export names for ESM import in node:
// The leading `0 &&` short-circuits, so this assignment never executes; the
// statement only spells out the export names in source form.
0 && (module.exports = {
HybridQueryExecutionContext,
HybridQueryExecutionContextBaseStates
});