UNPKG

@azure/cosmos

Version:
520 lines (519 loc) • 21.7 kB
// ---------------------------------------------------------------------------
// esbuild CommonJS interop helpers (generated code, kept as emitted).
// `__export` installs lazy getters on the exports object; `__toCommonJS`
// copies them onto a fresh object flagged with `__esModule`.
// ---------------------------------------------------------------------------
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
var hybridQueryExecutionContext_exports = {};
__export(hybridQueryExecutionContext_exports, {
  HybridQueryExecutionContext: () => HybridQueryExecutionContext,
  HybridQueryExecutionContextBaseStates: () => HybridQueryExecutionContextBaseStates
});
module.exports = __toCommonJS(hybridQueryExecutionContext_exports);
var import_logger = require("@azure/logger");
var import_hybridSearchQueryResult = require("../request/hybridSearchQueryResult.js");
var import_GlobalStatisticsAggregator = require("./Aggregators/GlobalStatisticsAggregator.js");
var import_headerUtils = require("./headerUtils.js");
var import_parallelQueryExecutionContext = require("./parallelQueryExecutionContext.js");
var import_pipelinedQueryExecutionContext = require("./pipelinedQueryExecutionContext.js");
var import_QueryValidationHelper = require("./QueryValidationHelper.js");

/**
 * Lifecycle states of a HybridQueryExecutionContext.
 *
 * - `uninitialized`: global statistics still have to be fetched.
 * - `initialized`:   component execution contexts exist but have not been fully read.
 * - `draining`:      ranked results are buffered and are being handed out.
 * - `done`:          nothing more to return (also entered when an error is thrown).
 *
 * The class below compares against the string values directly; they must stay
 * equal to the member names.
 */
var HybridQueryExecutionContextBaseStates = /* @__PURE__ */ ((HybridQueryExecutionContextBaseStates2) => {
  HybridQueryExecutionContextBaseStates2["uninitialized"] = "uninitialized";
  HybridQueryExecutionContextBaseStates2["initialized"] = "initialized";
  HybridQueryExecutionContextBaseStates2["draining"] = "draining";
  HybridQueryExecutionContextBaseStates2["done"] = "done";
  return HybridQueryExecutionContextBaseStates2;
})(HybridQueryExecutionContextBaseStates || {});

/**
 * Execution context for hybrid search queries.
 *
 * The context optionally gathers global statistics, runs one pipelined
 * execution context per component query, de-duplicates the returned items by
 * `rid`, ranks them with weighted Reciprocal Rank Fusion (RRF), applies the
 * query's skip/take, and then serves the buffered results page by page
 * (`fetchMore`) or item by item (`nextItem`).
 */
class HybridQueryExecutionContext {
  /**
   * @param clientContext - Passed through to the nested execution contexts.
   * @param collectionLink - Passed through to the nested execution contexts.
   * @param query - Either a query string or an object carrying `parameters`;
   *   only its type and `parameters` are read here.
   * @param options - Reads `continuationToken`, `maxItemCount` and `enableQueryControl`;
   *   the whole object is also forwarded to each component context.
   * @param partitionedQueryExecutionInfo - Must carry `hybridSearchQueryInfo` and `queryRanges`.
   * @param correlatedActivityId - Passed through to the nested execution contexts.
   * @param allPartitionsRanges - Used as `queryRanges` for the global statistics query.
   */
  constructor(
    clientContext,
    collectionLink,
    query,
    options,
    partitionedQueryExecutionInfo,
    correlatedActivityId,
    allPartitionsRanges
  ) {
    this.clientContext = clientContext;
    this.collectionLink = collectionLink;
    this.query = query;
    this.options = options;
    this.partitionedQueryExecutionInfo = partitionedQueryExecutionInfo;
    this.correlatedActivityId = correlatedActivityId;
    this.allPartitionsRanges = allPartitionsRanges;
    (0, import_QueryValidationHelper.rejectContinuationTokenForUnsupportedQueries)(
      this.options.continuationToken,
      [import_QueryValidationHelper.QueryTypes.hybridSearch(true)]
    );
    this.state = "uninitialized";
    this.pageSize = this.options.maxItemCount;
    if (this.pageSize === undefined) {
      this.pageSize = this.DEFAULT_PAGE_SIZE;
    }

    const { hybridSearchQueryInfo } = partitionedQueryExecutionInfo;
    if (!hybridSearchQueryInfo.requiresGlobalStatistics) {
      // No statistics round-trip needed: component contexts can be built right away.
      this.createComponentExecutionContexts();
      this.state = "initialized";
      return;
    }

    const globalStatisticsQueryOptions = { maxItemCount: this.pageSize };
    this.globalStatisticsAggregator = new import_GlobalStatisticsAggregator.GlobalStatisticsAggregator();
    const globalStatisticsQuery = this.toQuerySpec(
      this.partitionedQueryExecutionInfo.hybridSearchQueryInfo.globalStatisticsQuery
    );
    const globalStatisticsQueryExecutionInfo = {
      partitionedQueryExecutionInfoVersion: 1,
      queryInfo: {
        distinctType: "None",
        hasSelectValue: false,
        groupByAliasToAggregateType: {},
        rewrittenQuery: globalStatisticsQuery,
        hasNonStreamingOrderBy: false
      },
      queryRanges: this.allPartitionsRanges
    };
    this.globalStatisticsExecutionContext = new import_parallelQueryExecutionContext.ParallelQueryExecutionContext(
      this.clientContext,
      this.collectionLink,
      globalStatisticsQuery,
      globalStatisticsQueryOptions,
      globalStatisticsQueryExecutionInfo,
      this.correlatedActivityId
    );
  }

  // Context used to fetch global statistics; only set when they are required.
  globalStatisticsExecutionContext;
  // One pipelined execution context per component query.
  componentsExecutionContext = [];
  pageSize;
  state;
  globalStatisticsAggregator;
  // Forwarded to every component PipelinedQueryExecutionContext.
  emitRawOrderByPayload = true;
  // Final, ranked payloads waiting to be handed to the caller.
  buffer = [];
  DEFAULT_PAGE_SIZE = 10;
  TOTAL_WORD_COUNT_PLACEHOLDER = "documentdb-formattablehybridsearchquery-totalwordcount";
  HIT_COUNTS_ARRAY_PLACEHOLDER = "documentdb-formattablehybridsearchquery-hitcountsarray";
  TOTAL_DOCUMENT_COUNT_PLACEHOLDER = "documentdb-formattablehybridsearchquery-totaldocumentcount";
  RRF_CONSTANT = 60; // Constant for RRF score calculation
  logger = (0, import_logger.createClientLogger)("HybridQueryExecutionContext");
  // Hybrid search results collected so far (not yet ranked).
  hybridSearchResult = [];
  // rid -> hybrid search result; de-duplicates items returned by several components.
  uniqueItems = /* @__PURE__ */ new Map();
  isSingleComponent = false;

  /**
   * Returns the next single result.
   *
   * Keeps advancing the state machine while no results are buffered, then
   * hands out one buffered item.
   *
   * @returns `{ result, headers }`; `result` is `undefined` once exhausted.
   */
  async nextItem(diagnosticNode) {
    const nextItemRespHeaders = (0, import_headerUtils.getInitialHeader)();
    while ((this.state === "uninitialized" || this.state === "initialized") && this.buffer.length === 0) {
      await this.fetchMoreInternal(diagnosticNode, nextItemRespHeaders);
    }
    if (this.state === "draining" && this.buffer.length > 0) {
      return this.drainOne(nextItemRespHeaders);
    }
    return this.done(nextItemRespHeaders);
  }

  /**
   * @returns `true` while the context may still yield results.
   */
  hasMoreResults() {
    switch (this.state) {
      case "uninitialized":
      case "initialized":
        return true;
      case "draining":
        return this.buffer.length > 0;
      case "done":
      default:
        return false;
    }
  }

  /**
   * Advances the state machine by one step and returns whatever that step produced.
   * Steps that only make internal progress return an empty `result` array.
   */
  async fetchMore(diagnosticNode) {
    const fetchMoreRespHeaders = (0, import_headerUtils.getInitialHeader)();
    return this.fetchMoreInternal(diagnosticNode, fetchMoreRespHeaders);
  }

  /**
   * Dispatches on the current state.
   *
   * @param headers - Header accumulator that nested responses are merged into.
   * @throws Error when the state is not one of the four known values.
   */
  async fetchMoreInternal(diagnosticNode, headers) {
    switch (this.state) {
      case "uninitialized":
        await this.initialize(diagnosticNode, headers);
        return { result: [], headers };
      case "initialized":
        await this.executeComponentQueries(diagnosticNode, headers);
        return { result: [], headers };
      case "draining":
        return this.drain(headers);
      case "done":
        return this.done(headers);
      default:
        throw new Error(`Invalid state: ${this.state}`);
    }
  }

  /**
   * Reads the global statistics query to completion, aggregates every item,
   * then builds the component execution contexts and moves to `initialized`.
   * On failure the context is marked `done` and the error is rethrown.
   */
  async initialize(diagnosticNode, fetchMoreRespHeaders) {
    try {
      while (this.globalStatisticsExecutionContext.hasMoreResults()) {
        const result = await this.globalStatisticsExecutionContext.fetchMore(diagnosticNode);
        (0, import_headerUtils.mergeHeaders)(fetchMoreRespHeaders, result.headers);
        if (result && result.result) {
          // The statistics items are read from the `buffer` property of the response result.
          for (const globalStatistics of result.result.buffer) {
            if (globalStatistics) {
              this.globalStatisticsAggregator.aggregate(globalStatistics);
            }
          }
        }
      }
    } catch (error) {
      this.state = "done";
      throw error;
    }
    this.createComponentExecutionContexts();
    this.state = "initialized";
  }

  /**
   * Reads the component queries and, once all of them are exhausted, ranks the
   * collected items.
   *
   * With `options.enableQueryControl` a single page of a single component is
   * fetched per call; otherwise every component is read to completion in one call.
   * On failure the context is marked `done` and the error is rethrown.
   */
  async executeComponentQueries(diagnosticNode, fetchMoreRespHeaders) {
    if (this.isSingleComponent) {
      await this.drainSingleComponent(diagnosticNode, fetchMoreRespHeaders);
      return;
    }
    try {
      if (this.options.enableQueryControl) {
        if (this.componentsExecutionContext.length > 0) {
          const componentExecutionContext = this.componentsExecutionContext.pop();
          if (componentExecutionContext.hasMoreResults()) {
            const page = await this.fetchComponentPage(
              componentExecutionContext,
              diagnosticNode,
              fetchMoreRespHeaders
            );
            this.addUniqueItems(page);
            // Put the context back only when it still has pages left.
            if (componentExecutionContext.hasMoreResults()) {
              this.componentsExecutionContext.push(componentExecutionContext);
            }
          }
        }
        if (this.componentsExecutionContext.length === 0) {
          this.processUniqueItems();
        }
        return;
      }

      for (const componentExecutionContext of this.componentsExecutionContext) {
        while (componentExecutionContext.hasMoreResults()) {
          const page = await this.fetchComponentPage(
            componentExecutionContext,
            diagnosticNode,
            fetchMoreRespHeaders
          );
          this.addUniqueItems(page);
        }
      }
      this.processUniqueItems();
    } catch (error) {
      this.state = "done";
      throw error;
    }
  }

  /**
   * Fetches one page from a component context, merges its headers into the
   * accumulator and converts every returned item to a hybrid search result.
   *
   * @returns The converted items; an empty array when the page carried no result.
   */
  async fetchComponentPage(componentExecutionContext, diagnosticNode, fetchMoreRespHeaders) {
    const response = await componentExecutionContext.fetchMore(diagnosticNode);
    (0, import_headerUtils.mergeHeaders)(fetchMoreRespHeaders, response.headers);
    const converted = [];
    if (response.result) {
      response.result.forEach((item) => {
        converted.push(import_hybridSearchQueryResult.HybridSearchQueryResult.create(item));
      });
    }
    return converted;
  }

  /**
   * Records each item under its `rid`, keeping the first occurrence.
   */
  addUniqueItems(hybridItems) {
    for (const hybridItem of hybridItems) {
      if (!this.uniqueItems.has(hybridItem.rid)) {
        this.uniqueItems.set(hybridItem.rid, hybridItem);
      }
    }
  }

  /**
   * Ranks the de-duplicated items, fills the output buffer, applies skip/take
   * and switches to `draining`.
   *
   * Ranking is skipped when there are fewer than two items, but skip/take is
   * applied in every case so that e.g. a single hit is still dropped when the
   * query skips at least one result.
   */
  processUniqueItems() {
    this.uniqueItems.forEach((item) => this.hybridSearchResult.push(item));
    let orderedResults = this.hybridSearchResult;
    if (orderedResults.length > 1) {
      const componentWeights = this.extractComponentWeights();
      orderedResults = this.sortHybridSearchResultByRRFScore(orderedResults, componentWeights);
    }
    orderedResults.forEach((item) => this.buffer.push(item.data));
    this.applySkipAndTakeToBuffer();
    this.state = "draining";
  }

  /**
   * Applies `skip` and `take` from the hybrid search query info to the buffer.
   *
   * A missing (`undefined`/`null`) `take` means "no limit"; a `take` of zero or
   * less empties the buffer.
   */
  applySkipAndTakeToBuffer() {
    const { skip, take } = this.partitionedQueryExecutionInfo.hybridSearchQueryInfo;
    if (skip) {
      this.buffer = skip >= this.buffer.length ? [] : this.buffer.slice(skip);
    }
    // Explicit null check: a plain truthiness test would ignore `take === 0`.
    if (take !== undefined && take !== null) {
      this.buffer = take <= 0 ? [] : this.buffer.slice(0, take);
    }
  }

  /**
   * Hands out up to `pageSize` buffered items and moves to `done` when the
   * buffer becomes empty.
   */
  async drain(fetchMoreRespHeaders) {
    try {
      if (this.buffer.length === 0) {
        this.state = "done";
        return this.done(fetchMoreRespHeaders);
      }
      const result = this.buffer.slice(0, this.pageSize);
      this.buffer = this.buffer.slice(this.pageSize);
      if (this.buffer.length === 0) {
        this.state = "done";
      }
      return { result, headers: fetchMoreRespHeaders };
    } catch (error) {
      this.state = "done";
      throw error;
    }
  }

  /**
   * Hands out exactly one buffered item and moves to `done` when the buffer
   * becomes empty.
   */
  async drainOne(nextItemRespHeaders) {
    try {
      if (this.buffer.length === 0) {
        this.state = "done";
        return this.done(nextItemRespHeaders);
      }
      const result = this.buffer.shift();
      if (this.buffer.length === 0) {
        this.state = "done";
      }
      return { result, headers: nextItemRespHeaders };
    } catch (error) {
      this.state = "done";
      throw error;
    }
  }

  /**
   * @returns The terminal response: no result, only the accumulated headers.
   */
  done(fetchMoreRespHeaders) {
    return { result: undefined, headers: fetchMoreRespHeaders };
  }

  /**
   * Orders the results by descending weighted RRF score.
   *
   * For each component the items are sorted with that component's comparator
   * and ranked densely (equal scores share a rank, ranks start at 1). The
   * per-component ranks are then combined by `computeRRFScore`. Items with
   * equal RRF score keep their input order.
   *
   * The input array is not modified. The component count is taken from the
   * first input item; all items are assumed to carry the same number of
   * component scores (otherwise `computeRRFScore` throws on the mismatch).
   *
   * @param hybridSearchResult - Items exposing `rid` and `componentScores`.
   * @param componentWeights - One `{ weight, comparator }` per component.
   * @returns A new array with the same items in ranked order.
   */
  sortHybridSearchResultByRRFScore(hybridSearchResult, componentWeights) {
    if (hybridSearchResult.length === 0) {
      return [];
    }
    const ranksArray = hybridSearchResult.map((item) => ({
      rid: item.rid,
      ranks: new Array(item.componentScores.length).fill(0)
    }));
    // rid -> rank entry (first occurrence), replacing a linear scan per item.
    const ranksByRid = new Map();
    for (const entry of ranksArray) {
      if (!ranksByRid.has(entry.rid)) {
        ranksByRid.set(entry.rid, entry);
      }
    }

    // Sort a copy so the caller's array keeps its order.
    const working = [...hybridSearchResult];
    const componentCount = hybridSearchResult[0].componentScores.length;
    for (let i = 0; i < componentCount; i++) {
      working.sort(
        (a, b) => componentWeights[i].comparator(a.componentScores[i], b.componentScores[i])
      );
      let rank = 1;
      for (let j = 0; j < working.length; j++) {
        if (j > 0 && working[j].componentScores[i] !== working[j - 1].componentScores[i]) {
          ++rank;
        }
        ranksByRid.get(working[j].rid).ranks[i] = rank;
      }
    }

    const rrfScores = ranksArray.map((item) => ({
      rid: item.rid,
      rrfScore: this.computeRRFScore(item.ranks, this.RRF_CONSTANT, componentWeights)
    }));
    rrfScores.sort((a, b) => b.rrfScore - a.rrfScore);

    // rid -> item (first occurrence in the final working order).
    const itemsByRid = new Map();
    for (const item of working) {
      if (!itemsByRid.has(item.rid)) {
        itemsByRid.set(item.rid, item);
      }
    }
    return rrfScores.map((scoreItem) => itemsByRid.get(scoreItem.rid));
  }

  /**
   * Single-component path: results are buffered in the order the component
   * returns them, without RRF ranking, and skip/take is applied once the
   * component is exhausted.
   *
   * With `options.enableQueryControl` one page is fetched per call; otherwise
   * the component is read to completion.
   */
  async drainSingleComponent(diagNode, fetchMoreRespHeaders) {
    if (this.componentsExecutionContext && this.componentsExecutionContext.length !== 1) {
      this.logger.error("drainSingleComponent called on multiple components");
      // Leave the `initialized` state; otherwise callers that loop while no
      // results are buffered would never terminate.
      this.state = "done";
      return;
    }
    try {
      const componentExecutionContext = this.componentsExecutionContext[0];
      if (this.options.enableQueryControl) {
        if (componentExecutionContext.hasMoreResults()) {
          const page = await this.fetchComponentPage(
            componentExecutionContext,
            diagNode,
            fetchMoreRespHeaders
          );
          page.forEach((item) => this.hybridSearchResult.push(item));
        }
        if (!componentExecutionContext.hasMoreResults()) {
          this.hybridSearchResult.forEach((item) => this.buffer.push(item.data));
          this.applySkipAndTakeToBuffer();
          this.state = "draining";
        }
        return;
      }

      const collected = [];
      while (componentExecutionContext.hasMoreResults()) {
        const page = await this.fetchComponentPage(
          componentExecutionContext,
          diagNode,
          fetchMoreRespHeaders
        );
        page.forEach((item) => collected.push(item));
      }
      collected.forEach((item) => this.buffer.push(item.data));
      this.applySkipAndTakeToBuffer();
      this.state = "draining";
    } catch (error) {
      this.state = "done";
      throw error;
    }
  }

  /**
   * Builds one PipelinedQueryExecutionContext per component query.
   *
   * When global statistics are required, the component queries are first
   * rewritten with the aggregated statistics. Also records whether there is
   * exactly one component.
   */
  createComponentExecutionContexts() {
    const { hybridSearchQueryInfo } = this.partitionedQueryExecutionInfo;
    let queryInfos = hybridSearchQueryInfo.componentQueryInfos;
    if (hybridSearchQueryInfo.requiresGlobalStatistics) {
      queryInfos = this.processComponentQueries(
        hybridSearchQueryInfo.componentQueryInfos,
        this.globalStatisticsAggregator.getResult()
      );
    }
    for (const componentQueryInfo of queryInfos) {
      const componentPartitionExecutionInfo = {
        partitionedQueryExecutionInfoVersion: 1,
        queryInfo: componentQueryInfo,
        queryRanges: this.partitionedQueryExecutionInfo.queryRanges
      };
      const rewrittenSqlQuerySpec = this.toQuerySpec(componentQueryInfo.rewrittenQuery);
      const executionContext = new import_pipelinedQueryExecutionContext.PipelinedQueryExecutionContext(
        this.clientContext,
        this.collectionLink,
        rewrittenSqlQuerySpec,
        this.options,
        componentPartitionExecutionInfo,
        this.correlatedActivityId,
        this.emitRawOrderByPayload,
        /* supportsContinuationTokens */
        false
      );
      this.componentsExecutionContext.push(executionContext);
    }
    this.isSingleComponent = this.componentsExecutionContext.length === 1;
  }

  /**
   * Wraps rewritten query text in the same shape as the user's query: a plain
   * string when the original query was a string, otherwise a
   * `{ query, parameters }` object that reuses the original parameters.
   */
  toQuerySpec(rewrittenQuery) {
    if (typeof this.query === "string") {
      return rewrittenQuery;
    }
    return { query: rewrittenQuery, parameters: this.query?.parameters ?? [] };
  }

  /**
   * Substitutes the global statistics into every component query and, for
   * components with an order-by, into their order-by expressions.
   *
   * @throws Error when a component has an order-by that is not non-streaming.
   */
  processComponentQueries(componentQueryInfos, globalStats) {
    return componentQueryInfos.map((queryInfo) => {
      let rewrittenOrderByExpressions = queryInfo.orderByExpressions;
      if (queryInfo.orderBy && queryInfo.orderBy.length > 0) {
        if (!queryInfo.hasNonStreamingOrderBy) {
          throw new Error("The component query must have a non-streaming order by clause.");
        }
        rewrittenOrderByExpressions = queryInfo.orderByExpressions.map(
          (expr) => this.replacePlaceholdersWorkaroud(expr, globalStats, componentQueryInfos.length)
        );
      }
      return {
        ...queryInfo,
        rewrittenQuery: this.replacePlaceholdersWorkaroud(
          queryInfo.rewrittenQuery,
          globalStats,
          componentQueryInfos.length
        ),
        orderByExpressions: rewrittenOrderByExpressions
      };
    });
  }

  // This method is commented currently, but we will switch back to using this
  // once the gateway has been redeployed with the fix for placeholder indexes
  // private replacePlaceholders(query: string, globalStats: GlobalStatistics): string {
  //   // Replace total document count
  //   query = query.replace(
  //     new RegExp(`{${this.TOTAL_DOCUMENT_COUNT_PLACEHOLDER}}`, "g"),
  //     globalStats.documentCount.toString(),
  //   );
  //   // Replace total word counts and hit counts from fullTextStatistics
  //   globalStats.fullTextStatistics.forEach((stats, index) => {
  //     // Replace total word counts
  //     query = query.replace(
  //       new RegExp(`{${this.TOTAL_WORD_COUNT_PLACEHOLDER}-${index}}`, "g"),
  //       stats.totalWordCount.toString(),
  //     );
  //     // Replace hit counts
  //     query = query.replace(
  //       new RegExp(`{${this.HIT_COUNTS_ARRAY_PLACEHOLDER}-${index}}`, "g"),
  //       `[${stats.hitCounts.join(",")}]`,
  //     );
  //   });
  //   return query;
  // }

  /**
   * Replaces the statistics placeholders in `query`.
   *
   * Placeholder indexes `0..componentCount-1` are probed in order; the n-th
   * index whose word-count placeholder actually occurs in `query` is filled
   * from the n-th entry of `globalStats.fullTextStatistics`. The hit-count
   * placeholder of an index is only replaced when its word-count placeholder
   * is present.
   *
   * @param query - Query text or order-by expression containing placeholders.
   * @param globalStats - Needs `documentCount` and a `fullTextStatistics` array.
   * @param componentCount - Number of placeholder indexes to probe.
   * @returns The text with all recognised placeholders substituted.
   * @throws Error when `globalStats` fails validation or has fewer statistics
   *   entries than the text has indexed placeholders.
   */
  replacePlaceholdersWorkaroud(query, globalStats, componentCount) {
    // NOTE(review): this also rejects a documentCount of 0 — confirm that is intended.
    if (!globalStats || !globalStats.documentCount || !Array.isArray(globalStats.fullTextStatistics)) {
      throw new Error("GlobalStats validation failed");
    }
    // Literal string search with function replacers: neither the placeholder
    // nor the substituted value is interpreted as a pattern.
    const documentCountText = globalStats.documentCount.toString();
    let rewritten = query.replaceAll(
      `{${this.TOTAL_DOCUMENT_COUNT_PLACEHOLDER}}`,
      () => documentCountText
    );
    let statisticsIndex = 0;
    for (let i = 0; i < componentCount; i++) {
      const wordCountPlaceholder = `{${this.TOTAL_WORD_COUNT_PLACEHOLDER}-${i}}`;
      const hitCountPlaceholder = `{${this.HIT_COUNTS_ARRAY_PLACEHOLDER}-${i}}`;
      if (!rewritten.includes(wordCountPlaceholder)) {
        continue;
      }
      const stats = globalStats.fullTextStatistics[statisticsIndex];
      if (!stats) {
        throw new Error(
          `GlobalStats validation failed: missing full text statistics for placeholder index ${i}`
        );
      }
      const totalWordCountText = stats.totalWordCount.toString();
      const hitCountsText = `[${stats.hitCounts.join(",")}]`;
      rewritten = rewritten.replaceAll(wordCountPlaceholder, () => totalWordCountText);
      rewritten = rewritten.replaceAll(hitCountPlaceholder, () => hitCountsText);
      statisticsIndex++;
    }
    return rewritten;
  }

  /**
   * Weighted Reciprocal Rank Fusion: sum over components of `weight / (k + rank)`.
   *
   * @param ranks - Rank of one item in each component.
   * @param k - Smoothing constant added to every rank.
   * @param componentWeights - One `{ weight }` per component.
   * @throws Error when `ranks` and `componentWeights` differ in length.
   */
  computeRRFScore = (ranks, k, componentWeights) => {
    if (ranks.length !== componentWeights.length) {
      throw new Error("Ranks and component weights length mismatch");
    }
    let rrfScore = 0;
    for (let i = 0; i < ranks.length; i++) {
      const rank = ranks[i];
      const weight = componentWeights[i].weight;
      rrfScore += weight * (1 / (k + rank));
    }
    return rrfScore;
  };

  /**
   * Builds one `{ weight, comparator }` per component query.
   *
   * The weight comes from `hybridSearchQueryInfo.componentWeights`, or is 1
   * for every component when no weights are supplied. The comparator sorts
   * scores ascending when the component's first order-by entry contains
   * "Ascending", and descending otherwise (including when there is no order-by).
   *
   * @throws Error when a component with an order-by is not non-streaming or
   *   does not have exactly one order-by expression.
   */
  extractComponentWeights() {
    const hybridSearchQueryInfo = this.partitionedQueryExecutionInfo.hybridSearchQueryInfo;
    const useDefaultComponentWeight =
      !hybridSearchQueryInfo.componentWeights || hybridSearchQueryInfo.componentWeights.length === 0;
    const result = [];
    for (let index = 0; index < hybridSearchQueryInfo.componentQueryInfos.length; ++index) {
      const queryInfo = hybridSearchQueryInfo.componentQueryInfos[index];
      const hasOrderBy = queryInfo.orderBy && queryInfo.orderBy.length > 0;
      if (hasOrderBy) {
        if (!queryInfo.hasNonStreamingOrderBy) {
          throw new Error("The component query should have a non streaming order by");
        }
        if (!queryInfo.orderByExpressions || queryInfo.orderByExpressions.length !== 1) {
          throw new Error("The component query should have exactly one order by expression");
        }
      }
      const componentWeight = useDefaultComponentWeight ? 1 : hybridSearchQueryInfo.componentWeights[index];
      const sortOrder = hasOrderBy && queryInfo.orderBy[0].includes("Ascending") ? 1 : -1;
      result.push({
        weight: componentWeight,
        comparator: (x, y) => sortOrder * (x - y)
      });
    }
    return result;
  }
}
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  HybridQueryExecutionContext,
  HybridQueryExecutionContextBaseStates
});