@finos/legend-application-marketplace
Version:
Legend Marketplace application core
1,105 lines (1,104 loc) • 52.9 kB
JavaScript
/**
* Copyright (c) 2026-present, Goldman Sachs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { action, computed, flow, makeObservable, observable, runInAction, } from 'mobx';
import { ActionState, assertErrorThrown, guaranteeNonNullable, } from '@finos/legend-shared';
import { LegendAIMessageRole, LegendAIQuestionIntent, LegendAIResolvedEntities, TDSServiceSourceType, classifyQuestionIntentFast, findLegendAIPlugin, processQuestionViaOrchestrator, handleMetadataQuestion, buildMetadataOverview, attachMetadataOverview, generateAndJudgeSql, executeSqlAndReport, analyzeOrchestratorResults, addThinkingStep, completeThinkingSteps, finishWithThinkingError, classifyError, updateLastAssistant, buildConversationHistory, createMessagePair, elapsedSeconds, LEGEND_AI_ORCHESTRATOR_FALLBACK_ACTION_ID, cleanLlmSqlResponse, isValidSqlCorrection, } from '@finos/legend-lego/legend-ai';
import { QueryExplicitExecutionContextInfo, extractElementNameFromPath, } from '@finos/legend-graph';
import { generateGAVCoordinates } from '@finos/legend-storage';
import { V1_deserializeDataSpace, } from '@finos/legend-extension-dsl-data-space/graph';
import { convertAutosuggestResultToSearchResult } from '../../utils/SearchUtils.js';
import { DataProductSearchResult, DataProductSearchResponse, DataProductDetailsType, DataProductSearchResultDetailsType, FieldSearchType, GroupedFieldSearchResponse, LakehouseDataProductSearchResultDetails, LakehouseSDLCDataProductSearchResultOrigin, LegacyDataProductSearchResultDetails, EntitySearchResponse, } from '@finos/legend-server-marketplace';
/**
 * Stages of the marketplace AI chat. The string values are what the chat
 * store keeps in its `stage` field.
 */
export const MarketplaceAIChatStage = {
  IDLE: 'idle',
  SEARCHING: 'searching',
  PRODUCT_SELECTION: 'product-selection',
  QUERYING: 'querying',
  RESULTS: 'results',
};
// Weight applied to a product's field coverage before it is added to the
// normalized similarity in computeScoredCandidates.
const FIELD_COVERAGE_BOOST = 0.6;
// Cap on field-derived products, on the reranked product shortlist, and on
// the entity candidates kept in lastEntityCandidates.
const MAX_PRODUCT_SUGGESTIONS = 3;
// Limit passed to mergeInterleaved when combining product-search and
// field-derived candidates.
const MERGED_CANDIDATE_LIMIT = 6;
// Page size requested from dataProductSearch.
const PRODUCT_SEARCH_PAGE_SIZE = 6;
// Page size requested from fieldSearch.
const FIELD_SEARCH_PAGE_SIZE = 5;
// Cap on datasets/services summarised, returned or listed per question.
const MAX_RELEVANT_SERVICES = 5;
// Number of description characters forwarded to the product reranker.
const DESCRIPTION_PREVIEW_LENGTH = 200;
// Page size requested from entitySearch.
const DATASET_SEARCH_PAGE_SIZE = 20;
// Suggested queries used when the application options do not provide
// `defaultAISuggestedQueries`.
const DEFAULT_SUGGESTED_QUERIES = [
'What BVAL bond pricing data is available?',
'Show me credit risk data products',
'Find FX rates and currency data',
'What equity analytics data do we have?',
];
/**
 * Extracts `{ groupId, artifactId, versionId, path }` from a search result.
 *
 * - Legacy details: the four fields are returned exactly as stored.
 * - Lakehouse details with an SDLC origin: the origin's fields are returned,
 *   with nullish values replaced by `''`.
 * - Anything else: all four fields are `''`.
 *
 * @param product search result carrying `dataProductDetails`
 * @returns the four coordinate fields
 */
export function unwrapProductDetails(product) {
  const { dataProductDetails } = product;
  if (dataProductDetails instanceof LegacyDataProductSearchResultDetails) {
    const { groupId, artifactId, versionId, path } = dataProductDetails;
    return { groupId, artifactId, versionId, path };
  }
  // Only Lakehouse products that originate from SDLC carry coordinates.
  const sdlcOrigin =
    dataProductDetails instanceof LakehouseDataProductSearchResultDetails &&
    dataProductDetails.origin instanceof LakehouseSDLCDataProductSearchResultOrigin
      ? dataProductDetails.origin
      : undefined;
  return {
    groupId: sdlcOrigin?.groupId ?? '',
    artifactId: sdlcOrigin?.artifactId ?? '',
    versionId: sdlcOrigin?.versionId ?? '',
    path: sdlcOrigin?.path ?? '',
  };
}
/**
 * Formats a coordinates record (`group_id`, `artifact_id`, `version`) through
 * `generateGAVCoordinates`.
 *
 * @param coords record with `group_id`, `artifact_id` and `version`
 * @returns whatever `generateGAVCoordinates` returns for those three values
 */
function toCoordinatesString(coords) {
  const { group_id: groupId, artifact_id: artifactId, version } = coords;
  return generateGAVCoordinates(groupId, artifactId, version);
}
export class LegendMarketplaceAIChatStore {
// Owning store, assigned in the constructor; used here to reach the
// application config, plugin manager, environment state and server clients.
baseStore;
// Current MarketplaceAIChatStage value.
stage = MarketplaceAIChatStage.IDLE;
// Text of the question input; cleared when a question is submitted.
questionText = '';
// Chat transcript; a message pair is appended for every submitted question.
messages = [];
// True while submitQuery/askFollowUp is running; guards against re-entry.
isSending = false;
// Products offered for selection after a product search.
suggestedProducts = [];
// Scored candidates backing suggestedProducts.
scoredCandidates = [];
// `{ name, coordinates }` entries added via addScopeProduct (at most three).
scopeProducts = [];
// Search result chosen via selectDataProduct, if any.
selectedProduct = undefined;
// Coordinates (`data_product`, `group_id`, `artifact_id`, `version`) of the
// product questions are currently asked against.
selectedProductCoordinates = undefined;
// Metadata (name, coordinates string, service summaries, ...) of that product.
selectedProductMetadata = undefined;
// Mapping/runtime context set by resolveExecutionContext.
pureExecutionContext = undefined;
// NOTE(review): not written anywhere in the visible part of this file other
// than being reset in clearChat — presumably set by the fallback offer logic.
pendingFallbackQuestion = undefined;
// Service descriptors built from the latest entity search.
resolvedProductServices = [];
// Result of the latest entity disambiguation (or its fallback).
lastResolvedEntities = undefined;
// Top entity candidates from the latest entity search.
lastEntityCandidates = [];
// Lakehouse data product id of the selected product, when it has one.
selectedDataProductId = undefined;
// NOTE(review): not registered with makeObservable and not used in the
// visible part of this file.
searchState = ActionState.create();
/**
 * Registers the MobX annotations for this store and keeps a reference to the
 * owning base store.
 *
 * @param baseStore owning store; stored on `this.baseStore`
 */
constructor(baseStore) {
makeObservable(this, {
// Observable state
stage: observable,
questionText: observable,
messages: observable,
isSending: observable,
suggestedProducts: observable,
scoredCandidates: observable,
scopeProducts: observable,
selectedProduct: observable,
selectedProductCoordinates: observable,
selectedProductMetadata: observable,
pureExecutionContext: observable,
pendingFallbackQuestion: observable,
resolvedProductServices: observable,
lastResolvedEntities: observable,
lastEntityCandidates: observable,
selectedDataProductId: observable,
// Synchronous mutations
setQuestionText: action,
setStage: action,
clearChat: action,
selectDataProduct: action,
selectAutosuggestProduct: action,
deselectProduct: action,
addScopeProduct: action,
removeScopeProduct: action,
// Generator-based asynchronous flows
submitQuery: flow,
askFollowUp: flow,
runOrchestratorFallback: flow,
// Derived values
config: computed,
plugin: computed,
isEnabled: computed,
lastUserMessageText: computed,
welcomeSuggestedQueries: computed,
});
this.baseStore = baseStore;
}
get config() {
return this.baseStore.applicationStore.config.legendAIConfig;
}
get plugin() {
return findLegendAIPlugin(this.baseStore.pluginManager.getApplicationPlugins());
}
get isEnabled() {
return this.config.enabled && this.plugin !== undefined;
}
get lastUserMessageText() {
return (this.messages.findLast((m) => m.role === LegendAIMessageRole.USER)
?.text ?? '');
}
get welcomeSuggestedQueries() {
return (this.baseStore.applicationStore.config.options
.defaultAISuggestedQueries ?? DEFAULT_SUGGESTED_QUERIES);
}
setQuestionText(text) {
this.questionText = text;
}
setStage(stage) {
this.stage = stage;
}
clearChat() {
this.messages = [];
this.suggestedProducts = [];
this.scoredCandidates = [];
this.selectedProduct = undefined;
const firstScope = this.scopeProducts[0];
this.selectedProductCoordinates = firstScope?.coordinates;
this.selectedProductMetadata = firstScope
? {
name: firstScope.name,
coordinates: toCoordinatesString(firstScope.coordinates),
serviceSummaries: [],
}
: undefined;
this.pureExecutionContext = undefined;
this.pendingFallbackQuestion = undefined;
this.resolvedProductServices = [];
this.lastResolvedEntities = undefined;
this.lastEntityCandidates = [];
this.selectedDataProductId = undefined;
this.stage = MarketplaceAIChatStage.IDLE;
this.questionText = '';
this.isSending = false;
}
createMessageSetter() {
return (updater) => {
runInAction(() => {
if (typeof updater === 'function') {
this.messages = updater(this.messages);
}
else {
this.messages = updater;
}
});
};
}
buildContextPromise(question, metadata, setMessages) {
if (!this.plugin) {
return Promise.resolve();
}
return this.plugin
.buildDataContextSummary(question, metadata, this.config)
.then((contextText) => {
if (contextText) {
updateLastAssistant(setMessages, () => ({
dataContext: contextText,
}));
}
})
.catch(() => {
/* Non-fatal */
});
}
buildConversationHistory() {
return buildConversationHistory(this.messages);
}
extractMetadata(result, coordinates) {
const metadata = {
name: result.dataProductTitle ?? 'Unknown',
coordinates: toCoordinatesString(coordinates),
serviceSummaries: [],
accessPointGroups: [],
};
if (result.dataProductDescription !== null) {
metadata.description = result.dataProductDescription;
}
const tags1 = result.tags1;
const tags2 = result.tags2;
if (tags1.length > 0 || tags2.length > 0) {
metadata.tags = [...tags1, ...tags2].map((t) => ({
profile: 'tag',
value: t,
}));
}
return metadata;
}
buildTitleFromPath(path, artifactId) {
const parts = path.split('::');
const filtered = parts.filter((p) => p.toLowerCase() !== 'dataspace' &&
p.toLowerCase() !== 'model' &&
!p.toLowerCase().endsWith('dataspace'));
if (filtered.length > 0) {
return filtered
.map((p) => p
.replaceAll(/(?<lower>[a-z])(?<upper>[A-Z])/g, '$<lower> $<upper>')
.replace(/^./, (c) => c.toUpperCase()))
.join(' ');
}
return artifactId
.split('-')
.map((w) => w.charAt(0).toUpperCase() + w.slice(1))
.join(' ');
}
async multiSignalSearch(question, setMessages) {
const env = this.baseStore.envState.lakehouseEnvironment;
addThinkingStep(setMessages, 'Searching products and fields in parallel...');
const [productRaw, fieldRaw] = await Promise.all([
this.baseStore.marketplaceServerClient.dataProductSearch(question, env, FieldSearchType.HYBRID, [], PRODUCT_SEARCH_PAGE_SIZE, 1, false),
this.baseStore.marketplaceServerClient
.fieldSearch(env, {
query: question,
searchType: FieldSearchType.HYBRID,
pageSize: FIELD_SEARCH_PAGE_SIZE,
pageNumber: 1,
})
.catch(() => null),
]);
const productResponse = DataProductSearchResponse.serialization.fromJson(productRaw);
const productResults = productResponse.results.filter((r) => r.dataProductDetails instanceof
LakehouseDataProductSearchResultDetails ||
r.dataProductDetails instanceof LegacyDataProductSearchResultDetails);
let fieldResults = [];
if (fieldRaw) {
try {
const fieldResponse = GroupedFieldSearchResponse.serialization.fromJson(fieldRaw);
fieldResults = fieldResponse.results;
}
catch {
/* Non-fatal: field search is best-effort */
}
}
if (fieldResults.length > 0) {
addThinkingStep(setMessages, `Found ${fieldResults.length} matching field${fieldResults.length > 1 ? 's' : ''} across products`);
}
return { productResults, fieldResults };
}
deriveProductsFromFieldResults(fieldResults, existingProducts) {
const existingKeys = new Set(existingProducts.map((p) => {
const { groupId, artifactId } = unwrapProductDetails(p);
return `${groupId}:${artifactId}`;
}));
const productFieldCounts = new Map();
for (const dp of fieldResults.flatMap((entry) => entry.dataProducts)) {
if (!dp.groupId || !dp.artifactId || !dp.versionId) {
continue;
}
const key = `${dp.groupId}:${dp.artifactId}`;
if (existingKeys.has(key)) {
continue;
}
const existing = productFieldCounts.get(key);
if (existing) {
existing.fieldCount += 1;
}
else {
productFieldCounts.set(key, {
path: dp.path,
productType: dp.productType,
groupId: dp.groupId,
artifactId: dp.artifactId,
versionId: dp.versionId,
...(dp.dataProductId === undefined
? {}
: { dataProductId: dp.dataProductId }),
fieldCount: 1,
});
}
}
const sorted = [...productFieldCounts.values()].sort((a, b) => b.fieldCount - a.fieldCount);
return sorted
.slice(0, MAX_PRODUCT_SUGGESTIONS)
.map((entry) => this.buildDerivedProduct(entry));
}
buildDerivedProduct(entry) {
const product = new DataProductSearchResult();
product.dataProductTitle = this.buildTitleFromPath(entry.path, entry.artifactId);
product.dataProductDescription = null;
product.tags1 = [];
product.tags2 = [];
product.tag_score = 0;
product.similarity = 0;
if (entry.productType === DataProductSearchResultDetailsType.LEGACY) {
const details = new LegacyDataProductSearchResultDetails();
details.groupId = entry.groupId;
details.artifactId = entry.artifactId;
details.versionId = entry.versionId;
details.path = entry.path;
product.dataProductDetails = details;
}
else {
const origin = new LakehouseSDLCDataProductSearchResultOrigin();
origin.groupId = entry.groupId;
origin.artifactId = entry.artifactId;
origin.versionId = entry.versionId;
origin.path = entry.path;
const details = new LakehouseDataProductSearchResultDetails();
details.dataProductId = entry.dataProductId ?? '';
details.deploymentId = 0;
details.producerEnvironmentName = '';
details.producerEnvironmentType = undefined;
details.origin = origin;
product.dataProductDetails = details;
}
return product;
}
computeScoredCandidates(productResults, fieldResults) {
const allFieldNames = fieldResults.map((f) => f.fieldName);
const maxSimilarity = productResults.length > 0
? Math.max(...productResults.map((p) => p.similarity))
: 1;
const scoreProduct = (product) => {
const { groupId, artifactId, path: productPath, } = unwrapProductDetails(product);
const matchedFields = [];
const missingFields = [];
for (const fieldEntry of fieldResults) {
const inProduct = fieldEntry.dataProducts.some((dp) => dp.path === productPath ||
(dp.groupId &&
dp.artifactId &&
groupId === dp.groupId &&
artifactId === dp.artifactId) ||
(productPath.length > 0 && dp.path.includes(productPath)) ||
(dp.path.length > 0 && productPath.includes(dp.path)));
if (inProduct) {
matchedFields.push(fieldEntry.fieldName);
}
else {
missingFields.push(fieldEntry.fieldName);
}
}
const productSimilarity = product.similarity;
const normalizedSimilarity = maxSimilarity > 0 ? productSimilarity / maxSimilarity : 0;
const fieldCoverage = allFieldNames.length > 0
? matchedFields.length / allFieldNames.length
: 0;
const fieldIntersection = allFieldNames.length > 0 && missingFields.length === 0 ? 1 : 0;
const compositeScore = allFieldNames.length > 0
? normalizedSimilarity + FIELD_COVERAGE_BOOST * fieldCoverage
: normalizedSimilarity;
return {
product,
productSimilarity,
fieldCoverage,
fieldIntersection,
matchedFields,
missingFields,
compositeScore,
};
};
// Score product search results
const productCandidates = productResults.map(scoreProduct);
productCandidates.sort((a, b) => b.compositeScore - a.compositeScore);
// Score field-derived products (discovered from field search, not in product search)
const fieldDerived = this.deriveProductsFromFieldResults(fieldResults, productResults);
const fieldCandidates = fieldDerived.map(scoreProduct);
fieldCandidates.sort((a, b) => b.fieldCoverage - a.fieldCoverage);
// Merge: interleave top product results with top field-derived results
// so both signals are represented in the final list
return this.mergeInterleaved(productCandidates, fieldCandidates, MERGED_CANDIDATE_LIMIT);
}
mergeInterleaved(productCandidates, fieldCandidates, limit) {
const merged = [];
const seenKeys = new Set();
let pIdx = 0;
let fIdx = 0;
const tryAdd = (candidate) => {
const { groupId, artifactId } = unwrapProductDetails(candidate.product);
const key = `${groupId}:${artifactId}`;
if (!seenKeys.has(key)) {
seenKeys.add(key);
merged.push(candidate);
}
};
while (merged.length < limit &&
(pIdx < productCandidates.length || fIdx < fieldCandidates.length)) {
// Add 2 from product search, then 1 from field-derived, repeat
const fromProduct = merged.length % 3 !== 2;
if (pIdx < productCandidates.length &&
(fromProduct || fIdx >= fieldCandidates.length)) {
tryAdd(guaranteeNonNullable(productCandidates[pIdx]));
pIdx++;
}
else if (fIdx < fieldCandidates.length) {
tryAdd(guaranteeNonNullable(fieldCandidates[fIdx]));
fIdx++;
}
else {
break;
}
}
return merged;
}
async llmRerankProducts(question, candidates, fieldResults, setMessages) {
const plugin = this.plugin;
if (!plugin || candidates.length <= MAX_PRODUCT_SUGGESTIONS) {
return candidates.slice(0, MAX_PRODUCT_SUGGESTIONS);
}
addThinkingStep(setMessages, 'Using AI to rank best matching products...');
const candidateInputs = candidates.map((c) => ({
title: c.product.dataProductTitle ?? 'Unknown',
description: c.product.dataProductDescription
? c.product.dataProductDescription.slice(0, DESCRIPTION_PREVIEW_LENGTH)
: '',
matchedFields: c.matchedFields,
}));
const allFieldNames = fieldResults.map((f) => f.fieldName);
const indices = await plugin.rerankProducts(question, candidateInputs, allFieldNames, MAX_PRODUCT_SUGGESTIONS, this.config);
if (indices && indices.length > 0) {
return this.buildRankedList(indices, candidates, MAX_PRODUCT_SUGGESTIONS);
}
return candidates.slice(0, MAX_PRODUCT_SUGGESTIONS);
}
buildRankedList(indices, candidates, limit) {
const ranked = [];
for (const idx of indices) {
if (ranked.length >= limit) {
break;
}
if (idx >= 0 && idx < candidates.length) {
ranked.push(guaranteeNonNullable(candidates[idx]));
}
}
for (const c of candidates) {
if (ranked.length >= limit) {
break;
}
if (!ranked.includes(c)) {
ranked.push(c);
}
}
return ranked;
}
*submitQuery(text) {
const trimmed = text.trim();
if (!trimmed || this.isSending || !this.plugin) {
return;
}
this.isSending = true;
this.questionText = '';
this.messages = [...this.messages, ...createMessagePair(trimmed)];
const setMessages = this.createMessageSetter();
try {
if (this.selectedProductCoordinates) {
this.stage = MarketplaceAIChatStage.QUERYING;
const relevantDatasets = (yield this.enrichWithEntitySearch(trimmed, setMessages));
yield this.dispatchWithSql2(trimmed, relevantDatasets, setMessages);
this.stage = MarketplaceAIChatStage.RESULTS;
return;
}
this.stage = MarketplaceAIChatStage.SEARCHING;
const { productResults, fieldResults } = (yield this.multiSignalSearch(trimmed, setMessages));
const candidates = this.computeScoredCandidates(productResults, fieldResults);
if (candidates.length === 0) {
completeThinkingSteps(setMessages);
updateLastAssistant(setMessages, () => ({
textAnswer: 'I could not find any data products matching your query. Please try rephrasing or use more specific terms.',
isProcessing: false,
}));
this.stage = MarketplaceAIChatStage.IDLE;
}
else {
const topCandidates = (yield this.llmRerankProducts(trimmed, candidates, fieldResults, setMessages));
const top = guaranteeNonNullable(topCandidates[0]);
addThinkingStep(setMessages, `Top candidate: ${top.product.dataProductTitle ?? 'Unknown'} (${(top.compositeScore * 100).toFixed(0)}% composite)`);
completeThinkingSteps(setMessages);
this.suggestedProducts = topCandidates.map((c) => c.product);
this.scoredCandidates = topCandidates;
const hasFieldInfo = fieldResults.length > 0 &&
topCandidates.some((c) => c.matchedFields.length > 0);
let message = `I found ${candidates.length} data product${candidates.length > 1 ? 's' : ''} that may contain the data you need.`;
if (hasFieldInfo) {
message += ' Field availability is shown for each product.';
}
message += ' Please select one to continue:';
updateLastAssistant(setMessages, () => ({
textAnswer: message,
isProcessing: false,
}));
this.stage = MarketplaceAIChatStage.PRODUCT_SELECTION;
}
}
catch (error) {
assertErrorThrown(error);
finishWithThinkingError(setMessages, error.message, Date.now(), classifyError(error));
this.stage = MarketplaceAIChatStage.IDLE;
}
finally {
this.isSending = false;
}
}
selectDataProduct(result) {
const { groupId, artifactId, versionId, path } = unwrapProductDetails(result);
if (!groupId || !artifactId || !versionId || !path) {
return;
}
const coordinates = {
data_product: path,
group_id: groupId,
artifact_id: artifactId,
version: versionId,
};
this.selectedProduct = result;
this.selectedProductCoordinates = coordinates;
this.selectedProductMetadata = this.extractMetadata(result, coordinates);
this.suggestedProducts = [];
const details = result.dataProductDetails;
if (details instanceof LakehouseDataProductSearchResultDetails) {
this.selectedDataProductId = details.dataProductId;
}
else {
this.selectedDataProductId = undefined;
}
}
selectAutosuggestProduct(result) {
const searchResult = convertAutosuggestResultToSearchResult(result);
this.selectDataProduct(searchResult);
}
deselectProduct() {
this.selectedProduct = undefined;
this.selectedProductCoordinates = undefined;
this.selectedProductMetadata = undefined;
this.pureExecutionContext = undefined;
this.resolvedProductServices = [];
this.lastResolvedEntities = undefined;
this.lastEntityCandidates = [];
this.selectedDataProductId = undefined;
this.stage = MarketplaceAIChatStage.PRODUCT_SELECTION;
}
addScopeProduct(result) {
const details = result.dataProductDetails;
let groupId;
let artifactId;
let versionId;
let path;
if (details._type === DataProductDetailsType.LAKEHOUSE &&
details.origin !== undefined) {
groupId = details.origin.groupId;
artifactId = details.origin.artifactId;
versionId = details.origin.versionId;
path = details.origin.path;
}
else {
groupId = details.groupId;
artifactId = details.artifactId;
versionId = details.versionId;
path = details.path;
}
if (!groupId || !artifactId || !versionId || !path) {
return;
}
const key = generateGAVCoordinates(groupId, artifactId, versionId);
if (this.scopeProducts.some((p) => toCoordinatesString(p.coordinates) === key)) {
return;
}
if (this.scopeProducts.length >= 3) {
return;
}
const coords = {
data_product: path,
group_id: groupId,
artifact_id: artifactId,
version: versionId,
};
this.scopeProducts = [
...this.scopeProducts,
{ name: result.dataProductName, coordinates: coords },
];
if (this.scopeProducts.length === 1) {
this.selectedProductCoordinates = coords;
this.selectedProductMetadata = {
name: result.dataProductName,
description: result.dataProductDescription,
coordinates: key,
serviceSummaries: [],
};
this.selectedDataProductId = details.dataProductId;
}
}
removeScopeProduct(index) {
this.scopeProducts = this.scopeProducts.filter((_, i) => i !== index);
if (this.selectedProduct === undefined) {
const firstScope = this.scopeProducts[0];
this.selectedProductCoordinates = firstScope?.coordinates;
this.selectedProductMetadata = firstScope
? {
name: firstScope.name,
coordinates: toCoordinatesString(firstScope.coordinates),
serviceSummaries: [],
}
: undefined;
this.pureExecutionContext = undefined;
this.resolvedProductServices = [];
this.lastResolvedEntities = undefined;
this.lastEntityCandidates = [];
this.selectedDataProductId = undefined;
}
}
async resolveExecutionContext(setMessages) {
const product = this.selectedProduct;
const coordinates = this.selectedProductCoordinates;
if (!coordinates) {
return;
}
addThinkingStep(setMessages, 'Resolving execution context...');
try {
let dataSpace;
if (product) {
const details = product.dataProductDetails;
if (details instanceof LegacyDataProductSearchResultDetails) {
const entity = await this.baseStore.depotServerClient.getVersionEntity(details.groupId, details.artifactId, details.versionId, details.path);
dataSpace = V1_deserializeDataSpace(entity.content);
}
else if (details instanceof LakehouseDataProductSearchResultDetails &&
details.origin instanceof
LakehouseSDLCDataProductSearchResultOrigin &&
details.origin.groupId &&
details.origin.artifactId &&
details.origin.versionId &&
details.origin.path) {
const entity = await this.baseStore.depotServerClient.getVersionEntity(details.origin.groupId, details.origin.artifactId, details.origin.versionId, details.origin.path);
dataSpace = V1_deserializeDataSpace(entity.content);
}
}
else {
const entity = await this.baseStore.depotServerClient.getVersionEntity(coordinates.group_id, coordinates.artifact_id, coordinates.version, coordinates.data_product);
dataSpace = V1_deserializeDataSpace(entity.content);
}
if (dataSpace && dataSpace.executionContexts.length > 0) {
const defaultCtxName = dataSpace.defaultExecutionContext;
const execCtx = dataSpace.executionContexts.find((c) => c.name === defaultCtxName) ??
guaranteeNonNullable(dataSpace.executionContexts[0]);
const ctx = new QueryExplicitExecutionContextInfo();
ctx.mapping = execCtx.mapping.path;
ctx.runtime = execCtx.defaultRuntime.path;
runInAction(() => {
this.pureExecutionContext = ctx;
});
}
}
catch (error) {
assertErrorThrown(error);
addThinkingStep(setMessages, `Warning: Could not resolve execution context — ${error.message}`);
}
}
*askFollowUp(text) {
const trimmed = text.trim();
if (!trimmed ||
this.isSending ||
!this.plugin ||
!this.selectedProductCoordinates) {
return;
}
this.isSending = true;
this.questionText = '';
this.messages = [...this.messages, ...createMessagePair(trimmed)];
const setMessages = this.createMessageSetter();
try {
this.stage = MarketplaceAIChatStage.QUERYING;
const relevantDatasets = (yield this.enrichWithEntitySearch(trimmed, setMessages));
yield this.dispatchWithSql2(trimmed, relevantDatasets, setMessages);
this.stage = MarketplaceAIChatStage.RESULTS;
}
catch (error) {
assertErrorThrown(error);
finishWithThinkingError(setMessages, error.message, Date.now(), classifyError(error));
}
finally {
this.isSending = false;
}
}
async enrichWithEntitySearch(question, setMessages) {
const coordinates = this.selectedProductCoordinates;
if (!coordinates) {
return [];
}
addThinkingStep(setMessages, 'Searching for relevant datasets and fields...');
try {
const env = this.baseStore.envState.lakehouseEnvironment;
const entitySearchOptions = {
groupId: coordinates.group_id,
artifactId: coordinates.artifact_id,
versionId: coordinates.version,
path: coordinates.data_product,
...(this.selectedDataProductId === undefined
? {}
: { dataProductId: this.selectedDataProductId }),
searchType: FieldSearchType.HYBRID,
pageSize: DATASET_SEARCH_PAGE_SIZE,
};
const [primaryRaw, diversityRaw] = await Promise.all([
this.baseStore.marketplaceServerClient.entitySearch(env, question, entitySearchOptions),
this.baseStore.marketplaceServerClient
.entitySearch(env, extractElementNameFromPath(coordinates.data_product), entitySearchOptions)
.catch(() => undefined),
]);
const primaryResponse = EntitySearchResponse.serialization.fromJson(primaryRaw);
const results = primaryResponse.results;
this.mergeDiversityResults(results, diversityRaw);
if (results.length > 0) {
const topDataset = guaranteeNonNullable(results[0]);
addThinkingStep(setMessages, `Found ${results.length} relevant dataset${results.length > 1 ? 's' : ''} — top: ${topDataset.datasetName}`);
if (this.selectedProductMetadata) {
const datasetSummaries = results
.slice(0, MAX_RELEVANT_SERVICES)
.map((r) => ({
title: r.datasetName,
...(r.datasetDescription === undefined
? {}
: { description: r.datasetDescription }),
}));
const existingTitles = new Set(this.selectedProductMetadata.serviceSummaries.map((s) => s.title));
const newSummaries = datasetSummaries.filter((s) => !existingTitles.has(s.title));
const currentMetadata = this.selectedProductMetadata;
runInAction(() => {
this.selectedProductMetadata = {
...currentMetadata,
serviceSummaries: [
...currentMetadata.serviceSummaries,
...newSummaries,
],
};
});
}
this.buildServicesFromEntitySearch(results, setMessages);
await this.resolveEntityCandidates(question, results, coordinates);
}
return results.map((r) => r.datasetName);
}
catch (error) {
assertErrorThrown(error);
addThinkingStep(setMessages, `Warning: Dataset search unavailable — ${error.message}`);
return [];
}
}
async resolveEntityCandidates(question, results, coordinates) {
const entitiesWithPaths = results.filter((r) => r.datasetDetails?.modelPath);
if (entitiesWithPaths.length === 0 || !this.plugin) {
return;
}
const candidates = entitiesWithPaths.map((r) => ({
datasetName: r.datasetName,
...(r.datasetDescription === undefined
? {}
: { description: r.datasetDescription }),
modelPath: guaranteeNonNullable(r.datasetDetails).modelPath,
similarityScore: r.similarityScore,
}));
runInAction(() => {
this.lastEntityCandidates = candidates
.slice(0, MAX_PRODUCT_SUGGESTIONS)
.map((c) => ({
datasetName: c.datasetName,
modelPath: c.modelPath,
...(c.description === undefined
? {}
: { description: c.description }),
}));
});
try {
const resolved = await this.plugin.disambiguateEntity(question, candidates, this.config, this.pureExecutionContext, coordinates);
runInAction(() => {
this.lastResolvedEntities = resolved;
});
}
catch {
const topEntity = entitiesWithPaths[0];
if (topEntity) {
const resolved = new LegendAIResolvedEntities();
resolved.rootEntity =
topEntity.datasetDetails?.modelPath ?? topEntity.datasetName;
resolved.relatedEntities = entitiesWithPaths
.slice(1, MAX_RELEVANT_SERVICES + 1)
.map((r) => r.datasetDetails?.modelPath)
.filter((p) => p !== undefined);
runInAction(() => {
this.lastResolvedEntities = resolved;
});
}
}
}
mergeDiversityResults(results, diversityRaw) {
if (!diversityRaw) {
return;
}
const diversityResponse = EntitySearchResponse.serialization.fromJson(diversityRaw);
const existingPaths = new Set(results
.filter((r) => r.datasetDetails?.modelPath)
.map((r) => guaranteeNonNullable(r.datasetDetails).modelPath));
for (const r of diversityResponse.results) {
if (r.datasetDetails?.modelPath &&
!existingPaths.has(r.datasetDetails.modelPath)) {
results.push(r);
existingPaths.add(r.datasetDetails.modelPath);
}
}
}
buildServicesFromEntitySearch(results, setMessages) {
const coordinates = this.selectedProductCoordinates;
if (!coordinates) {
return;
}
// Skip service building for legacy dataspaces so the flow routes to the orchestrator instead.
const firstResult = results[0];
if (firstResult?.dataProductDetails?._type === DataProductDetailsType.LEGACY) {
return;
}
const fallbackPath = coordinates.data_product;
const services = [];
let totalColumns = 0;
for (const result of results) {
const fields = result.relatedFields ?? [];
const columns = fields.map((f) => ({
name: f.fieldName,
type: f.fieldType ?? 'String',
...(f.fieldDescription === undefined
? {}
: { documentation: f.fieldDescription }),
}));
totalColumns += columns.length;
services.push({
title: result.datasetName,
pattern: `/${result.datasetName}`,
columns,
parameters: [],
sourceType: TDSServiceSourceType.ACCESS_POINT,
dataProductPath: result.datasetDetails?.modelPath ?? fallbackPath,
});
}
if (services.length > 0) {
runInAction(() => {
this.resolvedProductServices = services;
});
addThinkingStep(setMessages, `Loaded ${services.length} relevant dataset${services.length > 1 ? 's' : ''} with ${totalColumns} fields`);
}
}
getServicesForQuery(relevantDatasetNames) {
if (this.resolvedProductServices.length === 0) {
return [];
}
if (relevantDatasetNames.length === 0) {
return this.resolvedProductServices.slice(0, MAX_RELEVANT_SERVICES);
}
const relevantSet = new Set(relevantDatasetNames.map((n) => n.toLowerCase()));
const relevant = [];
for (const service of this.resolvedProductServices) {
if (relevantSet.has(service.title.toLowerCase())) {
relevant.push(service);
}
}
relevant.sort((a, b) => {
const aIdx = relevantDatasetNames.findIndex((n) => n.toLowerCase() === a.title.toLowerCase());
const bIdx = relevantDatasetNames.findIndex((n) => n.toLowerCase() === b.title.toLowerCase());
return aIdx - bIdx;
});
return relevant;
}
async handleNoServices(question, setMessages, startTime, contextPromise) {
addThinkingStep(setMessages, 'No dataset schemas available — entity search did not return results for this data product.');
completeThinkingSteps(setMessages);
updateLastAssistant(setMessages, () => ({
textAnswer: 'Could not resolve dataset schemas for this data product. You can try the Legend AI Orchestrator to generate a Pure query instead.',
isProcessing: false,
}));
this.offerOrchestratorFallback(question, setMessages, startTime);
await contextPromise;
}
async handleZeroRows(judgedSql, question, services, coordinates, metadata, context, timing) {
const { startTime, contextPromise } = timing;
const { setMessages } = context;
const coordinatesStr = toCoordinatesString(coordinates);
const corrected = await this.attemptZeroRowCorrection(judgedSql, question, services, coordinatesStr, setMessages, coordinates);
if (corrected) {
await contextPromise;
await this.safeAnalyzeResults(question, corrected.sql, corrected.result, metadata, context, startTime);
return;
}
const datasetList = services
.slice(0, MAX_RELEVANT_SERVICES)
.map((s) => s.title)
.join(', ');
const datasetSuffix = services.length > MAX_RELEVANT_SERVICES
? ` and ${services.length - MAX_RELEVANT_SERVICES} more`
: '';
updateLastAssistant(setMessages, () => ({
textAnswer: `The SQL 2.0 query executed successfully but returned **0 rows**. The applied filters may not match any records in the available datasets, or the specific values may not exist.\n\n**Queried datasets:** ${datasetList}${datasetSuffix}`,
}));
this.offerOrchestratorFallback(question, setMessages, startTime);
await contextPromise;
}
async dispatchWithSql2(question, relevantDatasetNames, setMessages) {
const plugin = this.plugin;
const coordinates = this.selectedProductCoordinates;
const metadata = this.selectedProductMetadata;
if (!plugin || !coordinates || !metadata) {
return;
}
const config = this.config;
const history = this.buildConversationHistory();
const context = {
config,
plugin,
history,
setMessages,
};
const services = this.getServicesForQuery(relevantDatasetNames);
const contextPromise = services.length > 0
? this.buildContextPromise(question, metadata, setMessages)
: Promise.resolve();
const fastIntent = classifyQuestionIntentFast(question, true);
// ── Pure METADATA: fast classifier is confident, no data signals ──
if (fastIntent.intent === LegendAIQuestionIntent.METADATA &&
!fastIntent.ambiguous) {
await handleMetadataQuestion(question, metadata, context, Date.now(), services.length > 0);
return;
}
// ── Ambiguous: show both metadata overview + SQL results ──
if (fastIntent.ambiguous && services.length > 0) {
await this.handleAmbiguousIntent(question, services, coordinates, metadata, context, contextPromise, setMessages);
return;
}
await this.handleLlmJudgeFallback({ question, ...fastIntent }, services, coordinates, metadata, context, contextPromise, setMessages);
}
async handleLlmJudgeFallback(fastIntent, services, coordinates, metadata, context, contextPromise, setMessages) {
if (fastIntent.intent === LegendAIQuestionIntent.METADATA ||
fastIntent.ambiguous) {
addThinkingStep(setMessages, services.length > 0
? 'Checking product capabilities first and trying a data query if the datasets support it...'
: 'Checking product capabilities first...');
}
const intent = await context.plugin.classifyQuestionIntent(fastIntent.question, services.length > 0, context.config);
if (intent === LegendAIQuestionIntent.METADATA) {
await handleMetadataQuestion(fastIntent.question, metadata, context, Date.now(), services.length > 0);
return;
}
const startTime = Date.now();
if (services.length === 0) {
await this.handleNoServices(fastIntent.question, setMessages, startTime, contextPromise);
return;
}
await this.runSqlPath(fastIntent.question, services, coordinates, metadata, context, contextPromise, setMessages);
}
async handleAmbiguousIntent(question, services, coordinates, metadata, context, contextPromise, setMessages) {
addThinkingStep(setMessages, 'Intent is ambiguous, providing metadata context and querying data...');
let metadataOverview;
try {
addThinkingStep(setMessages, 'Building metadata context...');
metadataOverview = await buildMetadataOverview(question, metadata, context);
}
catch {
addThinkingStep(setMessages, 'Could not build metadata context — continuing with data query...');
}
try {
await this.runSqlPath(question, services, coordinates, metadata, context, contextPromise, setMessages);
if (metadataOverview) {
attachMetadataOverview(setMessages, metadataOverview);
}
}
catch (queryError) {
assertErrorThrown(queryError);
addThinkingStep(setMessages, 'Query failed, answering from product metadata...');
await handleMetadataQuestion(question, metadata, context, Date.now(), true);
}
}
/**
* Core SQL generation → execution → analysis pipeline.
* Extracted so both the direct DATA_QUERY path and the ambiguous-intent
* path can reuse it.
*/
async runSqlPath(question, services, coordinates, metadata, context, contextPromise, setMessages) {
const { config, plugin } = context;
const coordinatesStr = toCoordinatesString(coordinates);
const startTime = Date.now();
const totalColumns = services.reduce((sum, s) => sum + s.columns.length, 0);
addThinkingStep(setMessages, `Generating Alloy SQL 2.0 query with ${services.length} relevant dataset${services.length > 1 ? 's' : ''} (${totalColumns} columns)...`);
try {
const judgedSql = await generateAndJudgeSql(question, services, coordinatesStr, context, startTime);
if (!judgedSql) {
this.offerOrchestratorFallback(question, setMessages, startTime);
await contextPromise;
return;
}
const sqlGenTime = elapsedSeconds(startTime, 2);
completeThinkingSteps(setMessages);
updateLastAssistant(setMessages, () => ({
sql: judgedSql,
sqlGenTime,
isExecuting: true,
}));
const sqlResult = await executeSqlAndReport(judgedSql, services, config, plugin, setMessages, startTime, coordinates);
if (!sqlResult) {
this.offerOrchestratorFallback(question, setMessages, startTime);
await contextPromise;
return;
}
if (sqlResult.rows.length === 0) {
await this.handleZeroRows(judgedSql, question, services, coordinates, metadata, context, { startTime, contextPromise });
return;
}
await contextPromise;
await this.safeAnalyzeResults(question, judgedSql, sqlResult, metadata, context, startTime);
}
catch (error) {
assertErrorThrown(error);
addThinkingStep(setMessages, `SQL 2.0 failed: ${error.message}`);
const datasetContext = services.length > 0
? `\n\nAvailable datasets: ${services.map((s) => s.title).join(', ')}`
: '';
finishWithThinkingError(setMessages, `Alloy SQL 2.0 encountered an error: ${error.message}${datasetContext}`, startTime, classifyError(error));
this.offerOrchestratorFallback(question, setMessages, startTime);
await contextPromise;
}
}
async safeAnalyzeResults(question, sql, result, metadata, context, startTime) {
try {
await analyzeOrchestratorResults(question, sql, result, metadata, context, startTime);
}
catch {
completeThinkingSteps(context.setMessages);
updateLastAssistant(context.setMessages, () => ({
isProcessing: false,
thinkingDuration: elapsedSeconds(startTime),
}));
}
}
async attemptZeroRowCorrection(currentSql, question, services, coordinatesStr, setMessages, dataProductCoordinates) {
const config = this.config;
const plugin = this.plugin;
if (!plugin) {
return undefined;
}
addThinkingStep(setMessages, 'Query returned 0 rows, attempting filter correction...');
const prompt = plugin.buildZeroRowCorrectionPrompt(currentSql, question, services, coordinatesStr);
if (!prompt) {
return undefined;
}
try {
const raw = await plugin.callLLM(prompt, config);
const trimmed = cleanLlmSqlResponse(raw);
if (!isValidSqlCorrection(trimmed, currentSql)) {
return undefined;
}
addThinkingStep(setMessages, 'Retrying with corrected filters...');
updateLastAssistant(setMessages, () => ({ sql: trimmed }));
const retryResult = await plugin.executeLakehouseSql(trimmed, dataProductCoordinates, config);
if (retryResult.rows.length > 0) {
const sqlGenTime = elapsedSeconds(Date.now(), 2);
completeThinkingSteps(setMessages);
updateLastAssistant(setMessages, () => ({
sql: trimmed,
sqlGenTime,
isExecuting: false,
}));
return { sql: trimmed, result: retryResult };
}
}
catch {
/* empty */
}
return undefined;
}
offerOrchestratorFallback(question, setMessages, startTime) {
this.pendingFallbackQuestion = question;
completeThinkingSteps(setMessages);
updateLastAssistant(setMessages, () => ({
fallbackAction: {