// vespa-ts
// Version:
// A reusable TypeScript package for interacting with the Vespa search engine, with dependency injection support.
// 1,419 lines (1,415 loc) • 81.9 kB
// JavaScript
import { z } from 'zod';
// Vespa schema names, used below as `sddocname` / `type` literals.
const fileSchema = "file"; // Replace with your actual schema name
const userSchema = "user";
// calendar
const eventSchema = "event";
// mail
const mailAttachmentSchema = "mail_attachment";
const mailSchema = "mail";
// chat
const chatContainerSchema = "chat_container";
// The chat team schema is not meant to be searched; the data is
// stored in Vespa and fetched as needed.
const chatTeamSchema = "chat_team";
const chatMessageSchema = "chat_message";
const chatUserSchema = "chat_user";
// previous queries
const userQuerySchema = "user_query";
// data sources and the files attached to them
const datasourceSchema = "datasource";
const dataSourceFileSchema = "datasource_file";
// not using @ because of vite of frontend
/**
 * String identifiers of the apps a document can belong to.
 * Declared with `var` and merged into any existing `Apps` object, so the
 * binding behaves like a compiled TypeScript enum.
 */
var Apps = Object.assign(Apps || {}, {
    // Umbrella value that includes everything Google
    GoogleWorkspace: "google-workspace",
    // More granular Google products
    GoogleDrive: "google-drive",
    Gmail: "gmail",
    // Notion is not yet supported, so there is no "notion" member
    GoogleCalendar: "google-calendar",
    Slack: "slack",
    MCP: "mcp",
    Github: "github",
    Xyne: "xyne",
    DataSource: "data-source",
});
/**
 * Entity values for Google people records. Every member's value equals its name.
 */
var GooglePeopleEntity = GooglePeopleEntity || {};
for (const member of ["Contacts", "OtherContacts", "AdminDirectory"]) {
    GooglePeopleEntity[member] = member;
}
// Union of the literal names of all Vespa schemas declared above.
const Schemas = z.union([
z.literal(fileSchema),
z.literal(userSchema),
z.literal(mailSchema),
z.literal(eventSchema),
z.literal(userQuerySchema),
z.literal(mailAttachmentSchema),
z.literal(chatContainerSchema),
z.literal(chatTeamSchema),
z.literal(chatUserSchema),
z.literal(chatMessageSchema),
z.literal(datasourceSchema),
z.literal(dataSourceFileSchema),
]);
/** Entity values for mail documents. */
var MailEntity = MailEntity || {};
MailEntity.Email = "mail";
/** Entity values for calendar documents. */
var CalendarEntity = CalendarEntity || {};
CalendarEntity.Event = "event";
/** Entity values for Slack documents. */
var SlackEntity = Object.assign(SlackEntity || {}, {
    Team: "team",
    User: "user",
    Message: "message",
    Channel: "channel",
    File: "file",
});
/** Entity values for Drive files, one per recognised file kind. */
var DriveEntity = Object.assign(DriveEntity || {}, {
    Docs: "docs",
    Sheets: "sheets",
    Slides: "slides",
    Presentation: "presentation",
    PDF: "pdf",
    Folder: "folder",
    Misc: "driveFile",
    Drawing: "drawing",
    Form: "form",
    Script: "script",
    Site: "site",
    Map: "map",
    Audio: "audio",
    Video: "video",
    Photo: "photo",
    ThirdPartyApp: "third_party_app",
    Image: "image",
    Zip: "zip",
    WordDocument: "word_document",
    ExcelSpreadsheet: "excel_spreadsheet",
    PowerPointPresentation: "powerpoint_presentation",
    Text: "text",
    CSV: "csv",
});
/** Entity values for mail attachments; `NotValid` is one of the members. */
var MailAttachmentEntity = Object.assign(MailAttachmentEntity || {}, {
    PDF: "pdf",
    Sheets: "sheets",
    CSV: "csv",
    WordDocument: "worddocument",
    PowerPointPresentation: "powerpointpresentation",
    Text: "text",
    NotValid: "notvalid",
});
// zod validators accepting any value of the corresponding enum object.
const PeopleEntitySchema = z.nativeEnum(GooglePeopleEntity);
const ChatEntitySchema = z.nativeEnum(SlackEntity);
/** Entity values for Notion documents. */
var NotionEntity = NotionEntity || {};
NotionEntity.Page = "page";
NotionEntity.Database = "database";
// zod validators for the file, mail, mail-attachment, calendar and Notion entity enums.
const FileEntitySchema = z.nativeEnum(DriveEntity);
const MailEntitySchema = z.nativeEnum(MailEntity);
const MailAttachmentEntitySchema = z.nativeEnum(MailAttachmentEntity);
const EventEntitySchema = z.nativeEnum(CalendarEntity);
const NotionEntitySchema = z.nativeEnum(NotionEntity);
/** Entity values for system-level records. */
var SystemEntity = SystemEntity || {};
SystemEntity.SystemInfo = "system_info";
SystemEntity.UserProfile = "user_profile";
/** Entity values for data-source documents. */
var DataSourceEntity = DataSourceEntity || {};
DataSourceEntity.DataSourceFile = "data_source_file";
// zod validators for the system and data-source entity enums.
const SystemEntitySchema = z.nativeEnum(SystemEntity);
const DataSourceEntitySchema = z.nativeEnum(DataSourceEntity);
// Accepts an entity value from any of the entity enums above.
// Several enums share string values (e.g. "pdf", "user", "file"), so a parsed value does not identify which enum it came from.
const entitySchema = z.union([
SystemEntitySchema,
PeopleEntitySchema,
FileEntitySchema,
NotionEntitySchema,
MailEntitySchema,
EventEntitySchema,
MailAttachmentEntitySchema,
ChatEntitySchema,
DataSourceEntitySchema,
]);
// A text chunk paired with a numeric score and a numeric index.
const scoredChunk = z.object({
chunk: z.string(),
score: z.number(),
index: z.number(),
});
// Fields that the search/get schemas below add via `.merge(defaultVespaFieldsSchema)`.
const defaultVespaFieldsSchema = z.object({
relevance: z.number(),
source: z.string(),
// sddocname: Schemas,
documentid: z.string(),
});
// Metadata attached to spreadsheet files: the spreadsheet id and its sheet count.
const SpreadsheetMetadata = z.object({
    spreadsheetId: z.string(),
    totalSheets: z.number(),
});
// File metadata: either spreadsheet metadata or an arbitrary object.
// z.union returns the first option that validates, and z.object({}) accepts
// any object while stripping unknown keys. The specific schema must therefore
// come first, otherwise spreadsheet metadata is silently parsed to `{}`.
const Metadata = z.union([SpreadsheetMetadata, z.object({})]);
// Document fields of the "file" schema. Timestamps are plain numbers; the unit is not visible here.
const VespaFileSchema = z.object({
docId: z.string(),
app: z.nativeEnum(Apps),
entity: FileEntitySchema,
title: z.string(),
url: z.string().nullable(),
parentId: z.string().nullable(),
chunks: z.array(z.string()),
owner: z.string().nullable(),
ownerEmail: z.string().nullable(),
photoLink: z.string().nullable(),
permissions: z.array(z.string()),
mimeType: z.string().nullable(),
metadata: Metadata,
createdAt: z.number(),
updatedAt: z.number(),
});
// Per-chunk scores: a `cells` record mapping string keys to numbers.
const chunkScoresSchema = z.object({
cells: z.record(z.string(), z.number()),
});
// Match features for file schema (chunk_scores is required here)
const FileMatchFeaturesSchema = z.object({
"bm25(title)": z.number().optional(),
"bm25(chunks)": z.number().optional(),
"closeness(field, chunk_embeddings)": z.number().optional(),
chunk_scores: chunkScoresSchema,
});
// Match features for user schema
const UserMatchFeaturesSchema = z.object({
"bm25(name)": z.number().optional(),
"bm25(email)": z.number().optional(),
});
// Match features for mail schema (chunk_scores is required here)
const MailMatchFeaturesSchema = z.object({
"bm25(subject)": z.number().optional(),
"bm25(chunks)": z.number().optional(),
"bm25(attachmentFilenames)": z.number().optional(),
chunk_scores: chunkScoresSchema,
});
// Match features for event schema
const EventMatchFeaturesSchema = z.object({
"bm25(name)": z.number().optional(),
"bm25(description)": z.number().optional(),
"bm25(attachmentFilenames)": z.number().optional(),
"bm25(attendeesNames)": z.number().optional(),
});
// Match features for mail attachment schema (chunk_scores is required here)
const MailAttachmentMatchFeaturesSchema = z.object({
chunk_vector_score: z.number().optional(),
scaled_bm25_chunks: z.number().optional(),
scaled_bm25_filename: z.number().optional(),
chunk_scores: chunkScoresSchema,
});
// Match features for chat message schema
const ChatMessageMatchFeaturesSchema = z.object({
vector_score: z.number().optional(),
combined_nativeRank: z.number().optional(),
"nativeRank(text)": z.number().optional(),
"nativeRank(username)": z.number().optional(),
"nativeRank(name)": z.number().optional(),
});
// Match features for datasource file schema (chunk_scores is optional here, unlike the file/mail variants)
const DataSourceFileMatchFeaturesSchema = z.object({
"bm25(fileName)": z.number().optional(),
"bm25(chunks)": z.number().optional(),
"closeness(field, chunk_embeddings)": z.number().optional(),
chunk_scores: chunkScoresSchema.optional(),
});
// NOTE(review): this union is built but not assigned to anything in the visible code;
// presumably left over from a type-only declaration in the TypeScript source — confirm before removing.
z.union([
FileMatchFeaturesSchema,
MailMatchFeaturesSchema,
MailAttachmentMatchFeaturesSchema,
DataSourceFileMatchFeaturesSchema,
]);
// Base schema for DataSource (for insertion)
const VespaDataSourceSchemaBase = z.object({
docId: z.string(),
name: z.string(),
createdBy: z.string(),
createdAt: z.number(), // long
updatedAt: z.number(), // long
});
// Search schema for DataSource: base fields + sddocname literal + default Vespa fields.
// matchfeatures and rankfeatures are accepted as-is (z.any) and may be absent.
const VespaDataSourceSearchSchema = VespaDataSourceSchemaBase.extend({
sddocname: z.literal(datasourceSchema),
matchfeatures: z.any().optional(),
rankfeatures: z.any().optional(),
}).merge(defaultVespaFieldsSchema);
// Base schema for DataSourceFile (for insertion)
const VespaDataSourceFileSchemaBase = z.object({
docId: z.string(),
description: z.string().optional(),
app: z.literal(Apps.DataSource),
fileName: z.string().optional(),
fileSize: z.number().optional(), // long
chunks: z.array(z.string()),
image_chunks: z.array(z.string()).optional(), // Added for image descriptions
chunks_pos: z.array(z.number()).optional(), // Added for text chunk positions
image_chunks_pos: z.array(z.number()).optional(), // Added for image chunk positions
uploadedBy: z.string(),
duration: z.number().optional(), // long
mimeType: z.string().optional(),
createdAt: z.number(), // long
updatedAt: z.number(), // long
dataSourceRef: z.string(), // reference to datasource docId
metadata: z.string().optional(), // JSON string
});
// Search schema for DataSourceFile: base fields + sddocname literal + required matchfeatures
// + default Vespa fields + optional summary arrays (entries are strings or scored chunks).
const VespaDataSourceFileSearchSchema = VespaDataSourceFileSchemaBase.extend({
sddocname: z.literal(dataSourceFileSchema),
matchfeatures: DataSourceFileMatchFeaturesSchema,
rankfeatures: z.any().optional(),
dataSourceName: z.string().optional(),
})
.merge(defaultVespaFieldsSchema)
.extend({
chunks_summary: z.array(z.union([z.string(), scoredChunk])).optional(),
image_chunks_summary: z
.array(z.union([z.string(), scoredChunk]))
.optional(),
chunks_pos_summary: z.array(z.number()).optional(),
image_chunks_pos_summary: z.array(z.number()).optional(),
});
// Search-hit shape for the "file" schema: document fields + sddocname literal
// + required matchfeatures + default Vespa fields + optional chunks_summary.
const VespaFileSearchSchema = VespaFileSchema.extend({
sddocname: z.literal(fileSchema),
matchfeatures: FileMatchFeaturesSchema,
rankfeatures: z.any().optional(),
})
.merge(defaultVespaFieldsSchema)
.extend({
chunks_summary: z.array(z.union([z.string(), scoredChunk])).optional(),
});
// GetDocument doesn't return sddocname, whereas in search it is always present,
// so the get schema is just the document fields plus the default Vespa fields.
const VespaFileGetSchema = VespaFileSchema.merge(defaultVespaFieldsSchema);
// Shape of a "user" document as returned by Vespa (includes the sddocname literal
// and the default Vespa fields). Only docId, email, app, entity, creationTime and
// sddocname are required; everything else is optional.
const VespaUserSchema = z
.object({
docId: z.string().min(1),
name: z.string().optional(), //.min(1),
email: z.string().min(1).email(),
app: z.nativeEnum(Apps),
entity: z.nativeEnum(GooglePeopleEntity),
gender: z.string().optional(),
photoLink: z.string().optional(),
aliases: z.array(z.string()).optional(),
language: z.string().optional(),
includeInGlobalAddressList: z.boolean().optional(),
isAdmin: z.boolean().optional(),
isDelegatedAdmin: z.boolean().optional(),
suspended: z.boolean().optional(),
archived: z.boolean().optional(),
urls: z.array(z.string()).optional(),
rankfeatures: z.any().optional(),
orgName: z.string().optional(),
orgJobTitle: z.string().optional(),
orgDepartment: z.string().optional(),
orgLocation: z.string().optional(),
orgDescription: z.string().optional(),
creationTime: z.number(),
lastLoggedIn: z.number().optional(),
birthday: z.number().optional(),
occupations: z.array(z.string()).optional(),
userDefined: z.array(z.string()).optional(),
customerId: z.string().optional(),
clientData: z.array(z.string()).optional(),
// this only exists for contacts
owner: z.string().optional(),
sddocname: z.literal(userSchema),
})
.merge(defaultVespaFieldsSchema);
// Mail Types
// Attachment summary embedded in a mail document.
const AttachmentSchema = z.object({
fileType: z.string(),
fileSize: z.number(),
});
// Document fields of the "mail" schema.
const MailSchema = z.object({
docId: z.string(),
threadId: z.string(),
mailId: z.string().optional(), // Optional for threads
subject: z.string().default(""), // Default to empty string to avoid zod errors when subject is missing
chunks: z.array(z.string()),
timestamp: z.number(),
app: z.nativeEnum(Apps),
userMap: z.optional(z.record(z.string(), z.string())),
entity: z.nativeEnum(MailEntity),
permissions: z.array(z.string()),
from: z.string(),
to: z.array(z.string()),
cc: z.array(z.string()),
bcc: z.array(z.string()),
mimeType: z.string(),
attachmentFilenames: z.array(z.string()),
attachments: z.array(AttachmentSchema),
labels: z.array(z.string()),
});
// Same as MailSchema, except that docId must be non-empty.
const VespaMailSchema = MailSchema.extend({
docId: z.string().min(1),
});
// Document fields of the "mail_attachment" schema.
const MailAttachmentSchema = z.object({
docId: z.string(),
mailId: z.string(),
threadId: z.string(),
partId: z.number().nullable().optional(),
app: z.nativeEnum(Apps),
entity: z.nativeEnum(MailAttachmentEntity),
chunks: z.array(z.string()),
timestamp: z.number(),
permissions: z.array(z.string()),
filename: z.string(),
fileType: z.string().nullable().optional(),
fileSize: z.number().nullable().optional(),
});
// Extends MailAttachmentSchema with no additional fields.
const VespaMailAttachmentSchema = MailAttachmentSchema.extend({});
// A person referenced by an event (creator, organizer or attendee).
const EventUser = z.object({
email: z.string(),
displayName: z.string(),
});
// A file attached to an event.
// NOTE(review): the identifier is spelled "Atatchment"; it is used by VespaEventSchema below
// and may be referenced outside the visible code, so it is left unchanged here.
const EventAtatchment = z.object({
fileId: z.string(),
title: z.string(),
fileUrl: z.string(),
mimeType: z.string(),
});
// Document fields of the "event" schema; every field is required.
const VespaEventSchema = z.object({
docId: z.string(),
name: z.string(),
description: z.string(),
url: z.string(),
status: z.string(),
location: z.string(),
createdAt: z.number(),
updatedAt: z.number(),
app: z.nativeEnum(Apps),
entity: z.nativeEnum(CalendarEntity),
creator: EventUser,
organizer: EventUser,
attendees: z.array(EventUser),
attendeesNames: z.array(z.string()),
startTime: z.number(),
endTime: z.number(),
attachmentFilenames: z.array(z.string()),
attachments: z.array(EventAtatchment),
recurrence: z.array(z.string()),
baseUrl: z.string(),
joiningLink: z.string(),
permissions: z.array(z.string()),
cancelledInstances: z.array(z.string()),
defaultStartTime: z.boolean(),
});
// Search-hit shape for the "mail" schema.
const VespaMailSearchSchema = VespaMailSchema.extend({
sddocname: z.literal("mail"),
matchfeatures: MailMatchFeaturesSchema,
rankfeatures: z.any().optional(),
})
.merge(defaultVespaFieldsSchema)
.extend({
// attachment won't have this
chunks_summary: z.array(z.union([z.string(), scoredChunk])).optional(),
});
// Search-hit shape for the "mail_attachment" schema.
const VespaMailAttachmentSearchSchema = VespaMailAttachmentSchema.extend({
sddocname: z.literal("mail_attachment"),
matchfeatures: MailAttachmentMatchFeaturesSchema,
rankfeatures: z.any().optional(),
})
.merge(defaultVespaFieldsSchema)
.extend({
chunks_summary: z.array(z.union([z.string(), scoredChunk])).optional(),
});
// Search-hit shape for the "event" schema. Unlike the mail/file variants it declares no matchfeatures field.
const VespaEventSearchSchema = VespaEventSchema.extend({
sddocname: z.literal("event"),
// Assuming events can have rankfeatures
rankfeatures: z.any().optional(),
}).merge(defaultVespaFieldsSchema);
// Document fields of a stored user query ("user_query" schema).
const VespaUserQueryHistorySchema = z.object({
docId: z.string(),
query_text: z.string(),
timestamp: z.number(),
count: z.number(),
});
// User-query document with the sddocname literal and the default Vespa fields.
const VespaUserQueryHGetSchema = VespaUserQueryHistorySchema.extend({
sddocname: z.literal("user_query"),
}).merge(defaultVespaFieldsSchema);
// Get (non-search) shapes: document fields plus the default Vespa fields, without sddocname.
const VespaMailGetSchema = VespaMailSchema.merge(defaultVespaFieldsSchema);
const VespaMailAttachmentGetSchema = VespaMailAttachmentSchema.merge(defaultVespaFieldsSchema);
// Document fields of the "chat_message" schema.
const VespaChatMessageSchema = z.object({
docId: z.string(), // client_msg_id from Slack
teamId: z.string(), // Slack team ID (e.g., "T05N1EJSE0K")
channelId: z.string(), // Slack channel ID (e.g., "C123ABC456")
text: z.string(),
userId: z.string(), // Slack user ID (e.g., "U032QT45V53")
app: z.nativeEnum(Apps), // App (e.g., "slack")
entity: z.nativeEnum(SlackEntity), // Entity (e.g., "message")
name: z.string(),
username: z.string(),
image: z.string(),
channelName: z.string().optional(), // derived
isIm: z.boolean().optional(), // derived
isMpim: z.boolean().optional(), // derived
isPrivate: z.boolean().optional(), // derived
permissions: z.array(z.string()).optional(), // derived,
teamName: z.string().optional(), // derived
domain: z.string().optional(), // derived
createdAt: z.number(), // Slack ts (e.g., 1734442791.514519)
teamRef: z.string(), // vespa id for team
threadId: z.string().default(""), // Slack thread_ts; defaults to "" when the field is absent
attachmentIds: z.array(z.string()).default([]), // Slack file IDs (e.g., ["F0857N0FF4N"])
// reactions: z.array(z.string()), // Commented out in Vespa schema, so excluded
mentions: z.array(z.string()), // Extracted from text (e.g., ["U032QT45V53"])
updatedAt: z.number(), // Slack edited.ts (e.g., 1734442538.0). NOTE(review): required number here, so null would fail validation — confirm how unedited messages are stored
deletedAt: z.number(),
metadata: z.string(), // JSON string for subtype, etc. (e.g., "{\"subtype\": null}")
});
// Search-hit shape for the "chat_message" schema; chunks_summary holds plain strings only.
const VespaChatMessageSearchSchema = VespaChatMessageSchema.extend({
sddocname: z.literal(chatMessageSchema),
matchfeatures: ChatMessageMatchFeaturesSchema,
rankfeatures: z.any().optional(),
})
.merge(defaultVespaFieldsSchema)
.extend({
chunks_summary: z.array(z.string()).optional(),
});
// Get (non-search) shape: document fields plus the default Vespa fields.
const VespaChatMessageGetSchema = VespaChatMessageSchema.merge(defaultVespaFieldsSchema);
// Document fields of the "chat_user" schema; every field is required.
const VespaChatUserSchema = z.object({
docId: z.string(),
name: z.string(),
title: z.string(),
app: z.nativeEnum(Apps),
entity: z.nativeEnum(SlackEntity),
image: z.string(),
email: z.string(),
statusText: z.string(),
tz: z.string(),
teamId: z.string(),
deleted: z.boolean(),
isAdmin: z.boolean(),
updatedAt: z.number(),
});
// NOTE(review): this object schema (id, pathId, fields) is built but not assigned to anything
// in the visible code; presumably left over from a type-only declaration — confirm before removing.
z.object({
id: z.string(),
pathId: z.string(),
fields: VespaChatUserSchema,
});
// Search-hit shape for the "chat_user" schema.
const VespaChatUserSearchSchema = VespaChatUserSchema.extend({
sddocname: z.literal(chatUserSchema),
}).merge(defaultVespaFieldsSchema);
// Document fields of the "chat_container" schema; only `domain` is optional.
const VespaChatContainerSchema = z.object({
docId: z.string(),
name: z.string(),
channelName: z.string(),
creator: z.string(),
app: z.nativeEnum(Apps),
entity: z.nativeEnum(SlackEntity),
isPrivate: z.boolean(),
isArchived: z.boolean(),
isGeneral: z.boolean(),
isIm: z.boolean(),
isMpim: z.boolean(),
domain: z.string().optional(), // derived
permissions: z.array(z.string()),
createdAt: z.number(),
updatedAt: z.number(),
lastSyncedAt: z.number(),
topic: z.string(),
description: z.string(),
count: z.number().int(),
});
// Schema for search results that includes Vespa fields
const VespaChatContainerSearchSchema = VespaChatContainerSchema.extend({
sddocname: z.literal(chatContainerSchema),
}).merge(defaultVespaFieldsSchema);
// Match features for chat container schema
const ChatContainerMatchFeaturesSchema = z.object({
"bm25(name)": z.number().optional(),
"bm25(topic)": z.number().optional(),
"bm25(description)": z.number().optional(),
"closeness(field, chunk_embeddings)": z.number().optional(),
});
// Document fields of the "chat_team" schema; every field is required.
const VespaChatTeamSchema = z.object({
docId: z.string(),
name: z.string(),
app: z.nativeEnum(Apps),
icon: z.string(),
url: z.string(),
domain: z.string(),
email_domain: z.string(),
own: z.boolean(),
createdAt: z.number(),
updatedAt: z.number(),
count: z.number().int(),
});
// NOTE(review): this extended schema is built but not assigned to anything in the visible code;
// presumably left over from a type-only declaration — confirm before removing.
VespaChatTeamSchema.extend({
sddocname: z.literal(chatTeamSchema),
}).merge(defaultVespaFieldsSchema);
// Search-hit fields, discriminated on the `sddocname` literal of each member schema.
const VespaSearchFieldsUnionSchema = z.discriminatedUnion("sddocname", [
VespaUserSchema,
VespaFileSearchSchema,
VespaMailSearchSchema,
VespaEventSearchSchema,
VespaUserQueryHGetSchema,
VespaMailAttachmentSearchSchema,
VespaChatContainerSearchSchema,
VespaChatUserSearchSchema,
VespaChatMessageSearchSchema,
VespaDataSourceSearchSchema,
VespaDataSourceFileSearchSchema,
]);
// Get schema for DataSourceFile
const VespaDataSourceFileGetSchema = VespaDataSourceFileSchemaBase.merge(defaultVespaFieldsSchema);
// Any of the per-schema match-feature shapes.
const SearchMatchFeaturesSchema = z.union([
FileMatchFeaturesSchema,
UserMatchFeaturesSchema,
MailMatchFeaturesSchema,
EventMatchFeaturesSchema,
MailAttachmentMatchFeaturesSchema,
ChatMessageMatchFeaturesSchema,
DataSourceFileMatchFeaturesSchema,
ChatContainerMatchFeaturesSchema,
]);
// Fields of a search hit: must carry matchfeatures and sddocname AND satisfy one member of the discriminated union.
const VespaSearchFieldsSchema = z
.object({
matchfeatures: SearchMatchFeaturesSchema,
sddocname: Schemas,
})
.and(VespaSearchFieldsUnionSchema);
// NOTE(review): this union of get-schemas is built but not assigned to anything in the visible code;
// presumably left over from a type-only declaration — confirm before removing.
z.union([
VespaUserSchema,
VespaFileGetSchema,
VespaMailGetSchema,
VespaDataSourceFileGetSchema,
]);
// A single search hit as found under root.children.
const VespaSearchResultsSchema = z.object({
id: z.string(),
relevance: z.number(),
fields: VespaSearchFieldsSchema,
pathId: z.string().optional(),
});
// NOTE(review): same shape as VespaSearchResultsSchema above, but the result is not assigned
// to anything in the visible code — confirm whether it is needed.
z.object({
id: z.string(),
relevance: z.number(),
fields: VespaSearchFieldsSchema,
pathId: z.string().optional(),
});
// A grouping node. `children` refers back to this schema through z.lazy, so groups can nest.
const VespaGroupSchema = z.object({
id: z.string(),
relevance: z.number(),
label: z.string(),
value: z.string().optional(),
fields: z
.object({
"count()": z.number(),
})
.optional(),
children: z.array(z.lazy(() => VespaGroupSchema)).optional(),
});
// One entry of the `errors` array in a Vespa response root.
const VespaErrorSchema = z.object({
code: z.number(),
summary: z.string(),
source: z.string(),
message: z.string(),
});
// Envelope shared by Vespa responses: `root` without its children, plus an optional trace.
const VespaRootBaseSchema = z.object({
root: z.object({
id: z.string(),
relevance: z.number(),
fields: z
.object({
totalCount: z.number(),
})
.optional(),
coverage: z.object({
coverage: z.number(),
documents: z.number(),
full: z.boolean(),
nodes: z.number(),
results: z.number(),
resultsFull: z.number(),
}),
errors: z.array(VespaErrorSchema).optional(),
}),
trace: z.any().optional(), // Add optional trace field to the root
});
// A child of root is either a search hit or a grouping node.
const VespaSearchResultSchema = z.union([
VespaSearchResultsSchema,
VespaGroupSchema,
]);
// NOTE(review): this full search-response schema (root + children) is built but not assigned
// to anything in the visible code; presumably left over from a type-only declaration — confirm before removing.
VespaRootBaseSchema.extend({
root: VespaRootBaseSchema.shape.root.extend({
children: z.array(VespaSearchResultSchema),
}),
});
// Match features returned for autocomplete hits: one of three fuzzy-bm25 shapes.
const AutocompleteMatchFeaturesSchema = z.union([
z.object({
"bm25(title_fuzzy)": z.number(),
}),
z.object({
"bm25(email_fuzzy)": z.number(),
"bm25(name_fuzzy)": z.number(),
}),
z.object({
"bm25(subject_fuzzy)": z.number(),
}),
]);
// Autocomplete summary for a file hit.
const VespaAutocompleteFileSchema = z
.object({
docId: z.string(),
title: z.string(),
app: z.nativeEnum(Apps),
entity: entitySchema,
sddocname: Schemas,
})
.merge(defaultVespaFieldsSchema);
// Autocomplete summary for a user hit.
const VespaAutocompleteUserSchema = z
.object({
docId: z.string(),
// optional due to contacts
name: z.string().optional(),
email: z.string(),
app: z.nativeEnum(Apps),
entity: entitySchema,
photoLink: z.string(),
sddocname: Schemas,
})
.merge(defaultVespaFieldsSchema);
// Autocomplete summary for a mail hit.
const VespaAutocompleteMailSchema = z
.object({
docId: z.string(),
threadId: z.string(),
subject: z.string().optional(),
app: z.nativeEnum(Apps),
entity: entitySchema,
sddocname: Schemas,
})
.merge(defaultVespaFieldsSchema);
// Autocomplete summary for a chat user hit (carries `image` where the user variant carries `photoLink`).
const VespaAutocompleteChatUserSchema = z
.object({
docId: z.string(),
// optional due to contacts
name: z.string().optional(),
email: z.string(),
app: z.nativeEnum(Apps),
entity: entitySchema,
image: z.string(),
sddocname: Schemas,
})
.merge(defaultVespaFieldsSchema);
// Autocomplete summary for a mail attachment hit.
const VespaAutocompleteMailAttachmentSchema = z
.object({
docId: z.string(),
filename: z.string(),
sddocname: Schemas,
})
.merge(defaultVespaFieldsSchema);
// NOTE(review): this schema (docId, optional name, app, entity, sddocname) is built but not assigned
// to anything in the visible code; presumably left over from a type-only declaration — confirm before removing.
z
.object({
docId: z.string(),
name: z.string().optional(),
app: z.nativeEnum(Apps),
entity: entitySchema,
sddocname: Schemas,
})
.merge(defaultVespaFieldsSchema);
// Autocomplete summary for a previous user query.
const VespaAutocompleteUserQueryHSchema = z
.object({
docId: z.string(),
query_text: z.string(),
timestamp: z.number().optional(),
sddocname: Schemas,
})
.merge(defaultVespaFieldsSchema);
// Autocomplete summary for a chat container hit.
const VespaAutocompleteChatContainerSchema = z
.object({
docId: z.string(),
name: z.string(),
app: z.nativeEnum(Apps),
sddocname: Schemas,
})
.merge(defaultVespaFieldsSchema);
// Any of the autocomplete summary shapes above.
const VespaAutocompleteSummarySchema = z.union([
VespaAutocompleteFileSchema,
VespaAutocompleteUserSchema,
VespaAutocompleteMailSchema,
VespaAutocompleteUserQueryHSchema,
VespaAutocompleteMailAttachmentSchema,
VespaAutocompleteChatContainerSchema,
VespaAutocompleteChatUserSchema,
]);
// Fields of an autocomplete hit: matchfeatures + sddocname AND one of the summary shapes.
const VespaAutocompleteFieldsSchema = z
.object({
matchfeatures: AutocompleteMatchFeaturesSchema,
sddocname: Schemas,
})
.and(VespaAutocompleteSummarySchema);
// A single autocomplete hit as found under root.children.
const VespaAutocompleteSchema = z.object({
id: z.string(),
relevance: z.number(),
source: z.string(),
fields: VespaAutocompleteFieldsSchema,
});
// NOTE(review): this full autocomplete-response schema (root + children) is built but not assigned
// to anything in the visible code; presumably left over from a type-only declaration — confirm before removing.
VespaRootBaseSchema.extend({
root: VespaRootBaseSchema.shape.root.extend({
children: z.array(VespaAutocompleteSchema),
}),
});
// Mail search result: a subset of the mail get-schema fields, unknown keys stripped,
// tagged with `type: "mail"`.
const MailResponseSchema = VespaMailGetSchema.pick({
docId: true,
threadId: true,
app: true,
entity: true,
subject: true,
from: true,
relevance: true,
timestamp: true,
userMap: true,
mailId: true,
})
.strip()
.extend({
type: z.literal("mail"),
mimeType: z.string(),
chunks_summary: z.array(scoredChunk).optional(),
matchfeatures: z.any().optional(),
rankfeatures: z.any().optional(),
});
// Mail attachment search result, tagged with `type: "mail_attachment"`.
const MailAttachmentResponseSchema = VespaMailAttachmentGetSchema.pick({
docId: true,
app: true,
entity: true,
relevance: true,
timestamp: true,
filename: true,
mailId: true,
partId: true,
fileType: true,
})
.strip()
.extend({
type: z.literal("mail_attachment"),
chunks_summary: z.array(scoredChunk).optional(),
matchfeatures: z.any().optional(),
rankfeatures: z.any().optional(),
});
// Chat message search result, tagged with `type: "chat_message"`.
const ChatMessageResponseSchema = VespaChatMessageGetSchema.pick({
docId: true,
teamId: true,
channelId: true,
text: true,
userId: true,
app: true,
entity: true,
createdAt: true,
threadId: true,
image: true,
name: true,
domain: true,
username: true,
attachmentIds: true,
mentions: true,
relevance: true,
updatedAt: true,
})
.strip()
.extend({
type: z.literal("chat_message"),
chunks_summary: z.array(z.string()).optional(),
matchfeatures: z.any().optional(),
rankfeatures: z.any().optional(),
});
// Datasource file search result, tagged with `type: "datasource_file"`.
const DataSourceFileResponseSchema = VespaDataSourceFileGetSchema.pick({
docId: true,
description: true,
app: true,
fileName: true,
fileSize: true,
uploadedBy: true,
duration: true,
mimeType: true,
createdAt: true,
updatedAt: true,
dataSourceRef: true,
metadata: true,
relevance: true,
})
.strip()
.extend({
type: z.literal(dataSourceFileSchema), // Using the schema const for the literal
chunks_summary: z.array(z.union([z.string(), scoredChunk])).optional(),
matchfeatures: DataSourceFileMatchFeaturesSchema.optional(), // or z.any().optional() if specific match features aren't always needed here
rankfeatures: z.any().optional(),
});
// Lookup from lowercase app aliases to Apps values.
// NOTE(review): this object literal is evaluated but not assigned to anything in the visible code;
// presumably left over from a declaration dropped during compilation — confirm before removing.
({
gmail: Apps.Gmail,
drive: Apps.GoogleDrive,
googledrive: Apps.GoogleDrive,
googlecalendar: Apps.GoogleCalendar,
slack: Apps.Slack,
datasource: Apps.DataSource,
"google-workspace": Apps.GoogleWorkspace,
googledocs: Apps.GoogleDrive,
googlesheets: Apps.GoogleDrive,
pdf: Apps.GoogleDrive,
});
// User-facing autocomplete results. Each variant is tagged with a `type` literal
// (the Vespa schema name) and strips unknown keys.
const AutocompleteFileSchema = z
.object({
type: z.literal(fileSchema),
relevance: z.number(),
title: z.string(),
app: z.nativeEnum(Apps),
entity: entitySchema,
})
.strip();
const AutocompleteUserSchema = z
.object({
type: z.literal(userSchema),
relevance: z.number(),
// optional due to contacts
name: z.string().optional(),
email: z.string(),
app: z.nativeEnum(Apps),
entity: entitySchema,
photoLink: z.string().optional(),
})
.strip();
// Note: unlike the other variants, the user-query result carries no `relevance`.
const AutocompleteUserQueryHSchema = z
.object({
type: z.literal(userQuerySchema),
docId: z.string(),
query_text: z.string(),
timestamp: z.number().optional(),
})
.strip();
const AutocompleteMailSchema = z
.object({
type: z.literal(mailSchema),
relevance: z.number(),
// the subject may be missing
subject: z.string().optional(),
app: z.nativeEnum(Apps),
entity: entitySchema,
threadId: z.string().optional(),
docId: z.string(),
})
.strip();
const AutocompleteMailAttachmentSchema = z
.object({
type: z.literal(mailAttachmentSchema),
relevance: z.number(),
app: z.nativeEnum(Apps),
entity: entitySchema,
filename: z.string(),
docId: z.string(),
})
.strip();
const AutocompleteEventSchema = z
.object({
type: z.literal(eventSchema),
relevance: z.number(),
name: z.string().optional(),
app: z.nativeEnum(Apps),
entity: entitySchema,
docId: z.string(),
})
.strip();
const AutocompleteChatUserSchema = z
.object({
type: z.literal(chatUserSchema),
relevance: z.number(),
// optional due to contacts
name: z.string().optional(),
email: z.string().optional(),
app: z.nativeEnum(Apps),
entity: entitySchema,
image: z.string(),
})
.strip();
// Any autocomplete result, discriminated on `type`.
const AutocompleteSchema = z.discriminatedUnion("type", [
AutocompleteFileSchema,
AutocompleteUserSchema,
AutocompleteMailSchema,
AutocompleteEventSchema,
AutocompleteUserQueryHSchema,
AutocompleteMailAttachmentSchema,
AutocompleteChatUserSchema,
]);
// NOTE(review): this `{ results }` wrapper schema is built but not assigned to anything in the
// visible code; presumably left over from a type-only declaration — confirm before removing.
z.object({
results: z.array(AutocompleteSchema),
});
// search result
// File search result: selected file fields, tagged with `type: "file"`, unknown keys stripped.
// mimeType is redeclared as a required string here.
const FileResponseSchema = VespaFileSchema.pick({
docId: true,
title: true,
url: true,
app: true,
entity: true,
owner: true,
ownerEmail: true,
photoLink: true,
updatedAt: true,
})
.extend({
type: z.literal(fileSchema),
chunk: z.string().optional(),
chunkIndex: z.number().optional(),
mimeType: z.string(),
chunks_summary: z.array(scoredChunk).optional(),
relevance: z.number(),
matchfeatures: z.any().optional(), // Add matchfeatures
rankfeatures: z.any().optional(),
})
.strip();
// Event search result, tagged with `type: "event"`.
const EventResponseSchema = VespaEventSchema.pick({
docId: true,
name: true,
url: true,
app: true,
entity: true,
updatedAt: true,
})
.extend({
type: z.literal(eventSchema),
relevance: z.number(),
description: z.string().optional(),
chunks_summary: z.array(z.string()).optional(),
attendeesNames: z.array(z.string()).optional(),
matchfeatures: z.any().optional(), // Add matchfeatures
rankfeatures: z.any().optional(),
})
.strip();
// User search result, tagged with `type: "user"`.
const UserResponseSchema = VespaUserSchema.pick({
name: true,
email: true,
app: true,
entity: true,
photoLink: true,
docId: true,
})
.strip()
.extend({
type: z.literal(userSchema),
relevance: z.number(),
matchfeatures: z.any().optional(), // Add matchfeatures
rankfeatures: z.any().optional(),
});
// Search Response Schema: any search result, discriminated on `type`.
const SearchResultsSchema = z.discriminatedUnion("type", [
UserResponseSchema,
FileResponseSchema,
DataSourceFileResponseSchema,
MailResponseSchema,
EventResponseSchema,
MailAttachmentResponseSchema,
ChatMessageResponseSchema,
]);
// NOTE(review): this search-response wrapper (count, results, groupCount, trace) is built but not
// assigned to anything in the visible code; presumably left over from a type-only declaration.
z.object({
count: z.number(),
results: z.array(SearchResultsSchema),
groupCount: z.any(),
trace: z.any().optional(),
});
// NOTE(review): this schema of optional string arrays (from/to/cc/bcc/subject) is likewise
// built but not assigned to anything in the visible code.
z.object({
from: z.array(z.string()).optional(),
to: z.array(z.string()).optional(),
cc: z.array(z.string()).optional(),
bcc: z.array(z.string()).optional(),
subject: z.array(z.string()).optional(),
});
/** Named search modes mapped to their string identifiers. */
var SearchModes = Object.assign(SearchModes || {}, {
    NativeRank: "default_native",
    BM25: "default_bm25",
    AI: "default_ai",
    Random: "default_random",
    GlobalSorted: "global_sorted",
});
/**
 * Turn any thrown value into a printable message.
 *
 * @param {unknown} error - The caught value.
 * @returns {string} `error.message` for `Error` instances, otherwise `String(error)`.
 */
const getErrorMessage = (error) => (error instanceof Error ? error.message : String(error));
// module contains all the transformations
// from vespa to the user accepted types
/**
 * Flatten a Vespa grouping response into `{ [app]: { [entity]: count } }`.
 *
 * The function walks: root -> first child (assumed to be the group root) ->
 * first child (the app grouping list) -> app groups -> first child (the entity
 * grouping list) -> entity groups, reading each entity's "count()" field.
 *
 * Any level that is missing, or whose `children` is not an array, yields an
 * empty result (or an empty entry for that app) instead of throwing.
 *
 * @param {object} response - Parsed Vespa response.
 * @returns {Object<string, Object<string, number>>} Counts keyed by app value, then by entity value.
 */
function handleVespaGroupResponse(response) {
    const appEntityCounts = {};
    // Checking `"children" in node` is not enough: the key can be present with an
    // undefined/null value, which would make the for...of below throw.
    const appGroups = response?.root?.children?.[0]?.children?.[0]?.children;
    if (!Array.isArray(appGroups)) {
        return appEntityCounts;
    }
    for (const app of appGroups) {
        const entityCounts = {};
        appEntityCounts[app.value] = entityCounts;
        const entityGroups = app.children?.[0]?.children;
        if (!Array.isArray(entityGroups)) {
            continue; // app without an entity grouping keeps an empty entry
        }
        for (const entity of entityGroups) {
            // A missing (or zero) count is reported as 0.
            entityCounts[entity.value] = entity.fields?.["count()"] || 0;
        }
    }
    return appEntityCounts;
}
/**
 * Fallback logger that writes to the global `console`, prefixing every message
 * with its level in square brackets. Extra arguments are forwarded unchanged.
 * `child()` ignores its metadata and returns this same logger.
 */
const consoleLogger = {
    info(message, ...args) {
        return console.info(`[INFO] ${message}`, ...args);
    },
    error(message, ...args) {
        // For Error instances only the message text is printed in the prefix line.
        let text = message;
        if (message instanceof Error) {
            text = message.message;
        }
        console.error(`[ERROR] ${text}`, ...args);
    },
    warn(message, ...args) {
        return console.warn(`[WARN] ${message}`, ...args);
    },
    debug(message, ...args) {
        return console.debug(`[DEBUG] ${message}`, ...args);
    },
    child(metadata) {
        return consoleLogger;
    },
};
class VespaClient {
constructor(endpoint, logger, config) {
this.logger = logger || consoleLogger;
this.maxRetries = config?.vespaMaxRetryAttempts || 3;
this.retryDelay = config?.vespaRetryDelay || 1000; // milliseconds
this.vespaEndpoint = endpoint || `http://${config?.vespaBaseHost || "localhost"}:8080`;
}
async delay(ms) {
return new Promise((resolve) => setTimeout(resolve, ms));
}
async fetchWithRetry(url, options, retryCount = 0) {
const nonRetryableStatusCodes = [404];
try {
const response = await fetch(url, options);
if (!response.ok) {
// Don't need to retry for non-retryable status codes
if (nonRetryableStatusCodes.includes(response.status)) {
throw new Error(`Non-retryable error: ${response.status} ${response.statusText}`);
}
// Retry for 429 (Too Many Requests) or 5xx errors
if ((response.status === 429 || response.status >= 500) &&
retryCount < this.maxRetries) {
this.logger.info("retrying due to status: ", response.status);
await this.delay(this.retryDelay * Math.pow(2, retryCount));
return this.fetchWithRetry(url, options, retryCount + 1);
}
}
return response;
}
catch (error) {
const errorMessage = getErrorMessage(error);
if (retryCount < this.maxRetries &&
!errorMessage.includes("Non-retryable error")) {
await this.delay(this.retryDelay * Math.pow(2, retryCount)); // Exponential backoff
return this.fetchWithRetry(url, options, retryCount + 1);
}
throw error;
}
}
async search(payload) {
const url = `${this.vespaEndpoint}/search/`;
try {
const response = await this.fetchWithRetry(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(payload),
});
if (!response.ok) {
const errorText = response.statusText;
const errorBody = await response.text();
this.logger.error(`Vespa search failed - Status: ${response.status}, StatusText: ${errorText}`);
this.logger.error(`Vespa error body: ${errorBody}`);
throw new Error(`Failed to fetch documents in searchVespa: ${response.status} ${response.statusText} - ${errorText}`);
}
const result = await response.json();
return result;
}
catch (error) {
this.logger.error(`VespaClient.search error:`, error);
throw new Error(`Vespa search error: ${error.message}`);
}
}
async fetchDocumentBatch(schema, options, limit, offset, email) {
const yqlQuery = `select * from sources ${schema} where true`;
const searchPayload = {
yql: yqlQuery,
hits: limit,
offset,
timeout: "10s",
};
const response = await this.search(searchPayload);
return (response.root?.children || []).map((doc) => {
// Use optional chaining and nullish coalescing to safely extract fields
const { matchfeatures, ...fieldsWithoutMatch } = doc.fields;
return fieldsWithoutMatch;
});
}
async getAllDocumentsParallel(schema, options, concurrency = 3, email) {
// First get document count
const countResponse = await this.getDocumentCount(schema, options, email);
const totalCount = countResponse?.root?.fields?.totalCount || 0;
if (totalCount === 0)
return [];
// Calculate optimal batch size and create batch tasks
const batchSize = 350;
const tasks = [];
for (let offset = 0; offset < totalCount; offset += batchSize) {
tasks.push(() => this.fetchDocumentBatch(schema, options, batchSize, offset, email));
}
// Run tasks with concurrency limit
const pLimit = (await import('p-limit')).default;
const limit = pLimit(concurrency);
const results = await Promise.all(tasks.map((task) => limit(task)));
// Flatten results
return results.flat();
}
async deleteAllDocuments(options) {
const { cluster, namespace, schema } = options;
// Construct the DELETE URL
const url = `${this.vespaEndpoint}/document/v1/${namespace}/${schema}/docid?selection=true&cluster=${cluster}`;
try {
const response = await this.fetchWithRetry(url, {
method: "DELETE",
});
if (response.ok) {
this.logger.info("All documents deleted successfully.");
}
else {
const errorText = response.statusText;
throw new Error(`Failed to delete documents: ${response.status} ${response.statusText} - ${errorText}`);
}
}
catch (error) {
this.logger.error(`Error deleting documents:, ${error} ${error.stack}`, error);
throw new Error(`Vespa delete error: ${error}`);
}
}
async insertDocument(document, options) {
try {
const url = `${this.vespaEndpoint}/document/v1/${options.namespace}/${options.schema}/docid/${document.docId}`;
const response = await this.fetchWithRetry(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ fields: document }),
});
if (!response.ok) {
// Using status text since response.text() return Body Already used Error
const errorText = response.statusText;
const errorBody = await response.text();
this.logger.error(`Vespa error: ${errorBody}`);
throw new Error(`Failed to insert document: ${response.status} ${response.statusText} - ${errorText}`);
}
const data = await response.json();
if (response.ok) {
// this.logger.info(`Document ${document.docId} inserted successfully`)
}
else {
this.logger.error(`Error inserting document ${document.docId}`);
}
}
catch (error) {
const errMessage = getErrorMessage(error);
this.logger.error(`Error inserting document ${document.docId}: ${errMessage}`, error);
throw new Error(`Error inserting document ${document.docId}: ${errMessage}`);
}
}
async insert(document, options) {
try {
const url = `${this.vespaEndpoint}/document/v1/${options.namespace}/${options.schema}/docid/${document.docId}`;
const response = await this.fetchWithRetry(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ fields: document }),
});
if (!response.ok) {
// Using status text since response.text() return Body Already used Error
const errorText = response.statusText;
const errorBody = await response.text();
this.logger.error(`Vespa error: ${errorBody}`);
throw new Error(`Failed to insert document: ${response.status} ${response.statusText} - ${errorText}`);
}
const data = await response.json();
if (response.ok) {
this.logger.info(`Document ${document.docId} inserted successfully`);
}
else {
}
}
catch (error) {
const errMessage = getErrorMessage(error);
this.logger.error(`Error inserting document ${document.docId}: ${errMessage} ${error.stack}`, error);
throw new Error(`Error inserting document ${document.docId}: ${errMessage} ${error.stack}`);
}
}
async insertUser(user, options) {
try {
const url = `${this.vespaEndpoint}/document/v1/${options.namespace}/${options.schema}/docid/${user.docId}`;
const response = await this.fetchWithRetry(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ fields: user }),
});
const data = await response.json();
if (response.ok) {
// this.logger.info(`Document ${user.docId} inserted successfully:`, data)
}
else {
this.logger.error(`Error inserting user ${user.docId}: ${data}`, data);
}
}
catch (error) {
const errorMessage = getErrorMessage(error);
this.logger.error(`Error inserting user ${user.docId}:`, errorMessage, error);
throw new Error(`Error inserting user ${user.docId}: ${errorMessage}`);
}
}
async autoComplete(searchPayload) {
try {
const url = `${this.vespaEndpoint}/search/`;
const response = await this.fetchWithRetry(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(searchPayload),
});
if (!response.ok) {
const errorText = response.statusText;
const errorBody = await response.text();
this.logger.error(`AutoComplete failed - Status: ${response.status}, StatusText: ${errorText}`);
this.logger.error(`AutoComplete error body: ${errorBody}`);
throw new Error(`Failed to perform autocomplete search: ${response.status} ${response.statusText} - ${errorText}`);
}
const data = await response.json();
return data;
}
catch (error) {
this.logger.error(`VespaClient.autoComplete error:`, error);
throw new Error(`Error performing autocomplete search:, ${error} ${error.stack} `);
}
}
async groupSearch(payload) {
try {
const url = `${this.vespaEndpoint}/search/`;
const response = await this.fetchWithRetry(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(payload),
});
if (!response.ok) {
const errorText = response.statusText;
throw new Error(`Failed to fetch documents in groupVespaSearch: ${response.status} ${response.statusText} - ${errorText}`);
}
const data = await response.json();
return handleVespaGroupResponse(data);
}
catch (error) {
this.logger.error(`Error performing search groupVespaSearch:, ${error} - ${error.stack}`, error);
throw new Error(`Error performing search groupVespaSearch:, ${error} - ${error.stack}`);
}
}
async getDocumentCount(schema, options, email) {
try {
// Encode the YQL query to ensure it's URL-safe
const yql = encodeURIComponent(`select * from sources ${schema} where uploadedBy contains '${email}'`);
// Construct the search URL with necessary query parameters
const url = `${this.vespaEndpoint}/search/?yql=${yql}&hits=0&cluster=${options.cluster}`;
const response = await this.fetchWithRetry(url, {
method: "GET",
headers: {
Accept: "application/json",
},
});
if (!response.ok) {
const errorText = response.statusText;
throw new Error(`Failed to fetch document count: ${response.status} ${response.statusText} - ${errorText}`);
}
const data = await response.json();
// Extract the total number of hits from the response
const totalCount = data?.root?.fields?.totalCount;
if (typeof totalCount === "number") {
this.logger.info(`Total documents in schema '${schema}' within namespace '${options.namespace}' and cluster '${options.cluster}': ${totalCount}`);
return data;
}
else {
this.logger.error(`Unexpected response structure:', ${data}`);
}
}
catch (error) {
const errMessage = getErrorMessage(error);
this.logger.error(`Error retrieving document count: ${errMessage}`);
throw new Error(`Error retrieving document count: ${errMessage}`);
}
}
async getDocument(options) {
const { docId, namespace, schema } = options;
const url = `${this.vespaEndpoint}/document/v1/${namespace}/${schema}/docid/${docId}`;
try {
const response = await this.fetchWithRetry(url, {
method: "GET",
headers: {
Accept: "application/json",
},
});
if (!response.ok) {
const errorText = response.statusText;
const errorBody = await response.text();
throw new Error(`Failed to fetch document: ${response.status} ${response.statusText} - ${errorBody}`);
}
const document = await response.json();
return document;
}
catch (error) {
const errMessage = getErrorMessage(error);
throw new Error(`Error fetching document docId: ${docId} - ${errMessage}`);
}
}
async getDocumentsByOnlyDocIds(options) {
const { docIds, generateAnswerSpan } = options;
const yqlIds = docIds.map((id) => `docId contains '${id}'`).join(" or ");
const yqlMailIds = docIds
.map((id) => `mailId contains '${id}'`)
.join(" or ");
const yqlQuery = `select * from sources * where (${yqlIds}) or (${yqlMailIds})`;
const url = `${this.vespaEndpoint}/search/`;
try {
const payload = {
yql: yqlQuery,
hits: docIds?.length,
maxHits: docIds?.length,
};
generateAnswerSpan.setAttribute("vespaPayload", JSON.stringify(payload));
const response = await this.fetchWithRetry(url, {
method: "POST",
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify(payload),
});
if (!response.ok) {
const errorText = response.statusText;
throw new Error(`Search query failed: ${response.status} ${response.statusText} - ${errorText}`);
}
const result = await response.json();
return result;
}
catch (error) {
const errMessage = getErrorMessage(error);
throw new Error(`Error fetching documents: ${errMessage}`);
}
}
async up