UNPKG

vespa-ts

Version:

A reusable TypeScript package for interacting with the Vespa search engine, with dependency injection support

1,419 lines (1,415 loc) 81.9 kB
import { z } from 'zod'; const fileSchema = "file"; // Replace with your actual schema name const userSchema = "user"; // calendar const eventSchema = "event"; // mail const mailAttachmentSchema = "mail_attachment"; const mailSchema = "mail"; // chat const chatContainerSchema = "chat_container"; // this is not meant to be searched but we will // store the data in vespa and fetch it as needed const chatTeamSchema = "chat_team"; const chatMessageSchema = "chat_message"; const chatUserSchema = "chat_user"; // previous queries const userQuerySchema = "user_query"; const datasourceSchema = "datasource"; const dataSourceFileSchema = "datasource_file"; // not using @ because of vite of frontend var Apps; (function (Apps) { // includes everything google Apps["GoogleWorkspace"] = "google-workspace"; // more granular Apps["GoogleDrive"] = "google-drive"; Apps["Gmail"] = "gmail"; // Notion = "notion", // Notion is not yet supported Apps["GoogleCalendar"] = "google-calendar"; Apps["Slack"] = "slack"; Apps["MCP"] = "mcp"; Apps["Github"] = "github"; Apps["Xyne"] = "xyne"; Apps["DataSource"] = "data-source"; })(Apps || (Apps = {})); var GooglePeopleEntity; (function (GooglePeopleEntity) { GooglePeopleEntity["Contacts"] = "Contacts"; GooglePeopleEntity["OtherContacts"] = "OtherContacts"; GooglePeopleEntity["AdminDirectory"] = "AdminDirectory"; })(GooglePeopleEntity || (GooglePeopleEntity = {})); // the vespa schemas const Schemas = z.union([ z.literal(fileSchema), z.literal(userSchema), z.literal(mailSchema), z.literal(eventSchema), z.literal(userQuerySchema), z.literal(mailAttachmentSchema), z.literal(chatContainerSchema), z.literal(chatTeamSchema), z.literal(chatUserSchema), z.literal(chatMessageSchema), z.literal(datasourceSchema), z.literal(dataSourceFileSchema), ]); var MailEntity; (function (MailEntity) { MailEntity["Email"] = "mail"; })(MailEntity || (MailEntity = {})); var CalendarEntity; (function (CalendarEntity) { CalendarEntity["Event"] = "event"; })(CalendarEntity || 
(CalendarEntity = {})); var SlackEntity; (function (SlackEntity) { SlackEntity["Team"] = "team"; SlackEntity["User"] = "user"; SlackEntity["Message"] = "message"; SlackEntity["Channel"] = "channel"; SlackEntity["File"] = "file"; })(SlackEntity || (SlackEntity = {})); var DriveEntity; (function (DriveEntity) { DriveEntity["Docs"] = "docs"; DriveEntity["Sheets"] = "sheets"; DriveEntity["Slides"] = "slides"; DriveEntity["Presentation"] = "presentation"; DriveEntity["PDF"] = "pdf"; DriveEntity["Folder"] = "folder"; DriveEntity["Misc"] = "driveFile"; DriveEntity["Drawing"] = "drawing"; DriveEntity["Form"] = "form"; DriveEntity["Script"] = "script"; DriveEntity["Site"] = "site"; DriveEntity["Map"] = "map"; DriveEntity["Audio"] = "audio"; DriveEntity["Video"] = "video"; DriveEntity["Photo"] = "photo"; DriveEntity["ThirdPartyApp"] = "third_party_app"; DriveEntity["Image"] = "image"; DriveEntity["Zip"] = "zip"; DriveEntity["WordDocument"] = "word_document"; DriveEntity["ExcelSpreadsheet"] = "excel_spreadsheet"; DriveEntity["PowerPointPresentation"] = "powerpoint_presentation"; DriveEntity["Text"] = "text"; DriveEntity["CSV"] = "csv"; })(DriveEntity || (DriveEntity = {})); var MailAttachmentEntity; (function (MailAttachmentEntity) { MailAttachmentEntity["PDF"] = "pdf"; MailAttachmentEntity["Sheets"] = "sheets"; MailAttachmentEntity["CSV"] = "csv"; MailAttachmentEntity["WordDocument"] = "worddocument"; MailAttachmentEntity["PowerPointPresentation"] = "powerpointpresentation"; MailAttachmentEntity["Text"] = "text"; MailAttachmentEntity["NotValid"] = "notvalid"; })(MailAttachmentEntity || (MailAttachmentEntity = {})); const PeopleEntitySchema = z.nativeEnum(GooglePeopleEntity); const ChatEntitySchema = z.nativeEnum(SlackEntity); var NotionEntity; (function (NotionEntity) { NotionEntity["Page"] = "page"; NotionEntity["Database"] = "database"; })(NotionEntity || (NotionEntity = {})); const FileEntitySchema = z.nativeEnum(DriveEntity); const MailEntitySchema = 
z.nativeEnum(MailEntity); const MailAttachmentEntitySchema = z.nativeEnum(MailAttachmentEntity); const EventEntitySchema = z.nativeEnum(CalendarEntity); const NotionEntitySchema = z.nativeEnum(NotionEntity); var SystemEntity; (function (SystemEntity) { SystemEntity["SystemInfo"] = "system_info"; SystemEntity["UserProfile"] = "user_profile"; })(SystemEntity || (SystemEntity = {})); var DataSourceEntity; (function (DataSourceEntity) { DataSourceEntity["DataSourceFile"] = "data_source_file"; })(DataSourceEntity || (DataSourceEntity = {})); const SystemEntitySchema = z.nativeEnum(SystemEntity); const DataSourceEntitySchema = z.nativeEnum(DataSourceEntity); const entitySchema = z.union([ SystemEntitySchema, PeopleEntitySchema, FileEntitySchema, NotionEntitySchema, MailEntitySchema, EventEntitySchema, MailAttachmentEntitySchema, ChatEntitySchema, DataSourceEntitySchema, ]); const scoredChunk = z.object({ chunk: z.string(), score: z.number(), index: z.number(), }); const defaultVespaFieldsSchema = z.object({ relevance: z.number(), source: z.string(), // sddocname: Schemas, documentid: z.string(), }); const SpreadsheetMetadata = z.object({ spreadsheetId: z.string(), totalSheets: z.number(), }); const Metadata = z.union([z.object({}), SpreadsheetMetadata]); const VespaFileSchema = z.object({ docId: z.string(), app: z.nativeEnum(Apps), entity: FileEntitySchema, title: z.string(), url: z.string().nullable(), parentId: z.string().nullable(), chunks: z.array(z.string()), owner: z.string().nullable(), ownerEmail: z.string().nullable(), photoLink: z.string().nullable(), permissions: z.array(z.string()), mimeType: z.string().nullable(), metadata: Metadata, createdAt: z.number(), updatedAt: z.number(), }); const chunkScoresSchema = z.object({ cells: z.record(z.string(), z.number()), }); // Match features for file schema const FileMatchFeaturesSchema = z.object({ "bm25(title)": z.number().optional(), "bm25(chunks)": z.number().optional(), "closeness(field, chunk_embeddings)": 
z.number().optional(), chunk_scores: chunkScoresSchema, }); // Match features for user schema const UserMatchFeaturesSchema = z.object({ "bm25(name)": z.number().optional(), "bm25(email)": z.number().optional(), }); // Match features for mail schema const MailMatchFeaturesSchema = z.object({ "bm25(subject)": z.number().optional(), "bm25(chunks)": z.number().optional(), "bm25(attachmentFilenames)": z.number().optional(), chunk_scores: chunkScoresSchema, }); const EventMatchFeaturesSchema = z.object({ "bm25(name)": z.number().optional(), "bm25(description)": z.number().optional(), "bm25(attachmentFilenames)": z.number().optional(), "bm25(attendeesNames)": z.number().optional(), }); const MailAttachmentMatchFeaturesSchema = z.object({ chunk_vector_score: z.number().optional(), scaled_bm25_chunks: z.number().optional(), scaled_bm25_filename: z.number().optional(), chunk_scores: chunkScoresSchema, }); const ChatMessageMatchFeaturesSchema = z.object({ vector_score: z.number().optional(), combined_nativeRank: z.number().optional(), "nativeRank(text)": z.number().optional(), "nativeRank(username)": z.number().optional(), "nativeRank(name)": z.number().optional(), }); const DataSourceFileMatchFeaturesSchema = z.object({ "bm25(fileName)": z.number().optional(), "bm25(chunks)": z.number().optional(), "closeness(field, chunk_embeddings)": z.number().optional(), chunk_scores: chunkScoresSchema.optional(), }); z.union([ FileMatchFeaturesSchema, MailMatchFeaturesSchema, MailAttachmentMatchFeaturesSchema, DataSourceFileMatchFeaturesSchema, ]); // Base schema for DataSource (for insertion) const VespaDataSourceSchemaBase = z.object({ docId: z.string(), name: z.string(), createdBy: z.string(), createdAt: z.number(), // long updatedAt: z.number(), // long }); // Search schema for DataSource const VespaDataSourceSearchSchema = VespaDataSourceSchemaBase.extend({ sddocname: z.literal(datasourceSchema), matchfeatures: z.any().optional(), rankfeatures: z.any().optional(), 
}).merge(defaultVespaFieldsSchema); // Base schema for DataSourceFile (for insertion) const VespaDataSourceFileSchemaBase = z.object({ docId: z.string(), description: z.string().optional(), app: z.literal(Apps.DataSource), fileName: z.string().optional(), fileSize: z.number().optional(), // long chunks: z.array(z.string()), image_chunks: z.array(z.string()).optional(), // Added for image descriptions chunks_pos: z.array(z.number()).optional(), // Added for text chunk positions image_chunks_pos: z.array(z.number()).optional(), // Added for image chunk positions uploadedBy: z.string(), duration: z.number().optional(), // long mimeType: z.string().optional(), createdAt: z.number(), // long updatedAt: z.number(), // long dataSourceRef: z.string(), // reference to datasource docId metadata: z.string().optional(), // JSON string }); // Search schema for DataSourceFile const VespaDataSourceFileSearchSchema = VespaDataSourceFileSchemaBase.extend({ sddocname: z.literal(dataSourceFileSchema), matchfeatures: DataSourceFileMatchFeaturesSchema, rankfeatures: z.any().optional(), dataSourceName: z.string().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ chunks_summary: z.array(z.union([z.string(), scoredChunk])).optional(), image_chunks_summary: z .array(z.union([z.string(), scoredChunk])) .optional(), chunks_pos_summary: z.array(z.number()).optional(), image_chunks_pos_summary: z.array(z.number()).optional(), }); const VespaFileSearchSchema = VespaFileSchema.extend({ sddocname: z.literal(fileSchema), matchfeatures: FileMatchFeaturesSchema, rankfeatures: z.any().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ chunks_summary: z.array(z.union([z.string(), scoredChunk])).optional(), }); // basically GetDocument doesn't return sddocname // in search it's always present const VespaFileGetSchema = VespaFileSchema.merge(defaultVespaFieldsSchema); const VespaUserSchema = z .object({ docId: z.string().min(1), name: z.string().optional(), //.min(1), email: 
z.string().min(1).email(), app: z.nativeEnum(Apps), entity: z.nativeEnum(GooglePeopleEntity), gender: z.string().optional(), photoLink: z.string().optional(), aliases: z.array(z.string()).optional(), language: z.string().optional(), includeInGlobalAddressList: z.boolean().optional(), isAdmin: z.boolean().optional(), isDelegatedAdmin: z.boolean().optional(), suspended: z.boolean().optional(), archived: z.boolean().optional(), urls: z.array(z.string()).optional(), rankfeatures: z.any().optional(), orgName: z.string().optional(), orgJobTitle: z.string().optional(), orgDepartment: z.string().optional(), orgLocation: z.string().optional(), orgDescription: z.string().optional(), creationTime: z.number(), lastLoggedIn: z.number().optional(), birthday: z.number().optional(), occupations: z.array(z.string()).optional(), userDefined: z.array(z.string()).optional(), customerId: z.string().optional(), clientData: z.array(z.string()).optional(), // this only exists for contacts owner: z.string().optional(), sddocname: z.literal(userSchema), }) .merge(defaultVespaFieldsSchema); // Mail Types const AttachmentSchema = z.object({ fileType: z.string(), fileSize: z.number(), }); const MailSchema = z.object({ docId: z.string(), threadId: z.string(), mailId: z.string().optional(), // Optional for threads subject: z.string().default(""), // Default to empty string to avoid zod errors when subject is missing chunks: z.array(z.string()), timestamp: z.number(), app: z.nativeEnum(Apps), userMap: z.optional(z.record(z.string(), z.string())), entity: z.nativeEnum(MailEntity), permissions: z.array(z.string()), from: z.string(), to: z.array(z.string()), cc: z.array(z.string()), bcc: z.array(z.string()), mimeType: z.string(), attachmentFilenames: z.array(z.string()), attachments: z.array(AttachmentSchema), labels: z.array(z.string()), }); const VespaMailSchema = MailSchema.extend({ docId: z.string().min(1), }); const MailAttachmentSchema = z.object({ docId: z.string(), mailId: z.string(), 
threadId: z.string(), partId: z.number().nullable().optional(), app: z.nativeEnum(Apps), entity: z.nativeEnum(MailAttachmentEntity), chunks: z.array(z.string()), timestamp: z.number(), permissions: z.array(z.string()), filename: z.string(), fileType: z.string().nullable().optional(), fileSize: z.number().nullable().optional(), }); const VespaMailAttachmentSchema = MailAttachmentSchema.extend({}); const EventUser = z.object({ email: z.string(), displayName: z.string(), }); const EventAtatchment = z.object({ fileId: z.string(), title: z.string(), fileUrl: z.string(), mimeType: z.string(), }); const VespaEventSchema = z.object({ docId: z.string(), name: z.string(), description: z.string(), url: z.string(), status: z.string(), location: z.string(), createdAt: z.number(), updatedAt: z.number(), app: z.nativeEnum(Apps), entity: z.nativeEnum(CalendarEntity), creator: EventUser, organizer: EventUser, attendees: z.array(EventUser), attendeesNames: z.array(z.string()), startTime: z.number(), endTime: z.number(), attachmentFilenames: z.array(z.string()), attachments: z.array(EventAtatchment), recurrence: z.array(z.string()), baseUrl: z.string(), joiningLink: z.string(), permissions: z.array(z.string()), cancelledInstances: z.array(z.string()), defaultStartTime: z.boolean(), }); const VespaMailSearchSchema = VespaMailSchema.extend({ sddocname: z.literal("mail"), matchfeatures: MailMatchFeaturesSchema, rankfeatures: z.any().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ // attachment won't have this chunks_summary: z.array(z.union([z.string(), scoredChunk])).optional(), }); const VespaMailAttachmentSearchSchema = VespaMailAttachmentSchema.extend({ sddocname: z.literal("mail_attachment"), matchfeatures: MailAttachmentMatchFeaturesSchema, rankfeatures: z.any().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ chunks_summary: z.array(z.union([z.string(), scoredChunk])).optional(), }); const VespaEventSearchSchema = VespaEventSchema.extend({ sddocname: 
z.literal("event"), // Assuming events can have rankfeatures rankfeatures: z.any().optional(), }).merge(defaultVespaFieldsSchema); const VespaUserQueryHistorySchema = z.object({ docId: z.string(), query_text: z.string(), timestamp: z.number(), count: z.number(), }); const VespaUserQueryHGetSchema = VespaUserQueryHistorySchema.extend({ sddocname: z.literal("user_query"), }).merge(defaultVespaFieldsSchema); const VespaMailGetSchema = VespaMailSchema.merge(defaultVespaFieldsSchema); const VespaMailAttachmentGetSchema = VespaMailAttachmentSchema.merge(defaultVespaFieldsSchema); const VespaChatMessageSchema = z.object({ docId: z.string(), // client_msg_id from Slack teamId: z.string(), // Slack team ID (e.g., "T05N1EJSE0K") channelId: z.string(), // Slack channel ID (e.g., "C123ABC456") text: z.string(), userId: z.string(), // Slack user ID (e.g., "U032QT45V53") app: z.nativeEnum(Apps), // App (e.g., "slack") entity: z.nativeEnum(SlackEntity), // Entity (e.g., "message") name: z.string(), username: z.string(), image: z.string(), channelName: z.string().optional(), // derived isIm: z.boolean().optional(), // derived isMpim: z.boolean().optional(), // derived isPrivate: z.boolean().optional(), // derived permissions: z.array(z.string()).optional(), // derived, teamName: z.string().optional(), // derived domain: z.string().optional(), // derived createdAt: z.number(), // Slack ts (e.g., 1734442791.514519) teamRef: z.string(), // vespa id for team threadId: z.string().default(""), // Slack thread_ts, null if not in thread attachmentIds: z.array(z.string()).default([]), // Slack file IDs (e.g., ["F0857N0FF4N"]) // reactions: z.array(z.string()), // Commented out in Vespa schema, so excluded mentions: z.array(z.string()), // Extracted from text (e.g., ["U032QT45V53"]) updatedAt: z.number(), // Slack edited.ts (e.g., 1734442538.0), null if not edited deletedAt: z.number(), metadata: z.string(), // JSON string for subtype, etc. 
(e.g., "{\"subtype\": null}") }); const VespaChatMessageSearchSchema = VespaChatMessageSchema.extend({ sddocname: z.literal(chatMessageSchema), matchfeatures: ChatMessageMatchFeaturesSchema, rankfeatures: z.any().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ chunks_summary: z.array(z.string()).optional(), }); const VespaChatMessageGetSchema = VespaChatMessageSchema.merge(defaultVespaFieldsSchema); const VespaChatUserSchema = z.object({ docId: z.string(), name: z.string(), title: z.string(), app: z.nativeEnum(Apps), entity: z.nativeEnum(SlackEntity), image: z.string(), email: z.string(), statusText: z.string(), tz: z.string(), teamId: z.string(), deleted: z.boolean(), isAdmin: z.boolean(), updatedAt: z.number(), }); z.object({ id: z.string(), pathId: z.string(), fields: VespaChatUserSchema, }); const VespaChatUserSearchSchema = VespaChatUserSchema.extend({ sddocname: z.literal(chatUserSchema), }).merge(defaultVespaFieldsSchema); const VespaChatContainerSchema = z.object({ docId: z.string(), name: z.string(), channelName: z.string(), creator: z.string(), app: z.nativeEnum(Apps), entity: z.nativeEnum(SlackEntity), isPrivate: z.boolean(), isArchived: z.boolean(), isGeneral: z.boolean(), isIm: z.boolean(), isMpim: z.boolean(), domain: z.string().optional(), // derived permissions: z.array(z.string()), createdAt: z.number(), updatedAt: z.number(), lastSyncedAt: z.number(), topic: z.string(), description: z.string(), count: z.number().int(), }); // Schema for search results that includes Vespa fields const VespaChatContainerSearchSchema = VespaChatContainerSchema.extend({ sddocname: z.literal(chatContainerSchema), }).merge(defaultVespaFieldsSchema); const ChatContainerMatchFeaturesSchema = z.object({ "bm25(name)": z.number().optional(), "bm25(topic)": z.number().optional(), "bm25(description)": z.number().optional(), "closeness(field, chunk_embeddings)": z.number().optional(), }); const VespaChatTeamSchema = z.object({ docId: z.string(), name: z.string(), app: 
z.nativeEnum(Apps), icon: z.string(), url: z.string(), domain: z.string(), email_domain: z.string(), own: z.boolean(), createdAt: z.number(), updatedAt: z.number(), count: z.number().int(), }); VespaChatTeamSchema.extend({ sddocname: z.literal(chatTeamSchema), }).merge(defaultVespaFieldsSchema); const VespaSearchFieldsUnionSchema = z.discriminatedUnion("sddocname", [ VespaUserSchema, VespaFileSearchSchema, VespaMailSearchSchema, VespaEventSearchSchema, VespaUserQueryHGetSchema, VespaMailAttachmentSearchSchema, VespaChatContainerSearchSchema, VespaChatUserSearchSchema, VespaChatMessageSearchSchema, VespaDataSourceSearchSchema, VespaDataSourceFileSearchSchema, ]); // Get schema for DataSourceFile const VespaDataSourceFileGetSchema = VespaDataSourceFileSchemaBase.merge(defaultVespaFieldsSchema); const SearchMatchFeaturesSchema = z.union([ FileMatchFeaturesSchema, UserMatchFeaturesSchema, MailMatchFeaturesSchema, EventMatchFeaturesSchema, MailAttachmentMatchFeaturesSchema, ChatMessageMatchFeaturesSchema, DataSourceFileMatchFeaturesSchema, ChatContainerMatchFeaturesSchema, ]); const VespaSearchFieldsSchema = z .object({ matchfeatures: SearchMatchFeaturesSchema, sddocname: Schemas, }) .and(VespaSearchFieldsUnionSchema); z.union([ VespaUserSchema, VespaFileGetSchema, VespaMailGetSchema, VespaDataSourceFileGetSchema, ]); const VespaSearchResultsSchema = z.object({ id: z.string(), relevance: z.number(), fields: VespaSearchFieldsSchema, pathId: z.string().optional(), }); z.object({ id: z.string(), relevance: z.number(), fields: VespaSearchFieldsSchema, pathId: z.string().optional(), }); const VespaGroupSchema = z.object({ id: z.string(), relevance: z.number(), label: z.string(), value: z.string().optional(), fields: z .object({ "count()": z.number(), }) .optional(), children: z.array(z.lazy(() => VespaGroupSchema)).optional(), }); const VespaErrorSchema = z.object({ code: z.number(), summary: z.string(), source: z.string(), message: z.string(), }); const VespaRootBaseSchema 
= z.object({ root: z.object({ id: z.string(), relevance: z.number(), fields: z .object({ totalCount: z.number(), }) .optional(), coverage: z.object({ coverage: z.number(), documents: z.number(), full: z.boolean(), nodes: z.number(), results: z.number(), resultsFull: z.number(), }), errors: z.array(VespaErrorSchema).optional(), }), trace: z.any().optional(), // Add optional trace field to the root }); const VespaSearchResultSchema = z.union([ VespaSearchResultsSchema, VespaGroupSchema, ]); VespaRootBaseSchema.extend({ root: VespaRootBaseSchema.shape.root.extend({ children: z.array(VespaSearchResultSchema), }), }); const AutocompleteMatchFeaturesSchema = z.union([ z.object({ "bm25(title_fuzzy)": z.number(), }), z.object({ "bm25(email_fuzzy)": z.number(), "bm25(name_fuzzy)": z.number(), }), z.object({ "bm25(subject_fuzzy)": z.number(), }), ]); const VespaAutocompleteFileSchema = z .object({ docId: z.string(), title: z.string(), app: z.nativeEnum(Apps), entity: entitySchema, sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteUserSchema = z .object({ docId: z.string(), // optional due to contacts name: z.string().optional(), email: z.string(), app: z.nativeEnum(Apps), entity: entitySchema, photoLink: z.string(), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteMailSchema = z .object({ docId: z.string(), threadId: z.string(), subject: z.string().optional(), app: z.nativeEnum(Apps), entity: entitySchema, sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteChatUserSchema = z .object({ docId: z.string(), // optional due to contacts name: z.string().optional(), email: z.string(), app: z.nativeEnum(Apps), entity: entitySchema, image: z.string(), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteMailAttachmentSchema = z .object({ docId: z.string(), filename: z.string(), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); z .object({ docId: z.string(), 
name: z.string().optional(), app: z.nativeEnum(Apps), entity: entitySchema, sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteUserQueryHSchema = z .object({ docId: z.string(), query_text: z.string(), timestamp: z.number().optional(), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteChatContainerSchema = z .object({ docId: z.string(), name: z.string(), app: z.nativeEnum(Apps), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteSummarySchema = z.union([ VespaAutocompleteFileSchema, VespaAutocompleteUserSchema, VespaAutocompleteMailSchema, VespaAutocompleteUserQueryHSchema, VespaAutocompleteMailAttachmentSchema, VespaAutocompleteChatContainerSchema, VespaAutocompleteChatUserSchema, ]); const VespaAutocompleteFieldsSchema = z .object({ matchfeatures: AutocompleteMatchFeaturesSchema, sddocname: Schemas, }) .and(VespaAutocompleteSummarySchema); const VespaAutocompleteSchema = z.object({ id: z.string(), relevance: z.number(), source: z.string(), fields: VespaAutocompleteFieldsSchema, }); VespaRootBaseSchema.extend({ root: VespaRootBaseSchema.shape.root.extend({ children: z.array(VespaAutocompleteSchema), }), }); const MailResponseSchema = VespaMailGetSchema.pick({ docId: true, threadId: true, app: true, entity: true, subject: true, from: true, relevance: true, timestamp: true, userMap: true, mailId: true, }) .strip() .extend({ type: z.literal("mail"), mimeType: z.string(), chunks_summary: z.array(scoredChunk).optional(), matchfeatures: z.any().optional(), rankfeatures: z.any().optional(), }); const MailAttachmentResponseSchema = VespaMailAttachmentGetSchema.pick({ docId: true, app: true, entity: true, relevance: true, timestamp: true, filename: true, mailId: true, partId: true, fileType: true, }) .strip() .extend({ type: z.literal("mail_attachment"), chunks_summary: z.array(scoredChunk).optional(), matchfeatures: z.any().optional(), rankfeatures: z.any().optional(), }); const 
ChatMessageResponseSchema = VespaChatMessageGetSchema.pick({ docId: true, teamId: true, channelId: true, text: true, userId: true, app: true, entity: true, createdAt: true, threadId: true, image: true, name: true, domain: true, username: true, attachmentIds: true, mentions: true, relevance: true, updatedAt: true, }) .strip() .extend({ type: z.literal("chat_message"), chunks_summary: z.array(z.string()).optional(), matchfeatures: z.any().optional(), rankfeatures: z.any().optional(), }); const DataSourceFileResponseSchema = VespaDataSourceFileGetSchema.pick({ docId: true, description: true, app: true, fileName: true, fileSize: true, uploadedBy: true, duration: true, mimeType: true, createdAt: true, updatedAt: true, dataSourceRef: true, metadata: true, relevance: true, }) .strip() .extend({ type: z.literal(dataSourceFileSchema), // Using the schema const for the literal chunks_summary: z.array(z.union([z.string(), scoredChunk])).optional(), matchfeatures: DataSourceFileMatchFeaturesSchema.optional(), // or z.any().optional() if specific match features aren't always needed here rankfeatures: z.any().optional(), }); ({ gmail: Apps.Gmail, drive: Apps.GoogleDrive, googledrive: Apps.GoogleDrive, googlecalendar: Apps.GoogleCalendar, slack: Apps.Slack, datasource: Apps.DataSource, "google-workspace": Apps.GoogleWorkspace, googledocs: Apps.GoogleDrive, googlesheets: Apps.GoogleDrive, pdf: Apps.GoogleDrive, }); const AutocompleteFileSchema = z .object({ type: z.literal(fileSchema), relevance: z.number(), title: z.string(), app: z.nativeEnum(Apps), entity: entitySchema, }) .strip(); const AutocompleteUserSchema = z .object({ type: z.literal(userSchema), relevance: z.number(), // optional due to contacts name: z.string().optional(), email: z.string(), app: z.nativeEnum(Apps), entity: entitySchema, photoLink: z.string().optional(), }) .strip(); const AutocompleteUserQueryHSchema = z .object({ type: z.literal(userQuerySchema), docId: z.string(), query_text: z.string(), timestamp: 
z.number().optional(), }) .strip(); const AutocompleteMailSchema = z .object({ type: z.literal(mailSchema), relevance: z.number(), // optional due to contacts subject: z.string().optional(), app: z.nativeEnum(Apps), entity: entitySchema, threadId: z.string().optional(), docId: z.string(), }) .strip(); const AutocompleteMailAttachmentSchema = z .object({ type: z.literal(mailAttachmentSchema), relevance: z.number(), app: z.nativeEnum(Apps), entity: entitySchema, filename: z.string(), docId: z.string(), }) .strip(); const AutocompleteEventSchema = z .object({ type: z.literal(eventSchema), relevance: z.number(), name: z.string().optional(), app: z.nativeEnum(Apps), entity: entitySchema, docId: z.string(), }) .strip(); const AutocompleteChatUserSchema = z .object({ type: z.literal(chatUserSchema), relevance: z.number(), // optional due to contacts name: z.string().optional(), email: z.string().optional(), app: z.nativeEnum(Apps), entity: entitySchema, image: z.string(), }) .strip(); const AutocompleteSchema = z.discriminatedUnion("type", [ AutocompleteFileSchema, AutocompleteUserSchema, AutocompleteMailSchema, AutocompleteEventSchema, AutocompleteUserQueryHSchema, AutocompleteMailAttachmentSchema, AutocompleteChatUserSchema, ]); z.object({ results: z.array(AutocompleteSchema), }); // search result const FileResponseSchema = VespaFileSchema.pick({ docId: true, title: true, url: true, app: true, entity: true, owner: true, ownerEmail: true, photoLink: true, updatedAt: true, }) .extend({ type: z.literal(fileSchema), chunk: z.string().optional(), chunkIndex: z.number().optional(), mimeType: z.string(), chunks_summary: z.array(scoredChunk).optional(), relevance: z.number(), matchfeatures: z.any().optional(), // Add matchfeatures rankfeatures: z.any().optional(), }) .strip(); const EventResponseSchema = VespaEventSchema.pick({ docId: true, name: true, url: true, app: true, entity: true, updatedAt: true, }) .extend({ type: z.literal(eventSchema), relevance: z.number(), 
description: z.string().optional(), chunks_summary: z.array(z.string()).optional(), attendeesNames: z.array(z.string()).optional(), matchfeatures: z.any().optional(), // Add matchfeatures rankfeatures: z.any().optional(), }) .strip(); const UserResponseSchema = VespaUserSchema.pick({ name: true, email: true, app: true, entity: true, photoLink: true, docId: true, }) .strip() .extend({ type: z.literal(userSchema), relevance: z.number(), matchfeatures: z.any().optional(), // Add matchfeatures rankfeatures: z.any().optional(), }); // Search Response Schema const SearchResultsSchema = z.discriminatedUnion("type", [ UserResponseSchema, FileResponseSchema, DataSourceFileResponseSchema, MailResponseSchema, EventResponseSchema, MailAttachmentResponseSchema, ChatMessageResponseSchema, ]); z.object({ count: z.number(), results: z.array(SearchResultsSchema), groupCount: z.any(), trace: z.any().optional(), }); z.object({ from: z.array(z.string()).optional(), to: z.array(z.string()).optional(), cc: z.array(z.string()).optional(), bcc: z.array(z.string()).optional(), subject: z.array(z.string()).optional(), }); var SearchModes; (function (SearchModes) { SearchModes["NativeRank"] = "default_native"; SearchModes["BM25"] = "default_bm25"; SearchModes["AI"] = "default_ai"; SearchModes["Random"] = "default_random"; SearchModes["GlobalSorted"] = "global_sorted"; })(SearchModes || (SearchModes = {})); const getErrorMessage = (error) => { if (error instanceof Error) return error.message; return String(error); }; // module contains all the transformations // from vespa to the user accepted types function handleVespaGroupResponse(response) { const appEntityCounts = {}; // Navigate to the first level of groups const groupRoot = response.root.children?.[0]; // Assuming this is the group:root level if (!groupRoot || !("children" in groupRoot)) return appEntityCounts; // Safeguard for empty responses // Navigate to the app grouping (e.g., grouplist:app) const appGroup = 
groupRoot.children?.[0]; if (!appGroup || !("children" in appGroup)) return appEntityCounts; // Safeguard for missing app group // Iterate through the apps // @ts-ignore for (const app of appGroup.children) { const appName = app.value; // Get the app name appEntityCounts[appName] = {}; // Initialize the app entry // Navigate to the entity grouping (e.g., grouplist:entity) const entityGroup = app.children?.[0]; if (!entityGroup || !("children" in entityGroup)) continue; // Skip if no entities // Iterate through the entities // @ts-ignore for (const entity of entityGroup.children) { const entityName = entity.value; // Get the entity name const count = entity.fields?.["count()"] || 0; // Get the count or default to 0 appEntityCounts[appName][entityName] = count; // Assign the count to the app-entity pair } } return appEntityCounts; // Return the final map } // Console fallback logger const consoleLogger = { info: (message, ...args) => console.info(`[INFO] ${message}`, ...args), error: (message, ...args) => { const msg = message instanceof Error ? 
message.message : message; console.error(`[ERROR] ${msg}`, ...args); }, warn: (message, ...args) => console.warn(`[WARN] ${message}`, ...args), debug: (message, ...args) => console.debug(`[DEBUG] ${message}`, ...args), child: (metadata) => consoleLogger, }; class VespaClient { constructor(endpoint, logger, config) { this.logger = logger || consoleLogger; this.maxRetries = config?.vespaMaxRetryAttempts || 3; this.retryDelay = config?.vespaRetryDelay || 1000; // milliseconds this.vespaEndpoint = endpoint || `http://${config?.vespaBaseHost || "localhost"}:8080`; } async delay(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } async fetchWithRetry(url, options, retryCount = 0) { const nonRetryableStatusCodes = [404]; try { const response = await fetch(url, options); if (!response.ok) { // Don't need to retry for non-retryable status codes if (nonRetryableStatusCodes.includes(response.status)) { throw new Error(`Non-retryable error: ${response.status} ${response.statusText}`); } // Retry for 429 (Too Many Requests) or 5xx errors if ((response.status === 429 || response.status >= 500) && retryCount < this.maxRetries) { this.logger.info("retrying due to status: ", response.status); await this.delay(this.retryDelay * Math.pow(2, retryCount)); return this.fetchWithRetry(url, options, retryCount + 1); } } return response; } catch (error) { const errorMessage = getErrorMessage(error); if (retryCount < this.maxRetries && !errorMessage.includes("Non-retryable error")) { await this.delay(this.retryDelay * Math.pow(2, retryCount)); // Exponential backoff return this.fetchWithRetry(url, options, retryCount + 1); } throw error; } } async search(payload) { const url = `${this.vespaEndpoint}/search/`; try { const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify(payload), }); if (!response.ok) { const errorText = response.statusText; const errorBody = await response.text(); 
this.logger.error(`Vespa search failed - Status: ${response.status}, StatusText: ${errorText}`); this.logger.error(`Vespa error body: ${errorBody}`); throw new Error(`Failed to fetch documents in searchVespa: ${response.status} ${response.statusText} - ${errorText}`); } const result = await response.json(); return result; } catch (error) { this.logger.error(`VespaClient.search error:`, error); throw new Error(`Vespa search error: ${error.message}`); } } async fetchDocumentBatch(schema, options, limit, offset, email) { const yqlQuery = `select * from sources ${schema} where true`; const searchPayload = { yql: yqlQuery, hits: limit, offset, timeout: "10s", }; const response = await this.search(searchPayload); return (response.root?.children || []).map((doc) => { // Use optional chaining and nullish coalescing to safely extract fields const { matchfeatures, ...fieldsWithoutMatch } = doc.fields; return fieldsWithoutMatch; }); } async getAllDocumentsParallel(schema, options, concurrency = 3, email) { // First get document count const countResponse = await this.getDocumentCount(schema, options, email); const totalCount = countResponse?.root?.fields?.totalCount || 0; if (totalCount === 0) return []; // Calculate optimal batch size and create batch tasks const batchSize = 350; const tasks = []; for (let offset = 0; offset < totalCount; offset += batchSize) { tasks.push(() => this.fetchDocumentBatch(schema, options, batchSize, offset, email)); } // Run tasks with concurrency limit const pLimit = (await import('p-limit')).default; const limit = pLimit(concurrency); const results = await Promise.all(tasks.map((task) => limit(task))); // Flatten results return results.flat(); } async deleteAllDocuments(options) { const { cluster, namespace, schema } = options; // Construct the DELETE URL const url = `${this.vespaEndpoint}/document/v1/${namespace}/${schema}/docid?selection=true&cluster=${cluster}`; try { const response = await this.fetchWithRetry(url, { method: "DELETE", }); if 
(response.ok) { this.logger.info("All documents deleted successfully."); } else { const errorText = response.statusText; throw new Error(`Failed to delete documents: ${response.status} ${response.statusText} - ${errorText}`); } } catch (error) { this.logger.error(`Error deleting documents:, ${error} ${error.stack}`, error); throw new Error(`Vespa delete error: ${error}`); } } async insertDocument(document, options) { try { const url = `${this.vespaEndpoint}/document/v1/${options.namespace}/${options.schema}/docid/${document.docId}`; const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ fields: document }), }); if (!response.ok) { // Using status text since response.text() return Body Already used Error const errorText = response.statusText; const errorBody = await response.text(); this.logger.error(`Vespa error: ${errorBody}`); throw new Error(`Failed to insert document: ${response.status} ${response.statusText} - ${errorText}`); } const data = await response.json(); if (response.ok) { // this.logger.info(`Document ${document.docId} inserted successfully`) } else { this.logger.error(`Error inserting document ${document.docId}`); } } catch (error) { const errMessage = getErrorMessage(error); this.logger.error(`Error inserting document ${document.docId}: ${errMessage}`, error); throw new Error(`Error inserting document ${document.docId}: ${errMessage}`); } } async insert(document, options) { try { const url = `${this.vespaEndpoint}/document/v1/${options.namespace}/${options.schema}/docid/${document.docId}`; const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ fields: document }), }); if (!response.ok) { // Using status text since response.text() return Body Already used Error const errorText = response.statusText; const errorBody = await response.text(); this.logger.error(`Vespa error: 
${errorBody}`); throw new Error(`Failed to insert document: ${response.status} ${response.statusText} - ${errorText}`); } const data = await response.json(); if (response.ok) { this.logger.info(`Document ${document.docId} inserted successfully`); } else { } } catch (error) { const errMessage = getErrorMessage(error); this.logger.error(`Error inserting document ${document.docId}: ${errMessage} ${error.stack}`, error); throw new Error(`Error inserting document ${document.docId}: ${errMessage} ${error.stack}`); } } async insertUser(user, options) { try { const url = `${this.vespaEndpoint}/document/v1/${options.namespace}/${options.schema}/docid/${user.docId}`; const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ fields: user }), }); const data = await response.json(); if (response.ok) { // this.logger.info(`Document ${user.docId} inserted successfully:`, data) } else { this.logger.error(`Error inserting user ${user.docId}: ${data}`, data); } } catch (error) { const errorMessage = getErrorMessage(error); this.logger.error(`Error inserting user ${user.docId}:`, errorMessage, error); throw new Error(`Error inserting user ${user.docId}: ${errorMessage}`); } } async autoComplete(searchPayload) { try { const url = `${this.vespaEndpoint}/search/`; const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify(searchPayload), }); if (!response.ok) { const errorText = response.statusText; const errorBody = await response.text(); this.logger.error(`AutoComplete failed - Status: ${response.status}, StatusText: ${errorText}`); this.logger.error(`AutoComplete error body: ${errorBody}`); throw new Error(`Failed to perform autocomplete search: ${response.status} ${response.statusText} - ${errorText}`); } const data = await response.json(); return data; } catch (error) { this.logger.error(`VespaClient.autoComplete error:`, 
error); throw new Error(`Error performing autocomplete search:, ${error} ${error.stack} `); } } async groupSearch(payload) { try { const url = `${this.vespaEndpoint}/search/`; const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify(payload), }); if (!response.ok) { const errorText = response.statusText; throw new Error(`Failed to fetch documents in groupVespaSearch: ${response.status} ${response.statusText} - ${errorText}`); } const data = await response.json(); return handleVespaGroupResponse(data); } catch (error) { this.logger.error(`Error performing search groupVespaSearch:, ${error} - ${error.stack}`, error); throw new Error(`Error performing search groupVespaSearch:, ${error} - ${error.stack}`); } } async getDocumentCount(schema, options, email) { try { // Encode the YQL query to ensure it's URL-safe const yql = encodeURIComponent(`select * from sources ${schema} where uploadedBy contains '${email}'`); // Construct the search URL with necessary query parameters const url = `${this.vespaEndpoint}/search/?yql=${yql}&hits=0&cluster=${options.cluster}`; const response = await this.fetchWithRetry(url, { method: "GET", headers: { Accept: "application/json", }, }); if (!response.ok) { const errorText = response.statusText; throw new Error(`Failed to fetch document count: ${response.status} ${response.statusText} - ${errorText}`); } const data = await response.json(); // Extract the total number of hits from the response const totalCount = data?.root?.fields?.totalCount; if (typeof totalCount === "number") { this.logger.info(`Total documents in schema '${schema}' within namespace '${options.namespace}' and cluster '${options.cluster}': ${totalCount}`); return data; } else { this.logger.error(`Unexpected response structure:', ${data}`); } } catch (error) { const errMessage = getErrorMessage(error); this.logger.error(`Error retrieving document count: ${errMessage}`); throw new Error(`Error 
retrieving document count: ${errMessage}`); } } async getDocument(options) { const { docId, namespace, schema } = options; const url = `${this.vespaEndpoint}/document/v1/${namespace}/${schema}/docid/${docId}`; try { const response = await this.fetchWithRetry(url, { method: "GET", headers: { Accept: "application/json", }, }); if (!response.ok) { const errorText = response.statusText; const errorBody = await response.text(); throw new Error(`Failed to fetch document: ${response.status} ${response.statusText} - ${errorBody}`); } const document = await response.json(); return document; } catch (error) { const errMessage = getErrorMessage(error); throw new Error(`Error fetching document docId: ${docId} - ${errMessage}`); } } async getDocumentsByOnlyDocIds(options) { const { docIds, generateAnswerSpan } = options; const yqlIds = docIds.map((id) => `docId contains '${id}'`).join(" or "); const yqlMailIds = docIds .map((id) => `mailId contains '${id}'`) .join(" or "); const yqlQuery = `select * from sources * where (${yqlIds}) or (${yqlMailIds})`; const url = `${this.vespaEndpoint}/search/`; try { const payload = { yql: yqlQuery, hits: docIds?.length, maxHits: docIds?.length, }; generateAnswerSpan.setAttribute("vespaPayload", JSON.stringify(payload)); const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify(payload), }); if (!response.ok) { const errorText = response.statusText; throw new Error(`Search query failed: ${response.status} ${response.statusText} - ${errorText}`); } const result = await response.json(); return result; } catch (error) { const errMessage = getErrorMessage(error); throw new Error(`Error fetching documents: ${errMessage}`); } } async up