UNPKG

vespa-ts

Version:

A reusable TypeScript package for interacting with the Vespa search engine, with dependency injection support

1,367 lines (1,361 loc) 84.3 kB
'use strict'; Object.defineProperty(exports, '__esModule', { value: true }); var zod = require('zod'); const fileSchema = "file"; // Replace with your actual schema name const userSchema = "user"; // calendar const eventSchema = "event"; // mail const mailAttachmentSchema = "mail_attachment"; const mailSchema = "mail"; // chat const chatContainerSchema = "chat_container"; // this is not meant to be searched but we will // store the data in vespa and fetch it as needed const chatTeamSchema = "chat_team"; const chatMessageSchema = "chat_message"; const chatUserSchema = "chat_user"; // previous queries const userQuerySchema = "user_query"; const datasourceSchema = "datasource"; const dataSourceFileSchema = "datasource_file"; // not using @ because of vite of frontend var Apps; (function (Apps) { // includes everything google Apps["GoogleWorkspace"] = "google-workspace"; // more granular Apps["GoogleDrive"] = "google-drive"; Apps["Gmail"] = "gmail"; // Notion = "notion", // Notion is not yet supported Apps["GoogleCalendar"] = "google-calendar"; Apps["Slack"] = "slack"; Apps["MCP"] = "mcp"; Apps["Github"] = "github"; Apps["Xyne"] = "xyne"; Apps["DataSource"] = "data-source"; })(Apps || (Apps = {})); var GooglePeopleEntity; (function (GooglePeopleEntity) { GooglePeopleEntity["Contacts"] = "Contacts"; GooglePeopleEntity["OtherContacts"] = "OtherContacts"; GooglePeopleEntity["AdminDirectory"] = "AdminDirectory"; })(GooglePeopleEntity || (GooglePeopleEntity = {})); // the vespa schemas const Schemas = zod.z.union([ zod.z.literal(fileSchema), zod.z.literal(userSchema), zod.z.literal(mailSchema), zod.z.literal(eventSchema), zod.z.literal(userQuerySchema), zod.z.literal(mailAttachmentSchema), zod.z.literal(chatContainerSchema), zod.z.literal(chatTeamSchema), zod.z.literal(chatUserSchema), zod.z.literal(chatMessageSchema), zod.z.literal(datasourceSchema), zod.z.literal(dataSourceFileSchema), ]); var MailEntity; (function (MailEntity) { MailEntity["Email"] = "mail"; 
})(MailEntity || (MailEntity = {})); var CalendarEntity; (function (CalendarEntity) { CalendarEntity["Event"] = "event"; })(CalendarEntity || (CalendarEntity = {})); var SlackEntity; (function (SlackEntity) { SlackEntity["Team"] = "team"; SlackEntity["User"] = "user"; SlackEntity["Message"] = "message"; SlackEntity["Channel"] = "channel"; SlackEntity["File"] = "file"; })(SlackEntity || (SlackEntity = {})); var DriveEntity; (function (DriveEntity) { DriveEntity["Docs"] = "docs"; DriveEntity["Sheets"] = "sheets"; DriveEntity["Slides"] = "slides"; DriveEntity["Presentation"] = "presentation"; DriveEntity["PDF"] = "pdf"; DriveEntity["Folder"] = "folder"; DriveEntity["Misc"] = "driveFile"; DriveEntity["Drawing"] = "drawing"; DriveEntity["Form"] = "form"; DriveEntity["Script"] = "script"; DriveEntity["Site"] = "site"; DriveEntity["Map"] = "map"; DriveEntity["Audio"] = "audio"; DriveEntity["Video"] = "video"; DriveEntity["Photo"] = "photo"; DriveEntity["ThirdPartyApp"] = "third_party_app"; DriveEntity["Image"] = "image"; DriveEntity["Zip"] = "zip"; DriveEntity["WordDocument"] = "word_document"; DriveEntity["ExcelSpreadsheet"] = "excel_spreadsheet"; DriveEntity["PowerPointPresentation"] = "powerpoint_presentation"; DriveEntity["Text"] = "text"; DriveEntity["CSV"] = "csv"; })(DriveEntity || (DriveEntity = {})); var MailAttachmentEntity; (function (MailAttachmentEntity) { MailAttachmentEntity["PDF"] = "pdf"; MailAttachmentEntity["Sheets"] = "sheets"; MailAttachmentEntity["CSV"] = "csv"; MailAttachmentEntity["WordDocument"] = "worddocument"; MailAttachmentEntity["PowerPointPresentation"] = "powerpointpresentation"; MailAttachmentEntity["Text"] = "text"; MailAttachmentEntity["NotValid"] = "notvalid"; })(MailAttachmentEntity || (MailAttachmentEntity = {})); const PeopleEntitySchema = zod.z.nativeEnum(GooglePeopleEntity); const ChatEntitySchema = zod.z.nativeEnum(SlackEntity); var NotionEntity; (function (NotionEntity) { NotionEntity["Page"] = "page"; NotionEntity["Database"] = 
"database"; })(NotionEntity || (NotionEntity = {})); const FileEntitySchema = zod.z.nativeEnum(DriveEntity); const MailEntitySchema = zod.z.nativeEnum(MailEntity); const MailAttachmentEntitySchema = zod.z.nativeEnum(MailAttachmentEntity); const EventEntitySchema = zod.z.nativeEnum(CalendarEntity); const NotionEntitySchema = zod.z.nativeEnum(NotionEntity); var SystemEntity; (function (SystemEntity) { SystemEntity["SystemInfo"] = "system_info"; SystemEntity["UserProfile"] = "user_profile"; })(SystemEntity || (SystemEntity = {})); var DataSourceEntity; (function (DataSourceEntity) { DataSourceEntity["DataSourceFile"] = "data_source_file"; })(DataSourceEntity || (DataSourceEntity = {})); const SystemEntitySchema = zod.z.nativeEnum(SystemEntity); const DataSourceEntitySchema = zod.z.nativeEnum(DataSourceEntity); const entitySchema = zod.z.union([ SystemEntitySchema, PeopleEntitySchema, FileEntitySchema, NotionEntitySchema, MailEntitySchema, EventEntitySchema, MailAttachmentEntitySchema, ChatEntitySchema, DataSourceEntitySchema, ]); const scoredChunk = zod.z.object({ chunk: zod.z.string(), score: zod.z.number(), index: zod.z.number(), }); const defaultVespaFieldsSchema = zod.z.object({ relevance: zod.z.number(), source: zod.z.string(), // sddocname: Schemas, documentid: zod.z.string(), }); const SpreadsheetMetadata = zod.z.object({ spreadsheetId: zod.z.string(), totalSheets: zod.z.number(), }); const Metadata = zod.z.union([zod.z.object({}), SpreadsheetMetadata]); const VespaFileSchema = zod.z.object({ docId: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: FileEntitySchema, title: zod.z.string(), url: zod.z.string().nullable(), parentId: zod.z.string().nullable(), chunks: zod.z.array(zod.z.string()), owner: zod.z.string().nullable(), ownerEmail: zod.z.string().nullable(), photoLink: zod.z.string().nullable(), permissions: zod.z.array(zod.z.string()), mimeType: zod.z.string().nullable(), metadata: Metadata, createdAt: zod.z.number(), updatedAt: zod.z.number(), }); 
const chunkScoresSchema = zod.z.object({ cells: zod.z.record(zod.z.string(), zod.z.number()), }); // Match features for file schema const FileMatchFeaturesSchema = zod.z.object({ "bm25(title)": zod.z.number().optional(), "bm25(chunks)": zod.z.number().optional(), "closeness(field, chunk_embeddings)": zod.z.number().optional(), chunk_scores: chunkScoresSchema, }); // Match features for user schema const UserMatchFeaturesSchema = zod.z.object({ "bm25(name)": zod.z.number().optional(), "bm25(email)": zod.z.number().optional(), }); // Match features for mail schema const MailMatchFeaturesSchema = zod.z.object({ "bm25(subject)": zod.z.number().optional(), "bm25(chunks)": zod.z.number().optional(), "bm25(attachmentFilenames)": zod.z.number().optional(), chunk_scores: chunkScoresSchema, }); const EventMatchFeaturesSchema = zod.z.object({ "bm25(name)": zod.z.number().optional(), "bm25(description)": zod.z.number().optional(), "bm25(attachmentFilenames)": zod.z.number().optional(), "bm25(attendeesNames)": zod.z.number().optional(), }); const MailAttachmentMatchFeaturesSchema = zod.z.object({ chunk_vector_score: zod.z.number().optional(), scaled_bm25_chunks: zod.z.number().optional(), scaled_bm25_filename: zod.z.number().optional(), chunk_scores: chunkScoresSchema, }); const ChatMessageMatchFeaturesSchema = zod.z.object({ vector_score: zod.z.number().optional(), combined_nativeRank: zod.z.number().optional(), "nativeRank(text)": zod.z.number().optional(), "nativeRank(username)": zod.z.number().optional(), "nativeRank(name)": zod.z.number().optional(), }); const DataSourceFileMatchFeaturesSchema = zod.z.object({ "bm25(fileName)": zod.z.number().optional(), "bm25(chunks)": zod.z.number().optional(), "closeness(field, chunk_embeddings)": zod.z.number().optional(), chunk_scores: chunkScoresSchema.optional(), }); zod.z.union([ FileMatchFeaturesSchema, MailMatchFeaturesSchema, MailAttachmentMatchFeaturesSchema, DataSourceFileMatchFeaturesSchema, ]); // Base schema for DataSource 
(for insertion) const VespaDataSourceSchemaBase = zod.z.object({ docId: zod.z.string(), name: zod.z.string(), createdBy: zod.z.string(), createdAt: zod.z.number(), // long updatedAt: zod.z.number(), // long }); // Search schema for DataSource const VespaDataSourceSearchSchema = VespaDataSourceSchemaBase.extend({ sddocname: zod.z.literal(datasourceSchema), matchfeatures: zod.z.any().optional(), rankfeatures: zod.z.any().optional(), }).merge(defaultVespaFieldsSchema); // Base schema for DataSourceFile (for insertion) const VespaDataSourceFileSchemaBase = zod.z.object({ docId: zod.z.string(), description: zod.z.string().optional(), app: zod.z.literal(Apps.DataSource), fileName: zod.z.string().optional(), fileSize: zod.z.number().optional(), // long chunks: zod.z.array(zod.z.string()), image_chunks: zod.z.array(zod.z.string()).optional(), // Added for image descriptions chunks_pos: zod.z.array(zod.z.number()).optional(), // Added for text chunk positions image_chunks_pos: zod.z.array(zod.z.number()).optional(), // Added for image chunk positions uploadedBy: zod.z.string(), duration: zod.z.number().optional(), // long mimeType: zod.z.string().optional(), createdAt: zod.z.number(), // long updatedAt: zod.z.number(), // long dataSourceRef: zod.z.string(), // reference to datasource docId metadata: zod.z.string().optional(), // JSON string }); // Search schema for DataSourceFile const VespaDataSourceFileSearchSchema = VespaDataSourceFileSchemaBase.extend({ sddocname: zod.z.literal(dataSourceFileSchema), matchfeatures: DataSourceFileMatchFeaturesSchema, rankfeatures: zod.z.any().optional(), dataSourceName: zod.z.string().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(), image_chunks_summary: zod.z .array(zod.z.union([zod.z.string(), scoredChunk])) .optional(), chunks_pos_summary: zod.z.array(zod.z.number()).optional(), image_chunks_pos_summary: zod.z.array(zod.z.number()).optional(), 
}); const VespaFileSearchSchema = VespaFileSchema.extend({ sddocname: zod.z.literal(fileSchema), matchfeatures: FileMatchFeaturesSchema, rankfeatures: zod.z.any().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(), }); // basically GetDocument doesn't return sddocname // in search it's always present const VespaFileGetSchema = VespaFileSchema.merge(defaultVespaFieldsSchema); const VespaUserSchema = zod.z .object({ docId: zod.z.string().min(1), name: zod.z.string().optional(), //.min(1), email: zod.z.string().min(1).email(), app: zod.z.nativeEnum(Apps), entity: zod.z.nativeEnum(GooglePeopleEntity), gender: zod.z.string().optional(), photoLink: zod.z.string().optional(), aliases: zod.z.array(zod.z.string()).optional(), language: zod.z.string().optional(), includeInGlobalAddressList: zod.z.boolean().optional(), isAdmin: zod.z.boolean().optional(), isDelegatedAdmin: zod.z.boolean().optional(), suspended: zod.z.boolean().optional(), archived: zod.z.boolean().optional(), urls: zod.z.array(zod.z.string()).optional(), rankfeatures: zod.z.any().optional(), orgName: zod.z.string().optional(), orgJobTitle: zod.z.string().optional(), orgDepartment: zod.z.string().optional(), orgLocation: zod.z.string().optional(), orgDescription: zod.z.string().optional(), creationTime: zod.z.number(), lastLoggedIn: zod.z.number().optional(), birthday: zod.z.number().optional(), occupations: zod.z.array(zod.z.string()).optional(), userDefined: zod.z.array(zod.z.string()).optional(), customerId: zod.z.string().optional(), clientData: zod.z.array(zod.z.string()).optional(), // this only exists for contacts owner: zod.z.string().optional(), sddocname: zod.z.literal(userSchema), }) .merge(defaultVespaFieldsSchema); // Mail Types const AttachmentSchema = zod.z.object({ fileType: zod.z.string(), fileSize: zod.z.number(), }); const MailSchema = zod.z.object({ docId: zod.z.string(), threadId: zod.z.string(), 
mailId: zod.z.string().optional(), // Optional for threads subject: zod.z.string().default(""), // Default to empty string to avoid zod errors when subject is missing chunks: zod.z.array(zod.z.string()), timestamp: zod.z.number(), app: zod.z.nativeEnum(Apps), userMap: zod.z.optional(zod.z.record(zod.z.string(), zod.z.string())), entity: zod.z.nativeEnum(MailEntity), permissions: zod.z.array(zod.z.string()), from: zod.z.string(), to: zod.z.array(zod.z.string()), cc: zod.z.array(zod.z.string()), bcc: zod.z.array(zod.z.string()), mimeType: zod.z.string(), attachmentFilenames: zod.z.array(zod.z.string()), attachments: zod.z.array(AttachmentSchema), labels: zod.z.array(zod.z.string()), }); const VespaMailSchema = MailSchema.extend({ docId: zod.z.string().min(1), }); const MailAttachmentSchema = zod.z.object({ docId: zod.z.string(), mailId: zod.z.string(), threadId: zod.z.string(), partId: zod.z.number().nullable().optional(), app: zod.z.nativeEnum(Apps), entity: zod.z.nativeEnum(MailAttachmentEntity), chunks: zod.z.array(zod.z.string()), timestamp: zod.z.number(), permissions: zod.z.array(zod.z.string()), filename: zod.z.string(), fileType: zod.z.string().nullable().optional(), fileSize: zod.z.number().nullable().optional(), }); const VespaMailAttachmentSchema = MailAttachmentSchema.extend({}); const EventUser = zod.z.object({ email: zod.z.string(), displayName: zod.z.string(), }); const EventAtatchment = zod.z.object({ fileId: zod.z.string(), title: zod.z.string(), fileUrl: zod.z.string(), mimeType: zod.z.string(), }); const VespaEventSchema = zod.z.object({ docId: zod.z.string(), name: zod.z.string(), description: zod.z.string(), url: zod.z.string(), status: zod.z.string(), location: zod.z.string(), createdAt: zod.z.number(), updatedAt: zod.z.number(), app: zod.z.nativeEnum(Apps), entity: zod.z.nativeEnum(CalendarEntity), creator: EventUser, organizer: EventUser, attendees: zod.z.array(EventUser), attendeesNames: zod.z.array(zod.z.string()), startTime: zod.z.number(), 
endTime: zod.z.number(), attachmentFilenames: zod.z.array(zod.z.string()), attachments: zod.z.array(EventAtatchment), recurrence: zod.z.array(zod.z.string()), baseUrl: zod.z.string(), joiningLink: zod.z.string(), permissions: zod.z.array(zod.z.string()), cancelledInstances: zod.z.array(zod.z.string()), defaultStartTime: zod.z.boolean(), }); const VespaMailSearchSchema = VespaMailSchema.extend({ sddocname: zod.z.literal("mail"), matchfeatures: MailMatchFeaturesSchema, rankfeatures: zod.z.any().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ // attachment won't have this chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(), }); const VespaMailAttachmentSearchSchema = VespaMailAttachmentSchema.extend({ sddocname: zod.z.literal("mail_attachment"), matchfeatures: MailAttachmentMatchFeaturesSchema, rankfeatures: zod.z.any().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(), }); const VespaEventSearchSchema = VespaEventSchema.extend({ sddocname: zod.z.literal("event"), // Assuming events can have rankfeatures rankfeatures: zod.z.any().optional(), }).merge(defaultVespaFieldsSchema); const VespaUserQueryHistorySchema = zod.z.object({ docId: zod.z.string(), query_text: zod.z.string(), timestamp: zod.z.number(), count: zod.z.number(), }); const VespaUserQueryHGetSchema = VespaUserQueryHistorySchema.extend({ sddocname: zod.z.literal("user_query"), }).merge(defaultVespaFieldsSchema); const VespaMailGetSchema = VespaMailSchema.merge(defaultVespaFieldsSchema); const VespaMailAttachmentGetSchema = VespaMailAttachmentSchema.merge(defaultVespaFieldsSchema); const VespaChatMessageSchema = zod.z.object({ docId: zod.z.string(), // client_msg_id from Slack teamId: zod.z.string(), // Slack team ID (e.g., "T05N1EJSE0K") channelId: zod.z.string(), // Slack channel ID (e.g., "C123ABC456") text: zod.z.string(), userId: zod.z.string(), // Slack user ID (e.g., 
"U032QT45V53") app: zod.z.nativeEnum(Apps), // App (e.g., "slack") entity: zod.z.nativeEnum(SlackEntity), // Entity (e.g., "message") name: zod.z.string(), username: zod.z.string(), image: zod.z.string(), channelName: zod.z.string().optional(), // derived isIm: zod.z.boolean().optional(), // derived isMpim: zod.z.boolean().optional(), // derived isPrivate: zod.z.boolean().optional(), // derived permissions: zod.z.array(zod.z.string()).optional(), // derived, teamName: zod.z.string().optional(), // derived domain: zod.z.string().optional(), // derived createdAt: zod.z.number(), // Slack ts (e.g., 1734442791.514519) teamRef: zod.z.string(), // vespa id for team threadId: zod.z.string().default(""), // Slack thread_ts, null if not in thread attachmentIds: zod.z.array(zod.z.string()).default([]), // Slack file IDs (e.g., ["F0857N0FF4N"]) // reactions: z.array(z.string()), // Commented out in Vespa schema, so excluded mentions: zod.z.array(zod.z.string()), // Extracted from text (e.g., ["U032QT45V53"]) updatedAt: zod.z.number(), // Slack edited.ts (e.g., 1734442538.0), null if not edited deletedAt: zod.z.number(), metadata: zod.z.string(), // JSON string for subtype, etc. 
(e.g., "{\"subtype\": null}") }); const VespaChatMessageSearchSchema = VespaChatMessageSchema.extend({ sddocname: zod.z.literal(chatMessageSchema), matchfeatures: ChatMessageMatchFeaturesSchema, rankfeatures: zod.z.any().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ chunks_summary: zod.z.array(zod.z.string()).optional(), }); const VespaChatMessageGetSchema = VespaChatMessageSchema.merge(defaultVespaFieldsSchema); const VespaChatUserSchema = zod.z.object({ docId: zod.z.string(), name: zod.z.string(), title: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: zod.z.nativeEnum(SlackEntity), image: zod.z.string(), email: zod.z.string(), statusText: zod.z.string(), tz: zod.z.string(), teamId: zod.z.string(), deleted: zod.z.boolean(), isAdmin: zod.z.boolean(), updatedAt: zod.z.number(), }); zod.z.object({ id: zod.z.string(), pathId: zod.z.string(), fields: VespaChatUserSchema, }); const VespaChatUserSearchSchema = VespaChatUserSchema.extend({ sddocname: zod.z.literal(chatUserSchema), }).merge(defaultVespaFieldsSchema); const VespaChatContainerSchema = zod.z.object({ docId: zod.z.string(), name: zod.z.string(), channelName: zod.z.string(), creator: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: zod.z.nativeEnum(SlackEntity), isPrivate: zod.z.boolean(), isArchived: zod.z.boolean(), isGeneral: zod.z.boolean(), isIm: zod.z.boolean(), isMpim: zod.z.boolean(), domain: zod.z.string().optional(), // derived permissions: zod.z.array(zod.z.string()), createdAt: zod.z.number(), updatedAt: zod.z.number(), lastSyncedAt: zod.z.number(), topic: zod.z.string(), description: zod.z.string(), count: zod.z.number().int(), }); // Schema for search results that includes Vespa fields const VespaChatContainerSearchSchema = VespaChatContainerSchema.extend({ sddocname: zod.z.literal(chatContainerSchema), }).merge(defaultVespaFieldsSchema); const ChatContainerMatchFeaturesSchema = zod.z.object({ "bm25(name)": zod.z.number().optional(), "bm25(topic)": zod.z.number().optional(), 
"bm25(description)": zod.z.number().optional(), "closeness(field, chunk_embeddings)": zod.z.number().optional(), }); const VespaChatTeamSchema = zod.z.object({ docId: zod.z.string(), name: zod.z.string(), app: zod.z.nativeEnum(Apps), icon: zod.z.string(), url: zod.z.string(), domain: zod.z.string(), email_domain: zod.z.string(), own: zod.z.boolean(), createdAt: zod.z.number(), updatedAt: zod.z.number(), count: zod.z.number().int(), }); VespaChatTeamSchema.extend({ sddocname: zod.z.literal(chatTeamSchema), }).merge(defaultVespaFieldsSchema); const VespaSearchFieldsUnionSchema = zod.z.discriminatedUnion("sddocname", [ VespaUserSchema, VespaFileSearchSchema, VespaMailSearchSchema, VespaEventSearchSchema, VespaUserQueryHGetSchema, VespaMailAttachmentSearchSchema, VespaChatContainerSearchSchema, VespaChatUserSearchSchema, VespaChatMessageSearchSchema, VespaDataSourceSearchSchema, VespaDataSourceFileSearchSchema, ]); // Get schema for DataSourceFile const VespaDataSourceFileGetSchema = VespaDataSourceFileSchemaBase.merge(defaultVespaFieldsSchema); const SearchMatchFeaturesSchema = zod.z.union([ FileMatchFeaturesSchema, UserMatchFeaturesSchema, MailMatchFeaturesSchema, EventMatchFeaturesSchema, MailAttachmentMatchFeaturesSchema, ChatMessageMatchFeaturesSchema, DataSourceFileMatchFeaturesSchema, ChatContainerMatchFeaturesSchema, ]); const VespaSearchFieldsSchema = zod.z .object({ matchfeatures: SearchMatchFeaturesSchema, sddocname: Schemas, }) .and(VespaSearchFieldsUnionSchema); zod.z.union([ VespaUserSchema, VespaFileGetSchema, VespaMailGetSchema, VespaDataSourceFileGetSchema, ]); const VespaSearchResultsSchema = zod.z.object({ id: zod.z.string(), relevance: zod.z.number(), fields: VespaSearchFieldsSchema, pathId: zod.z.string().optional(), }); zod.z.object({ id: zod.z.string(), relevance: zod.z.number(), fields: VespaSearchFieldsSchema, pathId: zod.z.string().optional(), }); const VespaGroupSchema = zod.z.object({ id: zod.z.string(), relevance: zod.z.number(), label: 
zod.z.string(), value: zod.z.string().optional(), fields: zod.z .object({ "count()": zod.z.number(), }) .optional(), children: zod.z.array(zod.z.lazy(() => VespaGroupSchema)).optional(), }); const VespaErrorSchema = zod.z.object({ code: zod.z.number(), summary: zod.z.string(), source: zod.z.string(), message: zod.z.string(), }); const VespaRootBaseSchema = zod.z.object({ root: zod.z.object({ id: zod.z.string(), relevance: zod.z.number(), fields: zod.z .object({ totalCount: zod.z.number(), }) .optional(), coverage: zod.z.object({ coverage: zod.z.number(), documents: zod.z.number(), full: zod.z.boolean(), nodes: zod.z.number(), results: zod.z.number(), resultsFull: zod.z.number(), }), errors: zod.z.array(VespaErrorSchema).optional(), }), trace: zod.z.any().optional(), // Add optional trace field to the root }); const VespaSearchResultSchema = zod.z.union([ VespaSearchResultsSchema, VespaGroupSchema, ]); VespaRootBaseSchema.extend({ root: VespaRootBaseSchema.shape.root.extend({ children: zod.z.array(VespaSearchResultSchema), }), }); const AutocompleteMatchFeaturesSchema = zod.z.union([ zod.z.object({ "bm25(title_fuzzy)": zod.z.number(), }), zod.z.object({ "bm25(email_fuzzy)": zod.z.number(), "bm25(name_fuzzy)": zod.z.number(), }), zod.z.object({ "bm25(subject_fuzzy)": zod.z.number(), }), ]); const VespaAutocompleteFileSchema = zod.z .object({ docId: zod.z.string(), title: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: entitySchema, sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteUserSchema = zod.z .object({ docId: zod.z.string(), // optional due to contacts name: zod.z.string().optional(), email: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: entitySchema, photoLink: zod.z.string(), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteMailSchema = zod.z .object({ docId: zod.z.string(), threadId: zod.z.string(), subject: zod.z.string().optional(), app: zod.z.nativeEnum(Apps), entity: entitySchema, 
sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteChatUserSchema = zod.z .object({ docId: zod.z.string(), // optional due to contacts name: zod.z.string().optional(), email: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: entitySchema, image: zod.z.string(), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteMailAttachmentSchema = zod.z .object({ docId: zod.z.string(), filename: zod.z.string(), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); zod.z .object({ docId: zod.z.string(), name: zod.z.string().optional(), app: zod.z.nativeEnum(Apps), entity: entitySchema, sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteUserQueryHSchema = zod.z .object({ docId: zod.z.string(), query_text: zod.z.string(), timestamp: zod.z.number().optional(), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteChatContainerSchema = zod.z .object({ docId: zod.z.string(), name: zod.z.string(), app: zod.z.nativeEnum(Apps), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteSummarySchema = zod.z.union([ VespaAutocompleteFileSchema, VespaAutocompleteUserSchema, VespaAutocompleteMailSchema, VespaAutocompleteUserQueryHSchema, VespaAutocompleteMailAttachmentSchema, VespaAutocompleteChatContainerSchema, VespaAutocompleteChatUserSchema, ]); const VespaAutocompleteFieldsSchema = zod.z .object({ matchfeatures: AutocompleteMatchFeaturesSchema, sddocname: Schemas, }) .and(VespaAutocompleteSummarySchema); const VespaAutocompleteSchema = zod.z.object({ id: zod.z.string(), relevance: zod.z.number(), source: zod.z.string(), fields: VespaAutocompleteFieldsSchema, }); VespaRootBaseSchema.extend({ root: VespaRootBaseSchema.shape.root.extend({ children: zod.z.array(VespaAutocompleteSchema), }), }); const MailResponseSchema = VespaMailGetSchema.pick({ docId: true, threadId: true, app: true, entity: true, subject: true, from: true, relevance: true, 
timestamp: true, userMap: true, mailId: true, }) .strip() .extend({ type: zod.z.literal("mail"), mimeType: zod.z.string(), chunks_summary: zod.z.array(scoredChunk).optional(), matchfeatures: zod.z.any().optional(), rankfeatures: zod.z.any().optional(), }); const MailAttachmentResponseSchema = VespaMailAttachmentGetSchema.pick({ docId: true, app: true, entity: true, relevance: true, timestamp: true, filename: true, mailId: true, partId: true, fileType: true, }) .strip() .extend({ type: zod.z.literal("mail_attachment"), chunks_summary: zod.z.array(scoredChunk).optional(), matchfeatures: zod.z.any().optional(), rankfeatures: zod.z.any().optional(), }); const ChatMessageResponseSchema = VespaChatMessageGetSchema.pick({ docId: true, teamId: true, channelId: true, text: true, userId: true, app: true, entity: true, createdAt: true, threadId: true, image: true, name: true, domain: true, username: true, attachmentIds: true, mentions: true, relevance: true, updatedAt: true, }) .strip() .extend({ type: zod.z.literal("chat_message"), chunks_summary: zod.z.array(zod.z.string()).optional(), matchfeatures: zod.z.any().optional(), rankfeatures: zod.z.any().optional(), }); const DataSourceFileResponseSchema = VespaDataSourceFileGetSchema.pick({ docId: true, description: true, app: true, fileName: true, fileSize: true, uploadedBy: true, duration: true, mimeType: true, createdAt: true, updatedAt: true, dataSourceRef: true, metadata: true, relevance: true, }) .strip() .extend({ type: zod.z.literal(dataSourceFileSchema), // Using the schema const for the literal chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(), matchfeatures: DataSourceFileMatchFeaturesSchema.optional(), // or z.any().optional() if specific match features aren't always needed here rankfeatures: zod.z.any().optional(), }); ({ gmail: Apps.Gmail, drive: Apps.GoogleDrive, googledrive: Apps.GoogleDrive, googlecalendar: Apps.GoogleCalendar, slack: Apps.Slack, datasource: Apps.DataSource, 
"google-workspace": Apps.GoogleWorkspace, googledocs: Apps.GoogleDrive, googlesheets: Apps.GoogleDrive, pdf: Apps.GoogleDrive, }); const AutocompleteFileSchema = zod.z .object({ type: zod.z.literal(fileSchema), relevance: zod.z.number(), title: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: entitySchema, }) .strip(); const AutocompleteUserSchema = zod.z .object({ type: zod.z.literal(userSchema), relevance: zod.z.number(), // optional due to contacts name: zod.z.string().optional(), email: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: entitySchema, photoLink: zod.z.string().optional(), }) .strip(); const AutocompleteUserQueryHSchema = zod.z .object({ type: zod.z.literal(userQuerySchema), docId: zod.z.string(), query_text: zod.z.string(), timestamp: zod.z.number().optional(), }) .strip(); const AutocompleteMailSchema = zod.z .object({ type: zod.z.literal(mailSchema), relevance: zod.z.number(), // optional due to contacts subject: zod.z.string().optional(), app: zod.z.nativeEnum(Apps), entity: entitySchema, threadId: zod.z.string().optional(), docId: zod.z.string(), }) .strip(); const AutocompleteMailAttachmentSchema = zod.z .object({ type: zod.z.literal(mailAttachmentSchema), relevance: zod.z.number(), app: zod.z.nativeEnum(Apps), entity: entitySchema, filename: zod.z.string(), docId: zod.z.string(), }) .strip(); const AutocompleteEventSchema = zod.z .object({ type: zod.z.literal(eventSchema), relevance: zod.z.number(), name: zod.z.string().optional(), app: zod.z.nativeEnum(Apps), entity: entitySchema, docId: zod.z.string(), }) .strip(); const AutocompleteChatUserSchema = zod.z .object({ type: zod.z.literal(chatUserSchema), relevance: zod.z.number(), // optional due to contacts name: zod.z.string().optional(), email: zod.z.string().optional(), app: zod.z.nativeEnum(Apps), entity: entitySchema, image: zod.z.string(), }) .strip(); const AutocompleteSchema = zod.z.discriminatedUnion("type", [ AutocompleteFileSchema, AutocompleteUserSchema, 
AutocompleteMailSchema, AutocompleteEventSchema, AutocompleteUserQueryHSchema, AutocompleteMailAttachmentSchema, AutocompleteChatUserSchema, ]); zod.z.object({ results: zod.z.array(AutocompleteSchema), }); // search result const FileResponseSchema = VespaFileSchema.pick({ docId: true, title: true, url: true, app: true, entity: true, owner: true, ownerEmail: true, photoLink: true, updatedAt: true, }) .extend({ type: zod.z.literal(fileSchema), chunk: zod.z.string().optional(), chunkIndex: zod.z.number().optional(), mimeType: zod.z.string(), chunks_summary: zod.z.array(scoredChunk).optional(), relevance: zod.z.number(), matchfeatures: zod.z.any().optional(), // Add matchfeatures rankfeatures: zod.z.any().optional(), }) .strip(); const EventResponseSchema = VespaEventSchema.pick({ docId: true, name: true, url: true, app: true, entity: true, updatedAt: true, }) .extend({ type: zod.z.literal(eventSchema), relevance: zod.z.number(), description: zod.z.string().optional(), chunks_summary: zod.z.array(zod.z.string()).optional(), attendeesNames: zod.z.array(zod.z.string()).optional(), matchfeatures: zod.z.any().optional(), // Add matchfeatures rankfeatures: zod.z.any().optional(), }) .strip(); const UserResponseSchema = VespaUserSchema.pick({ name: true, email: true, app: true, entity: true, photoLink: true, docId: true, }) .strip() .extend({ type: zod.z.literal(userSchema), relevance: zod.z.number(), matchfeatures: zod.z.any().optional(), // Add matchfeatures rankfeatures: zod.z.any().optional(), }); // Search Response Schema const SearchResultsSchema = zod.z.discriminatedUnion("type", [ UserResponseSchema, FileResponseSchema, DataSourceFileResponseSchema, MailResponseSchema, EventResponseSchema, MailAttachmentResponseSchema, ChatMessageResponseSchema, ]); zod.z.object({ count: zod.z.number(), results: zod.z.array(SearchResultsSchema), groupCount: zod.z.any(), trace: zod.z.any().optional(), }); zod.z.object({ from: zod.z.array(zod.z.string()).optional(), to: 
zod.z.array(zod.z.string()).optional(), cc: zod.z.array(zod.z.string()).optional(), bcc: zod.z.array(zod.z.string()).optional(), subject: zod.z.array(zod.z.string()).optional(), }); var SearchModes; (function (SearchModes) { SearchModes["NativeRank"] = "default_native"; SearchModes["BM25"] = "default_bm25"; SearchModes["AI"] = "default_ai"; SearchModes["Random"] = "default_random"; SearchModes["GlobalSorted"] = "global_sorted"; })(SearchModes || (SearchModes = {})); const getErrorMessage = (error) => { if (error instanceof Error) return error.message; return String(error); }; // module contains all the transformations // from vespa to the user accepted types function handleVespaGroupResponse(response) { const appEntityCounts = {}; // Navigate to the first level of groups const groupRoot = response.root.children?.[0]; // Assuming this is the group:root level if (!groupRoot || !("children" in groupRoot)) return appEntityCounts; // Safeguard for empty responses // Navigate to the app grouping (e.g., grouplist:app) const appGroup = groupRoot.children?.[0]; if (!appGroup || !("children" in appGroup)) return appEntityCounts; // Safeguard for missing app group // Iterate through the apps // @ts-ignore for (const app of appGroup.children) { const appName = app.value; // Get the app name appEntityCounts[appName] = {}; // Initialize the app entry // Navigate to the entity grouping (e.g., grouplist:entity) const entityGroup = app.children?.[0]; if (!entityGroup || !("children" in entityGroup)) continue; // Skip if no entities // Iterate through the entities // @ts-ignore for (const entity of entityGroup.children) { const entityName = entity.value; // Get the entity name const count = entity.fields?.["count()"] || 0; // Get the count or default to 0 appEntityCounts[appName][entityName] = count; // Assign the count to the app-entity pair } } return appEntityCounts; // Return the final map } // Console fallback logger const consoleLogger = { info: (message, ...args) => 
console.info(`[INFO] ${message}`, ...args), error: (message, ...args) => { const msg = message instanceof Error ? message.message : message; console.error(`[ERROR] ${msg}`, ...args); }, warn: (message, ...args) => console.warn(`[WARN] ${message}`, ...args), debug: (message, ...args) => console.debug(`[DEBUG] ${message}`, ...args), child: (metadata) => consoleLogger, }; class VespaClient { constructor(endpoint, logger, config) { this.logger = logger || consoleLogger; this.maxRetries = config?.vespaMaxRetryAttempts || 3; this.retryDelay = config?.vespaRetryDelay || 1000; // milliseconds this.vespaEndpoint = endpoint || `http://${config?.vespaBaseHost || "localhost"}:8080`; } async delay(ms) { return new Promise((resolve) => setTimeout(resolve, ms)); } async fetchWithRetry(url, options, retryCount = 0) { const nonRetryableStatusCodes = [404]; try { const response = await fetch(url, options); if (!response.ok) { // Don't need to retry for non-retryable status codes if (nonRetryableStatusCodes.includes(response.status)) { throw new Error(`Non-retryable error: ${response.status} ${response.statusText}`); } // Retry for 429 (Too Many Requests) or 5xx errors if ((response.status === 429 || response.status >= 500) && retryCount < this.maxRetries) { this.logger.info("retrying due to status: ", response.status); await this.delay(this.retryDelay * Math.pow(2, retryCount)); return this.fetchWithRetry(url, options, retryCount + 1); } } return response; } catch (error) { const errorMessage = getErrorMessage(error); if (retryCount < this.maxRetries && !errorMessage.includes("Non-retryable error")) { await this.delay(this.retryDelay * Math.pow(2, retryCount)); // Exponential backoff return this.fetchWithRetry(url, options, retryCount + 1); } throw error; } } async search(payload) { const url = `${this.vespaEndpoint}/search/`; try { const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify(payload), }); 
if (!response.ok) { const errorText = response.statusText; const errorBody = await response.text(); this.logger.error(`Vespa search failed - Status: ${response.status}, StatusText: ${errorText}`); this.logger.error(`Vespa error body: ${errorBody}`); throw new Error(`Failed to fetch documents in searchVespa: ${response.status} ${response.statusText} - ${errorText}`); } const result = await response.json(); return result; } catch (error) { this.logger.error(`VespaClient.search error:`, error); throw new Error(`Vespa search error: ${error.message}`); } } async fetchDocumentBatch(schema, options, limit, offset, email) { const yqlQuery = `select * from sources ${schema} where true`; const searchPayload = { yql: yqlQuery, hits: limit, offset, timeout: "10s", }; const response = await this.search(searchPayload); return (response.root?.children || []).map((doc) => { // Use optional chaining and nullish coalescing to safely extract fields const { matchfeatures, ...fieldsWithoutMatch } = doc.fields; return fieldsWithoutMatch; }); } async getAllDocumentsParallel(schema, options, concurrency = 3, email) { // First get document count const countResponse = await this.getDocumentCount(schema, options, email); const totalCount = countResponse?.root?.fields?.totalCount || 0; if (totalCount === 0) return []; // Calculate optimal batch size and create batch tasks const batchSize = 350; const tasks = []; for (let offset = 0; offset < totalCount; offset += batchSize) { tasks.push(() => this.fetchDocumentBatch(schema, options, batchSize, offset, email)); } // Run tasks with concurrency limit const pLimit = (await import('p-limit')).default; const limit = pLimit(concurrency); const results = await Promise.all(tasks.map((task) => limit(task))); // Flatten results return results.flat(); } async deleteAllDocuments(options) { const { cluster, namespace, schema } = options; // Construct the DELETE URL const url = 
`${this.vespaEndpoint}/document/v1/${namespace}/${schema}/docid?selection=true&cluster=${cluster}`; try { const response = await this.fetchWithRetry(url, { method: "DELETE", }); if (response.ok) { this.logger.info("All documents deleted successfully."); } else { const errorText = response.statusText; throw new Error(`Failed to delete documents: ${response.status} ${response.statusText} - ${errorText}`); } } catch (error) { this.logger.error(`Error deleting documents:, ${error} ${error.stack}`, error); throw new Error(`Vespa delete error: ${error}`); } } async insertDocument(document, options) { try { const url = `${this.vespaEndpoint}/document/v1/${options.namespace}/${options.schema}/docid/${document.docId}`; const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ fields: document }), }); if (!response.ok) { // Using status text since response.text() return Body Already used Error const errorText = response.statusText; const errorBody = await response.text(); this.logger.error(`Vespa error: ${errorBody}`); throw new Error(`Failed to insert document: ${response.status} ${response.statusText} - ${errorText}`); } const data = await response.json(); if (response.ok) { // this.logger.info(`Document ${document.docId} inserted successfully`) } else { this.logger.error(`Error inserting document ${document.docId}`); } } catch (error) { const errMessage = getErrorMessage(error); this.logger.error(`Error inserting document ${document.docId}: ${errMessage}`, error); throw new Error(`Error inserting document ${document.docId}: ${errMessage}`); } } async insert(document, options) { try { const url = `${this.vespaEndpoint}/document/v1/${options.namespace}/${options.schema}/docid/${document.docId}`; const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ fields: document }), }); if (!response.ok) { // Using status 
text since response.text() return Body Already used Error const errorText = response.statusText; const errorBody = await response.text(); this.logger.error(`Vespa error: ${errorBody}`); throw new Error(`Failed to insert document: ${response.status} ${response.statusText} - ${errorText}`); } const data = await response.json(); if (response.ok) { this.logger.info(`Document ${document.docId} inserted successfully`); } else { } } catch (error) { const errMessage = getErrorMessage(error); this.logger.error(`Error inserting document ${document.docId}: ${errMessage} ${error.stack}`, error); throw new Error(`Error inserting document ${document.docId}: ${errMessage} ${error.stack}`); } } async insertUser(user, options) { try { const url = `${this.vespaEndpoint}/document/v1/${options.namespace}/${options.schema}/docid/${user.docId}`; const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify({ fields: user }), }); const data = await response.json(); if (response.ok) { // this.logger.info(`Document ${user.docId} inserted successfully:`, data) } else { this.logger.error(`Error inserting user ${user.docId}: ${data}`, data); } } catch (error) { const errorMessage = getErrorMessage(error); this.logger.error(`Error inserting user ${user.docId}:`, errorMessage, error); throw new Error(`Error inserting user ${user.docId}: ${errorMessage}`); } } async autoComplete(searchPayload) { try { const url = `${this.vespaEndpoint}/search/`; const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify(searchPayload), }); if (!response.ok) { const errorText = response.statusText; const errorBody = await response.text(); this.logger.error(`AutoComplete failed - Status: ${response.status}, StatusText: ${errorText}`); this.logger.error(`AutoComplete error body: ${errorBody}`); throw new Error(`Failed to perform autocomplete search: 
${response.status} ${response.statusText} - ${errorText}`); } const data = await response.json(); return data; } catch (error) { this.logger.error(`VespaClient.autoComplete error:`, error); throw new Error(`Error performing autocomplete search:, ${error} ${error.stack} `); } } async groupSearch(payload) { try { const url = `${this.vespaEndpoint}/search/`; const response = await this.fetchWithRetry(url, { method: "POST", headers: { "Content-Type": "application/json", }, body: JSON.stringify(payload), }); if (!response.ok) { const errorText = response.statusText; throw new Error(`Failed to fetch documents in groupVespaSearch: ${response.status} ${response.statusText} - ${errorText}`); } const data = await response.json(); return handleVespaGroupResponse(data); } catch (error) { this.logger.error(`Error performing search groupVespaSearch:, ${error} - ${error.stack}`, error); throw new Error(`Error performing search groupVespaSearch:, ${error} - ${error.stack}`); } } async getDocumentCount(schema, options, email) { try { // Encode the YQL query to ensure it's URL-safe const yql = encodeURIComponent(`select * from sources ${schema} where uploadedBy contains '${email}'`); // Construct the search URL with necessary query parameters const url = `${this.vespaEndpoint}/search/?yql=${yql}&hits=0&cluster=${options.cluster}`; const response = await this.fetchWithRetry(url, { method: "GET", headers: { Accept: "application/json", }, }); if (!response.ok) { const errorText = response.statusText; throw new Error(`Failed to fetch document count: ${response.status} ${response.statusText} - ${errorText}`); } const data = await response.json(); // Extract the total number of hits from the response const totalCount = data?.root?.fields?.totalCount; if (typeof totalCount === "number") { this.logger.info(`Total documents in schema '${schema}' within namespace '${options.namespace}' and cluster '${options.cluster}': ${totalCount}`); return data; } else { this.logger.error(`Unexpected 
response structure:', ${data}`); } } catch (error) { const errMessage = getErrorMessage(error); this.logger.error(`Error retrieving document count: ${errMessage}`); throw new Error(`Error retrieving document count: ${errMessage}`); } } async getDocument(options) { const { docId, name