UNPKG

vespa-ts

Version:

A reusable TypeScript package for interacting with Vespa search engine with dependency injection support

1,350 lines (1,344 loc) 49.9 kB
'use strict'; var zod = require('zod'); const fileSchema = "file"; // Replace with your actual schema name const userSchema = "user"; // calendar const eventSchema = "event"; // mail const mailAttachmentSchema = "mail_attachment"; const mailSchema = "mail"; // chat const chatContainerSchema = "chat_container"; // this is not meant to be searched but we will // store the data in vespa and fetch it as needed const chatTeamSchema = "chat_team"; const chatMessageSchema = "chat_message"; const chatUserSchema = "chat_user"; // previous queries const userQuerySchema = "user_query"; const datasourceSchema = "datasource"; const dataSourceFileSchema = "datasource_file"; // not using @ because of vite of frontend var Apps; (function (Apps) { // includes everything google Apps["GoogleWorkspace"] = "google-workspace"; // more granular Apps["GoogleDrive"] = "google-drive"; Apps["Gmail"] = "gmail"; // Notion = "notion", // Notion is not yet supported Apps["GoogleCalendar"] = "google-calendar"; Apps["Slack"] = "slack"; Apps["MCP"] = "mcp"; Apps["Github"] = "github"; Apps["Xyne"] = "xyne"; Apps["DataSource"] = "data-source"; })(Apps || (Apps = {})); var GooglePeopleEntity; (function (GooglePeopleEntity) { GooglePeopleEntity["Contacts"] = "Contacts"; GooglePeopleEntity["OtherContacts"] = "OtherContacts"; GooglePeopleEntity["AdminDirectory"] = "AdminDirectory"; })(GooglePeopleEntity || (GooglePeopleEntity = {})); // the vespa schemas const Schemas = zod.z.union([ zod.z.literal(fileSchema), zod.z.literal(userSchema), zod.z.literal(mailSchema), zod.z.literal(eventSchema), zod.z.literal(userQuerySchema), zod.z.literal(mailAttachmentSchema), zod.z.literal(chatContainerSchema), zod.z.literal(chatTeamSchema), zod.z.literal(chatUserSchema), zod.z.literal(chatMessageSchema), zod.z.literal(datasourceSchema), zod.z.literal(dataSourceFileSchema), ]); var MailEntity; (function (MailEntity) { MailEntity["Email"] = "mail"; })(MailEntity || (MailEntity = {})); var CalendarEntity; (function (CalendarEntity) { CalendarEntity["Event"] = "event"; })(CalendarEntity || (CalendarEntity = {})); var SlackEntity; (function (SlackEntity) { SlackEntity["Team"] = "team"; SlackEntity["User"] = "user"; SlackEntity["Message"] = "message"; SlackEntity["Channel"] = "channel"; SlackEntity["File"] = "file"; })(SlackEntity || (SlackEntity = {})); var DriveEntity; (function (DriveEntity) { DriveEntity["Docs"] = "docs"; DriveEntity["Sheets"] = "sheets"; DriveEntity["Slides"] = "slides"; DriveEntity["Presentation"] = "presentation"; DriveEntity["PDF"] = "pdf"; DriveEntity["Folder"] = "folder"; DriveEntity["Misc"] = "driveFile"; DriveEntity["Drawing"] = "drawing"; DriveEntity["Form"] = "form"; DriveEntity["Script"] = "script"; DriveEntity["Site"] = "site"; DriveEntity["Map"] = "map"; DriveEntity["Audio"] = "audio"; DriveEntity["Video"] = "video"; DriveEntity["Photo"] = "photo"; DriveEntity["ThirdPartyApp"] = "third_party_app"; DriveEntity["Image"] = "image"; DriveEntity["Zip"] = "zip"; DriveEntity["WordDocument"] = "word_document"; DriveEntity["ExcelSpreadsheet"] = "excel_spreadsheet"; DriveEntity["PowerPointPresentation"] = "powerpoint_presentation"; DriveEntity["Text"] = "text"; DriveEntity["CSV"] = "csv"; })(DriveEntity || (DriveEntity = {})); var MailAttachmentEntity; (function (MailAttachmentEntity) { MailAttachmentEntity["PDF"] = "pdf"; MailAttachmentEntity["Sheets"] = "sheets"; MailAttachmentEntity["CSV"] = "csv"; MailAttachmentEntity["WordDocument"] = "worddocument"; MailAttachmentEntity["PowerPointPresentation"] = "powerpointpresentation"; MailAttachmentEntity["Text"] = "text"; MailAttachmentEntity["NotValid"] = "notvalid"; })(MailAttachmentEntity || (MailAttachmentEntity = {})); const PeopleEntitySchema = zod.z.nativeEnum(GooglePeopleEntity); const ChatEntitySchema = zod.z.nativeEnum(SlackEntity); var NotionEntity; (function (NotionEntity) { NotionEntity["Page"] = "page"; NotionEntity["Database"] = "database"; })(NotionEntity || (NotionEntity = {})); const FileEntitySchema = zod.z.nativeEnum(DriveEntity); const MailEntitySchema = zod.z.nativeEnum(MailEntity); const MailAttachmentEntitySchema = zod.z.nativeEnum(MailAttachmentEntity); const EventEntitySchema = zod.z.nativeEnum(CalendarEntity); const NotionEntitySchema = zod.z.nativeEnum(NotionEntity); var SystemEntity; (function (SystemEntity) { SystemEntity["SystemInfo"] = "system_info"; SystemEntity["UserProfile"] = "user_profile"; })(SystemEntity || (SystemEntity = {})); var DataSourceEntity; (function (DataSourceEntity) { DataSourceEntity["DataSourceFile"] = "data_source_file"; })(DataSourceEntity || (DataSourceEntity = {})); const SystemEntitySchema = zod.z.nativeEnum(SystemEntity); const DataSourceEntitySchema = zod.z.nativeEnum(DataSourceEntity); const entitySchema = zod.z.union([ SystemEntitySchema, PeopleEntitySchema, FileEntitySchema, NotionEntitySchema, MailEntitySchema, EventEntitySchema, MailAttachmentEntitySchema, ChatEntitySchema, DataSourceEntitySchema, ]); const scoredChunk = zod.z.object({ chunk: zod.z.string(), score: zod.z.number(), index: zod.z.number(), }); const defaultVespaFieldsSchema = zod.z.object({ relevance: zod.z.number(), source: zod.z.string(), // sddocname: Schemas, documentid: zod.z.string(), }); const SpreadsheetMetadata = zod.z.object({ spreadsheetId: zod.z.string(), totalSheets: zod.z.number(), }); const Metadata = zod.z.union([zod.z.object({}), SpreadsheetMetadata]); const VespaFileSchema = zod.z.object({ docId: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: FileEntitySchema, title: zod.z.string(), url: zod.z.string().nullable(), parentId: zod.z.string().nullable(), chunks: zod.z.array(zod.z.string()), owner: zod.z.string().nullable(), ownerEmail: zod.z.string().nullable(), photoLink: zod.z.string().nullable(), permissions: zod.z.array(zod.z.string()), mimeType: zod.z.string().nullable(), metadata: Metadata, createdAt: zod.z.number(), updatedAt: zod.z.number(), }); const chunkScoresSchema = zod.z.object({ cells: zod.z.record(zod.z.string(), zod.z.number()), }); // Match features for file schema const FileMatchFeaturesSchema = zod.z.object({ "bm25(title)": zod.z.number().optional(), "bm25(chunks)": zod.z.number().optional(), "closeness(field, chunk_embeddings)": zod.z.number().optional(), chunk_scores: chunkScoresSchema, }); // Match features for user schema const UserMatchFeaturesSchema = zod.z.object({ "bm25(name)": zod.z.number().optional(), "bm25(email)": zod.z.number().optional(), }); // Match features for mail schema const MailMatchFeaturesSchema = zod.z.object({ "bm25(subject)": zod.z.number().optional(), "bm25(chunks)": zod.z.number().optional(), "bm25(attachmentFilenames)": zod.z.number().optional(), chunk_scores: chunkScoresSchema, }); const EventMatchFeaturesSchema = zod.z.object({ "bm25(name)": zod.z.number().optional(), "bm25(description)": zod.z.number().optional(), "bm25(attachmentFilenames)": zod.z.number().optional(), "bm25(attendeesNames)": zod.z.number().optional(), }); const MailAttachmentMatchFeaturesSchema = zod.z.object({ chunk_vector_score: zod.z.number().optional(), scaled_bm25_chunks: zod.z.number().optional(), scaled_bm25_filename: zod.z.number().optional(), chunk_scores: chunkScoresSchema, }); const ChatMessageMatchFeaturesSchema = zod.z.object({ vector_score: zod.z.number().optional(), combined_nativeRank: zod.z.number().optional(), "nativeRank(text)": zod.z.number().optional(), "nativeRank(username)": zod.z.number().optional(), "nativeRank(name)": zod.z.number().optional(), }); const DataSourceFileMatchFeaturesSchema = zod.z.object({ "bm25(fileName)": zod.z.number().optional(), "bm25(chunks)": zod.z.number().optional(), "closeness(field, chunk_embeddings)": zod.z.number().optional(), chunk_scores: chunkScoresSchema.optional(), }); zod.z.union([ FileMatchFeaturesSchema, MailMatchFeaturesSchema, MailAttachmentMatchFeaturesSchema, DataSourceFileMatchFeaturesSchema, ]); // Base schema for DataSource (for insertion) const VespaDataSourceSchemaBase = zod.z.object({ docId: zod.z.string(), name: zod.z.string(), createdBy: zod.z.string(), createdAt: zod.z.number(), // long updatedAt: zod.z.number(), // long }); // Search schema for DataSource const VespaDataSourceSearchSchema = VespaDataSourceSchemaBase.extend({ sddocname: zod.z.literal(datasourceSchema), matchfeatures: zod.z.any().optional(), rankfeatures: zod.z.any().optional(), }).merge(defaultVespaFieldsSchema); // Base schema for DataSourceFile (for insertion) const VespaDataSourceFileSchemaBase = zod.z.object({ docId: zod.z.string(), description: zod.z.string().optional(), app: zod.z.literal(Apps.DataSource), fileName: zod.z.string().optional(), fileSize: zod.z.number().optional(), // long chunks: zod.z.array(zod.z.string()), image_chunks: zod.z.array(zod.z.string()).optional(), // Added for image descriptions chunks_pos: zod.z.array(zod.z.number()).optional(), // Added for text chunk positions image_chunks_pos: zod.z.array(zod.z.number()).optional(), // Added for image chunk positions uploadedBy: zod.z.string(), duration: zod.z.number().optional(), // long mimeType: zod.z.string().optional(), createdAt: zod.z.number(), // long updatedAt: zod.z.number(), // long dataSourceRef: zod.z.string(), // reference to datasource docId metadata: zod.z.string().optional(), // JSON string }); // Search schema for DataSourceFile const VespaDataSourceFileSearchSchema = VespaDataSourceFileSchemaBase.extend({ sddocname: zod.z.literal(dataSourceFileSchema), matchfeatures: DataSourceFileMatchFeaturesSchema, rankfeatures: zod.z.any().optional(), dataSourceName: zod.z.string().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(), image_chunks_summary: zod.z .array(zod.z.union([zod.z.string(), scoredChunk])) .optional(), chunks_pos_summary: zod.z.array(zod.z.number()).optional(), image_chunks_pos_summary: zod.z.array(zod.z.number()).optional(), }); const VespaFileSearchSchema = VespaFileSchema.extend({ sddocname: zod.z.literal(fileSchema), matchfeatures: FileMatchFeaturesSchema, rankfeatures: zod.z.any().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(), }); // basically GetDocument doesn't return sddocname // in search it's always present const VespaFileGetSchema = VespaFileSchema.merge(defaultVespaFieldsSchema); const VespaUserSchema = zod.z .object({ docId: zod.z.string().min(1), name: zod.z.string().optional(), //.min(1), email: zod.z.string().min(1).email(), app: zod.z.nativeEnum(Apps), entity: zod.z.nativeEnum(GooglePeopleEntity), gender: zod.z.string().optional(), photoLink: zod.z.string().optional(), aliases: zod.z.array(zod.z.string()).optional(), language: zod.z.string().optional(), includeInGlobalAddressList: zod.z.boolean().optional(), isAdmin: zod.z.boolean().optional(), isDelegatedAdmin: zod.z.boolean().optional(), suspended: zod.z.boolean().optional(), archived: zod.z.boolean().optional(), urls: zod.z.array(zod.z.string()).optional(), rankfeatures: zod.z.any().optional(), orgName: zod.z.string().optional(), orgJobTitle: zod.z.string().optional(), orgDepartment: zod.z.string().optional(), orgLocation: zod.z.string().optional(), orgDescription: zod.z.string().optional(), creationTime: zod.z.number(), lastLoggedIn: zod.z.number().optional(), birthday: zod.z.number().optional(), occupations: zod.z.array(zod.z.string()).optional(), userDefined: zod.z.array(zod.z.string()).optional(), customerId: zod.z.string().optional(), clientData: zod.z.array(zod.z.string()).optional(), // this only exists for contacts owner: zod.z.string().optional(), sddocname: zod.z.literal(userSchema), }) .merge(defaultVespaFieldsSchema); // Mail Types const AttachmentSchema = zod.z.object({ fileType: zod.z.string(), fileSize: zod.z.number(), }); const MailSchema = zod.z.object({ docId: zod.z.string(), threadId: zod.z.string(), mailId: zod.z.string().optional(), // Optional for threads subject: zod.z.string().default(""), // Default to empty string to avoid zod errors when subject is missing chunks: zod.z.array(zod.z.string()), timestamp: zod.z.number(), app: zod.z.nativeEnum(Apps), userMap: zod.z.optional(zod.z.record(zod.z.string(), zod.z.string())), entity: zod.z.nativeEnum(MailEntity), permissions: zod.z.array(zod.z.string()), from: zod.z.string(), to: zod.z.array(zod.z.string()), cc: zod.z.array(zod.z.string()), bcc: zod.z.array(zod.z.string()), mimeType: zod.z.string(), attachmentFilenames: zod.z.array(zod.z.string()), attachments: zod.z.array(AttachmentSchema), labels: zod.z.array(zod.z.string()), }); const VespaMailSchema = MailSchema.extend({ docId: zod.z.string().min(1), }); const MailAttachmentSchema = zod.z.object({ docId: zod.z.string(), mailId: zod.z.string(), threadId: zod.z.string(), partId: zod.z.number().nullable().optional(), app: zod.z.nativeEnum(Apps), entity: zod.z.nativeEnum(MailAttachmentEntity), chunks: zod.z.array(zod.z.string()), timestamp: zod.z.number(), permissions: zod.z.array(zod.z.string()), filename: zod.z.string(), fileType: zod.z.string().nullable().optional(), fileSize: zod.z.number().nullable().optional(), }); const VespaMailAttachmentSchema = MailAttachmentSchema.extend({}); const EventUser = zod.z.object({ email: zod.z.string(), displayName: zod.z.string(), }); const EventAtatchment = zod.z.object({ fileId: zod.z.string(), title: zod.z.string(), fileUrl: zod.z.string(), mimeType: zod.z.string(), }); const VespaEventSchema = zod.z.object({ docId: zod.z.string(), name: zod.z.string(), description: zod.z.string(), url: zod.z.string(), status: zod.z.string(), location: zod.z.string(), createdAt: zod.z.number(), updatedAt: zod.z.number(), app: zod.z.nativeEnum(Apps), entity: zod.z.nativeEnum(CalendarEntity), creator: EventUser, organizer: EventUser, attendees: zod.z.array(EventUser), attendeesNames: zod.z.array(zod.z.string()), startTime: zod.z.number(), endTime: zod.z.number(), attachmentFilenames: zod.z.array(zod.z.string()), attachments: zod.z.array(EventAtatchment), recurrence: zod.z.array(zod.z.string()), baseUrl: zod.z.string(), joiningLink: zod.z.string(), permissions: zod.z.array(zod.z.string()), cancelledInstances: zod.z.array(zod.z.string()), defaultStartTime: zod.z.boolean(), }); const VespaMailSearchSchema = VespaMailSchema.extend({ sddocname: zod.z.literal("mail"), matchfeatures: MailMatchFeaturesSchema, rankfeatures: zod.z.any().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ // attachment won't have this chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(), }); const VespaMailAttachmentSearchSchema = VespaMailAttachmentSchema.extend({ sddocname: zod.z.literal("mail_attachment"), matchfeatures: MailAttachmentMatchFeaturesSchema, rankfeatures: zod.z.any().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(), }); const VespaEventSearchSchema = VespaEventSchema.extend({ sddocname: zod.z.literal("event"), // Assuming events can have rankfeatures rankfeatures: zod.z.any().optional(), }).merge(defaultVespaFieldsSchema); const VespaUserQueryHistorySchema = zod.z.object({ docId: zod.z.string(), query_text: zod.z.string(), timestamp: zod.z.number(), count: zod.z.number(), }); const VespaUserQueryHGetSchema = VespaUserQueryHistorySchema.extend({ sddocname: zod.z.literal("user_query"), }).merge(defaultVespaFieldsSchema); const VespaMailGetSchema = VespaMailSchema.merge(defaultVespaFieldsSchema); const VespaMailAttachmentGetSchema = VespaMailAttachmentSchema.merge(defaultVespaFieldsSchema); const VespaChatMessageSchema = zod.z.object({ docId: zod.z.string(), // client_msg_id from Slack teamId: zod.z.string(), // Slack team ID (e.g., "T05N1EJSE0K") channelId: zod.z.string(), // Slack channel ID (e.g., "C123ABC456") text: zod.z.string(), userId: zod.z.string(), // Slack user ID (e.g., "U032QT45V53") app: zod.z.nativeEnum(Apps), // App (e.g., "slack") entity: zod.z.nativeEnum(SlackEntity), // Entity (e.g., "message") name: zod.z.string(), username: zod.z.string(), image: zod.z.string(), channelName: zod.z.string().optional(), // derived isIm: zod.z.boolean().optional(), // derived isMpim: zod.z.boolean().optional(), // derived isPrivate: zod.z.boolean().optional(), // derived permissions: zod.z.array(zod.z.string()).optional(), // derived, teamName: zod.z.string().optional(), // derived domain: zod.z.string().optional(), // derived createdAt: zod.z.number(), // Slack ts (e.g., 1734442791.514519) teamRef: zod.z.string(), // vespa id for team threadId: zod.z.string().default(""), // Slack thread_ts, null if not in thread attachmentIds: zod.z.array(zod.z.string()).default([]), // Slack file IDs (e.g., ["F0857N0FF4N"]) // reactions: z.array(z.string()), // Commented out in Vespa schema, so excluded mentions: zod.z.array(zod.z.string()), // Extracted from text (e.g., ["U032QT45V53"]) updatedAt: zod.z.number(), // Slack edited.ts (e.g., 1734442538.0), null if not edited deletedAt: zod.z.number(), metadata: zod.z.string(), // JSON string for subtype, etc. (e.g., "{\"subtype\": null}") }); const VespaChatMessageSearchSchema = VespaChatMessageSchema.extend({ sddocname: zod.z.literal(chatMessageSchema), matchfeatures: ChatMessageMatchFeaturesSchema, rankfeatures: zod.z.any().optional(), }) .merge(defaultVespaFieldsSchema) .extend({ chunks_summary: zod.z.array(zod.z.string()).optional(), }); const VespaChatMessageGetSchema = VespaChatMessageSchema.merge(defaultVespaFieldsSchema); const VespaChatUserSchema = zod.z.object({ docId: zod.z.string(), name: zod.z.string(), title: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: zod.z.nativeEnum(SlackEntity), image: zod.z.string(), email: zod.z.string(), statusText: zod.z.string(), tz: zod.z.string(), teamId: zod.z.string(), deleted: zod.z.boolean(), isAdmin: zod.z.boolean(), updatedAt: zod.z.number(), }); zod.z.object({ id: zod.z.string(), pathId: zod.z.string(), fields: VespaChatUserSchema, }); const VespaChatUserSearchSchema = VespaChatUserSchema.extend({ sddocname: zod.z.literal(chatUserSchema), }).merge(defaultVespaFieldsSchema); const VespaChatContainerSchema = zod.z.object({ docId: zod.z.string(), name: zod.z.string(), channelName: zod.z.string(), creator: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: zod.z.nativeEnum(SlackEntity), isPrivate: zod.z.boolean(), isArchived: zod.z.boolean(), isGeneral: zod.z.boolean(), isIm: zod.z.boolean(), isMpim: zod.z.boolean(), domain: zod.z.string().optional(), // derived permissions: zod.z.array(zod.z.string()), createdAt: zod.z.number(), updatedAt: zod.z.number(), lastSyncedAt: zod.z.number(), topic: zod.z.string(), description: zod.z.string(), count: zod.z.number().int(), }); // Schema for search results that includes Vespa fields const VespaChatContainerSearchSchema = VespaChatContainerSchema.extend({ sddocname: zod.z.literal(chatContainerSchema), }).merge(defaultVespaFieldsSchema); const ChatContainerMatchFeaturesSchema = zod.z.object({ "bm25(name)": zod.z.number().optional(), "bm25(topic)": zod.z.number().optional(), "bm25(description)": zod.z.number().optional(), "closeness(field, chunk_embeddings)": zod.z.number().optional(), }); const VespaChatTeamSchema = zod.z.object({ docId: zod.z.string(), name: zod.z.string(), app: zod.z.nativeEnum(Apps), icon: zod.z.string(), url: zod.z.string(), domain: zod.z.string(), email_domain: zod.z.string(), own: zod.z.boolean(), createdAt: zod.z.number(), updatedAt: zod.z.number(), count: zod.z.number().int(), }); VespaChatTeamSchema.extend({ sddocname: zod.z.literal(chatTeamSchema), }).merge(defaultVespaFieldsSchema); const VespaSearchFieldsUnionSchema = zod.z.discriminatedUnion("sddocname", [ VespaUserSchema, VespaFileSearchSchema, VespaMailSearchSchema, VespaEventSearchSchema, VespaUserQueryHGetSchema, VespaMailAttachmentSearchSchema, VespaChatContainerSearchSchema, VespaChatUserSearchSchema, VespaChatMessageSearchSchema, VespaDataSourceSearchSchema, VespaDataSourceFileSearchSchema, ]); // Get schema for DataSourceFile const VespaDataSourceFileGetSchema = VespaDataSourceFileSchemaBase.merge(defaultVespaFieldsSchema); const SearchMatchFeaturesSchema = zod.z.union([ FileMatchFeaturesSchema, UserMatchFeaturesSchema, MailMatchFeaturesSchema, EventMatchFeaturesSchema, MailAttachmentMatchFeaturesSchema, ChatMessageMatchFeaturesSchema, DataSourceFileMatchFeaturesSchema, ChatContainerMatchFeaturesSchema, ]); const VespaSearchFieldsSchema = zod.z .object({ matchfeatures: SearchMatchFeaturesSchema, sddocname: Schemas, }) .and(VespaSearchFieldsUnionSchema); zod.z.union([ VespaUserSchema, VespaFileGetSchema, VespaMailGetSchema, VespaDataSourceFileGetSchema, ]); const VespaSearchResultsSchema = zod.z.object({ id: zod.z.string(), relevance: zod.z.number(), fields: VespaSearchFieldsSchema, pathId: zod.z.string().optional(), }); zod.z.object({ id: zod.z.string(), relevance: zod.z.number(), fields: VespaSearchFieldsSchema, pathId: zod.z.string().optional(), }); const VespaGroupSchema = zod.z.object({ id: zod.z.string(), relevance: zod.z.number(), label: zod.z.string(), value: zod.z.string().optional(), fields: zod.z .object({ "count()": zod.z.number(), }) .optional(), children: zod.z.array(zod.z.lazy(() => VespaGroupSchema)).optional(), }); const VespaErrorSchema = zod.z.object({ code: zod.z.number(), summary: zod.z.string(), source: zod.z.string(), message: zod.z.string(), }); const VespaRootBaseSchema = zod.z.object({ root: zod.z.object({ id: zod.z.string(), relevance: zod.z.number(), fields: zod.z .object({ totalCount: zod.z.number(), }) .optional(), coverage: zod.z.object({ coverage: zod.z.number(), documents: zod.z.number(), full: zod.z.boolean(), nodes: zod.z.number(), results: zod.z.number(), resultsFull: zod.z.number(), }), errors: zod.z.array(VespaErrorSchema).optional(), }), trace: zod.z.any().optional(), // Add optional trace field to the root }); const VespaSearchResultSchema = zod.z.union([ VespaSearchResultsSchema, VespaGroupSchema, ]); VespaRootBaseSchema.extend({ root: VespaRootBaseSchema.shape.root.extend({ children: zod.z.array(VespaSearchResultSchema), }), }); const AutocompleteMatchFeaturesSchema = zod.z.union([ zod.z.object({ "bm25(title_fuzzy)": zod.z.number(), }), zod.z.object({ "bm25(email_fuzzy)": zod.z.number(), "bm25(name_fuzzy)": zod.z.number(), }), zod.z.object({ "bm25(subject_fuzzy)": zod.z.number(), }), ]); const VespaAutocompleteFileSchema = zod.z .object({ docId: zod.z.string(), title: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: entitySchema, sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteUserSchema = zod.z .object({ docId: zod.z.string(), // optional due to contacts name: zod.z.string().optional(), email: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: entitySchema, photoLink: zod.z.string(), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteMailSchema = zod.z .object({ docId: zod.z.string(), threadId: zod.z.string(), subject: zod.z.string().optional(), app: zod.z.nativeEnum(Apps), entity: entitySchema, sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteChatUserSchema = zod.z .object({ docId: zod.z.string(), // optional due to contacts name: zod.z.string().optional(), email: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: entitySchema, image: zod.z.string(), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteMailAttachmentSchema = zod.z .object({ docId: zod.z.string(), filename: zod.z.string(), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); zod.z .object({ docId: zod.z.string(), name: zod.z.string().optional(), app: zod.z.nativeEnum(Apps), entity: entitySchema, sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteUserQueryHSchema = zod.z .object({ docId: zod.z.string(), query_text: zod.z.string(), timestamp: zod.z.number().optional(), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteChatContainerSchema = zod.z .object({ docId: zod.z.string(), name: zod.z.string(), app: zod.z.nativeEnum(Apps), sddocname: Schemas, }) .merge(defaultVespaFieldsSchema); const VespaAutocompleteSummarySchema = zod.z.union([ VespaAutocompleteFileSchema, VespaAutocompleteUserSchema, VespaAutocompleteMailSchema, VespaAutocompleteUserQueryHSchema, VespaAutocompleteMailAttachmentSchema, VespaAutocompleteChatContainerSchema, VespaAutocompleteChatUserSchema, ]); const VespaAutocompleteFieldsSchema = zod.z .object({ matchfeatures: AutocompleteMatchFeaturesSchema, sddocname: Schemas, }) .and(VespaAutocompleteSummarySchema); const VespaAutocompleteSchema = zod.z.object({ id: zod.z.string(), relevance: zod.z.number(), source: zod.z.string(), fields: VespaAutocompleteFieldsSchema, }); VespaRootBaseSchema.extend({ root: VespaRootBaseSchema.shape.root.extend({ children: zod.z.array(VespaAutocompleteSchema), }), }); const MailResponseSchema = VespaMailGetSchema.pick({ docId: true, threadId: true, app: true, entity: true, subject: true, from: true, relevance: true, timestamp: true, userMap: true, mailId: true, }) .strip() .extend({ type: zod.z.literal("mail"), mimeType: zod.z.string(), chunks_summary: zod.z.array(scoredChunk).optional(), matchfeatures: zod.z.any().optional(), rankfeatures: zod.z.any().optional(), }); const MailAttachmentResponseSchema = VespaMailAttachmentGetSchema.pick({ docId: true, app: true, entity: true, relevance: true, timestamp: true, filename: true, mailId: true, partId: true, fileType: true, }) .strip() .extend({ type: zod.z.literal("mail_attachment"), chunks_summary: zod.z.array(scoredChunk).optional(), matchfeatures: zod.z.any().optional(), rankfeatures: zod.z.any().optional(), }); const ChatMessageResponseSchema = VespaChatMessageGetSchema.pick({ docId: true, teamId: true, channelId: true, text: true, userId: true, app: true, entity: true, createdAt: true, threadId: true, image: true, name: true, domain: true, username: true, attachmentIds: true, mentions: true, relevance: true, updatedAt: true, }) .strip() .extend({ type: zod.z.literal("chat_message"), chunks_summary: zod.z.array(zod.z.string()).optional(), matchfeatures: zod.z.any().optional(), rankfeatures: zod.z.any().optional(), }); const DataSourceFileResponseSchema = VespaDataSourceFileGetSchema.pick({ docId: true, description: true, app: true, fileName: true, fileSize: true, uploadedBy: true, duration: true, mimeType: true, createdAt: true, updatedAt: true, dataSourceRef: true, metadata: true, relevance: true, }) .strip() .extend({ type: zod.z.literal(dataSourceFileSchema), // Using the schema const for the literal chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(), matchfeatures: DataSourceFileMatchFeaturesSchema.optional(), // or z.any().optional() if specific match features aren't always needed here rankfeatures: zod.z.any().optional(), }); ({ gmail: Apps.Gmail, drive: Apps.GoogleDrive, googledrive: Apps.GoogleDrive, googlecalendar: Apps.GoogleCalendar, slack: Apps.Slack, datasource: Apps.DataSource, "google-workspace": Apps.GoogleWorkspace, googledocs: Apps.GoogleDrive, googlesheets: Apps.GoogleDrive, pdf: Apps.GoogleDrive, }); const AutocompleteFileSchema = zod.z .object({ type: zod.z.literal(fileSchema), relevance: zod.z.number(), title: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: entitySchema, }) .strip(); const AutocompleteUserSchema = zod.z .object({ type: zod.z.literal(userSchema), relevance: zod.z.number(), // optional due to contacts name: zod.z.string().optional(), email: zod.z.string(), app: zod.z.nativeEnum(Apps), entity: entitySchema, photoLink: zod.z.string().optional(), }) .strip(); const AutocompleteUserQueryHSchema = zod.z .object({ type: zod.z.literal(userQuerySchema), docId: zod.z.string(), query_text: zod.z.string(), timestamp: zod.z.number().optional(), }) .strip(); const AutocompleteMailSchema = zod.z .object({ type: zod.z.literal(mailSchema), relevance: zod.z.number(), // optional due to contacts subject: zod.z.string().optional(), app: zod.z.nativeEnum(Apps), entity: entitySchema, threadId: zod.z.string().optional(), docId: zod.z.string(), }) .strip(); const AutocompleteMailAttachmentSchema = zod.z .object({ type: zod.z.literal(mailAttachmentSchema), relevance: zod.z.number(), app: zod.z.nativeEnum(Apps), entity: entitySchema, filename: zod.z.string(), docId: zod.z.string(), }) .strip(); const AutocompleteEventSchema = zod.z .object({ type: zod.z.literal(eventSchema), relevance: zod.z.number(), name: zod.z.string().optional(), app: zod.z.nativeEnum(Apps), entity: entitySchema, docId: zod.z.string(), }) .strip(); const AutocompleteChatUserSchema = zod.z .object({ type: zod.z.literal(chatUserSchema), relevance: zod.z.number(), // optional due to contacts name: zod.z.string().optional(), email: zod.z.string().optional(), app: zod.z.nativeEnum(Apps), entity: entitySchema, image: zod.z.string(), }) .strip(); const AutocompleteSchema = zod.z.discriminatedUnion("type", [ AutocompleteFileSchema, AutocompleteUserSchema, AutocompleteMailSchema, AutocompleteEventSchema, AutocompleteUserQueryHSchema, AutocompleteMailAttachmentSchema, AutocompleteChatUserSchema, ]); zod.z.object({ results: zod.z.array(AutocompleteSchema), }); // search result const FileResponseSchema = VespaFileSchema.pick({ docId: true, title: true, url: true, app: true, entity: true, owner: true, ownerEmail: true, photoLink: true, updatedAt: true, }) .extend({ type: zod.z.literal(fileSchema), chunk: zod.z.string().optional(), chunkIndex: zod.z.number().optional(), mimeType: zod.z.string(), chunks_summary: zod.z.array(scoredChunk).optional(), relevance: zod.z.number(), matchfeatures: zod.z.any().optional(), // Add matchfeatures rankfeatures: zod.z.any().optional(), }) .strip(); const EventResponseSchema = VespaEventSchema.pick({ docId: true, name: true, url: true, app: true, entity: true, updatedAt: true, }) .extend({ type: zod.z.literal(eventSchema), relevance: zod.z.number(), description: zod.z.string().optional(), chunks_summary: zod.z.array(zod.z.string()).optional(), attendeesNames: zod.z.array(zod.z.string()).optional(), matchfeatures: zod.z.any().optional(), // Add matchfeatures rankfeatures: zod.z.any().optional(), }) .strip(); const UserResponseSchema = VespaUserSchema.pick({ name: true, email: true, app: true, entity: true, photoLink: true, docId: true, }) .strip() .extend({ type: zod.z.literal(userSchema), relevance: zod.z.number(), matchfeatures: zod.z.any().optional(), // Add matchfeatures rankfeatures: zod.z.any().optional(), }); // Search Response Schema const SearchResultsSchema = zod.z.discriminatedUnion("type", [ UserResponseSchema, FileResponseSchema, DataSourceFileResponseSchema, MailResponseSchema, EventResponseSchema, MailAttachmentResponseSchema, ChatMessageResponseSchema, ]); zod.z.object({ count: zod.z.number(), results: zod.z.array(SearchResultsSchema), groupCount: zod.z.any(), trace: zod.z.any().optional(), }); zod.z.object({ from: zod.z.array(zod.z.string()).optional(), to: zod.z.array(zod.z.string()).optional(), cc: zod.z.array(zod.z.string()).optional(), bcc: zod.z.array(zod.z.string()).optional(), subject: zod.z.array(zod.z.string()).optional(), }); var SearchModes; (function (SearchModes) { SearchModes["NativeRank"] = "default_native"; SearchModes["BM25"] = "default_bm25"; SearchModes["AI"] = "default_ai"; SearchModes["Random"] = "default_random"; SearchModes["GlobalSorted"] = "global_sorted"; })(SearchModes || (SearchModes = {})); function scale(val) { if (!val) return null; return (2 * Math.atan(val / 4)) / Math.PI; } // module contains all the transformations // from vespa to the user accepted types function countHiTags(str) { // Regular expression to match both <hi> and </hi> tags const regex = /<\/?hi>/g; const matches = str.match(regex); return matches ? matches.length : 0; } const getSortedScoredImageChunks = (matchfeatures, existingImageChunksPosSummary, existingImageChunksSummary, docId, maxChunks) => { // return if no chunks summary if (!existingImageChunksSummary?.length) { return []; } const imageChunksPos = existingImageChunksPosSummary; const imageChunkScores = matchfeatures && "image_chunk_scores" in matchfeatures && "cells" in matchfeatures.image_chunk_scores ? matchfeatures.image_chunk_scores.cells : {}; const imageChunksWithIndices = existingImageChunksSummary.map((chunk, index) => ({ index: index, chunk: `${docId}_${imageChunksPos[index] ?? index}`, score: scale(imageChunkScores[index] ?? 0) || 0, // Default to 0 if doesn't have score })); const filteredImageChunks = imageChunksWithIndices.filter(({ index }) => index < imageChunksPos.length); const sortedImageChunks = filteredImageChunks.sort((a, b) => b.score - a.score); return maxChunks ? sortedImageChunks.slice(0, maxChunks) : sortedImageChunks; }; const getSortedScoredChunks = (matchfeatures, existingChunksSummary, maxChunks) => { // return if no chunks summary if (!existingChunksSummary?.length) { return []; } if (!matchfeatures?.chunk_scores?.cells || !Object.keys(matchfeatures?.chunk_scores?.cells).length) { const mappedChunks = existingChunksSummary.map((v, index) => ({ chunk: v, score: 0, index, })); return maxChunks ? mappedChunks.slice(0, maxChunks) : mappedChunks; } const chunkScores = matchfeatures.chunk_scores.cells; // add chunks with chunk scores const chunksWithIndices = existingChunksSummary.map((chunk, index) => ({ index, chunk, score: scale(Number(chunkScores[index]) || 0) || 0, // Default to 0 if doesn't have score })); const filteredChunks = chunksWithIndices.filter(({ index }) => index in chunkScores); const sortedChunks = filteredChunks.sort((a, b) => b.score - a.score); return maxChunks ? sortedChunks.slice(0, maxChunks) : sortedChunks; }; // Vespa -> Backend/App -> Client const maxSearchChunks = 1; const VespaSearchResponseToSearchResult = (resp, email, textChunker) => { const { root, trace } = resp; const children = root.children || []; // Access the nested children array within the trace object const traceInfo = trace?.children || []; // Filter out any potential trace items from children if they exist const searchHits = children.filter((child) => !child.id?.startsWith("trace:")); return { count: root.fields?.totalCount ?? 0, groupCount: {}, results: searchHits ? searchHits.map((child) => { // Narrow down the type based on `sddocname` if (child.fields.sddocname === fileSchema) { // Directly use child.fields which includes matchfeatures const fields = child.fields; fields.type = fileSchema; fields.relevance = child.relevance; // matchfeatures is already part of fields, no need to assign separately fields.chunks_summary = getSortedScoredChunks(fields.matchfeatures, fields.chunks_summary, maxSearchChunks); return FileResponseSchema.parse(fields); } else if (child.fields.sddocname === userSchema) { // Directly use child.fields const fields = child.fields; fields.type = userSchema; fields.relevance = child.relevance; // matchfeatures is already part of fields (if returned by Vespa) // Ensure chunks_summary processing happens before parsing fields.chunks_summary?.sort((a, b) => countHiTags(b) - countHiTags(a)); fields.chunks_summary = fields.chunks_summary?.slice(0, maxSearchChunks); return UserResponseSchema.parse(fields); } else if (child.fields.sddocname === mailSchema) { // Directly use child.fields const fields = child.fields; if (email && fields.userMap && typeof fields.userMap === 'object') fields.docId = fields.userMap[email] || fields.docId; fields.type = mailSchema; fields.relevance = child.relevance; // matchfeatures is already part of fields fields.chunks_summary = getSortedScoredChunks(fields.matchfeatures, fields.chunks_summary, maxSearchChunks); return MailResponseSchema.parse(fields); } else if (child.fields.sddocname === eventSchema) { // Directly use child.fields const fields = child.fields; fields.type = eventSchema; fields.relevance = child.relevance; // matchfeatures is already part of fields (if returned by Vespa) // creating a new property // Ensure chunks_summary processing happens before parsing fields.chunks_summary = fields.description && textChunker ? textChunker.chunkDocument(fields.description) .map((v) => v.chunk) .sort((a, b) => countHiTags(b) - countHiTags(a)) .slice(0, maxSearchChunks) : []; // This line seems redundant as it's assigned above? Keeping it for now. fields.chunks_summary = fields.chunks_summary?.slice(0, maxSearchChunks); return EventResponseSchema.parse(fields); } else if (child.fields.sddocname === mailAttachmentSchema) { // Directly use child.fields const fields = child.fields; fields.type = mailAttachmentSchema; fields.relevance = child.relevance; // matchfeatures is already part of fields fields.chunks_summary = getSortedScoredChunks(fields.matchfeatures, fields.chunks_summary, maxSearchChunks); return MailAttachmentResponseSchema.parse(fields); } else if (child.fields.sddocname === chatMessageSchema) { const fields = child.fields; fields.type = chatMessageSchema; fields.relevance = child.relevance; fields.attachmentIds = []; fields.mentions = []; if (!fields.teamId) { fields.teamId = ""; } return ChatMessageResponseSchema.parse(fields); } else if (child.fields.sddocname === dataSourceFileSchema) { const dsFields = child.fields; const processedChunks = getSortedScoredChunks(dsFields.matchfeatures, dsFields.chunks_summary, maxSearchChunks); const mappedResult = { docId: dsFields.docId, type: dataSourceFileSchema, app: Apps.DataSource, entity: "file", title: dsFields.fileName || dsFields.title, fileName: dsFields.fileName, url: dsFields.url, updatedAt: dsFields.updatedAt, createdAt: dsFields.createdAt, mimeType: dsFields.mimeType, size: dsFields.fileSize || dsFields.size, owner: dsFields.owner, relevance: child.relevance, chunks_summary: processedChunks, matchfeatures: dsFields.matchfeatures, }; return DataSourceFileResponseSchema.parse(mappedResult); } else { throw new Error(`Unknown schema type: ${child.fields?.sddocname ?? "undefined"}`); } }) : [], trace: traceInfo, // Add trace information to the top-level response }; }; const VespaAutocompleteResponseToResult = (resp) => { const { root } = resp; if (!root.children) { return { results: [] }; } let queryHistoryCount = 0; return { results: root.children .map((child) => { // Narrow down the type based on `sddocname` if (child.fields.sddocname === fileSchema) { const fields = child.fields; fields.type = fileSchema; fields.relevance = child.relevance; return AutocompleteFileSchema.parse(fields); } else if (child.fields.sddocname === userSchema) { const fields = child.fields; fields.type = userSchema; fields.relevance = child.relevance; return AutocompleteUserSchema.parse(fields); } else if (child.fields.sddocname === mailSchema) { const fields = child.fields; fields.type = mailSchema; fields.relevance = child.relevance; return AutocompleteMailSchema.parse(fields); } else if (child.fields.sddocname === eventSchema) { const fields = child.fields; fields.type = eventSchema; fields.relevance = child.relevance; return AutocompleteEventSchema.parse(fields); } else if (child.fields.sddocname === userQuerySchema) { const fields = child.fields; fields.type = userQuerySchema; fields.relevance = child.relevance; return AutocompleteUserQueryHSchema.parse(fields); } else if (child.fields.sddocname === mailAttachmentSchema) { const fields = child.fields; fields.type = mailAttachmentSchema; fields.relevance = child.relevance; return AutocompleteMailAttachmentSchema.parse(fields); } else if (child.fields.sddocname === chatUserSchema) { child.fields.type = chatUserSchema; child.fields.relevance = child.relevance; return AutocompleteChatUserSchema.parse(child.fields); } else { throw new Error(`Unknown schema type: ${child.fields?.sddocname}`); } }) .filter((d) => { if (d.type === userQuerySchema) { return queryHistoryCount++ < 3; } return true; }), }; }; function handleVespaGroupResponse(response) { const appEntityCounts = {}; // Navigate to the first level of groups const groupRoot = response.root.children?.[0]; // Assuming this is the group:root level if (!groupRoot || !("children" in groupRoot)) return appEntityCounts; // Safeguard for empty responses // Navigate to the app grouping (e.g., grouplist:app) const appGroup = groupRoot.children?.[0]; if (!appGroup || !("children" in appGroup)) return appEntityCounts; // Safeguard for missing app group // Iterate through the apps // @ts-ignore for (const app of appGroup.children) { const appName = app.value; // Get the app name appEntityCounts[appName] = {}; // Initialize the app entry // Navigate to the entity grouping (e.g., grouplist:entity) const entityGroup = app.children?.[0]; if (!entityGroup || !("children" in entityGroup)) continue; // Skip if no entities // Iterate through the entities // @ts-ignore for (const entity of entityGroup.children) { const entityName = entity.value; // Get the entity name const count = entity.fields?.["count()"] || 0; // Get the count or default to 0 appEntityCounts[appName][entityName] = count; // Assign the count to the app-entity pair } } return appEntityCounts; // Return the final map } const entityToSchemaMapper = (entityName, app) => { if (app === Apps.DataSource) { return dataSourceFileSchema; } const entitySchemaMap = { ...Object.fromEntries(Object.values(DriveEntity).map((e) => [e, fileSchema])), ...Object.fromEntries(Object.values(MailEntity).map((e) => [e, mailSchema])), ...Object.fromEntries(Object.values(MailAttachmentEntity).map((e) => [e, mailAttachmentSchema])), ...Object.fromEntries(Object.values(GooglePeopleEntity).map((e) => [e, userSchema])), ...Object.fromEntries(Object.values(CalendarEntity).map((e) => [e, eventSchema])), ...Object.fromEntries(Object.values(SlackEntity).map((e) => [e, chatMessageSchema])), }; // Handle cases where the same entity name exists in multiple schemas if (Object.keys(MailAttachmentEntity).includes(entityName || "")) { if (app === Apps.GoogleDrive) { return fileSchema; } else if (app === Apps.Gmail) { return mailAttachmentSchema; } } return entitySchemaMap[entityName || ""] || null; }; const appToSchemaMapper = (appName) => { if (!appName) { return null; } const lowerAppName = appName.toLowerCase(); const schemaMap = { [Apps.Gmail.toLowerCase()]: mailSchema, [Apps.GoogleDrive.toLowerCase()]: fileSchema, ["googledrive"]: fileSchema, // Alias for convenience [Apps.GoogleCalendar.toLowerCase()]: eventSchema, ["googlecalendar"]: eventSchema, // Alias for convenience [Apps.Slack.toLowerCase()]: chatMessageSchema, [Apps.DataSource.toLowerCase()]: dataSourceFileSchema, }; return schemaMap[lowerAppName] || null; }; exports.VespaAutocompleteResponseToResult = VespaAutocompleteResponseToResult; exports.VespaSearchResponseToSearchResult = VespaSearchResponseToSearchResult; exports.appToSchemaMapper = appToSchemaMapper; exports.entityToSchemaMapper = entityToSchemaMapper; exports.getSortedScoredChunks = getSortedScoredChunks; exports.getSortedScoredImageChunks = getSortedScoredImageChunks; exports.handleVespaGroupResponse = handleVespaGroupResponse;