UNPKG

vespa-ts

Version:

A reusable TypeScript package for interacting with Vespa search engine with dependency injection support

1,355 lines (1,350 loc) 212 kB
'use strict';

var zod = require('zod');
var crypto = require('crypto');
var pLimit = require('p-limit');

// Vespa schema (document type) names. Several are used below as the literal
// value of the `sddocname` field in the zod schemas.
const fileSchema = "file"; // Replace with your actual schema name
const userSchema = "user";
// calendar
const eventSchema = "event";
// mail
const mailAttachmentSchema = "mail_attachment";
const mailSchema = "mail";
// chat
const chatContainerSchema = "chat_container";
// this is not meant to be searched but we will
// store the data in vespa and fetch it as needed
const chatTeamSchema = "chat_team";
const chatMessageSchema = "chat_message";
const chatUserSchema = "chat_user";
const chatAttachment = "chat_attachment";
// previous queries
const userQuerySchema = "user_query";
const datasourceSchema = "datasource";
const dataSourceFileSchema = "datasource_file";
// not using @ because of vite of frontend
// String-valued enum object: each key below is assigned onto exports.Apps.
exports.Apps = void 0;
(function (Apps) {
    // includes everything google
    Apps["GoogleWorkspace"] = "google-workspace";
    // more granular
    Apps["GoogleDrive"] = "google-drive";
    Apps["Gmail"] = "gmail";
    // Notion = "notion", // Notion is not yet supported
    Apps["GoogleCalendar"] = "google-calendar";
    Apps["Slack"] = "slack";
    Apps["MCP"] = "mcp";
    Apps["Github"] = "github";
    Apps["Xyne"] = "xyne";
    Apps["DataSource"] = "data-source";
})(exports.Apps || (exports.Apps = {}));
// Entity kinds for people/contact documents.
exports.GooglePeopleEntity = void 0;
(function (GooglePeopleEntity) {
    GooglePeopleEntity["Contacts"] = "Contacts";
    GooglePeopleEntity["OtherContacts"] = "OtherContacts";
    GooglePeopleEntity["AdminDirectory"] = "AdminDirectory";
})(exports.GooglePeopleEntity || (exports.GooglePeopleEntity = {}));
// the vespa schemas
// Union of the schema-name literals accepted as `sddocname`.
// Note: `chatAttachment` is not part of this union.
const Schemas = zod.z.union([
    zod.z.literal(fileSchema),
    zod.z.literal(userSchema),
    zod.z.literal(mailSchema),
    zod.z.literal(eventSchema),
    zod.z.literal(userQuerySchema),
    zod.z.literal(mailAttachmentSchema),
    zod.z.literal(chatContainerSchema),
    zod.z.literal(chatTeamSchema),
    zod.z.literal(chatUserSchema),
    zod.z.literal(chatMessageSchema),
    zod.z.literal(datasourceSchema),
    zod.z.literal(dataSourceFileSchema),
]);
exports.MailEntity = void 0;
(function (MailEntity) {
    MailEntity["Email"] = "mail";
})(exports.MailEntity || (exports.MailEntity = {}));
exports.CalendarEntity = void 0;
(function (CalendarEntity) {
    CalendarEntity["Event"] = "event";
})(exports.CalendarEntity || (exports.CalendarEntity = {}));
exports.SlackEntity = void 0;
(function (SlackEntity) {
    SlackEntity["Team"] = "team";
    SlackEntity["User"] = "user";
    SlackEntity["Message"] = "message";
    SlackEntity["Channel"] = "channel";
    SlackEntity["File"] = "file";
})(exports.SlackEntity || (exports.SlackEntity = {}));
exports.DriveEntity = void 0;
(function (DriveEntity) {
    DriveEntity["Docs"] = "docs";
    DriveEntity["Sheets"] = "sheets";
    DriveEntity["Slides"] = "slides";
    DriveEntity["Presentation"] = "presentation";
    DriveEntity["PDF"] = "pdf";
    DriveEntity["Folder"] = "folder";
    DriveEntity["Misc"] = "driveFile";
    DriveEntity["Drawing"] = "drawing";
    DriveEntity["Form"] = "form";
    DriveEntity["Script"] = "script";
    DriveEntity["Site"] = "site";
    DriveEntity["Map"] = "map";
    DriveEntity["Audio"] = "audio";
    DriveEntity["Video"] = "video";
    DriveEntity["Photo"] = "photo";
    DriveEntity["ThirdPartyApp"] = "third_party_app";
    DriveEntity["Image"] = "image";
    DriveEntity["Zip"] = "zip";
    DriveEntity["WordDocument"] = "word_document";
    DriveEntity["ExcelSpreadsheet"] = "excel_spreadsheet";
    DriveEntity["PowerPointPresentation"] = "powerpoint_presentation";
    DriveEntity["Text"] = "text";
    DriveEntity["CSV"] = "csv";
})(exports.DriveEntity || (exports.DriveEntity = {}));
// Note: these values are not the same strings as the matching DriveEntity
// members (e.g. "worddocument" here vs "word_document" above).
exports.MailAttachmentEntity = void 0;
(function (MailAttachmentEntity) {
    MailAttachmentEntity["PDF"] = "pdf";
    MailAttachmentEntity["Sheets"] = "sheets";
    MailAttachmentEntity["CSV"] = "csv";
    MailAttachmentEntity["WordDocument"] = "worddocument";
    MailAttachmentEntity["PowerPointPresentation"] = "powerpointpresentation";
    MailAttachmentEntity["Text"] = "text";
    MailAttachmentEntity["NotValid"] = "notvalid";
})(exports.MailAttachmentEntity || (exports.MailAttachmentEntity = {}));
// Returns true when `entity` equals one of the MailAttachmentEntity values.
const isMailAttachment = (entity) => Object.values(exports.MailAttachmentEntity).includes(entity);
const PeopleEntitySchema = zod.z.nativeEnum(exports.GooglePeopleEntity);
const ChatEntitySchema = zod.z.nativeEnum(exports.SlackEntity);
exports.NotionEntity = void 0;
(function (NotionEntity) {
    NotionEntity["Page"] = "page";
    NotionEntity["Database"] = "database";
})(exports.NotionEntity || (exports.NotionEntity = {}));
const FileEntitySchema = zod.z.nativeEnum(exports.DriveEntity);
const MailEntitySchema = zod.z.nativeEnum(exports.MailEntity);
const MailAttachmentEntitySchema = zod.z.nativeEnum(exports.MailAttachmentEntity);
const EventEntitySchema = zod.z.nativeEnum(exports.CalendarEntity);
const NotionEntitySchema = zod.z.nativeEnum(exports.NotionEntity);
exports.SystemEntity = void 0;
(function (SystemEntity) {
    SystemEntity["SystemInfo"] = "system_info";
    SystemEntity["UserProfile"] = "user_profile";
})(exports.SystemEntity || (exports.SystemEntity = {}));
exports.DataSourceEntity = void 0;
(function (DataSourceEntity) {
    DataSourceEntity["DataSourceFile"] = "data_source_file";
})(exports.DataSourceEntity || (exports.DataSourceEntity = {}));
const SystemEntitySchema = zod.z.nativeEnum(exports.SystemEntity);
const DataSourceEntitySchema = zod.z.nativeEnum(exports.DataSourceEntity);
// Accepts a value from any of the entity enums declared above.
const entitySchema = zod.z.union([
    SystemEntitySchema,
    PeopleEntitySchema,
    FileEntitySchema,
    NotionEntitySchema,
    MailEntitySchema,
    EventEntitySchema,
    MailAttachmentEntitySchema,
    ChatEntitySchema,
    DataSourceEntitySchema,
]);
// A chunk of text together with its score and index.
const scoredChunk = zod.z.object({
    chunk: zod.z.string(),
    score: zod.z.number(),
    index: zod.z.number(),
});
// Fields merged into the search/get schemas below via `.merge(...)`.
const defaultVespaFieldsSchema = zod.z.object({
    relevance: zod.z.number(),
    source: zod.z.string(),
    // sddocname: Schemas,
    documentid: zod.z.string(),
});
const SpreadsheetMetadata = zod.z.object({
    spreadsheetId: zod.z.string(),
    totalSheets: zod.z.number(),
});
// File metadata: either an (empty) object schema or spreadsheet metadata.
const Metadata = zod.z.union([zod.z.object({}), SpreadsheetMetadata]);
// Shape of a `file` document.
const VespaFileSchema = zod.z.object({
    docId: zod.z.string(),
    app: zod.z.nativeEnum(exports.Apps),
    entity: FileEntitySchema,
    title: zod.z.string(),
    url: zod.z.string().nullable(),
    parentId: zod.z.string().nullable(),
    chunks: zod.z.array(zod.z.string()),
    owner: zod.z.string().nullable(),
    ownerEmail: zod.z.string().nullable(),
    photoLink: zod.z.string().nullable(),
    permissions: zod.z.array(zod.z.string()),
    mimeType: zod.z.string().nullable(),
    metadata: Metadata,
    createdAt: zod.z.number(),
    updatedAt: zod.z.number(),
});
// `cells` is a record of string keys to numeric values.
const chunkScoresSchema = zod.z.object({
    cells: zod.z.record(zod.z.string(), zod.z.number()),
});
// Match features for file schema
const FileMatchFeaturesSchema = zod.z.object({
    "bm25(title)": zod.z.number().optional(),
    "bm25(chunks)": zod.z.number().optional(),
    "closeness(field, chunk_embeddings)": zod.z.number().optional(),
    chunk_scores: chunkScoresSchema,
});
// Match features for user schema
const UserMatchFeaturesSchema = zod.z.object({
    "bm25(name)": zod.z.number().optional(),
    "bm25(email)": zod.z.number().optional(),
});
// Match features for mail schema
const MailMatchFeaturesSchema = zod.z.object({
    "bm25(subject)": zod.z.number().optional(),
    "bm25(chunks)": zod.z.number().optional(),
    "bm25(attachmentFilenames)": zod.z.number().optional(),
    chunk_scores: chunkScoresSchema,
});
// Match features for event schema
const EventMatchFeaturesSchema = zod.z.object({
    "bm25(name)": zod.z.number().optional(),
    "bm25(description)": zod.z.number().optional(),
    "bm25(attachmentFilenames)": zod.z.number().optional(),
    "bm25(attendeesNames)": zod.z.number().optional(),
});
// Match features for mail attachment schema
const MailAttachmentMatchFeaturesSchema = zod.z.object({
    chunk_vector_score: zod.z.number().optional(),
    scaled_bm25_chunks: zod.z.number().optional(),
    scaled_bm25_filename: zod.z.number().optional(),
    chunk_scores: chunkScoresSchema,
});
// Match features for chat message schema
const ChatMessageMatchFeaturesSchema = zod.z.object({
    vector_score: zod.z.number().optional(),
    combined_nativeRank: zod.z.number().optional(),
    "nativeRank(text)": zod.z.number().optional(),
    "nativeRank(username)": zod.z.number().optional(),
    "nativeRank(name)": zod.z.number().optional(),
});
// Match features for datasource file schema; unlike the file/mail variants,
// `chunk_scores` is optional here.
const DataSourceFileMatchFeaturesSchema = zod.z.object({
    "bm25(fileName)": zod.z.number().optional(),
    "bm25(chunks)": zod.z.number().optional(),
    "closeness(field, chunk_embeddings)": zod.z.number().optional(),
    chunk_scores: chunkScoresSchema.optional(),
});
const VespaMatchFeatureSchema = zod.z.union([
    FileMatchFeaturesSchema,
    MailMatchFeaturesSchema,
    MailAttachmentMatchFeaturesSchema,
    DataSourceFileMatchFeaturesSchema,
]);
// Base schema for DataSource (for insertion)
const VespaDataSourceSchemaBase = zod.z.object({
    docId: zod.z.string(),
    name: zod.z.string(),
    createdBy: zod.z.string(),
    createdAt: zod.z.number(), // long
    updatedAt: zod.z.number(), // long
});
// Search schema for DataSource
const VespaDataSourceSearchSchema = VespaDataSourceSchemaBase.extend({
    sddocname: zod.z.literal(datasourceSchema),
    matchfeatures: zod.z.any().optional(),
    rankfeatures: zod.z.any().optional(),
}).merge(defaultVespaFieldsSchema);
// Base schema for DataSourceFile (for insertion)
const VespaDataSourceFileSchemaBase = zod.z.object({
    docId: zod.z.string(),
    description: zod.z.string().optional(),
    app: zod.z.literal(exports.Apps.DataSource),
    fileName: zod.z.string().optional(),
    fileSize: zod.z.number().optional(), // long
    chunks: zod.z.array(zod.z.string()),
    image_chunks: zod.z.array(zod.z.string()).optional(), // Added for image descriptions
    chunks_pos: zod.z.array(zod.z.number()).optional(), // Added for text chunk positions
    image_chunks_pos: zod.z.array(zod.z.number()).optional(), // Added for image chunk positions
    uploadedBy: zod.z.string(),
    duration: zod.z.number().optional(), // long
    mimeType: zod.z.string().optional(),
    createdAt: zod.z.number(), // long
    updatedAt: zod.z.number(), // long
    dataSourceRef: zod.z.string(), // reference to datasource docId
    metadata: zod.z.string().optional(), // JSON string
});
// Search schema for DataSourceFile
const
// (completes the `const` declaration opened at the end of the previous line)
VespaDataSourceFileSearchSchema = VespaDataSourceFileSchemaBase.extend({
    sddocname: zod.z.literal(dataSourceFileSchema),
    matchfeatures: DataSourceFileMatchFeaturesSchema,
    rankfeatures: zod.z.any().optional(),
    dataSourceName: zod.z.string().optional(),
})
    .merge(defaultVespaFieldsSchema)
    .extend({
        // Summary entries may be plain strings or scored chunks.
        chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(),
        image_chunks_summary: zod.z
            .array(zod.z.union([zod.z.string(), scoredChunk]))
            .optional(),
        chunks_pos_summary: zod.z.array(zod.z.number()).optional(),
        image_chunks_pos_summary: zod.z.array(zod.z.number()).optional(),
    });
// Search schema for `file` documents: file fields + sddocname/match features
// + the default Vespa fields + optional chunk summaries.
const VespaFileSearchSchema = VespaFileSchema.extend({
    sddocname: zod.z.literal(fileSchema),
    matchfeatures: FileMatchFeaturesSchema,
    rankfeatures: zod.z.any().optional(),
})
    .merge(defaultVespaFieldsSchema)
    .extend({
        chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(),
    });
// basically GetDocument doesn't return sddocname
// in search it's always present
const VespaFileGetSchema = VespaFileSchema.merge(defaultVespaFieldsSchema);
// Schema for `user` documents; only docId, email, app, entity, creationTime
// and sddocname (plus the merged default fields) are required.
const VespaUserSchema = zod.z
    .object({
        docId: zod.z.string().min(1),
        name: zod.z.string().optional(), //.min(1),
        email: zod.z.string().min(1).email(),
        app: zod.z.nativeEnum(exports.Apps),
        entity: zod.z.nativeEnum(exports.GooglePeopleEntity),
        gender: zod.z.string().optional(),
        photoLink: zod.z.string().optional(),
        aliases: zod.z.array(zod.z.string()).optional(),
        language: zod.z.string().optional(),
        includeInGlobalAddressList: zod.z.boolean().optional(),
        isAdmin: zod.z.boolean().optional(),
        isDelegatedAdmin: zod.z.boolean().optional(),
        suspended: zod.z.boolean().optional(),
        archived: zod.z.boolean().optional(),
        urls: zod.z.array(zod.z.string()).optional(),
        rankfeatures: zod.z.any().optional(),
        orgName: zod.z.string().optional(),
        orgJobTitle: zod.z.string().optional(),
        orgDepartment: zod.z.string().optional(),
        orgLocation: zod.z.string().optional(),
        orgDescription: zod.z.string().optional(),
        creationTime: zod.z.number(),
        lastLoggedIn: zod.z.number().optional(),
        birthday: zod.z.number().optional(),
        occupations: zod.z.array(zod.z.string()).optional(),
        userDefined: zod.z.array(zod.z.string()).optional(),
        customerId: zod.z.string().optional(),
        clientData: zod.z.array(zod.z.string()).optional(),
        // this only exists for contacts
        owner: zod.z.string().optional(),
        sddocname: zod.z.literal(userSchema),
    })
    .merge(defaultVespaFieldsSchema);
// Mail Types
const AttachmentSchema = zod.z.object({
    fileType: zod.z.string(),
    fileSize: zod.z.number(),
});
const MailSchema = zod.z.object({
    docId: zod.z.string(),
    threadId: zod.z.string(),
    mailId: zod.z.string().optional(), // Optional for threads
    subject: zod.z.string().default(""), // Default to empty string to avoid zod errors when subject is missing
    chunks: zod.z.array(zod.z.string()),
    timestamp: zod.z.number(),
    app: zod.z.nativeEnum(exports.Apps),
    userMap: zod.z.optional(zod.z.record(zod.z.string(), zod.z.string())),
    entity: zod.z.nativeEnum(exports.MailEntity),
    permissions: zod.z.array(zod.z.string()),
    from: zod.z.string(),
    to: zod.z.array(zod.z.string()),
    cc: zod.z.array(zod.z.string()),
    bcc: zod.z.array(zod.z.string()),
    mimeType: zod.z.string(),
    attachmentFilenames: zod.z.array(zod.z.string()),
    attachments: zod.z.array(AttachmentSchema),
    labels: zod.z.array(zod.z.string()),
});
// Same as MailSchema except that docId must be non-empty.
const VespaMailSchema = MailSchema.extend({
    docId: zod.z.string().min(1),
});
const MailAttachmentSchema = zod.z.object({
    docId: zod.z.string(),
    mailId: zod.z.string(),
    threadId: zod.z.string(),
    partId: zod.z.number().nullable().optional(),
    app: zod.z.nativeEnum(exports.Apps),
    entity: zod.z.nativeEnum(exports.MailAttachmentEntity),
    chunks: zod.z.array(zod.z.string()),
    timestamp: zod.z.number(),
    permissions: zod.z.array(zod.z.string()),
    filename: zod.z.string(),
    fileType: zod.z.string().nullable().optional(),
    fileSize: zod.z.number().nullable().optional(),
});
// Extends MailAttachmentSchema without adding any fields.
const VespaMailAttachmentSchema = MailAttachmentSchema.extend({});
const EventUser =
// (initializer of the `const EventUser =` declaration opened on the previous line)
zod.z.object({
    email: zod.z.string(),
    displayName: zod.z.string(),
});
// NOTE(review): identifier is spelled "Atatchment"; it is referenced under this
// name by VespaEventSchema below, so rename both together if it is ever fixed.
const EventAtatchment = zod.z.object({
    fileId: zod.z.string(),
    title: zod.z.string(),
    fileUrl: zod.z.string(),
    mimeType: zod.z.string(),
});
// Shape of an `event` document; every field is required.
const VespaEventSchema = zod.z.object({
    docId: zod.z.string(),
    name: zod.z.string(),
    description: zod.z.string(),
    url: zod.z.string(),
    status: zod.z.string(),
    location: zod.z.string(),
    createdAt: zod.z.number(),
    updatedAt: zod.z.number(),
    app: zod.z.nativeEnum(exports.Apps),
    entity: zod.z.nativeEnum(exports.CalendarEntity),
    creator: EventUser,
    organizer: EventUser,
    attendees: zod.z.array(EventUser),
    attendeesNames: zod.z.array(zod.z.string()),
    startTime: zod.z.number(),
    endTime: zod.z.number(),
    attachmentFilenames: zod.z.array(zod.z.string()),
    attachments: zod.z.array(EventAtatchment),
    recurrence: zod.z.array(zod.z.string()),
    baseUrl: zod.z.string(),
    joiningLink: zod.z.string(),
    permissions: zod.z.array(zod.z.string()),
    cancelledInstances: zod.z.array(zod.z.string()),
    defaultStartTime: zod.z.boolean(),
});
const VespaMailSearchSchema = VespaMailSchema.extend({
    sddocname: zod.z.literal("mail"),
    matchfeatures: MailMatchFeaturesSchema,
    rankfeatures: zod.z.any().optional(),
})
    .merge(defaultVespaFieldsSchema)
    .extend({
        // attachment won't have this
        chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(),
    });
const VespaMailAttachmentSearchSchema = VespaMailAttachmentSchema.extend({
    sddocname: zod.z.literal("mail_attachment"),
    matchfeatures: MailAttachmentMatchFeaturesSchema,
    rankfeatures: zod.z.any().optional(),
})
    .merge(defaultVespaFieldsSchema)
    .extend({
        chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(),
    });
const VespaEventSearchSchema = VespaEventSchema.extend({
    sddocname: zod.z.literal("event"),
    // Assuming events can have rankfeatures
    rankfeatures: zod.z.any().optional(),
}).merge(defaultVespaFieldsSchema);
// Shape of a stored user query (`user_query` documents).
const VespaUserQueryHistorySchema = zod.z.object({
    docId: zod.z.string(),
    query_text: zod.z.string(),
    timestamp: zod.z.number(),
    count: zod.z.number(),
});
const VespaUserQueryHGetSchema = VespaUserQueryHistorySchema.extend({
    sddocname: zod.z.literal("user_query"),
}).merge(defaultVespaFieldsSchema);
const VespaMailGetSchema = VespaMailSchema.merge(defaultVespaFieldsSchema);
const VespaMailAttachmentGetSchema = VespaMailAttachmentSchema.merge(defaultVespaFieldsSchema);
const VespaChatMessageSchema = zod.z.object({
    docId: zod.z.string(), // client_msg_id from Slack
    teamId: zod.z.string(), // Slack team ID (e.g., "T05N1EJSE0K")
    channelId: zod.z.string(), // Slack channel ID (e.g., "C123ABC456")
    text: zod.z.string(),
    userId: zod.z.string(), // Slack user ID (e.g., "U032QT45V53")
    app: zod.z.nativeEnum(exports.Apps), // App (e.g., "slack")
    entity: zod.z.nativeEnum(exports.SlackEntity), // Entity (e.g., "message")
    name: zod.z.string(),
    username: zod.z.string(),
    image: zod.z.string(),
    channelName: zod.z.string().optional(), // derived
    isIm: zod.z.boolean().optional(), // derived
    isMpim: zod.z.boolean().optional(), // derived
    isPrivate: zod.z.boolean().optional(), // derived
    permissions: zod.z.array(zod.z.string()).optional(), // derived,
    teamName: zod.z.string().optional(), // derived
    domain: zod.z.string().optional(), // derived
    createdAt: zod.z.number(), // Slack ts (e.g., 1734442791.514519)
    teamRef: zod.z.string(), // vespa id for team
    threadId: zod.z.string().default(""), // Slack thread_ts; defaults to "" when absent
    attachmentIds: zod.z.array(zod.z.string()).default([]), // Slack file IDs (e.g., ["F0857N0FF4N"])
    // reactions: z.array(z.string()), // Commented out in Vespa schema, so excluded
    mentions: zod.z.array(zod.z.string()), // Extracted from text (e.g., ["U032QT45V53"])
    // NOTE(review): the schema requires a number here; the "null if not edited"
    // remark describes the Slack value and would not pass validation as null.
    updatedAt: zod.z.number(), // Slack edited.ts (e.g., 1734442538.0), null if not edited
    deletedAt: zod.z.number(),
    metadata: zod.z.string(), // JSON string for subtype, etc. (e.g., "{\"subtype\": null}")
});
const VespaChatMessageSearchSchema = VespaChatMessageSchema.extend({
    sddocname: zod.z.literal(chatMessageSchema),
    matchfeatures: ChatMessageMatchFeaturesSchema,
    rankfeatures: zod.z.any().optional(),
})
    .merge(defaultVespaFieldsSchema)
    .extend({
        // Plain strings only (no scoredChunk alternative) for chat messages.
        chunks_summary: zod.z.array(zod.z.string()).optional(),
    });
const VespaChatMessageGetSchema = VespaChatMessageSchema.merge(defaultVespaFieldsSchema);
const VespaChatUserSchema = zod.z.object({
    docId: zod.z.string(),
    name: zod.z.string(),
    title: zod.z.string(),
    app: zod.z.nativeEnum(exports.Apps),
    entity: zod.z.nativeEnum(exports.SlackEntity),
    image: zod.z.string(),
    email: zod.z.string(),
    statusText: zod.z.string(),
    tz: zod.z.string(),
    teamId: zod.z.string(),
    deleted: zod.z.boolean(),
    isAdmin: zod.z.boolean(),
    updatedAt: zod.z.number(),
});
// Get response wrapper: the chat-user fields are nested under `fields`.
const VespaChatUserGetSchema = zod.z.object({
    id: zod.z.string(),
    pathId: zod.z.string(),
    fields: VespaChatUserSchema,
});
const VespaChatUserSearchSchema = VespaChatUserSchema.extend({
    sddocname: zod.z.literal(chatUserSchema),
}).merge(defaultVespaFieldsSchema);
const VespaChatContainerSchema = zod.z.object({
    docId: zod.z.string(),
    name: zod.z.string(),
    channelName: zod.z.string(),
    creator: zod.z.string(),
    app: zod.z.nativeEnum(exports.Apps),
    entity: zod.z.nativeEnum(exports.SlackEntity),
    isPrivate: zod.z.boolean(),
    isArchived: zod.z.boolean(),
    isGeneral: zod.z.boolean(),
    isIm: zod.z.boolean(),
    isMpim: zod.z.boolean(),
    domain: zod.z.string().optional(), // derived
    permissions: zod.z.array(zod.z.string()),
    createdAt: zod.z.number(),
    updatedAt: zod.z.number(),
    lastSyncedAt: zod.z.number(),
    topic: zod.z.string(),
    description: zod.z.string(),
    count: zod.z.number().int(),
});
// Schema for search results that includes Vespa fields
const VespaChatContainerSearchSchema = VespaChatContainerSchema.extend({
    sddocname: zod.z.literal(chatContainerSchema),
}).merge(defaultVespaFieldsSchema);
// Match features for chat container schema
const ChatContainerMatchFeaturesSchema = zod.z.object({
    "bm25(name)": zod.z.number().optional(),
    "bm25(topic)": zod.z.number().optional(),
    "bm25(description)": zod.z.number().optional(),
    "closeness(field, chunk_embeddings)": zod.z.number().optional(),
});
const VespaChatTeamSchema = zod.z.object({
    docId: zod.z.string(),
    name: zod.z.string(),
    app: zod.z.nativeEnum(exports.Apps),
    icon: zod.z.string(),
    url: zod.z.string(),
    domain: zod.z.string(),
    email_domain: zod.z.string(),
    own: zod.z.boolean(),
    createdAt: zod.z.number(),
    updatedAt: zod.z.number(),
    count: zod.z.number().int(),
});
const VespaChatTeamGetSchema = VespaChatTeamSchema.extend({
    sddocname: zod.z.literal(chatTeamSchema),
}).merge(defaultVespaFieldsSchema);
// Search hit fields, discriminated on `sddocname`.
// Note: VespaChatTeamGetSchema is not a member of this union.
const VespaSearchFieldsUnionSchema = zod.z.discriminatedUnion("sddocname", [
    VespaUserSchema,
    VespaFileSearchSchema,
    VespaMailSearchSchema,
    VespaEventSearchSchema,
    VespaUserQueryHGetSchema,
    VespaMailAttachmentSearchSchema,
    VespaChatContainerSearchSchema,
    VespaChatUserSearchSchema,
    VespaChatMessageSearchSchema,
    VespaDataSourceSearchSchema,
    VespaDataSourceFileSearchSchema,
]);
// Get schema for DataSourceFile
const VespaDataSourceFileGetSchema = VespaDataSourceFileSchemaBase.merge(defaultVespaFieldsSchema);
const SearchMatchFeaturesSchema = zod.z.union([
    FileMatchFeaturesSchema,
    UserMatchFeaturesSchema,
    MailMatchFeaturesSchema,
    EventMatchFeaturesSchema,
    MailAttachmentMatchFeaturesSchema,
    ChatMessageMatchFeaturesSchema,
    DataSourceFileMatchFeaturesSchema,
    ChatContainerMatchFeaturesSchema,
]);
// Intersection: a hit must carry `matchfeatures` and `sddocname` AND satisfy
// one member of the discriminated union above.
const VespaSearchFieldsSchema = zod.z
    .object({
        matchfeatures: SearchMatchFeaturesSchema,
        sddocname: Schemas,
    })
    .and(VespaSearchFieldsUnionSchema);
const VespaGetFieldsSchema = zod.z.union([
    VespaUserSchema,
    VespaFileGetSchema,
    VespaMailGetSchema,
    VespaDataSourceFileGetSchema,
]);
const VespaSearchResultsSchema = zod.z.object({
    id: zod.z.string(),
    relevance: zod.z.number(),
    fields: VespaSearchFieldsSchema,
    pathId: zod.z.string().optional(),
});
// Schema expression whose result is not assigned to anything (same shape as
// VespaSearchResultsSchema); presumably left behind by the bundler — TODO confirm.
zod.z.object({
    id: zod.z.string(),
    relevance: zod.z.number(),
    fields: VespaSearchFieldsSchema,
    pathId: zod.z.string().optional(),
});
// Grouping result node; `children` refers back to this schema lazily, so
// groups can nest.
const VespaGroupSchema = zod.z.object({
    id: zod.z.string(),
    relevance: zod.z.number(),
    label: zod.z.string(),
    value: zod.z.string().optional(),
    fields: zod.z
        .object({
            "count()": zod.z.number(),
        })
        .optional(),
    children: zod.z.array(zod.z.lazy(() => VespaGroupSchema)).optional(),
});
const VespaErrorSchema = zod.z.object({
    code: zod.z.number(),
    summary: zod.z.string(),
    source: zod.z.string(),
    message: zod.z.string(),
});
// Common envelope of a response; specific responses extend `root` with a
// typed `children` array.
const VespaRootBaseSchema = zod.z.object({
    root: zod.z.object({
        id: zod.z.string(),
        relevance: zod.z.number(),
        fields: zod.z
            .object({
                totalCount: zod.z.number(),
            })
            .optional(),
        coverage: zod.z.object({
            coverage: zod.z.number(),
            documents: zod.z.number(),
            full: zod.z.boolean(),
            nodes: zod.z.number(),
            results: zod.z.number(),
            resultsFull: zod.z.number(),
        }),
        errors: zod.z.array(VespaErrorSchema).optional(),
    }),
    trace: zod.z.any().optional(), // Add optional trace field to the root
});
// A child of the search root is either a hit or a group.
const VespaSearchResultSchema = zod.z.union([
    VespaSearchResultsSchema,
    VespaGroupSchema,
]);
// Schema expression whose result is not assigned to anything; presumably left
// behind by the bundler — TODO confirm.
VespaRootBaseSchema.extend({
    root: VespaRootBaseSchema.shape.root.extend({
        children: zod.z.array(VespaSearchResultSchema),
    }),
});
const AutocompleteMatchFeaturesSchema = zod.z.union([
    zod.z.object({
        "bm25(title_fuzzy)": zod.z.number(),
    }),
    zod.z.object({
        "bm25(email_fuzzy)": zod.z.number(),
        "bm25(name_fuzzy)": zod.z.number(),
    }),
    zod.z.object({
        "bm25(subject_fuzzy)": zod.z.number(),
    }),
]);
const VespaAutocompleteFileSchema = zod.z
    .object({
        docId: zod.z.string(),
        title: zod.z.string(),
        app: zod.z.nativeEnum(exports.Apps),
        entity: entitySchema,
        sddocname: Schemas,
    })
    .merge(defaultVespaFieldsSchema);
const VespaAutocompleteUserSchema = zod.z
    .object({
        docId: zod.z.string(),
        // optional due to contacts
        name: zod.z.string().optional(),
        email: zod.z.string(),
        app: zod.z.nativeEnum(exports.Apps),
        entity: entitySchema,
        photoLink: zod.z.string(),
        sddocname: Schemas,
    })
    .merge(defaultVespaFieldsSchema);
const VespaAutocompleteMailSchema =
// (initializer of the `const VespaAutocompleteMailSchema =` declaration opened
// on the previous line)
zod.z
    .object({
        docId: zod.z.string(),
        threadId: zod.z.string(),
        subject: zod.z.string().optional(),
        app: zod.z.nativeEnum(exports.Apps),
        entity: entitySchema,
        sddocname: Schemas,
    })
    .merge(defaultVespaFieldsSchema);
const VespaAutocompleteChatUserSchema = zod.z
    .object({
        docId: zod.z.string(),
        // optional due to contacts
        name: zod.z.string().optional(),
        email: zod.z.string(),
        app: zod.z.nativeEnum(exports.Apps),
        entity: entitySchema,
        image: zod.z.string(),
        sddocname: Schemas,
    })
    .merge(defaultVespaFieldsSchema);
const VespaAutocompleteMailAttachmentSchema = zod.z
    .object({
        docId: zod.z.string(),
        filename: zod.z.string(),
        sddocname: Schemas,
    })
    .merge(defaultVespaFieldsSchema);
// Schema expression whose result is not assigned to anything; presumably left
// behind by the bundler — TODO confirm.
zod.z
    .object({
        docId: zod.z.string(),
        name: zod.z.string().optional(),
        app: zod.z.nativeEnum(exports.Apps),
        entity: entitySchema,
        sddocname: Schemas,
    })
    .merge(defaultVespaFieldsSchema);
const VespaAutocompleteUserQueryHSchema = zod.z
    .object({
        docId: zod.z.string(),
        query_text: zod.z.string(),
        timestamp: zod.z.number().optional(),
        sddocname: Schemas,
    })
    .merge(defaultVespaFieldsSchema);
const VespaAutocompleteChatContainerSchema = zod.z
    .object({
        docId: zod.z.string(),
        name: zod.z.string(),
        app: zod.z.nativeEnum(exports.Apps),
        sddocname: Schemas,
    })
    .merge(defaultVespaFieldsSchema);
// Plain (non-discriminated) union of the autocomplete summary shapes.
const VespaAutocompleteSummarySchema = zod.z.union([
    VespaAutocompleteFileSchema,
    VespaAutocompleteUserSchema,
    VespaAutocompleteMailSchema,
    VespaAutocompleteUserQueryHSchema,
    VespaAutocompleteMailAttachmentSchema,
    VespaAutocompleteChatContainerSchema,
    VespaAutocompleteChatUserSchema,
]);
// Intersection: `matchfeatures` + `sddocname` AND one of the summary shapes.
const VespaAutocompleteFieldsSchema = zod.z
    .object({
        matchfeatures: AutocompleteMatchFeaturesSchema,
        sddocname: Schemas,
    })
    .and(VespaAutocompleteSummarySchema);
const VespaAutocompleteSchema = zod.z.object({
    id: zod.z.string(),
    relevance: zod.z.number(),
    source: zod.z.string(),
    fields: VespaAutocompleteFieldsSchema,
});
// Root envelope whose `root.children` are autocomplete hits.
const VespaAutocompleteResponseSchema = VespaRootBaseSchema.extend({
    root: VespaRootBaseSchema.shape.root.extend({
        children: zod.z.array(VespaAutocompleteSchema),
    }),
});
// Response schemas: each picks a subset of fields from the matching Get
// schema, strips unknown keys, and adds a literal `type` tag.
const MailResponseSchema = VespaMailGetSchema.pick({
    docId: true,
    threadId: true,
    app: true,
    entity: true,
    subject: true,
    from: true,
    relevance: true,
    timestamp: true,
    userMap: true,
    mailId: true,
})
    .strip()
    .extend({
        type: zod.z.literal("mail"),
        mimeType: zod.z.string(),
        chunks_summary: zod.z.array(scoredChunk).optional(),
        matchfeatures: zod.z.any().optional(),
        rankfeatures: zod.z.any().optional(),
    });
const MailAttachmentResponseSchema = VespaMailAttachmentGetSchema.pick({
    docId: true,
    app: true,
    entity: true,
    relevance: true,
    timestamp: true,
    filename: true,
    mailId: true,
    partId: true,
    fileType: true,
})
    .strip()
    .extend({
        type: zod.z.literal("mail_attachment"),
        chunks_summary: zod.z.array(scoredChunk).optional(),
        matchfeatures: zod.z.any().optional(),
        rankfeatures: zod.z.any().optional(),
    });
const ChatMessageResponseSchema = VespaChatMessageGetSchema.pick({
    docId: true,
    teamId: true,
    channelId: true,
    text: true,
    userId: true,
    app: true,
    entity: true,
    createdAt: true,
    threadId: true,
    image: true,
    name: true,
    domain: true,
    username: true,
    attachmentIds: true,
    mentions: true,
    relevance: true,
    updatedAt: true,
})
    .strip()
    .extend({
        type: zod.z.literal("chat_message"),
        chunks_summary: zod.z.array(zod.z.string()).optional(),
        matchfeatures: zod.z.any().optional(),
        rankfeatures: zod.z.any().optional(),
    });
const DataSourceFileResponseSchema = VespaDataSourceFileGetSchema.pick({
    docId: true,
    description: true,
    app: true,
    fileName: true,
    fileSize: true,
    uploadedBy: true,
    duration: true,
    mimeType: true,
    createdAt: true,
    updatedAt: true,
    dataSourceRef: true,
    metadata: true,
    relevance: true,
})
    .strip()
    .extend({
        type: zod.z.literal(dataSourceFileSchema), // Using the schema const for the literal
        chunks_summary: zod.z.array(zod.z.union([zod.z.string(), scoredChunk])).optional(),
        matchfeatures: DataSourceFileMatchFeaturesSchema.optional(), // or z.any().optional() if specific match features aren't always needed here
        rankfeatures: zod.z.any().optional(),
    });
// Maps lowercase integration keys to Apps values; several keys (drive,
// googledrive, googledocs, googlesheets, pdf) all map to Apps.GoogleDrive.
const APP_INTEGRATION_MAPPING = {
    gmail: exports.Apps.Gmail,
    drive: exports.Apps.GoogleDrive,
    googledrive: exports.Apps.GoogleDrive,
    googlecalendar: exports.Apps.GoogleCalendar,
    slack: exports.Apps.Slack,
    datasource: exports.Apps.DataSource,
    "google-workspace": exports.Apps.GoogleWorkspace,
    googledocs: exports.Apps.GoogleDrive,
    googlesheets: exports.Apps.GoogleDrive,
    pdf: exports.Apps.GoogleDrive,
};
// Autocomplete result shapes, each tagged with a literal `type`; unknown keys
// are stripped.
const AutocompleteFileSchema = zod.z
    .object({
        type: zod.z.literal(fileSchema),
        relevance: zod.z.number(),
        title: zod.z.string(),
        app: zod.z.nativeEnum(exports.Apps),
        entity: entitySchema,
    })
    .strip();
const AutocompleteUserSchema = zod.z
    .object({
        type: zod.z.literal(userSchema),
        relevance: zod.z.number(),
        // optional due to contacts
        name: zod.z.string().optional(),
        email: zod.z.string(),
        app: zod.z.nativeEnum(exports.Apps),
        entity: entitySchema,
        photoLink: zod.z.string().optional(),
    })
    .strip();
const AutocompleteUserQueryHSchema = zod.z
    .object({
        type: zod.z.literal(userQuerySchema),
        docId: zod.z.string(),
        query_text: zod.z.string(),
        timestamp: zod.z.number().optional(),
    })
    .strip();
const AutocompleteMailSchema = zod.z
    .object({
        type: zod.z.literal(mailSchema),
        relevance: zod.z.number(),
        // optional due to contacts
        subject: zod.z.string().optional(),
        app: zod.z.nativeEnum(exports.Apps),
        entity: entitySchema,
        threadId: zod.z.string().optional(),
        docId: zod.z.string(),
    })
    .strip();
const AutocompleteMailAttachmentSchema = zod.z
    .object({
        type: zod.z.literal(mailAttachmentSchema),
        relevance: zod.z.number(),
        app: zod.z.nativeEnum(exports.Apps),
        entity: entitySchema,
        filename: zod.z.string(),
        docId: zod.z.string(),
    })
    .strip();
const AutocompleteEventSchema = zod.z
    .object({
        type: zod.z.literal(eventSchema),
        relevance: zod.z.number(),
        name: zod.z.string().optional(),
        app: zod.z.nativeEnum(exports.Apps),
        entity: entitySchema,
        docId: zod.z.string(),
    })
    .strip();
const
// (completes the `const` declaration opened at the end of the previous line)
AutocompleteChatUserSchema = zod.z
    .object({
        type: zod.z.literal(chatUserSchema),
        relevance: zod.z.number(),
        // optional due to contacts
        name: zod.z.string().optional(),
        email: zod.z.string().optional(),
        app: zod.z.nativeEnum(exports.Apps),
        entity: entitySchema,
        image: zod.z.string(),
    })
    .strip();
// Autocomplete result, discriminated on the literal `type` tag.
const AutocompleteSchema = zod.z.discriminatedUnion("type", [
    AutocompleteFileSchema,
    AutocompleteUserSchema,
    AutocompleteMailSchema,
    AutocompleteEventSchema,
    AutocompleteUserQueryHSchema,
    AutocompleteMailAttachmentSchema,
    AutocompleteChatUserSchema,
]);
const AutocompleteResultsSchema = zod.z.object({
    results: zod.z.array(AutocompleteSchema),
});
// search result
// File result: selected file fields plus a `type` tag, optional chunk data
// and a required `relevance`; unknown keys are stripped.
const FileResponseSchema = VespaFileSchema.pick({
    docId: true,
    title: true,
    url: true,
    app: true,
    entity: true,
    owner: true,
    ownerEmail: true,
    photoLink: true,
    updatedAt: true,
})
    .extend({
        type: zod.z.literal(fileSchema),
        chunk: zod.z.string().optional(),
        chunkIndex: zod.z.number().optional(),
        mimeType: zod.z.string(),
        chunks_summary: zod.z.array(scoredChunk).optional(),
        relevance: zod.z.number(),
        matchfeatures: zod.z.any().optional(), // Add matchfeatures
        rankfeatures: zod.z.any().optional(),
    })
    .strip();
// Event result; note `description` is optional here although it is required
// in VespaEventSchema.
const EventResponseSchema = VespaEventSchema.pick({
    docId: true,
    name: true,
    url: true,
    app: true,
    entity: true,
    updatedAt: true,
})
    .extend({
        type: zod.z.literal(eventSchema),
        relevance: zod.z.number(),
        description: zod.z.string().optional(),
        chunks_summary: zod.z.array(zod.z.string()).optional(),
        attendeesNames: zod.z.array(zod.z.string()).optional(),
        matchfeatures: zod.z.any().optional(), // Add matchfeatures
        rankfeatures: zod.z.any().optional(),
    })
    .strip();
const UserResponseSchema = VespaUserSchema.pick({
    name: true,
    email: true,
    app: true,
    entity: true,
    photoLink: true,
    docId: true,
})
    .strip()
    .extend({
        type: zod.z.literal(userSchema),
        relevance: zod.z.number(),
        matchfeatures: zod.z.any().optional(), // Add matchfeatures
        rankfeatures: zod.z.any().optional(),
    });
// Search Response Schema
const
SearchResultsSchema = zod.z.discriminatedUnion("type", [ UserResponseSchema, FileResponseSchema, DataSourceFileResponseSchema, MailResponseSchema, EventResponseSchema, MailAttachmentResponseSchema, ChatMessageResponseSchema, ]); const SearchResponseSchema = zod.z.object({ count: zod.z.number(), results: zod.z.array(SearchResultsSchema), groupCount: zod.z.any(), trace: zod.z.any().optional(), }); const IntentSchema = zod.z.object({ from: zod.z.array(zod.z.string()).optional(), to: zod.z.array(zod.z.string()).optional(), cc: zod.z.array(zod.z.string()).optional(), bcc: zod.z.array(zod.z.string()).optional(), subject: zod.z.array(zod.z.string()).optional(), }); exports.SearchModes = void 0; (function (SearchModes) { SearchModes["NativeRank"] = "default_native"; SearchModes["BM25"] = "default_bm25"; SearchModes["AI"] = "default_ai"; SearchModes["Random"] = "default_random"; SearchModes["GlobalSorted"] = "global_sorted"; })(exports.SearchModes || (exports.SearchModes = {})); function scale(val) { if (!val) return null; return (2 * Math.atan(val / 4)) / Math.PI; } const getErrorMessage = (error) => { if (error instanceof Error) return error.message; return String(error); }; const escapeYqlValue = (value) => { return value.replace(/'/g, "''"); }; // Gmail intent processing function const processGmailIntent = (intent, logger) => { const intentConditions = []; // Helper function to validate email addresses const isValidEmailAddress = (email) => { const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/; return emailRegex.test(email); }; // VALIDATION: Process intent if there are actual email addresses OR subject fields // DO NOT process intent for names without email addresses (unless subject is present) const hasValidEmailAddresses = intent && ((intent.from && intent.from.length > 0 && intent.from.some(isValidEmailAddress)) || (intent.to && intent.to.length > 0 && intent.to.some(isValidEmailAddress)) || (intent.cc && intent.cc.length > 0 && intent.cc.some(isValidEmailAddress)) || 
(intent.bcc && intent.bcc.length > 0 && intent.bcc.some(isValidEmailAddress))); const hasSubjectFields = intent && intent.subject && intent.subject.length > 0; // Process intent if we have valid email addresses OR subject fields if (!hasValidEmailAddresses && !hasSubjectFields) { logger.debug("Intent contains only names or no actionable identifiers - skipping Gmail intent filtering", { intent }); return []; // Return empty array if no valid email addresses or subjects found } logger.debug("Intent contains valid email addresses or subjects - processing Gmail intent filtering", { intent }); // Process 'from' field if (intent.from && intent.from.length > 0) { if (intent.from.length === 1 && intent.from[0]) { const fromCondition = `"from" contains '${escapeYqlValue(intent.from[0])}'`; intentConditions.push(fromCondition); } else { const fromConditions = intent.from .map((email) => `"from" contains '${escapeYqlValue(email)}'`) .join(" or "); intentConditions.push(`(${fromConditions})`); } } // Process 'to' field if (intent.to && intent.to.length > 0 && intent.to[0]) { if (intent.to.length === 1) { const toCondition = `"to" contains '${escapeYqlValue(intent.to[0])}'`; intentConditions.push(toCondition); } else { const toConditions = intent.to .map((email) => `"to" contains '${escapeYqlValue(email)}'`) .join(" or "); intentConditions.push(`(${toConditions})`); } } // Process 'cc' field if (intent.cc && intent.cc.length > 0 && intent.cc[0]) { if (intent.cc.length === 1) { const ccCondition = `cc contains '${escapeYqlValue(intent.cc[0])}'`; intentConditions.push(ccCondition); } else { const ccConditions = intent.cc .map((email) => `cc contains '${escapeYqlValue(email)}'`) .join(" or "); intentConditions.push(`(${ccConditions})`); } } // Process 'bcc' field if (intent.bcc && intent.bcc.length > 0 && intent.bcc[0]) { if (intent.bcc.length === 1) { const bccCondition = `bcc contains '${escapeYqlValue(intent.bcc[0])}'`; intentConditions.push(bccCondition); } else { const 
bccConditions = intent.bcc .map((email) => `bcc contains '${escapeYqlValue(email)}'`) .join(" or "); intentConditions.push(`(${bccConditions})`); } } // Process 'subject' field if (intent.subject && intent.subject.length > 0 && intent.subject[0]) { if (intent.subject.length === 1) { const subjectCondition = `"subject" contains '${escapeYqlValue(intent.subject[0])}'`; intentConditions.push(subjectCondition); } else { const subjectConditions = intent.subject .map((subj) => `"subject" contains '${escapeYqlValue(subj)}'`) .join(" or "); intentConditions.push(`(${subjectConditions})`); } } return intentConditions; }; const dateToUnixTimestamp = (dateString, endOfDay = false) => { const date = new Date(dateString); if (isNaN(date.getTime())) { throw new Error(`Invalid date format: ${dateString}. Expected format: YYYY-MM-DD`); } if (endOfDay) { date.setHours(23, 59, 59, 999); } else { date.setHours(0, 0, 0, 0); } const timestampMs = date.getTime(); const seconds = Math.floor(timestampMs / 1000); const microseconds = (timestampMs % 1000) * 1000; return `${seconds}.${microseconds.toString().padStart(6, "0")}`; }; var VespaDbOp; (function (VespaDbOp) { VespaDbOp["Search"] = "Search"; })(VespaDbOp || (VespaDbOp = {})); var DbOp; (function (DbOp) { DbOp["Create"] = "Create"; DbOp["READ"] = "Read"; DbOp["Update"] = "Update"; DbOp["Delete"] = "Delete"; })(DbOp || (DbOp = {})); class VespaError extends Error { constructor({ message, sources, op, docId, cause }) { let fullMessage = `${message}: for source ${sources} and op: ${op}`; if (docId) fullMessage += ` for docId: ${docId}`; super(fullMessage, { cause }); Error.captureStackTrace(this, this.constructor); } } class ErrorDeletingDocuments extends VespaError { constructor(errorOpts) { super({ ...errorOpts, op: DbOp.READ }); this.name = this.constructor.name; } } class ErrorRetrievingDocuments extends VespaError { constructor(vespaErrOpts) { let { message, cause } = vespaErrOpts; if (!message) { message = "Error retrieving 
documents"; } super({ ...vespaErrOpts, message, cause, op: DbOp.READ }); this.name = this.constructor.name; } } class ErrorPerformingSearch extends VespaError { constructor(vespaErrOpts) { super({ ...vespaErrOpts, op: VespaDbOp.Search }); this.name = this.constructor.name; } } class ErrorInsertingDocument extends VespaError { constructor(vespaErrOpts) { let { message, cause } = vespaErrOpts; if (!message) { message = `Error inserting document`; } super({ ...vespaErrOpts, message, cause, op: DbOp.Create }); this.name = this.constructor.name; } } // module contains all the transformations // from vespa to the user accepted types function countHiTags(str) { // Regular expression to match both <hi> and </hi> tags const regex = /<\/?hi>/g; const matches = str.match(regex); return matches ? matches.length : 0; } const getSortedScoredImageChunks = (matchfeatures, existingImageChunksPosSummary, existingImageChunksSummary, docId, maxChunks) => { // return if no chunks summary if (!existingImageChunksSummary?.length) { return []; } const imageChunksPos = existingImageChunksPosSummary; const imageChunkScores = matchfeatures && "image_chunk_scores" in matchfeatures && "cells" in matchfeatures.image_chunk_scores ? matchfeatures.image_chunk_scores.cells : {}; const imageChunksWithIndices = existingImageChunksSummary.map((chunk, index) => ({ index: index, chunk: `${docId}_${imageChunksPos[index] ?? index}`, score: scale(imageChunkScores[index] ?? 0) || 0, // Default to 0 if doesn't have score })); const filteredImageChunks = imageChunksWithIndices.filter(({ index }) => index < imageChunksPos.length); const sortedImageChunks = filteredImageChunks.sort((a, b) => b.score - a.score); return maxChunks ? 
sortedImageChunks.slice(0, maxChunks) : sortedImageChunks; }; const getSortedScoredChunks = (matchfeatures, existingChunksSummary, maxChunks) => { // return if no chunks summary if (!existingChunksSummary?.length) { return []; } if (!matchfeatures?.chunk_scores?.cells || !Object.keys(matchfeatures?.chunk_scores?.cells).length) { const mappedChunks = existingChunksSummary.map((v, index) => ({ chunk: v, score: 0, index, })); return maxChunks ? mappedChunks.slice(0, maxChunks) : mappedChunks; } const chunkScores = matchfeatures.chunk_scores.cells; // add chunks with chunk scores const chunksWithIndices = existingChunksSummary.map((chunk, index) => ({ index, chunk, score: scale(Number(chunkScores[index]) || 0) || 0, // Default to 0 if doesn't have score })); const filteredChunks = chunksWithIndices.filter(({ index }) => index in chunkScores); const sortedChunks = filteredChunks.sort((a, b) => b.score - a.score); return maxChunks ? sortedChunks.slice(0, maxChunks) : sortedChunks; }; // Vespa -> Backend/App -> Client const maxSearchChunks = 1; const VespaSearchResponseToSearchResult = (resp, email, textChunker) => { const { root, trace } = resp; const children = root.children || []; // Access the nested children array within the trace object const traceInfo = trace?.children || []; // Filter out any potential trace items from children if they exist const searchHits = children.filter((child) => !child.id?.startsWith("trace:")); return { count: root.fields?.totalCount ?? 0, groupCount: {}, results: searchHits ? 
searchHits.map((child) => { // Narrow down the type based on `sddocname` if (child.fields.sddocname === fileSchema) { // Directly use child.fields which includes matchfeatures const fields = child.fields; fields.type = fileSchema; fields.relevance = child.relevance; // matchfeatures is already part of fields, no need to assign separately fields.chunks_summary = getSortedScoredChunks(fields.matchfeatures, fields.chunks_summary, maxSearchChunks); return FileResponseSchema.parse(fields); } else if (child.fields.sddocname === userSchema) { // Directly use child.fields const fields = child.fields; fields.type = userSchema; fields.relevance = child.relevance; // matchfeatures is already part of fields (if returned by Vespa) // Ensure chunks_summary processing happens before parsing fields.chunks_summary?.sort((a, b) => countHiTags(b) - countHiTags(a)); fields.chunks_summary = fields.chunks_summary?.slice(0, maxSearchChunks); return UserResponseSchema.parse(fields); } else if (child.fields.sddocname === mailSchema) { // Directly use child.fields const fields = child.fields; if (email && fields.userMap && typeof fields.userMap === 'object') fields.docId = fields.userMap[email] || fields.docId; fields.type = mailSchema; fields.relevance = child.relevance; // matchfeatures is already part of fields fields.chunks_summary = getSortedScoredChunks(fields.matchfeatures, fields.chunks_summary, maxSearchChunks); return MailResponseSchema.parse(fields); } else if (child.fields.sddocname === eventSchema) { // Directly use child.fields const fields = child.fields; fields.type = eventSchema; fields.relevance = child.relevance; // matchfeatures is already part of fields (if returned by Vespa) // creating a new property // Ensure chunks_summary processing happens before parsing fields.chunks_summary = fields.description && textChunker ? 
textChunker.chunkDocument(fields.description) .map((v) => v.chunk) .sort((a, b) => countHiTags(b) - countHiTags(a)) .slice(0, maxSearchChunks) : []; // This line seems redundant as it's assigned above? Keeping it for now. fields.chunks_summary = fields.chunks_summary?.slice(0, maxSearchChunks); return EventResponseSchema.parse(fields); } else if (child.fields.sddocname === mailAttachmentSchema) { // Directly use child.fields const fields = child.fields; fields.type = mailAttachmentSchema; fields.relevance = child.relevance; // matchfeatures is already part of fields fields.chunks_summary = getSortedScoredChunks(fields.matchfeatures, fields.chunks_summary, maxSearchChunks); return MailAttachmentResponseSchema.parse(fields); } else if (child.fields.sddocname === chatMessageSchema) { const fields = child.fields; fields.type = chatMessageSchema; fields.relevance = child.relevance; fields.attachmentIds = []; fields.mentions = []; if (!fields.teamId) { fields.teamId = ""; } return ChatMessageResponseSchema.parse(fields); } else if (ch