@mastra/core
Version:
Mastra is a framework for building AI-powered applications and agents with a modern TypeScript stack.
256 lines • 8.13 kB
TypeScript
/**
* SearchEngine - Unified search engine supporting BM25, vector, and hybrid search.
*
* Provides search capabilities for Workspace, enabling keyword-based (BM25),
* semantic (vector), and combined hybrid search across indexed content.
*/
import type { MastraVector } from '../../vector/index.js';
import type { LineRange } from '../line-utils.js';
import { BM25Index } from './bm25.js';
import type { BM25Config, TokenizeOptions } from './bm25.js';
/**
* Search mode options.
*
* - `'vector'` - semantic search
* - `'bm25'` - keyword-based search
* - `'hybrid'` - combination of the two
*/
export type SearchMode = 'vector' | 'bm25' | 'hybrid';
/**
* Single-text embedder - takes one text and returns its embedding.
*
* This is the legacy embedder shape and remains the default. Each document is
* embedded with a separate call.
*/
export interface SingleEmbedder {
/** Embeds `text` and resolves to its embedding vector. */
(text: string): Promise<number[]>;
}
/**
* Batch-capable embedder - takes an array of texts and returns their embeddings
* in the same order.
*
* Branded with `batch: true` so {@link SearchEngine} can detect batch support at
* runtime and dispatch to a single batched embedder call instead of one call
* per document. This dramatically speeds up large index rebuilds against
* providers that support batch embedding (e.g. an OpenAI embedding model called
* through the AI SDK's `embedMany`, as in the example below).
*
* @example
* ```ts
* import { embedMany } from 'ai';
* import { openai } from '@ai-sdk/openai';
*
* const model = openai.embedding('text-embedding-3-small');
* const embedder: BatchEmbedder = Object.assign(
*   async (texts: string[]) => {
*     const { embeddings } = await embedMany({ model, values: texts });
*     return embeddings;
*   },
*   { batch: true as const, maxBatchSize: 2048 },
* );
* ```
*/
export interface BatchEmbedder {
/** Embeds `texts` and resolves to one embedding per input, in input order. */
(texts: string[]): Promise<number[][]>;
/** Brand that marks this embedder as batch-capable. */
readonly batch: true;
/**
* Maximum number of texts the underlying provider accepts per call. When
* unset, all pending texts are sent in a single request.
*
* NOTE(review): when set, larger inputs are presumably split into several
* calls of at most this size - confirm against the implementation.
*/
readonly maxBatchSize?: number;
}
/**
* Embedder interface - either a legacy single-text embedder or a batch-capable
* embedder branded with `batch: true`.
*
* Use {@link isBatchEmbedder} to narrow a value of this type to
* {@link BatchEmbedder}.
*/
export type Embedder = SingleEmbedder | BatchEmbedder;
/**
* Type guard: returns true when the embedder is the batch-capable variant.
*
* NOTE(review): presumably this inspects the `batch: true` brand; the
* implementation is not visible in this declaration file.
*
* @param embedder - Embedder to inspect
* @returns `true` if `embedder` is a {@link BatchEmbedder}; callers can then
* pass it an array of texts
*/
export declare function isBatchEmbedder(embedder: Embedder): embedder is BatchEmbedder;
/**
* Configuration for vector search
*/
export interface VectorConfig {
/** Vector store for semantic search */
vectorStore: MastraVector;
/**
* Embedder function for generating vectors. May be a single-text embedder or
* a batch-capable one (see {@link Embedder}).
*/
embedder: Embedder;
/** Index name for the vector store */
indexName: string;
}
/**
* Configuration for BM25 search. Both fields are optional.
*/
export interface BM25SearchConfig {
/** BM25 algorithm parameters */
bm25?: BM25Config;
/** Tokenization options */
tokenize?: TokenizeOptions;
}
/**
* A document to be indexed
*/
export interface IndexDocument {
/** Unique identifier for this document */
id: string;
/** Text content to index */
content: string;
/** Optional metadata to store with the document */
metadata?: Record<string, unknown>;
/**
* For chunked documents: the 1-indexed line number in the original document
* at which this chunk starts.
*
* When provided, `lineRange` in search results is adjusted so that it refers
* to lines of the original document rather than lines of the chunk.
*/
startLineOffset?: number;
}
/**
* Base search result with common fields
*/
export interface SearchResult {
/** Document identifier */
id: string;
/** Document content */
content: string;
/** Search score (0-1 for normalized results) */
score: number;
/** Line range where query terms appear */
lineRange?: LineRange;
/** Optional metadata */
metadata?: Record<string, unknown>;
/** Score breakdown by search type */
scoreDetails?: {
/** Score from vector search, when available */
vector?: number;
/** Score from BM25 search, when available */
bm25?: number;
};
}
/**
* Options for searching. Every field is optional.
*/
export interface SearchOptions {
/** Maximum number of results to return */
topK?: number;
/** Minimum score threshold */
minScore?: number;
/**
* Search mode: 'bm25', 'vector', or 'hybrid'.
*
* NOTE(review): the mode used when this is omitted is not visible in this
* declaration file - confirm against the implementation.
*/
mode?: SearchMode;
/**
* Weight for vector scores in hybrid search (0-1, default 0.5).
*
* NOTE(review): BM25 presumably receives the remaining weight - confirm.
*/
vectorWeight?: number;
/** Filter for vector search */
filter?: Record<string, unknown>;
}
/** Options for batch indexing via `SearchEngine.indexMany` */
export interface IndexManyOptions {
/**
* Maximum number of documents to index concurrently (embedder + vector upsert).
* Must be a safe integer ≥ 1 (same rule as `p-map`).
* @default 8
*/
concurrency?: number;
/**
* Controls how failures of individual `index` calls are reported.
*
* When `true` (default), the first rejected `index` rejects the whole `indexMany` call.
* When `false`, all documents are processed; if any failed, the promise rejects with an `AggregateError`.
*/
stopOnError?: boolean;
}
/**
* Configuration for SearchEngine.
*
* Each search backend is enabled by supplying its configuration.
*/
export interface SearchEngineConfig {
/** BM25 configuration (enables BM25 search) */
bm25?: BM25SearchConfig;
/** Vector configuration (enables vector search) */
vector?: VectorConfig;
/** Whether to use lazy vector indexing (default: false = eager) */
lazyVectorIndex?: boolean;
}
/**
* Options for `splitIntoChunks`.
*
* NOTE(review): default values are not visible in this declaration file.
*/
export interface ChunkOptions {
/** Character budget for a single chunk */
maxChunkChars?: number;
/** Number of lines shared between adjacent chunks */
overlapLines?: number;
}
/**
* One chunk produced by `splitIntoChunks`.
*/
export interface TextChunk {
/** Text of the chunk */
content: string;
/**
* Line of the original text at which this chunk starts.
*
* NOTE(review): presumably 1-indexed, matching `IndexDocument.startLineOffset` - confirm.
*/
startLine: number;
}
/**
* Split text into line-based chunks that stay within a character budget.
*
* Each chunk is formed by accumulating whole lines until adding the next line
* would exceed `maxChunkChars`. Adjacent chunks share `overlapLines` lines so
* that context around chunk boundaries is preserved for embedding quality.
*
* Returns the original text as a single chunk when it already fits.
*
* @param text - Text to split
* @param options - Optional chunk size and overlap settings
* @returns The chunks, each with its content and starting line
*/
export declare function splitIntoChunks(text: string, options?: ChunkOptions): TextChunk[];
/**
* Unified search engine supporting BM25, vector, and hybrid search.
*
* Used internally by Workspace to provide consistent search functionality.
*
* @example
* ```typescript
* const engine = new SearchEngine({
*   bm25: { tokenize: { lowercase: true } },
*   vector: { vectorStore, embedder, indexName: 'my-index' },
* });
*
* // Index documents
* await engine.index({ id: 'doc1', content: 'Hello world' });
*
* // Search
* const results = await engine.search('hello', { mode: 'hybrid', topK: 5 });
* ```
*/
export declare class SearchEngine {
/**
* Create a search engine.
*
* @param config - Optional configuration; supplying `bm25` and/or `vector`
* enables the corresponding search backend
*/
constructor(config?: SearchEngineConfig);
/**
* Index a document for search
*
* @param doc - Document to index
*/
index(doc: IndexDocument): Promise<void>;
/**
* Index multiple documents (up to `concurrency` at a time when async vector work runs).
*
* @param docs - Documents to index
* @param options - Batch options (`concurrency`, `stopOnError`); `concurrency` defaults to 8
*/
indexMany(docs: IndexDocument[], options?: IndexManyOptions): Promise<void>;
/**
* Remove a document from the index
*
* @param id - Identifier of the document to remove
*/
remove(id: string): Promise<void>;
/**
* Remove all documents whose ID starts with the given prefix.
* Used to remove all chunks belonging to a single source document.
*
* @param prefix - ID prefix to match
*/
removeByPrefix(prefix: string): Promise<void>;
/**
* Remove a source document and all of its chunked variants.
*
* This also attempts a metadata-based bulk delete for chunk vectors so stale
* chunk IDs from previous process runs are cleaned up in persistent stores.
*
* @param sourceId - Identifier of the source document
*/
removeSource(sourceId: string): Promise<void>;
/**
* Clear all indexed documents.
*
* Unlike the other mutating methods, this one is synchronous and does not
* return a promise.
*/
clear(): void;
/**
* Search for documents
*
* @param query - Query text
* @param options - Optional search settings (mode, `topK`, `minScore`, ...)
* @returns Matching results
*/
search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
/**
* Check if BM25 search is available
*/
get canBM25(): boolean;
/**
* Check if vector search is available
*/
get canVector(): boolean;
/**
* Check if hybrid search is available
*
* NOTE(review): presumably true only when both BM25 and vector search are
* available - confirm against the implementation.
*/
get canHybrid(): boolean;
/**
* Get the BM25 index (for serialization/debugging).
* The declared type allows `undefined`.
*/
get bm25Index(): BM25Index | undefined;
}
//# sourceMappingURL=search-engine.d.ts.map