UNPKG

@astermind/astermind-pro

Version:

Astermind Pro - Premium ML Toolkit with Advanced RAG, Reranking, Summarization, and Information Flow Analysis

611 lines (496 loc) 14.1 kB
# Astermind Pro Examples Practical code examples for common scenarios. ## Example 1: Complete Retrieval Pipeline (Outside Workers) **NEW:** Use retrieval modules directly in your application without workers! ```typescript import { buildIndex, hybridRetrieve, rerankAndFilter, summarizeDeterministic, parseMarkdownToSections, flattenSections, backfillEmptyParents } from '@astermind/astermind-pro'; async function buildSearchIndex(documents: Array<{title: string, content: string}>) { // Parse markdown if needed const chunks = documents.map(doc => { const root = parseMarkdownToSections(doc.content); backfillEmptyParents(root); return flattenSections(root).map(chunk => ({ heading: `${doc.title} - ${chunk.heading}`, content: chunk.content, rich: chunk.rich })); }).flat(); // Build index const index = buildIndex({ chunks, vocab: 10000, landmarks: 256, headingW: 2.0, useStem: true, kernel: 'rbf', sigma: 1.0 }); return { index, chunks }; } async function search(query: string, index: any, chunks: any[]) { // Perform hybrid retrieval const retrieved = hybridRetrieve({ query, chunks, vocabMap: index.vocabMap, idf: index.idf, tfidfDocs: index.tfidfDocs, denseDocs: index.denseDocs, landmarksIdx: index.landmarksIdx, landmarkMat: index.landmarkMat, vocabSize: index.vocabMap.size, kernel: 'rbf', sigma: 1.0, alpha: 0.7, beta: 0.1, ridge: 0.08, headingW: 2.0, useStem: true, expandQuery: true, topK: 20, prefilter: 100 }); // Rerank const reranked = rerankAndFilter(query, retrieved.items, { lambdaRidge: 1e-2, probThresh: 0.45, useMMR: true, budgetChars: 2000 }); // Summarize const summary = summarizeDeterministic(query, reranked, { maxAnswerChars: 1500, includeCitations: true }); return { answer: summary.text, sources: summary.cites, retrieved: retrieved.items }; } // Usage const { index, chunks } = await buildSearchIndex(yourDocuments); const results = await search('your query', index, chunks); ``` ## Example 2: Simple Search Pipeline ```typescript import { rerankAndFilter, 
summarizeDeterministic } from '@astermind/astermind-pro'; async function simpleSearch(query: string, documents: Array<{title: string, content: string}>) { // Convert to chunks const chunks = documents.map(doc => ({ heading: doc.title, content: doc.content })); // Rerank const reranked = rerankAndFilter(query, chunks, { lambdaRidge: 1e-2, probThresh: 0.45, useMMR: true, budgetChars: 1200 }); // Summarize const summary = summarizeDeterministic(query, reranked, { maxAnswerChars: 1000, includeCitations: true }); return { answer: summary.text, sources: summary.cites }; } ``` ## Example 3: Auto-Tuning Hyperparameters **NEW:** Use auto-tune outside of workers! ```typescript import { buildIndex, autoTune } from '@astermind/astermind-pro'; async function optimizeSettings(documents: any[], currentSettings: any) { // Build initial index const index = buildIndex({ chunks: documents, vocab: currentSettings.vocab || 10000, landmarks: currentSettings.landmarks || 256, headingW: currentSettings.headingW || 2.0, useStem: currentSettings.useStem ?? true, kernel: currentSettings.kernel || 'rbf', sigma: currentSettings.sigma || 1.0 }); // Run auto-tune const result = await autoTune({ chunks: documents, vocabMap: index.vocabMap, idf: index.idf, tfidfDocs: index.tfidfDocs, vocabSize: index.vocabMap.size, budget: 40, sampleQueries: 24, currentSettings }, (trial, best, note) => { console.log(`Trial ${trial}: score=${best.toFixed(4)} (${note})`); }); return result.bestSettings; } ``` ## Example 4: Model Serialization **NEW:** Export and import models for persistence! 
```typescript import { buildIndex, exportModel, importModel, buildDenseDocs } from '@astermind/astermind-pro'; // Build and export async function saveModel(documents: any[], settings: any) { const index = buildIndex({ chunks: documents, vocab: settings.vocab, landmarks: settings.landmarks, headingW: settings.headingW, useStem: settings.useStem, kernel: settings.kernel, sigma: settings.sigma }); const model = exportModel({ settings, vocabMap: index.vocabMap, idf: index.idf, chunks: documents, tfidfDocs: index.tfidfDocs, landmarksIdx: index.landmarksIdx, landmarkMat: index.landmarkMat, denseDocs: index.denseDocs, includeRich: true, includeDense: false // Save space, recompute on load }); // Save to file or database return JSON.stringify(model); } // Load model async function loadModel(modelJson: string) { const model = JSON.parse(modelJson); const imported = importModel(model, { buildDense: (tfidfDocs, vocabSize, landmarkMat, kernel, sigma) => buildDenseDocs(tfidfDocs, vocabSize, landmarkMat, kernel, sigma) }); return imported; } ``` ## Example 5: Code Documentation Search ```typescript import { rerankAndFilter, summarizeDeterministic } from '@astermind/astermind-pro'; async function searchCodeDocs(query: string, codeDocs: Array<{api: string, code: string, docs: string}>) { const chunks = codeDocs.map(doc => ({ heading: doc.api, content: doc.docs, rich: `\`\`\`\n${doc.code}\n\`\`\`` })); const reranked = rerankAndFilter(query, chunks, { probThresh: 0.4, budgetChars: 3000 }); const summary = summarizeDeterministic(query, reranked, { preferCode: true, codeBonus: 0.15, minQuerySimForCode: 0.30, maxAnswerChars: 2000 }); return summary.text; } ``` ## Example 6: Multi-Stage Filtering ```typescript import { rerank, filterMMR } from '@astermind/astermind-pro'; async function multiStage(query: string, chunks: Chunk[]) { // Stage 1: Coarse filtering const coarse = rerank(query, chunks, { lambdaRidge: 1e-1, randomProjDim: 16 }); const top50 = coarse.slice(0, 
Math.ceil(coarse.length / 2)); // Stage 2: Fine filtering const fine = rerank(query, top50, { lambdaRidge: 1e-2, randomProjDim: 32 }); // Stage 3: MMR diversity const final = filterMMR(fine, { useMMR: true, mmrLambda: 0.7, budgetChars: 1500 }); return final; } ``` ## Example 7: Adaptive Quality Control ```typescript import { InfoFlowGraph, TEController, rerankAndFilter } from '@astermind/astermind-pro'; class AdaptiveSearch { private graph = new InfoFlowGraph({ window: 256 }); private controller = new TEController(); private knobs = { probThresh: 0.45, budgetChars: 1200 }; async search(query: string, chunks: Chunk[]) { const results = rerankAndFilter(query, chunks, { probThresh: this.knobs.probThresh, budgetChars: this.knobs.budgetChars }); // Monitor this.graph.get('Q->R').push( [query.length / 100], [results.length] ); // Adjust const adjustment = this.controller.maybeAdjust(this.knobs); if (adjustment.knobs) { this.knobs = adjustment.knobs; } return results; } } ``` ## Example 8: Batch Processing ```typescript import { rerankAndFilter } from '@astermind/astermind-pro'; async function batchSearch(queries: string[], chunks: Chunk[]) { const results = await Promise.all( queries.map(query => rerankAndFilter(query, chunks, { lambdaRidge: 1e-2, probThresh: 0.45 }) ) ); return queries.map((q, i) => ({ query: q, results: results[i] })); } ``` ## Example 9: Streaming Results ```typescript import { rerank } from '@astermind/astermind-pro'; async function* streamSearch(query: string, chunks: Chunk[]) { // Process in batches const batchSize = 10; for (let i = 0; i < chunks.length; i += batchSize) { const batch = chunks.slice(i, i + batchSize); const reranked = rerank(query, batch, { lambdaRidge: 1e-2 }); for (const result of reranked) { yield result; } } } // Usage for await (const result of streamSearch(query, chunks)) { console.log(result.heading, result.score_rr); } ``` ## Example 10: Custom Feature Engineering ```typescript import { rerank, Chunk } from 
'@astermind/astermind-pro'; function customRerank(query: string, chunks: Chunk[]) { // Add custom features to chunks const enhanced = chunks.map(chunk => ({ ...chunk, score_base: computeCustomScore(query, chunk) })); // Use standard reranker with custom prior return rerank(query, enhanced, { lambdaRidge: 1e-2, exposeFeatures: true }); } function computeCustomScore(query: string, chunk: Chunk): number { // Your custom scoring logic const queryTerms = new Set(query.toLowerCase().split(/\W+/)); const contentTerms = new Set(chunk.content.toLowerCase().split(/\W+/)); const overlap = [...queryTerms].filter(t => contentTerms.has(t)).length; return overlap / queryTerms.size; } ``` ## Example 11: Integration with Vector DB ```typescript import { cosine, normalizeL2, rerankAndFilter } from '@astermind/astermind-pro'; async function hybridSearch( query: string, vectorDB: VectorDatabase, textDocs: Chunk[] ) { // Vector search const queryVec = await embedQuery(query); const vectorResults = await vectorDB.search(queryVec, { topK: 20 }); // Combine with text chunks const allChunks: Chunk[] = [ ...textDocs, ...vectorResults.map(r => ({ heading: r.metadata.title, content: r.metadata.content, score_base: r.score })) ]; // Rerank combined results return rerankAndFilter(query, allChunks, { lambdaRidge: 1e-2, probThresh: 0.45 }); } ``` ## Example 12: Real-time Learning ```typescript import { OnlineRidge } from '@astermind/astermind-pro'; class LearningReranker { private ridge = new OnlineRidge(64, 1, 1e-3); async learn(features: Float64Array, relevance: number) { this.ridge.update(features, new Float64Array([relevance])); } score(features: Float64Array): number { return this.ridge.predict(features)[0]; } } // Usage const learner = new LearningReranker(); // Learn from user feedback for (const [features, userRating] of feedbackData) { learner.learn(features, userRating); } // Use for scoring const score = learner.score(newFeatures); ``` ## Example 13: Multi-Query Fusion ```typescript 
import { rerank } from '@astermind/astermind-pro'; async function multiQuerySearch( queries: string[], chunks: Chunk[] ) { // Rerank for each query const results = queries.map(query => rerank(query, chunks, { lambdaRidge: 1e-2 }) ); // Combine scores (Reciprocal Rank Fusion) const combined = new Map<number, number>(); results.forEach((reranked, qIdx) => { reranked.forEach((chunk, rank) => { const idx = chunks.indexOf(chunk); const score = 1 / (rank + 1); combined.set(idx, (combined.get(idx) || 0) + score); }); }); // Sort by combined score return Array.from(combined.entries()) .sort((a, b) => b[1] - a[1]) .map(([idx]) => chunks[idx]); } ``` ## Example 14: A/B Testing Pipeline ```typescript import { rerankAndFilter } from '@astermind/astermind-pro'; class ABTestPipeline { async search(query: string, chunks: Chunk[], variant: 'A' | 'B') { const configA = { lambdaRidge: 1e-2, probThresh: 0.45, mmrLambda: 0.7 }; const configB = { lambdaRidge: 5e-3, probThresh: 0.5, mmrLambda: 0.8 }; const config = variant === 'A' ? 
configA : configB; return rerankAndFilter(query, chunks, config); } } ``` ## Example 15: Caching Layer ```typescript import { rerankAndFilter } from '@astermind/astermind-pro'; class CachedPipeline { private cache = new Map<string, ScoredChunk[]>(); async search(query: string, chunks: Chunk[]) { const key = this.getKey(query, chunks); if (this.cache.has(key)) { return this.cache.get(key)!; } const results = rerankAndFilter(query, chunks, { lambdaRidge: 1e-2 }); this.cache.set(key, results); return results; } private getKey(query: string, chunks: Chunk[]): string { return `${query}:${chunks.map(c => c.heading).join(',')}`; } } ``` ## Example 16: Error Recovery ```typescript import { rerankAndFilter } from '@astermind/astermind-pro'; async function robustSearch(query: string, chunks: Chunk[]) { try { return await rerankAndFilter(query, chunks, { lambdaRidge: 1e-2, probThresh: 0.45 }); } catch (error) { // Fallback to simpler config console.warn('Primary search failed, using fallback', error); return await rerankAndFilter(query, chunks, { lambdaRidge: 1e-1, probThresh: 0.3, useMMR: false }); } } ``` ## Example 17: Progressive Enhancement ```typescript import { rerank, filterMMR, summarizeDeterministic } from '@astermind/astermind-pro'; async function progressiveSearch(query: string, chunks: Chunk[]) { // Stage 1: Basic reranking const reranked = rerank(query, chunks, { lambdaRidge: 1e-2 }); // Stage 2: Add diversity (if needed) if (reranked.length > 10) { const diverse = filterMMR(reranked, { useMMR: true, mmrLambda: 0.7 }); // Stage 3: Summarize return summarizeDeterministic(query, diverse, { maxAnswerChars: 1000 }); } return summarizeDeterministic(query, reranked, { maxAnswerChars: 1000 }); } ``` ## Example 18: Worker Integration ```typescript // Main thread const worker = new Worker( new URL('@astermind/astermind-pro/workers/prod-worker', import.meta.url), { type: 'module' } ); worker.onmessage = (e) => { if (e.data.type === 'answer') { console.log('Answer:', 
e.data.text); } }; // Load model worker.postMessage({ action: 'init', payload: { model: serializedModel } }); // Query worker.postMessage({ action: 'ask', payload: { q: 'your query' } }); ```