@astermind/astermind-pro
Version:
Astermind Pro - Premium ML Toolkit with Advanced RAG, Reranking, Summarization, and Information Flow Analysis
611 lines (496 loc) • 14.1 kB
Markdown
# Astermind Pro Examples
Practical code examples for common scenarios.
## Example 1: Complete Retrieval Pipeline (Outside Workers)
**NEW:** Use retrieval modules directly in your application without workers!
```typescript
import {
buildIndex,
hybridRetrieve,
rerankAndFilter,
summarizeDeterministic,
parseMarkdownToSections,
flattenSections,
backfillEmptyParents
} from '@astermind/astermind-pro';
/**
 * Turns a list of markdown documents into section chunks and builds an index
 * over the combined chunk list.
 *
 * Each document is parsed with `parseMarkdownToSections`, passed through
 * `backfillEmptyParents`, and flattened with `flattenSections`; every resulting
 * chunk heading is prefixed with the document title.
 *
 * @param documents - Documents to index; `content` is treated as markdown.
 * @returns The value returned by `buildIndex` plus the chunks it was built from.
 */
async function buildSearchIndex(documents: Array<{title: string, content: string}>) {
  // One document can yield many chunks, so flatten while mapping.
  const chunks = documents.flatMap(({ title, content }) => {
    const sectionTree = parseMarkdownToSections(content);
    backfillEmptyParents(sectionTree);
    return flattenSections(sectionTree).map(section => ({
      heading: `${title} - ${section.heading}`,
      content: section.content,
      rich: section.rich
    }));
  });

  // Index settings; the same kernel/sigma/headingW/useStem values are used at query time in `search`.
  const index = buildIndex({
    chunks,
    vocab: 10000,
    landmarks: 256,
    headingW: 2.0,
    useStem: true,
    kernel: 'rbf',
    sigma: 1.0
  });

  return { index, chunks };
}
/**
 * Runs the three-stage pipeline for one query: `hybridRetrieve`, then
 * `rerankAndFilter`, then `summarizeDeterministic`.
 *
 * @param query - The user query.
 * @param index - Index object as returned by `buildIndex` (its `vocabMap`,
 *   `idf`, `tfidfDocs`, `denseDocs`, `landmarksIdx` and `landmarkMat` are read).
 * @param chunks - The chunks the index was built from.
 * @returns The summary text (`answer`), its citations (`sources`) and the raw
 *   retrieved items (`retrieved`).
 */
async function search(query: string, index: any, chunks: any[]) {
  const { vocabMap, idf, tfidfDocs, denseDocs, landmarksIdx, landmarkMat } = index;

  // Stage 1: hybrid retrieval over the prebuilt index.
  const retrieved = hybridRetrieve({
    query,
    chunks,
    vocabMap,
    idf,
    tfidfDocs,
    denseDocs,
    landmarksIdx,
    landmarkMat,
    vocabSize: vocabMap.size,
    kernel: 'rbf',
    sigma: 1.0,
    alpha: 0.7,
    beta: 0.1,
    ridge: 0.08,
    headingW: 2.0,
    useStem: true,
    expandQuery: true,
    topK: 20,
    prefilter: 100
  });
  const candidates = retrieved.items;

  // Stage 2: rerank the retrieved items.
  const reranked = rerankAndFilter(query, candidates, {
    lambdaRidge: 1e-2,
    probThresh: 0.45,
    useMMR: true,
    budgetChars: 2000
  });

  // Stage 3: summarize the surviving chunks.
  const { text, cites } = summarizeDeterministic(query, reranked, {
    maxAnswerChars: 1500,
    includeCitations: true
  });

  return {
    answer: text,
    sources: cites,
    retrieved: candidates
  };
}
// Usage
// `yourDocuments` is a placeholder for your own array of { title, content } objects.
// buildSearchIndex returns both the index and the chunks; search needs both.
const { index, chunks } = await buildSearchIndex(yourDocuments);
// `results` has the shape { answer, sources, retrieved }.
const results = await search('your query', index, chunks);
```
## Example 2: Simple Search Pipeline
```typescript
import { rerankAndFilter, summarizeDeterministic } from '@astermind/astermind-pro';
/**
 * Minimal pipeline without an index: each document becomes one chunk, which
 * is passed to `rerankAndFilter` and then `summarizeDeterministic`.
 *
 * @param query - The user query.
 * @param documents - Documents to search; `title` becomes the chunk heading.
 * @returns The summary text (`answer`) and its citations (`sources`).
 */
async function simpleSearch(query: string, documents: Array<{title: string, content: string}>) {
  // One chunk per document.
  const chunks = documents.map(({ title, content }) => ({ heading: title, content }));

  const rerankOptions = {
    lambdaRidge: 1e-2,
    probThresh: 0.45,
    useMMR: true,
    budgetChars: 1200
  };
  const reranked = rerankAndFilter(query, chunks, rerankOptions);

  const { text, cites } = summarizeDeterministic(query, reranked, {
    maxAnswerChars: 1000,
    includeCitations: true
  });

  return { answer: text, sources: cites };
}
```
## Example 3: Auto-Tuning Hyperparameters
**NEW:** Use auto-tune outside of workers!
```typescript
import { buildIndex, autoTune } from '@astermind/astermind-pro';
/**
 * Builds an index from the current settings and runs `autoTune` on it.
 *
 * Defaults are applied with `??`, so only `null`/`undefined` settings fall
 * back to the default. An explicit falsy value such as `headingW: 0` is
 * passed through instead of being silently replaced.
 *
 * @param documents - Chunks to index and tune against.
 * @param currentSettings - Current settings; missing index fields get defaults.
 * @returns The `bestSettings` reported by `autoTune`.
 */
async function optimizeSettings(documents: any[], currentSettings: any) {
  // Build initial index
  const index = buildIndex({
    chunks: documents,
    vocab: currentSettings.vocab ?? 10000,
    landmarks: currentSettings.landmarks ?? 256,
    headingW: currentSettings.headingW ?? 2.0,
    useStem: currentSettings.useStem ?? true,
    kernel: currentSettings.kernel ?? 'rbf',
    sigma: currentSettings.sigma ?? 1.0
  });

  // Run auto-tune; the callback logs progress for each reported trial.
  const result = await autoTune({
    chunks: documents,
    vocabMap: index.vocabMap,
    idf: index.idf,
    tfidfDocs: index.tfidfDocs,
    vocabSize: index.vocabMap.size,
    budget: 40,
    sampleQueries: 24,
    currentSettings
  }, (trial, best, note) => {
    console.log(`Trial ${trial}: score=${best.toFixed(4)} (${note})`);
  });

  return result.bestSettings;
}
```
## Example 4: Model Serialization
**NEW:** Export and import models for persistence!
```typescript
import { buildIndex, exportModel, importModel, buildDenseDocs } from '@astermind/astermind-pro';
// Build and export
/**
 * Builds an index for `documents` and serializes it to a JSON string.
 *
 * The exported model is requested with `includeDense: false`, so `loadModel`
 * supplies a `buildDense` callback when importing.
 *
 * @param documents - Chunks to index and embed in the exported model.
 * @param settings - Index settings (`vocab`, `landmarks`, `headingW`,
 *   `useStem`, `kernel`, `sigma`); also stored in the exported model.
 * @returns The exported model as a JSON string.
 */
async function saveModel(documents: any[], settings: any) {
  const { vocab, landmarks, headingW, useStem, kernel, sigma } = settings;
  const index = buildIndex({
    chunks: documents,
    vocab,
    landmarks,
    headingW,
    useStem,
    kernel,
    sigma
  });

  const { vocabMap, idf, tfidfDocs, landmarksIdx, landmarkMat, denseDocs } = index;
  const model = exportModel({
    settings,
    vocabMap,
    idf,
    chunks: documents,
    tfidfDocs,
    landmarksIdx,
    landmarkMat,
    denseDocs,
    includeRich: true,
    includeDense: false // Save space, recompute on load
  });

  // Save to file or database
  return JSON.stringify(model);
}
// Load model
/**
 * Parses a JSON string produced by `saveModel` and imports it.
 *
 * A `buildDense` callback that forwards to `buildDenseDocs` is supplied,
 * because `saveModel` exports with `includeDense: false`.
 *
 * @param modelJson - JSON text of an exported model.
 * @returns Whatever `importModel` returns for the parsed model.
 * @throws SyntaxError if `modelJson` is not valid JSON (from `JSON.parse`).
 */
async function loadModel(modelJson: string) {
  const serialized = JSON.parse(modelJson);
  return importModel(serialized, {
    buildDense: (docs, size, landmarks, kernelName, kernelSigma) =>
      buildDenseDocs(docs, size, landmarks, kernelName, kernelSigma)
  });
}
```
## Example 5: Code Documentation Search
```typescript
import { rerankAndFilter, summarizeDeterministic } from '@astermind/astermind-pro';
/**
 * Searches API documentation entries and returns a summary configured to
 * prefer code.
 *
 * Each entry becomes a chunk whose `rich` field is the entry's code wrapped
 * in a markdown code fence.
 *
 * @param query - The user query.
 * @param codeDocs - Entries with an API name, a code sample and prose docs.
 * @returns The summary text.
 */
async function searchCodeDocs(query: string, codeDocs: Array<{api: string, code: string, docs: string}>) {
  const chunks = codeDocs.map(({ api, code, docs }) => ({
    heading: api,
    content: docs,
    rich: `\`\`\`\n${code}\n\`\`\``
  }));

  const candidates = rerankAndFilter(query, chunks, {
    probThresh: 0.4,
    budgetChars: 3000
  });

  const { text } = summarizeDeterministic(query, candidates, {
    preferCode: true,
    codeBonus: 0.15,
    minQuerySimForCode: 0.30,
    maxAnswerChars: 2000
  });
  return text;
}
```
## Example 6: Multi-Stage Filtering
```typescript
import { rerank, filterMMR } from '@astermind/astermind-pro';
/**
 * Three-stage filter: a coarse `rerank`, a finer `rerank` over the top half
 * of the coarse results, then `filterMMR`.
 *
 * @param query - The user query.
 * @param chunks - Candidate chunks.
 * @returns The output of `filterMMR` on the finely reranked candidates.
 */
async function multiStage(query: string, chunks: Chunk[]) {
  // Stage 1: coarse pass.
  const coarseRanked = rerank(query, chunks, {
    lambdaRidge: 1e-1,
    randomProjDim: 16
  });

  // Keep the top half (rounded up) of the coarse ranking.
  const keepCount = Math.ceil(coarseRanked.length / 2);
  const topHalf = coarseRanked.slice(0, keepCount);

  // Stage 2: fine pass over the survivors.
  const fineRanked = rerank(query, topHalf, {
    lambdaRidge: 1e-2,
    randomProjDim: 32
  });

  // Stage 3: MMR filtering under a character budget.
  return filterMMR(fineRanked, {
    useMMR: true,
    mmrLambda: 0.7,
    budgetChars: 1500
  });
}
```
## Example 7: Adaptive Quality Control
```typescript
import { InfoFlowGraph, TEController, rerankAndFilter } from '@astermind/astermind-pro';
/**
 * Search wrapper that records a signal per query and lets a `TEController`
 * replace the reranking knobs between calls.
 */
class AdaptiveSearch {
  private readonly graph = new InfoFlowGraph({ window: 256 });
  private readonly controller = new TEController();
  // Current reranking knobs; replaced when the controller proposes new ones.
  private knobs = { probThresh: 0.45, budgetChars: 1200 };

  /**
   * Reranks `chunks` with the current knobs, records the query/result signal
   * on the 'Q->R' channel, then applies any knob adjustment.
   *
   * @param query - The user query.
   * @param chunks - Candidate chunks.
   * @returns The output of `rerankAndFilter`.
   */
  async search(query: string, chunks: Chunk[]) {
    const { probThresh, budgetChars } = this.knobs;
    const results = rerankAndFilter(query, chunks, { probThresh, budgetChars });

    // Monitor: scaled query length against the number of results.
    const channel = this.graph.get('Q->R');
    channel.push([query.length / 100], [results.length]);

    // Adjust: only replace the knobs when the controller returns new ones.
    const { knobs: nextKnobs } = this.controller.maybeAdjust(this.knobs);
    if (nextKnobs) {
      this.knobs = nextKnobs;
    }

    return results;
  }
}
```
## Example 5: Batch Processing
```typescript
import { rerankAndFilter } from '@astermind/astermind-pro';
/**
 * Runs `rerankAndFilter` for every query against the same chunks.
 *
 * @param queries - Queries to run.
 * @param chunks - Candidate chunks shared by all queries.
 * @returns One `{ query, results }` entry per query, in input order.
 */
async function batchSearch(queries: string[], chunks: Chunk[]) {
  const runOne = (query: string) =>
    rerankAndFilter(query, chunks, {
      lambdaRidge: 1e-2,
      probThresh: 0.45
    });

  // Promise.all accepts plain values as well as promises.
  const ranked = await Promise.all(queries.map(query => runOne(query)));

  return queries.map((query, position) => {
    const results = ranked[position];
    return { query, results };
  });
}
```
## Example 6: Streaming Results
```typescript
import { rerank } from '@astermind/astermind-pro';
/**
 * Async generator that reranks `chunks` in batches of 10 and yields each
 * batch's results as soon as that batch is done.
 *
 * Ranking is per batch: results are ordered within a batch, and batches are
 * emitted in input order.
 *
 * @param query - The user query.
 * @param chunks - Candidate chunks.
 * @yields Each reranked result, batch by batch.
 */
async function* streamSearch(query: string, chunks: Chunk[]) {
  const batchSize = 10;
  let offset = 0;

  while (offset < chunks.length) {
    const window = chunks.slice(offset, offset + batchSize);
    const ranked = rerank(query, window, {
      lambdaRidge: 1e-2
    });
    for (const item of ranked) {
      yield item;
    }
    offset += batchSize;
  }
}
// Usage
// `query` and `chunks` are placeholders supplied by the caller.
// Results are consumed as the generator yields them, one batch at a time.
for await (const result of streamSearch(query, chunks)) {
console.log(result.heading, result.score_rr);
}
```
## Example 7: Custom Feature Engineering
```typescript
import { rerank, Chunk } from '@astermind/astermind-pro';
/**
 * Reranks chunks after attaching a custom `score_base` to each one.
 *
 * The input chunks are not mutated; each is copied with `score_base` set to
 * `computeCustomScore(query, chunk)`.
 *
 * @param query - The user query.
 * @param chunks - Candidate chunks.
 * @returns The output of `rerank` on the enriched chunks.
 */
function customRerank(query: string, chunks: Chunk[]) {
  // Copy each chunk, overriding (or adding) score_base with the custom score.
  const withPrior = chunks.map(original => {
    const enriched = {
      ...original,
      score_base: computeCustomScore(query, original)
    };
    return enriched;
  });

  const options = {
    lambdaRidge: 1e-2,
    exposeFeatures: true
  };
  return rerank(query, withPrior, options);
}
/**
 * Scores a chunk by the fraction of distinct query terms that also occur in
 * the chunk content (case-insensitive, split on non-word characters).
 *
 * Empty tokens produced by leading/trailing punctuation are discarded, so two
 * unrelated texts no longer "overlap" on the empty string.
 *
 * @param query - The user query.
 * @param chunk - Chunk whose `content` is compared against the query.
 * @returns A value in [0, 1]; 0 when the query contains no word characters.
 */
function computeCustomScore(query: string, chunk: Chunk): number {
  // split(/\W+/) emits '' at punctuation boundaries; drop those tokens.
  const tokenize = (text: string): Set<string> =>
    new Set(text.toLowerCase().split(/\W+/).filter(token => token.length > 0));

  const queryTerms = tokenize(query);
  if (queryTerms.size === 0) {
    return 0; // nothing to match against; also avoids 0/0
  }

  const contentTerms = tokenize(chunk.content);
  let overlap = 0;
  queryTerms.forEach(term => {
    if (contentTerms.has(term)) overlap++;
  });
  return overlap / queryTerms.size;
}
```
## Example 8: Integration with Vector DB
```typescript
import { cosine, normalizeL2, rerankAndFilter } from '@astermind/astermind-pro';
/**
 * Combines plain text chunks with vector-database hits and reranks the union.
 *
 * The query is embedded with `embedQuery`, the top 20 hits are fetched from
 * `vectorDB`, and each hit is converted into a chunk whose `score_base` is
 * the hit's score.
 *
 * @param query - The user query.
 * @param vectorDB - Vector store exposing `search(vector, { topK })`.
 * @param textDocs - Text chunks to rerank alongside the vector hits.
 * @returns The output of `rerankAndFilter` over text chunks followed by hits.
 */
async function hybridSearch(
  query: string,
  vectorDB: VectorDatabase,
  textDocs: Chunk[]
) {
  // Vector search
  const queryVec = await embedQuery(query);
  const hits = await vectorDB.search(queryVec, { topK: 20 });

  // Convert hits into chunks, carrying the vector score as the prior.
  const vectorChunks = hits.map(({ metadata, score }) => ({
    heading: metadata.title,
    content: metadata.content,
    score_base: score
  }));
  const allChunks: Chunk[] = [...textDocs, ...vectorChunks];

  // Rerank combined results
  return rerankAndFilter(query, allChunks, {
    lambdaRidge: 1e-2,
    probThresh: 0.45
  });
}
```
## Example 9: Real-time Learning
```typescript
import { OnlineRidge } from '@astermind/astermind-pro';
/**
 * Thin wrapper around an `OnlineRidge` model that learns a scalar relevance
 * score from feature vectors.
 */
class LearningReranker {
  // NOTE(review): arguments are presumably (input dim, output dim, ridge lambda) — confirm against the OnlineRidge API.
  private readonly ridge = new OnlineRidge(64, 1, 1e-3);

  /**
   * Feeds one (features, relevance) pair to the model.
   *
   * @param features - Feature vector for the example.
   * @param relevance - Target relevance; wrapped in a one-element vector.
   */
  async learn(features: Float64Array, relevance: number) {
    const target = new Float64Array([relevance]);
    this.ridge.update(features, target);
  }

  /**
   * Predicts the relevance for a feature vector.
   *
   * @param features - Feature vector to score.
   * @returns The first component of the model's prediction.
   */
  score(features: Float64Array): number {
    const prediction = this.ridge.predict(features);
    return prediction[0];
  }
}
// Usage
// `feedbackData` and `newFeatures` are placeholders supplied by the caller.
const learner = new LearningReranker();
// Learn from user feedback.
// `learn` is async, so await each call: otherwise a failed update becomes an
// unhandled promise rejection instead of an error raised here.
for (const [features, userRating] of feedbackData) {
  await learner.learn(features, userRating);
}
// Use for scoring
const score = learner.score(newFeatures);
```
## Example 10: Multi-Query Fusion
```typescript
import { rerank } from '@astermind/astermind-pro';
/**
 * Reranks the same chunks for several queries and fuses the rankings by
 * summing reciprocal ranks (`1 / (rank + 1)`) per chunk.
 *
 * Reranked items are mapped back to their position in `chunks` by object
 * identity through a precomputed map (O(1) per item instead of a linear
 * `indexOf`). If an item is not one of the input objects, for example a
 * scored copy, it is matched by its `heading` and `content`. Items that match
 * no input chunk are skipped, so the output never contains `undefined`.
 *
 * @param queries - Queries whose rankings are fused.
 * @param chunks - Candidate chunks.
 * @returns The input chunks that appeared in at least one ranking, best
 *   combined score first; ties keep first-seen order.
 */
async function multiQuerySearch(
  queries: string[],
  chunks: Chunk[]
) {
  // Rerank for each query
  const results = queries.map(query =>
    rerank(query, chunks, { lambdaRidge: 1e-2 })
  );

  // First position of every chunk object (matches indexOf semantics).
  const positionByIdentity = new Map<Chunk, number>();
  chunks.forEach((chunk, position) => {
    if (!positionByIdentity.has(chunk)) positionByIdentity.set(chunk, position);
  });

  // Text-keyed fallback, built lazily because it serializes every chunk.
  const textKey = (chunk: Chunk) => JSON.stringify([chunk.heading, chunk.content]);
  let positionByText: Map<string, number> | undefined;
  const buildTextIndex = () => {
    const byText = new Map<string, number>();
    chunks.forEach((chunk, position) => {
      const key = textKey(chunk);
      if (!byText.has(key)) byText.set(key, position);
    });
    return byText;
  };
  const resolvePosition = (item: Chunk): number | undefined => {
    const direct = positionByIdentity.get(item);
    if (direct !== undefined) return direct;
    if (positionByText === undefined) positionByText = buildTextIndex();
    return positionByText.get(textKey(item));
  };

  // Combine scores (Reciprocal Rank Fusion)
  const combined = new Map<number, number>();
  for (const reranked of results) {
    reranked.forEach((item, rank) => {
      const position = resolvePosition(item);
      if (position === undefined) return; // not traceable to an input chunk
      combined.set(position, (combined.get(position) ?? 0) + 1 / (rank + 1));
    });
  }

  // Sort by combined score
  return Array.from(combined.entries())
    .sort((left, right) => right[1] - left[1])
    .map(([position]) => chunks[position]);
}
```
## Example 11: A/B Testing Pipeline
```typescript
import { rerankAndFilter } from '@astermind/astermind-pro';
/**
 * Runs `rerankAndFilter` with one of two fixed configurations so variants can
 * be compared.
 */
class ABTestPipeline {
  /**
   * @param query - The user query.
   * @param chunks - Candidate chunks.
   * @param variant - 'A' selects the first configuration; anything else
   *   selects the second.
   * @returns The output of `rerankAndFilter` with the selected configuration.
   */
  async search(query: string, chunks: Chunk[], variant: 'A' | 'B') {
    const variantA = {
      lambdaRidge: 1e-2,
      probThresh: 0.45,
      mmrLambda: 0.7
    };
    const variantB = {
      lambdaRidge: 5e-3,
      probThresh: 0.5,
      mmrLambda: 0.8
    };

    let selected = variantB;
    if (variant === 'A') {
      selected = variantA;
    }
    return rerankAndFilter(query, chunks, selected);
  }
}
```
## Example 12: Caching Layer
```typescript
import { rerankAndFilter } from '@astermind/astermind-pro';
/**
 * Memoizes `rerankAndFilter` results per (query, chunks) pair.
 *
 * The cache is an in-memory `Map` that is never evicted, so it grows with the
 * number of distinct (query, chunks) pairs seen.
 */
class CachedPipeline {
  private readonly cache = new Map<string, ScoredChunk[]>();

  /**
   * Returns cached results when the same query has been run against chunks
   * with the same headings and content; otherwise reranks and caches.
   *
   * @param query - The user query.
   * @param chunks - Candidate chunks.
   * @returns The (possibly cached) output of `rerankAndFilter`.
   */
  async search(query: string, chunks: Chunk[]) {
    const key = this.getKey(query, chunks);
    const cached = this.cache.get(key);
    if (cached !== undefined) {
      return cached;
    }

    const results = rerankAndFilter(query, chunks, {
      lambdaRidge: 1e-2
    });
    this.cache.set(key, results);
    return results;
  }

  /**
   * Builds an unambiguous cache key.
   *
   * JSON encoding keeps the query and each heading/content in separate,
   * escaped slots, so delimiter characters inside them cannot make two
   * different inputs collide. Content is included because chunks that share
   * headings but differ in text must not share cached results.
   */
  private getKey(query: string, chunks: Chunk[]): string {
    return JSON.stringify([query, chunks.map(c => [c.heading, c.content])]);
  }
}
```
## Example 13: Error Recovery
```typescript
import { rerankAndFilter } from '@astermind/astermind-pro';
/**
 * Runs `rerankAndFilter` with the primary configuration and, if that throws
 * or rejects, logs a warning and retries once with a simpler configuration.
 *
 * An error from the fallback attempt is not caught and propagates to the caller.
 *
 * @param query - The user query.
 * @param chunks - Candidate chunks.
 * @returns The output of whichever attempt succeeded.
 */
async function robustSearch(query: string, chunks: Chunk[]) {
  const primaryOptions = {
    lambdaRidge: 1e-2,
    probThresh: 0.45
  };
  // Fallback to simpler config
  const fallbackOptions = {
    lambdaRidge: 1e-1,
    probThresh: 0.3,
    useMMR: false
  };

  try {
    return await rerankAndFilter(query, chunks, primaryOptions);
  } catch (primaryError) {
    console.warn('Primary search failed, using fallback', primaryError);
    return await rerankAndFilter(query, chunks, fallbackOptions);
  }
}
```
## Example 14: Progressive Enhancement
```typescript
import { rerank, filterMMR, summarizeDeterministic } from '@astermind/astermind-pro';
/**
 * Reranks, applies `filterMMR` only when more than 10 results come back, and
 * summarizes whichever list results.
 *
 * @param query - The user query.
 * @param chunks - Candidate chunks.
 * @returns The output of `summarizeDeterministic`.
 */
async function progressiveSearch(query: string, chunks: Chunk[]) {
  // Stage 1: Basic reranking
  const reranked = rerank(query, chunks, {
    lambdaRidge: 1e-2
  });

  // Stage 2: Add diversity (if needed)
  const candidates = reranked.length > 10
    ? filterMMR(reranked, {
        useMMR: true,
        mmrLambda: 0.7
      })
    : reranked;

  // Stage 3: Summarize
  return summarizeDeterministic(query, candidates, {
    maxAnswerChars: 1000
  });
}
```
## Example 15: Worker Integration
```typescript
// Main thread
// Start the packaged worker script as a module worker; the URL is resolved
// relative to this module via import.meta.url.
const worker = new Worker(
new URL('@astermind/astermind-pro/workers/prod-worker', import.meta.url),
{ type: 'module' }
);
// Handle replies: only messages whose `type` is 'answer' are logged here;
// every other message type is ignored by this handler.
worker.onmessage = (e) => {
if (e.data.type === 'answer') {
console.log('Answer:', e.data.text);
}
};
// Load model
// `serializedModel` is a placeholder defined by the caller — presumably a model
// produced by exportModel (see the Model Serialization example); confirm the expected shape.
worker.postMessage({
action: 'init',
payload: { model: serializedModel }
});
// Query
// Posted right after 'init', without waiting for any acknowledgement from the worker.
worker.postMessage({
action: 'ask',
payload: { q: 'your query' }
});
```