UNPKG

contextual-agent-sdk

Version:

SDK for building AI agents with seamless voice-text context switching

445 lines (384 loc) 14.4 kB
import type { ContextProvider, ContextResult, BaseConfig } from '../types/context';
import * as fs from 'fs';
import * as path from 'path';

/** One place documentation can be loaded from. */
interface DocumentSource {
  type: 'file' | 'directory' | 'url' | 'custom';
  path: string;
  /** Parser to apply; `'auto'` (or omitted) means "detect from the file extension". */
  format?: 'markdown' | 'text' | 'json' | 'auto';
  encoding?: string;
  /** Priority weight for this source; results are sorted by descending weight. */
  weight?: number;
  tags?: string[];
}

interface KnowledgeBaseConfig extends BaseConfig {
  options?: {
    sources?: DocumentSource[];
    searchType?: 'exact' | 'fuzzy' | 'semantic';
    maxResults?: number;
    /** For large documents. NOTE(review): not read anywhere in this module. */
    chunkSize?: number;
    customSearch?: (query: string) => Promise<any[]>;
    /** Auto-discover common documentation files. */
    autoDiscoverDocs?: {
      enabled?: boolean;
      rootPath?: string;
      /** File patterns to look for. */
      patterns?: string[];
      /** NOTE(review): not read anywhere in this module; directory loading is non-recursive. */
      recursive?: boolean;
    };
  };
}

/** Shape of a document produced by the file / directory / URL loaders. */
interface LoadedDocument {
  title: string;
  content: string;
  filename?: string;
  source: string;
  type: 'file' | 'url';
  tags: string[];
  weight: number;
  lastModified?: number;
}

/** Entry stored in the in-memory file cache. */
interface CachedDocument {
  title?: string;
  content: string;
  lastModified: number;
}

/**
 * Context provider that answers queries from local files, directories, URLs
 * and/or a user-supplied search callback.
 */
export class KnowledgeBaseProvider implements ContextProvider {
  public id: string;
  public name: string;
  public source: 'knowledge_base' = 'knowledge_base';
  public priority: number;
  public enabled: boolean;

  private config: KnowledgeBaseConfig;
  /** Parsed file contents keyed by file path, invalidated when the file's mtime advances. */
  private documentCache: Map<string, CachedDocument> = new Map();

  /**
   * @param config Provider configuration. When `options.autoDiscoverDocs.enabled`
   *   is set, discovered sources are appended to `config.options.sources`
   *   (the passed object is mutated).
   */
  constructor(config: KnowledgeBaseConfig) {
    this.config = config;
    this.id = config.id || 'knowledge_base';
    this.name = config.name || 'Knowledge Base Provider';
    this.priority = config.priority || 80;
    this.enabled = config.enabled ?? true;

    // Auto-discover documentation if enabled
    if (this.config.options?.autoDiscoverDocs?.enabled) {
      this.autoDiscoverDocuments();
    }
  }

  /**
   * Collects results from `customSearch` (only when a query is given) and from
   * every configured source, then sorts and truncates them.
   *
   * @returns The formatted result, or `null` when nothing matched or an error
   *   was thrown (errors are logged, not propagated).
   */
  async getContext(params: { query?: string }): Promise<ContextResult | null> {
    try {
      const results: any[] = [];

      // Use custom search if provided
      if (this.config.options?.customSearch && params.query) {
        const customResults = await this.config.options.customSearch(params.query);
        results.push(...customResults);
      }

      // Search through configured sources
      if (this.config.options?.sources) {
        // The same file can be reachable through several sources (e.g. README.md
        // listed as a file and also found via its directory). Keep one entry per
        // path, preferring the higher weight.
        const bySource = new Map<string, LoadedDocument>();
        for (const source of this.config.options.sources) {
          const sourceResults = await this.searchDocumentSource(source, params.query);
          for (const doc of sourceResults) {
            const existing = bySource.get(doc.source);
            if (!existing || doc.weight > existing.weight) {
              bySource.set(doc.source, doc);
            }
          }
        }
        bySource.forEach((doc) => results.push(doc));
      }

      if (results.length === 0) {
        return null;
      }

      return this.formatResults(results);
    } catch (error) {
      console.error('Knowledge base search error:', error);
      return null;
    }
  }

  /** Optional custom formatter: renders a result as a "Documentation:" text block. */
  formatContext(context: ContextResult): string {
    if (typeof context.content === 'string') {
      return `Documentation:\n${context.content}`;
    }

    if (Array.isArray(context.content)) {
      const sections = context.content.map((item: any, i: number) => {
        const title = item.title || item.filename || `Document ${i + 1}`;
        const content = item.content || item.text || JSON.stringify(item);
        const source = item.source ? ` (${item.source})` : '';
        return `\n## ${title}${source}\n${content}`;
      });
      return `Documentation:\n${sections.join('\n')}`;
    }

    return JSON.stringify(context.content, null, 2);
  }

  /**
   * Loads one source and returns the documents whose content matches `query`.
   * Failures are logged and yield an empty list.
   */
  private async searchDocumentSource(source: DocumentSource, query?: string): Promise<LoadedDocument[]> {
    const results: LoadedDocument[] = [];

    try {
      switch (source.type) {
        case 'file': {
          const fileResult = await this.loadFile(source.path, source);
          if (fileResult && this.matchesQuery(fileResult.content, query)) {
            results.push(fileResult);
          }
          break;
        }
        case 'directory': {
          const dirResults = await this.loadDirectory(source.path, source);
          results.push(...dirResults.filter((result) => this.matchesQuery(result.content, query)));
          break;
        }
        case 'url': {
          const urlResult = await this.loadUrl(source.path, source);
          if (urlResult && this.matchesQuery(urlResult.content, query)) {
            results.push(urlResult);
          }
          break;
        }
        case 'custom':
          // For custom sources, assume they're handled by customSearch
          break;
      }
    } catch (error) {
      console.error(`Error loading source ${source.path}:`, error);
    }

    return results;
  }

  /**
   * Reads and parses a single file, using the cache when the file is unchanged.
   * A cache hit returns the same shape as a fresh read.
   *
   * @returns The document, or `null` if it could not be read (error is logged).
   */
  private async loadFile(filePath: string, source: DocumentSource): Promise<LoadedDocument | null> {
    try {
      const filename = path.basename(filePath);
      const tags = source.tags || [];
      const weight = source.weight || 1;

      // Check cache first
      const cached = this.getCachedDocument(filePath);
      if (cached) {
        return {
          title: cached.title || filename,
          content: cached.content,
          filename,
          source: filePath,
          type: 'file',
          tags,
          weight,
          lastModified: cached.lastModified
        };
      }

      // Read file
      const stats = fs.statSync(filePath);
      const content = fs.readFileSync(filePath, {
        encoding: (source.encoding as BufferEncoding) || 'utf8'
      });
      const parsed = this.parseDocument(content, this.resolveFormat(source.format, filePath));

      // Cache the result
      this.cacheDocument(filePath, parsed.content, stats.mtimeMs, parsed.title);

      return {
        title: parsed.title || filename,
        content: parsed.content,
        filename,
        source: filePath,
        type: 'file',
        tags,
        weight,
        lastModified: stats.mtimeMs
      };
    } catch (error) {
      console.error(`Error loading file ${filePath}:`, error);
      return null;
    }
  }

  /**
   * Loads every document-like file directly inside `dirPath` (non-recursive).
   * An error while listing or stat-ing is logged; documents loaded so far are returned.
   */
  private async loadDirectory(dirPath: string, source: DocumentSource): Promise<LoadedDocument[]> {
    const results: LoadedDocument[] = [];

    try {
      const files = fs.readdirSync(dirPath);

      for (const file of files) {
        const fullPath = path.join(dirPath, file);
        const stats = fs.statSync(fullPath);

        if (stats.isFile() && this.isDocumentFile(file)) {
          const fileResult = await this.loadFile(fullPath, source);
          if (fileResult) {
            results.push(fileResult);
          }
        }
      }
    } catch (error) {
      console.error(`Error loading directory ${dirPath}:`, error);
    }

    return results;
  }

  /**
   * Fetches a URL with the global `fetch` and parses the body.
   *
   * @returns The document, or `null` on network / HTTP failure (error is logged).
   */
  private async loadUrl(url: string, source: DocumentSource): Promise<LoadedDocument | null> {
    try {
      const response = await fetch(url);
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      const content = await response.text();
      // Detect the format from the URL's path only, so a query string or
      // fragment ("doc.md?raw=1") does not hide the extension.
      const parsed = this.parseDocument(content, this.resolveFormat(source.format, this.urlPathname(url)));

      return {
        title: parsed.title || url,
        content: parsed.content,
        source: url,
        type: 'url',
        tags: source.tags || [],
        weight: source.weight || 1
      };
    } catch (error) {
      console.error(`Error loading URL ${url}:`, error);
      return null;
    }
  }

  /** Returns the path component of `url`, or `url` itself if it cannot be parsed. */
  private urlPathname(url: string): string {
    try {
      return new URL(url).pathname;
    } catch {
      return url;
    }
  }

  /**
   * Picks the parser name for a source: an explicit format wins, while
   * `undefined` and `'auto'` both defer to extension-based detection.
   */
  private resolveFormat(format: string | undefined, location: string): string {
    return format && format !== 'auto' ? format : this.detectFormat(location);
  }

  /** Dispatches to the parser for `format`; unknown formats are treated as plain text. */
  private parseDocument(content: string, format: string): { title?: string; content: string } {
    switch (format.toLowerCase()) {
      case 'markdown':
      case 'md':
        return this.parseMarkdown(content);
      case 'json':
        return this.parseJson(content);
      case 'text':
      case 'txt':
      default:
        return { content };
    }
  }

  /**
   * Strips common Markdown syntax and extracts the first H1 as the title.
   * Fenced code blocks are replaced with `[CODE BLOCK]`.
   */
  private parseMarkdown(content: string): { title?: string; content: string } {
    // Replace fenced code blocks FIRST: the inline-code pattern below would
    // otherwise consume two of the three fence backticks, and the heading /
    // emphasis patterns would rewrite text inside the block.
    const withoutCode = content.replace(/```[\s\S]*?```/g, '[CODE BLOCK]');

    // Extract title from first H1 heading (outside code blocks)
    const titleMatch = withoutCode.match(/^#\s+(.+)$/m);
    const title = titleMatch?.[1]?.trim();

    // Clean up markdown for better context
    const cleanContent = withoutCode
      .replace(/^#{1,6}\s+/gm, '') // Remove heading markers
      .replace(/\*\*(.*?)\*\*/g, '$1') // Remove bold
      .replace(/\*(.*?)\*/g, '$1') // Remove italic
      .replace(/`(.*?)`/g, '$1') // Remove inline code
      .replace(/\[([^\]]+)\]\([^\)]+\)/g, '$1') // Replace links with text
      .trim();

    return { title, content: cleanContent };
  }

  /**
   * Parses JSON documentation. Recognizes `{ title, content }` and
   * `{ sections: [...] }` shapes; anything else is pretty-printed. Text that
   * is not valid JSON is returned unchanged. The returned `content` is always
   * a string.
   */
  private parseJson(content: string): { title?: string; content: string } {
    try {
      const data = JSON.parse(content);
      if (data === null) {
        return { content };
      }

      // If it's a structured document with title and content
      if (data.title && data.content) {
        // Downstream matching calls string methods on content, so serialize
        // structured content instead of passing an object through.
        const body = typeof data.content === 'string' ? data.content : JSON.stringify(data.content, null, 2);
        return { title: data.title, content: body };
      }

      // If it's documentation with sections
      if (Array.isArray(data.sections)) {
        const title = data.title || data.name;
        const body = data.sections
          .map((section: any) => `${section.title || section.name}: ${section.content || section.description}`)
          .join('\n\n');
        return { title, content: body };
      }

      // Default: stringify the whole object
      return { content: JSON.stringify(data, null, 2) };
    } catch {
      return { content };
    }
  }

  /** Maps a file extension to a parser name; returns `'auto'` when unknown. */
  private detectFormat(filePath: string): string {
    const ext = path.extname(filePath).toLowerCase();
    const formatMap: Record<string, string> = {
      '.md': 'markdown',
      '.markdown': 'markdown',
      '.txt': 'text',
      '.json': 'json',
      '.readme': 'markdown'
    };
    return formatMap[ext] || 'auto';
  }

  /** True when the file has a documentation extension or a well-known documentation base name. */
  private isDocumentFile(filename: string): boolean {
    const docExtensions = ['.md', '.txt', '.json', '.markdown'];
    const docNames = ['readme', 'changelog', 'license', 'contributing', 'docs', 'documentation'];

    const ext = path.extname(filename).toLowerCase();
    const name = path.basename(filename, ext).toLowerCase();

    return docExtensions.includes(ext) || docNames.includes(name);
  }

  /**
   * Scans `rootPath` for the configured (or default) patterns and appends the
   * sources that exist to `config.options.sources`.
   *
   * Patterns without `*` name a single path. Patterns with `*` are not
   * expanded as globs: the directory formed by the segments before the first
   * wildcard segment is registered as a directory source.
   */
  private autoDiscoverDocuments(): void {
    const autoConfig = this.config.options?.autoDiscoverDocs;
    if (!autoConfig?.enabled) return;

    const rootPath = autoConfig.rootPath || process.cwd();
    const patterns = autoConfig.patterns || [
      'README.md', 'README.txt', 'README',
      'CHANGELOG.md', 'CHANGELOG.txt',
      'LICENSE', 'LICENSE.md', 'LICENSE.txt',
      'CONTRIBUTING.md', 'CONTRIBUTING.txt',
      'docs/**/*.md', 'documentation/**/*.md',
      '*.md' // Any markdown file in root
    ];

    const discoveredSources: DocumentSource[] = [];
    // Several patterns can resolve to the same path; register each only once.
    const seen = new Set<string>();

    patterns.forEach((pattern) => {
      try {
        const hasWildcard = pattern.includes('*');
        // For wildcard patterns keep only the literal leading segments
        // ('docs/**/*.md' -> 'docs', '*.md' -> '' i.e. the root itself).
        const relative = hasWildcard ? this.staticPrefix(pattern) : pattern;
        const target = path.join(rootPath, relative);
        if (!fs.existsSync(target)) return;

        const isDirectory = fs.statSync(target).isDirectory();
        if (hasWildcard && !isDirectory) return;

        const key = `${isDirectory ? 'directory' : 'file'}:${target}`;
        if (seen.has(key)) return;
        seen.add(key);

        if (isDirectory) {
          discoveredSources.push({
            type: 'directory',
            path: target,
            format: 'auto',
            weight: 0.7,
            tags: ['auto-discovered', 'directory']
          });
        } else {
          discoveredSources.push({
            type: 'file',
            path: target,
            format: 'auto',
            weight: this.getDocumentWeight(pattern),
            tags: ['auto-discovered', this.getDocumentType(pattern)]
          });
        }
      } catch (error) {
        console.warn(`Could not auto-discover pattern ${pattern}:`, error);
      }
    });

    // Add discovered sources to config
    if (!this.config.options) {
      this.config.options = {};
    }
    if (!this.config.options.sources) {
      this.config.options.sources = [];
    }

    this.config.options.sources.push(...discoveredSources);
    console.log(`Auto-discovered ${discoveredSources.length} documentation sources`);
  }

  /** Returns the leading path segments of `pattern` that contain no `*`, joined with `/`. */
  private staticPrefix(pattern: string): string {
    const literal: string[] = [];
    for (const segment of pattern.split(/[\\/]/)) {
      if (segment.includes('*')) break;
      literal.push(segment);
    }
    return literal.join('/');
  }

  /** Heuristic weight derived from keywords in the file name. */
  private getDocumentWeight(filename: string): number {
    const name = filename.toLowerCase();
    if (name.includes('readme')) return 1.0;
    if (name.includes('docs') || name.includes('documentation')) return 0.9;
    if (name.includes('changelog')) return 0.7;
    if (name.includes('contributing')) return 0.6;
    if (name.includes('license')) return 0.5;
    return 0.8;
  }

  /** Heuristic tag derived from keywords in the file name. */
  private getDocumentType(filename: string): string {
    const name = filename.toLowerCase();
    if (name.includes('readme')) return 'readme';
    if (name.includes('changelog')) return 'changelog';
    if (name.includes('license')) return 'license';
    if (name.includes('contributing')) return 'contributing';
    if (name.includes('docs')) return 'documentation';
    return 'document';
  }

  /**
   * Case-insensitive test of `content` against `query` using the configured
   * search type (default `'fuzzy'`). An empty or missing query matches everything.
   */
  private matchesQuery(content: string, query?: string): boolean {
    if (!query) return true;

    const searchType = this.config.options?.searchType || 'fuzzy';
    const lowerContent = content.toLowerCase();
    const lowerQuery = query.toLowerCase();

    switch (searchType) {
      case 'exact':
        return lowerContent.includes(lowerQuery);
      case 'semantic':
      // For semantic search, you would integrate with a vector database.
      // For now it shares the fuzzy matcher. (Re-entering matchesQuery here
      // would recurse forever, because searchType is still 'semantic'.)
      case 'fuzzy':
        return this.fuzzyMatch(lowerContent, lowerQuery);
      default:
        return true;
    }
  }

  /**
   * Simple fuzzy matching: true when at least 60% (rounded up) of the query
   * words longer than two characters occur in the content. Both arguments
   * are expected to be lower-cased already.
   */
  private fuzzyMatch(lowerContent: string, lowerQuery: string): boolean {
    const queryWords = lowerQuery.split(/\s+/).filter((w) => w.length > 2);
    const matchCount = queryWords.filter((word) => lowerContent.includes(word)).length;
    return matchCount >= Math.ceil(queryWords.length * 0.6);
  }

  /**
   * Returns the cache entry for `filePath` if the file has not been modified
   * since it was cached; otherwise evicts the entry and returns `null`.
   */
  private getCachedDocument(filePath: string): CachedDocument | null {
    const cached = this.documentCache.get(filePath);
    if (!cached) return null;

    try {
      const stats = fs.statSync(filePath);
      if (stats.mtimeMs > cached.lastModified) {
        this.documentCache.delete(filePath);
        return null;
      }
      return cached;
    } catch {
      // The file can no longer be stat-ed; drop the stale entry.
      this.documentCache.delete(filePath);
      return null;
    }
  }

  /** Stores parsed content (and title, when known) for `filePath`. */
  private cacheDocument(filePath: string, content: string, lastModified: number, title?: string): void {
    this.documentCache.set(filePath, { title, content, lastModified });
  }

  /** Sorts results by descending weight, keeps the top `maxResults` (default 5) and wraps them with metadata. */
  private formatResults(results: any[]): ContextResult {
    // Sort by weight and limit results (sort a copy; leave the caller's array untouched)
    const maxResults = this.config.options?.maxResults || 5;
    const sortedResults = [...results]
      .sort((a, b) => (b.weight || 0) - (a.weight || 0))
      .slice(0, maxResults);

    return {
      content: sortedResults,
      metadata: {
        source: this.source,
        timestamp: new Date(),
        tags: ['knowledge_base', 'documentation'],
        resultCount: sortedResults.length,
        sources: sortedResults.map((r) => r.source)
      }
    };
  }
}