UNPKG

vidscript

Version:

AI-powered CLI tool that transforms video content into intelligent, structured notes and scripts

github.com/camballe/vidscript

camballe/vidscript

96 lines • 3.74 kB

JavaScript

import { Pinecone } from '@pinecone-database/pinecone';
import { OpenAI } from 'openai';
import lodash from 'lodash';

const { chunk } = lodash;

// Module-level clients shared by every VectorStore instance.
const openai = new OpenAI();
const pinecone = new Pinecone({
    apiKey: process.env.PINECONE_API_KEY || ''
});

/**
 * Thin wrapper around a Pinecone index that stores transcript chunks as
 * OpenAI embeddings and retrieves the chunks most similar to a query.
 *
 * Every read/write/delete is scoped to `this.namespace`.
 */
export class VectorStore {
    /** Base (un-scoped) Pinecone index handle for `options.indexName`. */
    index;
    /** Namespace all operations are scoped to; falls back to 'default'. */
    namespace;

    /**
     * @param {object} options
     * @param {string} options.indexName - Name of the Pinecone index to use.
     * @param {string} [options.namespace] - Namespace inside the index;
     *   'default' is used when omitted or empty.
     */
    constructor(options) {
        console.log('Initializing vector store with options:', {
            ...options,
            apiKeyPresent: !!process.env.PINECONE_API_KEY
        });
        this.index = pinecone.index(options.indexName);
        this.namespace = options.namespace || 'default';
    }

    /**
     * Convert text into embeddings using OpenAI's embedding model
     *
     * @param {string} text - Text to embed.
     * @returns {Promise<number[]>} Embedding of the first (only) input.
     */
    async textToEmbedding(text) {
        console.log('Generating embedding for text of length:', text.length);
        const response = await openai.embeddings.create({
            input: text,
            model: 'text-embedding-3-small'
        });
        return response.data[0].embedding;
    }

    /**
     * Store transcript chunks in the vector database
     *
     * The transcript is split on single spaces into groups of
     * `ceil(2000 / 5)` words; each group is embedded and upserted with id
     * `chunk_<n>`, so storing another transcript in the same namespace
     * overwrites chunks with the same position.
     *
     * @param {string} transcript - Full transcript text.
     * @param {object} [metadata={}] - Extra metadata merged into every chunk
     *   (keys here override `text`, `chunkIndex` and `totalChunks`).
     * @returns {Promise<void>}
     */
    async storeTranscript(transcript, metadata = {}) {
        console.log('Storing transcript of length:', transcript.length);

        // An empty transcript would otherwise produce a single '' chunk,
        // which the embeddings endpoint rejects.
        if (transcript.trim() === '') {
            console.log('Split transcript into chunks:', 0);
            console.log('Finished storing transcript in vector store');
            return;
        }

        // Split transcript into smaller chunks (around 500 tokens each)
        const chunkSize = 2000; // characters, approximately 500 tokens
        const wordsPerChunk = Math.ceil(chunkSize / 5);
        const chunks = chunk(transcript.split(' '), wordsPerChunk)
            .map((chunkWords) => chunkWords.join(' '))
            // Runs of spaces can yield whitespace-only chunks; skip them.
            .filter((text) => text.trim() !== '');
        console.log('Split transcript into chunks:', chunks.length);

        // Scope writes to the configured namespace; calling upsert on the
        // bare index handle would write to the index's default namespace.
        const target = this.index.namespace(this.namespace);

        // Process chunks in batches to avoid rate limits
        const batchSize = 10;
        const totalBatches = Math.ceil(chunks.length / batchSize);
        for (let i = 0; i < chunks.length; i += batchSize) {
            const batch = chunks.slice(i, i + batchSize);
            console.log(`Processing batch ${i / batchSize + 1} of ${totalBatches}`);
            const records = await Promise.all(batch.map(async (text, index) => {
                const embedding = await this.textToEmbedding(text);
                const chunkMetadata = {
                    text,
                    chunkIndex: i + index,
                    totalChunks: chunks.length,
                    ...metadata
                };
                return {
                    id: `chunk_${i + index}`,
                    values: embedding,
                    metadata: chunkMetadata
                };
            }));
            console.log(`Upserting ${records.length} vectors to Pinecone in namespace:`, this.namespace);
            await target.upsert(records);
        }
        console.log('Finished storing transcript in vector store');
    }

    /**
     * Query the vector database to find relevant chunks
     *
     * @param {string} query - Natural-language query to embed and search for.
     * @param {number} [topK=5] - Maximum number of matches to request.
     * @returns {Promise<Array<{text: string, score: number, metadata: object}>>}
     *   Matches that carry string `metadata.text` and a numeric score.
     */
    async query(query, topK = 5) {
        console.log('Querying vector store for:', query, 'in namespace:', this.namespace);
        const queryEmbedding = await this.textToEmbedding(query);
        // Search the same namespace storeTranscript() writes to.
        const results = await this.index.namespace(this.namespace).query({
            vector: queryEmbedding,
            topK,
            includeMetadata: true
        });
        const matches = results.matches ?? [];
        console.log('Found matches:', matches.length);
        return matches
            .filter((match) => match !== undefined &&
                match.metadata !== undefined &&
                typeof match.metadata.text === 'string' &&
                typeof match.score === 'number')
            .map((match) => ({
                text: match.metadata.text,
                score: match.score,
                metadata: match.metadata
            }));
    }

    /**
     * Delete all vectors in the namespace
     *
     * @returns {Promise<void>}
     */
    async clear() {
        console.log('Clearing vectors from namespace:', this.namespace);
        // Scope the delete so only this store's namespace is wiped.
        await this.index.namespace(this.namespace).deleteAll();
    }
}
//# sourceMappingURL=vectorStore.js.map