UNPKG

game-analysis-types

Version:

Common TypeScript types and utilities for game analysis tools.

122 lines 5.75 kB
import { pipeline } from '@xenova/transformers';

/**
 * Service for generating text embeddings using a pre-trained transformer model.
 * Designed to be a singleton or instantiated once per model needed.
 *
 * Typical usage: construct, `await initializeModel()`, then call
 * `generateEmbedding(text)` as often as needed.
 */
export class EmbeddingService {
  /** Feature-extraction pipeline; stays null until initializeModel() succeeds. */
  extractor = null;
  /** Model identifier handed to `pipeline()`. */
  modelName;
  /** Embedding vector length, cached from the first successfully generated embedding. */
  modelDimensions = null;

  /**
   * @param {string} [modelName] Model identifier. When falsy, falls back to the
   *   EMBEDDING_MODEL_NAME environment variable and then to
   *   'Xenova/all-MiniLM-L6-v2'.
   */
  constructor(modelName) {
    // `||` (not `??`) is deliberate: an empty string from the caller or the
    // environment should also fall through to the next candidate.
    this.modelName = modelName || process.env.EMBEDDING_MODEL_NAME || 'Xenova/all-MiniLM-L6-v2';
    console.log(`EmbeddingService: Using model '${this.modelName}'.`);
  }

  /**
   * Initializes the embedding model pipeline and pre-computes the embedding
   * dimensions. Must be called before generateEmbedding.
   * Calling it again after a successful initialization is a no-op.
   *
   * @returns {Promise<void>}
   * @throws Re-throws whatever the pipeline load or the dimension probe throws.
   */
  async initializeModel() {
    if (this.extractor) {
      console.log("EmbeddingService: Model already initialized.");
      return;
    }
    try {
      console.log(`EmbeddingService: Initializing model '${this.modelName}'... This may take some time on first run.`);
      // 'feature-extraction' is the task used for sentence embeddings.
      this.extractor = await pipeline('feature-extraction', this.modelName, {
        quantized: true, // Use quantized model for efficiency if available
      });
      console.log("EmbeddingService: Model initialized successfully.");
      // Pre-compute dimensions so later callers get the cached value.
      await this.getModelDimensions();
    } catch (error) {
      console.error(`EmbeddingService: Failed to initialize model '${this.modelName}':`, error);
      throw error; // Re-throw for the calling function to handle
    }
  }

  /**
   * Generates an embedding for the given text.
   * Requires the model to be initialized; does not initialize it lazily.
   *
   * @param {string} text The text to embed.
   * @returns {Promise<{embedding: number[], dimensions: number} | null>} The
   *   vector and the model's expected dimensions, or null when the model is
   *   not initialized, the pipeline output is unusable, or an error occurs.
   */
  async generateEmbedding(text) {
    if (!this.extractor) {
      console.error("EmbeddingService: Model not initialized. Call initializeModel() first.");
      return null;
    }
    try {
      // Mean pooling + normalization are requested from the pipeline itself.
      const output = await this.extractor(text, { pooling: 'mean', normalize: true });
      if (!output?.data) {
        console.warn("EmbeddingService: Unexpected output structure from model pipeline:", output);
        return null;
      }
      // Convert the typed array to a regular array.
      const embedding = Array.from(output.data);
      if (embedding.length === 0) {
        console.warn("EmbeddingService: Embedding generation resulted in an empty vector.");
        return null;
      }
      // Cache the dimension from the first vector produced. This must NOT call
      // getModelDimensions(): that method probes by calling generateEmbedding(),
      // so delegating to it while the cache is empty would recurse forever.
      if (this.modelDimensions === null) {
        this.modelDimensions = embedding.length;
      }
      const dimensions = this.modelDimensions;
      if (embedding.length !== dimensions) {
        // Mismatch is only reported; the vector is returned as is.
        console.warn(`EmbeddingService: Generated embedding dimension (${embedding.length}) does not match expected dimension (${dimensions}).`);
      }
      return { embedding, dimensions };
    } catch (error) {
      console.error("EmbeddingService: Error generating embedding:", error);
      return null;
    }
  }

  /**
   * Gets the expected dimensions of the embeddings produced by the model.
   * Returns the cached value when available; otherwise initializes the model
   * if necessary and generates a dummy embedding to measure it.
   *
   * @returns {Promise<number>} The number of dimensions.
   * @throws {Error} If the model cannot be initialized or the dimensions cannot
   *   be determined (the underlying error is attached as `cause`).
   */
  async getModelDimensions() {
    if (this.modelDimensions !== null) {
      return this.modelDimensions;
    }
    if (!this.extractor) {
      console.warn("EmbeddingService: Cannot determine dimensions, model not initialized.");
      await this.initializeModel();
      if (!this.extractor) {
        throw new Error("Model initialization failed, cannot get dimensions.");
      }
      // initializeModel() pre-computes the dimensions, so avoid a second probe.
      if (this.modelDimensions !== null) {
        return this.modelDimensions;
      }
    }
    console.log("EmbeddingService: Determining model dimensions...");
    try {
      // Generate a dummy embedding purely to learn the vector length.
      const probe = await this.generateEmbedding("dimension check");
      if (!probe) {
        throw new Error("Failed to generate dummy embedding for dimension check.");
      }
      this.modelDimensions = probe.dimensions;
      console.log(`EmbeddingService: Determined model dimensions: ${this.modelDimensions}`);
      return this.modelDimensions;
    } catch (error) {
      console.error("EmbeddingService: Error determining model dimensions:", error);
      // Wrap in a more specific error while preserving the original failure.
      throw new Error("Could not determine embedding model dimensions.", { cause: error });
    }
  }
}
//# sourceMappingURL=EmbeddingService.js.map