game-analysis-types
Version:
Common TypeScript types and utilities for game analysis tools.
122 lines • 5.75 kB
JavaScript
import { pipeline } from '@xenova/transformers';
/**
 * Service for generating text embeddings using a pre-trained transformer model.
 * Designed to be a singleton or instantiated once per model needed.
 *
 * Typical usage: construct, `await initializeModel()`, then call
 * `generateEmbedding(text)` as often as needed.
 */
export class EmbeddingService {
  /** Feature-extraction pipeline function; `null` until `initializeModel()` has loaded it. */
  extractor = null;
  /** Identifier of the model handed to `pipeline()`. */
  modelName;
  /** Embedding vector length, cached from the first successfully generated embedding. */
  modelDimensions = null;

  /**
   * @param {string} [modelName] Model identifier. Falls back to the
   *   `EMBEDDING_MODEL_NAME` environment variable, then to
   *   `'Xenova/all-MiniLM-L6-v2'`.
   */
  constructor(modelName) {
    // `||` (not `??`) is deliberate: an empty string should also fall through to the next source.
    this.modelName = modelName || process.env.EMBEDDING_MODEL_NAME || 'Xenova/all-MiniLM-L6-v2';
    console.log(`EmbeddingService: Using model '${this.modelName}'.`);
  }

  /**
   * Initializes the embedding model pipeline and pre-computes the embedding
   * dimensions. Must be called before generateEmbedding. Calling it again once
   * the pipeline is loaded is a no-op.
   *
   * @returns {Promise<void>}
   * @throws Re-throws whatever error the pipeline load or the dimension probe raised.
   */
  async initializeModel() {
    if (this.extractor) {
      console.log("EmbeddingService: Model already initialized.");
      return;
    }
    try {
      console.log(`EmbeddingService: Initializing model '${this.modelName}'... This may take some time on first run.`);
      // Using 'feature-extraction' task for sentence embeddings
      this.extractor = await pipeline('feature-extraction', this.modelName, {
        quantized: true, // Use quantized model for efficiency if available
      });
      console.log("EmbeddingService: Model initialized successfully.");
      // Pre-compute dimensions on initialization
      await this.getModelDimensions();
    } catch (error) {
      console.error(`EmbeddingService: Failed to initialize model '${this.modelName}':`, error);
      throw error; // Re-throw error for calling function to handle
    }
  }

  /**
   * Generates an embedding for the given text.
   * Requires the model to have been initialized; does not initialize lazily.
   *
   * The first successful call records the vector length as the model's
   * dimensions; later calls warn (but still return the vector) if the length
   * differs from that recorded value.
   *
   * @param {string} text The text to embed.
   * @returns {Promise<{embedding: number[], dimensions: number} | null>} The
   *   vector and the expected dimensions, or null if the model is not
   *   initialized, the pipeline output is unusable, or the pipeline throws.
   */
  async generateEmbedding(text) {
    if (!this.extractor) {
      console.error("EmbeddingService: Model not initialized. Call initializeModel() first.");
      return null;
    }
    try {
      // Mean pooling + normalization are requested so the pipeline yields one vector for the input.
      const output = await this.extractor(text, { pooling: 'mean', normalize: true });
      if (!output || !output.data) {
        console.warn("EmbeddingService: Unexpected output structure from model pipeline:", output);
        return null;
      }
      // Convert the typed array (or other iterable) to a regular array
      const embedding = Array.from(output.data);
      if (embedding.length === 0) {
        console.warn("EmbeddingService: Embedding generation resulted in an empty vector.");
        return null;
      }
      // Learn the dimensions from this vector on first success. This must NOT
      // call getModelDimensions(): that method probes by calling back into
      // generateEmbedding, and the two would recurse forever while the cache
      // is still empty.
      if (this.modelDimensions === null) {
        this.modelDimensions = embedding.length;
      }
      const dimensions = this.modelDimensions;
      if (embedding.length !== dimensions) {
        console.warn(`EmbeddingService: Generated embedding dimension (${embedding.length}) does not match expected dimension (${dimensions}).`);
        // The mismatching vector is returned as is; callers decide how to handle it.
      }
      return { embedding, dimensions };
    } catch (error) {
      console.error("EmbeddingService: Error generating embedding:", error);
      return null;
    }
  }

  /**
   * Gets the expected dimensions of the embeddings produced by the model.
   * Returns the cached value when available; otherwise initializes the model
   * if necessary and generates a probe embedding to find out.
   *
   * @returns {Promise<number>} The number of dimensions.
   * @throws {Error} If the model cannot be initialized or the probe embedding
   *   fails; the underlying error, when there is one, is attached as `cause`.
   */
  async getModelDimensions() {
    if (this.modelDimensions !== null) {
      return this.modelDimensions;
    }
    if (!this.extractor) {
      console.warn("EmbeddingService: Cannot determine dimensions, model not initialized.");
      await this.initializeModel();
      if (!this.extractor) {
        throw new Error("Model initialization failed, cannot get dimensions.");
      }
      // initializeModel() already probes the dimensions; avoid a second probe.
      if (this.modelDimensions !== null) {
        return this.modelDimensions;
      }
    }
    console.log("EmbeddingService: Determining model dimensions...");
    try {
      // Generate a dummy embedding to find dimensions
      const result = await this.generateEmbedding("dimension check");
      if (!result) {
        throw new Error("Failed to generate dummy embedding for dimension check.");
      }
      this.modelDimensions = result.dimensions;
      console.log(`EmbeddingService: Determined model dimensions: ${this.modelDimensions}`);
      return this.modelDimensions;
    } catch (error) {
      console.error("EmbeddingService: Error determining model dimensions:", error);
      // Same message as before, with the original failure preserved for debugging.
      throw new Error("Could not determine embedding model dimensions.", { cause: error });
    }
  }
}
//# sourceMappingURL=EmbeddingService.js.map