@astermind/astermind-synth
Version:
OmegaSynth - Label-Conditioned Synthetic Data Generator for AsterMind ELM/KELM Pipelines
43 lines (42 loc) • 1.13 kB
TypeScript
/**
* CharacterEmbeddings - Learn character embeddings for better semantic representation.
* Provides continuous vector representations of characters.
*
* This is an ambient declaration only: it describes the public and private
* member names and the public signatures. Method bodies are not visible here,
* so notes marked NOTE(review) are assumptions to confirm against the
* implementation.
*/
export declare class CharacterEmbeddings {
/** Internal store of learned embeddings (type is elided in this declaration). */
private embeddings;
/** Internal embedding-size setting (type is elided in this declaration). */
private embeddingSize;
/** Internal vocabulary store (type is elided in this declaration). */
private vocab;
/**
* Create a new instance.
*
* @param embeddingSize - Optional size setting for the embeddings.
* NOTE(review): presumably the length of each embedding vector; the default
* used when omitted is not visible in this declaration - confirm.
*/
constructor(embeddingSize?: number);
/**
* Build embeddings from training samples.
*
* @param samples - Training strings used to build the embeddings.
* NOTE(review): presumably also derives the vocabulary from these samples - confirm.
*/
build(samples: string[]): void;
/**
* Learn embeddings based on character co-occurrence.
* Private member; its signature is elided in this declaration.
*/
private learnCoOccurrences;
/**
* Get embedding for a character.
*
* @param char - The character to look up.
* NOTE(review): handling of characters outside the vocabulary, or of strings
* longer than one character, is not visible here - confirm.
* @returns The embedding as an array of numbers.
*/
getEmbedding(char: string): number[];
/**
* Encode a string to embedding vectors.
*
* @param str - The string to encode.
* @param maxLength - Length bound for the encoding.
* NOTE(review): presumably the output is padded or truncated to this many
* vectors - confirm against the implementation.
* @returns An array of embedding vectors (each an array of numbers).
*/
encode(str: string, maxLength: number): number[][];
/**
* Decode embedding vectors to string (find closest character).
*
* @param embeddings - The embedding vectors to decode.
* @param vocab - Characters supplied by the caller.
* NOTE(review): presumably the candidate set searched for the closest
* character, likely via euclideanDistance - confirm.
* @returns The decoded string.
*/
decode(embeddings: number[][], vocab: string[]): string;
/**
* Calculate Euclidean distance between two embeddings.
* Private member; its signature is elided in this declaration.
*/
private euclideanDistance;
/**
* Get vocabulary size.
*
* @returns The vocabulary size as a number.
*/
getVocabSize(): number;
/**
* Get all characters in vocabulary.
*
* @returns The vocabulary characters as an array of strings.
* NOTE(review): whether this is a copy or the internal array is not visible here.
*/
getVocab(): string[];
}