@astermind/astermind-synth
Version:
OmegaSynth - Label-Conditioned Synthetic Data Generator for AsterMind ELM/KELM Pipelines
40 lines (39 loc) • 1.03 kB
TypeScript
/**
* CharVocab - Character vocabulary builder
*
* Builds a vocabulary from character sets and training data, and exposes
* lookups in both directions (character to index via `getIndex`, index to
* character via `getChar`).
*
* This is an ambient declaration: only the type surface is described here.
* The implementation is not visible, so runtime details (index ordering,
* handling of unknown characters, etc.) must be confirmed against it.
*/
export declare class CharVocab {
/** Private character-to-index lookup; its type is not shown in this declaration. */
private charToIndex;
/** Private index-to-character lookup; its type is not shown in this declaration. */
private indexToChar;
/** Private size field; presumably the value reported by `getSize()` - verify against the implementation. */
private size;
/**
* Build vocabulary from a set of strings
*
* NOTE(review): whether a second call resets or extends a previously built
* vocabulary cannot be determined from this declaration - confirm against
* the implementation.
*
* @param samples Array of strings to build vocabulary from
* @param charSet Optional predefined character set (e.g., alphanumeric + punctuation).
*   NOTE(review): how `charSet` is combined with the characters found in
*   `samples` is not visible here - confirm.
*/
build(samples: string[], charSet?: string): void;
/**
* Get index for a character
*
* @param char Character to look up (presumably a single character - verify).
* @returns The numeric index associated with `char`.
*   NOTE(review): the result for a character that is not in the vocabulary
*   is not visible in this declaration - check with `hasChar` first, or
*   confirm the fallback behaviour in the implementation.
*/
getIndex(char: string): number;
/**
* Get character for an index
*
* @param index Vocabulary index to look up.
* @returns The character associated with `index`.
*   NOTE(review): behaviour for an out-of-range index is not visible in this
*   declaration - confirm against the implementation.
*/
getChar(index: number): string;
/**
* Check if character exists in vocabulary
*
* @param char Character to test.
* @returns A boolean indicating whether `char` exists in the vocabulary.
*/
hasChar(char: string): boolean;
/**
* Get vocabulary size
*
* @returns The vocabulary size as a number.
*/
getSize(): number;
/**
* Get all characters in vocabulary
*
* @returns An array of the characters in the vocabulary.
*   NOTE(review): ordering (e.g. by index) and whether the array is a copy or
*   a live internal reference are not visible here - confirm before mutating.
*/
getChars(): string[];
/**
* Get default character set (alphanumeric + common punctuation)
*
* Static, so it can be called without an instance. The return type matches
* the `charSet` parameter of `build`.
*
* @returns The default character set as a string.
*/
static getDefaultCharSet(): string;
}