@astermind/astermind-synthetic-data
Version:
OmegaSynth - Label-Conditioned Synthetic Data Generator for AsterMind ELM/KELM Pipelines
52 lines (51 loc) • 1.23 kB
TypeScript
/**
* StringEncoder - Encodes strings to numeric vectors and decodes them back.
* Compatible with ELM/KELM pipelines.
*/
import { CharVocab } from './CharVocab';
/**
* Configuration object accepted by the StringEncoder constructor.
*
* Only the field names and types are visible in this declaration; the notes
* below on their meaning are assumptions to confirm against the implementation.
*/
export interface StringEncoderConfig {
/** Required length setting. Presumably the maximum string length handled per sample — padding/truncation behavior is not visible here; TODO confirm. */
maxLength: number;
/** Optional character set. Presumably used to seed or restrict the vocabulary — TODO confirm; the default when omitted is not visible here. */
charSet?: string;
/** Optional flag. Presumably selects one-hot output instead of index output (encode is documented as returning "either indices or one-hot") — default not visible here; TODO confirm. */
useOneHot?: boolean;
}
/**
* Converts strings to number arrays and back, exposing single-item and batch
* variants plus accessors for the vector size, vocabulary size and vocabulary.
*
* This is an ambient declaration: it lists signatures only. Details such as
* padding, truncation, handling of characters outside the vocabulary, and
* whether buildVocab must be called before encode are not visible here —
* NOTE(review): confirm against the implementation before relying on them.
*/
export declare class StringEncoder {
// Private state; its type is erased in declaration output. Presumably the CharVocab returned by getVocab() — TODO confirm.
private vocab;
// Private state; its type is erased in declaration output. Presumably the StringEncoderConfig passed to the constructor — TODO confirm.
private config;
/**
* Create an encoder.
* @param config Encoder settings (maxLength is required; charSet and useOneHot are optional)
*/
constructor(config: StringEncoderConfig);
/**
* Build vocabulary from training samples
* @param samples Strings to build the vocabulary from
*/
buildVocab(samples: string[]): void;
/**
* Encode a string to a vector
* @param str String to encode
* @returns Encoded vector (either indices or one-hot)
*/
encode(str: string): number[];
/**
* Decode a vector back to a string
* @param vector Encoded vector
* @returns Decoded string
*/
decode(vector: number[]): string;
/**
* Encode multiple strings
* @param strings Strings to encode
* @returns One encoded vector per input string (presumably in input order — TODO confirm)
*/
encodeBatch(strings: string[]): number[][];
/**
* Decode multiple vectors
* @param vectors Encoded vectors
* @returns One decoded string per input vector (presumably in input order — TODO confirm)
*/
decodeBatch(vectors: number[][]): string[];
/**
* Get the output vector size
* @returns The output vector size (presumably the length of arrays returned by encode — TODO confirm)
*/
getVectorSize(): number;
/**
* Get vocabulary size
* @returns The vocabulary size
*/
getVocabSize(): number;
/**
* Get vocabulary
* @returns The CharVocab instance (whether it is the live internal object or a copy is not visible here)
*/
getVocab(): CharVocab;
}