UNPKG

@astermind/astermind-synthetic-data

Version:

OmegaSynth - Label-Conditioned Synthetic Data Generator for AsterMind ELM/KELM Pipelines

52 lines (51 loc) 1.23 kB
/**
 * StringEncoder - turns strings into numeric vectors and turns such
 * vectors back into strings.
 * Intended to be compatible with ELM/KELM pipelines.
 */
import { CharVocab } from './CharVocab';

/**
 * Options accepted by the StringEncoder constructor.
 */
export interface StringEncoderConfig {
  /**
   * Required length setting.
   * NOTE(review): presumably the fixed number of characters handled per
   * string (pad/truncate) - confirm against the implementation.
   */
  maxLength: number;
  /**
   * Optional character set.
   * NOTE(review): presumably seeds or restricts the vocabulary - confirm.
   */
  charSet?: string;
  /**
   * Optional flag.
   * NOTE(review): likely selects one-hot output instead of index output
   * (encode is documented as returning either form) - confirm, including
   * the default when omitted.
   */
  useOneHot?: boolean;
}

/**
 * Type declarations for the string <-> vector encoder.
 * Only the public surface is described here; the implementation lives
 * in the accompanying JavaScript module.
 */
export declare class StringEncoder {
  private vocab;
  private config;

  constructor(config: StringEncoderConfig);

  /**
   * Builds the vocabulary from a set of training samples.
   * @param samples Training strings used to build the vocabulary
   */
  buildVocab(samples: string[]): void;

  /**
   * Converts one string into its vector form.
   * @param str The string to encode
   * @returns The encoded vector (indices or one-hot)
   */
  encode(str: string): number[];

  /**
   * Converts one encoded vector back into a string.
   * @param vector The encoded vector
   * @returns The decoded string
   */
  decode(vector: number[]): string;

  /**
   * Encodes several strings at once.
   * @param strings The strings to encode
   * @returns One encoded vector per input string
   */
  encodeBatch(strings: string[]): number[][];

  /**
   * Decodes several vectors at once.
   * @param vectors The encoded vectors
   * @returns One decoded string per input vector
   */
  decodeBatch(vectors: number[][]): string[];

  /**
   * Reports the size of the output vector.
   */
  getVectorSize(): number;

  /**
   * Reports the size of the vocabulary.
   */
  getVocabSize(): number;

  /**
   * Returns the vocabulary object.
   */
  getVocab(): CharVocab;
}