@dvdagames/pgn-tokenizer
Version:
TypeScript version of PGN Tokenizer, a Byte Pair Encoding (BPE) tokenizer for Chess Portable Game Notation (PGN).
17 lines (16 loc) • 485 B
TypeScript
/**
 * Public contract of a PGN tokenizer: it converts a PGN string into an array
 * of numeric tokens and converts an array of numeric tokens back into a string.
 */
export interface PGNTokenizerInterface {
/**
 * Size of the tokenizer's vocabulary.
 * NOTE(review): presumably the number of distinct tokens `encode` can emit;
 * confirm against the implementation.
 */
vocabSize: number;
/**
 * Encodes PGN text into numeric tokens.
 *
 * @param pgn - The PGN text to tokenize.
 * @returns The resulting tokens as an array of numbers.
 */
encode: (pgn: string) => number[];
/**
 * Decodes numeric tokens into a string.
 * NOTE(review): whether `decode(encode(pgn))` reproduces `pgn` exactly is not
 * visible from this declaration; verify before relying on round-tripping.
 *
 * @param tokens - The numeric tokens to decode.
 * @returns The decoded string.
 */
decode: (tokens: number[]) => string;
}
/**
 * Default export of the package: the tokenizer class, which implements
 * `PGNTokenizerInterface`.
 *
 * This is a declaration only. Method bodies and the types of private members
 * are not part of it, so the notes below on private members are hedged.
 */
export default class PGNTokenizer implements PGNTokenizerInterface {
/** Internal configuration; its type is not exposed by this declaration. */
private readonly config;
/**
 * Internal vocabulary; its type is not exposed by this declaration.
 * NOTE(review): presumably produced by `createMergedVocabulary`; confirm in the implementation.
 */
private readonly mergedVocabulary;
/**
 * Size of the tokenizer's vocabulary.
 * NOTE(review): presumably the number of distinct tokens `encode` can emit;
 * confirm against the implementation.
 */
vocabSize: number;
/** Creates a tokenizer. The constructor takes no arguments. */
constructor();
/** Private helper; its signature is not exposed here. Presumably builds `mergedVocabulary` (TODO confirm). */
private createMergedVocabulary;
/** Private helper; its signature is not exposed here. Presumably resolves a single token in the vocabulary (TODO confirm). */
private lookupToken;
/** Private helper; its signature is not exposed here. Presumably applies one pair-merge step during encoding (TODO confirm). */
private mergePair;
/**
 * Encodes PGN text into numeric tokens.
 *
 * @param pgn - The PGN text to tokenize.
 * @returns The resulting tokens as an array of numbers.
 */
encode(pgn: string): number[];
/**
 * Decodes numeric tokens into a string.
 * NOTE(review): whether `decode(encode(pgn))` reproduces `pgn` exactly is not
 * visible from this declaration; verify before relying on round-tripping.
 *
 * @param tokens - The numeric tokens to decode.
 * @returns The decoded string.
 */
decode(tokens: number[]): string;
}