@lenml/tokenizer-gemini
Gemini tokenizer for Node.js and the browser
import { tokenizerJSON, tokenizerConfig } from "./data";
import {
  TokenizerLoader,
  TokenizerClassNameMapping,
  FromPreTrainedFn,
  tokenizers,
} from "@lenml/tokenizers";

/**
 * Build a Gemma tokenizer from the bundled pre-trained tokenizer data.
 * Any fields passed in `params` are merged over (and override) the bundled
 * tokenizerJSON and tokenizerConfig.
 */
export const fromPreTrained: FromPreTrainedFn<
  TokenizerClassNameMapping<"GemmaTokenizer">
> = (params) => {
  // The loader returns a generic tokenizer; the exported signature above
  // narrows it, hence the `as any` cast.
  return TokenizerLoader.fromPreTrained({
    tokenizerJSON: {
      ...tokenizerJSON,
      ...params?.tokenizerJSON,
    },
    tokenizerConfig: {
      ...tokenizerConfig,
      ...params?.tokenizerConfig,
    },
  }) as any;
};

export { tokenizerJSON, tokenizerConfig, tokenizers, TokenizerLoader };
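
A minimal usage sketch, assuming the tokenizer returned by fromPreTrained() exposes the transformers.js-style encode/decode methods used across the @lenml/tokenizers packages; the config override shown is purely illustrative:

import { fromPreTrained } from "@lenml/tokenizer-gemini";

// Build the tokenizer from the bundled data (no network access required).
const tokenizer = fromPreTrained();

// Round-trip some text: encode returns token ids, decode restores the text.
const ids = tokenizer.encode("Hello, Gemini!");
console.log(ids.length, tokenizer.decode(ids));

// Overrides passed to fromPreTrained are merged over the bundled
// tokenizerJSON / tokenizerConfig (illustrative config field shown).
const custom = fromPreTrained({
  tokenizerConfig: { clean_up_tokenization_spaces: false },
});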