UNPKG

gpt-tokenizer

Version:

A pure JavaScript implementation of a BPE tokenizer (Encoder/Decoder) for GPT-2 / GPT-3 / GPT-4 and other OpenAI models

135 lines (134 loc) 4.89 kB
{ "name": "gpt-tokenizer", "version": "2.9.0", "description": "A pure JavaScript implementation of a BPE tokenizer (Encoder/Decoder) for GPT-2 / GPT-3 / GPT-4 and other OpenAI models", "keywords": [ "BPE", "encoder", "decoder", "tokenizer", "GPT", "GPT-2", "GPT-3", "GPT-3.5", "GPT-4", "GPT-4o", "NLP", "Natural Language Processing", "Text Generation", "OpenAI", "Machine Learning", "ml" ], "homepage": "https://github.com/niieani/gpt-tokenizer#readme", "bugs": { "url": "https://github.com/niieani/gpt-tokenizer/issues" }, "repository": { "type": "git", "url": "https://github.com/niieani/gpt-tokenizer.git" }, "license": "MIT", "author": "Bazyli Brzoska <npm@invent.life> (https://github.com/niieani)", "contributors": [ { "name": "Bazyli Brzoska", "email": "npm@invent.life", "url": "https://github.com/niieani" } ], "exports": { ".": { "import": "./esm/main.js", "require": "./cjs/main.js" }, "./*": { "import": "./esm/*.js", "require": "./cjs/*.js" }, "./cjs": { "require": "./cjs/main.js" }, "./cjs/*": { "require": "./cjs/*.js" }, "./esm/*": { "import": "./esm/*.js" }, "./data/*": { "import": "./data/*", "require": "./data/*" }, "./package.json": "./package.json" }, "main": "esm/main.js", "unpkg": "dist/cl100k_base.js", "module": "esm/main.js", "source": "src/main.ts", "files": [ "src", "cjs", "esm", "data", "dist" ], "scripts": { "codegen:models": "rm -rf src/model && yarn tsx src/codegen/generateByModel.ts", "codegen:bpe": "rm -rf src/bpeRanks && yarn tsx src/codegen/generateJsBpe.ts", "build": "yarn build:cjs && yarn build:esm && yarn build:umd", "build:cjs": "yarn rrun tsc --outDir cjs --module commonjs --target es2022 --project tsconfig-cjs.json", "build:esm": "mkdir -p esm && echo '{\"name\": \"gpt-tokenizer\", \"type\": \"module\"}' > ./esm/package.json && yarn rrun tsc --outDir esm --target es2022", "build:umd": "yarn build:umd:cl100k_base && yarn build:umd:p50k_base && yarn build:umd:p50k_edit && yarn build:umd:r50k_base && yarn build:umd:o200k_base", "build:umd:cl100k_base": "beemo webpack --entry='./src/main.ts' --env 'outDir=dist' --env 'moduleTarget=umd' --env 'engineTarget=web' --env 'codeTarget=es2022' --env 'name=GPTTokenizer_cl100k_base' --env 'filename=cl100k_base.js'", "build:umd:p50k_base": "beemo webpack --entry='./src/encoding/p50k_base.ts' --env 'outDir=dist' --env 'moduleTarget=umd' --env 'engineTarget=web' --env 'codeTarget=es2022' --env 'name=GPTTokenizer_p50k_base' --env 'filename=p50k_base.js'", "build:umd:p50k_edit": "beemo webpack --entry='./src/encoding/p50k_edit.ts' --env 'outDir=dist' --env 'moduleTarget=umd' --env 'engineTarget=web' --env 'codeTarget=es2022' --env 'name=GPTTokenizer_p50k_edit' --env 'filename=p50k_edit.js'", "build:umd:r50k_base": "beemo webpack --entry='./src/encoding/r50k_base.ts' --env 'outDir=dist' --env 'moduleTarget=umd' --env 'engineTarget=web' --env 'codeTarget=es2022' --env 'name=GPTTokenizer_r50k_base' --env 'filename=r50k_base.js'", "build:umd:o200k_base": "beemo webpack --entry='./src/encoding/o200k_base.ts' --env 'outDir=dist' --env 'moduleTarget=umd' --env 'engineTarget=web' --env 'codeTarget=es2022' --env 'name=GPTTokenizer_o200k_base' --env 'filename=o200k_base.js'", "clean": "git clean -dfX --exclude=node_modules src && beemo typescript:sync-project-refs", "format": "yarn rrun prettier --write \"./{src,tests,.config}/**/!(*.d).{.js,jsx,ts,tsx,json,md}\"", "postinstallDev": "yarn prepare", "prepare": "rrun husky install .config/husky && beemo create-config", "release": "beemo run-script release", "test": "yarn test:format && yarn test:types && yarn test:lint && yarn test:code", "test:code": "vitest", "test:format": "yarn rrun prettier --check \"./{src,tests,.config}/**/!(*.d).{.js,jsx,ts,tsx,json,md}\"", "test:lint": "rrun eslint 'src/*.{js,jsx,ts,tsx}'", "test:types": "yarn rrun tsc --noEmit" }, "release": { "branches": [ "+([0-9])?(.{+([0-9]),x}).x", "master", { "name": "main", "channel": false }, "next", { "name": "beta", "prerelease": true }, { "name": "alpha", "prerelease": true } ], "tagFormat": "${version}" }, "devDependencies": { "@edge-runtime/vm": "^5.0.0", "@niieani/scaffold": "^1.7.39", "@swc/cli": "^0.5.2", "@swc/core": "^1.10.4", "tsx": "^4.19.2", "typescript": "^5.7.2", "vitest": "^2.1.8" }, "resolutions": { "typescript": "5.7.2" }, "packageManager": "yarn@4.5.0", "publishConfig": { "access": "public" } }