UNPKG

@lenml/tokenizer-gemini

Version:

gemini tokenizer for NodeJS/Browser

2,352 lines (2,351 loc) 17.5 MB
{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "<eos>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "<bos>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "<mask>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 5, "content": "<2mass>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 6, "content": "[@BOS@]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 7, "content": "<unused0>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 8, "content": "<unused1>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 9, "content": "<unused2>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 10, "content": "<unused3>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 11, "content": "<unused4>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 12, "content": "<unused5>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 13, "content": "<unused6>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 14, "content": "<unused7>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 15, "content": "<unused8>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 16, "content": "<unused9>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 17, "content": "<unused10>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 18, "content": "<unused11>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 19, "content": "<unused12>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 20, "content": "<unused13>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 21, "content": "<unused14>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 22, "content": "<unused15>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 23, "content": "<unused16>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 24, "content": "<unused17>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 25, "content": "<unused18>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 26, "content": "<unused19>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 27, "content": "<unused20>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 28, "content": "<unused21>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 29, "content": "<unused22>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 30, "content": "<unused23>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 31, "content": "<unused24>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 32, "content": "<unused25>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 33, "content": "<unused26>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 34, "content": "<unused27>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 35, "content": "<unused28>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 36, "content": "<unused29>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 37, "content": "<unused30>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 38, "content": "<unused31>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 39, "content": "<unused32>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 40, "content": "<unused33>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 41, "content": "<unused34>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 42, "content": "<unused35>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 43, "content": "<unused36>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 44, "content": "<unused37>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 45, "content": "<unused38>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 46, "content": "<unused39>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 47, "content": "<unused40>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 48, "content": "<unused41>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 49, "content": "<unused42>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 50, "content": "<unused43>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 51, "content": "<unused44>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 52, "content": "<unused45>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 53, "content": "<unused46>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 54, "content": "<unused47>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 55, "content": "<unused48>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 56, "content": "<unused49>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 57, "content": "<unused50>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 58, "content": "<unused51>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 59, "content": "<unused52>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 60, "content": "<unused53>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 61, "content": "<unused54>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 62, "content": "<unused55>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 63, "content": "<unused56>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 64, "content": "<unused57>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 65, "content": "<unused58>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 66, "content": "<unused59>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 67, "content": "<unused60>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 68, "content": "<unused61>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 69, "content": "<unused62>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 70, "content": "<unused63>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 71, "content": "<unused64>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 72, "content": "<unused65>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 73, "content": "<unused66>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 74, "content": "<unused67>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 75, "content": "<unused68>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 76, "content": "<unused69>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 77, "content": "<unused70>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 78, "content": "<unused71>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 79, "content": "<unused72>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 80, "content": "<unused73>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 81, "content": "<unused74>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 82, "content": "<unused75>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 83, "content": "<unused76>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 84, "content": "<unused77>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 85, "content": "<unused78>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 86, "content": "<unused79>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 87, "content": "<unused80>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 88, "content": "<unused81>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 89, "content": "<unused82>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 90, "content": "<unused83>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 91, "content": "<unused84>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 92, "content": "<unused85>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 93, "content": "<unused86>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 94, "content": "<unused87>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 95, "content": "<unused88>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 96, "content": "<unused89>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 97, "content": "<unused90>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 98, "content": "<unused91>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 99, "content": "<unused92>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 100, "content": "<unused93>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 101, "content": "<unused94>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 102, "content": "<unused95>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 103, "content": "<unused96>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 104, "content": "<unused97>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 105, "content": "<unused98>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 106, "content": "<start_of_turn>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 107, "content": "<end_of_turn>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 108, "content": "\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 109, "content": "\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 110, "content": "\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 111, "content": "\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 112, "content": "\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 113, "content": "\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 114, "content": "\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 115, "content": "\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 116, "content": "\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 117, "content": "\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 118, "content": "\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 119, "content": "\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 120, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 121, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 122, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 123, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 124, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 125, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 126, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 127, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 128, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 129, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 130, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 131, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 132, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 133, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 134, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 135, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 136, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 137, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 138, "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 139, "content": "▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 140, "content": "▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 141, "content": "▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 142, "content": "▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 143, "content": "▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 144, "content": "▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 145, "content": "▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 146, "content": "▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 147, "content": "▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 148, "content": "▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 149, "content": "▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 150, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 151, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 152, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 153, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 154, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 155, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 156, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 157, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 158, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 159, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 160, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 161, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 162, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 163, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 164, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 165, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 166, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 167, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 168, "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 169, "content": "<table>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 170, "content": "<caption>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 171, "content": "<thead>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 172, "content": "<tbody>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 173, "content": "<tfoot>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 174, "content": "<tr>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 175, "content": "<th>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 176, "content": "<td>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 177, "content": "</table>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 178, "content": "</caption>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 179, "content": "</thead>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 180, "content": "</tbody>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 181, "content": "</tfoot>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 182, "content": "</tr>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 183, "content": "</th>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 184, "content": "</td>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 185, "content": "<h1>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 186, "content": "<h2>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 187, "content": "<h3>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 188, "content": "<h4>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 189, "content": "<h5>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 190, "content": "<h6>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 191, "content": "<blockquote>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 192, "content": "</h1>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 193, "content": "</h2>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 194, "content": "</h3>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 195, "content": "</h4>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 196, "content": "</h5>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 197, "content": "</h6>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 198, "content": "</blockquote>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 199, "content": "<strong>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 200, "content": "<em>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 201, "content": "<b>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 202, "content": "<i>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 203, "content": "<u>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 204, "content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 205, "content": "<sub>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 206, "content": "<sup>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 207, "content": "<code>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 208, "content": "</strong>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 209, "content": "</em>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 210, "content": "</b>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 211, "content": "</i>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 212, "content": "</u>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 213, "content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 214, "content": "</sub>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 215, "content": "</sup>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false }, { "id": 216, "content": "</code>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": false } ], "normalizer": { "type": "Replace", "pattern": { "String": " " }, "content": "▁" }, "pre_tokenizer": null, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "<bos>", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "<bos>", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "<bos>", "type_id": 1 } }, { "Sequence": { "id": "B", "type_id": 1 } } ], "special_tokens": { "<bos>": { "id": "<bos>", "ids": [ 2 ], "tokens": [ "<bos>" ] } } }, "decoder": { "type": "Sequence", "decoders": [ { "type": "Replace", "pattern": { "String": "▁" }, "content": " " }, { "type": "ByteFallback" }, { "type": "Fuse" } ] }, "model": { "type": "BPE", "dropout": null, "unk_token": "<unk>", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": true, "byte_fallback": true, "ignore_merges": false, "vocab": { "<pad>": 0, "<eos>": 1, "<bos>": 2, "<unk>": 3, "<mask>": 4, "<2mass>": 5, "[@BOS@]": 6, "<unused0>": 7, "<unused1>": 8, "<unused2>": 9, "<unused3>": 10, "<unused4>": 11, "<unused5>": 12, "<unused6>": 13, "<unused7>": 14, "<unused8>": 15, "<unused9>": 16, "<unused10>": 17, "<unused11>": 18, "<unused12>": 19, "<unused13>": 20, "<unused14>": 21, "<unused15>": 22, "<unused16>": 23, "<unused17>": 24, "<unused18>": 25, "<unused19>": 26, "<unused20>": 27, "<unused21>": 28, "<unused22>": 29, "<unused23>": 30, "<unused24>": 31, "<unused25>": 32, "<unused26>": 33, "<unused27>": 34, "<unused28>": 35, "<unused29>": 36, "<unused30>": 37, "<unused31>": 38, "<unused32>": 39, "<unused33>": 40, "<unused34>": 41, "<unused35>": 42, "<unused36>": 43, "<unused37>": 44, "<unused38>": 45, "<unused39>": 46, "<unused40>": 47, "<unused41>": 48, "<unused42>": 49, "<unused43>": 50, "<unused44>": 51, "<unused45>": 52, "<unused46>": 53, "<unused47>": 54, "<unused48>": 55, "<unused49>": 56, "<unused50>": 57, "<unused51>": 58, "<unused52>": 59, "<unused53>": 60, "<unused54>": 61, "<unused55>": 62, "<unused56>": 63, "<unused57>": 64, "<unused58>": 65, "<unused59>": 66, "<unused60>": 67, "<unused61>": 68, "<unused62>": 69, "<unused63>": 70, "<unused64>": 71, "<unused65>": 72, "<unused66>": 73, "<unused67>": 74, "<unused68>": 75, "<unused69>": 76, "<unused70>": 77, "<unused71>": 78, "<unused72>": 79, "<unused73>": 80, "<unused74>": 81, "<unused75>": 82, "<unused76>": 83, "<unused77>": 84, "<unused78>": 85, "<unused79>": 86, "<unused80>": 87, "<unused81>": 88, "<unused82>": 89, "<unused83>": 90, "<unused84>": 91, "<unused85>": 92, "<unused86>": 93, "<unused87>": 94, "<unused88>": 95, "<unused89>": 96, "<unused90>": 97, "<unused91>": 98, "<unused92>": 99, "<unused93>": 100, "<unused94>": 101, "<unused95>": 102, "<unused96>": 103, "<unused97>": 104, "<unused98>": 105, "<start_of_turn>": 106, "<end_of_turn>": 107, "\n": 108, "\n\n": 109, "\n\n\n": 110, "\n\n\n\n": 111, "\n\n\n\n\n": 112, "\n\n\n\n\n\n": 113, "\n\n\n\n\n\n\n": 114, "\n\n\n\n\n\n\n\n": 115, "\n\n\n\n\n\n\n\n\n": 116, "\n\n\n\n\n\n\n\n\n\n": 117, "\n\n\n\n\n\n\n\n\n\n\n": 118, "\n\n\n\n\n\n\n\n\n\n\n\n": 119, "\n\n\n\n\n\n\n\n\n\n\n\n\n": 120, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 121, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 122, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 123, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 124, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 125, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 126, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 127, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 128, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 129, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 130, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 131, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 132, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 133, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 134, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 135, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 136, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 137, "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n": 138, "▁▁": 139, "▁▁▁": 140, "▁▁▁▁": 141, "▁▁▁▁▁": 142, "▁▁▁▁▁▁": 143, "▁▁▁▁▁▁▁": 144, "▁▁▁▁▁▁▁▁": 145, "▁▁▁▁▁▁▁▁▁": 146, "▁▁▁▁▁▁▁▁▁▁": 147, "▁▁▁▁▁▁▁▁▁▁▁": 148, "▁▁▁▁▁▁▁▁▁▁▁▁": 149, "▁▁▁▁▁▁▁▁▁▁▁▁▁": 150, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 151, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 152, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 153, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 154, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 155, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 156, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 157, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 158, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 159, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 160, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 161, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 162, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 163, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 164, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 165, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 166, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 167, "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 168, "<table>": 169, "<caption>": 170, "<thead>": 171, "<tbody>": 172, "<tfoot>": 173, "<tr>": 174, "<th>": 175, "<td>": 176, "</table>": 177, "</caption>": 178, "</thead>": 179, "</tbody>": 180, "</tfoot>": 181, "</tr>": 182, "</th>": 183, "</td>": 184, "<h1>": 185, "<h2>": 186, "<h3>": 187, "<h4>": 188, "<h5>": 189, "<h6>": 190, "<blockquote>": 191, "</h1>": 192, "</h2>": 193, "</h3>": 194, "</h4>": 195, "</h5>": 196, "</h6>": 197, "</blockquote>": 198, "<strong>": 199, "<em>": 200, "<b>": 201, "<i>": 202, "<u>": 203, "<s>": 204, "<sub>": 205, "<sup>": 206, "<code>": 207, "</strong>": 208, "</em>": 209, "</b>": 210, "</i>": 211, "</u>": 212, "</s>": 213, "</sub>": 214, "</sup>": 215, "</code>": 216, "<0x00>": 217, "<0x01>": 218, "<0x02>": 219, "<0x03>": 220, "<0x04>": 221, "<0x05>": 222, "<0x06>": 223, "<0x07>": 224, "<0x08>": 225, "\t": 226, "<0x0A>": 227, "<0x0B>": 228, "<0x0C>": 229, "<0x0D>": 230, "<0x0E>": 231, "<0x0F>": 232, "<0x10>": 233, "<0x11>": 234, "<0x12>": 235, "<0x13>": 236, "<0x14>": 237, "<0x15>": 238, "<0x16>": 239, "<0x17>": 240, "<0x18>": 241, "<0x19>": 242, "<0x1A>": 243, "<0x1B>": 244, "<0x1C>": 245, "<0x1D>": 246, "<0x1E>": 247, "<0x1F>": 248, "<0x20>": 249, "<0x21>": 250, "<0x22>": 251, "<0x23>": 252, "<0x24>": 253, "<0x25>": 254, "<0x26>": 255, "<0x27>": 256, "<0x28>": 257, "<0x29>": 258, "<0x2A>": 259, "<0x2B>": 260, "<0x2C>": 261, "<0x2D>": 262, "<0x2E>": 263, "<0x2F>": 264, "<0x30>": 265, "<0x31>": 266, "<0x32>": 267, "<0x33>": 268, "<0x34>": 269, "<0x35>": 270, "<0x36>": 271, "<0x37>": 272, "<0x38>": 273, "<0x39>": 274, "<0x3A>": 275, "<0x3B>": 276, "<0x3C>": 277, "<0x3D>": 278, "<0x3E>": 279, "<0x3F>": 280, "<0x40>": 281, "<0x41>": 282, "<0x42>": 283, "<0x43>": 284, "<0x44>": 285, "<0x45>": 286, "<0x46>": 287, "<0x47>": 288, "<0x48>": 289, "<0x49>": 290, "<0x4A>": 291, "<0x4B>": 292, "<0x4C>": 293, "<0x4D>": 294, "<0x4E>": 295, "<0x4F>": 296, "<0x50>": 297, "<0x51>": 298, "<0x52>": 299, "<0x53>": 300, "<0x54>": 301,