pii-paladin
Version:
A Node.js package to censor PII in a string using a hybrid NER and Regex approach.
2,045 lines (2,044 loc) • 844 kB
JSON
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 100,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 101,
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 102,
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 103,
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "BertNormalizer",
"clean_text": true,
"handle_chinese_chars": true,
"strip_accents": null,
"lowercase": false
},
"pre_tokenizer": {
"type": "BertPreTokenizer"
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 1
}
}
],
"special_tokens": {
"[CLS]": {
"id": "[CLS]",
"ids": [101],
"tokens": ["[CLS]"]
},
"[SEP]": {
"id": "[SEP]",
"ids": [102],
"tokens": ["[SEP]"]
}
}
},
"decoder": {
"type": "WordPiece",
"prefix": "##",
"cleanup": true
},
"model": {
"type": "WordPiece",
"unk_token": "[UNK]",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 100,
"vocab": {
"[PAD]": 0,
"[unused1]": 1,
"[unused2]": 2,
"[unused3]": 3,
"[unused4]": 4,
"[unused5]": 5,
"[unused6]": 6,
"[unused7]": 7,
"[unused8]": 8,
"[unused9]": 9,
"[unused10]": 10,
"[unused11]": 11,
"[unused12]": 12,
"[unused13]": 13,
"[unused14]": 14,
"[unused15]": 15,
"[unused16]": 16,
"[unused17]": 17,
"[unused18]": 18,
"[unused19]": 19,
"[unused20]": 20,
"[unused21]": 21,
"[unused22]": 22,
"[unused23]": 23,
"[unused24]": 24,
"[unused25]": 25,
"[unused26]": 26,
"[unused27]": 27,
"[unused28]": 28,
"[unused29]": 29,
"[unused30]": 30,
"[unused31]": 31,
"[unused32]": 32,
"[unused33]": 33,
"[unused34]": 34,
"[unused35]": 35,
"[unused36]": 36,
"[unused37]": 37,
"[unused38]": 38,
"[unused39]": 39,
"[unused40]": 40,
"[unused41]": 41,
"[unused42]": 42,
"[unused43]": 43,
"[unused44]": 44,
"[unused45]": 45,
"[unused46]": 46,
"[unused47]": 47,
"[unused48]": 48,
"[unused49]": 49,
"[unused50]": 50,
"[unused51]": 51,
"[unused52]": 52,
"[unused53]": 53,
"[unused54]": 54,
"[unused55]": 55,
"[unused56]": 56,
"[unused57]": 57,
"[unused58]": 58,
"[unused59]": 59,
"[unused60]": 60,
"[unused61]": 61,
"[unused62]": 62,
"[unused63]": 63,
"[unused64]": 64,
"[unused65]": 65,
"[unused66]": 66,
"[unused67]": 67,
"[unused68]": 68,
"[unused69]": 69,
"[unused70]": 70,
"[unused71]": 71,
"[unused72]": 72,
"[unused73]": 73,
"[unused74]": 74,
"[unused75]": 75,
"[unused76]": 76,
"[unused77]": 77,
"[unused78]": 78,
"[unused79]": 79,
"[unused80]": 80,
"[unused81]": 81,
"[unused82]": 82,
"[unused83]": 83,
"[unused84]": 84,
"[unused85]": 85,
"[unused86]": 86,
"[unused87]": 87,
"[unused88]": 88,
"[unused89]": 89,
"[unused90]": 90,
"[unused91]": 91,
"[unused92]": 92,
"[unused93]": 93,
"[unused94]": 94,
"[unused95]": 95,
"[unused96]": 96,
"[unused97]": 97,
"[unused98]": 98,
"[unused99]": 99,
"[UNK]": 100,
"[CLS]": 101,
"[SEP]": 102,
"[MASK]": 103,
"[unused100]": 104,
"[unused101]": 105,
"!": 106,
"\"": 107,
"#": 108,
"$": 109,
"%": 110,
"&": 111,
"'": 112,
"(": 113,
")": 114,
"*": 115,
"+": 116,
",": 117,
"-": 118,
".": 119,
"/": 120,
"0": 121,
"1": 122,
"2": 123,
"3": 124,
"4": 125,
"5": 126,
"6": 127,
"7": 128,
"8": 129,
"9": 130,
":": 131,
";": 132,
"<": 133,
"=": 134,
">": 135,
"?": 136,
"@": 137,
"A": 138,
"B": 139,
"C": 140,
"D": 141,
"E": 142,
"F": 143,
"G": 144,
"H": 145,
"I": 146,
"J": 147,
"K": 148,
"L": 149,
"M": 150,
"N": 151,
"O": 152,
"P": 153,
"Q": 154,
"R": 155,
"S": 156,
"T": 157,
"U": 158,
"V": 159,
"W": 160,
"X": 161,
"Y": 162,
"Z": 163,
"[": 164,
"\\": 165,
"]": 166,
"^": 167,
"_": 168,
"`": 169,
"a": 170,
"b": 171,
"c": 172,
"d": 173,
"e": 174,
"f": 175,
"g": 176,
"h": 177,
"i": 178,
"j": 179,
"k": 180,
"l": 181,
"m": 182,
"n": 183,
"o": 184,
"p": 185,
"q": 186,
"r": 187,
"s": 188,
"t": 189,
"u": 190,
"v": 191,
"w": 192,
"x": 193,
"y": 194,
"z": 195,
"{": 196,
"|": 197,
"}": 198,
"~": 199,
"¡": 200,
"¢": 201,
"£": 202,
"¥": 203,
"§": 204,
"¨": 205,
"©": 206,
"ª": 207,
"«": 208,
"¬": 209,
"®": 210,
"°": 211,
"±": 212,
"²": 213,
"³": 214,
"´": 215,
"µ": 216,
"¶": 217,
"·": 218,
"¹": 219,
"º": 220,
"»": 221,
"¼": 222,
"½": 223,
"¾": 224,
"¿": 225,
"À": 226,
"Á": 227,
"Â": 228,
"Ä": 229,
"Å": 230,
"Æ": 231,
"Ç": 232,
"È": 233,
"É": 234,
"Í": 235,
"Î": 236,
"Ñ": 237,
"Ó": 238,
"Ö": 239,
"×": 240,
"Ø": 241,
"Ú": 242,
"Ü": 243,
"Þ": 244,
"ß": 245,
"à": 246,
"á": 247,
"â": 248,
"ã": 249,
"ä": 250,
"å": 251,
"æ": 252,
"ç": 253,
"è": 254,
"é": 255,
"ê": 256,
"ë": 257,
"ì": 258,
"í": 259,
"î": 260,
"ï": 261,
"ð": 262,
"ñ": 263,
"ò": 264,
"ó": 265,
"ô": 266,
"õ": 267,
"ö": 268,
"÷": 269,
"ø": 270,
"ù": 271,
"ú": 272,
"û": 273,
"ü": 274,
"ý": 275,
"þ": 276,
"ÿ": 277,
"Ā": 278,
"ā": 279,
"ă": 280,
"ą": 281,
"Ć": 282,
"ć": 283,
"Č": 284,
"č": 285,
"ď": 286,
"Đ": 287,
"đ": 288,
"ē": 289,
"ė": 290,
"ę": 291,
"ě": 292,
"ğ": 293,
"ġ": 294,
"Ħ": 295,
"ħ": 296,
"ĩ": 297,
"Ī": 298,
"ī": 299,
"İ": 300,
"ı": 301,
"ļ": 302,
"Ľ": 303,
"ľ": 304,
"Ł": 305,
"ł": 306,
"ń": 307,
"ņ": 308,
"ň": 309,
"ŋ": 310,
"Ō": 311,
"ō": 312,
"ŏ": 313,
"ő": 314,
"Œ": 315,
"œ": 316,
"ř": 317,
"Ś": 318,
"ś": 319,
"Ş": 320,
"ş": 321,
"Š": 322,
"š": 323,
"Ţ": 324,
"ţ": 325,
"ť": 326,
"ũ": 327,
"ū": 328,
"ŭ": 329,
"ů": 330,
"ű": 331,
"ų": 332,
"ŵ": 333,
"ŷ": 334,
"ź": 335,
"Ż": 336,
"ż": 337,
"Ž": 338,
"ž": 339,
"Ə": 340,
"ƒ": 341,
"ơ": 342,
"ư": 343,
"ǎ": 344,
"ǐ": 345,
"ǒ": 346,
"ǔ": 347,
"ǫ": 348,
"Ș": 349,
"ș": 350,
"Ț": 351,
"ț": 352,
"ɐ": 353,
"ɑ": 354,
"ɔ": 355,
"ɕ": 356,
"ə": 357,
"ɛ": 358,
"ɡ": 359,
"ɣ": 360,
"ɨ": 361,
"ɪ": 362,
"ɲ": 363,
"ɾ": 364,
"ʀ": 365,
"ʁ": 366,
"ʂ": 367,
"ʃ": 368,
"ʊ": 369,
"ʋ": 370,
"ʌ": 371,
"ʐ": 372,
"ʑ": 373,
"ʒ": 374,
"ʔ": 375,
"ʰ": 376,
"ʲ": 377,
"ʳ": 378,
"ʷ": 379,
"ʻ": 380,
"ʼ": 381,
"ʾ": 382,
"ʿ": 383,
"ˈ": 384,
"ː": 385,
"ˡ": 386,
"ˢ": 387,
"ˣ": 388,
"́": 389,
"̃": 390,
"̍": 391,
"̯": 392,
"͡": 393,
"Α": 394,
"Β": 395,
"Γ": 396,
"Δ": 397,
"Ε": 398,
"Η": 399,
"Θ": 400,
"Ι": 401,
"Κ": 402,
"Λ": 403,
"Μ": 404,
"Ν": 405,
"Ο": 406,
"Π": 407,
"Σ": 408,
"Τ": 409,
"Φ": 410,
"Χ": 411,
"Ψ": 412,
"Ω": 413,
"ά": 414,
"έ": 415,
"ή": 416,
"ί": 417,
"α": 418,
"β": 419,
"γ": 420,
"δ": 421,
"ε": 422,
"ζ": 423,
"η": 424,
"θ": 425,
"ι": 426,
"κ": 427,
"λ": 428,
"μ": 429,
"ν": 430,
"ξ": 431,
"ο": 432,
"π": 433,
"ρ": 434,
"ς": 435,
"σ": 436,
"τ": 437,
"υ": 438,
"φ": 439,
"χ": 440,
"ψ": 441,
"ω": 442,
"ό": 443,
"ύ": 444,
"ώ": 445,
"І": 446,
"Ј": 447,
"А": 448,
"Б": 449,
"В": 450,
"Г": 451,
"Д": 452,
"Е": 453,
"Ж": 454,
"З": 455,
"И": 456,
"К": 457,
"Л": 458,
"М": 459,
"Н": 460,
"О": 461,
"П": 462,
"Р": 463,
"С": 464,
"Т": 465,
"У": 466,
"Ф": 467,
"Х": 468,
"Ц": 469,
"Ч": 470,
"Ш": 471,
"Э": 472,
"Ю": 473,
"Я": 474,
"а": 475,
"б": 476,
"в": 477,
"г": 478,
"д": 479,
"е": 480,
"ж": 481,
"з": 482,
"и": 483,
"й": 484,
"к": 485,
"л": 486,
"м": 487,
"н": 488,
"о": 489,
"п": 490,
"р": 491,
"с": 492,
"т": 493,
"у": 494,
"ф": 495,
"х": 496,
"ц": 497,
"ч": 498,
"ш": 499,
"щ": 500,
"ъ": 501,
"ы": 502,
"ь": 503,
"э": 504,
"ю": 505,
"я": 506,
"ё": 507,
"і": 508,
"ї": 509,
"ј": 510,
"њ": 511,
"ћ": 512,
"Ա": 513,
"Հ": 514,
"ա": 515,
"ե": 516,
"ի": 517,
"կ": 518,
"մ": 519,
"յ": 520,
"ն": 521,
"ո": 522,
"ս": 523,
"տ": 524,
"ր": 525,
"ւ": 526,
"ְ": 527,
"ִ": 528,
"ֵ": 529,
"ֶ": 530,
"ַ": 531,
"ָ": 532,
"ֹ": 533,
"ּ": 534,
"א": 535,
"ב": 536,
"ג": 537,
"ד": 538,
"ה": 539,
"ו": 540,
"ז": 541,
"ח": 542,
"ט": 543,
"י": 544,
"כ": 545,
"ל": 546,
"ם": 547,
"מ": 548,
"ן": 549,
"נ": 550,
"ס": 551,
"ע": 552,
"פ": 553,
"צ": 554,
"ק": 555,
"ר": 556,
"ש": 557,
"ת": 558,
"،": 559,
"ء": 560,
"آ": 561,
"أ": 562,
"إ": 563,
"ئ": 564,
"ا": 565,
"ب": 566,
"ة": 567,
"ت": 568,
"ث": 569,
"ج": 570,
"ح": 571,
"خ": 572,
"د": 573,
"ذ": 574,
"ر": 575,
"ز": 576,
"س": 577,
"ش": 578,
"ص": 579,
"ض": 580,
"ط": 581,
"ظ": 582,
"ع": 583,
"غ": 584,
"ف": 585,
"ق": 586,
"ك": 587,
"ل": 588,
"م": 589,
"ن": 590,
"ه": 591,
"و": 592,
"ى": 593,
"ي": 594,
"َ": 595,
"ِ": 596,
"ٹ": 597,
"پ": 598,
"چ": 599,
"ک": 600,
"گ": 601,
"ہ": 602,
"ی": 603,
"ے": 604,
"ं": 605,
"आ": 606,
"क": 607,
"ग": 608,
"च": 609,
"ज": 610,
"ण": 611,
"त": 612,
"द": 613,
"ध": 614,
"न": 615,
"प": 616,
"ब": 617,
"भ": 618,
"म": 619,
"य": 620,
"र": 621,
"ल": 622,
"व": 623,
"श": 624,
"ष": 625,
"स": 626,
"ह": 627,
"ा": 628,
"ि": 629,
"ी": 630,
"ु": 631,
"े": 632,
"ो": 633,
"्": 634,
"।": 635,
"॥": 636,
"আ": 637,
"ই": 638,
"এ": 639,
"ও": 640,
"ক": 641,
"খ": 642,
"গ": 643,
"চ": 644,
"ছ": 645,
"জ": 646,
"ট": 647,
"ত": 648,
"থ": 649,
"দ": 650,
"ধ": 651,
"ন": 652,
"প": 653,
"ব": 654,
"ম": 655,
"য": 656,
"র": 657,
"ল": 658,
"শ": 659,
"স": 660,
"হ": 661,
"়": 662,
"া": 663,
"ি": 664,
"ী": 665,
"ু": 666,
"ে": 667,
"ো": 668,
"্": 669,
"য়": 670,
"க": 671,
"த": 672,
"ப": 673,
"ம": 674,
"ய": 675,
"ர": 676,
"ல": 677,
"வ": 678,
"ா": 679,
"ி": 680,
"ு": 681,
"்": 682,
"ร": 683,
"་": 684,
"ག": 685,
"ང": 686,
"ད": 687,
"ན": 688,
"བ": 689,
"མ": 690,
"ར": 691,
"ལ": 692,
"ས": 693,
"ི": 694,
"ུ": 695,
"ེ": 696,
"ོ": 697,
"ა": 698,
"ე": 699,
"ი": 700,
"ლ": 701,
"ნ": 702,
"ო": 703,
"რ": 704,
"ს": 705,
"ᴬ": 706,
"ᴵ": 707,
"ᵀ": 708,
"ᵃ": 709,
"ᵇ": 710,
"ᵈ": 711,
"ᵉ": 712,
"ᵍ": 713,
"ᵏ": 714,
"ᵐ": 715,
"ᵒ": 716,
"ᵖ": 717,
"ᵗ": 718,
"ᵘ": 719,
"ᵢ": 720,
"ᵣ": 721,
"ᵤ": 722,
"ᵥ": 723,
"ᶜ": 724,
"ᶠ": 725,
"ḍ": 726,
"Ḥ": 727,
"ḥ": 728,
"Ḩ": 729,
"ḩ": 730,
"ḳ": 731,
"ṃ": 732,
"ṅ": 733,
"ṇ": 734,
"ṛ": 735,
"ṣ": 736,
"ṭ": 737,
"ạ": 738,
"ả": 739,
"ấ": 740,
"ầ": 741,
"ẩ": 742,
"ậ": 743,
"ắ": 744,
"ế": 745,
"ề": 746,
"ể": 747,
"ễ": 748,
"ệ": 749,
"ị": 750,
"ọ": 751,
"ố": 752,
"ồ": 753,
"ổ": 754,
"ộ": 755,
"ớ": 756,
"ờ": 757,
"ợ": 758,
"ụ": 759,
"ủ": 760,
"ứ": 761,
"ừ": 762,
"ử": 763,
"ữ": 764,
"ự": 765,
"ỳ": 766,
"ỹ": 767,
"ἀ": 768,
"ἐ": 769,
"ὁ": 770,
"ὐ": 771,
"ὰ": 772,
"ὶ": 773,
"ὸ": 774,
"ῆ": 775,
"ῖ": 776,
"ῦ": 777,
"ῶ": 778,
"‐": 779,
"‑": 780,
"‒": 781,
"–": 782,
"—": 783,
"―": 784,
"‖": 785,
"‘": 786,
"’": 787,
"‚": 788,
"“": 789,
"”": 790,
"„": 791,
"†": 792,
"‡": 793,
"•": 794,
"…": 795,
"‰": 796,
"′": 797,
"″": 798,
"⁄": 799,
"⁰": 800,
"ⁱ": 801,
"⁴": 802,
"⁵": 803,
"⁶": 804,
"⁷": 805,
"⁸": 806,
"⁹": 807,
"⁺": 808,
"⁻": 809,
"ⁿ": 810,
"₀": 811,
"₁": 812,
"₂": 813,
"₃": 814,
"₄": 815,
"₅": 816,
"₆": 817,
"₇": 818,
"₈": 819,
"₉": 820,
"₊": 821,
"₍": 822,
"₎": 823,
"ₐ": 824,
"ₑ": 825,
"ₒ": 826,
"ₓ": 827,
"ₕ": 828,
"ₖ": 829,
"ₘ": 830,
"ₙ": 831,
"ₚ": 832,
"ₛ": 833,
"ₜ": 834,
"₤": 835,
"€": 836,
"₱": 837,
"₹": 838,
"ℓ": 839,
"№": 840,
"ℝ": 841,
"⅓": 842,
"←": 843,
"↑": 844,
"→": 845,
"↔": 846,
"⇌": 847,
"⇒": 848,
"∂": 849,
"∈": 850,
"−": 851,
"∗": 852,
"∘": 853,
"√": 854,
"∞": 855,
"∧": 856,
"∨": 857,
"∩": 858,
"∪": 859,
"≈": 860,
"≠": 861,
"≡": 862,
"≤": 863,
"≥": 864,
"⊂": 865,
"⊆": 866,
"⊕": 867,
"⋅": 868,
"─": 869,
"│": 870,
"■": 871,
"●": 872,
"★": 873,
"☆": 874,
"☉": 875,
"♠": 876,
"♣": 877,
"♥": 878,
"♦": 879,
"♭": 880,
"♯": 881,
"⟨": 882,
"⟩": 883,
"ⱼ": 884,
"、": 885,
"。": 886,
"《": 887,
"》": 888,
"「": 889,
"」": 890,
"『": 891,
"』": 892,
"〜": 893,
"い": 894,
"う": 895,
"え": 896,
"お": 897,
"か": 898,
"き": 899,
"く": 900,
"け": 901,
"こ": 902,
"さ": 903,
"し": 904,
"す": 905,
"せ": 906,
"そ": 907,
"た": 908,
"ち": 909,
"つ": 910,
"て": 911,
"と": 912,
"な": 913,
"に": 914,
"の": 915,
"は": 916,
"ひ": 917,
"ま": 918,
"み": 919,
"む": 920,
"め": 921,
"も": 922,
"や": 923,
"ゆ": 924,
"よ": 925,
"ら": 926,
"り": 927,
"る": 928,
"れ": 929,
"ん": 930,
"ア": 931,
"ィ": 932,
"イ": 933,
"ウ": 934,
"エ": 935,
"オ": 936,
"カ": 937,
"ガ": 938,
"キ": 939,
"ク": 940,
"グ": 941,
"コ": 942,
"サ": 943,
"シ": 944,
"ジ": 945,
"ス": 946,
"ズ": 947,
"タ": 948,
"ダ": 949,
"ッ": 950,
"テ": 951,
"デ": 952,
"ト": 953,
"ド": 954,
"ナ": 955,
"ニ": 956,
"ハ": 957,
"バ": 958,
"パ": 959,
"フ": 960,
"ブ": 961,
"プ": 962,
"マ": 963,
"ミ": 964,
"ム": 965,
"ャ": 966,
"ュ": 967,
"ラ": 968,
"リ": 969,
"ル": 970,
"レ": 971,
"ロ": 972,
"ン": 973,
"・": 974,
"ー": 975,
"一": 976,
"三": 977,
"上": 978,
"下": 979,
"中": 980,
"事": 981,
"二": 982,
"井": 983,
"京": 984,
"人": 985,
"亻": 986,
"仁": 987,
"佐": 988,
"侍": 989,
"光": 990,
"公": 991,
"力": 992,
"北": 993,
"十": 994,
"南": 995,
"原": 996,
"口": 997,
"史": 998,
"司": 999,
"吉": 1000,
"同": 1001,
"和": 1002,
"囗": 1003,
"国": 1004,
"國": 1005,
"土": 1006,
"城": 1007,
"士": 1008,
"大": 1009,
"天": 1010,
"太": 1011,
"夫": 1012,
"女": 1013,
"子": 1014,
"宀": 1015,
"安": 1016,
"宮": 1017,
"宿": 1018,
"小": 1019,
"尚": 1020,
"山": 1021,
"島": 1022,
"川": 1023,
"州": 1024,
"平": 1025,
"年": 1026,
"心": 1027,
"愛": 1028,
"戸": 1029,
"文": 1030,
"新": 1031,
"方": 1032,
"日": 1033,
"明": 1034,
"星": 1035,
"書": 1036,
"月": 1037,
"木": 1038,
"本": 1039,
"李": 1040,
"村": 1041,
"東": 1042,
"松": 1043,
"林": 1044,
"正": 1045,
"武": 1046,
"氏": 1047,
"水": 1048,
"氵": 1049,
"江": 1050,
"河": 1051,
"海": 1052,
"版": 1053,
"犬": 1054,
"王": 1055,
"生": 1056,
"田": 1057,
"白": 1058,
"皇": 1059,
"省": 1060,
"真": 1061,
"石": 1062,
"社": 1063,
"神": 1064,
"竹": 1065,
"美": 1066,
"義": 1067,
"花": 1068,
"藤": 1069,
"西": 1070,
"谷": 1071,
"車": 1072,
"辶": 1073,
"道": 1074,
"郎": 1075,
"郡": 1076,
"部": 1077,
"野": 1078,
"金": 1079,
"長": 1080,
"門": 1081,
"陽": 1082,
"青": 1083,
"食": 1084,
"馬": 1085,
"高": 1086,
"龍": 1087,
"龸": 1088,
"사": 1089,
"씨": 1090,
"의": 1091,
"이": 1092,
"한": 1093,
"fi": 1094,
"fl": 1095,
"!": 1096,
"(": 1097,
")": 1098,
",": 1099,
"-": 1100,
"/": 1101,
":": 1102,
"the": 1103,
"of": 1104,
"and": 1105,
"to": 1106,
"in": 1107,
"was": 1108,
"The": 1109,
"is": 1110,
"for": 1111,
"as": 1112,
"on": 1113,
"with": 1114,
"that": 1115,
"##s": 1116,
"his": 1117,
"by": 1118,
"he": 1119,
"at": 1120,
"from": 1121,
"it": 1122,
"her": 1123,
"He": 1124,
"had": 1125,
"an": 1126,
"were": 1127,
"you": 1128,
"be": 1129,
"In": 1130,
"she": 1131,
"are": 1132,
"but": 1133,
"which": 1134,
"It": 1135,
"not": 1136,
"or": 1137,
"have": 1138,
"my": 1139,
"him": 1140,
"one": 1141,
"this": 1142,
"me": 1143,
"has": 1144,
"also": 1145,
"up": 1146,
"their": 1147,
"first": 1148,
"out": 1149,
"who": 1150,
"been": 1151,
"they": 1152,
"She": 1153,
"into": 1154,
"all": 1155,
"would": 1156,
"its": 1157,
"##ing": 1158,
"time": 1159,
"two": 1160,
"##a": 1161,
"##e": 1162,
"said": 1163,
"about": 1164,
"when": 1165,
"over": 1166,
"more": 1167,
"other": 1168,
"can": 1169,
"after": 1170,
"back": 1171,
"them": 1172,
"then": 1173,
"##ed": 1174,
"there": 1175,
"like": 1176,
"so": 1177,
"only": 1178,
"##n": 1179,
"could": 1180,
"##d": 1181,
"##i": 1182,
"##y": 1183,
"what": 1184,
"no": 1185,
"##o": 1186,
"where": 1187,
"This": 1188,
"made": 1189,
"than": 1190,
"if": 1191,
"You": 1192,
"##ly": 1193,
"through": 1194,
"we": 1195,
"before": 1196,
"##r": 1197,
"just": 1198,
"some": 1199,
"##er": 1200,
"years": 1201,
"do": 1202,
"New": 1203,
"##t": 1204,
"down": 1205,
"between": 1206,
"new": 1207,
"now": 1208,
"will": 1209,
"three": 1210,
"most": 1211,
"On": 1212,
"around": 1213,
"year": 1214,
"used": 1215,
"such": 1216,
"being": 1217,
"well": 1218,
"during": 1219,
"They": 1220,
"know": 1221,
"against": 1222,
"under": 1223,
"later": 1224,
"did": 1225,
"part": 1226,
"known": 1227,
"off": 1228,
"while": 1229,
"His": 1230,
"re": 1231,
"...": 1232,
"##l": 1233,
"people": 1234,
"until": 1235,
"way": 1236,
"American": 1237,
"didn": 1238,
"University": 1239,
"your": 1240,
"both": 1241,
"many": 1242,
"get": 1243,
"United": 1244,
"became": 1245,
"head": 1246,
"There": 1247,
"second": 1248,
"As": 1249,
"work": 1250,
"any": 1251,
"But": 1252,
"still": 1253,
"again": 1254,
"born": 1255,
"even": 1256,
"eyes": 1257,
"After": 1258,
"including": 1259,
"de": 1260,
"took": 1261,
"And": 1262,
"long": 1263,
"team": 1264,
"season": 1265,
"family": 1266,
"see": 1267,
"right": 1268,
"same": 1269,
"called": 1270,
"name": 1271,
"because": 1272,
"film": 1273,
"don": 1274,
"10": 1275,
"found": 1276,
"much": 1277,
"school": 1278,
"##es": 1279,
"going": 1280,
"won": 1281,
"place": 1282,
"away": 1283,
"We": 1284,
"day": 1285,
"left": 1286,
"John": 1287,
"000": 1288,
"hand": 1289,
"since": 1290,
"World": 1291,
"these": 1292,
"how": 1293,
"make": 1294,
"number": 1295,
"each": 1296,
"life": 1297,
"area": 1298,
"man": 1299,
"four": 1300,
"go": 1301,
"No": 1302,
"here": 1303,
"very": 1304,
"National": 1305,
"##m": 1306,
"played": 1307,
"released": 1308,
"never": 1309,
"began": 1310,
"States": 1311,
"album": 1312,
"home": 1313,
"last": 1314,
"too": 1315,
"held": 1316,
"several": 1317,
"May": 1318,
"own": 1319,
"##on": 1320,
"take": 1321,
"end": 1322,
"School": 1323,
"##h": 1324,
"ll": 1325,
"series": 1326,
"What": 1327,
"want": 1328,
"use": 1329,
"another": 1330,
"city": 1331,
"When": 1332,
"2010": 1333,
"side": 1334,
"At": 1335,
"may": 1336,
"That": 1337,
"came": 1338,
"face": 1339,
"June": 1340,
"think": 1341,
"game": 1342,
"those": 1343,
"high": 1344,
"March": 1345,
"early": 1346,
"September": 1347,
"##al": 1348,
"2011": 1349,
"looked": 1350,
"July": 1351,
"state": 1352,
"small": 1353,
"thought": 1354,
"went": 1355,
"January": 1356,
"October": 1357,
"##u": 1358,
"based": 1359,
"August": 1360,
"##us": 1361,
"world": 1362,
"good": 1363,
"April": 1364,
"York": 1365,
"us": 1366,
"12": 1367,
"2012": 1368,
"2008": 1369,
"For": 1370,
"2009": 1371,
"group": 1372,
"along": 1373,
"few": 1374,
"South": 1375,
"little": 1376,
"##k": 1377,
"following": 1378,
"November": 1379,
"something": 1380,
"2013": 1381,
"December": 1382,
"set": 1383,
"2007": 1384,
"old": 1385,
"2006": 1386,
"2014": 1387,
"located": 1388,
"##an": 1389,
"music": 1390,
"County": 1391,
"City": 1392,
"former": 1393,
"##in": 1394,
"room": 1395,
"ve": 1396,
"next": 1397,
"All": 1398,
"##man": 1399,
"got": 1400,
"father": 1401,
"house": 1402,
"##g": 1403,
"body": 1404,
"15": 1405,
"20": 1406,
"18": 1407,
"started": 1408,
"If": 1409,
"2015": 1410,
"town": 1411,
"our": 1412,
"line": 1413,
"War": 1414,
"large": 1415,
"population": 1416,
"named": 1417,
"British": 1418,
"company": 1419,
"member": 1420,
"five": 1421,
"My": 1422,
"single": 1423,
"##en": 1424,
"age": 1425,
"State": 1426,
"moved": 1427,
"February": 1428,
"11": 1429,
"Her": 1430,
"should": 1431,
"century": 1432,
"government": 1433,
"built": 1434,
"come": 1435,
"best": 1436,
"show": 1437,
"However": 1438,
"within": 1439,
"look": 1440,
"men": 1441,
"door": 1442,
"without": 1443,
"need": 1444,
"wasn": 1445,
"2016": 1446,
"water": 1447,
"One": 1448,
"system": 1449,
"knew": 1450,
"every": 1451,
"died": 1452,
"League": 1453,
"turned": 1454,
"asked": 1455,
"North": 1456,
"St": 1457,
"wanted": 1458,
"building": 1459,
"received": 1460,
"song": 1461,
"served": 1462,
"though": 1463,
"felt": 1464,
"##ia": 1465,
"station": 1466,
"band": 1467,
"##ers": 1468,
"local": 1469,
"public": 1470,
"himself": 1471,
"different": 1472,
"death": 1473,
"say": 1474,
"##1": 1475,
"30": 1476,
"##2": 1477,
"2005": 1478,
"16": 1479,
"night": 1480,
"behind": 1481,
"children": 1482,
"English": 1483,
"members": 1484,
"near": 1485,
"saw": 1486,
"together": 1487,
"son": 1488,
"14": 1489,
"voice": 1490,
"village": 1491,
"13": 1492,
"hands": 1493,
"help": 1494,
"##3": 1495,
"due": 1496,
"French": 1497,
"London": 1498,
"top": 1499,
"told": 1500,
"open": 1501,
"published": 1502,
"third": 1503,
"2017": 1504,
"play": 1505,
"across": 1506,
"During": 1507,
"put": 1508,
"final": 1509,
"often": 1510,
"include": 1511,
"25": 1512,
"##le": 1513,
"main": 1514,
"having": 1515,
"2004": 1516,
"once": 1517,
"ever": 1518,
"let": 1519,
"book": 1520,
"led": 1521,
"gave": 1522,
"late": 1523,
"front": 1524,
"find": 1525,
"club": 1526,
"##4": 1527,
"German": 1528,
"included": 1529,
"species": 1530,
"College": 1531,
"form": 1532,
"opened": 1533,
"mother": 1534,
"women": 1535,
"enough": 1536,
"West": 1537,
"must": 1538,
"2000": 1539,
"power": 1540,
"really": 1541,
"17": 1542,
"making": 1543,
"half": 1544,
"##6": 1545,
"order": 1546,
"might": 1547,
"##is": 1548,
"given": 1549,
"million": 1550,
"times": 1551,
"days": 1552,
"point": 1553,
"full": 1554,
"service": 1555,
"With": 1556,
"km": 1557,
"major": 1558,
"##7": 1559,
"original": 1560,
"become": 1561,
"seen": 1562,
"II": 1563,
"north": 1564,
"six": 1565,
"##te": 1566,
"love": 1567,
"##0": 1568,
"national": 1569,
"International": 1570,
"##5": 1571,
"24": 1572,
"So": 1573,
"District": 1574,
"lost": 1575,
"run": 1576,
"couldn": 1577,
"career": 1578,
"always": 1579,
"##9": 1580,
"2003": 1581,
"##th": 1582,
"country": 1583,
"##z": 1584,
"House": 1585,
"air": 1586,
"tell": 1587,
"south": 1588,
"worked": 1589,
"woman": 1590,
"player": 1591,
"##A": 1592,
"almost": 1593,
"war": 1594,
"River": 1595,
"##ic": 1596,
"married": 1597,
"continued": 1598,
"Then": 1599,
"James": 1600,
"close": 1601,
"black": 1602,
"short": 1603,
"##8": 1604,
"##na": 1605,
"using": 1606,
"history": 1607,
"returned": 1608,
"light": 1609,
"car": 1610,
"##ra": 1611,
"sure": 1612,
"William": 1613,
"things": 1614,
"General": 1615,
"##ry": 1616,
"2002": 1617,
"better": 1618,
"support": 1619,
"100": 1620,
"among": 1621,
"From": 1622,
"feet": 1623,
"King": 1624,
"anything": 1625,
"21": 1626,
"19": 1627,
"established": 1628,
"district": 1629,
"2001": 1630,
"feel": 1631,
"great": 1632,
"##ton": 1633,
"level": 1634,
"Cup": 1635,
"These": 1636,
"written": 1637,
"games": 1638,
"others": 1639,
"already": 1640,
"title": 1641,
"story": 1642,
"##p": 1643,
"law": 1644,
"thing": 1645,
"US": 1646,
"record": 1647,
"role": 1648,
"however": 1649,
"By": 1650,
"students": 1651,
"England": 1652,
"white": 1653,
"control": 1654,
"least": 1655,
"inside": 1656,
"land": 1657,
"##C": 1658,
"22": 1659,
"give": 1660,
"community": 1661,
"hard": 1662,
"##ie": 1663,
"non": 1664,
"##c": 1665,
"produced": 1666,
"George": 1667,
"round": 1668,
"period": 1669,
"Park": 1670,
"business": 1671,
"various": 1672,
"##ne": 1673,
"does": 1674,
"present": 1675,
"wife": 1676,
"far": 1677,
"taken": 1678,
"per": 1679,
"reached": 1680,
"David": 1681,
"able": 1682,
"version": 1683,
"working": 1684,
"young": 1685,
"live": 1686,
"created": 1687,
"joined": 1688,
"East": 1689,
"living": 1690,
"appeared": 1691,
"case": 1692,
"High": 1693,
"done": 1694,
"23": 1695,
"important": 1696,
"President": 1697,
"Award": 1698,
"France": 1699,
"position": 1700,
"office": 1701,
"looking": 1702,
"total": 1703,
"general": 1704,
"class": 1705,
"To": 1706,
"production": 1707,
"##S": 1708,
"football": 1709,
"party": 1710,
"brother": 1711,
"keep": 1712,
"mind": 1713,
"free": 1714,
"Street": 1715,
"hair": 1716,
"announced": 1717,
"development": 1718,
"either": 1719,
"nothing": 1720,
"moment": 1721,
"Church": 1722,
"followed": 1723,
"wrote": 1724,
"why": 1725,
"India": 1726,
"San": 1727,
"election": 1728,
"1999": 1729,
"lead": 1730,
"How": 1731,
"##ch": 1732,
"##rs": 1733,
"words": 1734,
"European": 1735,
"course": 1736,
"considered": 1737,
"America": 1738,
"arms": 1739,
"Army": 1740,
"political": 1741,
"##la": 1742,
"28": 1743,
"26": 1744,
"west": 1745,
"east": 1746,
"ground": 1747,
"further": 1748,
"church": 1749,
"less": 1750,
"site": 1751,
"First": 1752,
"Not": 1753,
"Australia": 1754,
"toward": 1755,
"California": 1756,
"##ness": 1757,
"described": 1758,
"works": 1759,
"An": 1760,
"Council": 1761,
"heart": 1762,
"past": 1763,
"military": 1764,
"27": 1765,
"##or": 1766,
"heard": 1767,
"field": 1768,
"human": 1769,
"soon": 1770,
"founded": 1771,
"1998": 1772,
"playing": 1773,
"trying": 1774,
"##x": 1775,
"##ist": 1776,
"##ta": 1777,
"television": 1778,
"mouth": 1779,
"although": 1780,
"taking": 1781,
"win": 1782,
"fire": 1783,
"Division": 1784,
"##ity": 1785,
"Party": 1786,
"Royal": 1787,
"program": 1788,
"Some": 1789,
"Don": 1790,
"Association": 1791,
"According": 1792,
"tried": 1793,
"TV": 1794,
"Paul": 1795,
"outside": 1796,
"daughter": 1797,
"Best": 1798,
"While": 1799,
"someone": 1800,
"match": 1801,
"recorded": 1802,
"Canada": 1803,
"closed": 1804,
"region": 1805,
"Air": 1806,
"above": 1807,
"months": 1808,
"elected": 1809,
"##da": 1810,
"##ian": 1811,
"road": 1812,
"##ar": 1813,
"brought": 1814,
"move": 1815,
"1997": 1816,
"leave": 1817,
"##um": 1818,
"Thomas": 1819,
"1996": 1820,
"am": 1821,
"low": 1822,
"Robert": 1823,
"formed": 1824,
"person": 1825,
"services": 1826,
"points": 1827,
"Mr": 1828,
"miles": 1829,
"##b": 1830,
"stop": 1831,
"rest": 1832,
"doing": 1833,
"needed": 1834,
"international": 1835,
"release": 1836,
"floor": 1837,
"start": 1838,
"sound": 1839,
"call": 1840,
"killed": 1841,
"real": 1842,
"dark": 1843,
"research": 1844,
"finished": 1845,
"language": 1846,
"Michael": 1847,
"professional": 1848,
"change": 1849,
"sent": 1850,
"50": 1851,
"upon": 1852,
"29": 1853,
"track": 1854,
"hit": 1855,
"event": 1856,
"2018": 1857,
"term": 1858,
"example": 1859,
"Germany": 1860,
"similar": 1861,
"return": 1862,
"##ism": 1863,
"fact": 1864,
"pulled": 1865,
"stood": 1866,
"says": 1867,
"ran": 1868,
"information": 1869,
"yet": 1870,
"result": 1871,
"developed": 1872,
"girl": 1873,
"##re": 1874,
"God": 1875,
"1995": 1876,
"areas": 1877,
"signed": 1878,
"decided": 1879,
"##ment": 1880,
"Company": 1881,
"seemed": 1882,
"##el": 1883,
"co": 1884,
"turn": 1885,
"race": 1886,
"common": 1887,
"video": 1888,
"Charles": 1889,
"Indian": 1890,
"##ation": 1891,
"blood": 1892,
"art": 1893,
"red": 1894,
"##able": 1895,
"added": 1896,
"rather": 1897,
"1994": 1898,
"met": 1899,
"director": 1900,
"addition": 1901,
"design": 1902,
"average": 1903,
"minutes": 1904,