budoux
Version:
A small chunk segmenter.
672 lines (666 loc) • 65.4 kB
JavaScript
"use strict";
(() => {
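// module/data/models/ja.js
// Weight table in the plain-object shape that the `Parser` constructor accepts:
// each top-level key is a feature group (UW1-UW6, BW1-BW3, TW1-TW4) whose entries
// map a one-, two- or three-character string respectively to a numeric weight.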
var model = { "UW3": { "\u3002": 6728, "\u3001": 4793, "\u3092": 5613, "\u306B": 3832, "\u306E": 3728, "\u306F": 4247, "\u304C": 4149, "\u3068": 2957, "\u308B": 2755, "\u304F": 2852, "\u3082": 3216, "\u3066": 2448, "\u3067": 3460, "\u305F": 2223, "\u3063": -1955, "\u3044": 1914, "\u3046": 1964, "\u65B9": 2774, "\u306A": 2289, "\u3089": 1418, "\uFF3D": 2179, "\u304B": 1931, "\u308A": 1064, "\uFF0C": 3562, "\uFF01": 3285, "\u30FC": -113, "\uFF1F": 3707, "\u3084": 2168, "\u3070": 1685, "\u3069": 1948, "\uFF0E": 2081, "\u30FB": 1008, "\u3064": 1318, "\u2026": 2476, "\u3054": -2411, "\uFF09": 2663, "\u90E8": 1626, "\u2606": 2714, "\u4E0A": 540, "\u300D": 1657, "\u30C3": -2007, "\u308C": 375, "\u266A": 2559, "\u305A": 2032, "\u4EE5": -1912, "\u3093": 704, "\u3060": 895, "\u3078": 2163, "\u308D": 1051, "\u30AF": 342, "\u96FB": -1749, "\u7B11": 2419, "\u4ECA": 1104, "\u307E": -484, "\u5206": 1162, "\u4E2D": 1012, "\u3050": 2032, "\u4EAC": -1416, "\u4EBA": 495, "\u304A": -1350, "\u30A4": -592, "\u5229": -713, "\uFF10": -863, "\u304D": 491, "\u3041": 1313, "\u9593": 727, "\u3059": 959, "\u30F3": -313, "\u30D5": -1354, "\u3083": 1253, "\u30B9": -1019, "\u300C": -1412, "\u65E5": 750, "\u5EA6": 1104, "\u5F8C": 1273, "\u30B7": -1228, "\u884C": -808, "\u3005": 1430, "\u30A2": -842, "\u30C9": -1098, "\u5F53": -343, "\u89B3": -1024, "\u3076": 1465, "\u30EA": -626, "\u5927": -904, "\u3048": 751, "\u591C": 1560, "\u300F": 1445, "\uFF11": -635, "\u79CB": 1420, "\u614B": -1193, "\u6614": 1693, "\u4F53": 1037, "\u3080": 1166, "\u7A7A": 796, "\uFF3B": -1062, "\u5909": 398, "\u5B66": -641, "\u7686": 1406, "\u6708": 621, "\u984C": 1106, "\u6A5F": -775, "\u7136": 591, "\u3042": 783, "\u8D85": 1029, "\u301C": 526, "\u305E": 831, "\u975E": 849, "\u3061": 463, "\u5149": -218, "\u4FC2": 555, "\u5E74": 352, "\u307F": 312, "\u7531": 478, "\uFF1E": 696, "\u611F": 452, "\u30D7": -431, "\uFF4F": -603, "\u756A": 290, "\u30BA": 344, "\u8272": 657, "\u3057": 224, "\u610F": -369, "\u3073": 341, 
"\u5165": -491, "\u3051": 157, "\u6240": 622, "\u81EA": -411, "\u5834": -59, "\u30E1": -203, "\u3000": 311, "\u6307": -356, "\u6C17": 69, "\u982D": 788, "\u7740": -302, "\u5225": 286, "\u51FA": -175, "\u3071": 455, "\u30AB": -121, "\u671D": 316, "\u201D": 313, "\u54C1": 139, "\u679C": 565, "\u4E00": -71, "\u8A9E": 196, "\u7A0B": 262, "\u5FDC": 275, "\u5BFA": 135, "\u6709": -208, "\u751F": -143, "\u795E": -190, "\u60F3": 209, "\u4E0D": -171, "\u5E97": 200, "\uFF1A": 111, "\u30B0": 38, "\u30EC": -105, "\u6599": -113, "\u5C4B": 71, "\u30D1": -87, "\u6C34": 70, "\u30B5": 79, "\u6E1B": 86, "\u56DE": 28, "\u30D0": -12, "\uFF15": -4 }, "UW4": { "\u3002": -7454, "\u306E": -4026, "\u3066": -3886, "\u304C": -3308, "\u3001": -7522, "\u308B": -5031, "\u306B": -4106, "\u306F": -3340, "\u3067": -3076, "\u3063": -4215, "\u3092": -4779, "\u3042": 564, "\u3068": -2905, "\u3046": -2139, "\u305F": -1616, "\u307E": -1127, "\u306A": -1887, "\u308A": -4104, "\u3054": 551, "\u3057": -1826, "\u308C": -4215, "\u3053": -496, "\u304F": -3009, "\u3089": -4434, "\u304B": -2652, "\u305D": 152, "\u304D": -2034, "\u3060": -1794, "\u30FC": -2920, "\u3044": -1560, "\u307F": -1078, "\u884C": 374, "\u7A7A": 2550, "\u3093": -3604, "\u3082": -2695, "\u3059": -1087, "\u300D": -5550, "\u3055": -1889, "\u601D": 1398, "\u3081": -1842, "\u3051": -3319, "\u300C": 2416, "\u3084": -1292, "\u6642": 593, "\u4EBA": 598, "\u4E00": 541, "\u4EAC": 686, "\u79C1": 1919, "\uFF0C": -4270, "\u8868": 902, "\u3064": -1274, "\uFF01": -4521, "\u30C3": -2351, "\u30FB": -625, "\u3088": -553, "\u51B7": 2511, "\uFF11": 770, "\u30F3": -2142, "\u3048": -1671, "\u6301": 119, "\u4F7F": 1139, "\u308F": -1549, "\u65B9": -831, "\u6765": 435, "\uFF08": 2512, "\u3086": -2377, "\u308D": -2910, "\u30EB": -1250, "\u624B": 246, "\u2026": -2234, "\u3058": -2235, "\uFF1F": -3518, "\u4F55": 1232, "\u3069": -1238, "\uFF3B": 1883, "\u898B": 508, "\u3079": -2886, "\uFF09": -4231, "\u5927": 531, "\u672C": -1158, "\u5F7C": 1311, "\u7B11": -3521, 
"\u5F53": 271, "\uFF0E": -2893, "\u65E5": 288, "\u305B": -1291, "\u3070": -2305, "\u4ECA": 1115, "\u30A4": -501, "\u3061": -2020, "\u6C17": 410, "\u5165": 633, "\u81EA": 804, "\u305A": -1738, "\uFF12": 553, "\u6700": 1132, "\u7528": -721, "\u5E2F": -1198, "\u304A": 755, "\u6709": 910, "\u643A": 394, "\u98DF": 175, "\u3083": -2476, "\u9593": -350, "\u591A": 1041, "\u3072": 1304, "\u4E2D": 62, "\uFF3D": -1461, "\u300F": -3295, "\u306D": -1755, "\u767D": 570, "\u3041": -1446, "\u6C34": 337, "\u98A8": 759, "\u30B3": 329, "\u304E": -1380, "\u5408": -1101, "\u30EA": -588, "\uFF10": -400, "\u50D5": 974, "\u611F": 216, "\u201D": -842, "\u3087": -1617, "\u5C0F": 722, "\u307C": -959, "\u516C": 685, "\uFF13": 84, "\u697D": 380, "\u266A": -1513, "\u301C": -1208, "\u2606": -1315, "\u5168": 523, "\u9053": 276, "\u4E8C": 520, "\u30B9": -228, "\u5B50": -685, "\u66F8": 380, "\u30C9": -58, "\u4E0D": 535, "\u9AD8": 245, "\u7279": 763, "\u796D": 408, "\u8FD4": -363, "\u614B": -544, "\u3065": -1053, "\u5B9F": 477, "\u5B9A": -355, "\u5C11": 444, "\u8A71": 355, "\u8CB7": 433, "\u4ED6": 388, "\u305C": -896, "\u8857": 536, "\u76EE": 133, "\uFF4F": -565, "\u7684": -562, "\u8FBC": -726, "\u305E": -956, "\u30E5": -549, "\u3050": -787, "\u30B5": 147, "\u7121": 172, "\u6771": 517, "\u590F": 546, "\u73FE": 229, "\u524D": -155, "\u3076": -685, "\u5834": 121, "\u63DB": -349, "\u3052": -637, "\u30BF": -256, "\u30C7": 205, "\u5E74": -410, "\u3073": -543, "\u307B": -202, "\u7269": -209, "\u30D5": -202, "\u8A00": 146, "\u5238": -589, "\u3078": -432, "\u30C8": -159, "\u5916": 180, "\u3056": -354, "\u79CB": 360, "\u96E3": 227, "\u30E7": -220, "\u5473": 93, "\u6D41": 325, "\u5199": 295, "\u3000": -221, "\u65B0": 91, "\u30E9": -110, "\u5E97": 99, "\u53C2": 199, "\uFF15": 52, "\uFF3E": -328, "\u7F8E": 167, "\u3005": -202, "\u4E09": 85, "\u958B": 59, "\u62BC": 210, "\u76F8": 97, "\u7518": 106, "\u5B89": 114, "\u56DE": -147, "\u95A2": -151, "\u53D6": -117, "\u51FA": -33, "\u52E2": -12, "\u30E0": -52, 
"\u2212": -57, "\uFF16": 24, "\u767E": 32, "\uFF23": 24, "\u5C4B": -16, "\u30AA": 8, "\u5BB6": 16, "\u30A3": -12, "\u30AF": -4 }, "UW5": { "\u3002": -2064, "\u3001": -1010, "\u3067": -892, "\u307E": -99, "\u3059": -474, "\u306A": -458, "\u3063": 997, "\u308B": 217, "\u3066": 222, "\u306F": -855, "\u304C": -518, "\u304D": 1186, "\u3057": -365, "\u3092": -637, "\u3042": -552, "\u65E5": 790, "\u3053": 424, "\u306B": -611, "\u3048": 686, "\u3060": -539, "\u3044": 363, "\u305D": -296, "\u308D": -280, "\u3093": 1035, "\u3079": 1778, "\u30FB": -1055, "\u3068": -630, "\u307F": 332, "\u304B": 223, "\u3056": 1423, "\u5E2F": 371, "\u308F": 580, "\u308C": 534, "\u306E": -607, "\u3089": 233, "\u3064": 1267, "\u3061": 794, "\u3046": 383, "\u3081": 689, "\u30EB": -478, "\u3058": 642, "\u3055": -302, "\u300D": -1120, "\u5408": 456, "\u3088": -273, "\u305A": 996, "\u308A": 353, "\u307C": 1417, "\u304F": 460, "\u5E74": 975, "\u96FB": -290, "\u305B": -306, "\u51B7": 1220, "\u305F": 203, "\u4F7F": -1266, "\uFF01": -437, "\u601D": -810, "\uFF3D": -483, "\u3082": -128, "\u5229": 356, "\u5B66": 191, "\u610F": 739, "\u3050": 179, "\u30D0": -257, "\u30C8": -355, "\u5F53": 439, "\u5206": 201, "\u3005": 563, "\u4F53": -1129, "\u5834": 469, "\u3080": 664, "\u5238": -832, "\u898B": -249, "\u30A4": 189, "\u6765": -128, "\u6599": 584, "\u5BFA": -493, "\u624B": 173, "\u304E": 262, "\uFF0E": -169, "\u30BF": 284, "\u3070": 302, "\u6C34": -221, "\u901A": -189, "\u884C": -215, "\u5148": -737, "\u4E00": -212, "\u7530": 358, "\u7D9A": -686, "\u672C": 41, "\u30FC": 24, "\u30A2": -172, "\u3087": 338, "\uFF1F": -244, "\u3073": 222, "\uFF4F": 191, "\u3078": -279, "\u8981": 114, "\u4F55": -342, "\u6700": -104, "\u4E0A": -38, "\u3051": 38, "\u3000": -234, "\u90FD": 43, "\u6210": 93, "\u7684": -118, "\u30E9": 59, "\u4E2D": 88, "\u30F3": -25, "\uFF08": -97, "\uFF2E": 235, "\uFF17": -151, "\u52B9": 87, "\u679A": 95, "\uFF11": -41, "\u3083": 20, "\u6709": -57, "\u4EAC": -41, "\u30AA": -102, "\u964D": 28, 
"\u751F": -12, "\uFF09": -20, "\u30E5": -4 }, "UW2": { "\u3002": -1435, "\u3001": -1422, "\u3092": -2559, "\u304C": -1164, "\u306F": -1025, "\u3063": 253, "\u306B": -1053, "\u4EE5": 2191, "\u308C": 6, "\u304B": -87, "\u3066": -1038, "\u3067": -850, "\u5168": 1718, "\u3093": 588, "\u306E": -678, "\u305D": -342, "\u3086": 1826, "\u3081": 94, "\u3084": 171, "\u3068": -790, "\u304F": -676, "\u3046": -389, "\u3082": -824, "\u4E00": 1094, "\u3042": -137, "\u3053": 84, "\u3055": 563, "\u5229": 22, "\u3060": 329, "\u3054": -701, "\u6BCE": 1711, "\u3059": 65, "\u7D50": 2020, "\u5C11": 607, "\u5F7C": 771, "\u6700": 450, "\u304D": -546, "\u540C": 501, "\u3088": 599, "\u6A5F": 1124, "\u4EBA": 227, "\u3048": 118, "\u307B": 667, "\u3044": -98, "\u5927": 361, "\u308A": -391, "\u304A": -412, "\u300C": -480, "\u672C": 699, "\u624B": 768, "\u610F": 558, "\u95A2": 892, "\u4EAC": -332, "\u76EE": -1046, "\u308B": -265, "\u305C": 780, "\u306A": -421, "\u7740": -459, "\u30B1": -265, "\u984C": 869, "\u601D": -34, "\u6211": -1566, "\u3072": -1571, "\u591A": 282, "\u4E0A": -219, "\u305F": -140, "\u30FC": 281, "\u3071": 403, "\u3069": 67, "\u6C34": 310, "\u632F": -1294, "\u3070": -375, "\u5915": 692, "\u3064": -293, "\u7D76": 1290, "\u88CF": -1205, "\u5BFA": -454, "\u53CB": 327, "\u540D": 308, "\u5BB9": 912, "\u4ECA": 280, "\u307F": -131, "\u30D5": -298, "\u5165": -364, "\u30F3": 138, "\u30A4": -180, "\u9AD8": 345, "\u307E": -151, "\u3089": -57, "\u30B9": 123, "\u53D6": -279, "\u6599": 53, "\u4E16": -929, "\u30D1": 295, "\u96FB": 310, "\uFF52": 1144, "\u3005": 180, "\uFF0C": -377, "\u6253": 427, "\uFF3E": 149, "\u843D": -586, "\u30D7": -252, "\u51B7": -564, "\u8ECA": 221, "\u548C": 397, "\u7528": -220, "\u7518": 553, "\u7684": 174, "\u4E0B": 230, "\u9078": 254, "\u6975": 690, "\u77E5": -368, "\u30E9": -235, "\u91CD": 319, "\u826F": 164, "\u30E1": -47, "\u5B9F": 126, "\u6642": 98, "\u7ACB": -357, "\uFF3D": -114, "\u76F8": 242, "\uFF1A": 201, "\u4E57": -222, "\u5973": -225, "\u683C": 351, 
"\uFF28": -196, "\u30A3": -50, "\u611F": 67, "\u6B63": 275, "\u5F53": 105, "\uFF3B": -37, "\u3051": 29, "\u65E9": 100, "\u5F0F": 33, "\u30EB": 8, "\u30B5": -74, "\uFF14": 45, "\uFF15": -44, "\u5DDD": 49, "\u6B8B": 40, "\u4E09": 20, "\u898B": -8, "\u30AB": -4, "\u3083": 4, "\u660E": 4 }, "UW6": { "\u304C": 458, "\u3002": -261, "\u3067": 251, "\u5165": 1042, "\u3057": 496, "\u306F": 244, "\u3054": -343, "\u3042": -434, "\u306B": 259, "\u7A7A": 1574, "\u3066": -48, "\u3060": 87, "\u306E": 74, "\u3081": 1567, "\u306A": 197, "\u3055": 523, "\u3046": -76, "\u5238": 1647, "\u3084": 728, "\u308A": 471, "\u307E": 172, "\u308D": 319, "\u3044": 346, "\u3092": 168, "\u304B": 239, "\u3093": 363, "\u3089": 50, "\u30EB": 173, "\u3063": 269, "\u30AF": -297, "\u5F7C": -1380, "\u3051": -161, "\uFF10": 657, "\u5BFA": 710, "\u614B": 407, "\u30FB": -672, "\u3001": 143, "\u8FBC": 694, "\u3070": 364, "\u5834": -799, "\uFF1A": 881, "\u304F": 196, "\u3086": 571, "\u308B": 83, "\u6C17": 448, "\u30FC": -8, "\u30BF": 425, "\u3071": 888, "\u7684": 471, "\u30E1": 254, "\u308F": 215, "\u30D4": 833, "\u305A": -550, "\u30B1": -466, "\u904E": 936, "\u9001": -1043, "\u30B7": -165, "\u30A4": -256, "\u30AB": 294, "\u3082": -76, "\u52B9": -690, "\u308C": 131, "\u713C": 761, "\u305F": -5, "\u6765": -262, "\u65B9": -301, "\u3064": 66, "\u6A5F": 251, "\u601D": -43, "\u51FA": 201, "\u5370": 379, "\u3053": 51, "\u5E97": 254, "\u81EA": 167, "\u304E": 322, "\u3083": -136, "\u7740": -113, "\u53C2": 278, "\u9593": -73, "\u4E0D": 176, "\u305E": 227, "\u5468": 337, "\u30D1": -119, "\u2026": -96, "\u30F3": -25, "\u3087": -138, "\u56DE": -46, "\u529B": -75, "\u3073": 53, "\uFF11": 4, "\u7DF4": 36, "\u751F": -24, "\uFF2E": -48, "\uFF33": -20, "\uFF19": 8 }, "BW2": { "\u305F\u3068": 1506, "\u3068\u3044": 288, "\u3068\u304A": -3916, "\u306E\u307F": -2778, "\u3066\u3044": -1173, "\u3067\u3042": -3424, "\u306A\u3044": -2833, "\u3067\u3059": -3140, "\u305F\u307E": -2384, "\u306E\u3067": -905, "\u3044\u305F": -1757, 
"\u3067\u304D": -2098, "\u3046\u3084": -1836, "\u306B\u3082": -1242, "\u306B\u306F": -963, "\u3067\u3082": -1866, "\u306F\u306A": -294, "\u304F\u306A": -1808, "\u3001\u305D": -1730, "\u3001\u3068": -636, "\u308B\u3088": -1166, "\u5E2F\u96FB": -1447, "\u3066\u304A": -2417, "\u305F\u3060": -3137, "\u306E\u3088": -616, "\u3067\u3057": -1972, "\u307E\u305F": -2915, "\u308B\u3068": -559, "\u306A\u3069": -1328, "\u306E\u6A5F": -1328, "\u306E\u306F": -1154, "\u304F\u3057": -1455, "\u3082\u3042": -956, "\u3044\u307E": -603, "\u3066\u307F": -1631, "\u3066\u3042": -2192, "\u305F\u3044": -1203, "\u3082\u3057": -546, "\u306F\u3067": 398, "\u3044\u3064": -1249, "\u304C\u3068": -484, "\u306E\u3060": -995, "\u3059\u3054": -1286, "\u306F\u3044": -737, "\u3046\u3069": -1333, "\u4EBA\u304C": 1448, "\u3044\u3082": 544, "\u306B\u306A": -270, "\u30FB\u30FB": -1236, "\u3066\u304D": -1175, "\u6642\u9593": -828, "\u306E\u524D": -673, "\u306E\u3082": -751, "\u306F\u3042": -1272, "\u3069\u3053": -929, "\u3067\u306F": -1166, "\u304F\u3060": -995, "\u3068\u306A": 111, "\u308C\u305F": -801, "\u306A\u304F": -804, "\u305F\u6642": 453, "\u3089\u306A": -87, "\u3001\u306A": -216, "\u3068\u304B": -910, "\u306B\u3057": -414, "\u3066\u304F": -749, "\u3067\u306A": -1185, "\u308B\u5834": 78, "\u306E\u304B": -354, "\u3068\u3082": -533, "\u3057\u305F": -580, "\u3082\u306A": -186, "\u3068\u3053": -385, "\u3044\u3057": -588, "\u30FC\u30C9": -559, "\u305F\u3088": -582, "\u3044\u3088": -794, "\u306F\u305A": -658, "\u3070\u3044": -551, "\u3066\u3082": -255, "\u3044\u3044": -353, "\u3068\u304D": -281, "\u3066\u306F": 27, "\u3082\u3064": -372, "\u306A\u304B": -225, "\u306E\u304C": -360, "\u306E\uFF11": -795, "\u304F\u3055": -134, "\u306F\u3058": -435, "\u3082\u306E": -147, "\u3001\uFF11": -856, "\u30FC\u306A": 657, "\u3001\u6E05": -495, "\u3068\u540C": -436, "\u306B\u3067": -377, "\u3001\u307E": -262, "\u3057\u3044": -318, "\u3044\u306A": -109, "\u306E\u65B9": 319, "\u3084\u3059": -414, "\u305F\u3082": 143, 
"\u304C\u306A": 82, "\u3064\u306E": 388, "\u307E\u305B": -121, "\u3068\u3059": -220, "\uFF3D\u4EAC": -271, "\u3088\u3046": -85, "\u308A\u306A": 136, "\u3067\u3044": -139, "\u3044\u4EBA": 105, "\u308B\u306E": -109, "\u30FC\u3067": 125, "\u3089\u3044": -107, "\u306E\u307E": -45, "\u306A\u3041": -53, "\u3066\u305F": -82, "\u6C17\u6301": -106, "\u306A\u308A": 81, "\u3067\u306B": 32, "\u308B\u3067": 28, "\u306E\u5F8C": -32, "\u4ECA\u65E5": -40, "\u3093\u3067": 24, "\u3082\u3046": -12, "\u3068\u306F": -4 }, "BW1": { "\u4EE5\u4E0A": 1273, "\u306E\u307F": 2658, "\u308C\u305F": 1590, "\u307E\u307E": 2847, "\u305F\u307E": 2261, "\u3044\u3046": 338, "\u304B\u3089": 1803, "\u3088\u3046": -1764, "\u308A\u304C": -1370, "\u3081\u3066": 1033, "\u305D\u306E": -1196, "\u767D\u3044": 2426, "\u3063\u305F": 161, "\u3068\u3046": 1397, "\u3067\u306F": -501, "\u305F\u3068": -1243, "\u306A\u3044": 827, "\u3068\u304B": 1743, "\u3046\u306A": 512, "\u308B\u306E": -1611, "\u3053\u3068": -1467, "\u5229\u7528": 1155, "\u3067\u3082": 756, "\u3068\u304C": -1259, "\u305F\u306E": -1570, "\u3053\u306E": -1058, "\u3088\u308A": 1131, "\u3002\u30FB": 2639, "\u306E\u306F": 1326, "\u3057\u3044": 842, "\u304C\u3001": 1759, "\u6700\u8FD1": 1593, "\u304B\u3082": -1285, "\u30C3\u30AF": 622, "\u3082\u3046": 1776, "\u3066\u3082": 636, "\u3093\u306A": 369, "\u305F\u3089": 943, "\u3093\u3067": -747, "\u304C\u3068": -1484, "\u3082\u306E": -1092, "\u308C\u3066": -1075, "\u307E\u308A": 1378, "\u306B\u3082": 652, "\u5C11\u3057": 1499, "\u307E\u3060": 1379, "\u3068\u3044": -1037, "\u308A\u3068": 1131, "\u3068\u304D": 1494, "\u308B\u3068": 205, "\u540C\u3058": 1574, "\u3082\u3064": 1554, "\u3053\u305D": 1334, "\u3055\u3093": 451, "\u3067\u3001": 726, "\u306F\u3001": 527, "\u3042\u308B": 205, "\u3060\u3051": 713, "\u306A\u3089": 1205, "\u305F\uFF0E": 1789, "\u3066\u3001": 840, "\u308C\u308B": 473, "\u306B\u306F": 421, "\u6C17\u306B": -66, "\u601D\u3044": -1307, "\u306A\u305C": 1605, "\u3046\u3084": -1437, 
"\u306E\u9593": 1274, "\u3063\u3066": -155, "\u3044\u3001": -292, "\u3082\u3057": 1057, "\u3048\u3070": 807, "\u304D\u306A": 923, "\u3093\u3068": -204, "\u3066\u308B": 615, "\u3001\u3068": 386, "\u307E\u308B": -282, "\u305F\u3044": 222, "\u3044\u3067": -902, "\u30B1\u30FC": -854, "\u305F\u3060": 581, "\u3044\u306E": -311, "\u3066\u3044": -581, "\u308B\u3067": 373, "\u3068\u3067": 938, "\u304F\u3066": 684, "\u3044\u305F": 369, "\u3044\u308B": 181, "\u307E\u3041": 482, "\u305D\u308C": 187, "\u307E\u3067": 406, "\u666E\u6BB5": 620, "\u306E\u304C": 605, "\u308C\u3070": 544, "\u306E\u5F8C": 946, "\u30FC\u30B8": 970, "\u3057\u304B": 49, "\u306E\u3082": 609, "\u306A\u306E": -54, "\u308B\u4EBA": 668, "\u53CB\u9054": 846, "\u306A\u304F": 169, "\uFF01\uFF01": 318, "\u3068\u3001": 422, "\u8CB7\u3044": -799, "\u5927\u5909": 359, "\uFF10\u5186": 324, "\u4F7F\u3044": -756, "\u3001\u4ECA": 407, "\u306B\u3001": 573, "\u3081\u308B": -345, "\u73FE\u5728": 575, "\u305F\u3081": 581, "\u6C17\u304C": 370, "\u30AF\u30EB": 478, "\u3093\u3060": 313, "\u304D\u305F": 331, "\u6765\u3066": 493, "\u3071\u308A": 435, "\u3067\u304D": -74, "\u3079\u304D": 564, "\u304F\u308C": 268, "\u304D\u308A": 466, "\u53D6\u308A": -293, "\u3002\u3042": -543, "\u3066\u305F": 304, "\u3053\u3046": -328, "\u3066\u304F": -112, "\u3084\u3089": 275, "\u306E\u4E0A": 508, "\u4F7F\u3046": 230, "\u3044\u3082": -376, "\u304B\u3001": -238, "\u3002\uFF13": 372, "\u3051\u3069": 166, "\u308B\u306B": -305, "\u3068\u3082": 71, "\u308A\u306B": 73, "\u3058\u3066": 195, "\u3082\u3001": 165, "\u4E00\u756A": 100, "\u3059\u308B": -78, "\u3059\u3089": 209, "\u3044\u306A": -106, "\u3046\u3068": -114, "\u306A\u3069": 204, "\u56FD\u4EBA": 143, "\u306E\u6642": 215, "\u5B9F\u969B": 189, "\u308D\u3046": -193, "\u3068\u306F": -77, "\u3046\u3061": 96, "\u3069\u3046": 58, "\u30FB\u30FB": 87, "\u305F\u308A": 74, "\u3068\u306B": -57, "\u306F\uFF11": 180, "\u3046\u306E": -32, "\u3058\u3083": -89, "\u30E5\u30FC": 28, "\u4E00\u3064": -56, 
"\u306B\u304B": 36, "\u308A\u306F": -12, "\u898B\u3066": -4 }, "UW1": { "\u304A": 980, "\u306F": -281, "\u307E": 70, "\u3046": 349, "\u3054": 921, "\u3068": 102, "\u308B": -32, "\u3089": 110, "\u306A": -311, "\u4E0A": -1236, "\u306B": -339, "\u3067": -111, "\u5B66": 179, "\u3044": -69, "\u308A": -86, "\u3001": -28, "\u30D0": 452, "\u5F53": -977, "\u304C": -164, "\u96FB": 145, "\u3061": 389, "\u6C34": 1514, "\u3092": -149, "\u3064": 85, "\u643A": -52, "\u3063": 192, "\u304B": -151, "\uFF3B": -679, "\u3086": -565, "\u30FC": 121, "\u3093": 176, "\u30D5": -161, "\u308D": 227, "\u884C": -21, "\u3042": -60, "\u3060": 182, "\u7528": 648, "\u5916": 230, "\uFF10": 232, "\u30B9": -301, "\u7A7A": -552, "\u9593": 358, "\u81EA": 442, "\u3059": 163, "\u3057": -7, "\u30B1": -283, "\u6A5F": 454, "\u5229": -567, "\u3084": -289, "\u4F55": 307, "\u30C4": 156, "\u5C0F": 306, "\u5165": 300, "\u5B89": -487, "\u65B0": 434, "\u30C1": 131, "\u3053": 98, "\u751F": 269, "\u3055": -140, "\uFF13": -298, "\u56F0": 671, "\u30A4": 258, "\u30C3": 224, "\u8FBC": 96, "\u5EAD": 580, "\u554F": 615, "\u7530": 144, "\u3002": -28, "\u3082": -33, "\u4F7F": -177, "\u9AD8": 204, "\u5238": 379, "\u3081": -103, "\u30D1": 261, "\u56FD": -467, "\u69D8": 370, "\u30E9": 129, "\u793E": 254, "\uFF01": 110, "\u632F": 349, "\u8A00": -136, "\u7740": -48, "\u30DA": 353, "\u307F": -44, "\uFF0E": -49, "\u601D": 107, "\u898B": 83, "\u6B32": 212, "\u5C45": 173, "\u30E1": 94, "\uFF1F": 79, "\u3056": 158, "\u64AE": -208, "\u5927": 33, "\u9023": -100, "\u9577": 115, "\u3087": -53, "\u500B": -127, "\u5468": 40, "\u9032": 93, "\u5C71": 61, "\u30B5": 32, "\u30DC": 32, "\u65B9": 20, "\u30F3": 12, "\u8CB7": -8, "\u306D": -4 }, "BW3": { "\u3082\u306E": 3658, "\u3042\u308A": 1182, "\u307E\u3059": -1304, "\u3044\u305F": 591, "\u3044\u3046": 798, "\u305D\u306E": 2051, "\u3044\u3044": 1225, "\u305F\u307E": 1149, "\u306E\u307F": 2809, "\u3067\u3082": 1705, "\u3042\u308B": -359, "\u3088\u3046": -159, "\u6301\u3063": 1377, "\u3068\u304D": 
1908, "\u3068\u304A": 1041, "\u3068\u3044": -1177, "\u306A\u3063": -544, "\u306A\u308B": 406, "\u3053\u308D": -364, "\u3068\u3046": -430, "\u304B\u3051": 2917, "\u3053\u3068": 975, "\u624B\u306B": 1163, "\u4EE5\u4E0A": -1970, "\u3053\u306E": 1601, "\u3057\u3066": 352, "\u3067\u304D": 652, "\u306F\u3057": 3071, "\u3068\u3066": 2260, "\u304B\u3089": -1074, "\u30E1\u30FC": 296, "\u305D\u3046": -790, "\u3082\u3089": 2683, "\u3061\u3087": 2013, "\u304B\u304B": 1802, "\u305F\u3081": 910, "\u3057\u307E": -186, "\u3082\u3046": 2313, "\u30FB\u30FB": -1608, "\u3057\u305F": 991, "\u3081\u3066": -2007, "\u3088\u308A": -1514, "\u3068\u3082": 1059, "\u3044\u308D": 772, "\u3082\u3093": 1116, "\u3069\u3046": 784, "\u3059\u3054": 1168, "\u304A\u308A": -1312, "\u307E\u3041": 1409, "\u305F\u3060": 1447, "\u304D\u3063": 651, "\u3060\u3051": -1043, "\u6765\u305F": 310, "\u96FB\u8ECA": 190, "\u306F\u307E": 1022, "\u304D\u307E": -890, "\u3057\u3083": 1379, "\u3057\u3044": -571, "\u3068\u3063": 989, "\u307E\u3057": -902, "\u3088\u304F": 815, "\u3055\u3093": -1344, "\u304B\u3063": -920, "\u3053\u308C": 999, "\u5834\u5408": 69, "\u3046\u307E": 557, "\u3081\u308B": -1596, "\u3044\u3066": -473, "\u306A\u3044": 260, "\u3059\u3050": 1151, "\u3067\u3059": -540, "\u3084\u3063": 464, "\u3082\u3068": 1360, "\u3059\u3002": -277, "\u3044\u304B": 537, "\u4E2D\u3067": 435, "\u308F\u3051": -32, "\u308F\u304B": 697, "\u3044\u308B": 617, "\u306A\u304B": 151, "\u8003\u3048": 545, "\u3068\u3053": 787, "\u3044\u3084": 897, "\u307E\u305F": -494, "\u3053\u305D": -806, "\u3082\u3063": 729, "\u307F\u305F": -758, "\u3069\u3053": 758, "\u307E\u3060": 374, "\u304A\u8336": 782, "\u3042\u3063": -84, "\u30D1\u30BD": 446, "\u304B\u3082": -516, "\u304B\u3064": 730, "\u306A\u304F": 184, "\u3068\u308A": 708, "\u3046\u3061": 225, "\u307E\u3063": -266, "\u3081\u3063": 368, "\u3044\u3089": 687, "\u307E\u305A": 464, "\u51FA\u6765": -456, "\u307E\u305B": -464, "\u3060\u3044": 421, "\u306A\u3089": -162, "\u306F\u3044": 351, 
"\u3082\u3064": 607, "\u3053\u3053": 647, "\u307B\u3057": -356, "\u305F\u3044": -308, "\u3064\u3044": 283, "\u3064\u3051": 354, "\u3069\u306E": 556, "\u307E\u3067": -558, "\u6599\u7406": 67, "\u4F7F\u3063": 250, "\u884C\u304D": -274, "\u65E5\u672C": 213, "\u51FA\u3066": 149, "\u796D\u308A": 361, "\u30EB\u30FC": 408, "\u697D\u3057": 197, "\u3055\u3044": -317, "\u3060\u308D": -548, "\u9006\u306B": 379, "\u3064\u306A": 305, "\u3068\u3042": 326, "\u304A\u5E97": 197, "\u30A4\u30F3": 186, "\u66F8\u3044": 164, "\u307C\u304F": 281, "\u3093\u306F": 289, "\u4EBA\u306F": 79, "\u306A\u3069": -138, "\u3067\u3001": 96, "\u307E\u308A": -225, "\u307F\u304C": -123, "\u306F\u305A": 44, "\u7A0B\u5EA6": -73, "\u305A\u3063": 44, "\u3057\u304F": -24, "\u8FBC\u3093": -3 }, "TW4": { "\u3068\u3053\u308D": 2298, "\u304F\u3089\u3044": 1840, "\u3088\u3046\u306A": -2720, "\u3057\u305F\u3002": -1639, "\u3067\u304D\u305F": 2328, "\u3042\u308A\u307E": -1854, "\u3053\u3068\u304C": -955, "\u304B\u306A\u308A": 2910, "\u307E\u3059\u3002": -1449, "\u3088\u3046\u306B": -1522, "\u305D\u3046\u3067": -1026, "\u3044\u305F\u3002": 1051, "\u3060\u304B\u3089": 995, "\u3053\u3068\u306B": -907, "\u3057\u304B\u3057": 1612, "\u3046\u3069\u3093": 1004, "\u30E1\u30FC\u30EB": 369, "\u3082\u3061\u308D": 1602, "\u3060\u3063\u305F": -1494, "\u3060\u3063\u3066": 523, "\u3053\u3068\u3082": -539, "\u3044\u308D\u3044": 697, "\u3088\u3046\u3067": -1413, "\u3044\u304B\u306A": 786, "\u3042\u308A\u3001": -558, "\u3067\u3082\u3001": 675, "\u3061\u306A\u307F": 1556, "\u3053\u3068\u3002": 678, "\u3053\u3068\u3067": 263, "\u306A\u3044\u3002": 112, "\u306A\u304B\u306A": 855, "\u30BD\u30D5\u30C8": 704, "\u3059\u308B\u3068": 339, "\u308F\u3051\u3067": -339, "\u3044\u3048\u3070": 647, "\u306A\u304F\u306A": 274, "\u306A\u3093\u3066": -265, "\u3057\u306A\u3044": 519, "\u307E\u3063\u3066": -644, "\u306A\u3093\u3068": 485, "\u3053\u3068\u3092": 228, "\u3057\u3066\u304D": 324, "\u3057\u304B\u3082": 790, "\u3089\u3057\u3044": 546, 
"\u3042\u305F\u308A": -494, "\u3059\u3054\u3044": 197, "\u3053\u3068\u306F": -199, "\u3044\u304F\u3089": 180, "\u306A\u3093\u304B": 98, "\u3067\u304D\u307E": 80, "\u3061\u3087\u3063": 190, "\u306E\u306F\u3001": 150, "\u3044\u3044\u3068": -70, "\u305D\u3046\u3044": 136, "\u3057\u3066\u3082": -90, "\u89B3\u5149\u5BA2": -70, "\u304A\u3044\u3057": 74, "\u3044\u3063\u3071": 98, "\u3044\u308B\u3002": 52, "\u3044\u307E\u3059": -20 }, "TW3": { "\u3082\u306A\u304F": -1963, "\u3068\u3044\u3046": 835, "\u3068\u3053\u308D": -1715, "\u3068\u3057\u3066": 677, "\u3066\u3044\u308B": -1633, "\u306E\u307E\u307E": -1388, "\u3001\u3042\u308B": -2880, "\u306F\u306A\u3044": -522, "\u3059\u3053\u3068": 762, "\u3066\u3057\u307E": -1446, "\u3089\u306A\u3044": -1178, "\u3068\u3057\u305F": -1303, "\u3068\u8A00\u3063": -1139, "\u3066\u3044\u3046": 588, "\u306F\u305D\u308C": -915, "\u3067\u3042\u308B": -808, "\u306B\u306A\u3063": -35, "\u3001\u306A\u3093": -182, "\u305F\u3053\u3068": -95, "\u3067\u3042\u3063": -926, "\u3068\u306A\u3063": 309, "\u3082\u3042\u308A": -283, "\u3044\u3068\u3053": 100, "\u3070\u3044\u3044": -261, "\u3082\u3044\u3044": -455, "\u3067\u3042\u308A": -715, "\u306A\u3093\u3067": 470, "\u3046\u3044\u3046": -319, "\u308B\u3068\u304D": 259, "\u3066\u3044\u305F": -117, "\u306B\u3057\u3066": -41, "\u3067\u3057\u305F": -136 }, "TW1": { "\u3068\u3044\u3046": 710, "\u3088\u3046\u306A": 1395, "\u306E\u307E\u307E": 1430, "\u3057\u304B\u3057": 2233, "\u30FB\u30FB\u30FB": 1265, "\u6301\u3063\u3066": -1918, "\u306A\u304C\u3089": 1900, "\u304B\u306A\u308A": 2064, "\u3002\u3053\u306E": -230, "\u3084\u306F\u308A": 2088, "\u601D\u3063\u305F": 877, "\u3082\u306A\u304F": 783, "\u3092\u5229\u7528": -1416, "\u306A\u3093\u3068": -712, "\u305D\u3057\u3066": 819, "\u3001\u305D\u308C": 328, "\u3067\u3042\u308B": 375, "\u306A\u3044\u3001": -985, "\u305D\u308C\u306F": -305, "\u307E\u3057\u305F": -327, "\u304B\u306A\u304B": 703, "\u3066\u3044\u308B": 29, "\u3063\u304B\u308A": 775, 
"\u304F\u3055\u3093": 545, "\u3061\u308D\u3093": 463, "\u3072\u3068\u3064": 586, "\u306F\u305D\u306E": -121, "\u3002\u305F\u3060": 628, "\u3059\u3054\u304F": 328, "\u30EA\u30FC\u30B0": 349, "\u3068\u3053\u308D": 320, "\u308D\u3044\u308D": 327, "\u308B\u3053\u3068": -347, "\u3087\u3046\u3069": 291, "\u3066\u3044\u3066": 133, "\u304F\u3089\u3044": 83, "\u306A\u3063\u3066": -66, "\uFF10\uFF10\u5186": 58, "\u601D\u3063\u3066": -131, "\u3068\u3057\u3066": 28, "\u8CB7\u3063\u3066": -32 }, "TW2": { "\u6C17\u306B\u5165": -4314, "\u3068\u304C\u3042": -1364, "\u3067\u306F\u306A": -1140, "\u3093\u3067\u3044": -2225, "\u3046\u306B\u306A": -921, "\u304B\u3082\u3057": -382, "\u305D\u3082\u305D": -1415, "\u3057\u3066\u3044": -533, "\u3057\u304B\u306A": 526, "\u3067\u3082\u3042": -1125, "\u3068\u3082\u3042": -986, "\u3063\u305F\u3053": -226, "\u304B\u3089\u306A": -580, "\u3048\u308B\u3053": -465, "\u3057\u3066\u304A": -184, "\u3067\u3082\u306A": -388, "\u3063\u3066\u3044": 45, "\u3068\u306F\u3044": -16, "\u3063\u3066\u3053": -20, "\u306A\u3044\u3068": -32, "\u3048\u3066\u3044": 4 } };
// module/win-browser.js
// Alias for the global `window` object; the helpers in this bundle reach host
// APIs through it (e.g. `win.DOMParser`, `win.getComputedStyle`).
var win = window;
// module/dom.js
/**
 * Parses a markup string into a document using the host's `DOMParser`.
 *
 * @param html The markup to parse; it is parsed with the "text/html" MIME type.
 * @return Whatever `DOMParser.prototype.parseFromString` returns for that input.
 */
var parseFromString = (html) => {
  const domParser = new win.DOMParser();
  return domParser.parseFromString(html, "text/html");
};
/**
 * Replaces the content of an element by assigning its `innerHTML` property.
 *
 * The string is assigned as-is; nothing is escaped or sanitized here, so the
 * caller decides whether `html` is safe to insert.
 *
 * @param element The object whose `innerHTML` is overwritten.
 * @param html The new markup.
 */
var setInnerHtml = (element, html) => {
  const target = element;
  target.innerHTML = html;
};
/**
 * Sets the two inline style properties used for phrase-based wrapping:
 * `word-break: keep-all` and `overflow-wrap: anywhere`.
 *
 * @param element An object exposing a mutable `style` object.
 */
var applyWrapStyle = (element) => {
  const { style } = element;
  style.wordBreak = "keep-all";
  style.overflowWrap = "anywhere";
};
// module/parser.js
var Parser = class {
  /**
   * Builds a parser from a plain-object model.
   *
   * Every feature group of the model is copied into a `Map`, and all weights
   * are summed so the bias term (`baseScore`) can be set to minus half of
   * that sum.
   *
   * @param model2 Model data: `{ groupName: { substring: weight } }`.
   */
  constructor(model2) {
    const groups = new Map();
    let weightSum = 0;
    for (const [feature, weights] of Object.entries(model2)) {
      const table = new Map(Object.entries(weights));
      groups.set(feature, table);
      for (const weight of table.values()) {
        weightSum = weightSum + weight;
      }
    }
    this.model = groups;
    this.baseScore = -0.5 * weightSum;
  }
  /**
   * Splits a sentence into chunks at the positions reported by
   * `parseBoundaries`.
   *
   * @param sentence An input sentence.
   * @return The chunks in order; an empty array for an empty sentence.
   */
  parse(sentence) {
    if (sentence === "") {
      return [];
    }
    const cuts = this.parseBoundaries(sentence);
    // Chunk k starts at the previous cut (or 0) and ends at cut k; the final
    // chunk has no end cut, so `slice` runs to the end of the sentence.
    return [0, ...cuts].map((from, idx) => sentence.slice(from, cuts[idx]));
  }
  /**
   * Scores every position between two UTF-16 code units and collects the
   * positions whose score is positive.
   *
   * @param sentence An input sentence.
   * @return Ascending indices at which a new chunk starts.
   */
  parseBoundaries(sentence) {
    // [feature group, window start, window end], offsets relative to the
    // candidate position. `substring` clamps out-of-range offsets, so windows
    // near either end of the sentence shrink and usually match nothing.
    const windows = [
      ["UW1", -3, -2],
      ["UW2", -2, -1],
      ["UW3", -1, 0],
      ["UW4", 0, 1],
      ["UW5", 1, 2],
      ["UW6", 2, 3],
      ["BW1", -2, 0],
      ["BW2", -1, 1],
      ["BW3", 0, 2],
      ["TW1", -3, 0],
      ["TW2", -2, 1],
      ["TW3", -1, 2],
      ["TW4", 0, 3],
    ];
    const boundaries = [];
    for (let pos = 1; pos < sentence.length; pos++) {
      let total = this.baseScore;
      for (const [feature, from, to] of windows) {
        const weights = this.model.get(feature);
        // A model may omit a whole feature group; it then contributes nothing.
        const hit =
          weights === null || weights === undefined
            ? undefined
            : weights.get(sentence.substring(pos + from, pos + to));
        total += hit || 0;
      }
      if (total > 0) {
        boundaries.push(pos);
      }
    }
    return boundaries;
  }
};
// module/html_processor.js
// Shorthand for `console.assert`.
var assert = console.assert;
// U+200B ZERO WIDTH SPACE as a numeric code point (8203 === 0x200B).
var ZWSP_CODEPOINT = 8203;
// The same character as a one-character string.
var ZWSP = String.fromCharCode(ZWSP_CODEPOINT);
// Numeric node-type codes for element (1) and text (3) nodes; the values match
// the DOM constants `Node.ELEMENT_NODE` and `Node.TEXT_NODE`.
var NodeType = {
ELEMENT_NODE: 1,
TEXT_NODE: 3
};
// Enumeration of the ways an element can be treated. `actionForElement`
// returns one of these values for a given element.
var DomAction = {
// Inline content; it becomes a part of a paragraph.
Inline: 0,
// A nested paragraph.
Block: 1,
// Skip the content. The content before and after are connected.
Skip: 2,
// A forced break. The content before and after become paragraphs.
Break: 3,
// The content provides context, but it's not breakable.
NoBreak: 4,
// Force a break opportunity.
BreakOpportunity: 5
};
// Fixed per-element actions, keyed by upper-case element name. `actionForElement`
// looks an element's `nodeName` up here first; only names absent from this table
// fall through to the computed-style check and the default block-element list.
var domActions = {
// Hidden elements
// https://html.spec.whatwg.org/multipage/rendering.html#hidden-elements
AREA: DomAction.Skip,
BASE: DomAction.Skip,
BASEFONT: DomAction.Skip,
DATALIST: DomAction.Skip,
HEAD: DomAction.Skip,
LINK: DomAction.Skip,
META: DomAction.Skip,
NOEMBED: DomAction.Skip,
NOFRAMES: DomAction.Skip,
PARAM: DomAction.Skip,
RP: DomAction.Skip,
SCRIPT: DomAction.Skip,
STYLE: DomAction.Skip,
TEMPLATE: DomAction.Skip,
TITLE: DomAction.Skip,
NOSCRIPT: DomAction.Skip,
// Flow content
// https://html.spec.whatwg.org/multipage/rendering.html#flow-content-3
HR: DomAction.Break,
// Disable if `white-space: pre`.
LISTING: DomAction.Skip,
PLAINTEXT: DomAction.Skip,
PRE: DomAction.Skip,
XMP: DomAction.Skip,
// Phrasing content
// https://html.spec.whatwg.org/multipage/rendering.html#phrasing-content-3
BR: DomAction.Break,
RT: DomAction.Skip,
WBR: DomAction.BreakOpportunity,
// Form controls
// https://html.spec.whatwg.org/multipage/rendering.html#form-controls
INPUT: DomAction.Skip,
SELECT: DomAction.Skip,
BUTTON: DomAction.Skip,
TEXTAREA: DomAction.Skip,
// Other elements where the phrase-based line breaking should be disabled.
// https://github.com/google/budoux/blob/main/budoux/skip_nodes.json
ABBR: DomAction.Skip,
CODE: DomAction.Skip,
IFRAME: DomAction.Skip,
TIME: DomAction.Skip,
VAR: DomAction.Skip,
// Deprecated, but supported in all browsers.
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/nobr
NOBR: DomAction.NoBreak
};
// Element names that `actionForElement` treats as blocks when it falls back
// to its built-in rules. The section numbers are kept from the original list.
var defaultBlockElements = /* @__PURE__ */ new Set([
  // 15.3.2 The page
  "HTML", "BODY",
  // 15.3.3 Flow content
  "ADDRESS", "BLOCKQUOTE", "CENTER", "DIALOG", "DIV", "FIGURE", "FIGCAPTION",
  "FOOTER", "FORM", "HEADER", "LEGEND", "LISTING", "MAIN", "P",
  // 15.3.6 Sections and headings
  "ARTICLE", "ASIDE", "H1", "H2", "H3", "H4", "H5", "H6", "HGROUP", "NAV",
  "SECTION",
  // 15.3.7 Lists
  "DIR", "DD", "DL", "DT", "MENU", "OL", "UL", "LI",
  // 15.3.8 Tables
  "TABLE", "CAPTION", "COL", "TR", "TD", "TH",
  // 15.3.12 The fieldset and legend elements
  "FIELDSET",
  // 15.5.4 The details and summary elements
  "DETAILS", "SUMMARY",
  // 15.5.12 The marquee element
  "MARQUEE",
]);
// Numeric `nodeType` values; `HTMLProcessor.hasChildTextNode` reads `TEXT`.
var NODETYPE = { ELEMENT: 1, TEXT: 3 };
/**
 * Determines the {@link DomAction} for an element.
 *
 * The lookup order is:
 * 1. a fixed entry in `domActions` for the element's `nodeName`;
 * 2. the computed style, when `win.getComputedStyle` is a function:
 *    `white-space` of `nowrap` or `pre` gives `NoBreak`, otherwise a
 *    non-empty `display` decides between `Inline` and `Block`;
 * 3. the built-in `defaultBlockElements` list.
 *
 * @param element The element to classify.
 * @return The action to take for the element.
 */
function actionForElement(element) {
  const nodeName = element.nodeName;
  // Only own entries count. A plain `domActions[nodeName]` would also find
  // members inherited from `Object.prototype` (e.g. "constructor",
  // "toString") and return a function instead of a DomAction.
  if (Object.prototype.hasOwnProperty.call(domActions, nodeName)) {
    const action = domActions[nodeName];
    if (action !== void 0)
      return action;
  }
  if (typeof win.getComputedStyle === "function") {
    const style = win.getComputedStyle(element);
    switch (style.whiteSpace) {
      case "nowrap":
      case "pre":
        return DomAction.NoBreak;
    }
    const display = style.display;
    // An empty `display` falls through to the built-in rules below.
    if (display)
      return display === "inline" ? DomAction.Inline : DomAction.Block;
  }
  return defaultBlockElements.has(nodeName) ? DomAction.Block : DomAction.Inline;
}
/**
 * One piece of paragraph text: either a DOM text node, which can be split in
 * place, or a plain string, which only contributes its text.
 */
var NodeOrText = class {
  constructor(nodeOrText) {
    this.chunks = [];
    this.hasBreakOpportunityAfter = false;
    this.nodeOrText = nodeOrText;
  }

  /** Whether the wrapped value is a plain string. */
  get isString() {
    return typeof this.nodeOrText === "string";
  }

  /** Only wrapped nodes can be split; plain strings cannot. */
  get canSplit() {
    return !this.isString;
  }

  /** The string itself, or the `nodeValue` of the wrapped node. */
  get text() {
    if (this.isString)
      return this.nodeOrText;
    return this.nodeOrText.nodeValue;
  }

  /** Length of {@link text}, or 0 when there is no text. */
  get length() {
    const value = this.text;
    const size = value === null || value === void 0 ? void 0 : value.length;
    return size !== null && size !== void 0 ? size : 0;
  }

  /**
   * Splits the wrapped text node according to {@link chunks}.
   * Joining all {@link chunks} must be equal to {@link text}.
   * Does nothing when there are fewer than two chunks.
   *
   * @param separator A string to insert between chunks, or a node whose deep
   *     clones are placed between the new text nodes.
   */
  split(separator) {
    const pieces = this.chunks;
    assert(pieces.length === 0 || pieces.join("") === this.text);
    if (pieces.length <= 1)
      return;
    assert(this.canSplit);
    const textNode = this.nodeOrText;

    // String separator: rewrite the node's value in place.
    if (typeof separator === "string") {
      textNode.nodeValue = pieces.join(separator);
      return;
    }

    // Node separator: create a text node per non-empty chunk first, leaving
    // `null` placeholders between chunks, then fill each placeholder with a
    // deep clone of the separator.
    const doc = textNode.ownerDocument;
    const slots = [];
    for (let i = 0; i < pieces.length; ++i) {
      if (i > 0)
        slots.push(null);
      if (pieces[i])
        slots.push(doc.createTextNode(pieces[i]));
    }
    const replacements = slots.map((slot) => slot ? slot : separator.cloneNode(true));
    textNode.replaceWith(...replacements);
  }
};
// Subclass of NodeOrText that adds nothing of its own; the name suggests it
// exists so tests can construct NodeOrText instances.
var NodeOrTextForTesting = class extends NodeOrText {};
/**
 * The text pieces collected for one element, in document order, together
 * with the element they were collected for.
 */
var Paragraph = class {
  constructor(element) {
    this.nodes = [];
    this.element = element;
  }

  /** @return Whether no text pieces have been collected. */
  isEmpty() {
    return this.nodes.length === 0;
  }

  /** The text of all pieces, concatenated. */
  get text() {
    const parts = [];
    for (const node of this.nodes)
      parts.push(node.text);
    return parts.join("");
  }

  /** The most recently added piece, or `undefined` when empty. */
  get lastNode() {
    const count = this.nodes.length;
    if (!count)
      return void 0;
    return this.nodes[count - 1];
  }

  /** Marks a break opportunity after the last piece, if there is one. */
  setHasBreakOpportunityAfter() {
    const tail = this.lastNode;
    if (tail)
      tail.hasBreakOpportunityAfter = true;
  }

  /**
   * @return Indices of forced break opportunities in the source.
   * An index is recorded right after each U+200B ZERO WIDTH SPACE found in a
   * splittable piece, and at the end of each piece flagged with
   * `hasBreakOpportunityAfter`.
   */
  getForcedOpportunities() {
    const found = [];
    let offset = 0;
    for (const node of this.nodes) {
      // Only splittable pieces are scanned for ZWSP characters.
      const content = node.canSplit ? node.text : null;
      const contentLength = content ? content.length : 0;
      for (let pos = 0; pos < contentLength; ++pos) {
        if (content.charCodeAt(pos) === ZWSP_CODEPOINT)
          found.push(offset + pos + 1);
      }
      offset += node.length;
      if (node.hasBreakOpportunityAfter)
        found.push(offset);
    }
    return found;
  }

  /**
   * @return {@param boundaries} without the indices reported by
   * {@link getForcedOpportunities}. When there are no forced opportunities
   * the same {@param boundaries} array is returned.
   */
  excludeForcedOpportunities(boundaries) {
    const forced = this.getForcedOpportunities();
    if (forced.length === 0)
      return boundaries;
    const forcedSet = new Set(forced);
    return boundaries.filter((index) => !forcedSet.has(index));
  }
};
var HTMLProcessor = class {
/**
* Applies BudouX phrase boundaries to the text inside a DOM tree by placing
* a separator at each boundary.
*
* @param parser3 A BudouX {@link Parser} to compute semantic line breaks.
* @param options Optional settings. `className` and `separator` are copied
* from it when they are not `undefined`.
*/
constructor(parser3, options) {
// Default separator; replaced below when `options.separator` is given.
this.separator = ZWSP;
this.parser_ = parser3;
if (options !== void 0) {
if (options.className !== void 0)
this.className = options.className;
if (options.separator !== void 0)
this.separator = options.separator;
}
}
/**
* Checks if the given element has a text node in its children.
* Only direct children are inspected.
*
* @param ele An element to be checked.
* @return Whether the element has a child text node.
*/
static hasChildTextNode(ele) {
for (const child of ele.childNodes) {
if (child.nodeType === NODETYPE.TEXT)
return true;
}
return false;
}
/**
* Applies markups for semantic line breaks to the given HTML element.
*
* It breaks descendant nodes into paragraphs,
* and applies the BudouX to each paragraph.
* @param element The input element.
*/
applyToElement(element) {
for (const block of this.getBlocks(element)) {
assert(!block.isEmpty());
this.applyToParagraph(block);
}
}
/**
* Find paragraphs from a given HTML element.
* This is a generator: paragraphs are yielded as they are completed while
* the descendants are walked recursively.
* @param element The root element to find paragraphs.
* @param parent The parent {@link Paragraph} if any.
* @return A list of {@link Paragraph}s.
*/
*getBlocks(element, parent) {
assert(element.nodeType === NodeType.ELEMENT_NODE);
// Skip elements that already carry the configured class name.
if (this.className && element.classList.contains(this.className))
return;
const action = actionForElement(element);
if (action === DomAction.Skip)
return;
if (action === DomAction.Break) {
// Flush the parent paragraph collected so far: mark a break opportunity
// after its last node, yield it, then start collecting afresh.
if (parent && !parent.isEmpty()) {
parent.setHasBreakOpportunityAfter();
yield parent;
parent.nodes = [];
}
assert(!element.firstChild);
return;
}
if (action === DomAction.BreakOpportunity) {
if (parent)
parent.setHasBreakOpportunityAfter();
return;
}
assert(action === DomAction.Block || action === DomAction.Inline || action === DomAction.NoBreak);
// A new paragraph starts at the root (no parent) or at a block element;
// otherwise the content is appended to the parent's paragraph.
const isNewBlock = !parent || action === DomAction.Block;
const block = isNewBlock ? new Paragraph(element) : parent;
// Collect the text nodes of this paragraph while recursing into child
// elements.
for (const child of element.childNodes) {
switch (child.nodeType) {
case NodeType.ELEMENT_NODE:
for (const childBlock of this.getBlocks(child, block))
yield childBlock;
break;
case NodeType.TEXT_NODE:
if (action === DomAction.NoBreak) {
// Push the text as a plain string rather than the node: a string
// `NodeOrText` reports `canSplit === false`, so it contributes to the
// paragraph text but is never split.
const text = child.nodeValue;
if (text) {
block.nodes.push(new NodeOrText(text));
}
break;
}
block.nodes.push(new NodeOrText(child));
break;
}
}
// Only the call that created the paragraph yields it.
if (isNewBlock && !block.isEmpty())
yield block;
}
/**
* Apply the BudouX to the given {@link Paragraph}.
* Nothing is changed when no node can be split, when the text is
* whitespace-only, or when the parser reports no boundaries.
* @param paragraph The {@link Paragraph} to apply.
*/
applyToParagraph(paragraph) {
assert(paragraph.nodes.length > 0);
if (!paragraph.nodes.some((node) => node.canSplit))
return;
const text = paragraph.text;
// No changes if whitespace-only.
if (/^\s*$/.test(text))
return;
const boundaries = this.parser_.parseBoundaries(text);
// No changes if there is no boundary.
if (boundaries.length <= 0)
return;
// The boundaries are expected to be strictly ascending and to lie
// strictly inside the text.
assert(boundaries[0] > 0);
assert(boundaries.every((x, i) => i === 0 || x > boundaries[i - 1]));
assert(boundaries[boundaries.length - 1] < text.length);
const adjustedBoundaries = paragraph.excludeForcedOpportunities(boundaries);
// Append a sentinel beyond the end of the text; `splitNodes` relies on it
// to terminate its scanning loops.
adjustedBoundaries.push(text.length + 1);
this.splitNodes(paragraph.nodes, adjustedBoundaries);
this.applyBlockStyle(paragraph.element);
}
/**
* Split {@link NodeOrText} at the specified boundaries.
* @param nodes A list of {@link NodeOrText}.
* @param boundaries A list of indices of the text to split at. The indices
* are relative to the concatenated text of `nodes`; the last entry must be
* a sentinel greater than the total text length.
*/
splitNodes(nodes, boundaries) {
var _a;
assert(boundaries.length > 0);
assert(boundaries.every((x, i) => i === 0 || x > boundaries[i - 1]));
const textLen = nodes.reduce((sum, node) => sum + node.length, 0);
// The last boundary must be a sentinel.
assert(boundaries[boundaries.length - 1] > textLen);
// Distribute `boundaries` to `node.chunks`.
let boundary_index = 0;
let boundary = boundaries[0];
assert(boundary > 0);
// Start index of the current node's text within the whole text.
let nodeStart = 0;
// The previous node, kept only while it is splittable.
let lastNode = null;
for (const node of nodes) {
assert(boundary >= nodeStart);
assert(node.chunks.length === 0);
const nodeText = node.text;
if (!nodeText)
continue;
const nodeLength = nodeText.length;
const nodeEnd = nodeStart + nodeLength;
assert(!lastNode || lastNode.canSplit);
if (!node.canSplit) {
// If a boundary falls exactly between a splittable `lastNode` and this
// unsplittable node, record it as an empty trailing chunk of `lastNode`.
if (lastNode && boundary === nodeStart) {
if (lastNode.chunks.length === 0)
lastNode.chunks.push((_a = lastNode.text) !== null && _a !== void 0 ? _a : "");
lastNode.chunks.push("");
}
// Consume the boundaries that fall before the end of this unsplittable node.
while (boundary < nodeEnd) {
boundary = boundaries[++boundary_index];
}
lastNode = null;
nodeStart = nodeEnd;
continue;
}
lastNode = node;
// The next boundary is not inside this node; leave it unsplit.
if (boundary >= nodeEnd) {
nodeStart = nodeEnd;
continue;
}
// Cut this node's text at every boundary that falls inside it.
const chunks = node.chunks;
let chunkStartInNode = 0;
while (boundary < nodeEnd) {
const boundaryInNode = boundary - nodeStart;
assert(boundaryInNode >= chunkStartInNode);
chunks.push(nodeText.slice(chunkStartInNode, boundaryInNode));
chunkStartInNode = boundaryInNode;
boundary = boundaries[++boundary_index];
}
// Add the rest of the node's text as the final chunk.
assert(chunkStartInNode < nodeLength);
chunks.push(nodeText.slice(chunkStartInNode));
nodeStart = nodeEnd;
}
// Check that all nodes were visited and only the sentinel (or boundaries
// at the very end) remain.
assert(nodeStart === textLen);
assert(boundary_index < boundaries.length);
assert(boundaries[boundary_index] >= textLen);
// `node.chunks` are finalized. Split them.
for (const node of nodes) {
node.split(this.separator);
}
}
/**
* Applies the block style to the given element.
* When a class name is configured, only that class is added; otherwise
* `applyWrapStyle` (defined outside this section) is called on the element.
* @param element The element to apply the block style.
*/
applyBlockStyle(element) {
if (this.className) {
element.classList.add(this.className);
return;
}
applyWrapStyle(element);
}
};
/**
 * A {@link Parser} that can also apply its phrase boundaries to HTML, either
 * to a live element or to an HTML string, through an {@link HTMLProcessor}.
 */
var HTMLProcessingParser = class extends Parser {
  /**
   * @param model2 The model passed on to {@link Parser}.
   * @param htmlProcessorOptions Options for the internal
   *     {@link HTMLProcessor}. Defaults to using ZWSP as the separator.
   */
  constructor(model2, htmlProcessorOptions = {
    separator: ZWSP
  }) {
    super(model2);
    this.htmlProcessor = new HTMLProcessor(this, htmlProcessorOptions);
  }
  /**
   * @deprecated Use `applyToElement` instead. `applyElement` will be removed
   * in v0.7.0 to align the function name with `HTMLProcessor`'s API.
   *
   * Applies markups for semantic line breaks to the given HTML element.
   * @param parentElement The input element.
   */
  applyElement(parentElement) {
    console.warn("`applyElement` is deprecated. Please use `applyToElement` instead. `applyElement` will be removed in v0.7.0.");
    this.applyToElement(parentElement);
  }
  /**
   * Applies markups for semantic line breaks to the given HTML element.
   * @param parentElement The input element.
   */
  applyToElement(parentElement) {
    this.htmlProcessor.applyToElement(parentElement);
  }
  /**
   * Translates the given HTML string to another HTML string with markups
   * for semantic line breaks.
   *
   * Only the first child of the parsed body is processed. When the body has
   * a direct text node child, all of its children are first wrapped in a
   * `<span>` so that the whole content becomes that first child.
   *
   * @param html An input html string.
   * @return The translated HTML string.
   */
  translateHTMLString(html) {
    if (html === "")
      return html;
    const doc = parseFromString(html);
    if (HTMLProcessor.hasChildTextNode(doc.body)) {
      const wrapper = doc.createElement("span");
      wrapper.append(...doc.body.childNodes);
      doc.body.append(wrapper);
    }
    // The parsed body may have no children at all, or a first child that is
    // not an element. `applyToElement` requires an element and would throw
    // on `undefined`, so skip processing in those cases.
    const root = doc.body.childNodes[0];
    if (root && root.nodeType === NodeType.ELEMENT_NODE)
      this.applyToElement(root);
    return doc.body.innerHTML;
  }
};
// module/index.js
/**
 * Creates a new HTMLProcessingParser from the bundled `model`.
 * @return A new parser instance on every call.
 */
var loadDefaultJapaneseParser = () => new HTMLProcessingParser(model);
// module/tests/test_html_processor.js
// Parser shared by the tests below.
var parser = loadDefaultJapaneseParser();
// HTMLProcessor bound to the shared test parser, so tests pass only options.
var MockHTMLProcessorBase = class extends HTMLProcessor {
  constructor(options) { super(parser, options); }
};
/**
 * Test helper: loads `html` into the test document's body and returns the
 * paragraphs that a default-configured processor finds in it.
 * @param html The HTML to place in the body.
 * @return The result of `HTMLProcessor.getBlocks` for the body.
 */
function getBlocks(html) {
  const body = win.document.body;
  setInnerHtml(body, html);
  return new MockHTMLProcessorBase().getBlocks(body);
}
describe("HTMLProcessor.applyToElement", () => {
  const document = win.document;
  const wbr = document.createElement("wbr");
  /**
   * Loads `html` into the body, applies a processor configured with the
   * given separator and the class name "applied", and returns the result.
   * @param html The input HTML.
   * @param separator A string or a node to use as the separator.
   * @return The body's `innerHTML` after processing.
   */
  function apply(html, separator) {
    setInnerHtml(document.body, html);
    const processor = new MockHTMLProcessorBase({
      separator,
      className: "applied"
    });
    processor.applyToElement(document.body);
    return document.body.innerHTML;
  }
  // In `out`, "|" marks where a separator is expected.
  for (const test of [
    {
      in: "<div>\u6674\u308C</div>",
      out: "<div>\u6674\u308C</div>"
    },
    {
      in: "<div>\u4ECA\u65E5\u306F\u6674\u308C\u3067\u3059</div>",
      out: '<div class="applied">\u4ECA\u65E5\u306F|\u6674\u308C\u3067\u3059</div>'
    },
    {
      in: "<div><span>\u4ECA\u65E5\u306F</span>\u6674\u308C\u3067\u3059</div>",
      out: '<div class="applied"><span>\u4ECA\u65E5\u306F</span>|\u6674\u308C\u3067\u3059</div>'
    },
    {
      in: "<div><span>\u4ECA\u65E5\u306F\u6674\u308C</span>\u3067\u3059</div>",
      out: '<div class="applied"><span>\u4ECA\u65E5\u306F|\u6674\u308C</span>\u3067\u3059</div>'
    },
    {
      in: "<code>\u4ECA\u65E5\u306F\u6674\u308C\u3067\u3059</code>",
      out: "<code>\u4ECA\u65E5\u306F\u6674\u308C\u3067\u3059</code>"
    },
    {
      in: "<div>\u4ECA\u65E5\u306F<code>code</code>\u6674\u308C\u3067\u3059</div>",
      out: '<div class="applied">\u4ECA\u65E5\u306F<code>code</code>|\u6674\u308C\u3067\u3059</div>'
    },
    {
      in: "<div>\u4ECA\u65E5\u306F\u6674\u308C\u3001\u4ECA\u65E5\u306F\u6674\u308C</div>",
      out: '<div class="applied">\u4ECA\u65E5\u306F|\u6674\u308C\u3001|\u4ECA\u65E5\u306F|\u6674\u308C</div>'
    },
    {
      in: "<div>\u4ECA\u65E5\u306F<nobr>\u6674\u308C\u3001\u4ECA\u65E5\u306F</nobr>\u6674\u308C</div>",
      out: '<div class="applied">\u4ECA\u65E5\u306F|<nobr>\u6674\u308C\u3001\u4ECA\u65E5\u306F</nobr>|\u6674\u308C</div>'
    },
    {
      in: '<div>\u4ECA\u65E5\u306F<span style="white-space: nowrap">\u6674\u308C\u3001\u4ECA\u65E5\u306F</span>\u6674\u308C</div>',
      out: '<div class="applied">\u4ECA\u65E5\u306F|<span style="white-space: nowrap">\u6674\u308C\u3001\u4ECA\u65E5\u306F</span>|\u6674\u308C</div>'
    }
  ]) {
    // Each case runs once per separator kind. The spec names are kept
    // distinct so a failure identifies the variant, and so the suite also
    // works with runners configured to reject duplicate spec names.
    it(`${test.in} (separator: <wbr> element)`, () => {
      const out = test.out.replace(/\|/g, "<wbr>");
      expect(apply(test.in, wbr)).toEqual(out);
    });
    it(`${test.in} (separator: "/" string)`, () => {
      const out = test.out.replace(/\|/g, "/");
      expect(apply(test.in, "/")).toEqual(out);
    });
  }
});
describe("HTMLProcessor.applyToElement.separator.node", () => {
  // A separator element with its own style and text content must appear
  // intact at every boundary, which requires a deep clone per boundary.
  it("should clone separator element deeply", () => {
    const doc = win.document;
    const body = doc.body;
    setInnerHtml(body, "<div>\u4ECA\u65E5\u306F\u826F\u3044\u5929\u6C17\u3067\u3059</div>");

    const separator = doc.createElement("span");
    separator.style.whiteSpace = "normal";
    separator.textContent = "\u200B";

    const options = { separator, className: "applied" };
    new MockHTMLProcessorBase(options).applyToElement(body);

    const expected = '<div class="applied">\u4ECA\u65E5\u306F<span style="white-space: normal;">\u200B</span>\u826F\u3044<span style="white-space: normal;">\u200B</span>\u5929\u6C17\u3067\u3059</div>';
    expect(body.innerHTML).toEqual(expected);
  });
});
describe("HTMLProcessor.getBlocks", () => {
function getText(html) {
const blocks = getBlocks(html);
return Array.from(function* (blocks2) {
for (const block of blocks2)
yield block.text;
}(blocks));
}
it("should collect all text of a simple block", () => {
expect(getText("<div>123</div>")).toEqual(["123"]);
});
it("should collect two blocks separately", () => {
expect(getText("<div>123</div><div>456</div>")).toEqual(["123", "456"]