UNPKG

clustex

Version:

Clustex is a lightweight text classification package designed to efficiently categorize text based on similarity metrics and learned token weights.

82 lines (71 loc) 6.58 kB
{ "classifications": [ "politics", "sport", "art", "science", "entertainment" ], "data": [ {"text": "Парламент утвердил новый закон о налогообложении", "classification": "politics"}, {"text": "Чемпионат мира по футболу переходит в четвертьфинал", "classification": "sport"}, {"text": "Открылась выставка современного искусства", "classification": "art"}, {"text": "Ученые обнаружили новую экзопланету", "classification": "science"}, {"text": "Вышел долгожданный трейлер нового фильма", "classification": "entertainment"}, {"text": "Congress approves major healthcare reform", "classification": "politics"}, {"text": "NBA playoffs heat up as teams fight for the championship", "classification": "sport"}, {"text": "New sculpture park unveiled in the city center", "classification": "art"}, {"text": "Breakthrough in cancer research offers hope", "classification": "science"}, {"text": "Music festival lineup announced, featuring top artists", "classification": "entertainment"}, {"text": "El presidente firma acuerdo de comercio internacional", "classification": "politics"}, {"text": "La selección nacional gana la Copa América", "classification": "sport"}, {"text": "Nueva obra de teatro recibe elogios de la crítica", "classification": "art"}, {"text": "Científicos revelan avances en energía renovable", "classification": "science"}, {"text": "Se estrena la esperada serie de televisión", "classification": "entertainment"}, {"text": "Le gouvernement annonce une réforme éducative", "classification": "politics"}, {"text": "Le Tour de France débute avec des surprises", "classification": "sport"}, {"text": "Une exposition d'art impressionniste attire des foules", "classification": "art"}, {"text": "Découverte révolutionnaire en astrophysique", "classification": "science"}, {"text": "Le film tant attendu bat des records au box-office", "classification": "entertainment"}, {"text": "政府公布新的经济政策", "classification": "politics"}, {"text": "中国队夺得奥运金牌", "classification": "sport"}, {"text": 
"新的当代艺术博物馆开幕", "classification": "art"}, {"text": "研究人员开发出更高效的太阳能电池", "classification": "science"}, {"text": "知名歌手发布最新专辑", "classification": "entertainment"}, {"text": "Bundesregierung beschließt neue Klimapolitik", "classification": "politics"}, {"text": "Deutsche Mannschaft gewinnt Weltmeisterschaft", "classification": "sport"}, {"text": "Neue Kunstausstellung zeigt moderne Werke", "classification": "art"}, {"text": "Forscher machen Fortschritte in Quantencomputing", "classification": "science"}, {"text": "Beliebte Serie bekommt eine neue Staffel", "classification": "entertainment"}, {"text": "Новый указ президента повышает минимальную зарплату", "classification": "politics"}, {"text": "На Олимпиаде установлен новый мировой рекорд", "classification": "sport"}, {"text": "В театре поставили новую пьесу Чехова", "classification": "art"}, {"text": "Астрономы подтвердили существование редкого космического явления", "classification": "science"}, {"text": "Известный актер получил награду за лучшую роль", "classification": "entertainment"}, {"text": "Senate passes landmark bill on renewable energy", "classification": "politics"}, {"text": "NFL season kicks off with thrilling matches", "classification": "sport"}, {"text": "Modern art exhibition sparks controversy", "classification": "art"}, {"text": "NASA successfully launches a new space mission", "classification": "science"}, {"text": "Critically acclaimed play premieres on Broadway", "classification": "entertainment"}, {"text": "El gobierno implementa nuevas políticas fiscales", "classification": "politics"}, {"text": "El torneo de tenis atrae a los mejores jugadores", "classification": "sport"}, {"text": "Nueva galería de arte abre en el centro", "classification": "art"}, {"text": "Investigación genética promete avances médicos", "classification": "science"}, {"text": "Una nueva película de ciencia ficción captura la atención mundial", "classification": "entertainment"}, {"text": 
"La réforme du système de santé entre en vigueur", "classification": "politics"}, {"text": "La Coupe du Monde féminine bat des records de spectateurs", "classification": "sport"}, {"text": "Les galeries européennes célèbrent l'art abstrait", "classification": "art"}, {"text": "Les scientifiques détectent des ondes gravitationnelles inédites", "classification": "science"}, {"text": "Le dernier album d'un célèbre musicien fait sensation", "classification": "entertainment"}, {"text": "中央政府发布最新科技发展战略", "classification": "politics"}, {"text": "世界羽毛球锦标赛进入半决赛", "classification": "sport"}, {"text": "中国书法展览吸引众多参观者", "classification": "art"}, {"text": "科学家成功克隆实验小鼠", "classification": "science"}, {"text": "流行乐团发布新的单曲", "classification": "entertainment"}, {"text": "Neue Steuerpolitik sorgt für Diskussionen im Bundestag", "classification": "politics"}, {"text": "Deutsche Skifahrer dominieren internationale Wettbewerbe", "classification": "sport"}, {"text": "Berühmter Künstler stellt neue Werke aus", "classification": "art"}, {"text": "Durchbruch in der KI-Forschung revolutioniert Technologie", "classification": "science"}, {"text": "Hollywood feiert die Premiere eines mit Spannung erwarteten Films", "classification": "entertainment"} ] }