UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

791 lines (763 loc) 20.3 kB
/* * Copyright (c) AXA Shared Services Spain S.A. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ const BaseStemmer = require('./base-stemmer'); const Among = require('./among'); /** * This class was automatically generated by a Snowball to JSX compiler * It implements the stemming algorithm defined by a snowball script. */ class RussianStemmer extends BaseStemmer { constructor(tokenizer) { super(tokenizer); this.I_p2 = 0; this.I_pV = 0; } copy_from(other) { this.I_p2 = other.I_p2; this.I_pV = other.I_pV; super.copy_from(other); } r_mark_regions() { let v_1; // (, line 57 this.I_pV = this.limit; this.I_p2 = this.limit; // do, line 61 v_1 = this.cursor; let lab0 = true; lab0: while (lab0 == true) { lab0 = false; // (, line 61 // gopast, line 62 golab1: while (true) { let lab2 = true; while (lab2 == true) { lab2 = false; if (!this.in_grouping(RussianStemmer.g_v, 1072, 1103)) { break; } break golab1; } if (this.cursor >= this.limit) { break lab0; } this.cursor++; } // setmark pV, line 62 this.I_pV = this.cursor; // gopast, line 62 golab3: while (true) { let lab4 = true; while (lab4 == true) { lab4 = false; if (!this.out_grouping(RussianStemmer.g_v, 1072, 1103)) { break; } break golab3; } if (this.cursor >= this.limit) { break lab0; } this.cursor++; } // gopast, line 63 golab5: while (true) { let lab6 = true; while (lab6 == true) { lab6 = false; if (!this.in_grouping(RussianStemmer.g_v, 1072, 1103)) { break; } break golab5; } if (this.cursor >= this.limit) { break lab0; } this.cursor++; } // gopast, line 63 golab7: while (true) { let lab8 = true; while (lab8 == true) { lab8 = false; if (!this.out_grouping(RussianStemmer.g_v, 1072, 1103)) { break; } break golab7; } if (this.cursor >= this.limit) { break lab0; } this.cursor++; } // setmark p2, line 63 this.I_p2 = this.cursor; } this.cursor = v_1; return true; } r_R2() { if (!(this.I_p2 <= this.cursor)) { return false; } return true; } r_perfective_gerund() { let among_var; let v_1; // (, line 71 // [, line 72 this.ket = this.cursor; // substring, line 72 among_var = this.find_among_b(RussianStemmer.a_0, 9); if (among_var == 0) { return false; } // ], line 72 this.bra = this.cursor; switch (among_var) { case 0: return false; case 1: // (, line 76 // or, line 76 var lab0 = true; lab0: while (lab0 == true) { lab0 = false; v_1 = this.limit - this.cursor; let lab1 = true; while (lab1 == true) { lab1 = false; // literal, line 76 if (!this.eq_s_b(1, '\u0430')) { break; } break lab0; } this.cursor = this.limit - v_1; // literal, line 76 if (!this.eq_s_b(1, '\u044F')) { return false; } } // delete, line 76 if (!this.slice_del()) { return false; } break; case 2: // (, line 83 // delete, line 83 if (!this.slice_del()) { return false; } break; } return true; } r_adjective() { let among_var; // (, line 87 // [, line 88 this.ket = this.cursor; // substring, line 88 among_var = this.find_among_b(RussianStemmer.a_1, 26); if (among_var == 0) { return false; } // ], line 88 this.bra = this.cursor; switch (among_var) { case 0: return false; case 1: // (, line 97 // delete, line 97 if (!this.slice_del()) { return false; } break; } return true; } r_adjectival() { let among_var; let v_1; let v_2; // (, line 101 // call adjective, line 102 if (!this.r_adjective()) { return false; } // try, line 109 v_1 = this.limit - this.cursor; let lab0 = true; lab0: while (lab0 == true) { lab0 = false; // (, line 109 // [, line 110 this.ket = this.cursor; // substring, line 110 among_var = this.find_among_b(RussianStemmer.a_2, 8); if (among_var == 0) { this.cursor = this.limit - v_1; break; } // ], line 110 this.bra = this.cursor; switch (among_var) { case 0: this.cursor = this.limit - v_1; break lab0; case 1: // (, line 115 // or, line 115 var lab1 = true; lab1: while (lab1 == true) { lab1 = false; v_2 = this.limit - this.cursor; let lab2 = true; while (lab2 == true) { lab2 = false; // literal, line 115 if (!this.eq_s_b(1, '\u0430')) { break; } break lab1; } this.cursor = this.limit - v_2; // literal, line 115 if (!this.eq_s_b(1, '\u044F')) { this.cursor = this.limit - v_1; break lab0; } } // delete, line 115 if (!this.slice_del()) { return false; } break; case 2: // (, line 122 // delete, line 122 if (!this.slice_del()) { return false; } break; } } return true; } r_reflexive() { let among_var; // (, line 128 // [, line 129 this.ket = this.cursor; // substring, line 129 among_var = this.find_among_b(RussianStemmer.a_3, 2); if (among_var == 0) { return false; } // ], line 129 this.bra = this.cursor; switch (among_var) { case 0: return false; case 1: // (, line 132 // delete, line 132 if (!this.slice_del()) { return false; } break; } return true; } r_verb() { let among_var; let v_1; // (, line 136 // [, line 137 this.ket = this.cursor; // substring, line 137 among_var = this.find_among_b(RussianStemmer.a_4, 46); if (among_var == 0) { return false; } // ], line 137 this.bra = this.cursor; switch (among_var) { case 0: return false; case 1: // (, line 143 // or, line 143 var lab0 = true; lab0: while (lab0 == true) { lab0 = false; v_1 = this.limit - this.cursor; let lab1 = true; while (lab1 == true) { lab1 = false; // literal, line 143 if (!this.eq_s_b(1, '\u0430')) { break; } break lab0; } this.cursor = this.limit - v_1; // literal, line 143 if (!this.eq_s_b(1, '\u044F')) { return false; } } // delete, line 143 if (!this.slice_del()) { return false; } break; case 2: // (, line 151 // delete, line 151 if (!this.slice_del()) { return false; } break; } return true; } r_noun() { let among_var; // (, line 159 // [, line 160 this.ket = this.cursor; // substring, line 160 among_var = this.find_among_b(RussianStemmer.a_5, 36); if (among_var == 0) { return false; } // ], line 160 this.bra = this.cursor; switch (among_var) { case 0: return false; case 1: // (, line 167 // delete, line 167 if (!this.slice_del()) { return false; } break; } return true; } r_derivational() { let among_var; // (, line 175 // [, line 176 this.ket = this.cursor; // substring, line 176 among_var = this.find_among_b(RussianStemmer.a_6, 2); if (among_var == 0) { return false; } // ], line 176 this.bra = this.cursor; // call R2, line 176 if (!this.r_R2()) { return false; } switch (among_var) { case 0: return false; case 1: // (, line 179 // delete, line 179 if (!this.slice_del()) { return false; } break; } return true; } r_tidy_up() { let among_var; // (, line 183 // [, line 184 this.ket = this.cursor; // substring, line 184 among_var = this.find_among_b(RussianStemmer.a_7, 4); if (among_var == 0) { return false; } // ], line 184 this.bra = this.cursor; switch (among_var) { case 0: return false; case 1: // (, line 188 // delete, line 188 if (!this.slice_del()) { return false; } // [, line 189 this.ket = this.cursor; // literal, line 189 if (!this.eq_s_b(1, '\u043D')) { return false; } // ], line 189 this.bra = this.cursor; // literal, line 189 if (!this.eq_s_b(1, '\u043D')) { return false; } // delete, line 189 if (!this.slice_del()) { return false; } break; case 2: // (, line 192 // literal, line 192 if (!this.eq_s_b(1, '\u043D')) { return false; } // delete, line 192 if (!this.slice_del()) { return false; } break; case 3: // (, line 194 // delete, line 194 if (!this.slice_del()) { return false; } break; } return true; } stem() { let v_1; let v_2; let v_3; let v_4; let v_5; let v_6; let v_7; let v_8; let v_9; let v_10; // (, line 199 // do, line 201 v_1 = this.cursor; let lab0 = true; while (lab0 == true) { lab0 = false; // call mark_regions, line 201 if (!this.r_mark_regions()) { break; } } this.cursor = v_1; // backwards, line 202 this.limit_backward = this.cursor; this.cursor = this.limit; // setlimit, line 202 v_2 = this.limit - this.cursor; // tomark, line 202 if (this.cursor < this.I_pV) { return false; } this.cursor = this.I_pV; v_3 = this.limit_backward; this.limit_backward = this.cursor; this.cursor = this.limit - v_2; // (, line 202 // do, line 203 v_4 = this.limit - this.cursor; let lab1 = true; lab1: while (lab1 == true) { lab1 = false; // (, line 203 // or, line 204 let lab2 = true; lab2: while (lab2 == true) { lab2 = false; v_5 = this.limit - this.cursor; let lab3 = true; while (lab3 == true) { lab3 = false; // call perfective_gerund, line 204 if (!this.r_perfective_gerund()) { break; } break lab2; } this.cursor = this.limit - v_5; // (, line 205 // try, line 205 v_6 = this.limit - this.cursor; let lab4 = true; while (lab4 == true) { lab4 = false; // call reflexive, line 205 if (!this.r_reflexive()) { this.cursor = this.limit - v_6; break; } } // or, line 206 let lab5 = true; lab5: while (lab5 == true) { lab5 = false; v_7 = this.limit - this.cursor; let lab6 = true; while (lab6 == true) { lab6 = false; // call adjectival, line 206 if (!this.r_adjectival()) { break; } break lab5; } this.cursor = this.limit - v_7; let lab7 = true; while (lab7 == true) { lab7 = false; // call verb, line 206 if (!this.r_verb()) { break; } break lab5; } this.cursor = this.limit - v_7; // call noun, line 206 if (!this.r_noun()) { break lab1; } } } } this.cursor = this.limit - v_4; // try, line 209 v_8 = this.limit - this.cursor; let lab8 = true; while (lab8 == true) { lab8 = false; // (, line 209 // [, line 209 this.ket = this.cursor; // literal, line 209 if (!this.eq_s_b(1, '\u0438')) { this.cursor = this.limit - v_8; break; } // ], line 209 this.bra = this.cursor; // delete, line 209 if (!this.slice_del()) { return false; } } // do, line 212 v_9 = this.limit - this.cursor; let lab9 = true; while (lab9 == true) { lab9 = false; // call derivational, line 212 if (!this.r_derivational()) { break; } } this.cursor = this.limit - v_9; // do, line 213 v_10 = this.limit - this.cursor; let lab10 = true; while (lab10 == true) { lab10 = false; // call tidy_up, line 213 if (!this.r_tidy_up()) { break; } } this.cursor = this.limit - v_10; this.limit_backward = v_3; this.cursor = this.limit_backward; return true; } } RussianStemmer.methodObject = new RussianStemmer(); RussianStemmer.a_0 = [ new Among('\u0432', -1, 1), new Among('\u0438\u0432', 0, 2), new Among('\u044B\u0432', 0, 2), new Among('\u0432\u0448\u0438', -1, 1), new Among('\u0438\u0432\u0448\u0438', 3, 2), new Among('\u044B\u0432\u0448\u0438', 3, 2), new Among('\u0432\u0448\u0438\u0441\u044C', -1, 1), new Among('\u0438\u0432\u0448\u0438\u0441\u044C', 6, 2), new Among('\u044B\u0432\u0448\u0438\u0441\u044C', 6, 2) ]; RussianStemmer.a_1 = [ new Among('\u0435\u0435', -1, 1), new Among('\u0438\u0435', -1, 1), new Among('\u043E\u0435', -1, 1), new Among('\u044B\u0435', -1, 1), new Among('\u0438\u043C\u0438', -1, 1), new Among('\u044B\u043C\u0438', -1, 1), new Among('\u0435\u0439', -1, 1), new Among('\u0438\u0439', -1, 1), new Among('\u043E\u0439', -1, 1), new Among('\u044B\u0439', -1, 1), new Among('\u0435\u043C', -1, 1), new Among('\u0438\u043C', -1, 1), new Among('\u043E\u043C', -1, 1), new Among('\u044B\u043C', -1, 1), new Among('\u0435\u0433\u043E', -1, 1), new Among('\u043E\u0433\u043E', -1, 1), new Among('\u0435\u043C\u0443', -1, 1), new Among('\u043E\u043C\u0443', -1, 1), new Among('\u0438\u0445', -1, 1), new Among('\u044B\u0445', -1, 1), new Among('\u0435\u044E', -1, 1), new Among('\u043E\u044E', -1, 1), new Among('\u0443\u044E', -1, 1), new Among('\u044E\u044E', -1, 1), new Among('\u0430\u044F', -1, 1), new Among('\u044F\u044F', -1, 1) ]; RussianStemmer.a_2 = [ new Among('\u0435\u043C', -1, 1), new Among('\u043D\u043D', -1, 1), new Among('\u0432\u0448', -1, 1), new Among('\u0438\u0432\u0448', 2, 2), new Among('\u044B\u0432\u0448', 2, 2), new Among('\u0449', -1, 1), new Among('\u044E\u0449', 5, 1), new Among('\u0443\u044E\u0449', 6, 2) ]; RussianStemmer.a_3 = [ new Among('\u0441\u044C', -1, 1), new Among('\u0441\u044F', -1, 1) ]; RussianStemmer.a_4 = [ new Among('\u043B\u0430', -1, 1), new Among('\u0438\u043B\u0430', 0, 2), new Among('\u044B\u043B\u0430', 0, 2), new Among('\u043D\u0430', -1, 1), new Among('\u0435\u043D\u0430', 3, 2), new Among('\u0435\u0442\u0435', -1, 1), new Among('\u0438\u0442\u0435', -1, 2), new Among('\u0439\u0442\u0435', -1, 1), new Among('\u0435\u0439\u0442\u0435', 7, 2), new Among('\u0443\u0439\u0442\u0435', 7, 2), new Among('\u043B\u0438', -1, 1), new Among('\u0438\u043B\u0438', 10, 2), new Among('\u044B\u043B\u0438', 10, 2), new Among('\u0439', -1, 1), new Among('\u0435\u0439', 13, 2), new Among('\u0443\u0439', 13, 2), new Among('\u043B', -1, 1), new Among('\u0438\u043B', 16, 2), new Among('\u044B\u043B', 16, 2), new Among('\u0435\u043C', -1, 1), new Among('\u0438\u043C', -1, 2), new Among('\u044B\u043C', -1, 2), new Among('\u043D', -1, 1), new Among('\u0435\u043D', 22, 2), new Among('\u043B\u043E', -1, 1), new Among('\u0438\u043B\u043E', 24, 2), new Among('\u044B\u043B\u043E', 24, 2), new Among('\u043D\u043E', -1, 1), new Among('\u0435\u043D\u043E', 27, 2), new Among('\u043D\u043D\u043E', 27, 1), new Among('\u0435\u0442', -1, 1), new Among('\u0443\u0435\u0442', 30, 2), new Among('\u0438\u0442', -1, 2), new Among('\u044B\u0442', -1, 2), new Among('\u044E\u0442', -1, 1), new Among('\u0443\u044E\u0442', 34, 2), new Among('\u044F\u0442', -1, 2), new Among('\u043D\u044B', -1, 1), new Among('\u0435\u043D\u044B', 37, 2), new Among('\u0442\u044C', -1, 1), new Among('\u0438\u0442\u044C', 39, 2), new Among('\u044B\u0442\u044C', 39, 2), new Among('\u0435\u0448\u044C', -1, 1), new Among('\u0438\u0448\u044C', -1, 2), new Among('\u044E', -1, 2), new Among('\u0443\u044E', 44, 2) ]; RussianStemmer.a_5 = [ new Among('\u0430', -1, 1), new Among('\u0435\u0432', -1, 1), new Among('\u043E\u0432', -1, 1), new Among('\u0435', -1, 1), new Among('\u0438\u0435', 3, 1), new Among('\u044C\u0435', 3, 1), new Among('\u0438', -1, 1), new Among('\u0435\u0438', 6, 1), new Among('\u0438\u0438', 6, 1), new Among('\u0430\u043C\u0438', 6, 1), new Among('\u044F\u043C\u0438', 6, 1), new Among('\u0438\u044F\u043C\u0438', 10, 1), new Among('\u0439', -1, 1), new Among('\u0435\u0439', 12, 1), new Among('\u0438\u0435\u0439', 13, 1), new Among('\u0438\u0439', 12, 1), new Among('\u043E\u0439', 12, 1), new Among('\u0430\u043C', -1, 1), new Among('\u0435\u043C', -1, 1), new Among('\u0438\u0435\u043C', 18, 1), new Among('\u043E\u043C', -1, 1), new Among('\u044F\u043C', -1, 1), new Among('\u0438\u044F\u043C', 21, 1), new Among('\u043E', -1, 1), new Among('\u0443', -1, 1), new Among('\u0430\u0445', -1, 1), new Among('\u044F\u0445', -1, 1), new Among('\u0438\u044F\u0445', 26, 1), new Among('\u044B', -1, 1), new Among('\u044C', -1, 1), new Among('\u044E', -1, 1), new Among('\u0438\u044E', 30, 1), new Among('\u044C\u044E', 30, 1), new Among('\u044F', -1, 1), new Among('\u0438\u044F', 33, 1), new Among('\u044C\u044F', 33, 1) ]; RussianStemmer.a_6 = [ new Among('\u043E\u0441\u0442', -1, 1), new Among('\u043E\u0441\u0442\u044C', -1, 1) ]; RussianStemmer.a_7 = [ new Among('\u0435\u0439\u0448\u0435', -1, 1), new Among('\u043D', -1, 2), new Among('\u0435\u0439\u0448', -1, 1), new Among('\u044C', -1, 3) ]; RussianStemmer.g_v = [33, 65, 8, 232]; module.exports = RussianStemmer;