UNPKG

node-nlp

Version:

Library for NLU (Natural Language Understanding) done in Node.js

916 lines (889 loc) 21.5 kB
/*
 * Copyright (c) AXA Shared Services Spain S.A.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

const BaseStemmer = require('./base-stemmer');
const Among = require('./among');

/**
 * This class was automatically generated by a Snowball to JSX compiler.
 * It implements the stemming algorithm defined by a snowball script.
 *
 * Instance state owned by this class:
 *  - `I_p1`, `I_p2`: region marks computed by `r_mark_regions` and tested by
 *    `r_R1` / `r_R2`.
 *  - `B_e_found`: set by `r_e_ending` when it removed a trailing "e"; read by
 *    the "bar" rule in `r_standard_suffix`.
 *
 * Everything else (`cursor`, `limit`, `limit_backward`, `bra`, `ket`,
 * `find_among*`, `in_grouping*`, `out_grouping*`, `eq_s*`, `slice_from`,
 * `slice_del`) is inherited from BaseStemmer, which is not visible here.
 * NOTE(review): the comments below assume the usual Snowball runtime contract
 * for those helpers (a successful match moves the cursor, a failed one leaves
 * it in place) - confirm against base-stemmer.
 *
 * Several steps use labeled blocks (`label: { ... break label; }`) to leave a
 * rule early while still running the cursor restore that follows the block.
 */
class DutchStemmer extends BaseStemmer {
  /**
   * @param {*} tokenizer Forwarded unchanged to the BaseStemmer constructor.
   */
  constructor(tokenizer) {
    super(tokenizer);
    this.I_p2 = 0;
    this.I_p1 = 0;
    this.B_e_found = false;
  }

  /**
   * Copies the Dutch-specific state from another stemmer, then lets the base
   * class copy its own state.
   *
   * @param {DutchStemmer} other Stemmer to copy from.
   */
  copy_from(other) {
    this.I_p2 = other.I_p2;
    this.I_p1 = other.I_p1;
    this.B_e_found = other.B_e_found;
    super.copy_from(other);
  }

  /**
   * Prelude: rewrites accented vowels to their plain form, then marks the
   * letters "y" and "i" in certain positions as upper-case "Y" / "I".
   *
   * @returns {boolean} false only if a slice operation fails, true otherwise.
   */
  r_prelude() {
    // Among results 1..5 of a_0 map onto these replacement letters.
    const plainVowels = ['a', 'e', 'i', 'o', 'u'];

    // Pass 1: walk the whole word replacing accented vowels. The cursor is
    // put back to where it started once the pass is over.
    const entry = this.cursor;
    for (;;) {
      const roundStart = this.cursor;
      this.bra = this.cursor;
      const amongVar = this.find_among(DutchStemmer.a_0, 11);
      if (amongVar === 0) {
        this.cursor = roundStart;
        break;
      }
      this.ket = this.cursor;
      if (amongVar === 6) {
        // Empty-string entry of a_0: nothing to replace, step one character.
        if (this.cursor >= this.limit) {
          this.cursor = roundStart;
          break;
        }
        this.cursor++;
      } else if (amongVar >= 1 && amongVar <= 5) {
        if (!this.slice_from(plainVowels[amongVar - 1])) {
          return false;
        }
      }
    }
    this.cursor = entry;

    // A "y" at the current position becomes "Y"; otherwise the cursor is
    // restored and nothing changes.
    const tryStart = this.cursor;
    this.bra = this.cursor;
    if (this.eq_s(1, 'y')) {
      this.ket = this.cursor;
      if (!this.slice_from('Y')) {
        return false;
      }
    } else {
      this.cursor = tryStart;
    }

    // Pass 2: repeatedly scan forward for a vowel followed by either
    // "i" + vowel (-> "I") or "y" (-> "Y"). After each replacement the cursor
    // goes back to the position of the leading vowel and the scan restarts.
    repeat: for (;;) {
      const repeatStart = this.cursor;
      for (;;) {
        const scanStart = this.cursor;
        attempt: {
          if (!this.in_grouping(DutchStemmer.g_v, 97, 232)) {
            break attempt;
          }
          this.bra = this.cursor;
          const afterVowel = this.cursor;
          let replaced = false;
          if (this.eq_s(1, 'i')) {
            this.ket = this.cursor;
            if (this.in_grouping(DutchStemmer.g_v, 97, 232)) {
              if (!this.slice_from('I')) {
                return false;
              }
              replaced = true;
            }
          }
          if (!replaced) {
            // First alternative failed: rewind and try the "y" alternative.
            this.cursor = afterVowel;
            if (!this.eq_s(1, 'y')) {
              break attempt;
            }
            this.ket = this.cursor;
            if (!this.slice_from('Y')) {
              return false;
            }
          }
          this.cursor = scanStart;
          continue repeat;
        }
        // No match at scanStart: move one character on, or stop at the limit.
        this.cursor = scanStart;
        if (this.cursor >= this.limit) {
          this.cursor = repeatStart;
          break repeat;
        }
        this.cursor++;
      }
    }
    return true;
  }

  /**
   * Computes the region marks `I_p1` and `I_p2`.
   *
   * Both start at `limit`. The cursor is moved past a character in the vowel
   * grouping `g_v` and then past a character outside it; that position
   * becomes `I_p1` (raised to 3 if smaller). The same two scans are repeated
   * to obtain `I_p2`.
   *
   * @returns {boolean} false if the limit is reached before both marks are
   *   set (marks not yet set keep the value `limit`), true otherwise.
   */
  r_mark_regions() {
    this.I_p1 = this.limit;
    this.I_p2 = this.limit;

    // Advances until the grouping test succeeds; false when the limit is hit.
    const goPast = (wantVowel) => {
      for (;;) {
        const matched = wantVowel
          ? this.in_grouping(DutchStemmer.g_v, 97, 232)
          : this.out_grouping(DutchStemmer.g_v, 97, 232);
        if (matched) {
          return true;
        }
        if (this.cursor >= this.limit) {
          return false;
        }
        this.cursor++;
      }
    };

    if (!goPast(true) || !goPast(false)) {
      return false;
    }
    this.I_p1 = this.cursor;
    if (this.I_p1 < 3) {
      this.I_p1 = 3;
    }

    if (!goPast(true) || !goPast(false)) {
      return false;
    }
    this.I_p2 = this.cursor;
    return true;
  }

  /**
   * Postlude: turns the "Y" / "I" markers written by the prelude back into
   * lower-case "y" / "i", stepping over every other character.
   *
   * @returns {boolean} false only if a slice operation fails, true otherwise.
   */
  r_postlude() {
    for (;;) {
      const roundStart = this.cursor;
      this.bra = this.cursor;
      const amongVar = this.find_among(DutchStemmer.a_1, 3);
      if (amongVar === 0) {
        this.cursor = roundStart;
        break;
      }
      this.ket = this.cursor;
      if (amongVar === 1) {
        if (!this.slice_from('y')) {
          return false;
        }
      } else if (amongVar === 2) {
        if (!this.slice_from('i')) {
          return false;
        }
      } else if (amongVar === 3) {
        // Empty-string entry of a_1: step one character or stop at the limit.
        if (this.cursor >= this.limit) {
          this.cursor = roundStart;
          break;
        }
        this.cursor++;
      }
    }
    return true;
  }

  /**
   * @returns {boolean} true when the cursor is at or after the `I_p1` mark.
   */
  r_R1() {
    return this.I_p1 <= this.cursor;
  }

  /**
   * @returns {boolean} true when the cursor is at or after the `I_p2` mark.
   */
  r_R2() {
    return this.I_p2 <= this.cursor;
  }

  /**
   * If the text before the cursor ends in "dd", "kk" or "tt" (table a_2),
   * deletes the single character immediately before the cursor.
   *
   * @returns {boolean} true if a character was deleted, false otherwise.
   */
  r_undouble() {
    // The among lookup is only a test: the cursor is restored afterwards.
    const fromEnd = this.limit - this.cursor;
    if (this.find_among_b(DutchStemmer.a_2, 3) === 0) {
      return false;
    }
    this.cursor = this.limit - fromEnd;

    this.ket = this.cursor;
    if (this.cursor <= this.limit_backward) {
      return false;
    }
    this.cursor--;
    this.bra = this.cursor;
    if (!this.slice_del()) {
      return false;
    }
    return true;
  }

  /**
   * Removes a trailing "e" when it lies in R1 and is preceded by a character
   * outside the vowel grouping, records that in `B_e_found`, then calls
   * `r_undouble`.
   *
   * `B_e_found` is always reset to false on entry.
   *
   * @returns {boolean} true only if the "e" was removed and `r_undouble`
   *   also succeeded.
   */
  r_e_ending() {
    this.B_e_found = false;

    this.ket = this.cursor;
    if (!this.eq_s_b(1, 'e')) {
      return false;
    }
    this.bra = this.cursor;
    if (!this.r_R1()) {
      return false;
    }

    // Look-behind test only; the cursor is restored before deleting.
    const fromEnd = this.limit - this.cursor;
    if (!this.out_grouping_b(DutchStemmer.g_v, 97, 232)) {
      return false;
    }
    this.cursor = this.limit - fromEnd;

    if (!this.slice_del()) {
      return false;
    }
    this.B_e_found = true;
    return this.r_undouble();
  }

  /**
   * Deletes the current [ket, bra] slice (set by the caller) when the cursor
   * is in R1, the preceding character is outside the vowel grouping and the
   * preceding text is not "gem"; then calls `r_undouble`.
   *
   * @returns {boolean} true only if the slice was deleted and `r_undouble`
   *   also succeeded.
   */
  r_en_ending() {
    if (!this.r_R1()) {
      return false;
    }

    const beforeTest = this.limit - this.cursor;
    if (!this.out_grouping_b(DutchStemmer.g_v, 97, 232)) {
      return false;
    }
    this.cursor = this.limit - beforeTest;

    // Negative look-behind: reject when preceded by "gem".
    const beforeGem = this.limit - this.cursor;
    if (this.eq_s_b(3, 'gem')) {
      return false;
    }
    this.cursor = this.limit - beforeGem;

    if (!this.slice_del()) {
      return false;
    }
    return this.r_undouble();
  }

  /**
   * Applies the suffix-removal steps, working backwards from the end of the
   * word. Each step restores the cursor to the same distance from `limit`
   * that it had on entry to the step, whether or not the step changed
   * anything.
   *
   * @returns {boolean} false only if a slice operation fails, true otherwise.
   */
  r_standard_suffix() {
    // Step 1: suffixes from table a_3 ("heden", "en"/"ene", "s"/"se").
    const step1Mark = this.limit - this.cursor;
    this.ket = this.cursor;
    const inflection = this.find_among_b(DutchStemmer.a_3, 5);
    if (inflection !== 0) {
      this.bra = this.cursor;
      if (inflection === 1) {
        // "heden" -> "heid", only inside R1.
        if (this.r_R1() && !this.slice_from('heid')) {
          return false;
        }
      } else if (inflection === 2) {
        this.r_en_ending();
      } else if (inflection === 3) {
        // "s"/"se": delete inside R1 when preceded by a character outside
        // the g_v_j grouping.
        if (
          this.r_R1() &&
          this.out_grouping_b(DutchStemmer.g_v_j, 97, 232) &&
          !this.slice_del()
        ) {
          return false;
        }
      }
    }
    this.cursor = this.limit - step1Mark;

    // Step 2: trailing "e". The result is ignored; B_e_found carries the
    // outcome forward to step 4.
    const step2Mark = this.limit - this.cursor;
    this.r_e_ending();
    this.cursor = this.limit - step2Mark;

    // Step 3: "heid" in R2 and not preceded by "c"; afterwards a remaining
    // "en" is handed to r_en_ending.
    const step3Mark = this.limit - this.cursor;
    heid: {
      this.ket = this.cursor;
      if (!this.eq_s_b(4, 'heid')) {
        break heid;
      }
      this.bra = this.cursor;
      if (!this.r_R2()) {
        break heid;
      }
      const beforeC = this.limit - this.cursor;
      if (this.eq_s_b(1, 'c')) {
        break heid;
      }
      this.cursor = this.limit - beforeC;
      if (!this.slice_del()) {
        return false;
      }
      this.ket = this.cursor;
      if (!this.eq_s_b(2, 'en')) {
        break heid;
      }
      this.bra = this.cursor;
      this.r_en_ending();
    }
    this.cursor = this.limit - step3Mark;

    // Step 4: suffixes from table a_4. Every rule requires R2.
    const step4Mark = this.limit - this.cursor;
    derivational: {
      this.ket = this.cursor;
      const suffix = this.find_among_b(DutchStemmer.a_4, 6);
      if (suffix === 0) {
        break derivational;
      }
      this.bra = this.cursor;
      if (!this.r_R2()) {
        break derivational;
      }
      switch (suffix) {
        case 1: {
          // "end" / "ing": delete, then either strip a following "ig"
          // (in R2, not preceded by "e") or undouble.
          if (!this.slice_del()) {
            return false;
          }
          const afterDelete = this.limit - this.cursor;
          let removedIg = false;
          ig: {
            this.ket = this.cursor;
            if (!this.eq_s_b(2, 'ig')) {
              break ig;
            }
            this.bra = this.cursor;
            if (!this.r_R2()) {
              break ig;
            }
            const beforeE = this.limit - this.cursor;
            if (this.eq_s_b(1, 'e')) {
              break ig;
            }
            this.cursor = this.limit - beforeE;
            if (!this.slice_del()) {
              return false;
            }
            removedIg = true;
          }
          if (!removedIg) {
            this.cursor = this.limit - afterDelete;
            this.r_undouble();
          }
          break;
        }
        case 2: {
          // "ig": delete unless preceded by "e".
          const beforeE = this.limit - this.cursor;
          if (this.eq_s_b(1, 'e')) {
            break;
          }
          this.cursor = this.limit - beforeE;
          if (!this.slice_del()) {
            return false;
          }
          break;
        }
        case 3:
          // "lijk": delete, then try the "e" ending.
          if (!this.slice_del()) {
            return false;
          }
          this.r_e_ending();
          break;
        case 4:
          // "baar": delete.
          if (!this.slice_del()) {
            return false;
          }
          break;
        case 5:
          // "bar": delete only if step 2 (or "lijk") removed an "e".
          if (this.B_e_found && !this.slice_del()) {
            return false;
          }
          break;
        default:
          break;
      }
    }
    this.cursor = this.limit - step4Mark;

    // Step 5: after a character outside g_v_I, if the preceding text matches
    // "aa"/"ee"/"oo"/"uu" (table a_5) and that is itself preceded by a
    // character outside g_v, delete one character of the doubled pair.
    const step5Mark = this.limit - this.cursor;
    if (this.out_grouping_b(DutchStemmer.g_v_I, 73, 232)) {
      const beforeDouble = this.limit - this.cursor;
      if (
        this.find_among_b(DutchStemmer.a_5, 4) !== 0 &&
        this.out_grouping_b(DutchStemmer.g_v, 97, 232)
      ) {
        this.cursor = this.limit - beforeDouble;
        this.ket = this.cursor;
        if (this.cursor > this.limit_backward) {
          this.cursor--;
          this.bra = this.cursor;
          if (!this.slice_del()) {
            return false;
          }
        }
      }
    }
    this.cursor = this.limit - step5Mark;

    return true;
  }

  /**
   * Runs the full pipeline on the current buffer: prelude, region marking,
   * backward suffix removal, postlude. The result of each stage is ignored
   * and the cursor is restored after every stage.
   *
   * @returns {boolean} Always true.
   */
  stem() {
    const preludeStart = this.cursor;
    this.r_prelude();
    this.cursor = preludeStart;

    const regionsStart = this.cursor;
    this.r_mark_regions();
    this.cursor = regionsStart;

    // Switch to backward mode: scan from the end of the word.
    this.limit_backward = this.cursor;
    this.cursor = this.limit;

    const suffixMark = this.limit - this.cursor;
    this.r_standard_suffix();
    this.cursor = this.limit - suffixMark;

    // Back to forward mode for the postlude.
    this.cursor = this.limit_backward;

    const postludeStart = this.cursor;
    this.r_postlude();
    this.cursor = postludeStart;

    return true;
  }
}

DutchStemmer.methodObject = new DutchStemmer();

// Accented vowels handled by r_prelude; the empty entry (result 6) is the
// "any other character" fallback.
DutchStemmer.a_0 = [
  new Among('', -1, 6),
  new Among('\u00E1', 0, 1),
  new Among('\u00E4', 0, 1),
  new Among('\u00E9', 0, 2),
  new Among('\u00EB', 0, 2),
  new Among('\u00ED', 0, 3),
  new Among('\u00EF', 0, 3),
  new Among('\u00F3', 0, 4),
  new Among('\u00F6', 0, 4),
  new Among('\u00FA', 0, 5),
  new Among('\u00FC', 0, 5)
];

// Markers undone by r_postlude; the empty entry (result 3) is the fallback.
DutchStemmer.a_1 = [
  new Among('', -1, 3),
  new Among('I', 0, 2),
  new Among('Y', 0, 1)
];

// Doubled consonants tested by r_undouble.
DutchStemmer.a_2 = [
  new Among('dd', -1, -1),
  new Among('kk', -1, -1),
  new Among('tt', -1, -1)
];

// Step 1 suffixes of r_standard_suffix.
DutchStemmer.a_3 = [
  new Among('ene', -1, 2),
  new Among('se', -1, 3),
  new Among('en', -1, 2),
  new Among('heden', 2, 1),
  new Among('s', -1, 3)
];

// Step 4 suffixes of r_standard_suffix.
DutchStemmer.a_4 = [
  new Among('end', -1, 1),
  new Among('ig', -1, 2),
  new Among('ing', -1, 1),
  new Among('lijk', -1, 3),
  new Among('baar', -1, 4),
  new Among('bar', -1, 5)
];

// Doubled vowels tested by step 5 of r_standard_suffix.
DutchStemmer.a_5 = [
  new Among('aa', -1, -1),
  new Among('ee', -1, -1),
  new Among('oo', -1, -1),
  new Among('uu', -1, -1)
];

// Character groupings passed to the in_grouping* / out_grouping* helpers
// together with the character-code bounds used at each call site.
// NOTE(review): presumably bitmaps indexed from the lower bound - confirm
// against base-stemmer.
DutchStemmer.g_v = [17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128];

DutchStemmer.g_v_I = [
  1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128
];

DutchStemmer.g_v_j = [17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128];

module.exports = DutchStemmer;