// node-nlp
// Version:
// Library for NLU (Natural Language Understanding) done in Node.js
// 464 lines (454 loc) • 10.5 kB
// JavaScript
/*
* Copyright (c) AXA Shared Services Spain S.A.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* 'Software'), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
const BaseStemmer = require('./base-stemmer');
const Among = require('./among');
/**
 * Stemmer for Irish built on top of BaseStemmer.
 *
 * Stemming happens in two phases (see innerStem): a forward pass that
 * rewrites a word-initial prefix listed in IrishStemmer.a_0, then a backward
 * pass that tries the suffix tables a_1, a_2 and a_3 in that order. Suffix
 * removal is guarded by the offsets I_pV, I_p1 and I_p2 computed by
 * r_mark_regions.
 *
 * The cursor / limit / limit_backward / bra / ket fields and the find_among,
 * find_among_b, in_grouping, out_grouping, slice_del and slice_from helpers
 * are not defined in this class; they are expected to come from BaseStemmer.
 */
class IrishStemmer extends BaseStemmer {
  /**
   * @param {*} tokenizer - Forwarded unchanged to the BaseStemmer constructor.
   */
  constructor(tokenizer) {
    super(tokenizer);
    // Region offsets; overwritten on every r_mark_regions() call.
    this.I_p1 = 0;
    this.I_p2 = 0;
    this.I_pV = 0;
  }

  /**
   * Computes the region offsets I_pV, I_p1 and I_p2 from the current cursor.
   *
   * All three default to `limit`. I_pV becomes the cursor position reached
   * once in_grouping(g_v) first succeeds. I_p1 is the position reached after
   * one in_grouping success followed by one out_grouping success, and I_p2
   * the position after a second such pair. The cursor is restored on exit.
   *
   * NOTE(review): g_v presumably encodes the vowel set, and in_grouping /
   * out_grouping presumably advance the cursor on success — confirm against
   * BaseStemmer.
   *
   * @returns {boolean} Always true.
   */
  r_mark_regions() {
    const inGroupV = () => this.in_grouping(IrishStemmer.g_v, 97, 250);
    const outGroupV = () => this.out_grouping(IrishStemmer.g_v, 97, 250);
    // Steps the cursor forward one position at a time until `matches()`
    // succeeds; gives up (false) when the limit is reached first.
    const seek = matches => {
      while (!matches()) {
        if (this.cursor >= this.limit) {
          return false;
        }
        this.cursor++;
      }
      return true;
    };

    this.I_pV = this.limit;
    this.I_p1 = this.limit;
    this.I_p2 = this.limit;
    const start = this.cursor;

    if (seek(inGroupV)) {
      this.I_pV = this.cursor;
    }
    this.cursor = start;

    // I_p2 is only attempted once I_p1 has been found; a failure at any
    // step leaves the remaining offsets at `limit`.
    if (seek(inGroupV) && seek(outGroupV)) {
      this.I_p1 = this.cursor;
      if (seek(inGroupV) && seek(outGroupV)) {
        this.I_p2 = this.cursor;
      }
    }
    this.cursor = start;
    return true;
  }

  /**
   * Rewrites a word-initial prefix found in IrishStemmer.a_0: the matched
   * slice is either deleted or replaced by a single consonant, depending on
   * the value find_among returns.
   *
   * @returns {boolean} false when find_among reports 0, true otherwise.
   */
  r_initial_morph() {
    this.bra = this.cursor;
    const among_var = this.find_among(IrishStemmer.a_0);
    if (among_var === 0) {
      return false;
    }
    this.ket = this.cursor;
    switch (among_var) {
      case 1:
      case 2:
      case 4:
        this.slice_del();
        break;
      case 3:
      case 9:
      case 17:
        this.slice_from('f');
        break;
      case 5:
      case 12:
        this.slice_from('s');
        break;
      case 6:
      case 14:
        this.slice_from('b');
        break;
      case 7:
      case 15:
        this.slice_from('c');
        break;
      case 8:
      case 16:
        this.slice_from('d');
        break;
      case 10:
      case 18:
        this.slice_from('g');
        break;
      case 11:
      case 20:
        this.slice_from('p');
        break;
      case 13:
      case 21:
        this.slice_from('t');
        break;
      case 19:
        this.slice_from('m');
        break;
      default:
        break;
    }
    return true;
  }

  /** @returns {boolean} true when the cursor is at or beyond I_pV. */
  r_RV() {
    return this.I_pV <= this.cursor;
  }

  /** @returns {boolean} true when the cursor is at or beyond I_p1. */
  r_R1() {
    return this.I_p1 <= this.cursor;
  }

  /** @returns {boolean} true when the cursor is at or beyond I_p2. */
  r_R2() {
    return this.I_p2 <= this.cursor;
  }

  /**
   * Deletes a suffix from IrishStemmer.a_1 (searched with find_among_b).
   * Result 1 requires r_R1(), result 2 requires r_R2().
   *
   * @returns {boolean} false when nothing matched or the region test
   *   failed, true otherwise.
   */
  r_noun_sfx() {
    this.ket = this.cursor;
    const among_var = this.find_among_b(IrishStemmer.a_1);
    if (among_var === 0) {
      return false;
    }
    this.bra = this.cursor;
    switch (among_var) {
      case 1:
        if (!this.r_R1()) {
          return false;
        }
        this.slice_del();
        break;
      case 2:
        if (!this.r_R2()) {
          return false;
        }
        this.slice_del();
        break;
      default:
        break;
    }
    return true;
  }

  /**
   * Handles a suffix from IrishStemmer.a_2 (searched with find_among_b).
   * Result 1 deletes the suffix if r_R2() holds; results 2-6 replace it with
   * a fixed shorter form without any region test.
   *
   * @returns {boolean} false when nothing matched or the r_R2() test
   *   failed, true otherwise.
   */
  r_deriv() {
    this.ket = this.cursor;
    const among_var = this.find_among_b(IrishStemmer.a_2);
    if (among_var === 0) {
      return false;
    }
    this.bra = this.cursor;
    switch (among_var) {
      case 1:
        if (!this.r_R2()) {
          return false;
        }
        this.slice_del();
        break;
      case 2:
        this.slice_from('arc');
        break;
      case 3:
        this.slice_from('gin');
        break;
      case 4:
        this.slice_from('graf');
        break;
      case 5:
        this.slice_from('paite');
        break;
      case 6:
        this.slice_from('\u00F3id');
        break;
      default:
        break;
    }
    return true;
  }

  /**
   * Deletes a suffix from IrishStemmer.a_3 (searched with find_among_b).
   * Result 1 requires r_RV(), result 2 requires r_R1().
   *
   * @returns {boolean} false when nothing matched or the region test
   *   failed, true otherwise.
   */
  r_verb_sfx() {
    this.ket = this.cursor;
    const among_var = this.find_among_b(IrishStemmer.a_3);
    if (among_var === 0) {
      return false;
    }
    this.bra = this.cursor;
    switch (among_var) {
      case 1:
        if (!this.r_RV()) {
          return false;
        }
        this.slice_del();
        break;
      case 2:
        if (!this.r_R1()) {
          return false;
        }
        this.slice_del();
        break;
      default:
        break;
    }
    return true;
  }

  /**
   * Runs the stemming algorithm on the stemmer's current buffer.
   *
   * Each routine is optional: its boolean result is ignored and the cursor
   * is put back afterwards, so a routine that does not apply has no effect.
   *
   * NOTE(review): the name `innerStem` was chosen for this helper; confirm
   * BaseStemmer does not define a member of that name with another meaning.
   *
   * @returns {boolean} Always true.
   */
  innerStem() {
    // Forward phase: prefix rewrite, then region marking, both from the
    // same starting cursor.
    const start = this.cursor;
    this.r_initial_morph();
    this.cursor = start;
    this.r_mark_regions();
    this.cursor = start;

    // Backward phase: work from the end of the buffer.
    this.limit_backward = this.cursor;
    this.cursor = this.limit;
    const attempt = routine => {
      // Remember the cursor relative to the end: a routine may change
      // `limit`, so the restore has to use the limit as it is afterwards.
      const fromEnd = this.limit - this.cursor;
      routine.call(this);
      this.cursor = this.limit - fromEnd;
    };
    attempt(this.r_noun_sfx);
    attempt(this.r_deriv);
    attempt(this.r_verb_sfx);

    this.cursor = this.limit_backward;
    return true;
  }

  /**
   * Public entry point. With no arguments it stems the current buffer via
   * innerStem(); with arguments it defers to BaseStemmer's stem().
   *
   * The zero-argument algorithm must not itself be a method named `stem`:
   * a class keeps only the last method declared under a given name, so a
   * zero-argument call would dispatch back to this method and recurse
   * until the stack overflows.
   *
   * @param {...*} args - Arguments forwarded to BaseStemmer's stem().
   * @returns {*} true for the zero-argument form, otherwise whatever
   *   BaseStemmer's stem() returns.
   */
  stem(...args) {
    if (args.length === 0) {
      return this.innerStem();
    }
    return super.stem(...args);
  }
}
// Word-initial prefixes searched by r_initial_morph() through find_among().
// Each row is [text, link, code] and becomes new Among(text, link, code).
// NOTE(review): `code` is presumably the value find_among() hands back (it
// lines up with the switch arms in r_initial_morph), and `link` presumably
// points at the row holding a shorter entry that this one extends (-1 for
// none) — confirm against Among / find_among.
IrishStemmer.a_0 = [
  ["b'", -1, 4],
  ['bh', -1, 14],
  ['bhf', 1, 9],
  ['bp', -1, 11],
  ['ch', -1, 15],
  ["d'", -1, 2],
  ["d'fh", 5, 3],
  ['dh', -1, 16],
  ['dt', -1, 13],
  ['fh', -1, 17],
  ['gc', -1, 7],
  ['gh', -1, 18],
  ['h-', -1, 1],
  ["m'", -1, 4],
  ['mb', -1, 6],
  ['mh', -1, 19],
  ['n-', -1, 1],
  ['nd', -1, 8],
  ['ng', -1, 10],
  ['ph', -1, 20],
  ['sh', -1, 5],
  ['t-', -1, 1],
  ['th', -1, 21],
  ['ts', -1, 12]
].map(([text, link, code]) => new Among(text, link, code));
// Suffixes searched backwards by r_noun_sfx() through find_among_b().
// Each row is [text, link, code] and becomes new Among(text, link, code).
// NOTE(review): `code` presumably selects the switch arm in r_noun_sfx
// (1 is guarded by r_R1, 2 by r_R2); `link` presumably references the row of
// a shorter entry that this one ends with (-1 for none) — confirm.
IrishStemmer.a_1 = [
  ['\u00EDochta', -1, 1],
  ['a\u00EDochta', 0, 1],
  ['ire', -1, 2],
  ['aire', 2, 2],
  ['abh', -1, 1],
  ['eabh', 4, 1],
  ['ibh', -1, 1],
  ['aibh', 6, 1],
  ['amh', -1, 1],
  ['eamh', 8, 1],
  ['imh', -1, 1],
  ['aimh', 10, 1],
  ['\u00EDocht', -1, 1],
  ['a\u00EDocht', 12, 1],
  ['ir\u00ED', -1, 2],
  ['air\u00ED', 14, 2]
].map(([text, link, code]) => new Among(text, link, code));
// Suffixes searched backwards by r_deriv() through find_among_b().
// Each row is [text, link, code] and becomes new Among(text, link, code).
// NOTE(review): `code` presumably selects the switch arm in r_deriv
// (1 deletes under r_R2; 2-6 substitute 'arc', 'gin', 'graf', 'paite',
// '\u00F3id'); `link` presumably references the row of a shorter entry that
// this one ends with (-1 for none) — confirm.
IrishStemmer.a_2 = [
  ['\u00F3ideacha', -1, 6],
  ['patacha', -1, 5],
  ['achta', -1, 1],
  ['arcachta', 2, 2],
  ['eachta', 2, 1],
  ['grafa\u00EDochta', -1, 4],
  ['paite', -1, 5],
  ['ach', -1, 1],
  ['each', 7, 1],
  ['\u00F3ideach', 8, 6],
  ['gineach', 8, 3],
  ['patach', 7, 5],
  ['grafa\u00EDoch', -1, 4],
  ['pataigh', -1, 5],
  ['\u00F3idigh', -1, 6],
  ['acht\u00FAil', -1, 1],
  ['eacht\u00FAil', 15, 1],
  ['gineas', -1, 3],
  ['ginis', -1, 3],
  ['acht', -1, 1],
  ['arcacht', 19, 2],
  ['eacht', 19, 1],
  ['grafa\u00EDocht', -1, 4],
  ['arcachta\u00ED', -1, 2],
  ['grafa\u00EDochta\u00ED', -1, 4]
].map(([text, link, code]) => new Among(text, link, code));
// Suffixes searched backwards by r_verb_sfx() through find_among_b().
// Each row is [text, link, code] and becomes new Among(text, link, code).
// NOTE(review): `code` presumably selects the switch arm in r_verb_sfx
// (1 is guarded by r_RV, 2 by r_R1); `link` presumably references the row of
// a shorter entry that this one ends with (-1 for none) — confirm.
IrishStemmer.a_3 = [
  ['imid', -1, 1],
  ['aimid', 0, 1],
  ['\u00EDmid', -1, 1],
  ['a\u00EDmid', 2, 1],
  ['adh', -1, 2],
  ['eadh', 4, 2],
  ['faidh', -1, 1],
  ['fidh', -1, 1],
  ['\u00E1il', -1, 2],
  ['ain', -1, 2],
  ['tear', -1, 2],
  ['tar', -1, 2]
].map(([text, link, code]) => new Among(text, link, code));
IrishStemmer.g_v = [
17,
65,
16,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0,
1,
17,
4,
2
];
module.exports = IrishStemmer;