phpmorphy-locutus
Version:
The original package is located at https://github.com/antixrist/node-phpmorphy; however, it used the phpjs module, which contained dependencies with critical vulnerabilities and is no longer maintained. This package was swapped for the newer locutus package.
329 lines (274 loc) • 8.02 kB
JavaScript
/**
* This file is part of phpMorphy library
*
* Copyright c 2007-2008 Kamaev Vladimir <heromantor@users.sourceforge.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
import _ from 'lodash';
import fs from 'fs';
import { php, castArray } from '../../../utils';
import { Morphy_Fsa } from '../fsa';
/**
 * Reads one packed transition (4 bytes) from an absolute position of the file.
 *
 * The position is always passed explicitly: `fs.readSync` with an integer
 * position does not move the descriptor's own file position, so a follow-up
 * read with a `null` position would NOT continue right after this one.
 *
 * @param {number} fh - open file descriptor
 * @param {number} position - absolute byte offset inside the file
 * @returns {number} the value decoded by `php.unpack('V', ...)`
 */
function readTransAt (fh, position) {
  const buf = Buffer.alloc(4);

  fs.readSync(fh, buf, 0, 4, position);

  return php.unpack('V', buf)[0];
}

/**
 * Tree-layout FSA that reads its transitions directly from an open file
 * descriptor (`this.resource`).
 *
 * Layout of a packed transition, as decoded by `unpackTranses`:
 *   - bits 0..7   (`& 0xFF`)              attr  - the byte the transition matches
 *   - bit  8      (`& 0x0100`)            term  - transition points to an annotation
 *   - bit  9      (`& 0x0200`)            llast - no left child in the state tree
 *   - bit  10     (`& 0x0400`)            rlast - no right child in the state tree
 *   - bits 11..31 (`>> 11 & 0x1FFFFF`)    dest  - index of the destination state
 *
 * State indexes are counted in 4-byte units starting at `this.fsa_start`.
 */
class Morphy_Fsa_Tree_File extends Morphy_Fsa {
  /**
   * Walks the automaton along the bytes of `word`, starting from `trans`.
   *
   * @param {number} trans - packed transition to start from
   * @param {string|Buffer} word - word to walk; converted with `Buffer.from`
   * @param {boolean} [readAnnot=true] - read the annotation once the whole word was walked
   * @returns {{result: boolean, annot: (Buffer|null), walked: number, last_trans: number, word_trans: number}}
   *   `result` - whether the whole word was walked (and, with `readAnnot`, whether
   *   the final state starts with an annotation transition); `annot` - annotation
   *   bytes or null; `walked` - number of bytes consumed; `last_trans` - last
   *   transition read; `word_trans` - transition reached by the last consumed byte.
   * @throws {Error} when a single state search takes more than 256 steps
   */
  walk (trans, word, readAnnot = true) {
    const fh = this.resource;
    const fsa_start = this.fsa_start;
    const wordBuf = Buffer.from(word);
    const c = wordBuf.length;
    let i = 0;

    for (; i < c; i++) {
      const prev_trans = trans;
      const char = php.strings.ord(wordBuf, i);
      let found = true;
      let start_offset = fsa_start + (((trans >> 11) & 0x1FFFFF) << 2);

      // read first trans in state
      trans = readTransAt(fh, start_offset);

      // If first trans is term (i.e. pointing to annot) then skip it
      if (trans & 0x0100) {
        if ((trans & 0x0200) && (trans & 0x0400)) {
          // It is the only transition in the state: nowhere to go
          found = false;
        } else {
          start_offset += 4;
          trans = readTransAt(fh, start_offset);
        }
      }

      if (found) {
        // Binary search through the state; `idx` is the 1-based position of
        // the current node, children of node `idx` are `2*idx` and `2*idx + 1`.
        let idx = 1;

        for (let j = 0; ; j++) {
          const attr = trans & 0xFF;

          if (attr === char) {
            break;
          }

          if (attr > char) {
            if (trans & 0x0200) {
              found = false;
              break;
            }

            idx = idx << 1;
          } else {
            if (trans & 0x0400) {
              found = false;
              break;
            }

            idx = (idx << 1) + 1;
          }

          // guard against corrupted data that would loop forever
          if (j > 255) {
            throw new Error('Infinite recursion possible');
          }

          // read next trans
          trans = readTransAt(fh, start_offset + ((idx - 1) << 2));
        }
      }

      if (!found) {
        // restore the transition reached before the failed byte
        trans = prev_trans;
        break;
      }
    }

    const word_trans = trans;
    let annot = null;
    let result = false;

    if (i >= c) {
      // Walked all bytes of the word
      result = true;

      if (readAnnot) {
        // read annot trans: the first transition of the destination state
        trans = readTransAt(fh, fsa_start + (((trans >> 11) & 0x1FFFFF) << 2));

        if ((trans & 0x0100) === 0) {
          result = false;
        } else {
          annot = this.getAnnot(trans);
        }
      }
    }

    return {
      result,
      annot,
      walked: i,
      last_trans: trans,
      word_trans
    };
  }

  /**
   * Depth-first traversal of everything reachable from `startNode`; invokes
   * `callback` for every annotation transition found.
   *
   * The callback is invoked through `php.funchand.call_user_func` as
   * `callback(null, annot)` and the traversal stops as soon as it returns a
   * falsy value.
   *
   * @param {number} startNode - packed transition whose destination state is the traversal root
   * @param {*} callback - callable accepted by `php.funchand.call_user_func`
   * @param {boolean} [readAnnot=true] - pass annotation bytes (`getAnnot`) instead of the raw transition
   * @param {string} [path=] - unused; kept for signature compatibility. It is not
   *   used anywhere, not even in `Morphy_Morphier_PredictCollector.collect`,
   *   which receives it through the callback (always as `null`).
   * @returns {number} number of annotation transitions visited
   */
  collect (startNode, callback, readAnnot = true, path = '') {
    let total = 0;
    // Each frame remembers a state and the index of its next unvisited transition
    const stack = [{
      state: this.readState((startNode >> 11) & 0x1FFFFF),
      next: 0
    }];

    while (stack.length) {
      const frame = stack[stack.length - 1];

      if (frame.next >= frame.state.length) {
        // state exhausted: go back to the parent state
        stack.pop();
        continue;
      }

      const trans = frame.state[frame.next++];

      if (trans & 0x0100) {
        total++;

        const annot = readAnnot ? this.getAnnot(trans) : trans;

        if (!php.funchand.call_user_func(callback, null, annot)) {
          return total;
        }
      } else {
        // descend into the destination state before visiting the siblings
        stack.push({
          state: this.readState((trans >> 11) & 0x1FFFFF),
          next: 0
        });
      }
    }

    return total;
  }

  /**
   * Reads all packed transitions of one state, in the order they are stored.
   *
   * @param {number} index - state index (in 4-byte units from `this.fsa_start`)
   * @returns {number[]} packed transitions of the state
   */
  readState (index) {
    const fh = this.resource;
    const result = [];
    let offset = this.fsa_start + (index << 2);

    // read first trans
    let trans = readTransAt(fh, offset);

    // check if first trans is pointer to annot, and not single in state
    if ((trans & 0x0100) && !((trans & 0x0200) || (trans & 0x0400))) {
      result.push(trans);
      offset += 4;
      trans = readTransAt(fh, offset);
    }

    // read rest: every missing llast/rlast flag announces one more transition
    for (let expect = 1; expect; expect--) {
      if (!(trans & 0x0200)) {
        expect++;
      }

      if (!(trans & 0x0400)) {
        expect++;
      }

      result.push(trans);

      if (expect > 1) {
        offset += 4;
        trans = readTransAt(fh, offset);
      }
    }

    return result;
  }

  /**
   * Decodes packed transitions into plain objects.
   *
   * @param {number|number[]} rawTranses - one packed transition or a list of them
   * @returns {Array<{term: boolean, llast: boolean, rlast: boolean, attr: number, dest: number}>}
   */
  unpackTranses (rawTranses) {
    const transes = castArray(rawTranses);

    return _.map(transes, rawTrans => ({
      term: !!(rawTrans & 0x0100),
      llast: !!(rawTrans & 0x0200),
      rlast: !!(rawTrans & 0x0400),
      attr: (rawTrans & 0xFF),
      dest: (rawTrans >> 11) & 0x1FFFFF
    }));
  }

  /**
   * Reads the packed transition stored at the very beginning of the FSA data.
   *
   * @returns {number}
   */
  readRootTrans () {
    return readTransAt(this.resource, this.fsa_start + 0);
  }

  /**
   * Reads the alphabet block described by `header.alphabet_offset` and
   * `header.alphabet_size`.
   *
   * @returns {string} the block decoded with `Buffer#toString()`
   */
  readAlphabet () {
    const size = this.header['alphabet_size'];
    const buf = Buffer.alloc(size);

    fs.readSync(this.resource, buf, 0, size, this.header['alphabet_offset']);

    return buf.toString();
  }

  /**
   * Reads the annotation an annotation transition points to.
   *
   * The record is one length byte followed by that many bytes of data.
   *
   * @param {number} trans - packed transition
   * @returns {Buffer|null} annotation bytes; null when `trans` is not an
   *   annotation transition or the stored length is zero
   */
  getAnnot (trans) {
    if (!(trans & 0x0100)) {
      return null;
    }

    const fh = this.resource;
    const offset = this.header['annot_offset'] + (((trans & 0xFF) << 21) | ((trans >> 11) & 0x1FFFFF));
    const lenBuf = Buffer.alloc(1);

    fs.readSync(fh, lenBuf, 0, 1, offset);

    const len = php.strings.ord(lenBuf);

    if (!len) {
      return null;
    }

    const annot = Buffer.alloc(len);

    // the data starts right after the length byte
    fs.readSync(fh, annot, 0, len, offset + 1);

    return annot;
  }
}
export { Morphy_Fsa_Tree_File };