phpmorphy
Version:
Original package is located at http://phpmorphy.sourceforge.net/
285 lines (234 loc) • 6.71 kB
JavaScript
import _ from 'lodash';
import { php, castArray } from '~/utils';
import { Fsa } from '~/lib/fsa/fsa';
class FsaTreeMem extends Fsa {
/**
* @param trans
* @param word
* @param {boolean} [readAnnot=true]
* @returns {*}
*/
walk(trans, word, readAnnot = true) {
const mem = this.resource;
const fsa_start = this.fsa_start;
const wordBuf = Buffer.from(word);
let prev_trans;
let char;
let result;
let start_offset;
let buf;
let attr;
let annot;
let i = 0;
const c = wordBuf.length;
for (; i < c; i++) {
prev_trans = trans;
char = php.strings.ord(wordBuf, i);
// ///////////////////////////////
// find char in state begin
// tree version
result = true;
start_offset = fsa_start + (((trans >> 11) & 0x1fffff) << 2);
// read first trans in state
buf = php.strings.substr(mem, start_offset, 4);
trans = php.unpack('V', buf)[0];
// If first trans is term(i.e. pointing to annot) then skip it
if (trans & 0x0100) {
// When this is single transition in state then break
if (trans & 0x0200 && trans & 0x0400) {
result = false;
} else {
start_offset += 4;
buf = php.strings.substr(mem, start_offset, 4);
trans = php.unpack('V', buf)[0];
}
}
// if all ok process rest transitions in state
if (result) {
// walk through state
for (let idx = 1, j = 0; ; j++) {
attr = trans & 0xff;
if (attr == char) {
result = true;
break;
} else if (attr > char) {
if (trans & 0x0200) {
result = false;
break;
}
idx <<= 1;
} else {
if (trans & 0x0400) {
result = false;
break;
}
idx = (idx << 1) + 1;
}
if (j > 255) {
throw new Error('Infinite recursion possible');
}
// read next trans
buf = php.strings.substr(mem, start_offset + ((idx - 1) << 2), 4);
trans = php.unpack('V', buf)[0];
}
}
// find char in state end
// ///////////////////////////////
if (!result) {
trans = prev_trans;
break;
}
}
annot = null;
result = false;
prev_trans = trans;
if (i >= c) {
// Read annotation when we walked all chars in word
result = true;
if (readAnnot) {
// read annot trans
buf = php.strings.substr(mem, fsa_start + (((trans >> 11) & 0x1fffff) << 2), 4);
trans = php.unpack('V', buf)[0];
if ((trans & 0x0100) == 0) {
result = false;
} else {
annot = this.getAnnot(trans);
}
}
}
return {
result,
annot,
walked: i,
last_trans: trans,
word_trans: prev_trans,
};
}
/**
* @param startNode
* @param callback
* @param {boolean} [readAnnot=true]
* @param {string} [path=]
* @returns {number}
*/
collect(startNode, callback, readAnnot = true, path = '') {
// `path` нигде не используется, даже в `Morphy_Morphier_PredictCollector.collect`,
// куда попадает этот `path` через вызов коллбека ниже
const stack = [];
const stack_idx = [];
let total = 0;
let start_idx = 0;
let trans;
let annot;
stack.push(null);
stack_idx.push(null);
let state = this.readState((startNode >> 11) & 0x1fffff);
do {
let i = start_idx;
const c = _.size(state);
for (; i < c; i++) {
trans = state[i];
if (trans & 0x0100) {
total++;
if (readAnnot) {
annot = this.getAnnot(trans);
} else {
annot = trans;
}
// if (!php.funchand.call_user_func(callback, path, annot)) {
if (!php.funchand.call_user_func(callback, null, annot)) {
return total;
}
} else {
// path += php.strings.chr((trans & 0xFF));
stack.push(state);
stack_idx.push(i + 1);
state = this.readState((trans >> 11) & 0x1fffff);
start_idx = 0;
break;
}
}
if (i >= c) {
state = stack.pop();
start_idx = stack_idx.pop();
// path = php.strings.substr(path, 0, -1);
}
} while (stack.length);
return total;
}
readState(index) {
const mem = this.resource;
const fsa_start = this.fsa_start;
const result = [];
let offset = fsa_start + (index << 2);
// read first trans
let buf = php.strings.substr(mem, offset, 4);
let trans = php.unpack('V', buf)[0];
// check if first trans is pointer to annot, and not single in state
if (trans & 0x0100 && !(trans & 0x0200 || trans & 0x0400)) {
result.push(trans);
buf = php.strings.substr(mem, offset, 4);
trans = php.unpack('V', buf)[0];
offset += 4;
}
// read rest
for (let expect = 1; expect; expect--) {
if (!(trans & 0x0200)) {
expect++;
}
if (!(trans & 0x0400)) {
expect++;
}
result.push(trans);
if (expect > 1) {
buf = php.strings.substr(mem, offset, 4);
trans = php.unpack('V', buf)[0];
offset += 4;
}
}
return result;
}
unpackTranses(rawTranses) {
rawTranses = castArray(rawTranses);
const result = [];
_.forEach(rawTranses, rawTrans => {
result.push({
term: !!(rawTrans & 0x0100),
llast: !!(rawTrans & 0x0200),
rlast: !!(rawTrans & 0x0400),
attr: rawTrans & 0xff,
dest: (rawTrans >> 11) & 0x1fffff,
});
});
return result;
}
readRootTrans() {
const fh = this.resource;
const fsa_start = this.fsa_start;
const buf = php.strings.substr(fh, fsa_start + 0, 4);
return php.unpack('V', buf)[0];
}
readAlphabet() {
const fh = this.resource;
const buf = php.strings.substr(fh, this.header.alphabet_offset, this.header.alphabet_size);
return buf.toString();
}
getAnnot(trans) {
if (!(trans & 0x0100)) {
return null;
}
const fh = this.resource;
const offset = this.header.annot_offset + (((trans & 0xff) << 21) | ((trans >> 11) & 0x1fffff));
let annot;
let buf = php.strings.substr(fh, offset, 1);
const len = php.strings.ord(buf);
if (len) {
buf = php.strings.substr(fh, offset + 1, len);
annot = buf;
} else {
annot = null;
}
return annot;
}
}
export { FsaTreeMem };