UNPKG

phpmorphy

Version:

Original package is located at http://phpmorphy.sourceforge.net/

301 lines (250 loc) 6.99 kB
import fs from 'fs';
import _ from 'lodash';
import { php, castArray } from '~/utils';
import { Fsa } from '~/lib/fsa/fsa';

// Layout of a raw 32-bit transition, as decoded by `unpackTranses` below:
//   bits 0..7   attr  (the byte this transition matches)
//   bit  8      term  (transition points to an annotation)
//   bit  9      llast (no left child in the in-state tree)
//   bit  10     rlast (no right child in the in-state tree)
//   bits 11..31 dest  (21-bit index of the destination state)
const TRANS_TERM = 0x0100;
const TRANS_LLAST = 0x0200;
const TRANS_RLAST = 0x0400;
const TRANS_ATTR_MASK = 0xff;

/**
 * Extracts the destination state index from a raw transition.
 *
 * @param {number} trans raw transition
 * @returns {number}
 */
const destOf = trans => (trans >> 11) & 0x1fffff;

/**
 * Reads one 4-byte transition at an absolute file position.
 *
 * The position is always passed explicitly: a positional `fs.readSync` does
 * not move the descriptor's file position, so sequential reads must never
 * rely on the implicit cursor.
 *
 * @param {number} fh file descriptor
 * @param {number} position absolute byte offset in the file
 * @returns {number} first value produced by `php.unpack('V', ...)`
 */
function readTrans(fh, position) {
  const buf = Buffer.alloc(4);

  fs.readSync(fh, buf, 0, 4, position);

  return php.unpack('V', buf)[0];
}

/**
 * File-backed FSA whose states store their transitions as a binary tree.
 * Every access reads directly from the file descriptor in `this.resource`.
 */
class FsaTreeFile extends Fsa {
  /**
   * Walks the automaton along the bytes of `word`, starting from `trans`.
   *
   * @param {number} trans raw transition to start from
   * @param {string|Buffer} word word to walk; converted with `Buffer.from`
   * @param {boolean} [readAnnot=true] read the annotation once the whole word is consumed
   * @returns {{result: boolean, annot: *, walked: number, last_trans: number, word_trans: number}}
   *  `walked` is the number of bytes consumed, `word_trans` is the transition
   *  reached after the last consumed byte, `last_trans` is the last transition
   *  read (the annotation transition when `readAnnot` is set and the word was
   *  fully consumed).
   */
  walk(trans, word, readAnnot = true) {
    const fh = this.resource;
    const fsaStart = this.fsa_start;
    const wordBuf = Buffer.from(word);
    const wordLength = wordBuf.length;

    let i = 0;
    for (; i < wordLength; i++) {
      const prevTrans = trans;
      const char = php.strings.ord(wordBuf, i);

      // ---- find char in state (tree version) ----
      let found = true;
      let stateOffset = fsaStart + (destOf(trans) << 2);

      // read first trans in state
      trans = readTrans(fh, stateOffset);

      // If first trans is term (i.e. pointing to annot) then skip it
      if (trans & TRANS_TERM) {
        if (trans & TRANS_LLAST && trans & TRANS_RLAST) {
          // It is the single transition in the state: nothing to match against
          found = false;
        } else {
          stateOffset += 4;
          trans = readTrans(fh, stateOffset);
        }
      }

      // if all ok process rest transitions in state
      if (found) {
        // 1-based position inside the implicit binary tree:
        // left child of idx is 2 * idx, right child is 2 * idx + 1
        let idx = 1;

        for (let j = 0; ; j++) {
          const attr = trans & TRANS_ATTR_MASK;

          // Loose equality kept on purpose: the return type of
          // `php.strings.ord` is not visible from this file.
          if (attr == char) {
            found = true;
            break;
          }

          if (attr > char) {
            if (trans & TRANS_LLAST) {
              found = false;
              break;
            }

            idx <<= 1;
          } else {
            if (trans & TRANS_RLAST) {
              found = false;
              break;
            }

            idx = (idx << 1) + 1;
          }

          if (j > 255) {
            throw new Error('Infinite recursion possible');
          }

          // read next trans
          trans = readTrans(fh, stateOffset + ((idx - 1) << 2));
        }
      }
      // ---- find char in state end ----

      if (!found) {
        trans = prevTrans;
        break;
      }
    }

    let annot = null;
    let result = false;
    const wordTrans = trans;

    if (i >= wordLength) {
      // Read annotation when we walked all chars in word
      result = true;

      if (readAnnot) {
        // read annot trans
        trans = readTrans(fh, fsaStart + (destOf(trans) << 2));

        if ((trans & TRANS_TERM) === 0) {
          result = false;
        } else {
          annot = this.getAnnot(trans);
        }
      }
    }

    return {
      result,
      annot,
      walked: i,
      last_trans: trans,
      word_trans: wordTrans,
    };
  }

  /**
   * Traverses everything reachable from `startNode` depth-first and invokes
   * `callback` for each terminal transition found.
   *
   * Traversal stops as soon as the callback returns a falsy value.
   *
   * @param {number} startNode raw transition whose destination state is the traversal root
   * @param callback invoked through `php.funchand.call_user_func(callback, null, annot)`
   * @param {boolean} [readAnnot=true] pass the annotation (from `getAnnot`) to the
   *  callback; when false the raw transition is passed instead
   * @param {string} [path=] unused, kept for interface compatibility
   * @returns {number} number of terminal transitions visited
   */
  collect(startNode, callback, readAnnot = true, path = '') {
    // `path` is not used anywhere, not even in `Morphy_Morphier_PredictCollector.collect`,
    // which receives this `path` through the callback call below
    const stack = [null];
    const stackIdx = [null];
    let total = 0;
    let startIdx = 0;
    let state = this.readState(destOf(startNode));

    do {
      let i = startIdx;
      const count = _.size(state);

      for (; i < count; i++) {
        const trans = state[i];

        if (trans & TRANS_TERM) {
          total++;

          const annot = readAnnot ? this.getAnnot(trans) : trans;

          if (!php.funchand.call_user_func(callback, null, annot)) {
            return total;
          }
        } else {
          // path += php.strings.chr((trans & 0xFF));
          // Descend: remember where to resume in the current state
          stack.push(state);
          stackIdx.push(i + 1);
          state = this.readState(destOf(trans));
          startIdx = 0;

          break;
        }
      }

      if (i >= count) {
        // Current state exhausted: resume the parent state
        state = stack.pop();
        startIdx = stackIdx.pop();
        // path = php.strings.substr(path, 0, -1);
      }
    } while (stack.length);

    return total;
  }

  /**
   * Reads all raw transitions of the state stored at `index`.
   *
   * @param {number} index state index (in 4-byte units from `this.fsa_start`)
   * @returns {number[]} raw transitions in file order
   */
  readState(index) {
    const fh = this.resource;
    const result = [];
    let offset = this.fsa_start + (index << 2);

    // read first trans
    let trans = readTrans(fh, offset);

    // check if first trans is pointer to annot, and not single in state
    // NOTE(review): `walk` treats "single" as llast AND rlast, while this test
    // requires that neither flag is set - confirm both agree for annot transitions.
    if (trans & TRANS_TERM && !(trans & TRANS_LLAST || trans & TRANS_RLAST)) {
      result.push(trans);

      offset += 4;
      trans = readTrans(fh, offset);
    }

    // read rest: `expect` counts the transitions still to be collected; each
    // missing llast/rlast flag announces one more transition in this state
    for (let expect = 1; expect; expect--) {
      if (!(trans & TRANS_LLAST)) {
        expect++;
      }

      if (!(trans & TRANS_RLAST)) {
        expect++;
      }

      result.push(trans);

      if (expect > 1) {
        offset += 4;
        trans = readTrans(fh, offset);
      }
    }

    return result;
  }

  /**
   * Decodes raw transitions into plain objects.
   *
   * @param {number|number[]} rawTranses one raw transition or a list of them
   * @returns {{term: boolean, llast: boolean, rlast: boolean, attr: number, dest: number}[]}
   */
  unpackTranses(rawTranses) {
    return _.map(castArray(rawTranses), rawTrans => ({
      term: !!(rawTrans & TRANS_TERM),
      llast: !!(rawTrans & TRANS_LLAST),
      rlast: !!(rawTrans & TRANS_RLAST),
      attr: rawTrans & TRANS_ATTR_MASK,
      dest: destOf(rawTrans),
    }));
  }

  /**
   * Reads the transition stored at the very beginning of the FSA area.
   *
   * @returns {number}
   */
  readRootTrans() {
    return readTrans(this.resource, this.fsa_start);
  }

  /**
   * Reads the alphabet block described by the file header.
   *
   * @returns {string} the block decoded with `Buffer#toString()`
   */
  readAlphabet() {
    const { alphabet_size: size, alphabet_offset: position } = this.header;
    const buf = Buffer.alloc(size);

    fs.readSync(this.resource, buf, 0, size, position);

    return buf.toString();
  }

  /**
   * Reads the annotation a terminal transition points to.
   *
   * The annotation is stored as one length byte followed by that many bytes.
   *
   * @param {number} trans raw transition
   * @returns {Buffer|null} annotation bytes; null when `trans` is not terminal
   *  or the stored length is zero
   */
  getAnnot(trans) {
    if (!(trans & TRANS_TERM)) {
      return null;
    }

    const fh = this.resource;
    const offset = this.header.annot_offset + (((trans & TRANS_ATTR_MASK) << 21) | destOf(trans));

    const lenBuf = Buffer.alloc(1);
    fs.readSync(fh, lenBuf, 0, 1, offset);

    const len = php.strings.ord(lenBuf);
    if (!len) {
      return null;
    }

    // The payload follows the length byte; position is explicit because the
    // positional read above did not advance the file position.
    const annot = Buffer.alloc(len);
    fs.readSync(fh, annot, 0, len, offset + 1);

    return annot;
  }
}

export { FsaTreeFile };