UNPKG

phpmorphy

Version:

Original package is located at http://phpmorphy.sourceforge.net/

314 lines (257 loc) 6.71 kB
import _ from 'lodash'; import { php } from '~/utils'; import { State } from './fsa-state'; class FsaInterface { /** * Return root transition of fsa * @return {[]} */ getRootTrans() {} /** * Returns root state object * @return {*} */ getRootState() {} /** * Returns alphabet i.e. all chars used in automat * @return {[]} */ getAlphabet() {} /** * Return annotation for given transition(if annotation flag is set for given trans) * * @param {[]} trans * @return {string} */ getAnnot(trans) {} /** * Find word in automat * * @param {*} trans starting transition * @param {string} word * @param {boolean} [readAnnot=true] read annot or simple check if word exists in automat * @return {boolean} TRUE if word is found, FALSE otherwise */ walk(trans, word, readAnnot = true) {} /** * Traverse automat and collect words * For each found words callback function invoked with follow arguments: * call_user_func(callback, word, annot) * when `readAnnot` is FALSE then `annot` arg is always NULL * * @param {*} startNode * @param {*} callback callback function(in php format callback i.e. string or array(obj, method) or array(class, * method) * @param {boolean} [readAnnot=true] read annot * @param {string} [path=] string to be append to all words */ collect(startNode, callback, readAnnot = true, path = '') {} /** * Read state at given index * * @param {number} index * @return {[]} */ readState(index) {} /** * Unpack transition from binary form to array * * @param {*} rawTranses may be array for convert more than one transitions * @return {[]} */ unpackTranses(rawTranses) {} } class Fsa extends FsaInterface { static get HEADER_SIZE() { return 128; } /** * @param {Storage} storage * @param {boolean} lazy * @returns {*} */ static create(storage, lazy) { if (lazy) { return new FsaProxy(storage); } const { readHeader, validateHeader, HEADER_SIZE } = Fsa; const header = readHeader(storage.read(0, HEADER_SIZE, true)); if (!validateHeader(header)) { throw new Error('Invalid fsa format'); } let type; if (header.flags.is_sparse) { type = 'sparse'; } else if (header.flags.is_tree) { type = 'tree'; } else { throw new Error('Only sparse or tree fsa`s supported'); } const storageType = storage.getTypeAsString(); const className = `Fsa${_.upperFirst(type)}${_.upperFirst(storageType)}`; const fsaAccess = require(`./access/fsa-${type}-${storageType}`); return new fsaAccess[className](storage.getResource(), header); } static readHeader(headerRaw) { const { HEADER_SIZE } = Fsa; if (headerRaw.length !== HEADER_SIZE) { throw new Error('Invalid header string given'); } const header = php.unpack( [ 'a4fourcc', 'Vver', 'Vflags', 'Valphabet_offset', 'Vfsa_offset', 'Vannot_offset', 'Valphabet_size', 'Vtranses_count', 'Vannot_length_size', 'Vannot_chunk_size', 'Vannot_chunks_count', 'Vchar_size', 'Vpadding_size', 'Vdest_size', 'Vhash_size', ].join('/'), headerRaw, ); if (header === false) { throw new Error('Can`t unpack header'); } const flags = {}; const raw_flags = header.flags; flags.is_tree = !!(raw_flags & 0x01); flags.is_hash = !!(raw_flags & 0x02); flags.is_sparse = !!(raw_flags & 0x04); flags.is_be = !!(raw_flags & 0x08); header.flags = flags; header.trans_size = header.char_size + header.padding_size + header.dest_size + header.hash_size; return header; } static validateHeader(header) { return !( header.fourcc !== 'meal' || header.ver !== 3 || header.char_size !== 1 || header.padding_size > 0 || header.dest_size !== 3 || header.hash_size !== 0 || header.annot_length_size !== 1 || header.annot_chunk_size !== 1 || header.flags.is_be || header.flags.is_hash || 1 === 0 ); } constructor(resource, header) { super(); this.resource = resource; this.header = header; this.fsa_start = header.fsa_offset; this.root_trans = this.readRootTrans(); this.alphabet = null; } getRootTrans() { return this.root_trans; } getRootState() { return this.createState(this.getRootStateIndex()); } getAlphabet() { if (!php.var.isset(this.alphabet)) { // this.alphabet = php.strings.str_split(this.readAlphabet()); const alphabet = this.readAlphabet(); const alphabetBuf = Buffer.from(alphabet); const result = []; for (let i = 0, length = alphabetBuf.length; i < length; i++) { result.push(alphabetBuf.slice(i, i + 1).toString()); } this.alphabet = result; } return this.alphabet; } createState(index) { return new State(this, index); } getRootStateIndex() {} readRootTrans() {} readAlphabet() {} } class FsaWordsCollector { constructor(collectLimit) { this.limit = collectLimit; this.items = {}; } collect(word, annot) { if (_.size(this.items) < this.limit) { this.items[word] = annot; return true; } return false; } getItems() { return this.items; } clear() { this.items = {}; } getCallback() { return [this, 'collect']; } } class FsaDecorator extends FsaInterface { /** * @param {FsaInterface} fsa */ constructor(fsa) { super(); this.fsa = fsa; } getRootTrans(...args) { return this.fsa.getRootTrans(...args); } getRootState(...args) { return this.fsa.getRootState(...args); } getAlphabet(...args) { return this.fsa.getAlphabet(...args); } getAnnot(...args) { return this.fsa.getAnnot(...args); } walk(...args) { return this.fsa.walk(...args); } collect(...args) { return this.fsa.collect(...args); } readState(...args) { return this.fsa.readState(...args); } unpackTranses(...args) { return this.fsa.unpackTranses(...args); } } class FsaProxy extends FsaDecorator { /** * @param {Storage} storage */ constructor(storage) { super(storage); this.storage = storage; this._fsa = null; } get fsa() { if (!this._fsa) { this._fsa = Fsa.create(this.storage, false); delete this.storage; } return this._fsa; } set fsa(value) { this._fsa = !_.isUndefined(value) ? value : null; } } export { FsaInterface, Fsa, FsaWordsCollector, FsaDecorator, FsaProxy };