UNPKG

phpmorphy-locutus

Version:

The original package is located at https://github.com/antixrist/node-phpmorphy however it used the phpjs module, which contained dependencies with critical vulnerabilities and is not maintained anymore. This package was swapped for the newer lucutus packa

1,969 lines (1,562 loc) 251 kB
module.exports = /******/ (function(modules) { // webpackBootstrap /******/ // The module cache /******/ var installedModules = {}; /******/ /******/ // The require function /******/ function __webpack_require__(moduleId) { /******/ /******/ // Check if module is in cache /******/ if(installedModules[moduleId]) /******/ return installedModules[moduleId].exports; /******/ /******/ // Create a new module (and put it into the cache) /******/ var module = installedModules[moduleId] = { /******/ i: moduleId, /******/ l: false, /******/ exports: {} /******/ }; /******/ /******/ // Execute the module function /******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__); /******/ /******/ // Flag the module as loaded /******/ module.l = true; /******/ /******/ // Return the exports of the module /******/ return module.exports; /******/ } /******/ /******/ /******/ // expose the modules object (__webpack_modules__) /******/ __webpack_require__.m = modules; /******/ /******/ // expose the module cache /******/ __webpack_require__.c = installedModules; /******/ /******/ // identity function for calling harmony imports with the correct context /******/ __webpack_require__.i = function(value) { return value; }; /******/ /******/ // define getter function for harmony exports /******/ __webpack_require__.d = function(exports, name, getter) { /******/ if(!__webpack_require__.o(exports, name)) { /******/ Object.defineProperty(exports, name, { /******/ configurable: false, /******/ enumerable: true, /******/ get: getter /******/ }); /******/ } /******/ }; /******/ /******/ // getDefaultExport function for compatibility with non-harmony modules /******/ __webpack_require__.n = function(module) { /******/ var getter = module && module.__esModule ? /******/ function getDefault() { return module['default']; } : /******/ function getModuleExports() { return module; }; /******/ __webpack_require__.d(getter, 'a', getter); /******/ return getter; /******/ }; /******/ /******/ // Object.prototype.hasOwnProperty.call /******/ __webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); }; /******/ /******/ // __webpack_public_path__ /******/ __webpack_require__.p = ""; /******/ /******/ // Load entry module and return exports /******/ return __webpack_require__(__webpack_require__.s = 57); /******/ }) /************************************************************************/ /******/ ([ /* 0 */ /***/ function(module, exports) { module.exports = require("lodash"); /***/ }, /* 1 */ /***/ function(module, exports, __webpack_require__) { "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.clone = exports.str2hex = exports.str2ascii = exports.buffer2str = exports.toBuffer = exports.isStringifyedNumber = exports.php = exports.inspect = exports.logger = exports.castArray = exports.onShutdown = undefined; var _lodash = __webpack_require__(0); var _lodash2 = _interopRequireDefault(_lodash); var _util = __webpack_require__(53); var _util2 = _interopRequireDefault(_util); var _php = __webpack_require__(51); var _php2 = _interopRequireDefault(_php); var _phpunserialize = __webpack_require__(52); var _phpunserialize2 = _interopRequireDefault(_phpunserialize); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } /** * @param {function} cb */ // import jschardet from 'jschardet'; // import encoding from 'encoding'; function onShutdown(cb) { onShutdown.handlers.push(cb); } onShutdown.handlers = []; const logger = {}; logger.log = console.log.bind(console); logger.trace = console.trace.bind(console); logger.info = console.info.bind(console); logger.warn = console.warn.bind(console); logger.error = console.error.bind(console); /** * @param any * @returns {Array} */ function castArray(any) { any = !_lodash2.default.isUndefined(any) && !_lodash2.default.isNull(any) ? any : []; any = _lodash2.default.isArray(any) ? any : [any]; return any; } /** * @param object * @param {{}} [opts] * @returns {string} */ function inspect(object, opts = { depth: null, colors: true, maxArrayLength: 1000 }) { return _util2.default.inspect(object, opts); } // /** // * @param {String|Buffer} text // * @returns {String} // */ // function detectCharset (text) { // let buffer; // let retVal = null; // // if (_.isArray(text)) { // return _.map(text, item => detectCharset(item)); // } else // if (_.isString(text)) { // buffer = Buffer.from(text, 'binary'); // } else // if (!Buffer.isBuffer(text)) { // buffer = null; // } // // if (buffer) { // retVal = jschardet.detect(buffer).encoding; // retVal = (retVal) ? retVal.toLowerCase() : null; // } // // return retVal; // } // /** // * @param {String|Buffer} text // * @param {String} to // * @param {String} [from='UTF-8'] // * @returns {*} // */ // function convert (text, to, from = 'UTF-8') { // let args = _.toArray(arguments); // let buffer = null; // let retVal = text; // // if (_.isArray(text)) { // return _.map(text, (item) => convert.apply(convert, [item].concat(args.slice(1)))); // } else // if (_.isString(text)) { // buffer = Buffer.from(text, 'binary'); // } else // if (!Buffer.isBuffer(text)) { // buffer = null; // } // // if (buffer) { // from = (!from) ? detectCharset(buffer) : from; // // if (from) { // retVal = encoding.convert(buffer, to, from); // } // // return retVal.toString(); // } // // return retVal; // } //php.info.ini_set('unicode.semantics', 'on'); _php2.default.info.ini_set('phpjs.objectsAsArrays', false); _php2.default.unpack = function unpack(format, buffer) { /** * Параметр format задается в виде строки и состоит из кодов формата и * опционального аргумента повторения. Аргумент может быть целочисленным, * либо * для повторения до конца введенных данных. Для a, A, h, H число * повторений определяет то, сколько символов взято от одного аргумента * данных, для @ - это абсолютная позиция для размещения следующих данных, * для всего остального число повторений определяет как много аргументов * данных было обработано и упаковано в результирующую бинарную строку. */ const codes = { 'a': 'Строка (string) с NULL-заполнением', 'A': 'Строка (string) со SPACE-заполнением', 'h': 'Hex-строка (Hex string), с нижнего разряда', 'H': 'Hex-строка (Hex string), с верхнего разряда', 'c': 'знаковый символ (char)', 'C': 'беззнаковый символ (char)', 's': 'знаковый short (всегда 16 бит, машинный байтовый порядок)', 'S': 'беззнаковый short (всегда 16 бит, машинный байтовый порядок)', 'n': 'беззнаковый short (всегда 16 бит, порядок big endian)', 'v': 'беззнаковый short (всегда 16 бит, порядок little endian)', 'i': 'знаковый integer (машинно-зависимый размер и порядок)', 'I': 'беззнаковый integer (машинно-зависимый размер и порядок)', 'l': 'знаковый long (всегда 32 бит, машинный порядок)', 'L': 'беззнаковый long (всегда 32 бит, машинный порядок)', 'N': 'беззнаковый long (всегда 32 бит, порядок big endian)', 'V': 'беззнаковый long (всегда 32 бит, порядок little endian)', 'f': 'float (машинно-зависимые размер и прдставление)', 'd': 'double (машинно-зависимые размер и прдставление)', 'x': 'NUL байт', 'X': 'Резервирование одного байта', '@': 'NUL-заполнение до абсолютной позиции' }; const parts = format.split('/'); let offset = 0, mod, lenStr, len; if (parts.length > 1) { let result = {}; for (let idx = 0; idx < parts.length; idx++) { mod = parts[idx][0]; if (mod in codes) { switch (mod) { case 'V': result[parts[idx].slice(1)] = buffer.readUInt32LE(offset); offset += 4; break; case 'v': result[parts[idx].slice(1)] = buffer.readUInt16LE(offset); offset += 2; break; case 'a': lenStr = /\d+/g.exec(parts[idx])[0]; len = parseInt(lenStr, 10); result[parts[idx].slice(1 + lenStr.length)] = buffer.toString('ascii', offset, len); offset += len; break; default: _util2.default.puts(parts[idx] + ' ' + offset); break; } } } return result; } else { let result = []; do { let obj = {}; mod = format[0]; if (mod in codes) { switch (mod) { case 'V': obj = buffer.readUInt32LE(offset); offset += 4; break; case 'v': obj = buffer.readUInt16LE(offset); offset += 2; break; case 'a': lenStr = /\d+/g.exec(format)[0]; len = parseInt(lenStr, 10); obj = buffer.toString('ascii', offset, len); offset += len; break; default: _util2.default.puts(format); break; } } result.push(obj); } while (offset < buffer.length); return result; } }; _php2.default.var.unserialize = _phpunserialize2.default; _php2.default.strings.ord = function ord(str, idx) { if (!Buffer.isBuffer(str)) { str = Buffer.from(str); } idx = !_lodash2.default.isUndefined(idx) && _lodash2.default.isNumber(idx) && idx < str.length ? idx : 0; return str[idx]; }; _php2.default.strings._substr = _php2.default.strings.substr; // safe /** * @param {String|Buffer} str * @param {Number} start * @param {Number} [len] * @returns {string|Buffer|boolean} */ _php2.default.strings.substr = function php$substr(str, start, len) { let end; if (Buffer.isBuffer(str)) { end = str.length; start = start < 0 ? start + end : start; end = typeof len === 'undefined' ? end : len < 0 ? len + end : len + start; return start >= str.length || start < 0 || start > end ? false : str.slice(start, end); } return _php2.default.strings._substr.apply(_php2.default.strings._substr, arguments); }; // /** // * @param raw // * @returns {String|null} // */ // function detectEncoding (raw) { // let buffer, result; // if (Buffer.isBuffer(raw)) { // buffer = raw; // } else { // raw = (_.isString(raw)) ? raw : raw +''; // buffer = Buffer.from(raw, 'binary'); // } // // result = jschardet.detect(buffer); // // return (result.encoding) ? result.encoding : null; // } /** * @param any * @returns {boolean} */ function isStringifyedNumber(any) { let int = _lodash2.default.toInteger(any); if (int === 0 && any !== '0') { return false; } return any == int; } /** * @param something * @param [encoding='utf-8'] * @returns {Buffer|*} */ function toBuffer(something, encoding = 'utf-8') { let retVal = something; function _ref(item) { return toBuffer(item, encoding); } if (_lodash2.default.isArray(something)) { retVal = _lodash2.default.map(something, _ref); } else if (Buffer.isBuffer(something)) { retVal = something; } else if (_lodash2.default.isString(something)) { retVal = Buffer.from(something, encoding); } else if (_lodash2.default.isPlainObject(something)) { let obj = _lodash2.default.clone(something); _lodash2.default.forEach(obj, (val, key) => obj[key] = toBuffer(val, encoding)); retVal = obj; } return retVal; } /** * @param something * @param {String} [encoding='utf8'] * @returns {string|*} */ function buffer2str(something, encoding = 'utf8') { return Buffer.isBuffer(something) ? something.toString(encoding) : something; } /** * @param something * @returns {Array} */ function str2ascii(something) { let retVal = []; let buffer = !Buffer.isBuffer(something) ? Buffer.from(something, 'binary') : something; for (let i = 0, length = buffer.length; i < length; i++) { retVal.push(buffer[i]); } return retVal; } /** * @param something * @returns {String} */ function str2hex(something) { let retVal = !Buffer.isBuffer(something) ? Buffer.from(something, 'binary') : something; return retVal.toString('hex'); } function clone(instance) { return _lodash2.default.merge({}, Object.create(Object.getPrototypeOf(instance)), instance); } exports.onShutdown = onShutdown; exports.castArray = castArray; exports.logger = logger; exports.inspect = inspect; exports.php = _php2.default; exports.isStringifyedNumber = isStringifyedNumber; exports.toBuffer = toBuffer; exports.buffer2str = buffer2str; exports.str2ascii = str2ascii; exports.str2hex = str2hex; exports.clone = clone; /***/ }, /* 2 */ /***/ function(module, exports, __webpack_require__) { "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Morphy_Fsa_Proxy = exports.Morphy_Fsa_Decorator = exports.Morphy_Fsa_WordsCollector = exports.Morphy_Fsa = exports.Morphy_Fsa_Interface = undefined; var _lodash = __webpack_require__(0); var _lodash2 = _interopRequireDefault(_lodash); var _fsa_state = __webpack_require__(19); var _utils = __webpack_require__(1); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } class Morphy_Fsa_Interface { /** * Return root transition of fsa * @return {[]} */ getRootTrans() {} /** * Returns root state object * @return {*} */ getRootState() {} /** * Returns alphabet i.e. all chars used in automat * @return {[]} */ getAlphabet() {} /** * Return annotation for given transition(if annotation flag is set for given trans) * * @param {[]} trans * @return {string} */ getAnnot(trans) {} /** * Find word in automat * * @param {*} trans starting transition * @param {string} word * @param {boolean} [readAnnot=true] read annot or simple check if word exists in automat * @return {boolean} TRUE if word is found, FALSE otherwise */ walk(trans, word, readAnnot = true) {} /** * Traverse automat and collect words * For each found words callback function invoked with follow arguments: * call_user_func(callback, word, annot) * when `readAnnot` is FALSE then `annot` arg is always NULL * * @param {*} startNode * @param {*} callback callback function(in php format callback i.e. string or array(obj, method) or array(class, * method) * @param {boolean} [readAnnot=true] read annot * @param {string} [path=] string to be append to all words */ collect(startNode, callback, readAnnot = true, path = '') {} /** * Read state at given index * * @param {number} index * @return {[]} */ readState(index) {} /** * Unpack transition from binary form to array * * @param {*} rawTranses may be array for convert more than one transitions * @return {[]} */ unpackTranses(rawTranses) {} } /** * This file is part of phpMorphy library * * Copyright c 2007-2008 Kamaev Vladimir <heromantor@users.sourceforge.net> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ class Morphy_Fsa extends Morphy_Fsa_Interface { static get HEADER_SIZE() { return 128; } /** * @param {Morphy_Storage} storage * @param {boolean} lazy * @returns {*} */ static create(storage, lazy) { if (!!lazy) { return new Morphy_Fsa_Proxy(storage); } const readHeader = Morphy_Fsa.readHeader, validateHeader = Morphy_Fsa.validateHeader, HEADER_SIZE = Morphy_Fsa.HEADER_SIZE; const header = readHeader(storage.read(0, HEADER_SIZE, true)); if (!validateHeader(header)) { throw new Error('Invalid fsa format'); } let type; if (header['flags']['is_sparse']) { type = 'sparse'; } else if (header['flags']['is_tree']) { type = 'tree'; } else { throw new Error('Only sparse or tree fsa`s supported'); } const storage_type = storage.getTypeAsString(); const className = `Morphy_Fsa_${_utils.php.strings.ucfirst(type)}_${_utils.php.strings.ucfirst(storage_type)}`; const fsaAccess = __webpack_require__(25)("./fsa_" + type + '_' + storage_type); return new fsaAccess[className](storage.getResource(), header); } static readHeader(headerRaw) { const HEADER_SIZE = Morphy_Fsa.HEADER_SIZE; if (headerRaw.length != HEADER_SIZE) { throw new Error('Invalid header string given'); } const header = _utils.php.unpack(['a4fourcc', 'Vver', 'Vflags', 'Valphabet_offset', 'Vfsa_offset', 'Vannot_offset', 'Valphabet_size', 'Vtranses_count', 'Vannot_length_size', 'Vannot_chunk_size', 'Vannot_chunks_count', 'Vchar_size', 'Vpadding_size', 'Vdest_size', 'Vhash_size'].join('/'), headerRaw); if (header === false) { throw new Error('Can`t unpack header'); } const flags = {}; const raw_flags = header['flags']; flags['is_tree'] = !!(raw_flags & 0x01); flags['is_hash'] = !!(raw_flags & 0x02); flags['is_sparse'] = !!(raw_flags & 0x04); flags['is_be'] = !!(raw_flags & 0x08); header['flags'] = flags; header['trans_size'] = header['char_size'] + header['padding_size'] + header['dest_size'] + header['hash_size']; return header; } static validateHeader(header) { return !(header['fourcc'] != 'meal' || header['ver'] != 3 || header['char_size'] != 1 || header['padding_size'] > 0 || header['dest_size'] != 3 || header['hash_size'] != 0 || header['annot_length_size'] != 1 || header['annot_chunk_size'] != 1 || header['flags']['is_be'] || header['flags']['is_hash'] || 1 == 0); } constructor(resource, header) { super(...arguments); this.resource = resource; this.header = header; this.fsa_start = header['fsa_offset']; this.root_trans = this.readRootTrans(); this.alphabet = null; } getRootTrans() { return this.root_trans; } getRootState() { return this.createState(this.getRootStateIndex()); } getAlphabet() { if (!_utils.php.var.isset(this.alphabet)) { //this.alphabet = php.strings.str_split(this.readAlphabet()); const alphabet = this.readAlphabet(); const alphabetBuf = Buffer.from(alphabet); const result = []; for (let i = 0, length = alphabetBuf.length; i < length; i++) { result.push(alphabetBuf.slice(i, i + 1).toString()); } this.alphabet = result; } return this.alphabet; } createState(index) { return new _fsa_state.Morphy_State(this, index); } getRootStateIndex() {} readRootTrans() {} readAlphabet() {} } class Morphy_Fsa_WordsCollector { constructor(collectLimit) { this.limit = collectLimit; this.items = {}; } collect(word, annot) { if (_lodash2.default.size(this.items) < this.limit) { this.items[word] = annot; return true; } return false; } getItems() { return this.items; } clear() { this.items = {}; } getCallback() { return [this, 'collect']; } } class Morphy_Fsa_Decorator extends Morphy_Fsa_Interface { /** * @param {Morphy_Fsa_Interface} fsa */ constructor(fsa) { super(...arguments); this.fsa = fsa; } getRootTrans(...args) { return this.fsa.getRootTrans(...args); } getRootState(...args) { return this.fsa.getRootState(...args); } getAlphabet(...args) { return this.fsa.getAlphabet(...args); } getAnnot(...args) { return this.fsa.getAnnot(...args); } walk(...args) { return this.fsa.walk(...args); } collect(...args) { return this.fsa.collect(...args); } readState(...args) { return this.fsa.readState(...args); } unpackTranses(...args) { return this.fsa.unpackTranses(...args); } } class Morphy_Fsa_Proxy extends Morphy_Fsa_Decorator { /** * @param {Morphy_Storage} storage */ constructor(storage) { super(...arguments); this.storage = storage; this._fsa = null; } get fsa() { if (!this._fsa) { this._fsa = Morphy_Fsa.create(this.storage, false); delete this.storage; } return this._fsa; } set fsa(value) { this._fsa = !_lodash2.default.isUndefined(value) ? value : null; } } exports.Morphy_Fsa_Interface = Morphy_Fsa_Interface; exports.Morphy_Fsa = Morphy_Fsa; exports.Morphy_Fsa_WordsCollector = Morphy_Fsa_WordsCollector; exports.Morphy_Fsa_Decorator = Morphy_Fsa_Decorator; exports.Morphy_Fsa_Proxy = Morphy_Fsa_Proxy; /***/ }, /* 3 */ /***/ function(module, exports) { module.exports = require("fs"); /***/ }, /* 4 */ /***/ function(module, exports, __webpack_require__) { "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Morphy_GrammemsProvider_Factory = exports.Morphy_GrammemsProvider_ForFactory = exports.Morphy_GrammemsProvider_Empty = exports.Morphy_GrammemsProvider_Base = exports.Morphy_GrammemsProvider_Decorator = exports.Morphy_GrammemsProvider_Interface = undefined; var _lodash = __webpack_require__(0); var _lodash2 = _interopRequireDefault(_lodash); var _encoding = __webpack_require__(16); var _encoding2 = _interopRequireDefault(_encoding); var _utils = __webpack_require__(1); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } class Morphy_GrammemsProvider_Interface { getGrammems(partOfSpeech) {} } class Morphy_GrammemsProvider_Decorator extends Morphy_GrammemsProvider_Interface { /** * @param {Morphy_GrammemsProvider_Interface} inner */ constructor(inner) { super(...arguments); this.inner = inner; } getGrammems(partOfSpeech) { return this.inner.getGrammems(partOfSpeech); } } class Morphy_GrammemsProvider_Base extends Morphy_GrammemsProvider_Interface { static flatizeArray(array) { return _lodash2.default.flatten(_lodash2.default.values(array)); } constructor() { super(...arguments); this.grammems = {}; this.all_grammems = Morphy_GrammemsProvider_Base.flatizeArray(this.getAllGrammemsGrouped()); } getAllGrammemsGrouped() {} includeGroups(partOfSpeech, names) { const grammems = this.getAllGrammemsGrouped(); names = !_lodash2.default.isArray(names) ? [names] : names; names = _utils.php.array.array_flip(names); _lodash2.default.forEach(_utils.php.array.array_keys(grammems), key => { if (!_utils.php.var.isset(names[key])) { delete grammems[key]; } }); this.grammems[partOfSpeech] = Morphy_GrammemsProvider_Base.flatizeArray(grammems); return this; } excludeGroups(partOfSpeech, names) { const grammems = this.getAllGrammemsGrouped(); names = !_lodash2.default.isArray(names) ? [names] : names; _lodash2.default.forEach(names, key => delete grammems[key]); this.grammems[partOfSpeech] = Morphy_GrammemsProvider_Base.flatizeArray(grammems); return this; } resetGroups(partOfSpeech) { delete this.grammems[partOfSpeech]; return this; } resetGroupsForAll() { this.grammems = {}; return this; } getGrammems(partOfSpeech) { if (_utils.php.var.isset(this.grammems[partOfSpeech])) { return this.grammems[partOfSpeech]; } return this.all_grammems; } } class Morphy_GrammemsProvider_Empty extends Morphy_GrammemsProvider_Base { constructor() { super(...arguments); } getAllGrammemsGrouped() { return {}; } getGrammems(partOfSpeech) { return false; } } class Morphy_GrammemsProvider_ForFactory extends Morphy_GrammemsProvider_Base { constructor(enc) { super(...arguments); this.encoded_grammems = this.encodeGrammems(this.getGrammemsMap(), enc); // а как по-другому? // кроме как копипастой кода родительского конструктора // и чтобы аккуратно - никак this.grammems = {}; this.all_grammems = Morphy_GrammemsProvider_Base.flatizeArray(this.getAllGrammemsGrouped()); } getGrammemsMap() {} getAllGrammemsGrouped() { return this.encoded_grammems; } encodeGrammems(grammems, enc) { const from_enc = this.getSelfEncoding(); const result = {}; if (from_enc == enc) { return grammems; } _lodash2.default.forEach(grammems, (ary, key) => { const keyBuffer = Buffer.from(key); const keyBufferConverted = _encoding2.default.convert(keyBuffer, enc, from_enc); const new_key = keyBufferConverted.toString(); const new_value = []; _lodash2.default.forEach(ary, value => { const valueBuffer = Buffer.from(value); const valueBufferConverted = _encoding2.default.convert(valueBuffer, enc, from_enc); new_value.push(valueBufferConverted.toString()); }); result[new_key] = new_value; }); return result; } } const Morphy_GrammemsProvider_Factory_included = new Map(); class Morphy_GrammemsProvider_Factory { /** * @param {phpMorphy} morphy * @returns {*} */ static create(morphy) { const locale = morphy.getLocale().toLowerCase(); if (!Morphy_GrammemsProvider_Factory_included.has(morphy)) { Morphy_GrammemsProvider_Factory_included.set(morphy, {}); } const included = Morphy_GrammemsProvider_Factory_included.get(morphy); if (_lodash2.default.isUndefined(included[locale])) { const className = `Morphy_GrammemsProvider_${locale}`; let grammemsProviders = {}; try { grammemsProviders = __webpack_require__(27)("./" + locale); } catch (err) { included[locale] = new Morphy_GrammemsProvider_Empty(morphy); return included[locale]; } if (_lodash2.default.isFunction(grammemsProviders[className])) { included[locale] = grammemsProviders[className].instance(morphy); } else { throw new Error("Class '" + className + "' not found"); } } return included[locale]; } } exports.Morphy_GrammemsProvider_Interface = Morphy_GrammemsProvider_Interface; exports.Morphy_GrammemsProvider_Decorator = Morphy_GrammemsProvider_Decorator; exports.Morphy_GrammemsProvider_Base = Morphy_GrammemsProvider_Base; exports.Morphy_GrammemsProvider_Empty = Morphy_GrammemsProvider_Empty; exports.Morphy_GrammemsProvider_ForFactory = Morphy_GrammemsProvider_ForFactory; exports.Morphy_GrammemsProvider_Factory = Morphy_GrammemsProvider_Factory; /***/ }, /* 5 */ /***/ function(module, exports) { module.exports = require("path"); /***/ }, /* 6 */ /***/ function(module, exports, __webpack_require__) { "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Morphy_GramInfo_AncodeCache = exports.Morphy_GramInfo_RuntimeCaching = exports.Morphy_GramInfo_Proxy_WithHeader = exports.Morphy_GramInfo_Proxy = exports.Morphy_GramInfo_Decorator = exports.Morphy_GramInfo = exports.Morphy_GramInfo_Interface = undefined; var _lodash = __webpack_require__(0); var _lodash2 = _interopRequireDefault(_lodash); var _fs = __webpack_require__(3); var _fs2 = _interopRequireDefault(_fs); var _path = __webpack_require__(5); var _path2 = _interopRequireDefault(_path); var _utils = __webpack_require__(1); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } /** * This file is part of phpMorphy library * * Copyright c 2007-2008 Kamaev Vladimir <heromantor@users.sourceforge.net> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ class Morphy_GramInfo_Interface { /** * Returns language for graminfo file * @returns {string} */ getLocale() {} /** * Return encoding for graminfo file * @returns {string} */ getEncoding() {} /** * Return size of character (cp1251 - 1, utf8 - 1, utf16 - 2, utf32 - 4 etc) * @returns {number} */ getCharSize() {} /** * Return end of string value (usually string with \0 value of char_size + 1 length) * @returns {string} */ getEnds() {} /** * Reads graminfo header * * @param {number} offset * @returns {[]} */ readGramInfoHeader(offset) {} /** * Returns size of header struct */ getGramInfoHeaderSize() {} /** * Read ancodes section for header retrieved with readGramInfoHeader * * @param {[]} info * @returns {[]} */ readAncodes(info) {} /** * Read flexias section for header retrieved with readGramInfoHeader * * @param {[]} info * @returns {[]} */ readFlexiaData(info) {} /** * Read all graminfo headers offsets, which can be used latter for readGramInfoHeader method * @returns {[]} */ readAllGramInfoOffsets() {} getHeader() {} readAllPartOfSpeech() {} readAllGrammems() {} readAllAncodes() {} } class Morphy_GramInfo extends Morphy_GramInfo_Interface { static get HEADER_SIZE() { return 128; } /** * @param {Morphy_Storage} storage * @param {boolean} lazy * @returns {*} */ static create(storage, lazy) { if (lazy) { return new Morphy_GramInfo_Proxy(storage); } const readHeader = Morphy_GramInfo.readHeader, validateHeader = Morphy_GramInfo.validateHeader, HEADER_SIZE = Morphy_GramInfo.HEADER_SIZE; const header = readHeader(storage.read(0, HEADER_SIZE)); if (!validateHeader(header)) { throw new Error('Invalid graminfo format'); } const storage_type = storage.getTypeAsString(); const className = `Morphy_GramInfo_${_utils.php.strings.ucfirst(storage_type)}`; const graminfoAccess = __webpack_require__(26)("./graminfo_" + storage_type); return new graminfoAccess[className](storage.getResource(), header); } static readHeader(headerRaw) { const header = _utils.php.unpack(['Vver', 'Vis_be', 'Vflex_count_old', 'Vflex_offset', 'Vflex_size', 'Vflex_count', 'Vflex_index_offset', 'Vflex_index_size', 'Vposes_offset', 'Vposes_size', 'Vposes_count', 'Vposes_index_offset', 'Vposes_index_size', 'Vgrammems_offset', 'Vgrammems_size', 'Vgrammems_count', 'Vgrammems_index_offset', 'Vgrammems_index_size', 'Vancodes_offset', 'Vancodes_size', 'Vancodes_count', 'Vancodes_index_offset', 'Vancodes_index_size', 'Vchar_size', ''].join('/'), headerRaw); let offset = 24 * 4; let len = _utils.php.strings.ord(_utils.php.strings.substr(headerRaw, offset++, 1)); header['lang'] = _utils.php.strings.rtrim(_utils.php.strings.substr(headerRaw, offset, len)); offset += len; len = _utils.php.strings.ord(_utils.php.strings.substr(headerRaw, offset++, 1)); header['encoding'] = _utils.php.strings.rtrim(_utils.php.strings.substr(headerRaw, offset, len)); return header; } static validateHeader(header) { return header['ver'] == 3 || header['is_be'] != 1; } constructor(resource, header) { super(...arguments); this.resource = resource; this.header = header; //this.ends = php.strings.str_repeat('\0', header['char_size'] + 1); //this.ends_size = php.strings.strlen(this.ends); const buf = Buffer.alloc(header['char_size'] + 1); this.ends = buf.fill('\0'); this.ends_size = buf.length; } getLocale() { return this.header['lang']; } getEncoding() { return this.header['encoding']; } getCharSize() { return this.header['char_size']; } getEnds() { return this.ends; } getHeader() { return this.header; } cleanupCString(string) { //var pos = php.strings.strpos(string, this.ends); //if (pos !== false) { // string = php.strings.substr(string, 0, pos); //} let stringBuf = Buffer.isBuffer(string) ? string : Buffer.from(string); const pos = this.ends.indexOf(stringBuf); if (pos >= 0) { stringBuf = stringBuf.slice(0, pos); } return stringBuf.toString(); } readSectionIndex(offset, count) {} readSectionIndexAsSize(offset, count, total_size) { // todo if (!count) { return []; } const index = this.readSectionIndex(offset, count); index[count] = index[0] + total_size; for (let i = 0; i < count; i++) { index[i] = index[i + 1] - index[i]; } delete index[count]; return index; } } class Morphy_GramInfo_Decorator extends Morphy_GramInfo_Interface { /** * @param {Morphy_GramInfo_Interface} info */ constructor(info) { super(...arguments); this.info = info; } readGramInfoHeader(...args) { return this.info.readGramInfoHeader(...args); } getGramInfoHeaderSize(...args) { return this.info.getGramInfoHeaderSize(...args); } readAncodes(...args) { return this.info.readAncodes(...args); } readFlexiaData(...args) { return this.info.readFlexiaData(...args); } readAllGramInfoOffsets(...args) { return this.info.readAllGramInfoOffsets(...args); } readAllPartOfSpeech(...args) { return this.info.readAllPartOfSpeech(...args); } readAllGrammems(...args) { return this.info.readAllGrammems(...args); } readAllAncodes(...args) { return this.info.readAllAncodes(...args); } getLocale(...args) { return this.info.getLocale(...args); } getEncoding(...args) { return this.info.getEncoding(...args); } getCharSize(...args) { return this.info.getCharSize(...args); } getEnds(...args) { return this.info.getEnds(...args); } getHeader(...args) { return this.info.getHeader(...args); } } class Morphy_GramInfo_Proxy extends Morphy_GramInfo_Decorator { /** * @param {Morphy_Storage} $storage */ constructor($storage) { super(...arguments); this.storage = $storage; this._info = null; } get info() { if (!this._info) { this._info = Morphy_GramInfo.create(this.storage, false); delete this.storage; } return this._info; } set info(value) { this._info = !_lodash2.default.isUndefined(value) ? value : null; } } class Morphy_GramInfo_Proxy_WithHeader extends Morphy_GramInfo_Decorator { /** * @param {Morphy_Storage} $storage * @param $cacheFile */ constructor($storage, $cacheFile) { super(...arguments); this.storage = $storage; this._info = null; this.cache = this.readCache($cacheFile); //this.ends = php.strings.str_repeat('\0', this.getCharSize() + 1); const buf = Buffer.alloc(this.getCharSize() + 1); this.ends = buf.fill('\0'); } readCache(fileName) { let result = _fs2.default.readFileSync(fileName); result = /\(([\s\S]*)\)/igm.exec(result.toString())[1]; result = result.replace(/\s/igm, '').replace(/,$/, '').replace(/=>/g, ':').replace(/'/g, '"'); result = ['{', result, '}'].join(''); let parsingGood = true; try { result = JSON.parse(result); parsingGood = _lodash2.default.isPlainObject(result); } catch (e) { parsingGood = false; } if (!parsingGood) { throw new Error('Can`t get header cache from "' + fileName + '" file'); } return result; } getLocale() { return this.cache['lang']; } getEncoding() { return this.cache['encoding']; } getCharSize() { return this.cache['char_size']; } getEnds() { return this.ends; } getHeader() { return this.cache; } get info() { if (!this._info) { this._info = Morphy_GramInfo.create(this.storage, false); delete this.storage; } return this._info; } set info(value) { this._info = !_lodash2.default.isUndefined(value) ? value : null; } } class Morphy_GramInfo_RuntimeCaching extends Morphy_GramInfo_Decorator { constructor(...args) { super(...args); this.$ancodes = {}; this.$flexia_all = {}; } readFlexiaData(info) { const offset = info['offset']; if (!_utils.php.var.isset(this.$flexia_all[offset])) { this.$flexia_all[offset] = this.info.readFlexiaData(info); } return this.$flexia_all[offset]; } } class Morphy_GramInfo_AncodeCache extends Morphy_GramInfo_Decorator { /** * @param {Morphy_GramInfo_Interface} inner * @param resource */ constructor(inner, resource) { super(...arguments); this.hits = 0; this.miss = 0; this.cache = null; this.cache = _utils.php.var.unserialize(resource.read(0, resource.getFileSize()).toString()); if (this.cache === false) { throw new Error("Can`t read ancodes cache"); } } readAncodes(info) { const $offset = info['offset']; // todo: проверить доступ по индекс if (_utils.php.var.isset(this.cache[$offset])) { this.hits++; return this.cache[$offset]; } // in theory misses never occur this.miss++; return super.readAncodes(info); } } exports.Morphy_GramInfo_Interface = Morphy_GramInfo_Interface; exports.Morphy_GramInfo = Morphy_GramInfo; exports.Morphy_GramInfo_Decorator = Morphy_GramInfo_Decorator; exports.Morphy_GramInfo_Proxy = Morphy_GramInfo_Proxy; exports.Morphy_GramInfo_Proxy_WithHeader = Morphy_GramInfo_Proxy_WithHeader; exports.Morphy_GramInfo_RuntimeCaching = Morphy_GramInfo_RuntimeCaching; exports.Morphy_GramInfo_AncodeCache = Morphy_GramInfo_AncodeCache; /***/ }, /* 7 */ /***/ function(module, exports, __webpack_require__) { "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const STORAGE_FILE = exports.STORAGE_FILE = 'file'; const STORAGE_MEM = exports.STORAGE_MEM = 'mem'; const SOURCE_FSA = exports.SOURCE_FSA = 'fsa'; const RESOLVE_ANCODES_AS_TEXT = exports.RESOLVE_ANCODES_AS_TEXT = 0; const RESOLVE_ANCODES_AS_DIALING = exports.RESOLVE_ANCODES_AS_DIALING = 1; const RESOLVE_ANCODES_AS_INT = exports.RESOLVE_ANCODES_AS_INT = 2; const NORMAL = exports.NORMAL = 0; const IGNORE_PREDICT = exports.IGNORE_PREDICT = 2; const ONLY_PREDICT = exports.ONLY_PREDICT = 3; const PREDICT_BY_NONE = exports.PREDICT_BY_NONE = 'none'; const PREDICT_BY_SUFFIX = exports.PREDICT_BY_SUFFIX = 'by_suffix'; const PREDICT_BY_DB = exports.PREDICT_BY_DB = 'by_db'; /***/ }, /* 8 */ /***/ function(module, exports, __webpack_require__) { "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Morphy_Fsa_Sparse_File = undefined; var _lodash = __webpack_require__(0); var _lodash2 = _interopRequireDefault(_lodash); var _fs = __webpack_require__(3); var _fs2 = _interopRequireDefault(_fs); var _utils = __webpack_require__(1); var _fsa = __webpack_require__(2); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } /** * This file is part of phpMorphy library * * Copyright c 2007-2008 Kamaev Vladimir <heromantor@users.sourceforge.net> * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ class Morphy_Fsa_Sparse_File extends _fsa.Morphy_Fsa { constructor(...args) { super(...args); this.alphabet_num = null; } /** * @param trans * @param word * @param {boolean} [readAnnot=true] */ walk(trans, word, readAnnot = true) { const fh = this.resource; const fsa_start = this.fsa_start; const wordBuf = Buffer.from(word); let prev_trans; let char; let result; let annot; let buf; let i = 0; let c = wordBuf.length; for (; i < c; i++) { prev_trans = trans; char = _utils.php.strings.ord(wordBuf, i); ///////////////////////////////// // find char in state begin // sparse version result = true; buf = Buffer.alloc(4); _fs2.default.readSync(fh, buf, 0, 4, fsa_start + ((trans >> 10 & 0x3FFFFF) + char + 1 << 2)); trans = _utils.php.unpack('V', buf)[0]; if (trans & 0x0200 || (trans & 0xFF) != char) { result = false; } // find char in state end ///////////////////////////////// if (!result) { trans = prev_trans; break; } } annot = null; result = false; prev_trans = trans; if (i >= c) { // Read annotation when we walked all chars in word result = true; if (readAnnot) { // read annot trans buf = Buffer.alloc(4); _fs2.default.readSync(fh, buf, 0, 4, fsa_start + ((trans >> 10 & 0x3FFFFF) << 2)); trans = _utils.php.unpack('V', buf)[0]; if ((trans & 0x0100) == 0) { result = false; } else { annot = this.getAnnot(trans); } } } return { result, annot, walked: i, last_trans: trans, word_trans: prev_trans }; } /** * @param {*} startNode * @param {*} callback * @param {boolean} [readAnnot=true] * @param {string} [path=] * @returns {number} */ collect(startNode, callback, readAnnot = true, path = '') { const stack = []; const stack_idx = []; let total = 0; let start_idx = 0; let state; let trans; let annot; stack.push(null); stack_idx.push(null); state = this.readState(startNode >> 10 & 0x3FFFFF); do { let i = start_idx; let c = _lodash2.default.size(state); for (; i < c; i++) { trans = state[i]; if (trans & 0x0100) { total++; if (readAnnot) { annot = this.getAnnot(trans); } else { annot = trans; } //if (!php.funchand.call_user_func(callback, path, annot)) { if (!_utils.php.funchand.call_user_func(callback, null, annot)) { return total; } } else { //path += php.strings.chr((trans & 0xFF)); stack.push(state); stack_idx.push(i + 1); state = this.readState(trans >> 10 & 0x3FFFFF); start_idx = 0; break; } } if (i >= c) { state = stack.pop(); start_idx = stack_idx.pop(); //path = php.strings.substr(Buffer.from(path), 0, -1).toString(); } } while (!!stack.length); return total; } readState(index) { const fh = this.resource; const fsa_start = this.fsa_start; const result = []; let buf; let trans; let start_offset = fsa_start + (index << 2); // first try read annot transition buf = Buffer.alloc(4); _fs2.default.readSync(fh, buf, 0, 4, start_offset); trans = _utils.php.unpack('V', buf)[0]; if (trans & 0x0100) { result.push(trans); } // read rest start_offset += 4; _lodash2.default.forEach(this.getAlphabetNum(), char => { buf = Buffer.alloc(4); _fs2.default.readSync(fh, buf, 0, 4, start_offset + (char << 2)); trans = _utils.php.unpack('V', buf)[0]; //if(!(trans & 0x0200) && (trans & 0xFF) == char) { // TODO: check term and empty flags at once i.e. trans & 0x0300 if (!(trans & 0x0200 || trans & 0x0100) && (trans & 0xFF) == char) { result.push(trans); } }); return result; } unpackTranses(rawTranses) { rawTranses = (0, _utils.castArray)(rawTranses); const result = []; _lodash2.default.forEach(rawTranses, rawTrans => { result.push({ term: !!(rawTrans & 0x0100), empty: !!(rawTrans & 0x0200), attr: rawTrans & 0xFF, dest: rawTrans >> 10 & 0x3FFFFF }); }); return result; } readRootTrans() { const fh = this.resource; const fsa_start = this.fsa_start; let trans; let buf; buf = Buffer.alloc(4); _fs2.default.readSync(fh, buf, 0, 4, fsa_start + 4); trans = _utils.php.unpack('V', buf)[0]; return trans; } readAlphabet() { const fh = this.resource; let buf; buf = Buffer.alloc(this.header['alphabet_size']); _fs2.default.readSync(fh, buf, 0, this.header['alphabet_size'], this.header['alphabet_offset']); return buf.toString(); } getAnnot(trans) { if (!(trans & 0x0100)) { return null; } const fh = this.resource; const offset = this.header['annot_offset'] + ((trans & 0xFF) << 22 | trans >> 10 & 0x3FFFFF); let len; let annot; let buf; buf = Buffer.alloc(1); _fs2.default.readSync(fh, buf, 0, 1, offset); len = _utils.php.strings.ord(buf); if (len) { buf = Buffer.alloc(len); _fs2.default.readSync(fh, buf, 0, len, offset + 1); annot = buf; } else { annot = null; } return annot; } getAlphabetNum() { if (!_utils.php.var.isset(this.alphabet_num)) { this.alphabet_num = _utils.php.array.array_map(_utils.php.strings.ord, this.getAlphabet()); } return this.alphabet_num; } } exports.Morphy_Fsa_Sparse_File = Morphy_Fsa_Sparse_File; /***/ }, /* 9 */ /***/ function(module, exports, __webpack_require__) { "use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.Morphy_Fsa_Sparse_Mem = undefined; var _lodash = __webpack_require__(0); var _lodash2 = _interopRequireDefault(_lodash); var _utils = __webpack_require__(1); var _fsa = __webpack_require__(2); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } class Morphy_Fsa_Sparse_Mem extends _fsa.Morphy_Fsa { constructor(...args) { super(...args); this.alphabet_num = null; } /** * @param trans * @param word * @param {boolean} [readAnnot=true] */ walk(trans, word, readAnnot = true) { const mem = this.resource; const fsa_start = this.fsa_start; const wordBuf = Buffer.from(word); let prev_trans; let char; let result; let annot; let buf; let i = 0; let c = wordBuf.length; for (; i < c; i++) { prev_trans = trans; char = _utils.php.strings.ord(wordBuf, i); ///////////////////////////////// // find char in state begin // sparse version result = true; buf = _utils.php.strings.substr(mem, fsa_start + ((trans >> 10 & 0x3FFFFF) + char + 1 << 2), 4); trans = _utils.php.unpack('V', buf)[0]; if (trans & 0x0200 || (trans & 0xFF) != char) { result = false; } // find char in state end ///////////////////////////////// if (!result) { trans = prev_trans; break; } } annot = null; result = false; prev_trans = trans; if (i >= c) { // Read annotation when we walked all chars in word result = true; if (readAnnot) { // read annot trans buf = _utils.php.strings.substr(mem, fsa_start + ((trans >> 10 & 0x3FFFFF) << 2), 4); trans = _utils.php.unpack('V', buf)[0]; if ((trans & 0x0100) == 0) { result = false; } else { annot = this.getAnnot(trans); } } } return { result, annot, walked: i, last_trans: trans, word_trans: prev_trans }; } /** * @param {*} startNode * @param {*} callback * @param {boolean} [readAnnot=true] * @param {string} [path=] * @returns {number} */ collect(startNode, callback, readAnnot = true, path = '') { const stack = []; const stack_idx = []; let total = 0; let start_idx = 0; let state; let trans; let annot; stack.push(null); stack_idx.push(null); state = this.readState(startNode >> 10 & 0x3FFFFF); do { let i = start_idx; let c = _lodash2.default.size(state); for (; i < c; i++) { trans = state[i]; if (trans & 0x0100) { total++; if (readAnnot) { annot = this.getAnnot(trans); } else { annot = trans; } //if (!php.funchand.call_user_func(callback, path, annot)) { if (!_utils.php.funchand.call_user_func(callback, null, annot)) { return total; } } else { //path += php.strings.chr((trans & 0xFF)); stack.push(state); stack_idx.push(i + 1); state = this.readState(trans >> 10 & 0x3FFFFF); start_idx = 0; break; } } if (i >= c) { state = stack.pop(); start_idx = stack_idx.pop