UNPKG

phpmorphy-locutus

Version:

The original package is located at https://github.com/antixrist/node-phpmorphy; however, it used the phpjs module, which contained dependencies with critical vulnerabilities and is no longer maintained. That module was swapped for the newer locutus package.

github.com/noduslabs/node-phpmorphy

noduslabs/node-phpmorphy

278 lines (221 loc) • 7.11 kB

JavaScript

/**
 * This file is part of phpMorphy library
 *
 * Copyright c 2007-2008 Kamaev Vladimir <heromantor@users.sourceforge.net>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

import _ from 'lodash';
import fs from 'fs';
import { php } from '../../../utils';
import { Morphy_GramInfo } from '../graminfo';

/**
 * Size in bytes of the fixed gram-info header: ten fields, each unpacked
 * with the 'v' format code (see readGramInfoHeader).
 */
const GRAMINFO_HEADER_SIZE = 20;

/**
 * Size in bytes of the prefix that precedes the name in a part-of-speech or
 * grammem record: a 'v' id followed by a 'C' flag/shift byte.
 */
const NAMED_RECORD_PREFIX_SIZE = 3;

/**
 * Reads `length` bytes from the file descriptor `fd`, starting at the
 * absolute file position `position`, into a freshly allocated Buffer.
 *
 * The number of bytes actually read is not checked (the original code did
 * not check it either); on a short read the tail of the buffer stays
 * zero-filled.
 *
 * @param {number} fd - open file descriptor
 * @param {number} length - number of bytes to read
 * @param {number} position - absolute offset in the file
 * @returns {Buffer}
 */
function readChunk (fd, length, position) {
  const chunk = Buffer.alloc(length);
  fs.readSync(fd, chunk, 0, length, position);

  return chunk;
}

/**
 * Gram-info reader that pulls every record straight from the file descriptor
 * stored in `this.resource`, using positioned synchronous reads.
 *
 * `this.resource`, `this.header`, `this.ends`, `this.ends_size`,
 * `readSectionIndexAsSize` and `cleanupCString` are not defined in this file;
 * presumably they are provided by Morphy_GramInfo — verify against the base
 * class.
 */
class Morphy_GramInfo_File extends Morphy_GramInfo {
  constructor (...args) {
    super(...args);

    // Kept as an instance property for backward compatibility; the methods
    // below use the module-level constant so that they do not depend on
    // construction order.
    this.header_size = GRAMINFO_HEADER_SIZE;
  }

  /**
   * @returns {number} size of the gram-info header in bytes
   */
  getGramInfoHeaderSize () {
    return GRAMINFO_HEADER_SIZE;
  }

  /**
   * Reads and unpacks the gram-info header located at `offset`.
   *
   * @param {number} offset - absolute file position of the header
   * @returns {Object} the unpacked header fields plus `offset`
   */
  readGramInfoHeader (offset) {
    const raw = readChunk(this.resource, GRAMINFO_HEADER_SIZE, offset);
    const format = [
      'vid',
      'vfreq',
      'vforms_count',
      'vpacked_forms_count',
      'vancodes_count',
      'vancodes_offset',
      'vancodes_map_offset',
      'vaffixes_offset',
      'vaffixes_size',
      'vbase_size'
    ].join('/');

    const header = php.unpack(format, raw);
    header['offset'] = offset;

    return header;
  }

  /**
   * Reads the ancodes map that follows the ancodes section of a record.
   *
   * @param {Object} info - header as returned by readGramInfoHeader
   * @returns {Object} raw result of php.unpack, one entry per packed form
   */
  readAncodesMap (info) {
    // TODO: this can be wrong due to aligning ancodes map section
    const position = info['offset'] + GRAMINFO_HEADER_SIZE + info['forms_count'] * 2;
    const packedFormsCount = info['packed_forms_count'];
    const raw = readChunk(this.resource, packedFormsCount * 2, position);

    return php.unpack('v' + packedFormsCount, raw);
  }

  /**
   * Splits a flat ancode list into consecutive groups whose lengths are given
   * by `map`.
   *
   * @param {Array|Object} ancodes - flat list of ancodes
   * @param {Array|Object} map - group sizes, in order
   * @returns {Array<Array>} one array of ancodes per entry of `map`
   */
  splitAncodes (ancodes, map) {
    // NOTE(review): elsewhere in this file php.unpack results are read as
    // keyed objects (`_.values(...)`, `[1]`). Its key base is not visible
    // here, so normalise to a dense 0-based array before indexing; for an
    // input that is already a 0-based array this is a no-op.
    const flat = _.values(ancodes);
    const groups = [];
    let cursor = 0;

    _.forEach(map, groupSize => {
      const group = [];

      for (let taken = 0; taken < groupSize; taken++, cursor++) {
        group.push(flat[cursor]);
      }

      groups.push(group);
    });

    return groups;
  }

  /**
   * Reads the ancodes of a record and groups them according to its ancodes
   * map.
   *
   * @param {Object} info - header as returned by readGramInfoHeader
   * @returns {Array<Array>} grouped ancodes (see splitAncodes)
   */
  readAncodes (info) {
    // TODO: this can be wrong due to aligning ancodes section
    const position = info['offset'] + GRAMINFO_HEADER_SIZE;
    const formsCount = info['forms_count'];
    const raw = readChunk(this.resource, formsCount * 2, position);
    const ancodes = php.unpack('v' + formsCount, raw);

    return this.splitAncodes(ancodes, this.readAncodesMap(info));
  }

  /**
   * Reads the affixes section of a record and splits it on `this.ends`.
   *
   * @param {Object} info - header as returned by readGramInfoHeader
   * @returns {string[]} the affix strings
   */
  readFlexiaData (info) {
    let position = info['offset'] + GRAMINFO_HEADER_SIZE;

    if (php.var.isset(info['affixes_offset'])) {
      position += info['affixes_offset'];
    } else {
      // No explicit offset: the affixes come right after the ancodes and
      // the ancodes map.
      position += info['forms_count'] * 2 + info['packed_forms_count'] * 2;
    }

    // The final `ends_size` bytes are left out of the read.
    const length = info['affixes_size'] - this.ends_size;
    const raw = readChunk(this.resource, length, position);

    return raw.toString().split(this.ends.toString());
  }

  /**
   * @returns {Array<number>} the flexia section index (see readSectionIndex)
   */
  readAllGramInfoOffsets () {
    return this.readSectionIndex(
      this.header['flex_index_offset'],
      this.header['flex_count']
    );
  }

  /**
   * Reads `count` values, 4 bytes each (unpacked with 'V'), starting at
   * `offset`.
   *
   * @param {number} offset - absolute file position of the index
   * @param {number} count - number of entries
   * @returns {Array<number>}
   */
  readSectionIndex (offset, count) {
    const raw = readChunk(this.resource, count * 4, offset);

    return _.values(php.unpack('V' + count, raw));
  }

  /**
   * Reads every flexia record: header, affixes and ancodes.
   *
   * @returns {Object} map of header id -> { header, affixes, ancodes }
   */
  readAllFlexia () {
    const result = {};
    const sizes = this.readSectionIndexAsSize(
      this.header['flex_index_offset'],
      this.header['flex_count'],
      this.header['flex_size']
    );
    let offset = this.header['flex_offset'];

    _.forEach(sizes, size => {
      const header = this.readGramInfoHeader(offset);
      const affixes = this.readFlexiaData(header);
      // The second argument is ignored by readAncodes as defined in this
      // class; it is kept in case an override still accepts it.
      const ancodes = this.readAncodes(header, true);

      // todo: verify the values obtained above
      result[header['id']] = { header, affixes, ancodes };

      offset += size;
    });

    return result;
  }

  /**
   * Reads every part-of-speech record.
   *
   * Each record is a 3-byte prefix (id, is_predict) followed by the name.
   *
   * @returns {Object} map of id -> { name, is_predict }
   */
  readAllPartOfSpeech () {
    const fh = this.resource;
    const result = {};
    const sizes = this.readSectionIndexAsSize(
      this.header['poses_index_offset'],
      this.header['poses_count'],
      this.header['poses_size']
    );
    let offset = this.header['poses_offset'];

    _.forEach(sizes, size => {
      const prefix = php.unpack(
        'vid/Cis_predict',
        readChunk(fh, NAMED_RECORD_PREFIX_SIZE, offset)
      );

      // Positioned reads do not advance a file cursor, so the name has to be
      // addressed explicitly past the prefix. Reading it from `offset` again
      // would return the prefix bytes as the start of the name.
      const nameBytes = readChunk(
        fh,
        size - NAMED_RECORD_PREFIX_SIZE,
        offset + NAMED_RECORD_PREFIX_SIZE
      );

      result[prefix['id']] = {
        name: this.cleanupCString(nameBytes),
        is_predict: !!prefix['is_predict']
      };

      offset += size;
    });

    // todo: verify result
    return result;
  }

  /**
   * Reads every grammem record.
   *
   * Each record is a 3-byte prefix (id, shift) followed by the name.
   *
   * @returns {Object} map of id -> { name, shift }
   */
  readAllGrammems () {
    const fh = this.resource;
    const result = {};
    const sizes = this.readSectionIndexAsSize(
      this.header['grammems_index_offset'],
      this.header['grammems_count'],
      this.header['grammems_size']
    );
    let offset = this.header['grammems_offset'];

    _.forEach(sizes, size => {
      const prefix = php.unpack(
        'vid/Cshift',
        readChunk(fh, NAMED_RECORD_PREFIX_SIZE, offset)
      );

      // See readAllPartOfSpeech: the name starts right after the prefix.
      const nameBytes = readChunk(
        fh,
        size - NAMED_RECORD_PREFIX_SIZE,
        offset + NAMED_RECORD_PREFIX_SIZE
      );

      result[prefix['id']] = {
        name: this.cleanupCString(nameBytes),
        shift: prefix['shift']
      };

      offset += size;
    });

    return result;
  }

  /**
   * Reads every ancode record: id, pos_id, a grammem count and that many
   * grammem ids.
   *
   * @returns {Object} map of id -> { pos_id, offset, grammem_ids }, where
   *   `offset` is the file position of the record's grammem id list
   */
  readAllAncodes () {
    const fh = this.resource;
    const result = {};
    const total = this.header['ancodes_count'];
    let offset = this.header['ancodes_offset'];

    for (let i = 0; i < total; i++) {
      const ids = php.unpack('vid/vpos_id', readChunk(fh, 4, offset));
      offset += 4;

      const grammemsCount = php.unpack('v', readChunk(fh, 2, offset))[1];
      offset += 2;

      let grammemIds = [];
      if (grammemsCount) {
        grammemIds = _.values(
          php.unpack('v' + grammemsCount, readChunk(fh, grammemsCount * 2, offset))
        );
      }

      result[ids['id']] = {
        pos_id: ids['pos_id'],
        offset: offset,
        grammem_ids: grammemIds
      };

      offset += grammemsCount * 2;
    }

    return result;
  }
}

export { Morphy_GramInfo_File };