UNPKG

wordpos

Version:

wordpos is a set of part-of-speech utilities for Node.js & browser using the WordNet database.

196 lines (167 loc) 5.6 kB
/*!
 * node/indexFile.js
 *
 * implements fast index lookup of WordNet's index files
 *
 * Copyright (c) 2012-2019 mooster@42at.com
 * https://github.com/moos/wordpos
 *
 * Portions: Copyright (c) 2011, Chris Umbel
 *
 * Released under MIT license
 */

const _ = require('underscore')._;
const util = require('util');
const path = require('path');
const fs = require('fs');
const piper = require('./piper');
const { indexLookup } = require('../common');

/**
 * number of leading characters of a search term used as the bucket key
 * into the fast index
 *
 * @type {number}
 */
const KEY_LENGTH = 3;

/**
 * cache of fast index data across instances of WordPOS class,
 * keyed by the full path of the index file
 *
 * @type {object}
 */
const cache = {};

/**
 * load fast index bucket data
 *
 * Reads and parses `<dir>/fast-<name>.json` synchronously. On any read or
 * parse failure a hint is printed to stderr and the original error is
 * rethrown.
 *
 * @param dir {string} - dir path of index files
 * @param name {string} - name of index file, eg, 'index.verb'
 * @returns {Object} - fast index data object
 * @throws whatever fs.readFileSync or JSON.parse throws
 */
function loadFastIndex(dir, name) {
  const jsonFile = path.join(dir, `fast-${name}.json`);

  try {
    return JSON.parse(fs.readFileSync(jsonFile, 'utf8'));
  } catch (e) {
    console.error('Error with fast index file. Try reinstalling from npm!');
    throw e;
  }
}

/**
 * read index file using fast index data at key
 *
 * Equivalent to reading from `key` up to the start of the bucket that
 * follows it, so this simply delegates to readIndexBetweenKeys.
 *
 * @param key {string} - 3-char key into fast index
 * @param index {object} - index file object
 * @param callback {function} - function receives buffer of data read
 * @returns none
 */
function readIndexForKey(key, index, callback) {
  readIndexBetweenKeys(key, null, index, callback);
}

/**
 * read index file using fast index data at keyStart to keyEnd (inclusive)
 *
 * Each entry of `index.fastIndex.offsets` is used as a pair:
 * element 0 is the byte offset of the bucket in the index file and
 * element 1 is the key of the bucket that follows it.
 *
 * Both keys must exist in the fast index; a missing key throws a TypeError.
 * On a read error the error is logged and the callback is NOT invoked.
 *
 * @param keyStart {string} - 3-char key into fast index to begin at
 * @param keyEnd {string|null} - 3-char key into fast index to end at. If null, reads to next key.
 * @param index {object} - index file object (reads index.fastIndex and index.fd)
 * @param callback - function receives buffer of data read
 * @returns none
 */
function readIndexBetweenKeys(keyStart, keyEnd, index, callback) {
  const offsets = index.fastIndex.offsets;
  const offset = offsets[keyStart][0];
  const nextKey = offsets[keyEnd || keyStart][1];
  const nextOffset = offsets[nextKey][0];
  // stop one byte short of the following bucket
  // (presumably the newline that separates the two - confirm against the index builder)
  const len = nextOffset - offset - 1;
  // Buffer.alloc is a static factory, not a constructor: call it without `new`
  const buffer = Buffer.alloc(len);

  // NOTE(review): index.fd is never assigned in this file; presumably the piper opens it - confirm
  fs.read(index.fd, buffer, 0, len, offset, function (err) {
    if (err) return console.log(err);
    callback(buffer);
  });
}

/**
 * find a search term in an index file (using fast index)
 *
 * Calls to same bucket are queued for callback using the piper.
 *
 * The callback receives either `{status: 'miss'}` or
 * `{status: 'hit', key, line, tokens}` where `line` is the matching index
 * line and `tokens` is that line split on whitespace.
 *
 * @param search {string} - word to search for
 * @param callback {function} - callback receives found line and tokens
 * @returns none
 */
function find(search, callback) {
  const data = this.fastIndex;
  const miss = { status: 'miss' };
  const key = search.slice(0, KEY_LENGTH);

  // own-property check so inherited Object.prototype names never count as buckets
  if (!Object.prototype.hasOwnProperty.call(data.offsets, key)) {
    return process.nextTick(function () {
      callback(miss);
    });
  }

  // prepare the piper
  const task = `find:${key}`;
  const args = [key, this];
  const context = [search, callback]; // last arg MUST be callback

  // pay the piper
  this.piper(task, readIndexForKey, args, context, collector);

  // invoked with args + context + the buffer that readIndexForKey produced
  function collector(_key, _index, word, done, buffer) {
    const lines = buffer.toString().split('\n');
    // the lemma of each line is everything before the first space
    const keys = lines.map(function (line) {
      return line.substring(0, line.indexOf(' '));
    });
    const ind = _.indexOf(keys, word, /*isSorted*/ true); // binary search!

    if (ind === -1) return done(miss);

    const line = lines[ind];
    const tokens = line.split(/\s+/);

    done({ status: 'hit', key: tokens[0], line: line, tokens: tokens });
  }
}

/**
 * loads fast index data and attaches it, plus per-instance lookup state,
 * to the given IndexFile instance
 *
 * The parsed fast index is shared between instances through the
 * module-level cache; `indexKeys` and `trie` are (re)initialized on the
 * shared object every time an instance is created.
 *
 * @param index {object} - the IndexFile instance
 * @throws {Error} if no fast index data is available for the file
 */
function initIndex(index) {
  const key = index.filePath;

  if (!(key in cache)) {
    cache[key] = loadFastIndex(index.dictPath, index.fileName);
  }

  // if no fast index data was found or was corrupt, throw
  if (!cache[key]) throw new Error(`Unable to load fastIndex file: ${index.filePath}`);

  index.fastIndex = cache[key];
  index.fastIndex.indexKeys = Object.keys(index.fastIndex.offsets);
  index.fastIndex.trie = null; // calc on demand

  index.refcount = 0;
  index.callbackQueue = {};
  index.piper = _.bind(piper, index);
}

/**
 * IndexFile class
 *
 * @param dictPath {string} - WordNet db dict path
 * @param name {string} - name of index: noun, verb, adj, adv
 * @constructor
 */
function IndexFile(dictPath, name) {
  this.dictPath = dictPath;
  this.fileName = 'index.' + name;
  this.filePath = path.join(this.dictPath, this.fileName);
  initIndex(this);
}

IndexFile.prototype.lookup = indexLookup;
IndexFile.prototype.find = find;

/**
 * export static method
 * @type {readIndexBetweenKeys}
 */
IndexFile.readIndexBetweenKeys = readIndexBetweenKeys;

module.exports = IndexFile;