UNPKG

nodebb-plugin-solr

Version:

Full-text searching for NodeBB using Apache Solr

698 lines (591 loc) 18.3 kB
'use strict';

/* globals module, require */

const db = require.main.require('./src/database');
const winston = require.main.require('winston');
const engine = require('solr-client');
const async = require('async');

const LRU = require('lru-cache');

// Remember the last 20 searches in the past twenty minutes
const titleCache = LRU({ max: 20, maxAge: 1000 * 60 * 20 });
const postCache = LRU({ max: 20, maxAge: 1000 * 60 * 20 });

const topics = require.main.require('./src/topics');
const posts = require.main.require('./src/posts');
const batch = require.main.require('./src/batch');

const utils = require('./lib/utils');

/**
 * Plugin singleton. Holds the Solr client, the plugin settings loaded from
 * the `settings:solr` database object, and the state of a running re-index.
 */
const Solr = {
	/*
		Defaults configs:
		host: localhost
		port: 8983
		core: ''
		path: '/solr'
		enabled: undefined (false)
		titleField: 'title_t'
		contentField: 'description_t',
		secure: false
	*/
	config: {}, // default is localhost:8983, '' core, '/solr' path
	client: undefined,
	indexStatus: {
		running: false,
		current: 0,
		total: 0,
		message: 'Initializing',
	},
};

/* Private helpers */

// True when the stored `enabled` setting parses to a non-zero integer.
function isEnabled() {
	return !!parseInt(Solr.config.enabled, 10);
}

// Name of the Solr field that stores topic titles.
function getTitleField() {
	return Solr.config.titleField || 'title_t';
}

// Name of the Solr field that stores post content.
function getContentField() {
	return Solr.config.contentField || 'description_t';
}

// Runs `q` with rows(0) and reports only the number of matching documents.
function countDocuments(q, callback) {
	const query = Solr.client.createQuery().q(q).start(0).rows(0);

	Solr.client.search(query, function (err, obj) {
		if (!err && obj && obj.response) {
			callback(undefined, obj.response.numFound);
		} else {
			callback(err, 0);
		}
	});
}

/**
 * Registers the admin page and utility routes, then loads the settings and
 * connects to Solr. `callback` is invoked immediately; it does not wait for
 * the settings to be loaded.
 */
Solr.init = function (data, callback) {
	const pluginMiddleware = require('./middleware');
	const render = function (req, res) {
		// Regenerate csrf token
		const token = req.csrfToken();

		res.render('admin/plugins/solr', {
			ping: res.locals.ping,
			enabled: res.locals.enabled,
			stats: res.locals.stats,
			csrf: token,
			running: Solr.indexStatus.running,
		});
	};

	data.router.get('/admin/plugins/solr', data.middleware.applyCSRF, data.middleware.admin.buildHeader, pluginMiddleware.ping, pluginMiddleware.getEnabled, pluginMiddleware.getStats, render);
	data.router.get('/api/admin/plugins/solr', data.middleware.applyCSRF, pluginMiddleware.ping, pluginMiddleware.getEnabled, pluginMiddleware.getStats, render);

	// Utility
	// NOTE(review): these state-changing routes are registered here without any
	// admin-privilege or CSRF middleware. Confirm they are protected elsewhere.
	data.router.post('/admin/plugins/solr/rebuild', Solr.rebuildIndex);
	data.router.get('/admin/plugins/solr/rebuildProgress', Solr.getIndexProgress);
	data.router.post('/admin/plugins/solr/toggle', Solr.toggle);
	data.router.delete('/admin/plugins/solr/flush', Solr.flush);
	data.router.delete('/admin/plugins/solr/cache', Solr.dropCaches);

	Solr.getSettings(Solr.connect);

	callback();
};

/**
 * Pings the Solr server. Calls back with Error('not-connected') when no
 * client has been created yet.
 */
Solr.ping = function (callback) {
	if (Solr.client) {
		Solr.client.ping(callback);
	} else {
		callback(new Error('not-connected'));
	}
};

/**
 * Reports whether the dbsearch plugin is listed in the parent module's
 * `libraries`. Returns false (instead of throwing) when that registry is
 * not reachable.
 */
Solr.checkConflict = function () {
	const parent = module.parent;
	const libraries = parent && parent.exports && parent.exports.libraries;

	return !!(libraries && libraries['nodebb-plugin-dbsearch']);
};

/**
 * Appends two notices (connection state, indexing enabled) to `notices`.
 */
Solr.getNotices = function (notices, callback) {
	Solr.ping(function (err) {
		const solrNotices = [
			{ done: !err, doneText: 'Solr connection OK', notDoneText: 'Could not connect to Solr server' },
			{ done: parseInt(Solr.config.enabled, 10) || false, doneText: 'Solr Indexing Enabled', notDoneText: 'Solr Indexing Disabled' },
		];

		callback(null, notices.concat(solrNotices));
	});
};

/**
 * Reloads Solr.config from the `settings:solr` object. Only own properties
 * with a non-empty value are copied; on a database error the config is left
 * empty so the defaults apply. `callback` is always invoked without arguments.
 */
Solr.getSettings = function (callback) {
	db.getObject('settings:solr', function (err, config) {
		Solr.config = {};

		if (!err) {
			for (const k in config) {
				// Guard against null/undefined values before reading `.length`
				if (Object.prototype.hasOwnProperty.call(config, k) && config[k] && config[k].length) {
					Solr.config[k] = config[k];
				}
			}
		} else {
			winston.error('[plugin:solr] Could not fetch settings, assuming defaults.');
		}

		callback();
	});
};

/**
 * Calls back with the total number of documents in the index (0 on error).
 */
Solr.getRecordCount = function (callback) {
	countDocuments('*:*', callback);
};

/**
 * Calls back with the number of documents that have a title field
 * (0 on error).
 */
Solr.getTopicCount = function (callback) {
	countDocuments(getTitleField() + ':*', callback);
};

/**
 * (Re)creates the Solr client from Solr.config and applies basic auth when
 * both a username and a password are configured.
 */
Solr.connect = function () {
	if (Solr.client) {
		delete Solr.client;
	}

	Solr.config = {
		...Solr.config,
		// The setting is stored as 'on'; also accept an already-coerced `true`
		// so calling connect() twice does not silently switch `secure` off.
		secure: Solr.config.secure === true || Solr.config.secure === 'on',
	};

	Solr.client = engine.createClient(Solr.config);

	if (Solr.config.username && Solr.config.password) {
		Solr.client.basicAuth(Solr.config.username, Solr.config.password);
	}
};

/**
 * Adds the plugin's entry to the admin menu header.
 */
Solr.adminMenu = function (custom_header, callback) {
	custom_header.plugins.push({
		route: '/plugins/solr',
		icon: 'fa-search',
		name: 'Apache Solr',
	});

	callback(null, custom_header);
};

/**
 * Searches titles (data.index === 'topic') or post content and appends the
 * matching tids/pids to data.ids. Results are cached per search term. When
 * dbsearch is active or Solr cannot be reached, `data` is returned unchanged.
 */
Solr.search = function (data, callback) {
	if (Solr.checkConflict()) {
		// The dbsearch plugin was detected, abort search!
		winston.warn('[plugin/solr] Another search plugin (dbsearch) is enabled, so search via Solr was aborted.');
		return callback(null, data);
	}

	const isTopic = data.index === 'topic';
	const field = isTopic ? 'tid_i' : 'pid_i';
	const term = utils.addFiltersToTerm(data.content, data);

	// Determine which cache to use
	const cache = isTopic ? titleCache : postCache;

	if (cache.has(term)) {
		data.ids = data.ids.concat(cache.get(term));
		return callback(null, data);
	}

	Solr.ping(function (err) {
		if (err) {
			// Stop use search engine when connection failed
			winston.warn('[plugins/solr] Could not connect to Solr server');
			return callback(null, data);
		}

		// The query is built only after a successful ping, so Solr.client is
		// guaranteed to exist here.
		const fields = {};
		fields[isTopic ? getTitleField() : getContentField()] = 1;

		const query = Solr.client.createQuery()
			.q(term)
			.edismax()
			.qf(fields)
			.start(0)
			.rows(500);

		// Working around: https://github.com/lbdremy/solr-node-client/issues/197
		query.parameters = query.parameters.map(param => param.replace('^1', '%5E1'));

		Solr.client.search(query, function (err, obj) {
			if (err) {
				return callback(err);
			}

			// Tolerate a missing/partial response instead of throwing
			const docs = (obj && obj.response && obj.response.docs) || [];
			const ids = docs.map(function (result) {
				return result[field];
			}).filter(Boolean);

			data.ids = data.ids.concat(ids);
			callback(null, data);
			cache.set(term, ids);

			winston.verbose('[plugin/solr] Search (' + data.index + ') for "' + data.content + '" returned ' + docs.length + ' results');
		});
	});
};

/**
 * Searches post content within a single topic (data.tid) for data.term and
 * appends the matching pids to data.ids.
 */
Solr.searchTopic = function (data, callback) {
	const tid = data.tid;
	const term = data.term;

	if (!term || !term.length) {
		return callback(null, data);
	}

	if (!Solr.client) {
		return callback(new Error('not-connected'));
	}

	// Populate Query
	const fields = {};
	fields[getContentField()] = term;
	fields.tid_i = tid;

	const query = Solr.client.createQuery().q(fields);

	Solr.client.search(query, function (err, obj) {
		if (err) {
			callback(err);
		} else if (obj && obj.response && obj.response.docs.length > 0) {
			const pids = obj.response.docs.map(function (result) {
				return result.pid_i;
			});

			data.ids = (data.ids || []).concat(pids);
			callback(null, data);
		} else {
			// NOTE(review): this branch yields an empty array while the others
			// yield `data`; kept as-is because the caller's contract is not
			// visible from this file.
			callback(null, []);
		}
	});
};

/**
 * Route handler: persists the enabled flag ('1'/'0') from req.body.state.
 * Responds 400 when no state is given, 500 when the write fails.
 */
Solr.toggle = function (req, res) {
	if (!req.body.state) {
		res.status(400).send('"state" required');
		return;
	}

	const enabled = parseInt(req.body.state, 10) ? '1' : '0';

	db.setObjectField('settings:solr', 'enabled', enabled, function (err) {
		if (err) {
			winston.error('[plugins/solr] Could not save enabled state, error: ' + err.message);
			res.sendStatus(500);
			return;
		}

		// Only mirror the setting in memory once it has actually been saved
		Solr.config.enabled = enabled;
		res.sendStatus(200);
	});
};

/**
 * Adds one document or an array of documents to the index and commits.
 * `callback`, when given, is always invoked with the error (if any).
 * A missing or empty payload is treated as a successful no-op.
 */
Solr.add = function (payload, callback) {
	// When used as a waterfall step after a task that produced no result,
	// the continuation arrives as the first argument.
	if (typeof payload === 'function') {
		callback = payload;
		payload = undefined;
	}

	const isEmpty = !payload || (Array.isArray(payload) && payload.length === 0);
	if (isEmpty) {
		if (typeof callback === 'function') {
			callback(null);
		}
		return;
	}

	async.series([
		function (next) {
			Solr.client.add(payload, next);
		},
		async.apply(Solr.commit),
	], function (err) {
		if (err) {
			const what = Array.isArray(payload) ? payload.length + ' documents' : 'post ' + payload.id;
			winston.error('[plugins/solr] Could not index ' + what + ', error: ' + err.message);
		}

		// Always report back; otherwise callers that wait on this (e.g. the
		// re-index loop) would never continue after a failure.
		if (typeof callback === 'function') {
			callback(err);
		}
	});
};

/**
 * Deletes the document with the given id. With a callback the caller is
 * responsible for committing; without one a commit is issued right away.
 */
Solr.remove = function (key, callback) {
	Solr.client.delete('id', key, function (err) {
		if (err) {
			winston.error('[plugins/solr] Could not remove ' + key + ' from index');
		}

		if (typeof callback === 'function') {
			callback(err);
		} else {
			Solr.commit();
		}
	});
};

/**
 * Commits pending changes to the index.
 */
Solr.commit = function (callback) {
	Solr.client.commit(callback);
};

/**
 * Route handler: deletes every document from the index, commits, and drops
 * the search caches.
 */
Solr.flush = function (req, res) {
	async.series([
		function (next) {
			Solr.client.deleteAll(next);
		},
		async.apply(Solr.commit),
	], function (err) {
		if (err) {
			winston.error('[plugins/solr] Could not empty the search index');
			res.status(500).send(err.message);
		} else {
			Solr.dropCaches();
			winston.verbose('[plugins/solr] Search index flushed!');
			res.sendStatus(200);
		}
	});
};

/**
 * Empties both search caches. Also usable as a route handler: responds 200
 * when a response object is supplied.
 */
Solr.dropCaches = function (req, res) {
	postCache.reset();
	titleCache.reset();

	if (res && res.sendStatus) {
		res.sendStatus(200);
	}
};

/* Post handlers */

Solr.post = {};

/**
 * Indexes data.post when indexing is enabled.
 */
Solr.post.save = function (data) {
	if (!isEnabled()) {
		return;
	}

	Solr.indexPost(data.post);
};

/**
 * Removes data.post from the index when indexing is enabled. The optional
 * callback is invoked in every case so callers are never left waiting.
 */
Solr.post.delete = function (data, callback) {
	if (isEnabled()) {
		Solr.remove('post:' + data.post.pid);
	}

	if (typeof callback === 'function') {
		callback();
	}
};

Solr.post.restore = Solr.post.save;
Solr.post.edit = Solr.post.save;

/**
 * Re-indexes a moved post with its current category id.
 */
Solr.post.move = function (payload) {
	if (!isEnabled()) {
		return;
	}

	async.parallel({
		postData: async.apply(posts.getPostFields, payload.post.pid, ['pid', 'content', 'uid']),
		cid: async.apply(posts.getCidByPid, payload.post.pid),
	}, function (err, metadata) {
		if (err) {
			// Nothing to index without the post data
			winston.error('[plugins/solr] Could not retrieve base data for post move');
			return;
		}

		metadata.postData.cid = metadata.cid;
		Solr.indexPost(metadata.postData);
	});
};

/* Topic handlers */

Solr.topic = {};

/**
 * Indexes data.topic (title and posts) when indexing is enabled.
 */
Solr.topic.post = function (data) {
	if (!isEnabled()) {
		return;
	}

	Solr.indexTopic(data.topic);
};

/**
 * Removes a topic from the index. data.topic may be a topic object or the
 * tid itself.
 */
Solr.topic.delete = function (data) {
	const topicObj = data.topic;
	const tid = (undefined === topicObj.tid) ? topicObj : topicObj.tid;

	if (!isEnabled()) {
		return;
	}

	Solr.deindexTopic(tid);
};

Solr.topic.restore = Solr.topic.post;

/**
 * Re-indexes the main post and the title document of an edited topic.
 */
Solr.topic.edit = function (data) {
	const topicObj = data.topic;

	if (!isEnabled()) {
		return;
	}

	async.waterfall([
		async.apply(posts.getPostFields, topicObj.mainPid, ['pid', 'tid', 'uid', 'content']),
		function (postData, next) {
			// The fetched fields carry no cid; take it from the topic so the
			// re-added document keeps its cid_i.
			if (postData && postData.cid === undefined) {
				postData.cid = topicObj.cid;
			}

			Solr.indexPost(postData, next);
		},
	], function (err, payload) {
		if (err) {
			return winston.error(err.message);
		}
		if (!payload) {
			return winston.warn('[solr] no payload for pid ' + topicObj.mainPid);
		}

		const titleField = getTitleField();
		payload[titleField] = topicObj.title;

		// Use the configured title field, as indexTopic does
		const titleObj = {
			id: 'topic:' + topicObj.tid,
			tid_i: topicObj.tid,
			cid_i: topicObj.cid,
			uid_i: topicObj.uid,
		};
		titleObj[titleField] = topicObj.title;

		Solr.add([payload, titleObj]);
	});
};

/**
 * Rebuilds the index entries of a moved topic: removes them, then re-indexes
 * the topic with its current fields.
 */
Solr.topic.move = function (data) {
	if (!isEnabled()) {
		return;
	}

	async.waterfall([
		async.apply(Solr.deindexTopic, data.tid),
		async.apply(topics.getTopicFields, data.tid, ['tid', 'mainPid', 'title', 'cid', 'uid']),
		async.apply(Solr.indexTopic),
		async.apply(Solr.add),
	], function (err) {
		if (!err) {
			winston.verbose('[plugins/solr] tid ' + data.tid + ' moved, index updated');
		} else {
			winston.error('[plugins/solr] Could not update index for moved tid ' + data.tid + ', error: ' + err.message);
		}
	});
};

/* Topic and Post indexing methods */

/**
 * Builds the documents for a topic: one per post that has content, plus one
 * for the title. With a callback the documents are handed back; without one
 * they are added to Solr directly. Topics flagged as deleted are skipped.
 */
Solr.indexTopic = function (topicObj, callback) {
	if (topicObj.hasOwnProperty('deleted') && parseInt(topicObj.deleted, 10) === 1) {
		callback = callback || function () {};
		return callback();
	}

	async.waterfall([
		async.apply(topics.getPids, topicObj.tid),
		function (pids, next) {
			posts.getPostsFields(pids, ['pid', 'tid', 'content', 'uid'], function (err, posts) {
				if (err) {
					return next(err);
				}

				next(null, posts.map(function (post) {
					post.cid = topicObj.cid;
					return post;
				}));
			});
		},
		function (posts, next) {
			winston.verbose('[plugins/solr] Indexing tid ' + topicObj.tid + ' (' + posts.length + ' posts)');
			async.map(posts, Solr.indexPost, next);
		},
	], function (err, payload) {
		if (err) {
			winston.error('[plugins/solr] Encountered an error while compiling post data for tid ' + topicObj.tid);

			if (typeof callback === 'function') {
				callback(err);
			}
			// There is no payload on error, so stop here with or without a callback
			return;
		}

		// Posts without content yield no document
		payload = payload.filter(Boolean);

		// Also index the title
		const titleObj = {
			id: 'topic:' + topicObj.tid, // Just needs to be unique
			tid_i: topicObj.tid,
			cid_i: topicObj.cid,
			uid_i: topicObj.uid,
		};
		titleObj[getTitleField()] = topicObj.title;
		payload.push(titleObj);

		// Increment counter for index status
		if (Solr.indexStatus.running) {
			Solr.indexStatus.current += 1;
		}

		if (typeof callback === 'function') {
			callback(undefined, payload);
		} else {
			Solr.add(payload);
		}
	});
};

/**
 * Removes the title document and all post documents of a topic, then commits.
 * The callback is optional.
 */
Solr.deindexTopic = function (tid, callback) {
	topics.getPids(tid, function (err, pids) {
		if (err) {
			winston.error('[plugins/solr] Could not retrieve pids for deindex');

			if (typeof callback === 'function') {
				callback(err);
			}
			return;
		}

		const commands = [async.apply(Solr.remove, 'topic:' + tid)]
			.concat(pids.map(function (pid) {
				return async.apply(Solr.remove, 'post:' + pid);
			}));

		// Each remove is given a callback and so does not commit; commit once at the end
		commands.push(async.apply(Solr.commit));

		async.series(commands, function (err) {
			if (err) {
				winston.error('[plugins/solr] Encountered an error while deindexing tid ' + tid);
			} else {
				winston.verbose('[plugins/solr] Removed tid ' + tid + ' from index');
			}

			if (typeof callback === 'function') {
				callback(err);
			}
		});
	});
};

/**
 * Builds the document for one post. With a callback the document is handed
 * back (nothing for posts lacking a pid or content); without one it is added
 * to Solr directly.
 */
Solr.indexPost = function (postData, callback) {
	if (!postData || !postData.pid || !postData.content) {
		if (typeof callback === 'function') {
			callback(null);
		}
		return;
	}

	const payload = {
		id: 'post:' + postData.pid, // Just needs to be unique
		pid_i: postData.pid,
		tid_i: postData.tid,
		cid_i: postData.cid,
		uid_i: postData.uid,
	};
	payload[getContentField()] = postData.content;

	if (typeof callback === 'function') {
		callback(undefined, payload);
	} else {
		Solr.add(payload);
	}
};

Solr.deindexPost = Solr.post.delete;

/**
 * Route handler: starts a full re-index in the background. Responds 400 when
 * one is already running, 200 otherwise (before the work completes).
 */
Solr.rebuildIndex = function (req, res) {
	if (Solr.indexStatus.running) {
		winston.warn('[plugins/solr] Solr is already indexing...');
		return res.sendStatus(400);
	}

	res.sendStatus(200);

	Solr.indexStatus.running = true;
	Solr.indexStatus.current = 0;
	Solr.indexStatus.message = 'Initializing';

	// Clears the running flag on every exit path so a failed run does not
	// block all future rebuilds.
	const finish = function (message) {
		Solr.indexStatus.message = message;
		Solr.indexStatus.running = false;
	};

	async.series({
		total: function (next) {
			db.sortedSetCount('topics:tid', 0, Date.now(), function (err, count) {
				if (err) {
					return next(err);
				}

				Solr.indexStatus.total = count;
				next();
			});
		},
		topics: async.apply(Solr.rebuildTopicIndex),
	}, function (err, results) {
		if (err) {
			winston.error('[plugins/solr] Could not retrieve topic listing for indexing. Error: ' + err.message);
			return finish('Indexing failed');
		}

		Solr.add(results.topics, function (err) {
			if (!err) {
				winston.info('[plugins/solr] Re-indexing completed.');
				finish('Indexing finished');
			} else {
				winston.error('[plugins/solr] Unable to add final data to solr. Error: ' + err.message);
				finish('Indexing failed');
			}
		});
	});
};

/**
 * Collects the fields of every topic in `topics:tid`, then indexes them in
 * slices of 1000 topics (100 at a time), adding each slice to Solr as it is
 * built. Calls back with (err, []) since the documents have already been added.
 */
Solr.rebuildTopicIndex = function (callback) {
	Solr.indexStatus.message = 'Collecting topic metadata';

	async.waterfall([
		async.apply(db.sortedSetCount, 'topics:tid', '-inf', '+inf'),
		function (topicsCount, next) {
			let topicsFields = [];
			let metadataCollected = 0;
			Solr.indexStatus.message = 'Collecting topic metadata 0 / ' + topicsCount;

			batch.processSortedSet('topics:tid', function (tids, done) {
				topics.getTopicsFields(tids, ['tid', 'mainPid', 'title', 'cid', 'uid', 'deleted'], function (err, results) {
					if (!err) {
						topicsFields = topicsFields.concat(results);
						metadataCollected += results.length;
						Solr.indexStatus.message = 'Collecting topic metadata ' + metadataCollected + ' / ' + topicsCount;
					}

					done(err);
				});
			}, function (err) {
				if (err) {
					winston.error('[plugins/solr/reindexTopic] Could not retrieve topic listing for indexing. Error: ' + err.message);
				}

				next(err, topicsFields);
			});
		},
		function (topics, next) {
			Solr.indexStatus.message = 'Indexed topics 0 / ' + topics.length;
			let indexedTopicCount = 0;

			// NOTE(review): the test function below is synchronous; confirm the
			// installed `async` version supports that form of whilst().
			async.whilst(
				function () {
					return indexedTopicCount < topics.length;
				},
				function (done) {
					const indexingTopics = topics.slice(indexedTopicCount, indexedTopicCount + 1000);

					async.mapLimit(indexingTopics, 100, Solr.indexTopic, function (err, topicPayloads) {
						if (err) {
							winston.error('[plugins/solr/reindexTopic] Could not retrieve topic content for indexing. Error: ' + err.message);
							return done(err);
						}

						// Normalise and validate the entries before they're added to Solr
						const payload = topicPayloads.reduce(function (currentPayload, entries) {
							if (Array.isArray(entries)) {
								return currentPayload.concat(entries);
							}

							currentPayload.push(entries);
							return currentPayload;
						}, []).filter(function (entry) {
							return entry && entry.hasOwnProperty('id');
						});

						indexedTopicCount += indexingTopics.length;
						Solr.indexStatus.message = 'Indexed topics ' + indexedTopicCount + ' / ' + topics.length;

						Solr.add(payload, function (err) {
							if (!err) {
								const progressPercent = (100 * indexedTopicCount / topics.length).toFixed(2);
								winston.info('[plugins/solr/reindexTopic] Partial re-indexing completed: ' + progressPercent + '%');
							}

							done(err);
						});
					});
				},
				next
			);
		},
	], function (err) {
		if (typeof callback === 'function') {
			callback(err, []);
		} else if (!err) {
			winston.info('[plugins/solr/reindexTopic] Topic re-indexing completed.');
		} else {
			winston.error('[plugins/solr/reindexTopic] Could not insert data into Solr for indexing. Error: ' + err.message);
		}
	});
};

/**
 * Route handler: reports re-index progress as { percentage, message }.
 * percentage is -1 when no re-index is running and 0 while the total is
 * still unknown.
 */
Solr.getIndexProgress = function (req, res) {
	if (!Solr.indexStatus.running) {
		res.status(200).send({
			percentage: -1,
			message: 'Done',
		});
		return;
	}

	let percentage = 0;
	if (Solr.indexStatus.total > 0) {
		// Scale before rounding so the result has no floating-point noise
		percentage = Number((100 * Solr.indexStatus.current / Solr.indexStatus.total).toFixed(2));
	}

	res.status(200).send({
		percentage: percentage,
		message: Solr.indexStatus.message,
	});
};

module.exports = Solr;