UNPKG

apostrophe

Version:

The Apostrophe Content Management System.

421 lines (355 loc) • 14.2 kB
// Implement sitewide search for Apostrophe. Provides the
// `apostrophe-search` page type for the `/search` page, which
// you should include in your "parked pages" if you wish
// to have one (see [apostrophe-pages](/reference/modules/apostrophe-pages)).
//
// Search is powered by the full-text search features of MongoDB.
//
// ## Options
//
// ### `perPage`: search results per results page. Defaults to 10.
//
// ### `suggestions`: if `suggestions` is `true`, and the `notFound.html`
// template of `apostrophe-pages` contains this element:
//
// `<div data-apos-notfound-search-results></div>`
//
// Apostrophe will attempt to locate relevant pages by feeding the component
// words of the URL to the search engine, and display those suggestions.
//
// If `suggestions` is explicitly `false`, this does not happen.
//
// If `suggestions` is an object, this feature is enabled and the `limit`
// suboption may optionally be changed to a value other than `10`.
//
// For legacy reasons, if `suggestions` is not set at all, the feature
// still operates but attempts to obtain suggestions from `/search`. This
// will work adequately if you have an Apostrophe sitewide search page
// at that URL, but we recommend you set `suggestions: true` instead.
// This allows you to override `suggest.html` to customize the behavior,
// and also improves performance by using a simpler query for the 404
// suggestions.
//
// ### `types`: an array of page and piece doc type names allowed to be included
// in search results. If not present, this is determined programmatically.
// In the latter case, the `searchDetermineTypes` callAll method and the
// `determineTypes` promise event are fired. Implementations of these
// take an array argument and push new type names on it. `apostrophe-pieces` modules
// monitor this and add their `name`, or do not, based on their `searchable` option.
// // `filters`: an array of filters to be offered to the user, each of which // is an object with a `name` property and a `label` property. If no // entry has the name `__else`, an "Everything Else" filter is automatically // added. This is because there are always page types and piece types that // are edge cases not relevant enough to explicitly offer a filter for, but // which should nevertheless be included in results. var _ = require('@sailshq/lodash'); var async = require('async'); module.exports = { alias: 'search', extend: 'apostrophe-custom-pages', name: 'apostrophe-search', perPage: 10, afterConstruct: function(self) { self.dispatchAll(); self.enableFilters(); self.pushAssets(); self.apos.tasks.add(self.__meta.name, 'index', self.indexTask); }, construct: function(self, options) { self.perPage = options.perPage; if (options.suggestions === undefined) { // bc fallback, not great options.suggestions = { url: '/search' }; } else if (options.suggestions === true) { // will catch the new, better standard route URL options.suggestions = {}; } else { // already an object } options.suggestions.url = options.suggestions.url || self.action + '/suggest'; require('./lib/browser.js')(self, options); self.modulesReady = function(callback) { return self.determineTypes(callback); }; self.determineTypes = function(callback) { self.types = self.options.types || _.pluck(self.apos.pages.typeChoices, 'name'); if (self.options.types) { // Explicit configuration was chosen return setImmediate(callback); } return self.callAllAndEmit('searchDetermineTypes', 'determineTypes', self.types, callback); }; self.enableFilters = function() { if (self.options.filters) { self.filters = self.options.filters; if (!_.find(self.filters, { name: '__else' })) { self.filters = self.options.filters.concat( [ { label: "Everything Else", name: "__else" } ] ); } } }; self.renderRoute('get', 'suggest', function(req, res, next) { return self.suggest(req, 
self.apos.launder.string(req.query.q)).then(function(docs) { return next(null, { template: 'suggest', data: { docs: docs } }); }).catch(next); }); self.suggest = function(req, q) { return self.apos.docs.find(req, {}, { _url: 1, title: 1 }) .limit((self.options.suggestions && self.options.suggestions.limit) || 10) .search(q) .toArray(); }; // This method implements the search results page. It populates `req.data.docs` // and provides pagination via `req.data.currentPage` and `req.data.totalPages`, // not to be confused with `req.data.totalDocs` which is the total number of // documents matching the search. The filters configured for the module are // respected. self.indexPage = function(req, callback) { // Finesse so we can use queryToFilters but we still support q, which is // a common expectation/preference req.query.search = req.query.search || req.query.q; // Cope with filters var allowedTypes; var defaultingToAll = false; var cursor = self.apos.docs.find(req, {}) .queryToFilters(req.query, 'public') .perPage(self.perPage); if (self.filters) { var filterTypes = _.filter( _.pluck(self.filters, 'name'), function(name) { return name !== '__else'; } ); allowedTypes = _.filter(self.types, function(name) { return _.has(req.query, name); }); if (req.query.__else) { allowedTypes = allowedTypes.concat(_.difference(self.types, filterTypes)); } if (!allowedTypes.length) { // Default is everything defaultingToAll = true; allowedTypes = self.types; } } else { allowedTypes = self.types; } cursor.and({ type: { $in: allowedTypes } }); var docs = []; if (self.filters) { req.data.filters = _.cloneDeep(self.filters); _.each(req.data.filters, function(filter) { if (defaultingToAll || req.query[filter.name]) { filter.value = true; } }); } return async.series([ totalDocs, findDocs ], function(err) { if (err) { return callback(err); } if (self.apos.utils.isAjaxRequest(req)) { req.template = self.renderer('indexAjax'); } else { req.template = self.renderer('index'); } 
req.data.currentPage = cursor.get('page'); req.data.docs = docs; return self.beforeIndex(req, callback); }); function totalDocs(callback) { return cursor.toCount(function(err, count) { if (err) { return callback(err); } if (cursor.get('page') > cursor.get('totalPages')) { req.notFound = true; return callback(null); } req.data.totalDocs = count; req.data.totalPages = cursor.get('totalPages'); return callback(); }); } function findDocs(callback) { // Polymorphic find: fetch just the ids at first, then go back // and fetch them via their own type managers so that we get the // expected joins and urls and suchlike. var idsAndTypes; var byType; return async.series([ getIdsAndTypes, getDocs ], callback); function getIdsAndTypes(callback) { return cursor.projection({ _id: 1, type: 1 }).toArray(function(err, _idsAndTypes) { if (err) { return callback(err); } idsAndTypes = _idsAndTypes; return callback(null); }); } function getDocs(callback) { byType = _.groupBy(idsAndTypes, 'type'); return async.eachSeries(_.keys(byType), getDocsOfType, function(err) { if (err) { return callback(err); } // Restore the intended order ($in doesn't respect it and neither does // fetching them all by type). ACHTUNG: without this search quality goes // right out the window. 
-Tom docs = self.apos.utils.orderById(_.pluck(idsAndTypes, '_id'), docs); return callback(null); }); } function getDocsOfType(type, callback) { var manager = self.apos.docs.getManager(type); if (!manager) { return setImmediate(callback); } return manager.find(req, { type: type, _id: { $in: _.pluck(byType[type], '_id') } } ).toArray(function(err, docsOfType) { if (err) { return callback(err); } docs = docs.concat(docsOfType); return callback(null); }); } } }; // Called before each page of search results is rendered; override hook self.beforeIndex = function(req, callback) { return setImmediate(callback); }; self.dispatchAll = function() { self.dispatch('/', self.indexPage); }; // Implementation of search indexing as documents are saved. Invoked // via callAll by the docs module self.docBeforeSave = function(req, doc, options) { return self.indexDoc(req, doc); }; // Index one doc for participation in search self.indexDoc = function(req, doc) { var texts = self.getSearchTexts(doc); _.each(texts, function(text) { if (text.text === undefined) { text.text = ''; } }); var highTexts = _.filter(texts, function(text) { return text.weight > 10; }); var searchSummary = _.map(_.filter(texts, function(text) { return !text.silent; }), function(text) { return text.text; }).join(" "); var highText = self.boilTexts(highTexts); var lowText = self.boilTexts(texts); var titleSortified = self.apos.utils.sortify(doc.title); var highWords = _.uniq(highText.split(/ /)); // merge our doc with its various search texts _.assign(doc, { titleSortified: titleSortified, highSearchText: highText, highSearchWords: highWords, lowSearchText: lowText, searchSummary: searchSummary }); }; // Implements the `apostrophe-search:index` task, which re-indexes all pages. // This should only be needed if you have changed your mind about the // `searchable` property for various schema fields. 
Indexing is automatic // every time a doc is saved self.indexTask = function(apos, argv, callback) { var req = self.apos.tasks.getReq(); return self.apos.migrations.eachDoc({}, _.partial(self.indexTaskOne, req), callback); }; // Indexes just one document as part of the implementation of the // `apostrophe-search:index` task. This isn't the method you want to // override. See `indexDoc` and `getSearchTexts` self.indexTaskOne = function(req, doc, callback) { self.indexDoc(req, doc); return self.apos.docs.db.update({ _id: doc._id }, doc, callback); }; // Returns texts which are a reasonable basis for // generating search results for this page. Should return // an array in which each entry is an object with // 'weight' and 'text' properties. 'weight' is a measure // of relative importance. 'text' is the text associated // with that chunk of content. self.getSearchTexts = function(doc) { var texts = []; // Shown separately, so don't include it in the summary texts.push({ weight: 100, text: doc.title, silent: true }); // Usually redundant to the text of the page, so don't // show it in the description, but it's highly-weighted stuff // because we use it as the summary in a google search // result texts.push({ weight: 100, text: doc.seoDescription, silent: true }); // The slug often reveals more useful search-targeting information texts.push({ weight: 100, text: doc.slug, silent: true }); // Not great to include in the summary either texts.push({ weight: 100, text: (doc.tags || []).join("\n"), silent: true }); // This event is an opportunity to add custom texts for // various types of pages self.apos.emit('docSearchIndex', doc, texts); // Areas can be schemaless so find them automatically self.apos.areas.walk(doc, function(area, dotPath) { // Do not examine areas accessed via temporarily // joined information, such as snippets in a snippet // widget. 
Allow those items to be found on their // own as search results, and avoid bloating the // search text up to the 16MB limit as happened on DR if (dotPath.match(/\._\w/)) { return; } _.each(area.items, function(item) { var manager = self.apos.areas.getWidgetManager(item.type); if (!manager) { self.apos.areas.warnMissingWidgetType(item.type); return; } if (manager.addSearchTexts) { manager.addSearchTexts(item, texts); } }); }); return texts; }; // Reduces array of texts to a single space-separated string, passes the result // through apos.utils.sortify to eliminate unwanted characters and case differences self.boilTexts = function(texts) { var text = _.reduce(texts, function(memo, text) { return memo + ' ' + text.text; }, ''); text = self.apos.utils.sortify(text); return text; }; // Invoked by the apostrophe-versions module. // Identify fields that should never be rolled back self.docUnversionedFields = function(req, doc, fields) { fields.push('titleSortified', 'highSearchText', 'highSearchWords', 'lowSearchText', 'searchSummary'); }; } };