UNPKG

apostrophe

Version:
424 lines (382 loc) • 13.7 kB
// Implement sitewide search for Apostrophe. Provides the // `@apostrophecms/search` page type for the `/search` page, which // you should include in your "parked pages" if you wish // to have one (see [@apostrophecms/page](../@apostrophecms/page/index.html)). // // Search is powered by the full-text search features of MongoDB. // // ## Options // // ### `perPage`: search results per results page. Defaults to 10. // // ### `suggestions`: if `suggestions` is `true`, and the `notFound.html` // template of `@apostrophecms/page` contains this element: // // `<div data-apos-notfound-search-results></div>` // // Apostrophe will attempt to locate relevant pages by feeding the component // words of the URL to the search engine, and display those suggestions. // // If `suggestions` is explicitly `false`, this does not happen. // // If `suggestions` is an object, this feature is enabled and the `limit` // suboption may optionally be changed to a value other than `10`. // // For legacy reasons, if `suggestions` is not set at all, the feature // still operates but attempts to obtain suggestions from `/search`. This // will work adequately if you have an Apostrophe sitewide search page // at that URL, but we recommend you set `suggestions: true` instead. // This allows you to override `suggest.html` to customize the behavior, // and also improves performance by using a simpler query for the 404 // suggestions. // // `types`: an array of page and piece doc type names allowed to be included // in search results. If not present, this is determined programmatically. // In the latter case, the `searchDetermineTypes` callAll method and the // `determineTypes` promise event are fired. Implementations of these // take an array argument and push new type names on it. // `@apostrophecms/piece-type` modules monitor this and add their `name`, or do // not, based on their `searchable` option. // // `filters`: an array of filters to be offered to the user, each of which // is an object with a `name` property and a `label` property. If no // entry has the name `__else`, an "Everything Else" filter is automatically // added. This is because there are always page types and piece types that // are edge cases not relevant enough to explicitly offer a filter for, but // which should nevertheless be included in results. const { stripIndent } = require('common-tags'); const _ = require('lodash'); module.exports = { extend: '@apostrophecms/page-type', options: { alias: 'search', perPage: 10, label: 'apostrophe:searchLabel', // Default projection for ancestors, used in search results. // See `req.aposAncestors` and `req.ancestorsApiProjection` // in modules/@apostrophecms/search/index.js and // in modules/@apostrophecms/any-page-type/index.js ancestorsApiProjection: { _id: 1, title: 1, slug: 1, type: 1, visibility: 1, orphan: 1, parkedId: 1, parked: 1, rank: 1, level: 1, aposDocId: 1, path: 1, lastPublishedAt: 1, aposLocale: 1, aposMode: 1, metaType: 1, createdAt: 1, archived: 1, titleSortified: 1, updatedAt: 1, cacheInvalidatedAt: 1, updatedBy: 1, highSearchText: 1, highSearchWords: 1, lowSearchText: 1, searchSummary: 1, _url: 1 } }, init(self) { self.perPage = self.options.perPage; if (self.options.suggestions === undefined) { // bc fallback, not great self.options.suggestions = { url: '/search' }; } else { // will catch the new, better standard route URL self.options.suggestions = {}; } self.options.suggestions.url = self.options.suggestions.url || self.action + '/suggest'; self.dispatchAll(); self.enableFilters(); self.addMigrations(); }, routes(self) { return { get: { async suggest(req, res) { try { const docs = await self.suggest(req, self.apos.launder.string(req.query.q)); return res.send(docs); } catch (err) { self.apos.util.error(err); return res.status(500).send('error'); } } } }; }, handlers(self) { return { 'apostrophe:modulesRegistered': { determineTypes() { self.types = self.options.types || _.map(self.apos.page.typeChoices, 'name'); if (self.options.types) { // Explicit configuration was chosen return; } // A chance to rewrite the array return self.emit('determineTypes', self.types); } }, '@apostrophecms/doc-type:beforeSave': { indexDoc(req, doc) { self.indexDoc(req, doc); } } }; }, methods(self) { return { enableFilters() { if (self.options.filters) { self.filters = self.options.filters; if (!_.find(self.filters, { name: '__else' })) { self.filters = self.options.filters.concat([ { label: 'apostrophe:everythingElse', name: '__else' } ]); } } }, addMigrations() { self.addIndexFixMigration(); }, addIndexFixMigration() { // Search index lacked most text fields, correct that with a one-time // migration self.apos.migration.add('search-index-fix', async () => { return self.indexTask(); }); }, suggest(req, q) { return self.apos.doc .find(req) .limit(self.options.suggestions && (self.options.suggestions.limit || 10)) .search(q) .project({ _url: 1, title: 1 }).toArray(); }, // This method implements the search results page. It populates // `req.data.docs` and provides pagination via `req.data.currentPage` and // `req.data.totalPages`, not to be confused with `req.data.totalDocs` // which is the total number of documents matching the search. The filters // configured for the module are respected. async indexPage(req) { // Finesse so we can use applyBuildersSafely but we still support q, // which is a common expectation/preference req.query.search = req.query.search || req.query.q; // Cope with filters let allowedTypes; let defaultingToAll = false; const query = self.apos.doc .find(req, {}) .applyBuildersSafely(req.query) .perPage(self.perPage); if (self.filters) { const filterTypes = _.filter(_.map(self.filters, 'name'), function (name) { return name !== '__else'; }); allowedTypes = _.filter(self.types, function (name) { return _.has(req.query, name); }); if (req.query.__else) { allowedTypes = allowedTypes.concat(_.difference(self.types, filterTypes)); } if (!allowedTypes.length) { // Default is everything defaultingToAll = true; allowedTypes = self.types; } } else { allowedTypes = self.types; } query.and({ type: { $in: allowedTypes } }); if (self.filters) { req.data.filters = _.cloneDeep(self.filters); _.each(req.data.filters, function (filter) { if (defaultingToAll || req.query[filter.name]) { filter.value = true; } }); } const count = await query.toCount(); if (query.get('page') > query.get('totalPages')) { req.notFound = true; return; } req.data.totalDocs = count; req.data.totalPages = query.get('totalPages'); const docs = await findDocs(); if (self.apos.util.isAjaxRequest(req)) { self.setTemplate(req, 'indexAjax'); } else { self.setTemplate(req, 'index'); } req.data.currentPage = query.get('page'); req.data.docs = docs; return self.emit('beforeIndex', req); async function findDocs() { req.aposAncestors = true; req.aposAncestorsApiProjection = self.options.ancestorsApiProjection; // Polymorphic find: fetch just the ids at first, then go back // and fetch them via their own type managers so that we get the // expected relationships and urls and suchlike. const idsAndTypes = await query.project({ _id: 1, type: 1 }).toArray(); const byType = _.groupBy(idsAndTypes, 'type'); let docs = []; for (const type in byType) { await getDocsOfType(type); } // Restore the intended order ($in doesn't respect it and neither does // fetching them all by type). ACHTUNG: without this search quality // goes right out the window. -Tom return self.apos.util.orderById(_.map(idsAndTypes, '_id'), docs); async function getDocsOfType(type) { const manager = self.apos.doc.getManager(type); if (!manager) { return; } docs = docs.concat(await manager.find(req, { type, _id: { $in: _.map(byType[type], '_id') } }).toArray()); } } }, dispatchAll() { self.dispatch('/', self.indexPage); }, indexDoc(req, doc) { const texts = self.getSearchTexts(doc); _.each(texts, function (text) { if (text.text === undefined) { text.text = ''; } }); const highTexts = _.filter(texts, function (text) { return text.weight > 10; }); const searchSummary = _.map(_.filter(texts, function (text) { return !text.silent; }), function (text) { return text.text; }).join(' '); const highText = self.boilTexts(highTexts); const lowText = self.boilTexts(texts); const titleSortified = self.apos.util.sortify(doc.title); const highWords = _.uniq(highText.split(/ /)); // merge our doc with its various search texts _.assign(doc, { titleSortified, highSearchText: highText, highSearchWords: highWords, lowSearchText: lowText, searchSummary }); }, // Indexes just one document as part of the implementation of the // `@apostrophecms/search:index` task. This isn't the method you want to // override. See `indexDoc` and `getSearchTexts` async indexTaskOne(req, doc) { self.indexDoc(req, doc); return self.apos.doc.db.updateOne({ _id: doc._id }, { $set: { titleSortified: doc.titleSortified, highSearchText: doc.highSearchText, highSearchWords: doc.highSearchWords, lowSearchText: doc.lowSearchText, searchSummary: doc.searchSummary } }); }, // Returns texts which are a reasonable basis for // generating search results for this page. Should return // an array in which each entry is an object with // 'weight' and 'text' properties. 'weight' is a measure // of relative importance. 'text' is the text associated // with that chunk of content. getSearchTexts(doc) { const texts = []; // Shown separately, so don't include it in the summary texts.push({ weight: 100, text: doc.title, silent: true }); // Usually redundant to the text of the page, so don't // show it in the description, but it's highly-weighted stuff // because we use it as the summary in a google search // result texts.push({ weight: 100, text: doc.seoDescription, silent: true }); // The slug often reveals more useful search-targeting information texts.push({ weight: 100, text: doc.slug, silent: true }); const manager = self.apos.doc.getManager(doc.type); if (manager) { const schema = manager.schema; self.apos.schema.indexFields(schema, doc, texts); } return texts; }, // Reduces array of texts to a single space-separated string, passes the // result through apos.util.sortify to eliminate unwanted characters and // case differences boilTexts(texts) { let text = _.reduce(texts, function (memo, text) { return memo + ' ' + text.text; }, ''); text = self.apos.util.sortify(text); return text; }, // Invoked by the @apostrophecms/version module. // Identify fields that should never be rolled back docUnversionedFields(req, doc, fields) { fields.push('titleSortified', 'highSearchText', 'highSearchWords', 'lowSearchText', 'searchSummary'); }, async indexTask() { const req = self.apos.task.getReq(); return self.apos.migration.eachDoc({}, doc => { return self.indexTaskOne(req, doc); }); } }; }, tasks(self) { return { index: { usage: stripIndent` Rebuild the search index. Normally this happens automatically. This should only be needed if you have changed the "searchable" property for various fields or types. `, async task(argv) { await self.indexTask(); } } }; } };