// apostrophe
// Version:
// Apostrophe is a user-friendly content management system. You'll need more than this core module. See apostrophenow.org to get started.
// 1,363 lines (1,208 loc) • 48.5 kB
// JavaScript
var _ = require('lodash');
var async = require('async');
var extend = require('extend');
var jsDiff = require('diff');
var wordwrap = require('wordwrap');
var util = require('util');
/**
* pages
* @augments Augments the apos object with methods which store and
* retrieve pages. A page is a MongoDB document
* with a title, a slug, and an areas property containing zero or more
* named area properties. Pages are stored in the aposPages collection.
* See also the pages module and the snippets module, which augment this
* idea in different ways.
*
* @see areas for methods that read and write single areas within a
* page.
*/
module.exports = function(self) {
// An internal function for locating a page by slug or recognizing that
// it is already a page object. This function does NOT check permissions
// or call loaders. It is useful in migrations and versioning.
function findByPageOrSlug(pageOrSlug, callback) {
  // Resolve `pageOrSlug` to a page and hand it to `callback(err, page)`.
  // A string is treated as a slug and looked up directly in self.pages
  // (no permissions check, no loaders); any other value is taken to be
  // the page itself and is passed through untouched.
  function deliver(err, page) {
    if (err) {
      return callback(err);
    }
    return callback(null, page);
  }
  if (typeof(pageOrSlug) === 'string') {
    self.pages.findOne({ slug: pageOrSlug }, deliver);
  } else {
    deliver(null, pageOrSlug);
  }
}
// Given a request object (for permissions), a page object, and a version
// object (an old version of the page from the versions collection), roll
// back the page to the content in the version object. This method does not
// roll back changes to the slug property, or to the rank or path property of
// any page with a slug beginning with /, because these are part
// of the page's relationship to other pages which may not be rolling back and
// could lead to an unusable page tree and/or conflicting slugs and paths
self.rollBackPage = function(req, page, version, callback) {
  // Roll `page` back to the content held in `version` and save it.
  //
  // req: request object, passed through to self.putPage
  // page: the current page object; it is modified in place
  // version: an old version of the page; its `diff`, `author` and
  //   `createdAt` properties are deleted from the object passed in
  // callback: passed to self.putPage, which reports the outcome
  //
  // The slug is never rolled back. For tree pages (slug starting
  // with '/') the path and rank are not rolled back either.
  var slug = page.slug;
  var path = page.path;
  var rank = page.rank;
  // Strip properties that describe the version rather than the page
  // so they are not copied onto the page below
  delete version.diff;
  delete version.author;
  delete version.createdAt;
  // NOTE(review): any other version-only properties still present on
  // `version` (versionPage sets `_id` and `pageId`) are merged onto the
  // page here - confirm that callers strip them or putPage copes.
  extend(true, page, version);
  page.slug = slug;
  // Fixed: this used to call slug.chart(0), which is not a String
  // method and threw a TypeError on every rollback
  if (slug.charAt(0) === '/') {
    page.path = path;
    page.rank = rank;
  }
  return self.putPage(req, page.slug, page, callback);
};
// Save a copy of the specified page so that it can be rolled back to
// at any time. The req object is needed to identify the author of
// the change. Typically called only from self.putPage
self.versionPage = function(req, page, callback) {
  // Store a snapshot of `page` in self.versions, then start pruning
  // older snapshots of the same page.
  //
  // req: used only to read req.user.username as the author; may be
  //   falsy or lack a user, in which case the author is 'unknown'
  // page: the page to snapshot; it is cloned, never modified
  // callback: invoked once the snapshot is inserted (or the insert
  //   fails). Pruning continues afterwards and cannot fail the call.
  var now = new Date();
  function addVersion(callback) {
    // Turn the page object we fetched into a version object.
    // But don't modify the page object!
    var version = _.cloneDeep(page);
    version.createdAt = now;
    version.pageId = version._id;
    version.author = (req && req.user && req.user.username) ? req.user.username : 'unknown';
    version._id = self.generateId();
    delete version.searchText;
    return self.versions.insert(version, callback);
  }
  function pruneVersions(callback) {
    // This is a process meant to continue in the background,
    // intentionally. Thus we immediately trigger our callback
    // but go on executing code. On purpose. Really.
    callback(null);
    // THIS CODE RUNS INDEPENDENTLY EVEN AFTER THE REQUEST IF NEEDED
    // Strategy: walk the older versions from newest to oldest. A version
    // is removed when the gap between it and the last version we kept is
    // less than 1/24th of its age relative to `now`. Thus versions become
    // more sparse as we move back through time. However if two consecutive
    // versions have different authors we never discard them because
    // we don't want to create a false audit trail. -Tom
    var last = null;
    // Fixed: `author` must be part of the projection. Without it both
    // sides of the author comparison below were always undefined, so
    // the author check could never protect a version from removal.
    var cursor = self.versions.find({ createdAt: { $lt: now }, pageId: page._id }, { createdAt: 1, _id: 1, author: 1 }).sort({ createdAt: -1 });
    return cursor.nextObject(iterator);
    function iterator(err, version) {
      if (err) {
        // We are running independently, it is too late to fail the request, just log it
        console.error('An error occurred while pruning versions.');
        console.error(err);
        return;
      }
      if (version === null) {
        // We're done
        return;
      }
      var age = now.getTime() - version.createdAt.getTime();
      var difference;
      var remove = false;
      if (last) {
        if (last.author === version.author) {
          difference = last.createdAt.getTime() - version.createdAt.getTime();
          if (difference < (age / 24)) {
            remove = true;
          }
        }
      }
      if (!remove) {
        // Kept versions become the reference point for the next one
        last = version;
        return cursor.nextObject(iterator);
      }
      return self.versions.remove({ _id: version._id }, function(err) {
        if (err) {
          // Log and keep going; one failed removal should not stop pruning
          console.error('An error occurred while pruning versions (remove)');
          console.error(err);
        }
        return cursor.nextObject(iterator);
      });
    }
  }
  return async.series([ addVersion, pruneVersions ], callback);
};
self.diffPages = function(page1, page2) {
  // Compare two versions of a page. Each page is reduced to diff-friendly
  // lines via self.diffPageLines, the two texts are compared line by line,
  // and only the chunks flagged as added or removed are returned.
  var before = self.diffPageLines(page1);
  var after = self.diffPageLines(page2);
  var changes = jsDiff.diffLines(before.join("\n"), after.join("\n"));
  // Unchanged chunks are of no interest to the caller
  return _.filter(changes, function(change) {
    return change.added || change.removed;
  });
};
// Returns a list of lines of text which, when diffed against the
// results for another version of the page, will result in a reasonable
// summary of what has changed
self.diffPageLines = function(page) {
  // Build the array of text lines that represents `page` for diffing:
  // title, type and (if present) tags, then whatever 'diff' event
  // listeners append, then one line per area item plus any lines the
  // item's type contributes through its addDiffLines method.
  var lines = [
    'title: ' + page.title,
    'type: ' + page.type
  ];
  if (page.tags) {
    lines.push('tags: ' + page.tags.join(','));
  }
  self.emit('diff', page, lines);
  self.walkAreas(page, function(area, dotPath) {
    _.each(area.items, function(item) {
      lines.push(dotPath + ': ' + item.type);
      var handler = self.itemTypes[item.type];
      // Unknown item types, and types without addDiffLines, add nothing more
      if (handler && handler.addDiffLines) {
        handler.addDiffLines(item, lines);
      }
    });
  });
  return lines;
};
// Given some plaintext, add diff-friendly lines to the lines array
// based on its contents
self.addDiffLinesForText = function(text, lines) {
  // Append `text` to `lines` in diff-friendly form: each newline-separated
  // line of the input is passed through a wordwrap(0, 60) wrapper and
  // every non-empty resulting line is pushed onto `lines`.
  var wrap = wordwrap(0, 60);
  text.split("\n").forEach(function(rawLine) {
    wrap(rawLine).split("\n").forEach(function(wrappedLine) {
      // Empty lines carry no information for a diff
      if (wrappedLine.length) {
        lines.push(wrappedLine);
      }
    });
  });
};
// Index the page for search purposes.
self.indexPage = function(req, pageOrSlug, callback) {
  // Recompute and store the search-related properties of a page:
  // sortTitle, highSearchText, highSearchWords, lowSearchText and
  // searchSummary.
  //
  // req: accepted for interface consistency; not used by this function
  // pageOrSlug: a page object, or the slug of a page to look up
  // callback: receives an error if the lookup or the update fails, or
  //   if no page exists for the given slug
  var page;
  function findPage(callback) {
    // Use the shared helper rather than repeating the slug-or-page logic
    return findByPageOrSlug(pageOrSlug, function(err, pageArg) {
      if (err) {
        return callback(err);
      }
      if (!pageArg) {
        // Report a missing page through the callback rather than
        // throwing a TypeError later, when its properties are read
        return callback(new Error('indexPage: page not found: ' + pageOrSlug));
      }
      page = pageArg;
      return callback(null);
    });
  }
  function index(callback) {
    // Index the page
    var texts = self.getSearchTextsForPage(page);
    // These texts have a weight property so they are ideal for feeding
    // to something better, but for now we'll prep for a dumb, simple regex search
    // via mongo that is not aware of the weight of fields. This is pretty
    // slow on big corpuses but it does have the advantage of being compatible
    // with the presence of other criteria. Our workaround for the lack of
    // really good weighting is to make separate texts available for searches
    // based on high-weight fields and searches based on everything
    // Individual widget types play with weights a little, but the really
    // big numbers are reserved for metadata fields. Look for those
    var highTexts = _.filter(texts, function(text) {
      return text.weight > 10;
    });
    // Join the given texts into one normalized string
    function boilTexts(texts) {
      var text = _.reduce(texts, function(memo, text) {
        // Skip entries with no text at all (for instance a page with no
        // seoDescription). Concatenating them would put the literal
        // word "undefined" or "null" into the search text.
        if ((text.text === undefined) || (text.text === null)) {
          return memo;
        }
        return memo + ' ' + text.text;
      }, '');
      text = self.sortify(text);
      return text;
    }
    // The summary leaves out texts flagged as silent
    var searchSummary = _.map(_.filter(texts, function(text) { return !text.silent; } ), function(text) { return text.text; }).join(" ");
    var highText = boilTexts(highTexts);
    var lowText = boilTexts(texts);
    var sortTitle = self.sortify(page.title);
    var highWords = _.uniq(highText.split(/ /));
    return self.pages.update({ slug: page.slug }, { $set: { sortTitle: sortTitle, highSearchText: highText, highSearchWords: highWords, lowSearchText: lowText, searchSummary: searchSummary } }, callback);
  }
  return async.series([findPage, index], callback);
};
// Returns texts which are a reasonable basis for
// generating search results for this page. Should return
// an array in which each entry is an object with
// 'weight' and 'text' properties. 'weight' is a measure
// of relative importance. 'text' is the text associated
// with that chunk of content.
self.getSearchTextsForPage = function(page) {
  // Collect the searchable texts for `page` as an array of objects with
  // `weight` and `text` properties (plus `silent: true` for texts that
  // should be left out of the search summary).
  //
  // The metadata entries are all weighted 100 and silent: the title is
  // shown separately, the SEO description is usually redundant to the
  // page text, the slug often carries useful search-targeting words,
  // and tags do not read well in a summary either.
  var texts = [
    { weight: 100, text: page.title, silent: true },
    { weight: 100, text: page.seoDescription, silent: true },
    { weight: 100, text: page.slug, silent: true },
    { weight: 100, text: (page.tags || []).join("\n"), silent: true }
  ];
  // Listeners may add custom texts for particular types of pages
  self.emit('index', page, texts);
  // Areas can be schemaless, so discover them by walking the page
  self.walkAreas(page, function(area, dotPath) {
    // Skip areas reached through temporarily joined properties (a path
    // component starting with an underscore), such as snippets in a
    // snippet widget. Those items can be found as search results in
    // their own right, and including them bloats the search text.
    if (dotPath.match(/\._\w/)) {
      return;
    }
    _.each(area.items, function(item) {
      var handler = self.itemTypes[item.type];
      if (handler && handler.addSearchTexts) {
        handler.addSearchTexts(item, texts);
      }
    });
  });
  return texts;
};
// Given some plaintext, add diff-friendly lines to the lines array
// based on its contents
self.addDiffLinesForText = function(text, lines) {
  // NOTE: this repeats an identical assignment made earlier in this
  // file; being the later of the two, this is the definition in effect.
  //
  // Split `text` on newlines, run each piece through a wordwrap(0, 60)
  // wrapper, and push every non-empty wrapped line onto `lines`.
  var wrap = wordwrap(0, 60);
  var paragraphs = text.split("\n");
  var pieces;
  var i;
  var j;
  for (i = 0; (i < paragraphs.length); i++) {
    pieces = wrap(paragraphs[i]).split("\n");
    for (j = 0; (j < pieces.length); j++) {
      // Empty lines carry no information for a diff
      if (pieces[j].length) {
        lines.push(pieces[j]);
      }
    }
  }
};
// apos.get delivers pages that the current user is permitted to
// view, with areas fully populated and ready to render if
// they are present.
//
// Pages are also marked with a ._edit property if they are editable
// by this user.
//
// The results are delivered as the second argument of the callback
// if there is no error. The results object will have a `pages` property
// containing 0 or more pages. The results object will also have a
// `criteria` property containing the final MongoDB criteria used to
// actually fetch the pages. This criteria can be reused for direct
// MongoDB queries, for instance `distinct` queries to identify
// unique tags relevant to the pages returned.
//
// WHO SHOULD USE THIS METHOD
//
// Developers who need something different from a simple fetch of one
// page (use `apos.getPage`), fetch of ancestors, descendants, etc. of
// tree pages (use `pages.getAncestors`, `pages.getDescendants`, etc.),
// or fetch of snippets of some type such as blog posts or events
// (use `snippets.get`, `blog.get`, etc). All of these methods are
// built on this method.
//
// WARNING
//
// This function doesn't care if a page is a "tree page" (slug starting
// with a `/`) or not. If you are only interested in tree pages and you
// are not filtering by page type, consider setting
// `criteria.slug` to a regular expression matching a leading /.
//
// CRITERIA
//
// A `criteria` object can be, and almost always is, passed
// as the second argument.
//
// The `criteria` object is included in the MongoDB query made by
// this method to fetch pages. This object can contain any
// MongoDB criteria you wish. For instance, { type: 'default' }
// would fetch only pages of that type. Other criteria, such as
// permissions, are automatically applied as well via MongoDB's
// `$and` keyword so that you are not restricted in what you can
// do in your own criteria object.
//
// OPTIONS
//
// An options object can be passed as the third argument.
//
// If `options.editable` is true, only pages the current user can
// edit are returned. If `options.permission` is set to a permission
// name (such as `edit-page`), only pages for which the current user
// has that permission are returned. Otherwise pages the user can
// see are returned.
//
// If `options.sort` is present, it is passed as the argument to the
// MongoDB sort() function. The default sort is by title, on the
// `sortTitle` property which is always lowercase for case insensitive
// results.
//
// `options.limit` indicates the maximum number of results to return.
// `options.skip` indicates the number of results to skip. These can
// be used to implement pagination.
//
// If `options.fields` is present it is used to limit the fields
// returned by MongoDB for performance reasons (the second argument
// to MongoDB's find()). To generate valid links, make sure you include
// `slug` in the fields you ask for. It may be easiest to ask
// NOT to get the properties that are areas: { body: 0, thumbnail: 0},
// for instance.
//
// If `options.areas` is explicitly set to false, no areas are returned.
// If `options.areas` is set to an array then only those areas are returned.
// The `options.areas` array may use dot notation. `options.areas` does
// not have as large a performance benefit as `options.fields` but
// it is less fussy to work with.
//
// `options.search` searches for the given text, and allows
// the use of quotation marks for intact phrases and "-" for negation
// in the same way that Google does. `options.search` is best for
// a thorough, well-ranked search of documents. `options.search` is
// not suited for autocomplete because it cannot match on partial words.
//
// `options.autocomplete` searches for a partial match based on what
// the user has typed so far. It can handle partial words well, but
// matches only titles, tags and other important metadata, not the
// full text of a document.
//
// `options.q` is accepted as a synonym for `search`, and
// `options.titleSearch` is accepted as a synonym for `autocomplete`.
//
//
// `options.published` indicates whether to return only published pages
// ('1' or true), return only unpublished pages (`0` or false), or
// return both ('any' or null). It defaults to 'any', allowing suitable
// users to preview unpublished pages.
//
// `options.trash` indicates whether to return only pages in the
// trashcan ('1' or true), return only pages not in the
// trashcan ('0' or false), or return both ('any' or null). It defaults
// to '0'.
//
// `options.orphan` indicates whether to return only pages that are
// accessible yet hidden from normal navigation links ('1' or true),
// never return such orphans ('0' or false), or return both
// ('any' or null). It defaults to 'any' to ensure such pages
// are reachable.
//
// `options.tags` is a convenient way to find content that has
// at least one of the given array of tags. `options.notTags`
// does the reverse: it excludes content that has at least one
// of the given array of tags.
//
// `options.draft` returns the latest unapproved draft of the
// page if Apostrophe was configured with `workflow: true`.
// Otherwise the latest live version of the page is returned.
//
// In any case the user's identity limits what they can see.
// Permissions are checked according to the Apostrophe permissions
// model (see permissions.js).
//
// You may disable permissions entirely by setting `options.permissions`
// to `false`. This can make sense when you are using pages as storage
// in a context where Apostrophe's permissions model is not relevant.
//
// If `options.getDistinctTags` is true, an array of distinct tags
// matching the current criteria is delivered in lieu of the usual
// results object. Alternatively, if `options.getDistinct` is set to a
// property name, then distinct values for that property are delivered.
// This is useful when implementing filters.
//
// `options.lateCriteria`
//
// Unfortunately at least one MongoDB operator, `$near`, cannot be
// combined with other operators using `$and` as this method normally
// does to combine permissions checks with other criteria. You may
// place such operators in `options.lateCriteria`, a MongoDB criteria
// object which is merged into the query at the last possible moment.
// This object must not contain an `$and` clause at the top level.
// See https://jira.mongodb.org/browse/SERVER-4572 for more information.
// The `criteria` and `options` arguments may be skipped.
// (Getting everything is a bit unusual, but it's not forbidden.)
//
self.get = function(req, userCriteria, options, mainCallback) {
  // Fetch pages matching `userCriteria`, as further restricted by
  // `options` and (unless options.permissions is false) by the
  // permissions criteria for `req`.
  //
  // mainCallback receives (err, results). Normally `results` has a
  // `pages` array and a `total` count; when options.getDistinct or
  // options.getDistinctTags is used it is the array of distinct values.
  req.traceIn('get');
  // `userCriteria` and `options` may both be omitted, so work out which
  // positional argument is really the callback
  if (arguments.length === 2) {
    mainCallback = userCriteria;
    userCriteria = {};
    options = {};
  } else if (arguments.length === 3) {
    mainCallback = options;
    options = {};
  }
  // Also tolerate explicit null / undefined placeholders for either
  // argument; otherwise the property reads on `options` below would throw
  // and a null would end up inside the $and clause
  userCriteria = userCriteria || {};
  options = options || {};
  // Close the 'get' trace opened above whenever the caller is answered
  var superMainCallback = mainCallback;
  mainCallback = function() {
    req.traceOut('get');
    return superMainCallback.apply(null, arguments);
  };
  // Wrap one step of the async.series pipeline so that it is bracketed
  // by req.traceIn(name) / req.traceOut()
  function time(fn, name) {
    return function(callback) {
      req.traceIn(name);
      return fn(function(err) {
        req.traceOut();
        return callback(err);
      });
    };
  }
  // Second criteria object based on our processing of `options`
  var filterCriteria = {};
  var permission = options.permission || (options.editable && 'edit-page') || 'view-page';
  var search = options.search || options.q;
  var autocomplete = options.autocomplete || options.titleSearch;
  if (autocomplete) {
    // Autocomplete is implemented as up to two recursive calls: first
    // fetch the distinct high-weight words of matching documents, then
    // run a regular text search for the words that really match
    var _options = _.cloneDeep(options);
    var words;
    _options.getDistinct = 'highSearchWords';
    delete _options.autocomplete;
    delete _options.titleSearch;
    autocomplete = self.sortify(autocomplete);
    if (autocomplete.length) {
      words = autocomplete.split(/ /);
      // Use an indexed collection of words to optimize the query.
      // Use rooted regexes to take advantage of the index.
      filterCriteria.$and = _.map(words, function(word) {
        return { highSearchWords: self.searchify(word, true) };
      });
      // Then use a regex so that multiple word matches
      // are still required when the user types multiple words
      filterCriteria.highSearchText = self.searchify(autocomplete);
    }
    var autocompleteCriteria = {
      $and: [ userCriteria, filterCriteria ]
    };
    return self.get(req, autocompleteCriteria, _options, function(err, results) {
      if (err) {
        return mainCallback(err);
      }
      // This will be ALL the distinct high search words for
      // the matched documents, so we need to filter out those
      // that don't actually match one of the words in the
      // autocomplete phrase
      results = _.filter(results, function(result) {
        return _.some(words, function(word) {
          if (result.substr(0, word.length) === word) {
            return true;
          }
        });
      });
      // If we match nothing, return nothing. Don't assume
      // we know what kind of query it was though.
      if (!results.length) {
        delete _options.getDistinct;
        return self.get(req, { _thisWillNeverHappen: true }, _options, mainCallback);
      }
      // Set up a recursive call using MongoDB
      // full text search
      delete _options.getDistinct;
      _options.search = results.join(' ');
      return self.get(req, userCriteria, _options, mainCallback);
    });
  }
  var sort = options.sort;
  if (sort === false) {
    // OK, you really truly don't want a sort
    // (for instance, you are relying on the
    // implicit sort of $near)
  } else if (search) {
    // Text search is in the picture. If they don't
    // specify a sort or specify sort: 'q' or
    // sort: 'search', sort by search result quality
    if ((!sort) || (sort === 'q') || (sort === 'search')) {
      sort = { textScore: { $meta: 'textScore' } };
    }
  } else if (!sort) {
    // A reasonable default sorting behavior
    sort = { sortTitle: 1 };
  }
  var limit = options.limit || undefined;
  var skip = options.skip || undefined;
  var fields = options.fields || undefined;
  var areas = (options.areas === undefined) ? true : options.areas;
  var tags = options.tags || undefined;
  var notTags = options.notTags || undefined;
  var permissions = (options.permissions === false) ? false : true;
  var lateCriteria = options.lateCriteria || undefined;
  self.convertBooleanFilterCriteria('trash', options, filterCriteria, '0');
  self.convertBooleanFilterCriteria('orphan', options, filterCriteria, 'any');
  self.convertBooleanFilterCriteria('published', options, filterCriteria);
  if (tags || notTags) {
    filterCriteria.tags = { };
    if (tags) {
      filterCriteria.tags.$in = tags;
    }
    if (notTags) {
      filterCriteria.tags.$nin = notTags;
    }
  }
  if (search) {
    // Set up MongoDB text index search
    filterCriteria.$text = { $search: search };
  }
  var projection = _.cloneDeep(fields || {});
  if (search) {
    // MongoDB mandates this if we want to sort on search result quality
    projection.textScore = { $meta: 'textScore' };
  }
  var results = {};
  var combine = [ userCriteria, filterCriteria ];
  if (permissions) {
    combine.push(self.permissions.criteria(req, permission));
  }
  var criteria = {
    $and: combine
  };
  // The lateCriteria option is merged with the criteria option last
  // so that it is not subject to any $and clauses, due to this
  // limitation of MongoDB which prevents the highly useful $near
  // clause from being used otherwise:
  //
  // https://jira.mongodb.org/browse/SERVER-4572
  if (lateCriteria) {
    extend(true, criteria, lateCriteria);
  }
  if (options.getDistinctTags) {
    // This is purely for bc so just modify options
    options.getDistinct = 'tags';
  }
  if (options.getDistinct) {
    if (self._scanCriteriaFor(criteria, { $near: 1 })) {
      // There is a MongoDB bug as of 2.6.1 that causes crashes
      // if $near is combined with "distinct". Work around it by
      // getting all the IDs and doing a "distinct" on those.
      // It's not great, but it's not a crash. -Tom
      return self.pages.find(criteria, { _id: 1 }).toArray(function(err, results) {
        if (err) {
          return mainCallback(err);
        }
        var ids = _.pluck(results, '_id');
        return self.pages.distinct(options.getDistinct, { _id: { $in: ids } }, mainCallback);
      });
    }
    // Just return distinct values for some field matching the current criteria,
    // rather than the normal results. This is a bit of a hack, we need
    // to consider refactoring all of 'fetchMetadata' here
    return self.pages.distinct(options.getDistinct, criteria, mainCallback);
  }
  // Use the caller's hint if there is one, otherwise compute one for
  // these criteria. The computed hint is kept in a local variable and is
  // NOT written back to `options`, so an options object that is reused
  // for a later call never carries a hint computed for other criteria.
  var hint = options.hint || self.hintGetCriteria(criteria);
  var findOptions = {};
  // If a sort is present, we must let
  // mongodb use the matching index or
  // we'll get the dreaded:
  // https://jira.mongodb.org/browse/SERVER-15231
  if (hint && (!sort)) {
    findOptions.hint = hint;
  }
  // The function declarations below are hoisted, so they can be named here
  async.series([time(count, 'count'), time(loadPages, 'loadPages'), time(markPermissions, 'markPermissions'), time(beforeLoadWidgets, 'beforeLoadWidgets'),time(loadWidgets, 'loadWidgets'), time(afterGet, 'afterGet')], done);
  // Determine results.total when pagination is in play
  function count(callback) {
    if ((skip === undefined) && (limit === undefined)) {
      // Why query twice if we're getting everything anyway? Especially
      // when count() ignores optimizer hints (until 2.5.5 at least)?
      return callback(null);
    }
    // find() modifies its third argument, so make sure it's a copy
    var o = {};
    extend(true, o, findOptions);
    var start = Date.now();
    self.pages.find(criteria, {}, o).count(function(err, count) {
      // Record the query on the request for tracing purposes
      if (!req.traceQueries) {
        req.traceQueries = [];
      }
      var query = {
        criteria: util.inspect(criteria, { depth: 10 }),
        projection: 'COUNT',
        time: Date.now() - start,
        hint: findOptions.hint
      };
      req.traceQueries.push(query);
      results.total = count;
      return callback(err);
    });
  }
  // Run the main query and post-process the documents it returns
  function loadPages(callback) {
    // find() modifies its third argument, so make sure it's a copy
    var o = {};
    extend(true, o, findOptions);
    var start = Date.now();
    var q = self.pages.find(criteria, projection, o);
    // At last we can use skip and limit properly thanks to permissions stored
    // in the document
    if (skip !== undefined) {
      q.skip(skip);
    }
    if (limit !== undefined) {
      q.limit(limit);
    }
    if (sort) {
      q.sort(sort);
    }
    q.toArray(function(err, pagesArg) {
      if (err) {
        console.error(err);
        return callback(err);
      }
      // Record the query on the request for tracing purposes
      if (!req.traceQueries) {
        req.traceQueries = [];
      }
      var query = {
        criteria: util.inspect(criteria, { depth: 10 }),
        projection: util.inspect(projection, { depth: 10 }),
        time: Date.now() - start,
        hint: findOptions.hint,
        sort: sort
      };
      req.traceQueries.push(query);
      results.pages = pagesArg;
      if (results.total === undefined) {
        // count() was skipped because we fetched everything
        results.total = results.pages.length;
      }
      // If we are interested in drafts, return the draft content
      // as if it were public, otherwise return the public content
      if (self.options.workflow) {
        var draft = (req.session && (req.session.workflowMode === 'draft'));
        if (draft) {
          // Get the drafts as if they were the pages
          results.pages = self.workflowGetDrafts(results.pages);
        } else {
          // Get the pages without the draft property
          self.workflowCleanPages(results.pages);
        }
      }
      // Except for ._id, no property beginning with a _ should be
      // loaded from the database. These are reserved for dynamically
      // determined properties like permissions and joins
      _.each(results.pages, function(page) {
        // If we don't remove these we spend double resources
        // loading them. TODO: a migration soon to eliminate
        // these backups; we trust the results of the 0.5 migrator
        // at this point.
        if (page.preMigrationAreas) {
          delete page.preMigrationAreas;
        }
        self.pruneTemporaryProperties(page);
      });
      if (areas !== true) {
        // The walkAreas callbacks below return true for the areas
        // the caller did not ask for
        if (Array.isArray(areas)) {
          _.each(results.pages, function(page) {
            self.walkAreas(page, function(area, dotPath) {
              return (!_.contains(areas, dotPath));
            });
          });
        } else {
          // Removing all areas is simpler
          _.each(results.pages, function(page) {
            self.walkAreas(page, function(area) {
              return true;
            });
          });
        }
      }
      return callback(err);
    });
  }
  // Annotate the pages with the current user's edit and publish permissions
  function markPermissions(callback) {
    self.permissions.annotate(req, 'edit-page', results.pages);
    self.permissions.annotate(req, 'publish-page', results.pages);
    return callback(null);
  }
  // Overridable hook, invoked before the widget loaders
  function beforeLoadWidgets(callback) {
    return self.beforeLoadWidgets(req, results, callback);
  }
  function loadWidgets(callback) {
    // Use eachSeries to avoid devoting overwhelming mongodb resources
    // to a single user's request. There could be many snippets on this
    // page, and callLoadersForPage is parallel already
    async.forEachSeries(results.pages, function(page, callback) {
      // Do not crash the stack
      return setImmediate(function() {
        return self.callLoadersForPage(req, page, callback);
      });
    }, function(err) {
      if (err) {
        console.error('error from loadWidgets:');
        console.error(err);
      }
      return callback(err);
    });
  }
  // Overridable hook, invoked last; skipped if self.afterGet is not a function
  function afterGet(callback) {
    if(typeof self.afterGet === 'function'){
      self.afterGet(req, results, callback);
    } else {
      return callback(null);
    }
  }
  function done(err) {
    return mainCallback(err, results);
  }
};
// Identical to apos.get, but delivers only the first
// page, or null if there is no page matching the
// criteria.
//
// If you use options like getDistinctTags that
// do not return pages, you will receive the same response
// you would with self.get.
self.getOne = function(req, userCriteria, options, callback) {
  // Like self.get, but delivers only the first matching page, or null
  // if no page matches.
  //
  // As with self.get, `userCriteria` and `options` may be skipped; the
  // callback is then taken from the last argument actually passed.
  // (Previously a skipped argument left `callback` undefined.)
  //
  // callback receives (err, page). When the results have no `pages`
  // property (for instance distinct values were requested) the results
  // are delivered unchanged instead.
  if (arguments.length === 2) {
    callback = userCriteria;
    userCriteria = {};
    options = {};
  } else if (arguments.length === 3) {
    callback = options;
    options = {};
  }
  return self.get(req, userCriteria, options, function(err, results) {
    if (err) {
      return callback(err);
    }
    if (!results.pages) {
      // Not a page listing; pass it through as self.get would
      return callback(null, results);
    }
    if (!results.pages.length) {
      return callback(null, null);
    }
    return callback(null, results.pages[0]);
  });
};
// Similar to afterGet, but fired before the widget loaders
// are called.
self.beforeLoadWidgets = function(req, results, callback) {
  // Default implementation of the hook: leave `results` untouched
  // and report success straight away. Meant to be overridden.
  var noError = null;
  return callback(noError);
};
// The afterGet method can be overridden to modify the
// results of all calls to apos.get easily. Be mindful that
// the `results` object might not have a pages property at all
// in cases where only distinct tags (or other distinct properties)
// were asked for. You can also use the afterGet option when
// configuring the apostrophe module, and the apostrophe-site
// module offers an afterGet option which is passed on in that way.
self.afterGet = function(req, results, callback) {
  // Default implementation of the hook: leave `results` untouched
  // and report success straight away. Meant to be overridden.
  var noError = null;
  return callback(noError);
};
// Fetch the "page" with the specified slug. As far as
// apos is concerned, the "page" with the slug /about
// is expected to be an object with a .about property.
//
// A 'req' object is needed to provide a context for permissions.
// Permissions are checked on the page based on the user's identity.
// A ._edit property will be set on the page if it is editable by
// the current user and it will not be returned at all if it is
// not viewable by the current user.
//
// The first callback parameter is an error or null.
// In the event of an exact slug match, the second parameter
// to the callback is the matching page object. If there is a
// partial slug match followed by a / in the URL or an exact
// slug match, the longest such match is the third parameter.
// The fourth parameter is the remainder of the URL following
// the best match, or the empty string in the event of an
// exact match.
//
// If the slug passed does not begin with a leading /,
// partial matches are never returned.
//
// You MAY also store entirely unrelated properties in
// your "page" objects, via your own mongo code.
//
// This allows the composition of objects as
// different (and similar) as webpages, blog articles,
// upcoming events, etc. Usually objects other than
// webpages do not have a leading / on their slugs
// (and when using the pages module they must not).
//
// The `options` parameter may be skipped. If it is not
// skipped, it is passed on to `apos.get`.
self.getPage = function(req, slug, optionsArg, callback) {
  // Fetch the page with the given slug, falling back to the closest
  // ancestor for tree slugs.
  //
  // callback receives (err, page, bestPage, remainder):
  //   page: the exact match, or null if only an ancestor matched
  //   bestPage: the longest match (same as page on an exact match)
  //   remainder: the part of `slug` after bestPage's slug, with trailing
  //     slashes removed and a leading '/' guaranteed when non-empty
  // If nothing matches at all, callback receives (null, null).
  //
  // `optionsArg` may be skipped; otherwise it is merged over the default
  // sort and limit and passed to self.get.
  if (!callback) {
    callback = optionsArg;
    optionsArg = {};
  }
  optionsArg = optionsArg || {};
  // Candidate slugs, most specific first: the exact slug, then, for
  // tree slugs longer than '/', each ancestor path down to the home page
  var candidates = [ slug ];
  if ((slug.length > 1) && (slug.charAt(0) === '/')) {
    var segments = slug.substr(1).split('/');
    // The final segment is already covered by the exact match
    segments.pop();
    // homepage is always interesting
    var ancestors = [ '/' ];
    var prefix = '';
    segments.forEach(function(segment) {
      prefix += '/' + segment;
      ancestors.unshift(prefix);
    });
    candidates = candidates.concat(ancestors);
  }
  // The order of the $in list guarantees nothing about the result, so
  // sort by slug descending (longest match first) and keep one page
  var options = extend(true, { sort: { slug: -1 }, limit: 1 }, optionsArg);
  self.get(req, { slug: { $in: candidates } }, options, function(err, results) {
    if (err) {
      return callback(err);
    }
    if (!results.pages.length) {
      // Nonexistence is not an error
      return callback(null, null);
    }
    var bestPage = results.pages[0];
    // Anything other than the exact slug is a partial match only
    var page = (bestPage.slug === slug) ? bestPage : null;
    // Strip trailing slashes for consistent results
    var remainder = slug.substr(bestPage.slug.length).replace(/\/+$/, '');
    // For consistency, guarantee a leading / if the remainder
    // is not empty. This way parsing remainders attached to the
    // home page (the slug of which is '/') is not a special case
    if (remainder.length && (remainder.charAt(0) !== '/')) {
      remainder = '/' + remainder;
    }
    return callback(err, page, bestPage, remainder);
  });
};
// Return an object to be passed as the hint option to MongoDB,
// or undefined if we can't make any clever suggestions. This method
// is called by apos.get because the MongoDB query optimizer is
// sometimes not bright enough to spot the smartest indexes to use
// with Apostrophe queries. If we get this wrong Apostrophe is very
// slow on sites with tens of thousands of pages
self.hintGetCriteria = function(criteria) {
  // Suggest an index hint for `criteria`: an object such as { slug: 1 },
  // or undefined when no suggestion is made.
  //
  // No hint is offered if the criteria mention $text or $near anywhere
  // (cue Master Shake)
  if (self._scanCriteriaFor(criteria, { $text: 1, $near: 1 })) {
    return undefined;
  }
  // If a query mentions "path" or "slug" at any point, it is likely
  // that we can greatly optimize it by indexing on those fields
  var field = self._scanCriteriaFor(criteria, { 'path': 1, 'slug': 1 });
  if (!field) {
    return undefined;
  }
  var hint = {};
  hint[field] = 1;
  return hint;
};
// Scan a MongoDB criteria object, at any depth, for any mention of
// particular property names and, if one is found, return the
// first such property name. Returns undefined if none is mentioned.
//
// `criteria` is the criteria object, or any value nested within one;
// nested objects and the elements of nested arrays are scanned
// recursively. Values that cannot contain properties (strings,
// numbers, null and so on) simply yield undefined.
//
// `fields` names the properties of interest and looks
// like: { 'path': 1 }
//
// Only own, truthy properties of `fields` count as matches. Without
// that check a criteria key such as "toString" or "constructor" would
// match the methods every plain object inherits and be reported as
// a hit.
//
// A `slug` criterion that is exactly the regular expression /^\// is
// never reported; see the comment in the loop.
self._scanCriteriaFor = function(criteria, fields) {
  var hasOwn = Object.prototype.hasOwnProperty;
  var p;
  var v;
  var i;
  var result;
  for (p in criteria) {
    v = criteria[p];
    if (hasOwn.call(fields, p) && fields[p]) {
      if ((p === 'slug') && (v instanceof RegExp) && (v.toString() === '/^\\//')) {
        // This is just a test to rule out non-tree pages.
        // There are typically enough tree pages that this
        // is not an interesting criterion for a hint.
        continue;
      }
      return p;
    }
    // typeof null is 'object'; there is nothing to scan in that case
    if (v && (typeof(v) === 'object')) {
      if (Array.isArray(v)) {
        // For instance the clauses of $or / $and, or the values of $in
        for (i = 0; (i < v.length); i++) {
          result = self._scanCriteriaFor(v[i], fields);
          if (result) {
            return result;
          }
        }
      } else {
        result = self._scanCriteriaFor(v, fields);
        if (result) {
          return result;
        }
      }
    }
  }
  return undefined;
};
// Insert or update an entire page object at once.
//
// req is the request object. You MUST pass it; it is used for
// permissions checking and is handed to every hook invoked here.
//
// slug is the existing slug of the page in the database. If page.slug is
// different then the slug of the page is changed. If page.slug is not defined
// it is set to the slug parameter for your convenience. The slug of the page,
// and the path of the page if it is defined, are both automatically made
// unique through successive addition of random digits if necessary.
//
// You MAY add unrelated properties to page objects between calls to
// getPage and putPage, or directly manipulate page objects with mongodb.
//
// If the page does not already exist this method will create it.
//
// A copy of the page is inserted into the versions collection unless you
// explicitly specify "version: false" as an option.
// NOTE(review): the previous comment also said that "version: false"
// bypasses workflow. Nothing in this function ties options.version to
// the workflow branch, so that claim needs confirming against
// versionPage / workflowUpdatePage.
//
// If Apostrophe's global "workflow" option is true and options.workflow
// is true, this method will push your updates into a "draft" object
// property of the page rather than directly updating the live
// properties of the page. This applies only to pages that already
// exist; new pages are always inserted directly. Search indexing is
// skipped when the workflow branch was taken.
//
// Please let this function generate ._id for you on a new page. This is
// necessary to allow putPage to distinguish new pages from old when
// automatically fixing unique slug errors.
//
// The options argument may be skipped. If options.permissions is explicitly
// false, the operation takes place without checking permissions.
//
// The page object you pass is modified in place: slug, _id (for new
// pages) and sortTitle are set, _newId is removed, and slug / path
// gain a suffix if a uniqueness conflict had to be resolved.
//
// The callback receives (err) on failure, where err may be the string
// 'forbidden' or 'page.slug is not set' or whatever error a step
// reported, and (null, page) on success. page is the object you passed
// in, not the pruned copy that was stored.
self.putPage = function(req, slug, options, page, callback) {
// Allow skipping of the options parameter
if (!callback) {
callback = page;
page = options;
options = {};
}
var newPage = false;
if (!page.slug) {
page.slug = slug;
}
// We identify new pages by whether they already have
// an ID. However to accommodate rare cases where the ID needs
// to be determined externally in advance we check for
// a _newId property before generating one.
if (!page._id) {
page._id = page._newId || self.generateId();
delete page._newId;
newPage = true;
}
page.sortTitle = self.sortify(page.title);
// Set by save() when the change went to workflowUpdatePage instead
// of the live document; consulted later by indexing()
var workflowApplied = false;
// Step 1: permissions check.
// Provide the object rather than the slug since we have it and we can
// avoid extra queries that way and also do meaningful permissions checks
// on new pages
function permissions(callback) {
if (options.permissions === false) {
return callback(null);
}
return callback(self.permissions.can(req, 'edit-page', page) ? null : 'forbidden');
}
// Step 2: overridable hook, invoked before anything is stored
function beforePutPage(callback) {
return self.beforePutPage(req, page, callback);
}
// Step 3: store the page. Calls itself again, with a modified slug,
// for as long as the database reports a unique key conflict.
function save(callback) {
function afterUpdate(err) {
if (err && self.isUniqueError(err))
{
// A single random digit, 0 through 9, is appended per attempt
var num = (Math.floor(Math.random() * 10)).toString();
if (page.slug === undefined) {
return callback('page.slug is not set');
}
page.slug += num;
// Path index is sparse, not everything is part of a page tree,
// don't create materialized paths where none are desired
// (for instance, blog posts)
if (page.path) {
page.path += num;
}
// Retry on an existing page must use the OLD slug or it will
// create unwanted clones. For a new page it must NOT use the old slug
// or it will keep failing. Both hold here: the update below always
// queries by the unchanged slug argument, while the document that is
// stored is cloned afresh from page, which now has the new slug.
return save(callback);
}
return callback(err);
}
// Store a pruned deep copy so the caller's object keeps its
// temporary properties.
// TODO a safe shallow cloning mechanism would
// be tricky but faster
var copy = _.cloneDeep(page);
self.pruneTemporaryProperties(copy);
if (newPage) {
return self.pages.insert(copy, { safe: true }, afterUpdate);
} else if (self.options.workflow && (options.workflow)) {
workflowApplied = true;
return self.workflowUpdatePage(req, copy, options.workflow, afterUpdate);
} else {
// newPage is always false on this branch (new pages returned above),
// so this test always passes
if (!newPage) {
// Makes it less likely we'll have a fussy issue with MongoDB thinking
// we tried to change the ID if someone has a page with an ObjectID rather
// than a string id (although we emphasize it should be the latter).
// This is an old issue, probably not current, but for bc keep
// this in projects not using workflow for now.
delete copy._id;
}
// The existing document is located by the slug argument, not by page.slug
return self.pages.update({ slug: slug }, copy, { safe: true }, afterUpdate);
}
}
// Step 4: record a version unless options.version is explicitly false
function versioning(callback) {
if (options.version === false) {
return callback(null);
}
return self.versionPage(req, page, callback);
}
// Step 5: update the search index
function indexing(callback) {
// Search should not be updated if this change is
// subject to approval
if (workflowApplied) {
return setImmediate(callback);
}
return self.indexPage(req, page, callback);
}
// Step 6: overridable hook, invoked after everything else succeeded
function afterPutPage(callback) {
return self.afterPutPage(req, page, callback);
}
// Deliver the first error, if any, otherwise the page object itself
function finish(err) {
if (err) {
return callback(err);
}
return callback(null, page);
}
// The steps run one after another, in this order
async.series([ permissions, beforePutPage, save, versioning, indexing, afterPutPage], finish);
};
// Hook invoked by putPage just before the page is actually
// stored, after permissions have been verified. The default
// implementation does nothing except invoke `callback`, with no
// arguments, via setImmediate. Override it to add behavior;
// `req` and `page` are the same objects putPage is working with.
self.beforePutPage = function(req, page, callback) {
  var scheduled = setImmediate(callback);
  return scheduled;
};
// Hook invoked by putPage as its last step, just before putPage
// invokes its own final callback. It is only reached when every
// earlier step of putPage succeeded. The default implementation
// does nothing except invoke `callback`, with no arguments, via
// setImmediate. Override it to add behavior; `req` and `page` are
// the same objects putPage is working with.
self.afterPutPage = function(req, page, callback) {
  var scheduled = setImmediate(callback);
  return scheduled;
};
// Run the loaders for every area found anywhere in `page`, then
// invoke `callback`. Loaders implement the fetching of related
// file objects and other data not stored directly in the page object.
//
// Loaders are expected to report failure as appropriate to their
// needs by setting item properties that their templates can use to
// display that when relevant. Whatever error the series of area
// loads does report is passed to `callback` as its only argument.
//
// As a side effect the "slug" property of each area is set to the
// page slug, a colon, and the dot path of the area within the page.
// This facilitates the use of the edit-area API.
//
// The `req` object is available so that loaders can consider permissions
// and perform appropriate caching for the lifetime of the request.
//
// What if the loader for a page triggers a load of that same page?
// To avoid infinite recursion the current nesting level for each page
// id is tracked in req.loaderRecursion. Nesting is tolerated up to
// self.maxLoaderRecursion levels (3 unless already set); beyond that
// the callback is invoked right away without loading anything. This
// allows some semi-reasonable cases without crashing the site.
self.callLoadersForPage = function(req, page, callback) {
  self.maxLoaderRecursion = self.maxLoaderRecursion || 3;
  if (!req.loaderRecursion) {
    req.loaderRecursion = {};
  }
  var depth = req.loaderRecursion[page._id];
  if (depth && (depth === self.maxLoaderRecursion)) {
    // Not something we need to warn about, we just don't do it.
    // It's common for users to build widgets that point in
    // a circle. -Tom
    return callback(null);
  }
  if (depth) {
    req.loaderRecursion[page._id]++;
  } else {
    req.loaderRecursion[page._id] = 1;
  }

  // Gather every area of the page, stamping each with its slug
  var found = [];
  self.walkAreas(page, function(area, dotPath) {
    area.slug = page.slug + ':' + dotPath;
    found.push(area);
  });

  // Each area load is deferred with setImmediate
  function loadArea(area, next) {
    return setImmediate(function() {
      self.callLoadersForArea(req, area, next);
    });
  }

  // This page is finished: step its nesting level back down
  function done(err, results) {
    req.loaderRecursion[page._id]--;
    return callback(err);
  }

  // We should run area loaders in series so that semaphores
  // in the req object intended to detect recursion behave reasonably.
  // We get plenty of parallelism from multiple users as it is
  async.mapSeries(found, loadArea, done);
};
// An easy way to leave automatic redirects behind as things are renamed.
// Can be used with anything that lives in the pages table - regular pages,
// blog posts, events, etc. See the pages and blog modules for examples of usage.
//
// If `slug` differs from `originalSlug`, the redirect whose `from` is
// `originalSlug` is created or replaced (an upsert) so that it points
// to `slug`. If the two are the same nothing is written. Either way
// `callback` receives a single argument: the error, or null.
self.updateRedirect = function(originalSlug, slug, callback) {
  if (slug === originalSlug) {
    // Nothing was renamed, so there is nothing to leave behind
    return callback(null);
  }
  var criteria = { from: originalSlug };
  var redirect = { from: originalSlug, to: slug };
  var settings = { upsert: true, safe: true };
  return self.redirects.update(criteria, redirect, settings, function(err, doc) {
    return callback(err);
  });
};
// Visit every property of a page, recursively, optionally deleting
// properties along the way.
//
// The second argument must be a function that takes the object that
// owns the property, the key, the value and a "dot path", and returns
// true if that key should be discarded. Keys are always passed as
// strings, including the indexes of arrays.
//
// If the original object looks like:
//
// { a: { b: 5 } }
//
// Then when the callback is invoked for b, the key will be 'b' and the
// dotPath will be the string 'a.b'.
//
// A property the callback asks to discard is not descended into.
// Discarded properties are deleted only once every property of the
// same object has been visited.
//
// You do not need to pass a _dotPath argument to walkPage itself, that
// argument is used for recursive invocation.
self.walkPage = function(page, callback, _dotPath) {
  // Plain loops are used here rather than a utility library because
  // of performance issues. Pruning big nested objects is not
  // something we can afford to do slowly. -Tom
  var prefix = (_dotPath === undefined) ? '' : (_dotPath + '.');
  var discarded = [];
  var name;
  var path;
  var child;
  var n;
  for (name in page) {
    path = prefix + name.toString();
    if (callback(page, name, page[name], path)) {
      discarded.push(name);
      continue;
    }
    // Read the value again, after the callback has had its say
    child = page[name];
    if (typeof(child) === 'object') {
      self.walkPage(child, callback, path);
    }
  }
  for (n = 0; (n < discarded.length); n++) {
    delete page[discarded[n]];
  }
};
// Walk the areas in a page. An area is any value, at any depth,
// whose "type" property is 'area'. The callback receives the
// area object and the dot-notation path to that object. If the
// callback returns true, the area is *removed* from the page object,
// otherwise it is left in place. Values that are not areas are
// always left in place.
self.walkAreas = function(page, callback) {
  function visit(owner, key, value, dotPath) {
    var isArea = !!value && (value.type === 'area');
    return isArea ? callback(value, dotPath) : false;
  }
  return self.walkPage(page, visit);
};
};