apostrophe
Version:
The Apostrophe Content Management System.
635 lines (603 loc) • 24.1 kB
JavaScript
// Provides the `build` method, a flexible and powerful way to build
// URLs with query parameters and more. This method is made available
// as the `build` filter in Nunjucks. This is also the logical place
// to add new utility methods relating to URLs.
const _ = require('lodash');
const qs = require('qs');
module.exports = {
options: {
alias: 'url',
static: false
},
restApiRoutes(self) {
return {
// GET /api/v1/@apostrophecms/url
//
// Returns the result of `getAllUrlMetadata` — an object
// with `pages` and `attachments` properties.
// See the `getAllUrlMetadata` method for full documentation.
async getAll(req) {
if (!self.isExternalFront(req)) {
throw self.apos.error('forbidden');
}
if (!self.options.static) {
throw self.apos.error('invalid',
'The @apostrophecms/url module must be configured with the "static: true" option to use this API. ' +
'Without it, URL metadata for filters and pagination cannot be fully enumerated for a static build.'
);
}
// Parse and sanitize attachment options from the query string.
const launder = self.apos.launder;
const wantAttachments = launder.boolean(req.query.attachments);
const splitSizes = (val) => {
const list = launder.string(val)
.split(',').map(s => s.trim()).filter(Boolean);
return list.length ? list : undefined;
};
const attachments = wantAttachments
? {
sizes: splitSizes(req.query.attachmentSizes),
skipSizes: splitSizes(req.query.attachmentSkipSizes),
scope: launder.select(
req.query.attachmentScope,
[ 'used', 'all' ],
'used'
)
}
: false;
return self.getAllUrlMetadata(req, {
attachments
});
}
};
},
handlers(self) {
return {
'@apostrophecms/page:beforeSend': {
addStaticUrlsFlag(req) {
req.data.staticUrls = !!self.options.static;
}
}
};
},
methods(self) {
return {
// Returns `true` if the given `req` represents a static build
// request. This is the single source of truth — modules should
// use this method rather than inspecting `req` properties
// directly.
//
// Static build requests are those made by an external frontend
// (e.g. Astro) that opted in to static-build URL handling via
// the `x-apos-static-base-url: 1` header. The Express
// middleware sets `req.aposStaticBuild` and (when configured)
// `req.staticBaseUrl` in response.
isStaticBuild(req) {
return !!req.aposStaticBuild;
},
// Returns `true` if the given `req` originates from an
// external frontend integration (e.g. Astro, Next.js).
// This is the single source of truth — modules should use
// this method rather than inspecting `req.aposExternalFront`
// directly.
isExternalFront(req) {
return !!req.aposExternalFront;
},
// Returns the effective base URL for the given request.
//
// Resolution order:
// 1. If a hostname is configured for the active locale,
// `<protocol>://<hostname>` is returned (locale-specific
// host always wins, prefix is never appended).
// 2. If the request is a static build (`isStaticBuild(req)`),
// `req.staticBaseUrl` + prefix is returned (or the empty
// string when none is configured).
// 3. Otherwise, `apos.baseUrl` + prefix is returned (or the
// empty string).
//
// ### `options.strict`
//
// When `true`, guarantees a non-empty return value:
// - In a static build where `staticBaseUrl` is empty,
// falls back to `apos.baseUrl`.
// - In a non-static context where `apos.baseUrl` is empty,
// still returns the empty string (nothing more to fall
// back to).
//
// Use `strict: true` when an absolute URL is required (e.g.
// sitemap `<loc>` values).
//
// ### `options.prefix`
//
// When `true` (the default), the global `apos.prefix` is
// appended to the returned URL (e.g. `/blog`). The prefix
// is **not** appended when a locale-specific hostname is
// used — that hostname already represents the full origin.
//
// Pass `prefix: false` to obtain only the origin / base URL
// without the prefix. This is the legacy behavior of
// `apos.page.getBaseUrl(req)` before the delegation to this
// method.
getBaseUrl(req, { strict = false, prefix = true } = {}) {
const hostname = self.apos.i18n.locales?.[req.locale]?.hostname;
if (hostname) {
// Locale hostnames are fully qualified origins;
// the global prefix does not apply.
return `${req.protocol}://${hostname}`;
}
const aposPrefix = prefix ? (self.apos.prefix || '') : '';
if (self.isStaticBuild(req)) {
const staticUrl = req.staticBaseUrl || '';
if (staticUrl || !strict) {
return staticUrl + aposPrefix;
}
}
return (self.apos.baseUrl || '') + aposPrefix;
},
// Build filter URLs. `data` is an object whose properties
// become new query parameters. These parameters override any
// existing parameters of the same name in the URL. If you
// pass a property with a value of `undefined`, `null` or an
// empty string, that parameter is removed from the
// URL if already present (note that the number `0` does not
// do this). This is very useful for maintaining filter
// parameters in a query string without redundant code.
//
// Pretty URLs
//
// If the optional `path` argument is present, it must be an
// array. (You may skip this argument if you are just
// adding query parameters.)
//
// Any properties of `data` whose names appear in `path`
// are concatenated to the URL directly, separated by slashes,
// in the order they appear in that array.
//
// The first missing or empty value for a property in `path`
// stops this process to prevent an ambiguous URL.
//
// Note that there is no automatic detection that this has
// already happened in an existing URL, so you can't override
// existing components of the path.
//
// If a property's value is not equal to the slugification of
// itself as determined by apos.util.slugify, then a query
// parameter is set instead.
//
// If you don't want to handle a property as a query parameter,
// make sure it is always slug-safe.
//
// Overrides: multiple data objects
//
// You may pass additional data objects. The last one wins, so
// you can pass your existing parameters first and pass new
// parameters you are changing as a second data object.
//
// Working with Arrays
//
// Normally, a new value for a property replaces any old one,
// and `undefined`, `null` or `''` removes the old one. If you
// wish to build up an array property instead you'll need
// to use the MongoDB-style $addToSet and $pull operators to add and
// remove values from an array field in the URL:
//
// Add colors[]=blue to the query string, if not already present
//
// `{ colors: { $addToSet: 'blue' } }`
//
// Remove colors[]=blue from the query string, if present
//
// `{ colors: { $pull: 'blue' } }`
//
// All values passed to $addToSet or $pull must be strings or
// convertible to strings via `toString()` (e.g. numbers, booleans)
//
// (The actual query string syntax includes array indices and
// is fully URI escaped, so it's slightly different but has
// the same impact. PHP does the same thing.)
build(url, path, data) {
let hash;
// Preserve hash separately
const matches = url.match(/^(.*)?#(.*)$/);
if (matches) {
url = matches[1];
hash = matches[2];
if (url === undefined) {
// Why, JavaScript? Why? -Tom
url = '';
}
}
// Sometimes necessary with nunjucks, we may otherwise be
// exposed to a SafeString object and throw an exception
url = url.toString();
const qat = url.indexOf('?');
let base = url;
const dataObjects = [];
let pathKeys;
let original;
const query = {};
let i, j;
let key;
if (qat !== -1) {
original = qs.parse(url.substr(qat + 1));
base = url.substr(0, qat);
}
let dataStart = 1;
if (path && Array.isArray(path)) {
pathKeys = path;
dataStart = 2;
} else {
pathKeys = [];
}
// Process data objects in reverse order so the last
// override wins
for (i = arguments.length - 1; i >= dataStart; i--) {
dataObjects.push(arguments[i]);
}
if (original) {
dataObjects.push(original);
}
const done = {};
let stop = false;
let dataObject;
let value;
for (i = 0; i < pathKeys.length; i++) {
if (stop) {
break;
}
key = pathKeys[i];
for (j = 0; j < dataObjects.length; j++) {
dataObject = dataObjects[j];
if (dataObject[key] !== undefined) {
value = dataObject[key];
// If we hit an empty value we need to stop all path processing
// to avoid ambiguous URLs
if (value === undefined || value === null || value === '') {
done[key] = true;
stop = true;
break;
}
// If the value is an object it can't be stored in the path,
// so stop path processing, but don't mark this key 'done'
// because we can still store it as a query parameter
if (typeof value === 'object') {
stop = true;
break;
}
const s = dataObject[key].toString();
if (s === self.apos.util.slugify(s)) {
// Don't append double /
if (base !== '/') {
base += '/' + s;
} else {
base += s;
}
done[key] = true;
break;
} else {
// A value that cannot be slugified also forces an end to
// path processing
stop = true;
break;
}
}
}
}
// For non-path parameters we process starting with the original
// object so cumulative operations like $addToSet and $pull can work
for (i = dataObjects.length - 1; i >= 0; i--) {
dataObject = dataObjects[i];
for (key in dataObject) {
if (done[key]) {
continue;
}
value = dataObject[key];
if (value && value.$pull !== undefined) {
value = _.difference(query[key] || [], [ value.$pull.toString() ]);
if (!value.length) {
value = undefined;
}
} else if (value && value.$addToSet !== undefined) {
value = _.union(query[key] || [], [ value.$addToSet.toString() ]);
if (!value.length) {
value = undefined;
}
}
if (value === undefined || value === null || value === '') {
delete query[key];
} else {
query[key] = value;
}
}
}
function restoreHash(url) {
if (hash !== undefined) {
return url + '#' + hash;
} else {
return url;
}
}
if (_.size(query)) {
return restoreHash(base + '?' + qs.stringify(query));
} else {
return restoreHash(base);
}
},
// Generate a list of all URLs reachable with the given
// req object. Used internally to implement static site
// generation and sitemaps. Usually called in a loop,
// once for each locale.
//
// ## Returned shape
//
// The return value is always:
//
// ```js
// {
// pages: [ ...page metadata entries ],
// attachments: { // null when not requested
// uploadsUrl: '/uploads',
// results: [
// { _id: 'abc', urls: [{ size?, path }] },
// ...
// ]
// }
// }
// ```
//
// ## Page metadata entries (`pages`)
//
// Each entry in the `pages` array may contain the
// following properties:
//
// ### `url` (string, always present)
// The URL path for this entry — a purely relative path
// without origin or prefix (e.g. `/articles` or
// `/articles/category/tech`, never
// `https://example.com/my-repo/articles`).
//
// For document entries, the framework strips the base
// URL (origin + prefix) automatically after collection.
// For **literal content** entries added by event
// handlers, the URL **must** be provided as a relative,
// prefix-free path (e.g. `/robots.txt`, not
// `/my-repo/robots.txt`). The consumer (e.g. Astro
// integration) is responsible for prepending the prefix
// when fetching from the backend.
//
// ### `i18nId` (string, always present)
// A stable identifier that is consistent across localized
// versions of the same logical URL. Used by external
// frontends (e.g. Astro) to correlate URLs across locales.
// For the primary view of a document this equals `aposDocId`.
// For derived URLs (pagination, filter combinations) it is
// built by appending suffixes to the base doc's `aposDocId`,
// e.g. `myDocId.category.tech.1` or `myDocId.2`.
//
// ### `type` (string, present for document entries)
// The Apostrophe doc `type` name (e.g. `'article'`,
// `'@apostrophecms/home-page'`). Absent on non-document
// entries such as literal content URLs.
//
// ### `aposDocId` (string, present for document entries)
// The locale-independent document ID. Absent on
// non-document entries.
//
// ### `_id` (string, present for document entries)
// The full locale-qualified MongoDB `_id` of the document
// (e.g. `'xyz:en:published'`). Absent on non-document
// entries.
//
// ### `contentType` (string, present for literal content entries only)
// A MIME type such as `'text/css'` or `'text/plain'`.
// When present, this signals that the URL returns non-HTML
// content that should be proxied literally by the consumer
// (e.g. an Astro static build). The consumer should fetch
// the `url` and write the response body to disk with the
// given content type instead of rendering it as a page.
// When absent, the URL is an ordinary HTML page.
//
// Document entries (pages, pieces, etc.) should NEVER set
// `contentType`. Consumers such as the sitemap module and
// Astro use its absence to identify renderable HTML pages
// vs literal assets.
//
// Literal content entries should NOT include `changefreq`
// or `priority` — those are only meaningful for document
// entries in sitemaps.
//
// ### `sitemap` (boolean, optional, default `true`)
// When explicitly set to `false`, the entry is excluded
// from sitemap generation but still included in static
// builds. Useful for URLs that must exist in the build
// (e.g. paginated filter pages, CSS files) but should
// not appear in `sitemap.xml`. If omitted, the entry is
// included in sitemaps.
//
// ### `changefreq` (string, optional, document entries only)
// Sitemap hint (e.g. `'daily'`). Included for legacy
// sitemap compatibility. Google explicitly ignores this.
// Must NOT be set on literal content entries.
//
// ### `priority` (number, optional, document entries only)
// Sitemap priority hint (e.g. `1.0`). Included for legacy
// sitemap compatibility. Google explicitly ignores this.
// Must NOT be set on literal content entries.
//
// ## Literal content entries
//
// Some entries represent non-HTML content that should be
// served literally with a specific MIME type, such as
// CSS stylesheets, `robots.txt`, `llms.txt`, etc. These
// entries include a `contentType` property (e.g.
// `text/css`, `text/plain`). Consumers of this API
// (e.g. an Astro static build) should fetch the `url`
// and serve the response body with the specified content
// type rather than rendering it as an HTML page.
//
// ## Extension points
//
// This method emits the
// `@apostrophecms/url:getAllUrlMetadata` event, so
// that handlers in any module can add URLs to the
// results. The default implementation already calls
// `getAllUrlMetadata` on every doc type manager that
// has at least one doc in the database, so listening
// for the event is only for edge cases that can't be
// covered by extending `getAllUrlMetadata` or
// `getUrlMetadata` on such a manager.
//
// Handlers should respect `excludeTypes`.
//
// **Important:** handlers that push literal content
// entries must provide a relative, prefix-free `url`
// path (e.g. `/robots.txt`). The base URL stripping
// that runs after collection only applies to document
// entries whose `url` starts with the effective base
// URL — it will not strip a prefix that was manually
// added by a handler. Providing a relative path
// ensures correct behaviour regardless of whether a
// prefix is configured.
//
// ## Attachment metadata (`attachments`)
//
// When `options.attachments` is a truthy object, attachment
// metadata is collected after URL enumeration and returned
// alongside the pages. The option accepts:
//
// - `scope` (`'used'` | `'all'`): `'used'` (default) limits
// to attachments referenced by documents present in the
// results. `'all'` returns every non-archived attachment.
// - `sizes` (string[]): explicit image sizes to include.
// - `skipSizes` (string[]): image sizes to exclude.
//
// `attachments.uploadsUrl` is the uploadfs base URL prefix
// (e.g. `/uploads` or `https://cdn.example.com`).
//
// Each entry in `attachments.results` contains:
// - `_id` (string): the attachment record ID.
// - `urls` (array): `{ size, path }` objects where `path`
// is the uploadfs-relative file path.
//
// After attachment metadata is collected, the
// `@apostrophecms/url:getAllAttachmentMetadata` event is
// emitted. Handlers receive `(req, results, options)` where
// `results` is the attachments results array and `options`
// includes `{ sizes, skipSizes, scope, uploadsUrl }`. This
// is an escape hatch for edge cases where a module needs to
// contribute additional attachment entries or modify the
// results programmatically.
//
async getAllUrlMetadata(req, { excludeTypes = [], attachments = false } = {}) {
// Ensure global doc is available for event handlers
// that may need it (e.g. @apostrophecms/styles)
await self.apos.global.addGlobalToData(req);
const results = [];
const allAttachmentDocIds = new Set();
const collectDocIds = !!attachments && attachments.scope !== 'all';
const types = await self.apos.doc.db.distinct('type');
for (const type of types) {
if (!excludeTypes.includes(type)) {
const manager = self.apos.doc.getManager(type);
if (!manager?.getAllUrlMetadata) {
continue;
}
const {
metadata,
attachmentDocIds
} = await manager
.getAllUrlMetadata(req, { attachments: collectDocIds });
for (const entry of metadata) {
results.push(entry);
}
for (const id of attachmentDocIds) {
allAttachmentDocIds.add(id);
}
}
}
await self.emit('getAllUrlMetadata', req, results, { excludeTypes });
const response = {
pages: results,
attachments: null
};
if (attachments) {
const {
sizes, skipSizes, scope
} = attachments;
const docIds = collectDocIds
? [ ...allAttachmentDocIds ]
: undefined;
response.attachments = {
uploadsUrl: self.apos.attachment.uploadfs.getUrl(),
results: await self.apos.attachment.getStaticMetadata({
docIds,
sizes,
skipSizes
})
};
await self.emit(
'getAllAttachmentMetadata',
req,
response.attachments.results,
{
sizes,
skipSizes,
scope,
uploadsUrl: response.attachments.uploadsUrl
}
);
}
// Strip the base URL (origin + prefix) that `_url` values
// were built with, producing purely relative, prefix-free
// paths (e.g. `/about`, `/fr/articles/page/2`).
const effectiveBaseUrl = self.getBaseUrl(req);
if (effectiveBaseUrl) {
for (const entry of response.pages) {
if (entry.url?.startsWith(effectiveBaseUrl)) {
entry.url = entry.url.slice(effectiveBaseUrl.length) || '/';
}
}
}
// Strip the backend origin from uploadsUrl so that the
// consumer receives a relative, prefix-qualified path
// (e.g. `/uploads` or `/cms/uploads`).
// CDN URLs (different origin) and already-relative URLs
// are left untouched.
if (response.attachments) {
const baseUrl = self.apos.baseUrl || '';
if (baseUrl && response.attachments.uploadsUrl.startsWith(baseUrl)) {
response.attachments.uploadsUrl =
response.attachments.uploadsUrl.slice(baseUrl.length);
}
}
return response;
},
// Returns a string suitable to append to the original page URL when we're
// specifying a particular filter and a page number. Pages start with 1
getChoiceFilter(name, value, page) {
if (value === null) {
return '';
}
name = encodeURIComponent(name);
value = encodeURIComponent(value);
if (self.options.static) {
return `/${name}/${value}${page > 1 ? `/page/${page}` : ''}`;
} else {
return `?${name}=${value}${page > 1 ? `&page=${page}` : ''}`;
}
},
// Returns a string suitable to append to the original page URL when all we're
// adding is a page number. Pages start with 1
getPageFilter(page) {
if (page <= 1) {
return '';
}
if (self.options.static) {
return `/page/${page}`;
} else {
return `?page=${page}`;
}
}
};
}
};