UNPKG

gatsby-source-wordpress

Version:

Source data from WordPress in an efficient and scalable way.

525 lines (510 loc) • 20.3 kB
"use strict";

var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");

exports.__esModule = true;
exports.createMediaItemNode = exports.addImageCDNFieldsToNode = void 0;
exports.default = fetchReferencedMediaItemsAndCreateNodes;
exports.stripImageSizesFromUrl = exports.fetchMediaItemsBySourceUrl = exports.fetchMediaItemsById = void 0;

var _chunk = _interopRequireDefault(require("lodash/chunk"));
var _store = require("../../../store");
var _atob = _interopRequireDefault(require("atob"));
var _filesize = _interopRequireDefault(require("filesize"));
var _pQueue = _interopRequireDefault(require("p-queue"));
var _createLocalFileNode = require("../create-nodes/create-local-file-node");
var _fetchNodesPaginated = require("./fetch-nodes-paginated");
var _helpers = require("../../create-schema-customization/helpers");
var _fetchGraphql = _interopRequireDefault(require("../../../utils/fetch-graphql"));
var _uniq = _interopRequireDefault(require("lodash/uniq"));
var _url = _interopRequireDefault(require("url"));
var _path = _interopRequireDefault(require("path"));
var _getGatsbyApi = require("../../../utils/get-gatsby-api");
var _formatLogMessage = require("../../../utils/format-log-message");
var _processNode = require("../create-nodes/process-node");

// Number of MediaItem node queries (GraphQL requests) allowed in flight at once.
const nodeFetchConcurrency = 2;

// Used when GATSBY_CONCURRENT_DOWNLOAD is unset or not usable as a concurrency.
const defaultDownloadConcurrency = 200;

/**
 * Turns the raw GATSBY_CONCURRENT_DOWNLOAD value into a usable concurrency.
 *
 * Unset, empty, non-numeric and sub-1 values fall back to the default instead
 * of being handed to the queue constructor as NaN / 0.
 *
 * @param {string|undefined} rawValue value read from the environment
 * @returns {number} a concurrency that is >= 1
 */
const resolveDownloadConcurrency = rawValue => {
  if (rawValue === null || rawValue === undefined) {
    return defaultDownloadConcurrency;
  }

  const parsed = Number(rawValue);

  // NaN fails this comparison too, so it also falls back to the default.
  return parsed >= 1 ? parsed : defaultDownloadConcurrency;
};

const concurrency = resolveDownloadConcurrency(process.env.GATSBY_CONCURRENT_DOWNLOAD);

// Leave room for the node queries, unless that would leave the file queue
// with no more slots than the node queue has.
const adjustedConcurrency = concurrency - nodeFetchConcurrency;
const normalizedConcurrency = adjustedConcurrency <= nodeFetchConcurrency ? concurrency : adjustedConcurrency;

// Queue for fetching the media files themselves.
const mediaFileFetchQueue = new _pQueue.default({
  concurrency: normalizedConcurrency,
  carryoverConcurrencyCount: true
});

// Queue for the GraphQL requests that fetch MediaItem nodes.
const mediaNodeFetchQueue = new _pQueue.default({
  concurrency: nodeFetchConcurrency,
  carryoverConcurrencyCount: true
});

// retryKey -> number of times that task has already been re-queued.
const previouslyRetriedPromises = new Map();

/**
 * Adds a task to `queue` and re-queues it at the end of the queue each time
 * it throws, up to 5 retries. After that the error is passed to
 * `helpers.reporter.panic`.
 *
 * From the third attempt onwards the task waits `timesRetried * 500` ms
 * before running.
 *
 * @param {object} args
 * @param {object} [args.node] forwarded to `retryPromise`
 * @param {object} args.helpers Gatsby helpers; `helpers.reporter` is used for logging
 * @param {Function} args.createContentDigest forwarded to `retryPromise`
 * @param {object} args.actions forwarded to `retryPromise`
 * @param {object} args.queue queue exposing `add(fn)`
 * @param {string} args.retryKey identifies the task in the retry counter and in logs
 * @param {Function} args.retryPromise async task to run; receives these args plus `timesRetried`
 * @returns {void}
 */
const pushPromiseOntoRetryQueue = ({
  node,
  helpers,
  createContentDigest,
  actions,
  queue,
  retryKey,
  retryPromise
}) => {
  queue.add(async () => {
    const timesRetried = previouslyRetriedPromises.get(retryKey) || 0;

    if (timesRetried >= 2) {
      // this has already been retried more than once, pause before trying again.
      await new Promise(resolve => setTimeout(resolve, timesRetried * 500));
    }

    try {
      await retryPromise({
        createContentDigest,
        actions,
        helpers,
        node,
        queue,
        retryKey,
        retryPromise,
        timesRetried
      });
    } catch (error) {
      // Errors that should exit are handled one level down
      // in createLocalFileNode
      if (timesRetried < 5) {
        // not out of retries yet: put the task at the end of the queue
        // so it is attempted again later
        if (timesRetried > 1) {
          helpers.reporter.info(`pushing ${retryKey} to the end of the request queue.`);
          helpers.reporter.info(`Previously retried ${timesRetried} times already.`);
        }

        previouslyRetriedPromises.set(retryKey, timesRetried + 1);
        pushPromiseOntoRetryQueue({
          node,
          helpers,
          createContentDigest,
          actions,
          queue,
          retryKey,
          retryPromise
        });
      } else {
        helpers.reporter.info(`\n\nalready re-queued ${retryKey} 5 times :( sorry.\nTry lowering process.env.GATSBY_CONCURRENT_DOWNLOAD.\nIt's currently set to ${process.env.GATSBY_CONCURRENT_DOWNLOAD}\n\n`);

        // every retry has been used up, no choice but to give up
        helpers.reporter.panic(error);
      }
    }
  });
};

/**
 * Returns a copy of a MediaItem node with the extra fields `url`, `filename`,
 * `filesize`, `width`, `height` and `placeholderUrl` (plus `contentType` and
 * `mimeType` copied from the node). Nodes whose `__typename` does not contain
 * `MediaItem` are returned untouched.
 *
 * @param {object} node candidate node
 * @param {object} pluginOptions forwarded to `getPlaceholderUrlFromMediaItemNode`
 * @returns {object} the enriched copy, or `node` itself when it is not a MediaItem
 */
const addImageCDNFieldsToNode = (node, pluginOptions) => {
  const isMediaItem = node != null && node.__typename != null && node.__typename.includes(`MediaItem`);

  if (!isMediaItem) {
    return node;
  }

  const placeholderUrl = (0, _processNode.getPlaceholderUrlFromMediaItemNode)(node, pluginOptions);
  const url = node.sourceUrl || node.mediaItemUrl;
  const mediaDetails = node.mediaDetails != null ? node.mediaDetails : undefined;

  const readDetail = key => mediaDetails === undefined ? undefined : mediaDetails[key];

  // prefer the last path segment of mediaDetails.file, otherwise derive the
  // file name from the url's pathname
  const detailsFile = readDetail(`file`);
  const filenameFromDetails = detailsFile != null ? detailsFile.split(`/`).pop() : undefined;
  const filename = filenameFromDetails || _path.default.basename(_url.default.parse(url).pathname);

  // placeholder fallback chain: computed placeholder -> first size's url -> full url
  const sizes = readDetail(`sizes`);
  const firstSize = sizes != null ? sizes[0] : undefined;
  const firstSizeUrl = firstSize != null ? firstSize.sourceUrl : undefined;
  let resolvedPlaceholderUrl = url;

  if (placeholderUrl != null) {
    resolvedPlaceholderUrl = placeholderUrl;
  } else if (firstSizeUrl != null) {
    resolvedPlaceholderUrl = firstSizeUrl;
  }

  return {
    ...node,
    url,
    contentType: node.contentType,
    mimeType: node.mimeType,
    filename,
    filesize: readDetail(`fileSize`),
    width: readDetail(`width`),
    height: readDetail(`height`),
    placeholderUrl: resolvedPlaceholderUrl
  };
};

exports.addImageCDNFieldsToNode = addImageCDNFieldsToNode;

/**
 * Formats a byte count for the "slow fetch" log line.
 *
 * The formatter call is guarded because this runs inside a timer callback,
 * where a thrown error would be an uncaught exception.
 * NOTE(review): assumes filesize() can throw for a missing/non-numeric size - confirm.
 *
 * @param {number} [bytes]
 * @returns {string}
 */
const describeFileSize = bytes => {
  try {
    return (0, _filesize.default)(bytes);
  } catch (error) {
    return `of unknown size`;
  }
};

/**
 * Creates a Gatsby node for a remote MediaItem node, optionally creating a
 * local file node for it first (when `pluginOptions.type.MediaItem.createFileNodes`
 * is set). The work runs on the media file queue with retries.
 *
 * @param {object} args
 * @param {object} args.node remote MediaItem node
 * @param {object} args.helpers Gatsby helpers (`getNode`, `reporter`)
 * @param {Function} args.createContentDigest
 * @param {object} args.actions Gatsby actions (`createNode`)
 * @param {string} [args.parentName] forwarded to `createLocalFileNode`
 * @param {object[]} [args.allMediaItemNodes] collector the remote node is pushed onto
 * @returns {Promise<object>} the already existing node, or the node that was created
 */
const createMediaItemNode = async ({
  node,
  helpers,
  createContentDigest,
  actions,
  parentName,
  allMediaItemNodes = []
}) => {
  const existingNode = await helpers.getNode(node.id);

  if (existingNode) {
    return existingNode;
  }

  (0, _store.getStore)().dispatch.logger.incrementActivityTimer({
    typeName: `MediaItem`,
    by: 1
  });
  allMediaItemNodes.push(node);

  // resolved from inside the queued task once the Gatsby node exists
  let resolveFutureNode;
  const futureNode = new Promise(resolve => {
    resolveFutureNode = resolve;
  });

  pushPromiseOntoRetryQueue({
    node,
    helpers,
    createContentDigest,
    actions,
    queue: mediaFileFetchQueue,
    retryKey: node.mediaItemUrl,
    retryPromise: async ({
      createContentDigest,
      actions,
      helpers,
      node,
      retryKey,
      timesRetried
    }) => {
      const pluginOptions = (0, _getGatsbyApi.getPluginOptions)();
      const {
        createFileNodes
      } = pluginOptions.type.MediaItem;

      const fetchTimeout = setTimeout(() => {
        helpers.reporter.log((0, _formatLogMessage.formatLogMessage)(`Fetching ${node.mediaItemUrl} is taking a long time time (longer than 15 seconds). This file is ${describeFileSize(node.fileSize)}`));
      }, 15000);

      let localFileNode;

      try {
        localFileNode = createFileNodes ? await (0, _createLocalFileNode.createLocalFileNode)({
          mediaItemNode: node,
          helpers,
          parentName
        }) : null;
      } finally {
        // also clear on failure, otherwise every failed attempt leaves a
        // pending "slow fetch" timer behind
        clearTimeout(fetchTimeout);
      }

      if (timesRetried > 1) {
        helpers.reporter.info(`Successfully fetched ${retryKey} after retrying ${timesRetried} times`);
      }

      const mediaItemNode = addImageCDNFieldsToNode({
        ...node,
        parent: null,
        internal: {
          contentDigest: createContentDigest(node),
          type: (0, _helpers.buildTypeName)(`MediaItem`)
        }
      }, pluginOptions);

      if (localFileNode != null && localFileNode.id) {
        mediaItemNode.localFile = {
          id: localFileNode.id
        };
      }

      const normalizedNode = (0, _fetchNodesPaginated.normalizeNode)({
        node: mediaItemNode,
        nodeTypeName: `MediaItem`
      });
      await actions.createNode(normalizedNode);
      return resolveFutureNode(mediaItemNode);
    }
  });

  return futureNode;
};

exports.createMediaItemNode = createMediaItemNode;

/**
 * Escapes every regular expression metacharacter in `value` so it can be
 * embedded in a RegExp source and matched literally.
 *
 * @param {string} value
 * @returns {string}
 */
const escapeRegExp = value => value.replace(/[.*+?^${}()|[\]\\]/g, `\\$&`);

/**
 * Returns the file extension (including the leading dot) of the url's
 * pathname, or an empty string when there is none.
 *
 * @param {string} url
 * @returns {string}
 */
const urlToFileExtension = url => {
  const {
    pathname
  } = _url.default.parse(url);

  return _path.default.extname(pathname);
};

/**
 * Removes an image resize signifier (`-500x1000` / `_500x1000`) from a url.
 *
 * When the url has a file extension, only a signifier that sits directly in
 * front of that extension is removed and the rest of the url (including any
 * query string) is left as it was. Without an extension the first signifier
 * found is removed.
 *
 * @param {string} url
 * @returns {string} the url without the resize signifier, or `url` unchanged
 */
const stripImageSizesFromUrl = url => {
  const fileExtension = urlToFileExtension(url);

  // the extension already starts with a dot, so it is escaped as a whole and
  // only looked ahead at, which keeps it in place after the replacement
  const extensionLookahead = fileExtension ? `(?=${escapeRegExp(fileExtension)})` : ``;
  const imageSizesPattern = new RegExp(`[-_][0-9]+x[0-9]+${extensionLookahead}`);

  return url.replace(imageSizesPattern, ``);
};

exports.stripImageSizesFromUrl = stripImageSizesFromUrl;

/**
 * Returns the `-scaled` variant of an image url (`image.jpeg` ->
 * `image-scaled.jpeg`). Urls that already contain `-scaled` + extension are
 * returned unchanged.
 *
 * @param {string} url
 * @returns {string}
 */
const createScaledImageUrl = url => {
  const fileExtension = urlToFileExtension(url);

  if (url.includes(`-scaled${fileExtension || ``}`)) {
    return url;
  }

  if (!fileExtension) {
    return `${url}-scaled`;
  }

  // Target the extension that ends the path (right before a query string,
  // a hash, or the end of the url) so the same text earlier in the url, for
  // instance in the host name, is not rewritten. If that pattern does not
  // match, fall back to replacing the first occurrence.
  const trailingExtensionPattern = new RegExp(`${escapeRegExp(fileExtension)}(?=[?#]|$)`);
  const target = trailingExtensionPattern.test(url) ? trailingExtensionPattern : fileExtension;

  // a replacer function keeps any `$` in the extension literal
  return url.replace(target, () => `-scaled${fileExtension}`);
};

/**
 * Takes an array of image urls and returns them plus additional candidate
 * urls, de-duplicated. The input array is not modified.
 *
 * For every url its `-scaled` variant is added. If the url contains what
 * appears to be an image resize signifier, for ex
 * https://site.com/wp-content/uploads/01/your-image-500x1000.jpeg
 * then https://site.com/wp-content/uploads/01/your-image.jpeg (and its
 * `-scaled` variant) is added as well.
 *
 * This is necessary because image nodes can only be fetched by the full
 * source url. Simply removing image resize signifiers from all urls would be
 * a mistake since someone could upload a full-size image that contains that
 * pattern - so the full size url would have 500x1000 in it, and removing it
 * would make it impossible to ever fetch this image node.
 *
 * @param {string[]} urls
 * @returns {string[]}
 */
const processAndDedupeImageUrls = urls => {
  // copy first: the candidates must not be appended to the caller's array
  const candidateUrls = [...urls];

  for (const url of urls) {
    candidateUrls.push(createScaledImageUrl(url));

    const strippedUrl = stripImageSizesFromUrl(url);

    // if the url had no image sizes, don't do anything special
    if (strippedUrl !== url) {
      candidateUrls.push(strippedUrl, createScaledImageUrl(strippedUrl));
    }
  }

  return (0, _uniq.default)(candidateUrls);
};

/**
 * Fetches MediaItem nodes by source url and creates Gatsby nodes for them.
 * Urls that already have cached file node meta are served from existing nodes
 * instead of being fetched again.
 *
 * @param {object} args
 * @param {string[]} args.mediaItemUrls source urls referenced in content
 * @param {string} args.selectionSet GraphQL selection set for MediaItem
 * @param {string} [args.builtFragments] extra GraphQL fragments appended to the query
 * @param {Function} args.createContentDigest
 * @param {object} args.actions Gatsby actions
 * @param {object} args.helpers Gatsby helpers
 * @param {object[]} [args.allMediaItemNodes] collector forwarded to `createMediaItemNode`
 * @returns {Promise<object[]>} previously cached nodes followed by newly created ones
 */
const fetchMediaItemsBySourceUrl = async ({
  mediaItemUrls,
  selectionSet,
  builtFragments,
  createContentDigest,
  actions,
  helpers,
  allMediaItemNodes = []
}) => {
  const processedMediaItemUrls = processAndDedupeImageUrls(mediaItemUrls);
  const cachedMediaItemNodeIds = [];
  const uncachedMediaItemUrls = [];

  for (const url of processedMediaItemUrls) {
    const {
      id
    } = (0, _createLocalFileNode.getFileNodeMetaBySourceUrl)(url) || {};

    if (!id) {
      // nothing cached, this media item needs to be fetched by url
      uncachedMediaItemUrls.push(url);
    } else if (!cachedMediaItemNodeIds.includes(id)) {
      // cached, and this cached image hasn't been recorded yet
      cachedMediaItemNodeIds.push(id);
    }
  }

  // take our previously cached id's and get nodes for them
  const previouslyCachedMediaItemNodes = await Promise.all(cachedMediaItemNodeIds.map(async nodeId => {
    const node = await helpers.getNode(nodeId);
    const isFileNodeWithParent = node != null && node.internal != null && node.internal.type === `File` && node.parent;

    // for a File node, return its parent node when one is found
    const parentNode = isFileNodeWithParent ? await helpers.getNode(node.parent) : null;
    return parentNode || node;
  }));

  const {
    schema: {
      perPage
    }
  } = (0, _getGatsbyApi.getPluginOptions)();

  // chunk up all our uncached media items
  const mediaItemUrlsPages = (0, _chunk.default)(uncachedMediaItemUrls, perPage);

  // there are no media items to fetch, so only the cached nodes are returned
  if (!mediaItemUrlsPages.length) {
    return previouslyCachedMediaItemNodes;
  }

  // since an async queue is used, each page gets a promise that the queued
  // task resolves once that page has been processed
  const allPromises = [];

  // for all the images we don't have cached, loop through and get them all
  for (const [index, sourceUrls] of mediaItemUrlsPages.entries()) {
    const curPromise = new Promise(resolve => {
      pushPromiseOntoRetryQueue({
        helpers,
        createContentDigest,
        actions,
        queue: mediaNodeFetchQueue,
        retryKey: `Media Item by sourceUrl query #${index}, digest: ${createContentDigest(sourceUrls.join())}`,
        retryPromise: async () => {
          // JSON.stringify emits a quoted, escaped string literal, so a url
          // containing quotes or backslashes cannot break the query
          const mediaItemFields = sourceUrls.map((sourceUrl, urlIndex) =>
          /* GraphQL */
          `
              mediaItem__index_${urlIndex}: mediaItem(id: ${JSON.stringify(String(sourceUrl))}, idType: SOURCE_URL) {
                ...MediaItemFragment
              }
            `).join(` `);
          const query =
          /* GraphQL */
          `
            query MEDIA_ITEMS {
              ${mediaItemFields}
            }

            fragment MediaItemFragment on MediaItem {
              ${selectionSet}
            }

            ${builtFragments || ``}
          `;
          const {
            data
          } = await (0, _fetchGraphql.default)({
            query,
            variables: {
              first: perPage,
              after: null
            },
            errorContext: `Error occurred while fetching "MediaItem" nodes in inline html.`
          });

          // Each media item is fetched on its own aliased root field. Anything
          // that returns null is there because we tried to get the source url
          // plus the source url minus resize patterns, and only the full
          // source url will return data.
          // Every result is paired with the url it was requested with BEFORE
          // the nulls are dropped, so the cache below records the right url.
          const fetchedEntries = sourceUrls.map((sourceUrl, urlIndex) => ({
            sourceUrl,
            node: data[`mediaItem__index_${urlIndex}`]
          })).filter(entry => Boolean(entry.node));

          // take the WPGraphQL nodes we received and create Gatsby nodes out of them
          const nodes = await Promise.all(fetchedEntries.map(entry => createMediaItemNode({
            node: entry.node,
            helpers,
            createContentDigest,
            actions,
            allMediaItemNodes,
            parentName: `Fetching referenced MediaItem nodes by sourceUrl`
          })));

          nodes.forEach((node, nodeIndex) => {
            if (!node || !node.localFile || !node.localFile.id) {
              return;
            }

            // this is how we're caching nodes we've previously fetched.
            (0, _store.getStore)().dispatch.imageNodes.pushNodeMeta({
              id: node.localFile.id,
              sourceUrl: fetchedEntries[nodeIndex].sourceUrl,
              modifiedGmt: node.modifiedGmt
            });
          });

          resolve(nodes);
        }
      });
    });
    allPromises.push(curPromise);
  }

  await mediaNodeFetchQueue.onIdle();
  await mediaFileFetchQueue.onIdle();

  const allResults = await Promise.all(allPromises);
  return [...previouslyCachedMediaItemNodes, ...allResults.flat()];
};

exports.fetchMediaItemsBySourceUrl = fetchMediaItemsBySourceUrl;

/**
 * Fetches MediaItem nodes by relay id and creates Gatsby nodes for them.
 * Ids for which `helpers.getNode` already returns a node are skipped.
 *
 * @param {object} args
 * @param {string[]} args.mediaItemIds relay ids of the referenced media items
 * @param {object} args.settings forwarded to `paginatedWpNodeFetch`
 * @param {string} args.url forwarded to `paginatedWpNodeFetch`
 * @param {string} args.selectionSet GraphQL selection set for MediaItem
 * @param {string} [args.builtFragments] extra GraphQL fragments appended to the query
 * @param {Function} args.createContentDigest
 * @param {object} args.actions Gatsby actions
 * @param {object} args.helpers Gatsby helpers
 * @param {object} args.typeInfo provides `pluralName` and `nodesTypeName`
 * @returns {Promise<object[]>} the nodes returned by `createMediaItemNode`
 */
const fetchMediaItemsById = async ({
  mediaItemIds,
  settings,
  url,
  selectionSet,
  builtFragments,
  createContentDigest,
  actions,
  helpers,
  typeInfo
}) => {
  const newMediaItemIds = mediaItemIds.filter(id => !helpers.getNode(id));
  const {
    schema: {
      perPage
    }
  } = (0, _getGatsbyApi.getPluginOptions)();

  if (!newMediaItemIds.length) {
    return [];
  }

  const chunkedIds = (0, _chunk.default)(newMediaItemIds, perPage);
  const allMediaItemNodes = [];
  const allPromises = [];

  for (const [index, relayIds] of chunkedIds.entries()) {
    const curPromise = new Promise(resolve => {
      pushPromiseOntoRetryQueue({
        helpers,
        createContentDigest,
        actions,
        queue: mediaNodeFetchQueue,
        retryKey: `Media Item query #${index}, digest: ${createContentDigest(relayIds.join())}`,
        retryPromise: async () => {
          // relay id's are base64 encoded from strings like attachment:89381
          // where 89381 is the id we want for our query
          // so we split on the : and get the last item in the array, which is the id
          // once we can get a list of media items by relay id's, we can remove atob
          const ids = relayIds.map(relayId => (0, _atob.default)(relayId).split(`:`).slice(-1)[0]);
          const query = `
            query MEDIA_ITEMS($in: [ID]) {
              mediaItems(first: ${perPage}, where:{ in: $in }) {
                nodes {
                  ${selectionSet}
                }
              }
            }

            ${builtFragments || ``}
          `;
          const allNodesOfContentType = await (0, _fetchNodesPaginated.paginatedWpNodeFetch)({
            first: perPage,
            contentTypePlural: typeInfo.pluralName,
            nodeTypeName: typeInfo.nodesTypeName,
            query,
            url,
            helpers,
            settings,
            in: ids,
            // this allows us to retry-on-end-of-queue
            throwFetchErrors: true
          });
          const nodes = await Promise.all(allNodesOfContentType.map(node => createMediaItemNode({
            node,
            helpers,
            createContentDigest,
            actions,
            allMediaItemNodes,
            referencedMediaItemNodeIds: mediaItemIds,
            parentName: `Fetching referenced MediaItem nodes by id`
          })));
          resolve(nodes);
        }
      });
    });
    allPromises.push(curPromise);
  }

  await mediaNodeFetchQueue.onIdle();
  await mediaFileFetchQueue.onIdle();

  const allResults = await Promise.all(allPromises);
  return allResults.flat();
};

exports.fetchMediaItemsById = fetchMediaItemsById;

/**
 * Fetches every referenced MediaItem (by relay id and/or by source url) and
 * creates Gatsby nodes for them. Returns an empty array without fetching
 * when MediaItem is excluded via `pluginOptions.type.MediaItem.exclude`.
 *
 * @param {object} args
 * @param {string[]} [args.referencedMediaItemNodeIds] relay ids to fetch
 * @param {string[]} [args.mediaItemUrls] source urls to fetch
 * @returns {Promise<object[]>} all truthy nodes from both lookups, by-id results first
 */
async function fetchReferencedMediaItemsAndCreateNodes({
  referencedMediaItemNodeIds,
  mediaItemUrls
}) {
  const state = (0, _store.getStore)().getState();
  const queryInfo = state.remoteSchema.nodeQueries.mediaItems;
  const {
    helpers,
    pluginOptions
  } = state.gatsbyApi;

  // don't fetch media items if they are excluded via pluginOptions
  const mediaItemOptions = pluginOptions.type != null ? pluginOptions.type.MediaItem : undefined;

  if (mediaItemOptions != null && mediaItemOptions.exclude) {
    return [];
  }

  const {
    createContentDigest,
    actions
  } = helpers;
  const {
    url
  } = pluginOptions;
  const {
    typeInfo,
    settings,
    selectionSet,
    builtFragments
  } = queryInfo;

  let createdNodes = [];

  if (referencedMediaItemNodeIds != null && referencedMediaItemNodeIds.length) {
    createdNodes = await fetchMediaItemsById({
      mediaItemIds: referencedMediaItemNodeIds,
      settings,
      url,
      selectionSet,
      builtFragments,
      createContentDigest,
      actions,
      helpers,
      typeInfo
    });
  }

  if (mediaItemUrls != null && mediaItemUrls.length) {
    const nodesSourcedByUrl = await fetchMediaItemsBySourceUrl({
      mediaItemUrls,
      settings,
      url,
      selectionSet,
      builtFragments,
      createContentDigest,
      actions,
      helpers,
      typeInfo
    });
    createdNodes = [...createdNodes, ...nodesSourcedByUrl];
  }

  return createdNodes.filter(Boolean);
}
//# sourceMappingURL=fetch-referenced-media-items.js.map