gatsby-source-wordpress
Version:
Source data from WordPress in an efficient and scalable way.
427 lines (398 loc) • 12.2 kB
JavaScript
;
var _stringEncoding = require("../../../../utils/string-encoding");
var _store = require("../../../../store");
const fs = require(`fs-extra`);
const {
remoteFileDownloaderBarPromise
} = require(`./progress-bar-promise`);
const got = require(`got`);
const {
createContentDigest
} = require(`gatsby-core-utils`);
const path = require(`path`);
const {
isWebUri
} = require(`valid-url`);
const Queue = require(`better-queue`);
const readChunk = require(`read-chunk`);
const fileType = require(`file-type`);
const {
createFileNode
} = require(`gatsby-source-filesystem/create-file-node`);
const {
getRemoteFileExtension,
getRemoteFileName,
createFilePath
} = require(`gatsby-source-filesystem/utils`);
/**
 * Builds the cache key under which the response headers of a given url are
 * stored between requests.
 *
 * @param {String} url
 * @return {*} the result of `withPluginKey` for the url-specific key
 */
const cacheId = url => {
  // read the export at call time and invoke it unbound, like the compiled import did
  const { withPluginKey } = _store;
  return withPluginKey(`create-remote-file-node-${url}`);
};
// Progress bar shared by all downloads; assigned from reporter.createProgress()
// by the exported function when a job is added while totalJobs is 0
let bar;
// Keep track of the total number of jobs we push in the queue
// (decremented again when a download fails, reset to 0 once the bar is done)
let totalJobs = 0;
/********************
* Type Definitions *
********************/
/**
* @typedef {GatsbyCache}
* @see gatsby/packages/gatsby/utils/cache.js
*/
/**
* @typedef {Reporter}
* @see gatsby/packages/gatsby-cli/lib/reporter.js
*/
/**
* @typedef {Auth}
* @type {Object}
* @property {String} htaccess_pass
* @property {String} htaccess_user
*/
/**
* @typedef {CreateRemoteFileNodePayload}
* @type {Object}
* @description Create Remote File Node Payload
*
* @property {String} url
* @property {GatsbyCache} cache
* @property {Function} createNode
* @property {Function} getCache
* @property {Auth} [auth]
* @property {Reporter} [reporter]
*/
/**
 * Reads an integer setting from an environment variable.
 * Falls back when the variable is unset, empty or does not parse as a number,
 * so a typo can never turn a timeout or a retry limit into NaN.
 *
 * @param {String} name name of the environment variable
 * @param {number} fallback value used when the variable is missing or invalid
 * @return {number}
 */
const readIntEnv = (name, fallback) => {
  const raw = process.env[name];
  if (!raw) {
    return fallback;
  }
  const parsed = parseInt(raw, 10);
  return Number.isNaN(parsed) ? fallback : parsed;
};
// How many times a stalled download is attempted before giving up
const STALL_RETRY_LIMIT = readIntEnv(`GATSBY_STALL_RETRY_LIMIT`, 3);
// Delay (ms) of the stall timer that guards a running download
const STALL_TIMEOUT = readIntEnv(`GATSBY_STALL_TIMEOUT`, 30000);
// Passed to got as its `send` timeout (ms)
const CONNECTION_TIMEOUT = readIntEnv(`GATSBY_CONNECTION_TIMEOUT`, 30000);
/********************
* Queue Management *
********************/
/**
 * Queue
 * Use the task's url as the id
 * When pushing a task with the same id, prefer the original task
 * as it's already in the processing cache
 */
let queue = null;
/**
 * getQueue
 * --
 * Lazily creates the shared download queue and returns it.
 * The queue is only created once, so `limit` has no effect after the first call.
 *
 * @param {number} [limit] number of concurrent downloads, defaults to 100
 * @return {Queue}
 */
const getQueue = limit => {
  if (!queue) {
    queue = new Queue(pushToQueue, {
      id: `url`,
      // better-queue's merge callback is error-first: cb(error, mergedTask).
      // Passing the task as the first argument would report it as an error.
      merge: (old, _, cb) => cb(null, old),
      concurrent: limit || 100
    });
    // when the queue is empty we stop the progressbar
    queue.on(`drain`, async () => {
      // only one drain handler waits on the promise at a time
      if (awaitingCreateRemoteFileNodePromise) {
        return;
      }
      awaitingCreateRemoteFileNodePromise = true;
      await remoteFileDownloaderBarPromise;
      awaitingCreateRemoteFileNodePromise = false;
      if (bar) {
        // a timer from an earlier drain may still be pending; replace it so
        // the bar is finished exactly once
        if (doneQueueTimeout) {
          clearTimeout(doneQueueTimeout);
        }
        // this is to give us a little time to wait and see if there
        // will be more jobs added with a break between
        // sometimes the queue empties but then is recreated within 2 secs
        doneQueueTimeout = setTimeout(() => {
          bar.done();
          totalJobs = 0;
        }, 2000);
      }
    });
  }
  return queue;
};
// Handle of the pending "finish the progress bar" timer, if one was scheduled
let doneQueueTimeout;
// True while a drain handler is waiting on remoteFileDownloaderBarPromise
let awaitingCreateRemoteFileNodePromise;
/**
* @callback {Queue~queueCallback}
* @param {*} error
* @param {*} result
*/
/**
 * pushToQueue
 * --
 * Queue worker: processes a single task and reports the outcome through the
 * node-style callback handed in by the queue.
 *
 * @param {CreateRemoteFileNodePayload} task
 * @param {Queue~queueCallback} cb called with (null, fileNode) or (error)
 * @return {Promise<*>} resolves with whatever `cb` returns
 */
async function pushToQueue(task, cb) {
  try {
    // the success callback stays inside the try, so an error it throws is
    // reported through cb as well
    return cb(null, await processRemoteNode(task));
  } catch (error) {
    return cb(error);
  }
}
/******************
* Core Functions *
******************/
/**
 * requestRemoteNode
 * --
 * Download the requested file into `tmpFilename`.
 *
 * A stall timer is armed once the response starts and re-armed whenever got
 * reports download progress. When it fires, the transfer is aborted and the
 * download is retried until STALL_RETRY_LIMIT attempts have been made.
 * On a final failure the url is removed from the processing cache and from the
 * progress bar total. This happens at most once per attempt.
 *
 * @param {String} url
 * @param {Headers} headers
 * @param {String} tmpFilename
 * @param {Object} httpOpts extra options spread into the got request options
 * @param {number} attempt
 * @return {Promise<Object>} Resolves with the response object emitted by got's `response` event
 */
const requestRemoteNode = (url, headers, tmpFilename, httpOpts, attempt = 1) => new Promise((resolve, reject) => {
  let timeout;
  // Flipped as soon as this attempt resolves, rejects or hands over to a
  // retry, so late stream events can't touch the shared state a second time
  // or delete the temp file a retry is writing to.
  let settled = false;
  const clearStallTimer = () => {
    if (timeout) {
      clearTimeout(timeout);
      timeout = undefined;
    }
  };
  // Remove the job from the processing cache and the progress bar total
  const dropJob = () => {
    processingCache[url] = null;
    totalJobs -= 1;
    bar.total = totalJobs;
  };
  const fail = error => {
    settled = true;
    clearStallTimer();
    dropJob();
    reject(error);
  };
  // Called if we stall without receiving any data
  const handleTimeout = () => {
    if (settled) {
      return;
    }
    settled = true;
    timeout = undefined;
    // Abort the stalled transfer so it can't keep writing (or erroring)
    // while a retry reuses the same temp file
    responseStream.unpipe(fsWriteStream);
    responseStream.destroy();
    fsWriteStream.close();
    fs.removeSync(tmpFilename);
    if (attempt < STALL_RETRY_LIMIT) {
      // Retry by calling ourself recursively
      resolve(requestRemoteNode(url, headers, tmpFilename, httpOpts, attempt + 1));
    } else {
      dropJob();
      reject(new Error(`Failed to download ${url} after ${STALL_RETRY_LIMIT} attempts`));
    }
  };
  const resetTimeout = () => {
    if (settled) {
      return;
    }
    clearStallTimer();
    timeout = setTimeout(handleTimeout, STALL_TIMEOUT);
  };
  const responseStream = got.stream(url, {
    headers,
    timeout: {
      send: CONNECTION_TIMEOUT
    },
    ...httpOpts
  });
  const fsWriteStream = fs.createWriteStream(tmpFilename);
  responseStream.pipe(fsWriteStream);
  // If there's a 400/500 response or other error.
  responseStream.on(`error`, error => {
    if (settled) {
      return;
    }
    fs.removeSync(tmpFilename);
    console.error(error);
    fail(error);
  });
  fsWriteStream.on(`error`, error => {
    if (settled) {
      return;
    }
    fail(error);
  });
  responseStream.on(`response`, response => {
    resetTimeout();
    fsWriteStream.on(`finish`, () => {
      if (settled) {
        return;
      }
      settled = true;
      clearStallTimer();
      resolve(response);
    });
  });
  // Data is still arriving, so the download is slow rather than stalled.
  // Only an already armed timer is extended; it is first armed on `response`.
  responseStream.on(`downloadProgress`, () => {
    if (timeout) {
      resetTimeout();
    }
  });
});
/**
 * processRemoteNode
 * --
 * Request the remote file and return the fileNode
 *
 * Sends the etag cached from a previous response as `If-None-Match`.
 * On a 200 the downloaded temp file is moved to its permanent location and
 * only then are the response headers cached. A failed move therefore can't
 * leave an etag behind that points at a file which was never stored.
 * On any other status the temp file is deleted and the job is removed from
 * the processing cache and the progress bar total.
 *
 * @param {CreateRemoteFileNodePayload} options
 * @return {Promise<Object>} Resolves with the fileNode
 */
async function processRemoteNode({
  url,
  cache,
  createNode,
  parentNodeId,
  auth = {},
  httpHeaders = {},
  createNodeId,
  ext,
  name
}) {
  const pluginCacheDir = cache.directory;
  // See if there's response headers for this url
  // from a previous request.
  const cachedHeaders = await cache.get(cacheId(url));
  const headers = {
    ...httpHeaders
  };
  if (cachedHeaders && cachedHeaders.etag) {
    headers[`If-None-Match`] = cachedHeaders.etag;
  }
  // Add htaccess authentication if passed in. This isn't particularly
  // extensible. We should define a proper API that we validate.
  const httpOpts = {};
  if (auth && auth.htaccess_pass && auth.htaccess_user) {
    headers[`Authorization`] = `Basic ${(0, _stringEncoding.b64e)(`${auth.htaccess_user}:${auth.htaccess_pass}`)}`;
  }
  // Create the temp and permanent file names for the url.
  const digest = createContentDigest(url);
  const fileName = name || getRemoteFileName(url);
  let fileExt = ext || getRemoteFileExtension(url);
  const tmpFilename = createFilePath(pluginCacheDir, `tmp-${digest}`, fileExt);
  // Fetch the file.
  const response = await requestRemoteNode(url, headers, tmpFilename, httpOpts);
  const isFreshDownload = response.statusCode === 200;
  // If the user did not provide an extension and we couldn't get one from remote file, try and guess one
  if (fileExt === ``) {
    const buffer = readChunk.sync(tmpFilename, 0, fileType.minimumBytes);
    const filetype = fileType(buffer);
    if (filetype) {
      fileExt = `.${filetype.ext}`;
    }
  }
  const filename = createFilePath(path.join(pluginCacheDir, digest), String(fileName), fileExt);
  if (isFreshDownload) {
    // Move the piped temp file to the real name.
    await fs.move(tmpFilename, filename, {
      overwrite: true
    });
    // Save the response headers for future requests, now that the file
    // they describe is in place.
    await cache.set(cacheId(url), response.headers);
  } else {
    // Else if 304, remove the empty response.
    processingCache[url] = null;
    totalJobs -= 1;
    bar.total = totalJobs;
    await fs.remove(tmpFilename);
  }
  // Create the file node.
  const fileNode = await createFileNode(filename, createNodeId, {});
  fileNode.internal.description = `File "${url}"`;
  fileNode.url = url;
  fileNode.parent = parentNodeId;
  // Override the default plugin as gatsby-source-filesystem needs to
  // be the owner of File nodes or there'll be conflicts if any other
  // File nodes are created through normal usages of
  // gatsby-source-filesystem.
  await createNode(fileNode, {
    name: `gatsby-source-filesystem`
  });
  return fileNode;
}
/**
 * Index of promises resolving to File node from remote url
 *
 * Keys are caller-supplied urls, so the index has no prototype: a lookup such
 * as `processingCache["constructor"]` must not find an inherited member.
 */
const processingCache = Object.create(null);
/**
 * pushTask
 * --
 * Hands a task to the queue and exposes its outcome as a promise.
 *
 * Promisify a task in queue
 * @param {CreateRemoteFileNodePayload} task
 * @return {Promise<Object>} resolves with the value of the queue's `finish`
 * event, rejects with an Error naming the task's url on `failed`
 */
const pushTask = task => new Promise((resolve, reject) => {
  const ticket = getQueue(task.limit).push(task);
  ticket.on(`finish`, result => {
    resolve(result);
  }).on(`failed`, err => {
    reject(new Error(`failed to process ${task.url}\n${err}`));
  });
});
/***************
* Entry Point *
***************/
/**
* createRemoteFileNode
* --
*
* Download a remote file
* First checks cache to ensure duplicate requests aren't processed
* Then pushes to a queue
*
* @param {CreateRemoteFileNodePayload} options
* @return {Promise<Object>} Returns the created node
*/
module.exports = ({
url,
cache,
createNode,
getCache,
parentNodeId = null,
auth = {},
httpHeaders = {},
createNodeId,
ext = null,
name = null,
reporter,
pluginOptions
}) => {
var _pluginOptions$type, _pluginOptions$type$M, _pluginOptions$type$M2;
const limit = pluginOptions === null || pluginOptions === void 0 ? void 0 : (_pluginOptions$type = pluginOptions.type) === null || _pluginOptions$type === void 0 ? void 0 : (_pluginOptions$type$M = _pluginOptions$type.MediaItem) === null || _pluginOptions$type$M === void 0 ? void 0 : (_pluginOptions$type$M2 = _pluginOptions$type$M.localFile) === null || _pluginOptions$type$M2 === void 0 ? void 0 : _pluginOptions$type$M2.requestConcurrency;
if (doneQueueTimeout) {
// this is to give the bar a little time to wait when there are pauses
// between file downloads.
clearTimeout(doneQueueTimeout);
}
// if the url isn't already encoded
// so decoding it doesn't do anything
if (decodeURI(url) === url) {
// encode the uri
// this accounts for special characters in filenames
url = encodeURI(url);
}
// validation of the input
// without this it's notoriously easy to pass in the wrong `createNodeId`
// see gatsbyjs/gatsby#6643
if (typeof createNodeId !== `function`) {
throw new Error(`createNodeId must be a function, was ${typeof createNodeId}`);
}
if (typeof createNode !== `function`) {
throw new Error(`createNode must be a function, was ${typeof createNode}`);
}
if (typeof getCache === `function`) {
// use cache of this plugin and not cache of function caller
cache = getCache(`gatsby-source-filesystem`);
}
if (typeof cache !== `object`) {
throw new Error(`Neither "cache" or "getCache" was passed. getCache must be function that return Gatsby cache, "cache" must be the Gatsby cache, was ${typeof cache}`);
}
// Check if we already requested node for this remote file
// and return stored promise if we did.
if (processingCache[url]) {
return processingCache[url];
}
if (!url || isWebUri(url) === undefined) {
return Promise.reject(new Error(`url passed to create-remote-file-node is either missing or not a proper web uri: ${url}`));
}
if (totalJobs === 0) {
bar = reporter.createProgress(`Downloading remote files`);
bar.start();
}
totalJobs += 1;
bar.total = totalJobs;
const fileDownloadPromise = pushTask({
url,
cache,
createNode,
parentNodeId,
createNodeId,
auth,
httpHeaders,
ext,
name,
limit
});
processingCache[url] = fileDownloadPromise.then(node => {
bar.tick();
return node;
});
return processingCache[url];
};
//# sourceMappingURL=index.js.map