@netlify/content-engine
JavaScript
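// Interop helper emitted by the TypeScript compiler for default imports.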
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.fetchRemoteFile = fetchRemoteFile;
const file_type_1 = __importDefault(require("file-type"));
const path_1 = __importDefault(require("path"));
const fs_extra_1 = __importDefault(require("fs-extra"));
const fastq_1 = __importDefault(require("fastq"));
const create_content_digest_1 = require("./create-content-digest");
const filename_utils_1 = require("./filename-utils");
const path_2 = require("./path");
const fetch_file_1 = require("./remote-file-utils/fetch-file");
const get_storage_1 = require("./utils/get-storage");
const mutex_1 = require("./mutex");
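// Maximum number of downloads processed concurrently by the fastq queue below;
// configurable via the GATSBY_CONCURRENT_DOWNLOAD env var, defaulting to 50.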
const GATSBY_CONCURRENT_DOWNLOAD = process.env.GATSBY_CONCURRENT_DOWNLOAD
    ? parseInt(process.env.GATSBY_CONCURRENT_DOWNLOAD, 10) || 0
    : 50;
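// Destination paths already copied by this process, so repeated requests for the
// same file can skip redundant copies.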
const alreadyCopiedFiles = new Set();
/**
 * Downloads a remote file to disk
 */
async function fetchRemoteFile(args) {
    // when a cacheKey is present we can do more persistent caching
    if (args.cacheKey) {
        const storage = (0, get_storage_1.getStorage)((0, get_storage_1.getDatabaseDir)());
        const info = storage.remoteFileInfo.get(args.url);
        const fileDirectory = (args.cache ? args.cache.directory : args.directory);
        if (info?.cacheKey === args.cacheKey && fileDirectory) {
            const cachedPath = path_1.default.join(info.directory, info.path);
            const downloadPath = path_1.default.join(fileDirectory, info.path);
            if (await fs_extra_1.default.pathExists(cachedPath)) {
                // If the download path is not inside the public directory, we don't need to copy
                // the cached file, as it won't be part of the build.
                if (isPublicPath(downloadPath) && cachedPath !== downloadPath) {
                    return copyCachedPathToDownloadPath({ cachedPath, downloadPath });
                }
                return cachedPath;
            }
        }
    }
    return pushTask({ args });
}
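// A minimal usage sketch (hypothetical caller and values, not part of this module):
// passing a cacheKey lets later builds reuse the previously downloaded file when the
// persisted entry for this url still matches. E.g. inside an async function:
//
//   const { fetchRemoteFile } = require("./fetch-remote-file");
//
//   const localPath = await fetchRemoteFile({
//     url: `https://example.com/photo.jpg`,
//     directory: `/site/public/static`, // or pass `cache: { directory }` instead
//     cacheKey: `photo-v1`,
//   });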
function isPublicPath(downloadPath) {
    return downloadPath.startsWith(path_1.default.join(global.__GATSBY?.root ?? process.cwd(), `public`));
}
async function copyCachedPathToDownloadPath({ cachedPath, downloadPath, }) {
    // Create a mutex to guard the copy - we could do an md5 hash check as well, but that's also expensive
    if (!alreadyCopiedFiles.has(downloadPath)) {
        const copyFileMutex = (0, mutex_1.createMutex)(`core-utils:copy-fetch:${downloadPath}`, 200);
        await copyFileMutex.acquire();
        if (!alreadyCopiedFiles.has(downloadPath)) {
            await fs_extra_1.default.copy(cachedPath, downloadPath, {
                overwrite: true,
            });
        }
        alreadyCopiedFiles.add(downloadPath);
        await copyFileMutex.release();
    }
    return downloadPath;
}
const queue = (0, fastq_1.default)(
/**
 * fetchWorker
 * --
 * Handles fetch requests that are pushed into the queue
 */
async function fetchWorker(task, cb) {
    try {
        cb(null, await fetchFile(task.args));
    }
    catch (e) {
        cb(e);
    }
}, GATSBY_CONCURRENT_DOWNLOAD);
/**
 * pushTask
 * --
 * Pushes a task into the queue and the processing cache
 *
 * Promisifies a task in the queue
 * @param {CreateRemoteFileNodePayload} task
 * @return {Promise<Buffer | string>}
 */
async function pushTask(task) {
    return new Promise((resolve, reject) => {
        queue.push(task, (err, node) => {
            if (!err) {
                resolve(node);
            }
            else {
                reject(err);
            }
        });
    });
}
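/**
 * fetchFile
 * --
 * Performs the actual download: serializes work per url with a mutex, reuses
 * in-flight/previous results for the current build, sends a conditional request
 * using any stored etag, and writes the response into a content-digest keyed
 * directory (unless excludeDigest is set).
 */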
async function fetchFile({ url, cache, directory, auth = {}, httpHeaders = {}, ext, name, cacheKey, excludeDigest, }) {
    // global introduced in gatsby 4.0.0
    const BUILD_ID = global.__GATSBY?.buildId ?? ``;
    const fileDirectory = (cache ? cache.directory : directory);
    const storage = (0, get_storage_1.getStorage)((0, get_storage_1.getDatabaseDir)());
    if (!cache && !directory) {
        throw new Error(`You must specify either a cache or a directory`);
    }
    const fetchFileMutex = (0, mutex_1.createMutex)(`core-utils:fetch:${url}`);
    await fetchFileMutex.acquire();
    // Fetch the file.
    try {
        const digest = (0, create_content_digest_1.createContentDigest)(url);
        const finalDirectory = excludeDigest
            ? path_1.default.resolve(fileDirectory)
            : path_1.default.join(fileDirectory, digest);
        if (!name) {
            name = (0, filename_utils_1.getRemoteFileName)(url);
        }
        if (!ext) {
            ext = (0, filename_utils_1.getRemoteFileExtension)(url);
        }
        const cachedEntry = await storage.remoteFileInfo.get(url);
        const inFlightValue = getInFlightObject(url, BUILD_ID);
        if (inFlightValue) {
            const downloadPath = (0, filename_utils_1.createFilePath)(finalDirectory, name, ext);
            if (!isPublicPath(finalDirectory) || downloadPath === inFlightValue) {
                return inFlightValue;
            }
            return await copyCachedPathToDownloadPath({
                cachedPath: inFlightValue,
                downloadPath,
            });
        }
        // Add htaccess authentication if passed in. This isn't particularly
        // extensible. We should define a proper API that we validate.
        const httpOptions = {};
        if (auth && (auth.htaccess_pass || auth.htaccess_user)) {
            httpOptions.username = auth.htaccess_user;
            httpOptions.password = auth.htaccess_pass;
        }
        await fs_extra_1.default.ensureDir(finalDirectory);
        const tmpFilename = (0, filename_utils_1.createFilePath)(fileDirectory, `tmp-${digest}`, ext);
        let filename = (0, filename_utils_1.createFilePath)(finalDirectory, name, ext);
        // See if there are response headers for this url
        // from a previous request.
        const headers = { ...httpHeaders };
        if (cachedEntry?.headers?.etag && (await fs_extra_1.default.pathExists(filename))) {
            headers[`If-None-Match`] = cachedEntry.headers.etag;
        }
        const response = await (0, fetch_file_1.requestRemoteNode)(url, headers, tmpFilename, httpOptions);
        if (response.statusCode === 200) {
            // Save the response headers for future requests.
            // If the user did not provide an extension and we couldn't get one from the remote file, try to guess one
            if (!ext) {
                // if this is a fresh response - try to guess the extension and cache the result for the future
                const filetype = await file_type_1.default.fromFile(tmpFilename);
                if (filetype) {
                    ext = `.${filetype.ext}`;
                    filename += ext;
                }
            }
            await fs_extra_1.default.move(tmpFilename, filename, { overwrite: true });
            const slashedDirectory = (0, path_2.slash)(finalDirectory);
            await setInFlightObject(url, BUILD_ID, {
                cacheKey,
                extension: ext,
                headers: response.headers.etag ? { etag: response.headers.etag } : {},
                directory: slashedDirectory,
                path: (0, path_2.slash)(filename).replace(`${slashedDirectory}/`, ``),
            });
        }
        else if (response.statusCode === 304) {
            await fs_extra_1.default.remove(tmpFilename);
        }
        return filename;
    }
    finally {
        await fetchFileMutex.release();
    }
}
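// In-memory fallback of url -> downloaded path, used when no buildId is available
// to key the persisted remoteFileInfo store.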
const inFlightMap = new Map();
function getInFlightObject(key, buildId) {
    if (!buildId) {
        return inFlightMap.get(key);
    }
    const remoteFile = (0, get_storage_1.getStorage)((0, get_storage_1.getDatabaseDir)()).remoteFileInfo.get(key);
    // if the buildId matches we know it's the same build and it already processed this url during this build
    if (remoteFile && remoteFile.buildId === buildId) {
        return path_1.default.join(remoteFile.directory, remoteFile.path);
    }
    return undefined;
}
async function setInFlightObject(key, buildId, value) {
    if (!buildId) {
        inFlightMap.set(key, path_1.default.join(value.directory, value.path));
    }
    await (0, get_storage_1.getStorage)((0, get_storage_1.getDatabaseDir)()).remoteFileInfo.put(key, {
        ...value,
        buildId,
    });
}
//# sourceMappingURL=fetch-remote-file.js.map