UNPKG

@cumulus/cmrjs

Version:
1,113 lines (1,112 loc) 60.4 kB
'use strict'; //@ts-check const got = require('got'); const get = require('lodash/get'); const pick = require('lodash/pick'); const set = require('lodash/set'); const cloneDeep = require('lodash/cloneDeep'); const { promisify } = require('util'); const js2xmlParser = require('js2xmlparser'); const path = require('path'); const urljoin = require('url-join'); const xml2js = require('xml2js'); const omit = require('lodash/omit'); const { buildS3Uri, parseS3Uri, promiseS3Upload, s3GetObjectTagging, s3ObjectExists, s3TagSetToQueryString, waitForObject, getObjectStreamContents, } = require('@cumulus/aws-client/S3'); const { s3 } = require('@cumulus/aws-client/services'); const { getSecretString } = require('@cumulus/aws-client/SecretsManager'); const launchpad = require('@cumulus/launchpad-auth'); const Logger = require('@cumulus/logger'); const errors = require('@cumulus/errors'); const { CMR, getSearchUrl, ummVersion } = require('@cumulus/cmr-client'); const { constructDistributionUrl } = require('@cumulus/distribution-utils'); const { getBucketAccessUrl } = require('@cumulus/cmr-client/getUrl'); const { constructCollectionId } = require('@cumulus/message/Collections'); const { xmlParseOptions, ummVersionToMetadataFormat, } = require('./utils'); const { updateEcho10XMLGranuleUrAndGranuleIdentifier } = require('./echo10Modifiers'); const { updateUMMGGranuleURAndGranuleIdentifier } = require('./ummgModifiers'); /* eslint-disable max-len */ /** * @typedef {import('@cumulus/cmr-client/CMR').CMRConstructorParams} CMRConstructorParams * @typedef {import('@cumulus/distribution-utils/dist/types').DistributionBucketMap} DistributionBucketMap * @typedef {import('@cumulus/types').ApiFileGranuleIdOptional} ApiFileGranuleIdOptional * @typedef { ApiFileGranuleIdOptional & { filepath?: string }} ApiFileWithFilePath */ /** * @typedef {Object} CmrFile * @property {string} bucket - The S3 bucket name * @property {string} key - The S3 key for the metadata file * @property {string} granuleId - The granule ID associated with the file * @property {string} [etag] - Optional entity tag for file versioning */ /** /** * @typedef {{ * provider: string, * clientId: string, * username?: string, * password?: string, * token?: string * }} CmrCredentials */ /** * @typedef {Object} Echo10URLObject * @property {string} URL * @property {string} [Type] * @property {string} [Description] * @property {string} [URLDescription] */ /** * @typedef {Object} Echo10MetadataObject * @property {Object} Granule - The root ECHO10 granule object * @property {{ OnlineAccessURL?: Echo10URLObject[] }} [Granule.OnlineAccessURLs] * @property {{ OnlineResource?: Echo10URLObject[] }} [Granule.OnlineResources] * @property {{ ProviderBrowseUrl?: Echo10URLObject[] }} [Granule.AssociatedBrowseImageUrls] */ /** * @typedef {Object} getS3UrlOfFileFile * @property {string} [filename] - Full S3 URI (e.g., s3://bucket/key) * @property {string} [bucket] - Bucket name (used with `key` or `filepath`) * @property {string} [key] - S3 key (used with `bucket`) * @property {string} [filepath] - Alternate key for the file within the bucket */ /* eslint-enable max-len */ const log = new Logger({ sender: '@cumulus/cmrjs/src/cmr-utils' }); const s3CredsEndpoint = 's3credentials'; function getS3KeyOfFile(file) { if (file.filename) return parseS3Uri(file.filename).Key; if (file.filepath) return file.filepath; if (file.key) return file.key; throw new Error(`Unable to determine s3 key of file: ${JSON.stringify(file)}`); } /** * Validates that required granule metadata parameters are provided. * Throws an error if either parameter is missing or falsy. * * @param {Object} params - Parameter object * @param {string} params.producerGranuleId - The original granule identifier (must be non-empty) * @param {string} params.granuleId - The updated granule identifier (must be non-empty) * * @throws {Error} if either `producerGranuleId` or `granuleId` is not provided */ function checkRequiredMetadataParms({ producerGranuleId, granuleId }) { if (!producerGranuleId) { throw new Error('No producerGranuleId was provided when required for CMR metadata update'); } if (!granuleId) { throw new Error('No granuleId was provided when required for CMR Metadata update'); } } /** * Returns the S3 URI for a given file object. * * Accepts multiple file shapes commonly used throughout Cumulus and resolves * them to a valid `s3://bucket/key` URI. * * @param {getS3UrlOfFileFile} file - File object containing filename or bucket/key data * @returns {string} - A string representing the S3 URI (e.g., `s3://bucket/key`) * @throws {Error} if the file does not contain enough information to construct the URI */ function getS3UrlOfFile(file) { if (file.filename) return file.filename; if (file.bucket && file.filepath) return buildS3Uri(file.bucket, file.filepath); if (file.bucket && file.key) return buildS3Uri(file.bucket, file.key); throw new Error(`Unable to determine location of file: ${JSON.stringify(file)}`); } /** * Returns the file 'name' of a given object. * * Accepts multiple file shapes commonly used throughout Cumulus and resolves * them to a valid `s3://bucket/key` URI. * * @param {ApiFileWithFilePath} file - API File * @returns {string | undefined} - The file name, or undefined if not found */ function getFilename(file) { if (file.fileName) return file.fileName; if (file.name) return file.name; if (file.filename) return path.basename(file.filename); if (file.filepath) return path.basename(file.filepath); if (file.key) return path.basename(file.key); return undefined; } function getFileDescription(file, urlType = 'distribution') { if (urlType === 's3') { return 'This link provides direct download access via S3 to the granule'; } const filename = getFilename(file); return filename ? `Download ${filename}` : 'File to download'; } const isECHO10Filename = (filename) => filename.endsWith('cmr.xml'); const isUMMGFilename = (filename) => filename.endsWith('cmr.json'); const isISOFilename = (filename) => filename.endsWith('.iso.xml'); const isCMRISOFilename = (filename) => filename.endsWith('cmr_iso.xml'); const isCMRFilename = (filename) => isECHO10Filename(filename) || isUMMGFilename(filename) || isCMRISOFilename(filename); const constructCmrConceptLink = (conceptId, extension) => `${getSearchUrl()}concepts/${conceptId}.${extension}`; /** * Returns True if this object can be determined to be a cmrMetadata object. * * @param {Object} fileobject * @returns {boolean} true if object references cmr metadata. */ function isCMRFile(fileobject) { const cmrfilename = fileobject.key || fileobject.name || fileobject.filename || ''; return isCMRFilename(cmrfilename); } /** * Returns True if this object can be determined to be an ISO file object. * * @param {Object} fileobject * @returns {boolean} true if object references an ISO file metadata. */ function isISOFile(fileobject) { const filename = fileobject.key || fileobject.name || fileobject.filename || ''; return isISOFilename(filename) || isCMRISOFilename(filename); } /** * Extracts CMR file objects from the specified granule object. * * @param {Object} granule - granule object containing CMR files within its * `files` property * @param {Array<Object>} granule.files - array of files for a granule * @param {string} granule.granuleId - granule ID * @param {Function} filterFunc - function to determine if the given file object is a CMR file; defaults to `isCMRFile` * @returns {Array<CmrFile>} an array of CMR file objects, each with properties * `granuleId`, `bucket`, `key`, and possibly `etag` (if present on input) */ function granuleToCmrFileObject({ granuleId, files = [] }, filterFunc = isCMRFile) { return files .filter(filterFunc) .map((file) => { const { Bucket, Key } = parseS3Uri(getS3UrlOfFile(file)); return { // Include etag only if file has one ...pick(file, 'etag'), bucket: Bucket, key: Key, granuleId, }; }); } /** * Reduce granule object array to CMR files array * * @param {Array<Object>} granules - granule objects array * @param {Function} filterFunc - function to determine if the given file object is a CMR file; defaults to `isCMRFile` * * @returns {Array<CmrFile>} - CMR file object array: { etag, bucket, key, granuleId } */ function granulesToCmrFileObjects(granules, filterFunc = isCMRFile) { return granules.flatMap((granule) => granuleToCmrFileObject(granule, filterFunc)); } /** * Posts CMR XML files from S3 to CMR. * * @param {Object} cmrFile - an object representing the cmr file * @param {string} cmrFile.granuleId - the granuleId of the cmr xml File * @param {string} cmrFile.filename - the s3 uri to the cmr xml file * @param {string} cmrFile.metadata - granule xml document * @param {Object} cmrClient - a CMR instance * @param {string} revisionId - Optional CMR Revision ID * @returns {Promise<Object>} CMR's success response which includes the concept-id */ async function publishECHO10XML2CMR(cmrFile, cmrClient, revisionId) { const builder = new xml2js.Builder(); const xml = builder.buildObject(cmrFile.metadataObject); const res = await cmrClient.ingestGranule(xml, revisionId); const conceptId = res.result['concept-id']; let resultLog = `Published ${cmrFile.granuleId} to the CMR. conceptId: ${conceptId}`; if (revisionId) resultLog += `, revisionId: ${revisionId}`; log.info(resultLog); return { granuleId: cmrFile.granuleId, filename: getS3UrlOfFile(cmrFile), conceptId, metadataFormat: 'echo10', link: constructCmrConceptLink(conceptId, 'echo10'), }; } /** * Posts CMR JSON files from S3 to CMR. * * @param {Object} cmrFile - an object representing the CMR file * @param {string} cmrFile.filename - the cmr filename * @param {Object} cmrFile.metadataObject - the UMMG JSON cmr metadata * @param {Object} cmrFile.granuleId - the metadata's granuleId * @param {Object} cmrClient - a CMR instance * @param {string} revisionId - Optional CMR Revision ID * @returns {Promise<Object>} CMR's success response which includes the concept-id */ async function publishUMMGJSON2CMR(cmrFile, cmrClient, revisionId) { const granuleId = cmrFile.metadataObject.GranuleUR; const res = await cmrClient.ingestUMMGranule(cmrFile.metadataObject, revisionId); const conceptId = res['concept-id']; const filename = getS3UrlOfFile(cmrFile); const metadataFormat = ummVersionToMetadataFormat(ummVersion(cmrFile.metadataObject)); const link = constructCmrConceptLink(conceptId, 'umm_json'); let resultLog = `Published UMMG ${granuleId} to the CMR. conceptId: ${conceptId}`; if (revisionId) resultLog += `, revisionId: ${revisionId}`; log.info(resultLog); return { granuleId, filename, conceptId, metadataFormat, link, }; } /** * Determines what type of metadata object and posts either ECHO10XML or UMMG * JSON data to CMR. * * @param {Object} cmrPublishObject - * @param {string} cmrPublishObject.filename - the cmr filename * @param {Object} cmrPublishObject.metadataObject - the UMMG JSON cmr metadata * @param {Object} cmrPublishObject.granuleId - the metadata's granuleId * @param {Object} creds - credentials needed to post to CMR service * @param {string} creds.provider - the name of the Provider used on the CMR side * @param {string} creds.clientId - the clientId used to generate CMR token * @param {string} creds.username - the CMR username, not used if creds.token is provided * @param {string} creds.password - the CMR password, not used if creds.token is provided * @param {string} creds.token - the CMR or Launchpad token, * @param {string} cmrRevisionId - Optional CMR Revision ID * if not provided, CMR username and password are used to get a cmr token */ function publish2CMR(cmrPublishObject, creds, cmrRevisionId) { const cmrClient = new CMR(creds); const cmrFileName = getFilename(cmrPublishObject); // choose xml or json and do the things. if (isECHO10Filename(cmrFileName)) { return publishECHO10XML2CMR(cmrPublishObject, cmrClient, cmrRevisionId); } if (isUMMGFilename(cmrFileName)) { return publishUMMGJSON2CMR(cmrPublishObject, cmrClient, cmrRevisionId); } throw new Error(`invalid cmrPublishObject passed to publis2CMR ${JSON.stringify(cmrPublishObject)}`); } /** * Remove granule from CMR. * * @param {string} granuleUR - the granuleUR * @param {CmrCredentials} creds - credentials needed to post to CMR service */ async function removeFromCMR(granuleUR, creds) { const cmrClient = new CMR(creds); return await cmrClient.deleteGranule(granuleUR); } /** * Returns the S3 object identified by the specified S3 URI and (optional) * entity tag, retrying up to 5 times, if necessary. * * @param {string} filename - S3 URI of the desired object * @param {string|undefined} [etag] - entity tag of the desired object (optional) * @returns {Promise} result of `AWS.S3.getObject()` as a Promise */ async function getObjectByFilename(filename, etag) { const { Bucket, Key } = parseS3Uri(filename); const params = etag ? { Bucket, Key, IfMatch: etag } : { Bucket, Key }; return await waitForObject(s3(), params, { retries: 5 }); } /** * Gets metadata for a CMR XML file from S3. * * @param {string} xmlFilePath - S3 URI to the XML metadata document * @param {string} [etag] - optional entity tag for the desired version of the * CMR file * @returns {Promise<string>} stringified XML document downloaded from S3 */ async function getXMLMetadataAsString(xmlFilePath, etag) { if (!xmlFilePath) { throw new errors.XmlMetaFileNotFound('XML Metadata file not provided'); } const obj = await getObjectByFilename(xmlFilePath, etag); return getObjectStreamContents(obj.Body); } /** * Parse an xml string * * @param {string} xml - xml to parse * @returns {Promise<Object>} promise resolves to object version of the xml */ async function parseXmlString(xml) { return await promisify(xml2js.parseString)(xml, xmlParseOptions); } /** * Returns UMMG metadata object from CMR UMM-G JSON file in S3. * * @param {string} cmrFilename - S3 path to JSON file * @param {string} [etag] - optional entity tag for the desired version of the * CMR file * @returns {Promise<Object>} CMR UMMG metadata object */ async function metadataObjectFromCMRJSONFile(cmrFilename, etag) { const obj = await getObjectByFilename(cmrFilename, etag); return JSON.parse(await getObjectStreamContents(obj.Body)); } /** * Returns metadata object from CMR Echo10 XML file in S3. * * @param {string} cmrFilename - S3 path to XML file * @param {string} [etag] - optional entity tag for the desired version of the * CMR file * @returns {Promise<Object>} CMR XML metadata object */ const metadataObjectFromCMRXMLFile = (cmrFilename, etag) => getXMLMetadataAsString(cmrFilename, etag).then(parseXmlString); /** * Returns CMR metadata object from a CMR ECHO-10 XML file or CMR UMMG JSON * file in S3. * * @param {string} cmrFilename - S3 path to CMR file * @param {string} [etag] - optional entity tag for the desired version of the * CMR file * @returns {Promise<Object>} metadata object from the file * @throws {Error} if the specified filename does not represent an ECHO-10 XML * file or a UMMG file * @see isECHO10Filename * @see isUMMGFilename */ function metadataObjectFromCMRFile(cmrFilename, etag) { if (isECHO10Filename(cmrFilename) || isISOFilename(cmrFilename)) { return metadataObjectFromCMRXMLFile(cmrFilename, etag); } if (isUMMGFilename(cmrFilename)) { return metadataObjectFromCMRJSONFile(cmrFilename, etag); } throw new Error(`Cannot retrieve metadata: invalid CMR filename: ${cmrFilename}`); } /** * Build and return an S3 Credentials Object for adding to CMR onlineAccessUrls * * @param {string} s3CredsUrl - full url pointing to the s3 credential distribution api * @returns {Echo10URLObject} Object with attributes required for adding an onlineAccessUrl */ function getS3CredentialsObject(s3CredsUrl) { return { URL: s3CredsUrl, URLDescription: 'api endpoint to retrieve temporary credentials valid for same-region direct s3 access', Description: 'api endpoint to retrieve temporary credentials valid for same-region direct s3 access', Type: 'VIEW RELATED INFORMATION', }; } /** * Returns UMM/ECHO10 resource type mapping for CNM file type * * @param {string} type - CNM resource type to convert to UMM/ECHO10 type * @param {string} [urlType = distribution] - url type, distribution or s3 * @param {boolean} [useDirectS3Type = false] - indicate if direct s3 access type is used * @returns {string} - UMM/ECHO10 resource type */ function mapCNMTypeToCMRType(type, urlType = 'distribution', useDirectS3Type = false) { /** @type {Record<string, string>} */ const mapping = { ancillary: 'VIEW RELATED INFORMATION', data: 'GET DATA', browse: 'GET RELATED VISUALIZATION', linkage: 'EXTENDED METADATA', metadata: 'EXTENDED METADATA', qa: 'EXTENDED METADATA', }; let mappedType = 'GET DATA'; if (type && type in mapping) { mappedType = mapping[type]; } // The CMR Type for the s3 link of science file is "GET DATA VIA DIRECT ACCESS". // For non-science file, the Type for the s3 link is the same as its Type for the HTTPS URL. if (urlType === 's3' && mappedType === 'GET DATA' && useDirectS3Type) { return 'GET DATA VIA DIRECT ACCESS'; } return mappedType; } /** * Add ETags to file objects as some downstream functions expect this structure. * * @param {Object} granule - input granule object * @param {Object} etags - map of s3URIs and ETags * @returns {Object} - updated granule object */ function addEtagsToFileObjects(granule, etags) { granule.files.forEach((incomingFile) => { const file = incomingFile; const fileURI = getS3UrlOfFile(file); if (etags[fileURI]) file.etag = etags[fileURI]; }); return granule; } /** * Remove ETags to match output schema * * @param {Object} granule - output granule object * @returns {undefined} */ function removeEtagsFromFileObjects(granule) { granule.files.forEach((incomingFile) => { const file = incomingFile; delete file.etag; }); } /** * Maps etag values from the specified granules' files. * * @param {Object[]} files - array of file objects with `bucket`, `key` and * `etag` properties * @returns {Object} mapping of file S3 URIs to etags */ function mapFileEtags(files) { return files.reduce((filesMap, file) => { const { bucket, key, etag } = file; const s3Uri = getS3UrlOfFile({ bucket, key }); filesMap[s3Uri] = etag; // eslint-disable-line no-param-reassign return filesMap; }, {}); } /** * generate a url for a given file and a url type. * * @param {Object} params - input parameters * @param {Object} params.file - file object * @param {string} params.distEndpoint - distribution endpoint from config * @param {string} [params.urlType = 'distribution'] - url type, distribution or s3 * @param {Object} params.distributionBucketMap - Object with bucket:tea-path mapping * for all distribution buckets * @returns {(string | undefined)} online access url object, undefined if no URL exists */ function generateFileUrl({ file, distEndpoint, urlType = 'distribution', distributionBucketMap, }) { if (urlType === 'distribution') { const fileKey = getS3KeyOfFile(file); return constructDistributionUrl(file.bucket, fileKey, distributionBucketMap, distEndpoint); } if (urlType === 's3') { /* The check for file.filename is here for legacy compliance reasons due to model simplification in CUMULUS-1139 where filename was remapped to bucket and key*/ if (file.filename) { return file.filename; } return buildS3Uri(file.bucket, file.key); } return undefined; } /** * @typedef {Object} OnlineAccessUrl * @property {string} URL - The generated file URL. * @property {string} URLDescription - The description of the URL (used by ECHO10). * @property {string} Description - The description of the URL (used by UMMG). * @property {string} Type - The type of the URL (used by ECHO10/UMMG). */ /** * Construct online access url for a given file and a url type. * * @param {Object} params * @param {ApiFileWithFilePath} params.file - File object * @param {string} params.distEndpoint - Distribution endpoint from config * @param {{ [key: string]: string }} params.bucketTypes - Map of bucket names to bucket types * @param {'distribution' | 's3'} params.urlType - URL type: 'distribution' or 's3' * @param {DistributionBucketMap} params.distributionBucketMap - Map of bucket to distribution path * @param {boolean} [params.useDirectS3Type=false] - Whether to use direct S3 Type * @returns {Echo10URLObject | undefined} - Online access URL object, or undefined if not applicable */ function constructOnlineAccessUrl({ file, distEndpoint, bucketTypes, urlType = 'distribution', distributionBucketMap, useDirectS3Type = false, }) { const bucketType = file.bucket ? bucketTypes[file.bucket] : undefined; const distributionApiBuckets = ['protected', 'public']; if (bucketType && distributionApiBuckets.includes(bucketType)) { const fileUrl = generateFileUrl({ file, distEndpoint, urlType, distributionBucketMap }); if (fileUrl) { const fileDescription = getFileDescription(file, urlType); return { URL: fileUrl, URLDescription: fileDescription, Description: fileDescription, Type: mapCNMTypeToCMRType(file.type, urlType, useDirectS3Type), // used by ECHO10/UMMG }; } } return undefined; } /** * Construct a list of online access urls grouped by link type. * * @param {Object} params * @param {ApiFileWithFilePath[]} params.files - Array of file objects * @param {string} params.distEndpoint - Distribution endpoint from config * @param {{ [key: string]: string }} params.bucketTypes - Map of bucket name to bucket type * @param {DistributionBucketMap} params.distributionBucketMap - Mapping of bucket to * distribution path * @param {string} [params.cmrGranuleUrlType=both] - Granule URL type: 's3', * 'distribution', or 'both' * @param {boolean} [params.useDirectS3Type=false] - Whether direct S3 URL types are used * @returns {Echo10URLObject[]} Array of online access URL objects */ function constructOnlineAccessUrls({ bucketTypes, cmrGranuleUrlType = 'both', distEndpoint, distributionBucketMap, files, useDirectS3Type = false, }) { if (['distribution', 'both'].includes(cmrGranuleUrlType) && !distEndpoint) { throw new Error(`cmrGranuleUrlType is ${cmrGranuleUrlType}, but no distribution endpoint is configured.`); } const [distributionUrls, s3Urls] = files.reduce(( /** @type {[Echo10URLObject[], Echo10URLObject[]]} */ [distributionAcc, s3Acc], file) => { if (['both', 'distribution'].includes(cmrGranuleUrlType)) { const url = constructOnlineAccessUrl({ file, distEndpoint, bucketTypes, urlType: 'distribution', distributionBucketMap, useDirectS3Type, }); if (url) distributionAcc.push(url); } if (['both', 's3'].includes(cmrGranuleUrlType)) { const url = constructOnlineAccessUrl({ file, distEndpoint, bucketTypes, urlType: 's3', distributionBucketMap, useDirectS3Type, }); if (url) s3Acc.push(url); } return [distributionAcc, s3Acc]; }, [[], []]); const urlList = distributionUrls.concat(s3Urls); return urlList.filter((urlObj) => urlObj); } /** * Construct a list of UMMG related urls * * @param {Object} params - input parameters * @param {Array<Object>} params.files - array of file objects * @param {string} params.distEndpoint - distribution endpoint from config * @param {{ [key: string]: string }} params.bucketTypes - map of bucket names to bucket types * @param {string} params.cmrGranuleUrlType - cmrGranuleUrlType from config * @param {DistributionBucketMap} params.distributionBucketMap - Object with bucket:tea-path * mapping for all distribution buckets * @param {boolean} params.useDirectS3Type - indicate if direct s3 access type is used * @returns {[{URL: string, string, Description: string, Type: string}]} * an array of online access url objects */ function constructRelatedUrls({ files, distEndpoint, bucketTypes, cmrGranuleUrlType = 'both', distributionBucketMap, useDirectS3Type = false, }) { const credsUrl = urljoin(distEndpoint, s3CredsEndpoint); const s3CredentialsObject = getS3CredentialsObject(credsUrl); const cmrUrlObjects = constructOnlineAccessUrls({ files, distEndpoint, bucketTypes, cmrGranuleUrlType, distributionBucketMap, useDirectS3Type, }); const relatedUrls = cmrUrlObjects.concat(s3CredentialsObject); return relatedUrls.map((urlObj) => omit(urlObj, 'URLDescription')); } /** * Create a list of URL objects that should not appear under onlineAccess in the CMR metadata. * @param {Array<Object>} files - array of updated file objects * @param {{ [key: string]: string }} bucketTypes - map of buckets name to bucket types * @returns {Array<Object>} array of files to be omitted in cmr's OnlineAccessURLs */ function onlineAccessURLsToRemove(files, bucketTypes) { const typesToKeep = ['public', 'protected']; return files.reduce((acc, file) => { if (typesToKeep.includes(bucketTypes[file.bucket])) { return acc; } return [ ...acc, { URL: getS3KeyOfFile(file) }, ]; }, []); } /** * Returns a list of possible metadata file objects based on file.name extension. * * @param {Array<Object>} files - list of file objects that might be metadata files. * @param {string} files.name - file name * @param {string} files.bucket - current bucket of file * @param {string} files.filepath - current s3 key of file * @returns {Array<Object>} any metadata type file object. */ function getCmrFileObjs(files) { return files.filter((file) => isCMRFile(file)); } /** * Merge lists of URL objects. * * @param {Array<Object>} original - Array of URL Objects representing the cmr file previous state * @param {Array<Object>} updated - Array of updated URL Objects representing moved/updated files * @param {Array<Object>} removed - Array of URL Objects to remove from OnlineAccess. * @returns {Array<Object>} list of updated an original URL objects representing the updated state. */ function mergeURLs(original, updated = [], removed = []) { const newURLBasenames = updated.map((url) => path.basename(url.URL)); const removedBasenames = removed.map((url) => path.basename(url.URL)); const unchangedOriginals = original.filter((url) => !newURLBasenames.includes(path.basename(url.URL)) && !removedBasenames.includes(path.basename(url.URL))); const updatedWithMergedOriginals = updated.map((url) => { const matchedOriginal = original.filter((ourl) => path.basename(ourl.URL) === path.basename(url.URL)); if (matchedOriginal.length === 1) { // merge original urlObject into the updated urlObject const updatedMetadata = pick(url, ['URL', 'Description', 'URLDescription', 'Type']); return { ...url, ...matchedOriginal[0], ...updatedMetadata, }; } return url; }); return [...unchangedOriginals, ...updatedWithMergedOriginals]; } /** * Updates CMR JSON file with stringified 'metadataObject' * * @param {Object} metadataObject - JSON Object to stringify * @param {CmrFile} cmrFile - cmr file object to write body to * @returns {Promise<{[key: string]: any, ETag?: string | undefined }>} returns promised * promiseS3Upload response */ async function uploadUMMGJSONCMRFile(metadataObject, cmrFile) { const tags = await s3GetObjectTagging(cmrFile.bucket, getS3KeyOfFile(cmrFile)); const tagsQueryString = s3TagSetToQueryString(tags.TagSet); return promiseS3Upload({ params: { Bucket: cmrFile.bucket, Key: getS3KeyOfFile(cmrFile), Body: JSON.stringify(metadataObject), Tagging: tagsQueryString, ContentType: 'application/json', }, }); } /** * check if the direct s3 access type should be used, * s3 link type 'GET DATA VIA DIRECT ACCESS' isn't valid until UMM-G version 1.6.2 * * @param {Object} metadataObject - the UMMG metadata object * @returns {boolean} indicate if direct s3 access type is used */ function shouldUseDirectS3Type(metadataObject) { const versionWithDirectS3Type = 1.62; const versionString = ummVersion(metadataObject); // convert version string like 1.6.1 to 1.61 for comparision if (Number(versionString.replace('.', '_').replace(/\./g, '').replace('_', '.')) >= versionWithDirectS3Type) { return true; } return false; } /** * Update the UMMG cmr metadata object to have corrected urls * * @param {Object} params - Parameters for updating the metadata object * @param {Object} params.metadataObject - The existing UMMG CMR metadata object to update * @param {ApiFileWithFilePath[]} params.files - Array of file * objects used to generate URLs * @param {string} params.distEndpoint - Base URL for distribution endpoints (e.g., CloudFront) * @param {{ [bucket: string]: string }} params.bucketTypes - Map of bucket names * to types (e.g., public, protected) * @param {string} [params.cmrGranuleUrlType='both'] - Type of URLs to generate: 'distribution', * 's3', or 'both' * @param {DistributionBucketMap} params.distributionBucketMap - Mapping of bucket names to * distribution paths * * @returns {Object} - A deep clone of the original metadata object with updated RelatedUrls */ function updateUMMGMetadataObject({ metadataObject, files, distEndpoint, bucketTypes, cmrGranuleUrlType = 'both', distributionBucketMap, }) { const updatedMetadataObject = cloneDeep(metadataObject); const useDirectS3Type = shouldUseDirectS3Type(updatedMetadataObject); const newURLs = constructRelatedUrls({ files, distEndpoint, bucketTypes, cmrGranuleUrlType, distributionBucketMap, useDirectS3Type, }); const removedURLs = onlineAccessURLsToRemove(files, bucketTypes); /** @type {Array<{ URL: string, Description?: string, Type?: string }>} */ const originalURLs = get(updatedMetadataObject, 'RelatedUrls', []); const mergedURLs = mergeURLs(originalURLs, newURLs, removedURLs); set(updatedMetadataObject, 'RelatedUrls', mergedURLs); return updatedMetadataObject; } /** * After files are moved, create new online access URLs and then update the S3 * UMMG cmr.json file with this information. * * @param {Object} params - parameter object * @param {CmrFile} params.cmrFile - cmr.json file whose contents will be updated. * @param {ApiFileWithFilePath[]} params.files - array of moved file objects. * @param {string} params.distEndpoint - distribution endpoint form config. * @param {{ [bucket: string]: string }} params.bucketTypes - map of bucket names to bucket types * @param {string} params.cmrGranuleUrlType - cmrGranuleUrlType from config * @param {DistributionBucketMap} params.distributionBucketMap - Object with bucket:tea-path * mapping for all distribution buckets * @param {string} params.producerGranuleId - producer granule id * @param {string} params.granuleId - granule id * @param {boolean} [params.updateGranuleIdentifiers=false] - whether to update the granule UR/add * producerGranuleID to the CMR metadata object * @param {boolean} [params.excludeDataGranule=false] - whether to add or update the DataGranule * node in the granule's metadata * @param {any} [params.testOverrides] - overrides for testing * @returns {Promise<{ metadataObject: Object, etag: string | undefined}>} an object * containing a `metadataObject` (the updated UMMG metadata object) and the * `etag` of the uploaded CMR file */ async function updateUMMGMetadata({ cmrFile, files, distEndpoint, bucketTypes, cmrGranuleUrlType = 'both', distributionBucketMap, producerGranuleId, granuleId, updateGranuleIdentifiers = false, excludeDataGranule = false, testOverrides = {}, }) { const { uploadUMMGJSONCMRFileMethod = uploadUMMGJSONCMRFile, metadataObjectFromCMRJSONFileMethod = metadataObjectFromCMRJSONFile, } = testOverrides; const filename = getS3UrlOfFile(cmrFile); const metadataObject = await metadataObjectFromCMRJSONFileMethod(filename); let updatedMetadataObject = updateUMMGMetadataObject({ metadataObject, files, distEndpoint, bucketTypes, cmrGranuleUrlType, distributionBucketMap, }); if (updateGranuleIdentifiers) { // Type checks are needed as this callers/API are not all typed/ts converted yet checkRequiredMetadataParms({ producerGranuleId, granuleId }); updatedMetadataObject = updateUMMGGranuleURAndGranuleIdentifier({ granuleUr: granuleId, producerGranuleId, metadataObject: updatedMetadataObject, excludeDataGranule, }); } const { ETag: etag } = await uploadUMMGJSONCMRFileMethod(updatedMetadataObject, cmrFile); return { metadataObject: updatedMetadataObject, etag }; } /** * Helper to build an CMR settings object, used to initialize CMR. * * @param {Object} cmrConfig - CMR configuration object * @param {string} cmrConfig.oauthProvider - Oauth provider: launchpad or earthdata * @param {string} cmrConfig.provider - the CMR provider * @param {string} cmrConfig.clientId - Client id for CMR requests * @param {string} cmrConfig.passphraseSecretName - Launchpad passphrase secret name * @param {string} cmrConfig.api - Launchpad api * @param {string} cmrConfig.certificate - Launchpad certificate * @param {string} cmrConfig.username - EDL username * @param {string} cmrConfig.passwordSecretName - CMR password secret name * @returns {Promise<CMRConstructorParams>} object to create CMR instance - contains the * provider, clientId, and either launchpad token or EDL username and * password */ async function getCmrSettings(cmrConfig = {}) { const oauthProvider = cmrConfig.oauthProvider || process.env.cmr_oauth_provider; const cmrCredentials = { provider: cmrConfig.provider || process.env.cmr_provider, clientId: cmrConfig.clientId || process.env.cmr_client_id, oauthProvider, }; if (oauthProvider === 'launchpad') { const launchpadPassphraseSecretName = cmrConfig.passphraseSecretName || process.env.launchpad_passphrase_secret_name; const passphrase = await getSecretString(launchpadPassphraseSecretName); const config = { passphrase, api: cmrConfig.api || process.env.launchpad_api, certificate: cmrConfig.certificate || process.env.launchpad_certificate, }; log.debug('cmrjs.getCreds getLaunchpadToken'); const token = await launchpad.getLaunchpadToken(config); return { ...cmrCredentials, token, }; } const passwordSecretName = cmrConfig.passwordSecretName || process.env.cmr_password_secret_name; const password = await getSecretString(passwordSecretName); return { ...cmrCredentials, password, username: cmrConfig.username || process.env.cmr_username, }; } function generateEcho10XMLString(granule) { const mapping = new Map([]); Object.keys(granule).forEach((key) => { if (key === 'OnlineAccessURLs') { mapping.set(key, granule[key]); mapping.set('OnlineResources', granule.OnlineResources); } else if (key !== 'OnlineResources') { mapping.set(key, granule[key]); } }); return js2xmlParser.parse('Granule', mapping); } /** * Updates CMR xml file with 'xml' string * * @param {string} xml - XML to write to cmrFile * @param {Object} cmrFile - cmr file object to write xml to * @returns {Promise} returns promised promiseS3Upload response */ async function uploadEcho10CMRFile(xml, cmrFile) { const tags = await s3GetObjectTagging(cmrFile.bucket, getS3KeyOfFile(cmrFile)); const tagsQueryString = s3TagSetToQueryString(tags.TagSet); return promiseS3Upload({ params: { Bucket: cmrFile.bucket, Key: getS3KeyOfFile(cmrFile), Body: xml, Tagging: tagsQueryString, ContentType: 'application/xml', }, }); } /** * Method takes an array of URL objects to update, an 'origin' array of original URLs * and a list of URLs to remove and returns an array of merged URL objects * * @param {Array<Object>} URLlist - array of URL objects * @param {Array<Object>} originalURLlist - array of URL objects * @param {Array<Object>} removedURLs - array of URL objects * @param {Array<Object>} URLTypes - array of UMM/Echo FileTypes to include * @param {Array<Object>} URLlistFieldFilter - array of URL Object keys to omit * @returns {Array<Object>} array of merged URL objects, filtered */ function buildMergedEchoURLObject(URLlist = [], originalURLlist = [], removedURLs = [], URLTypes, URLlistFieldFilter) { let filteredURLObjectList = URLlist.filter((urlObj) => URLTypes.includes(urlObj.Type)); filteredURLObjectList = filteredURLObjectList.map((urlObj) => omit(urlObj, URLlistFieldFilter)); return mergeURLs(originalURLlist, filteredURLObjectList, removedURLs); } /** * Updates the OnlineAccessURLs, OnlineResources, and AssociatedBrowseImageUrls * fields of an ECHO10 CMR metadata object with newly constructed URLs. * * This function: * - Extracts the original URL sets from the ECHO10 XML metadata. * - Constructs new URL entries based on the provided file list and configuration. * - Merges new URLs with original ones, removing outdated or irrelevant URLs. * - Returns a new metadata object with an updated `Granule` field. * * @param {Object} params - Input parameters * @param {Echo10MetadataObject} params.metadataObject - The parsed ECHO10 metadata XML * object (as a JavaScript object), expected to include a `Granule` key * @param {ApiFileWithFilePath[]} params.files - Granule files to generate * updated URLs from * @param {string} params.distEndpoint - The base distribution endpoint URL * (e.g., CloudFront origin) * @param {{ [bucketName: string]: string }} params.bucketTypes - Mapping of bucket names * to access types ('public', 'protected', etc.) * @param {string} [params.cmrGranuleUrlType='both'] - Type of URLs to generate * for CMR: 'distribution', 's3', or 'both' * @param {DistributionBucketMap} params.distributionBucketMap - Maps S3 buckets to their * distribution URL paths * * @returns {Echo10MetadataObject} A new ECHO10 metadata object with updated * `Granule.OnlineAccessURLs`, `Granule.OnlineResources`, and `Granule.AssociatedBrowseImageUrls` * fields */ function updateEcho10XMLMetadataObjectUrls({ metadataObject, files, distEndpoint, bucketTypes, cmrGranuleUrlType = 'both', distributionBucketMap, }) { const metadataGranule = metadataObject.Granule; const updatedGranule = { ...metadataGranule }; /** @type {Echo10URLObject[]} */ const originalOnlineAccessURLs = /** @type {Echo10URLObject[]} */ ( /** @type {Echo10URLObject[]} */ ([]).concat(get(metadataGranule, 'OnlineAccessURLs.OnlineAccessURL') ?? [])); /** @type {Echo10URLObject[]} */ const originalOnlineResourceURLs = /** @type {Echo10URLObject[]} */ ( /** @type {Echo10URLObject[]} */ ([]).concat(get(metadataGranule, 'OnlineResources.OnlineResource') ?? [])); /** @type {Echo10URLObject[]} */ const originalAssociatedBrowseURLs = /** @type {Echo10URLObject[]} */ ( /** @type {Echo10URLObject[]} */ ([]).concat(get(metadataGranule, 'AssociatedBrowseImageUrls.ProviderBrowseUrl') ?? [])); const removedURLs = onlineAccessURLsToRemove(files, bucketTypes); const newURLs = constructOnlineAccessUrls({ files, distEndpoint, bucketTypes, cmrGranuleUrlType, distributionBucketMap, }); newURLs.push(getS3CredentialsObject(urljoin(distEndpoint, s3CredsEndpoint))); const mergedOnlineResources = buildMergedEchoURLObject(newURLs, originalOnlineResourceURLs, removedURLs, ['EXTENDED METADATA', 'VIEW RELATED INFORMATION'], ['URLDescription']); const mergedOnlineAccessURLs = buildMergedEchoURLObject(newURLs, originalOnlineAccessURLs, removedURLs, ['GET DATA', 'GET DATA VIA DIRECT ACCESS'], ['Type', 'Description']); const mergedAssociatedBrowse = buildMergedEchoURLObject(newURLs, originalAssociatedBrowseURLs, removedURLs, ['GET RELATED VISUALIZATION'], ['URLDescription', 'Type']); // Update the Granule with the updated/merged lists set(updatedGranule, 'OnlineAccessURLs.OnlineAccessURL', mergedOnlineAccessURLs); set(updatedGranule, 'OnlineResources.OnlineResource', mergedOnlineResources); set(updatedGranule, 'AssociatedBrowseImageUrls.ProviderBrowseUrl', mergedAssociatedBrowse); return { ...metadataObject, Granule: updatedGranule, }; } /** * Updates an ECHO10 CMR XML metadata file on S3 to reflect new URLs and optionally * a new GranuleUR and ProducerGranuleId. * * @param {Object} params * @param {string} params.granuleId - New GranuleUR to set in metadata * @param {string} params.producerGranuleId - Original ProducerGranuleId to record * @param {CmrFile} params.cmrFile - The cmr xml file to be updated * @param {ApiFileWithFilePath[]} params.files - List of granule files used * to generate OnlineAccess URLs * @param {string} params.distEndpoint - Distribution endpoint for download URLs * @param {{ [bucket: string]: string }} params.bucketTypes - Mapping of bucket names to their types * @param {string} [params.cmrGranuleUrlType] * - Type of URLs to generate ('distribution' | 's3' | 'both') * @param {DistributionBucketMap} params.distributionBucketMap * - Maps buckets to distribution paths * @param {boolean} [params.updateGranuleIdentifiers=false] * - If true, update the GranuleUR and ProducerGranuleId in metadata * @param {boolean} [params.excludeDataGranule=false] - Whether to add or update the DataGranule * node in the granule's metadata * @param {any} [params.testOverrides] * - Optional test overrides for internal functions * @returns {Promise<{ metadataObject: any, etag: string }>} * The updated metadata object and resulting ETag */ async function updateEcho10XMLMetadata({ granuleId, producerGranuleId, cmrFile, files, distEndpoint, bucketTypes, cmrGranuleUrlType = 'both', distributionBucketMap, updateGranuleIdentifiers = false, excludeDataGranule = false, testOverrides = {}, }) { const { generateEcho10XMLStringMethod = generateEcho10XMLString, uploadEcho10CMRFileMethod = uploadEcho10CMRFile, metadataObjectFromCMRXMLFileMethod = metadataObjectFromCMRXMLFile, } = testOverrides; // add/replace the OnlineAccessUrls const filename = getS3UrlOfFile(cmrFile); const metadataObject = await metadataObjectFromCMRXMLFileMethod(filename); let updatedMetadataObject = updateEcho10XMLMetadataObjectUrls({ metadataObject, files, distEndpoint, bucketTypes, cmrGranuleUrlType, distributionBucketMap, }); if (updateGranuleIdentifiers) { // Type checks are needed as this callers/API are not all typed/ts converted yet try { checkRequiredMetadataParms({ producerGranuleId, granuleId }); } catch (error) { throw new Error(`updateGranuleIdentifiers was set, but producerGranuleId ${producerGranuleId} or granuleId ${granuleId} is not set.`, { cause: error }); } updatedMetadataObject = updateEcho10XMLGranuleUrAndGranuleIdentifier({ granuleUr: granuleId, producerGranuleId, xml: updatedMetadataObject, excludeDataGranule, }); } const xml = generateEcho10XMLStringMethod(updatedMetadataObject.Granule); const { ETag: etag } = await uploadEcho10CMRFileMethod(xml, cmrFile); return { metadataObject: updatedMetadataObject, etag }; } /** * Modifies cmr metadata file with file's URLs updated to their new locations. * * @param {Object} params - parameter object * @param {string} params.granuleId - granuleId * @param {string} [params.producerGranuleId] - producer granuleId * @param {CmrFile} params.cmrFile - cmr file to be updated * @param {ApiFileWithFilePath[]} params.files - array of file objects * @param {string} params.distEndpoint - distribution enpoint from config * @param {boolean} params.published - indicate if publish is needed * @param {{ [key: string]: string }} params.bucketTypes - map of bucket names to bucket types * @param {string} params.cmrGranuleUrlType - type of granule CMR url * @param {boolean} [params.updateGranuleIdentifiers=false] * - If true, update the GranuleUR and ProducerGranuleId in metadata * @param {boolean} [params.excludeDataGranule=false] - Whether to add or update the DataGranule * node in the granule's metadata * @param {any} [params.testOverrides] * - Optional test overrides for internal functions * @param {DistributionBucketMap} params.distributionBucketMap - Object with bucket:tea-path * mapping for all distribution buckets * @returns {Promise<Object>} CMR file object with the `etag` of the newly * updated metadata file */ async function updateCMRMetadata({ granuleId, producerGranuleId, cmrFile, files, distEndpoint, published, bucketTypes, cmrGranuleUrlType = 'both', updateGranuleIdentifiers = false, excludeDataGranule = false, distributionBucketMap, testOverrides = {}, }) { const { publish2CMRMethod = publish2CMR, getCmrSettingsMethod = getCmrSettings, } = testOverrides; const filename = getS3UrlOfFile(cmrFile); log.debug(`cmrjs.updateCMRMetadata granuleId ${granuleId} cmrMetadata file ${filename}`); const cmrCredentials = (published) ? await getCmrSettingsMethod() : {}; const params = { bucketTypes, cmrFile, cmrGranuleUrlType, distEndpoint, distributionBucketMap, files, granuleId, producerGranuleId: producerGranuleId || granuleId, updateGranuleIdentifiers, excludeDataGranule, }; let metadataObject; let etag; if (isECHO10Filename(filename)) { ({ metadataObject, etag } = await updateEcho10XMLMetadata(params)); } else if (isUMMGFilename(filename)) { ({ metadataObject, etag } = await updateUMMGMetadata(params)); } else { throw new errors.CMRMetaFileNotFound(`Invalid CMR filetype: ${filename}`); } if (published) { // post metadata Object to CMR const cmrPublishObject = { filename, metadataObject, granuleId, }; return { ...await publish2CMRMethod(cmrPublishObject, cmrCredentials), etag }; } return { ...cmrFile, etag }; } /** * Update CMR Metadata record with the information contained in updatedFiles * @param {Object} params - parameter object * @param {string} params.granuleId - granuleId * @param {Object} params.updatedFiles - list of file objects that might have different * information from the cmr metadatafile and the CMR service. * @param {string} params.distEndpoint - distribution endpoint URL * @param {boolean} params.published - boolean true if the data should be published to * the CMR service. * @param {{ [key: string]: string }} params.bucketTypes - map of bucket names to bucket types * @param {string} params.cmrGranuleUrlType - type of granule CMR url * @param {distributionBucketMap} params.distributionBucketMap - Object with bucket:tea-path mapping * for all distribution buckets */ async function reconcileCMRMetadata({ granuleId, updatedFiles, distEndpoint, published, bucketTypes, cmrGranuleUrlType = 'both', distributionBucketMap, }) { const cmrMetadataFiles = getCmrFileObjs(updatedFiles); if (cmrMetadataFiles.length === 1) { return await updateCMRMetadata({ granuleId, cmrFile: cmrMetadataFiles[0], files: updatedFiles, distEndpoint, published, cmrGranuleUrlType, distributionBucketMap, bucketTypes, }); } if (cmrMetadataFiles.length > 1) { log.error('More than one cmr metadata file found.'); } return Promise.resolve(); } /** * Creates the query object used in POSTing to CMR. * This query is a compound conditional using JSONQueryLanguage supported by CMR. *