pds4-tools
Version:
Tools to generate and use PDS4 metadata.
454 lines (385 loc) • 16.4 kB
JavaScript
;
/*eslint-disable no-console*/
/**
* Build a PDS4 Collection inventory file.
*
* Extract information from a PDS4 label and generate a PDS4 collection inventory.
*
* @author Todd King
**/
const fs = require('fs');
const yargs = require('yargs');
const path = require('path');
const fastXmlParser = require('fast-xml-parser');
const XMLEngine = require('fast-xml-parser').j2xParser;
const readlines = require('n-readlines');
const util = require('util');
const walk = require('walk');
const crypt = require('crypto');
// Configure the app
var options = yargs
.version('1.0.1')
.usage('Build a collection index file.\n\nExtract information from a PDS4 labels in a directory and generate a collection inventory. If supplied an existing collection label the label will be updated to match the generated collection inventory file information.\n\nWhen a collection label is provided the tool will update the Time_Coordinates.start_date_time, Time_Coordinates.stop_date_time and File_Area_Inventory.File information based on information found during the scan. It will also replace the contents Primary_Results_Summary, Target_Information and Observing_System with a roll-up of information found in each of these sections in the data products.\n\nUsage:\n\npds-collection-builder [args] <directory>')
.example('$0 -i urn:nasa:pds:mission.collection -o inventory.csv .', 'generate a collection index for products in the current directory and write inventory in "inventory.csv"')
.epilog("Development funded by NASA's PDS project at UCLA.")
.showHelpOnFail(false, 'Specify --help for available options')
.help('h')
// version
.options({
// help text
'h' : {
alias : 'help',
description: 'Show information about the app.'
},
// Verbose
'v' : {
alias : 'verbose',
description: 'Show progress and other performance information.',
type: 'boolean'
},
// Collection ID
'i' : {
alias : 'id',
description: 'Logical ID for the collection.',
type: 'string',
default: ""
},
// Version ID
'd' : {
alias : 'vid',
description: 'Version ID for the collection.',
type: 'string',
default: ""
},
// Append to inventory
'a' : {
alias : 'append',
description: 'Append the found products to the current inventory',
type: 'boolean'
},
// Collection label file
'c' : {
alias : 'collection',
description: 'File name of the collection label.',
type: 'string',
default: ""
},
// Output file
'o' : {
alias : 'output',
description: 'Output file name for collection inventory.',
type: 'string',
default: ""
},
})
.argv
;
var args = options._; // Remaining non-hyphenated arguments
/**
* Check if two items are the same.
*
* Inspects two objects and compares values. Objects can be of mixed type (array, string, objects)
**/
var isSame = function(item1, item2) {
if( item1.constructor === Object
&& item2.constructor === Object ) {
// item1 and item2 are objects
// console.log('Checking objects');
var keys1 = Object.keys(item1);
var keys2 = Object.keys(item2);
if( ! isSame(keys1, keys2) ) return false; // Not same structure
// console.log('Objects have same elements');
// Compare each element
for(i = 0; i < keys1.length; i++) {
if( ! isSame( item1[keys1[i]], item2[keys1[i]] ) ) return false;
}
return true; // The objects are the same
}
// If item1 is an array and item2 is an array check if all values in item2 are in item1
if( Array.isArray(item1) && Array.isArray(item2)) {
// console.log('Checking array values');
for(var i = 0; i < item2.length; i++) {
if( ! isSame(item1, item2[i])) return false; // Lists are different
}
return true; // Lists are the same
}
// If item1 is an array and item2 is a value check if value in array
if( Array.isArray(item1) && ! Array.isArray(item2)) {
// console.log('Checking array membership');
for(var i = 0; i < item1.length; i++) {
if(isSame(item1[i], item2)) return true; // item2 is in list
}
return false; // Not in list
}
// If item1 is a value and item2 is a value check if values are equal
// console.log('Checking values: ' + item1 + ' ?= ' + item2);
return (item1 == item2);
}
/**
* Coerce an item to an array if its not already.
*
**/
var asArray = function(item) {
if( ! item) return []; // Empty array
if( ! Array.isArray(item)) { var t = []; t.push(item); item = t; } // Coerce to array
return item;
}
/**
* Merge two arrays into a unique list.
*
**/
var mergeArray = function(list1, list2) {
if( ! Array.isArray(list1) && ! Array.isArray(list2)) return []; // If neither a list return empty list
if( ! Array.isArray(list1) ) return list2; // if list1 not, return list2
if( ! Array.isArray(list2)) return list1; // if list2 not, return list1
// Merge lists
for(var i = 0; i < list2.length; i++) {
if( ! isSame(list1, list2[i]) ) { list1.push(list2[i]); }
}
return list1; // Merged list
}
var main = function(args)
{
var pathname = ".";
var cnt = 0;
var records = 0;
// Check arguments - must have a collection id
if(options.id.length == 0 && options.collection.length == 0) {
console.log("");
console.log("Missing collection id. Use -i or -c to specify. Use -h to show help information.");
return;
}
if(args.length > 0) { pathname = args[0]; }
if(options.verbose) { console.log('Processing: ' + pathname); }
// var options = {ext : ".xml", recurse: true};
var walkOptions = {
followLinks: false
};
var start = Date.now();
var stamp = new Date(start);
if(options.verbose) console.log("Start: " + stamp.toUTCString());
var collectionLabel = null;
var labelHeader = "";
if(options.collection.length > 0) { // Parse label - extract LID and inventory file name
var filename = path.normalize(options.collection);
var xmlDoc = fs.readFileSync(filename, 'utf8');
// console.log(xmlDoc);
var content = fastXmlParser.parse(xmlDoc, {
ignoreAttributes : false
});
var collectionLabel = content;
var product = Object.keys(content)[0];
if( product != 'Product_Collection') { // Not a PDS4 collection label
console.log("Error: File is not a collection label.");
console.log(options.collection);
return;
}
// Get collection id and output file name
if(options.id.length == 0) { // Set from label - otherwise command line option overrides
options.id = content[product].Identification_Area.logical_identifier;
}
if(options.vid.length == 0) { // Set from label - otherwise command line option overrides
options.vid = content[product].Identification_Area.version_id;
}
if(options.output.length == 0) { // Set from label - otherwise command line option overrides
options.output = path.normalize(path.join(path.dirname(options.collection), content[product].File_Area_Inventory.File.file_name));
}
if(options.append) { // Start with current record count
records = parseInt(content[product].File_Area_Inventory.Inventory.records);
}
// Read the XML file up to the root document tag (to <Identification_Area>)
// This is a cludge because fastXMLParser does no preserve processing instructions
// and does a poor job formating the attributes in the root document tag.
// We add this "header" to the output document.
var liner = new readlines(filename);
var next;
var delim = "";
while (next = liner.next()) {
var buffer = next.toString('utf8');
if(buffer.indexOf("<Identification_Area>") > -1) break; // Limit of "header"
labelHeader += delim + buffer; delim = "\n";
}
}
if(options.verbose) { console.log("Collection ID: " + options.id); }
var startTime = "";
var stopTime = "";
var targetList = [];
var primaryResultSummary = {};
var observingSystem = [];
var investigationArea = [];
var inventory = null;
var outputFlags = 'w'; // 'w' overwrite (old data will be lost)
if(options.append) outputFlags = 'a'; // 'a' append to file
if(options.output) { // Open inventory output file
inventory = fs.createWriteStream(options.output, {
flags: outputFlags
});
}
walk.walk(pathname, walkOptions)
.on("file", function (root, fileStats, next) {
if(fileStats.name.endsWith(".xml")) { // Parse
// console.log("Processing: " + root + "/" + fileStats.name);
// console.log(JSON.stringify(fileStats, null, 3));
var pathname = path.normalize(path.join(root, fileStats.name));
var xmlDoc = fs.readFileSync(pathname, 'utf8');
var content = fastXmlParser.parse(xmlDoc, { parseNodeValue : false } ); // Check syntax
var product = Object.keys(content)[0];
if(product.startsWith('Product_')) { // PDS4 product
cnt++;
if(product == 'Product_Collection') { next(); return; } // Don't include Product_Collections
var lid = content[product].Identification_Area.logical_identifier;
if( ! lid.startsWith(options.id)) { next(); return; } // Not part of collection
var vid = content[product].Identification_Area.version_id;
// Get min start_date_time and max stop_date_time
var dateTime = "";
try {
dateTime = content[product].Observation_Area.Time_Coordinates.stop_date_time;
if(stopTime < dateTime) { stopTime = dateTime; }
if(startTime.length < 1) { startTime = stopTime; }
dateTime = content[product].Observation_Area.Time_Coordinates.start_date_time;
if(startTime > dateTime) { startTime = dateTime; }
} catch(e) {
// Do nothing - elements are not required
}
// Get unique list of Target_Identification (if present)
try {
var targets = asArray(content[product].Observation_Area.Target_Identification);
for(var i = 0; i < targets.length; i++) {
var target = targets[i];
if( ! isSame(targetList, target)) targetList.push(target);
}
} catch(e) {
console.log('No Observation_Area/Target_Identification in file.');
console.log(pathname);
}
// Get Unique list of Primary_Results_Summary (if present)
try {
// Purpose
var purpose = asArray(content[product].Observation_Area.Primary_Result_Summary.purpose);
if(purpose.length > 0) { // Merge
primaryResultSummary.purpose = mergeArray(primaryResultSummary.purpose, purpose);
}
// Processing Level
var processing_level = asArray(content[product].Observation_Area.Primary_Result_Summary.processing_level);
if(processing_level.length > 0 ) { // Merge
primaryResultSummary.processing_level = mergeArray(primaryResultSummary.processing_level, processing_level);
}
// Description
if(content[product].Observation_Area.Primary_Result_Summary.description) {
prsDescription = content[product].Observation_Area.Primary_Result_Summary.description;
}
// Science Facets
var Science_Facets = asArray(content[product].Observation_Area.Primary_Result_Summary.Science_Facets);
if(Science_Facets.length > 0) { // Merge
primaryResultSummary.Science_Facets = mergeArray(primaryResultSummary.Science_Facets, Science_Facets);
}
} catch(e) {
// Do nothing - Element is optional
}
// Observing System
try{
// Observing System
var Observing_System = asArray(content[product].Observation_Area.Observing_System);
if(Observing_System.length > 0) { // Merge
observingSystem = mergeArray(observingSystem, Observing_System);
}
} catch(e) {
// Do nothing - Element is optional
}
// Investigation area
try{
// Observing System
var Investigation_Area = asArray(content[product].Observation_Area.Investigation_Area);
if(Investigation_Area.length > 0) { // Merge
investigationArea = mergeArray(investigationArea, Investigation_Area);
}
} catch(e) {
// Do nothing - Element is optional
}
// Write to inventory
var invrec = "P," + lid + "::" + vid;
if(inventory) { inventory.write(invrec + "\r\n"); } // CR/LF required
else { console.log(invrec); }
records++;
}
}
next();
})
.on("directories", function (root, dirStatsArray, next) {
cnt++;
if(options.verbose) console.log("Scanning: " + root);
next();
})
.on("errors", function (root, nodeStatsArray, next) {
next();
})
.on("end", function () {
if(inventory) inventory.end(); // Close inventory file
if(options.collection.length > 0) { // Update collection information
if(options.verbose) { console.log('Writing inventory to: ' + options.output); }
var stat = fs.statSync(options.output);
var hash = crypt.createHash('md5');
var stream = fs.createReadStream(options.output);
stream.on('data', function(data) {
hash.update(data, 'utf8')
});
stream.on('end', function() {
var md5Checksum = hash.digest('hex');
if(options.verbose) {
console.log("Extracted information")
console.log("---------------------")
console.log(' file_size: ' + stat.size);
console.log(' creation_date: ' + stat.mtime.toISOString());
console.log(' md5_checksum: ' + md5Checksum);
console.log(' records: ' + records);
console.log(' start_date_time: ' + startTime);
console.log(' stop_date_time: ' + stopTime);
console.log(' Target_Identification:')
console.log(JSON.stringify(targetList, null, 3));
console.log(' Primary_Result_Summary:');
console.log(JSON.stringify(primaryResultSummary, null, 3));
console.log(' Observing_System:');
console.log(JSON.stringify(observingSystem, null, 3));
console.log(' Investigation_Area:');
console.log(JSON.stringify(investigationArea, null, 3));
}
// Update label
var product = Object.keys(collectionLabel)[0];
// console.log("Product type: " + product);
// console.log(JSON.stringify(collectionLabel, null, 3));
collectionLabel[product].Identification_Area.logical_identifier = options.id;
if(options.vid.length > 0) collectionLabel[product].Identification_Area.version_id = options.vid;
collectionLabel[product].File_Area_Inventory.File.file_name = path.basename(options.output);
collectionLabel[product].File_Area_Inventory.File.file_size = { "#text" : stat.size.toString(), "@_unit": "byte" };
collectionLabel[product].File_Area_Inventory.File.creation_date_time = stat.mtime.toISOString();
collectionLabel[product].File_Area_Inventory.File.md5_checksum = md5Checksum;
collectionLabel[product].File_Area_Inventory.Inventory.records = records;
collectionLabel[product].Context_Area.Time_Coordinates.start_date_time = startTime;
collectionLabel[product].Context_Area.Time_Coordinates.stop_date_time = stopTime;
collectionLabel[product].Context_Area.Target_Identification = targetList;
collectionLabel[product].Context_Area.Primary_Result_Summary = primaryResultSummary;
collectionLabel[product].Context_Area.Observing_System = observingSystem;
collectionLabel[product].Context_Area.Investigation_Area = investigationArea;
var writer = new XMLEngine({
ignoreAttributes: false,
format: true,
indentBy: " ",
});
// Replace the "header" (processing instructions and root document tag)
// And fix formating when an element has an attribute by unwrapping line containing opening tag.
if(options.collection.length > 0) { // Write label
fs.writeFileSync(options.collection,
writer.parse(collectionLabel).replace(/<Product_Collection.*>/, labelHeader).replace(/\"\>\n([^ ]+) +\<\//g, '">$1</')
);
}
});
}
if(options.verbose) {
console.log((new Date(Date.now())).toUTCString());
console.log("Processed " + cnt + " files in seconds elapsed = " + (Date.now() - start)/1000);
}
})
;
}
main(args);