UNPKG

pds4-tools

Version:

Tools to generate and use PDS4 metadata.

545 lines (474 loc) 16.9 kB
#!/usr/bin/env node
"use strict";
/*eslint-disable no-console*/
/**
 * Generate a DOI request.
 *
 * Extract information from a PDS4 label and generate a DOI request which can
 * be used with Interagency Data (IAD) web services.
 *
 * @author Todd King
 **/
const fs = require('fs');
const yargs = require('yargs');
const path = require('path');
const fastXmlParser = require('fast-xml-parser');
const util = require('util');

// Configure the app.
// Every option is declared with a short key and a long alias; the rest of the
// script reads the long alias (for example `options.output`).
var options = yargs
  .version('1.0.1')
  .usage('Extract information from a PDS4 label and generate a DOI request which can be used with Interagency Data (IAD) web services.\n\nUsage:\n\n$0 [args] <files...>')
  .example('$0 example.xml', 'generate a DOI request')
  .epilog("Development funded by NASA's PDS project at UCLA.")
  .showHelpOnFail(false, "Specify --help for available options")
  .help('h')
  .options({
    // help text
    'h' : {
      alias : 'help',
      description: 'Show information about the app.'
    },

    // Output
    'o' : {
      alias : 'output',
      description: 'Output file.',
      type: 'string',
      default: null
    },

    // Pretty-print
    'x' : {
      alias : 'pretty',
      description: 'Pretty output.',
      type: 'boolean'
    },

    // Collection product type.
    // Declared boolean so that "-c label.xml" does not consume the label
    // path as the value of the flag.
    'c' : {
      alias : 'collection',
      description: 'Submit as a collection. Use for document collection, but not for data or browse collections.',
      type: 'boolean'
    },

    // Reserve DOI (boolean for the same reason as "collection")
    'r' : {
      alias : 'reserve',
      description: 'Reserve generated DOI and do not publicly release immediately.',
      type: 'boolean'
    },

    // Author list (creators).
    // The name order documented here matches what parseAuthors() reads:
    // first field is the last name, second the first name, third the middle name.
    'u' : {
      alias : 'author',
      description: 'Author list. Separate names with a semicolon (;). Each name is "last, first[, middle]".',
      type: 'string',
      default: null
    },

    // Publisher
    'p' : {
      alias : 'publisher',
      description: 'Name of the publisher.',
      type: 'string',
      default: "NASA's Planetary Data System (PDS)"
    },

    // Sponsor
    's' : {
      alias : 'sponsor',
      description: 'Name of the sponsor organization.',
      type: 'string',
      default: "National Aeronautics and Space Administration (NASA)"
    },

    /* Contact options are currently disabled.
    // Contact Name
    'n' : {
      alias : 'name',
      description: 'Contact name.',
      type: 'string',
      default: "PDS Operator"
    },

    // Contact Organization
    'g' : {
      alias : 'organization',
      description: 'Name of the contact organization.',
      type: 'string',
      default: "Planetary Data System (PDS)"
    },

    // Contact Email
    'e' : {
      alias : 'email',
      description: 'Contact email address.',
      type: 'string',
      default: "pds-operator@jpl.nasa.gov"
    },

    // Contact telephone number
    't' : {
      alias : 'phone',
      description: 'Contact telephone number.',
      type: 'string',
      default: "818.393.7165"
    },
    */

    // Availability
    'a' : {
      alias : 'availability',
      description: 'The name of any office or organization that can offer additional help in obtaining or utilizing the dataset.',
      type: 'string',
      default: "NSSDCA"
    },

    // Landing page URL ("%s" is replaced with the logical identifier)
    'n' : {
      alias : 'landing',
      description: 'The landing page URL.',
      type: 'string',
      default: "https://pds.jpl.nasa.gov/ds-view/pds/viewCollection.jsp?identifier=%s"
    },

    // Publication date
    'd' : {
      alias : 'date',
      description: 'Publication date (YYYY-MM-DD).',
      type: 'string',
      default: ""
    },

    // Contributor
    'b' : {
      alias : 'contributor',
      description: 'Contributor organization.',
      type: 'string',
      default: "PDS Planetary Plasma Interactions (PPI) Node"
    },

    // Country
    'y' : {
      alias : 'country',
      description: 'Standard country code.',
      type: 'string',
      default: "US"
    },

    // Language
    'l' : {
      alias : 'language',
      description: 'Language for text.',
      type: 'string',
      default: "English"
    },

    // Format
    'f' : {
      alias : 'format',
      description: 'Output format.',
      choices: ['xml', 'json'],
      default: 'xml'
    },
  })
  .argv
  ;

// Global variables
var args = options._; // Remaining non-hyphenated arguments
var outputFile = null; // Output stream; null means "write to the console".

/**
 * Write to output file if defined, otherwise to console.log()
 *
 * When writing to a file a newline is appended only if the "pretty" option
 * is set; console.log() always ends the line.
 *
 * @param {string} str  Text to write.
 **/
function outputWrite(str) {
  if(outputFile == null) {
    console.log(str);
    return;
  }
  outputFile.write(str);
  if(options.pretty) outputFile.write("\n");
}

/**
 * Close an output file if one is assigned.
 **/
function outputEnd() {
  if(outputFile) {
    outputFile.end();
    outputFile = null;
  }
}

/**
 * Search through a Modification_History and find the most recent modification date.
 *
 * Modification_Detail may be a single object or an array of objects.
 * Dates are compared as strings, which orders ISO "YYYY-MM-DD" values
 * chronologically.
 *
 * @param {object} hist  Parsed Modification_History element (may be missing).
 * @returns {string} The latest modification_date, or "" when none is present.
 **/
function getRecentModification(hist) {
  if( ! hist || ! hist.Modification_Detail) return "";

  var details = Array.isArray(hist.Modification_Detail)
    ? hist.Modification_Detail
    : [hist.Modification_Detail];

  var modDate = "";
  for(var i = 0; i < details.length; i++) {
    var d = details[i] ? details[i].modification_date : undefined;
    if(d > modDate) modDate = d;
  }
  return modDate;
}

/**
 * Split an ISO date string into zero-padded year, month and day strings.
 *
 * A leading "YYYY-MM-DD" is used verbatim so the result never depends on
 * the local time zone. Any other form is parsed with Date and read back
 * with the UTC getters; getUTCMonth() is zero-based, hence the "+ 1".
 *
 * @param {string} isoDate  ISO date or date-time string.
 * @returns {?{year: string, month: string, day: string}} null when the
 *          value cannot be interpreted as a date.
 **/
function splitIsoDate(isoDate) {
  var text = String(isoDate == null ? "" : isoDate).trim();

  var ymd = /^(\d{4})-(\d{2})-(\d{2})/.exec(text);
  if(ymd) return { year: ymd[1], month: ymd[2], day: ymd[3] };

  var d = new Date(text);
  if(Number.isNaN(d.getTime())) return null;

  return {
    year: String(d.getUTCFullYear()),
    month: ("0" + (d.getUTCMonth() + 1)).slice(-2),
    day: ("0" + d.getUTCDate()).slice(-2)
  };
}

/**
 * Format an ISO date string into the "MM/DD/YYYY" form used in the JSON request.
 *
 * @param {string} isoDate  ISO date string.
 * @returns {string} "MM/DD/YYYY", or "" when the date cannot be parsed.
 **/
function formatDate(isoDate) {
  var parts = splitIsoDate(isoDate);
  if( ! parts) return "";
  return parts.month + "/" + parts.day + "/" + parts.year;
}

/**
 * Format an ISO date string into the "YYYY-MM-DD" form used in the XML request.
 *
 * @param {string} isoDate  ISO date string.
 * @returns {string} "YYYY-MM-DD", or "" when the date cannot be parsed.
 **/
function formatDateYYYYMMDD(isoDate) {
  var parts = splitIsoDate(isoDate);
  if( ! parts) return "";
  return parts.year + "-" + parts.month + "-" + parts.day;
}
/**
 * Format a PDS4 product type into a string useful as the
 * product_type_specific in an OSTI IAD request.
 *
 * @param {string} productType  Root element name of the label, e.g. "Product_Collection".
 * @returns {string} "PDS4 " followed by the type with a leading "Product_" removed.
 **/
function formatProduct(productType) {
  return "PDS4 " + productType.replace(/^Product_/, "");
}

/**
 * Parse an author list string into an array of author objects.
 *
 * Each author has the format "last, first[, middle]" and authors are
 * separated by a semi-colon (;). Surrounding whitespace is removed from every
 * name part and empty entries (for example after a trailing ";") are skipped.
 *
 * @param {string} authors  Author list.
 * @returns {Array<object>} One object per author with "affiliations" (always
 *          an empty array) and whichever of "last_name", "first_name" and
 *          "middle_name" were supplied.
 **/
function parseAuthors(authors) {
  var list = [];
  var entries = String(authors == null ? "" : authors).split(";");

  for(var i = 0; i < entries.length; i++) {
    var entry = entries[i].trim();
    if(entry.length == 0) continue;

    var names = entry.split(",").map(function(part) { return part.trim(); });
    var author = { "affiliations": [] };
    if(names[0]) author.last_name = names[0];
    if(names[1]) author.first_name = names[1];
    if(names[2]) author.middle_name = names[2];
    list.push(author);
  }

  return list;
}

/**
 * Compile a semi-colon separated list of keywords by harvesting from all areas of a PDS label.
 *
 * Keywords are collected in this order: observing system component names,
 * target names, investigation names, primary result summary purpose and
 * processing level, mission phase names found under Mission_Area, and the
 * keywords in Citation_Information. Every element may be absent, a single
 * value or an array.
 *
 * @param {object} product  Parsed content of the label's root element.
 * @returns {string} Keywords joined with ";" ("" when none are found).
 **/
function getKeywords(product) {
  var keywords = [];

  // Normalize "missing / single / array" into an array.
  var asList = function(value) {
    if(value === undefined || value === null) return [];
    return Array.isArray(value) ? value : [value];
  };

  // Append every non-empty scalar value; objects are skipped so that
  // "[object Object]" never ends up in the keyword list.
  var add = function(value) {
    asList(value).forEach(function(item) {
      if(item === undefined || item === null || typeof item === "object") return;
      var text = String(item).trim();
      if(text.length > 0) keywords.push(text);
    });
  };

  // Append the "name" of every object in a single-or-array element.
  var addNames = function(items) {
    asList(items).forEach(function(item) {
      if(item && typeof item === "object") add(item.name);
    });
  };

  var context = (product && product.Context_Area) || {};

  asList(context.Observing_System).forEach(function(system) {
    if(system && typeof system === "object") addNames(system.Observing_System_Component);
  });

  addNames(context.Target_Identification);
  addNames(context.Investigation_Area);

  asList(context.Primary_Result_Summary).forEach(function(summary) {
    if( ! summary || typeof summary !== "object") return;
    add(summary.purpose);
    add(summary.processing_level);
  });

  // Mission_Area holds mission specific elements; look at each child element
  // (not at the key string) for mission phase names.
  var missionArea = context.Mission_Area;
  if(missionArea && typeof missionArea === "object") {
    Object.keys(missionArea).forEach(function(key) {
      asList(missionArea[key]).forEach(function(child) {
        if(child && typeof child === "object") add(child.mission_phase_name);
      });
    });
  }

  var idArea = (product && product.Identification_Area) || {};
  if(idArea.Citation_Information) add(idArea.Citation_Information.keyword);

  return keywords.join(";");
}
/**
 * Application entry point.
 *
 * Reads the PDS4 label named by the first argument, gathers the identifier,
 * title, description, authors, publication date and keywords, then writes a
 * DOI request in the selected format (XML or JSON) to the output file, or to
 * the console when no output file is given. When an output file is used, a
 * curl command for submitting the request is printed to the console.
 *
 * @param {string[]} args  Non-option command line arguments; only the first is used.
 **/
var main = function(args) {
  // If no files or options show help
  if(args.length == 0) {
    yargs.showHelp();
    return;
  }

  var productType = options.collection ? "Collection" : "Dataset";

  // Read and parse the label before opening the output so that a missing
  // label does not leave an empty output file behind.
  // parseNodeValue is disabled so values such as a version_id of "1.0" stay
  // text; the parser's default converts numeric-looking text to numbers.
  var pathname = args[0];
  var xmlDoc = fs.readFileSync(pathname, 'utf8');
  var content = fastXmlParser.parse(xmlDoc, { parseNodeValue: false });

  // Output
  if(options.output) {
    outputFile = fs.createWriteStream(options.output);
  }

  // The first key of the parsed document is the root element (product type)
  var product = Object.keys(content)[0];
  var idArea = content[product].Identification_Area;
  var citation = idArea.Citation_Information;

  // General info (lid, title, description)
  var lid = idArea.logical_identifier;
  var version = idArea.version_id;
  var title = idArea.title;

  // Use the citation description when there is one, otherwise the title
  var description = title;
  if(citation && citation.description) description = citation.description;

  // Author list: the label's author_list takes precedence over "-u"
  var authorList = options.author;
  if(citation && citation.author_list) authorList = citation.author_list;
  if( ! authorList) {
    // Warnings go to stderr so they never mix with a request written to stdout
    console.error('Warning: Missing author information. Use "-u" to specify on command line.');
    authorList = "unknown";
  }
  var authors = parseAuthors(authorList);

  // Publication date: most recent modification unless overridden with "-d"
  var hist = idArea.Modification_History;
  var pubdate = (hist && hist.Modification_Detail) ? getRecentModification(hist) : "";
  if(options.date && options.date.length > 0) pubdate = options.date; // Override
  if( ! pubdate) {
    console.error('Warning: Missing publication date. Use "-d" to specify on command line.');
  }

  // Keywords
  var keywords = getKeywords(content[product]);

  // Landing page
  var landing = util.format(options.landing, lid);

  // Show some instructions when the request goes to a file
  if(options.output) {
    var contentType = (options.format == 'json') ? 'application/json' : 'application/xml';
    console.log('DOI request information can be submitted with the command:');
    console.log("");
    console.log('curl -u LOGINNAME:PASSWORD -X POST -H "Content-Type: ' + contentType + '" --data'
      + ' @' + options.output
      + ' https://www.osti.gov/iad2/api/records');
    console.log("");
  }

  if(options.format == 'json') {
    // IAD2 JSON format
    var iad2 = {
      "records": [{
        // "id": "221299", // unique identifier for the record - used for updates
        // "accession_number" : "unique",
        "title": title,
        "description": description,
        "authors": authors,
        "contributors": [
          {
            "full_name": options.contributor,
            "contributor_type": "Editor",
            "affiliations": []
          }
        ],
        // "doi": "10.5072/for-example-purposes/221299",
        // "doi_infix":"for-example-purposes",
        "publisher": options.publisher,
        "country": options.country, // "US"
        "product_type": productType, // "Dataset","Text" or "Collection"
        "product_type_specific": formatProduct(product),
        "language": options.language, // "English"
        "publication_date": formatDate(pubdate),
        "date_added": formatDate(pubdate),
        // "date_updated":"2017-11-27",
        "sponsoring_organization": options.sponsor,
        "research_organization": options.contributor,
        // "report_numbers": "98776",
        // "contract_numbers": "DE-39043-2017",
        "other_numbers": lid + '::' + version,
        "availability": options.availability,
        "keywords": keywords,
        // "related_identifiers": [
        //   { "identifier_value":"10.5072/23432",
        //     "identifier_type":"DOI",
        //     "relation_type":"Cites"
        //   }
        // ]
      }],
      "start": 0,
      "total": 1
    };

    // Conditional additions
    if(options.reserve) { // Do not put value in site_url
      iad2.records[0]['status'] = "Reserved";
    } else {
      iad2.records[0]['site_url'] = landing;
    }

    // Only the record list is submitted; write it exactly once
    var request = options.pretty
      ? JSON.stringify(iad2.records, null, 3) // Formatted string
      : JSON.stringify(iad2.records);         // One long string
    outputWrite(request);
  } else {
    // IAD XML format
    outputWrite('<?xml version="1.0" encoding="UTF-8" ?>');
    outputWrite('<!-- Generated from: ' + pathname + ' -->');
    outputWrite('<records start="0" total="1">');
    outputWrite(' <record>');
    outputWrite(' <title>' + title + '</title>');
    outputWrite(' <sponsoring_organization>' + options.sponsor + '</sponsoring_organization>');
    outputWrite(' <research_organization>' + options.contributor + '</research_organization>');
    outputWrite(' <product_type>' + productType + '</product_type>'); // "Dataset","Text" or "Collection"
    outputWrite(' <product_type_specific>' + formatProduct(product) + '</product_type_specific>');
    outputWrite(' <language>' + options.language + '</language>'); // "English"
    outputWrite(' <publisher>' + options.publisher + '</publisher>');
    outputWrite(' <publication_date>' + formatDateYYYYMMDD(pubdate) + '</publication_date>');
    outputWrite(' <product_date_added>' + formatDateYYYYMMDD(pubdate) + '</product_date_added>');
    outputWrite(' <other_nos>' + lid + '::' + version + '</other_nos>');
    outputWrite(' <availability>' + options.availability + '</availability>');
    outputWrite(' <country>' + options.country + '</country>');
    outputWrite(' <description>' + description + '</description>');
    if(options.reserve) { // Do not put value in <site_url>
      outputWrite(' <site_url/>');
    } else {
      outputWrite(' <site_url>' + landing + '</site_url>');
    }
    outputWrite(' <keywords>' + keywords + '</keywords>');
    outputWrite(' <authors>');
    for(var i = 0; i < authors.length; i++) {
      outputWrite(' <author>');
      if(authors[i].first_name) outputWrite(' <first_name>' + authors[i].first_name + '</first_name>');
      if(authors[i].last_name) outputWrite(' <last_name>' + authors[i].last_name + '</last_name>');
      if(authors[i].middle_name) outputWrite(' <middle_name>' + authors[i].middle_name + '</middle_name>');
      outputWrite(' </author>');
    }
    outputWrite(' </authors>');
    outputWrite(' <contributors>');
    outputWrite(' <full_name>' + options.contributor + '</full_name>');
    outputWrite(' <contributor_type>Editor</contributor_type>');
    outputWrite(' <affiliations/>');
    outputWrite(' </contributors>');
    outputWrite(' <related_identifier/>');
    outputWrite(' <contract_numbers>' + '</contract_numbers>');
    /* Contact information is currently disabled.
    outputWrite(' <contact_name>' + options.name + '</contact_name>');
    outputWrite(' <contact_org>' + options.organization + '</contact_org>');
    outputWrite(' <contact_email>' + options.email + '</contact_email>');
    outputWrite(' <contact_phone>' + options.phone + '</contact_phone>');
    */
    // The reserve marker belongs in the request itself, not on the console
    if(options.reserve) {
      outputWrite(' <set_reserved/>');
    }
    outputWrite(' </record>');
    outputWrite('</records>');
  }

  outputEnd();
}

main(args);