UNPKG

spase-resource-tools

Version:
661 lines (556 loc) 17.5 kB
#!/usr/bin/env node "use strict"; /*eslint-disable no-console*/ /** Extract reference information from a DOI request XML Author: Todd King **/ const fs = require('fs'); const yargs = require('yargs'); const fastXmlParser = require('fast-xml-parser'); // Configure the app var options = yargs .version('1.0.2') .usage('Extract information from a SPASE resource description and generate a DataCite formated DOI request that can be submitted through EZID web service API.\n\nUsage:\n\n$0 [args] <files...>') .example('$0 example.xml', 'generate a DOI request') .epilog("Development funded by NASA's HPDE project at UCLA.") .showHelpOnFail(false, "Specify --help for available options") .help('h') // version .options({ // help text 'h' : { alias : 'help', description: 'Show information about the app.' }, // Author list (creators) 'a' : { alias : 'author', description: 'Author list. Separate names with semi-colon. Names are first, last[, middle]', type: 'string', default: null }, // Assigned DOI 'x' : { alias : 'doi', description: 'The DOI assigned to or reserved for the resource.', type: 'string', default: '(:tba)' }, // Contributor list 'c' : { alias : 'contributor', description: 'Contributor list. Separate names with semi-colon. Names are first, last[, middle]', type: 'string', default: null }, // Publisher 'p' : { alias : 'publisher', description: 'Name of the publisher.', type: 'string', default: null }, // Identifier 'i' : { alias : 'id', description: 'Resource identifier.', type: 'string', default: null }, // Landing page URL 'l' : { alias : 'landing', description: 'The landing page URL.', type: 'string', default: "https://hpde.io/%s" }, // Publication date 'd' : { alias : 'date', description: 'Publication date (YYYY-MM-DD).', type: 'string', default: "" }, // Title 't' : { alias : 'title', description: 'Publication title.', type: 'string', default: "" }, // Output 'o' : { alias : 'output', description: 'Output file. Written in format suitable for an API submission.', type: 'string', default: null }, }) .argv ; // Globals var args = options._; // Remaining non-hyphenated arguments var outputFile = null; // None defined. /** * Write to output file if defined, otherwise to console.log() **/ var outputWrite = function(indent, str) { if(outputFile == null) { var prefix = ""; for(var i = 0; i < indent; i++) { prefix += " "; } console.log(prefix + str); } else { outputFile.write(str); } } /** * Close an output file if one is assigned. **/ var outputEnd = function() { if(outputFile) { outputFile.end(); outputFile = null } } /** * List all files in a directory recursively and in a synchronous fashion **/ var walkSync = function(pathname, action) { var fs = fs || require('fs'); if ( fs.statSync(pathname).isDirectory() ) { var files = fs.readdirSync(pathname); files.forEach(function(file) { // console.log('dir: ' + pathname); // console.log('file: ' + file); if ( fs.statSync(pathname + '/' + file).isDirectory() ) { walkSync(pathname + '/' + file, action); } else { action(pathname + '/' + file); } }); } else { action(pathname); } return }; /** * Split a string on semi-colons. **/ var parseList = function(delimited) { var list = []; if(delimited) { list = delimited.split(";"); for(var i = 0; i < list.length; i++) { list[i] = list[i].trim(); } } return list; } /** * Convert a value to an array is value is singular, * other wise return the array. **/ var getList = function(value) { var list = []; if( ! value) return list; // Empty if(Array.isArray(value)) list = value; else list[0] = value; return list; } /** * Convert a camelcase string to multiple space separated words. **/ var multiWord = function(value) { return value.replace(/([a-z0-9])([A-Z])/g, '$1 $2'); } /** * Make a name from a PersionID in author format (last, first[, middle]) **/ var makeAuthorName = function(personID) { var part = personID.split('/'); var name = part[part.length - 1].split('.'); var author = name[name.length - 1]; for(var i = 0; i < name.length - 1; i++) { author += ", " + name[i]; } return author; } /** * Convert a SPASE Role to a DataCite Contributor type. * **/ var convertRole = function(role) { if(role == 'Contributor') return 'Other'; if(role == 'DataProducer') return 'Producer'; if(role == 'GeneralContact') return 'ContactPerson'; if(role == 'MetadataContact') return 'ContactPerson'; if(role == 'Scientist') return 'Researcher'; if(role == 'TeamLeader') return 'ProjectLeader'; if(role == 'TeamMember') return 'ProjectMember'; if(role == 'TechnicalContact') return 'ContactPerson'; return null; // Role doesn't map } /** * Convert a SPASE Resource Type to a DataCite Content type. * **/ var convertResourceType = function(resourceType) { if(resourceType == 'DisplayData') return 'Collection'; if(resourceType == 'NumericalData') return 'Dataset'; if(resourceType == 'Catalog') return 'Dataset'; return null; // doesn't map } /** * Get the resource description. **/ var getResource = function(content) { if( ! content.Spase ) return null; // Not a SPASE description var resourceType = Object.keys(content.Spase)[1]; // 0 = "Version", 1 = Resource return content.Spase[resourceType]; } /** * Get the resource type. **/ var getResourceType = function(content) { if( ! content.Spase ) return null; // Not a SPASE description var resourceType = Object.keys(content.Spase)[1]; // 0 = "Version", 1 = Resource return resourceType; } /** * Retrieve the publisher from a resource description * or return the default value from options. **/ var getResourceID = function(resource, options) { var id = options.id; if(resource) { id = resource.ResourceID; } return id; } /** * Retrieve the publisher from a resource description * or return the default value from options. **/ var getPublisher = function(resource, options) { var pub = getAuthority(getResourceID(resource, options)); if(options.publisher) pub = options.publisher; if( resource.ResourceHeader.PublicationInfo) { pub = resource.ResourceHeader.PublicationInfo.PublishedBy; } return pub; } /** * Retrieve the publication year from a resource description * or return the default value from options. **/ var getPubYear = function(resource, options) { var year = options.date; // Should be current year if( resource.ResourceHeader.PublicationInfo) { year = resource.ResourceHeader.PublicationInfo.PublicationDate.substring(0,4); } else { year = resource.ResourceHeader.ReleaseDate.substring(0,4) } return year; } /** * Retrieve the title (ResourceName) from a resource description * or return the default value from options. **/ var getTitle = function(resource, options) { var name = options.title; if(resource.ResourceHeader.ResourceName) { name = resource.ResourceHeader.ResourceName; } return name; } /** * Retrieve the funding information from a resource description. **/ var getFunding = function(resource) { return getList(resource.ResourceHeader.funding); } /** * Retrieve the funding information from a resource description * or return the default value from options. **/ var getDOI = function(resource, options) { var doi = options.doi; if(resource.ResourceHeader.DOI) { doi = resource.ResourceHeader.DOI; } return doi; } /** * Retrieve the description from a resource description * or return the default value from options. **/ var getDescription = function(resource, options) { var desc = options.description; // Should be current year desc = resource.ResourceHeader.Description; if(options.output) { // Remove new lines in text desc = desc.replace(/[\r\n]+/g, " "); } return desc; } /** * Parse a ResourceID and extract the naming authority. **/ var getAuthority = function(resourceID) { var buffer = ""; if( ! resourceID) return buffer; buffer = resourceID.replace(/spase:\/\//, ""); var part = buffer.split('/'); return part[0]; }; /** * Build up a list of keywords. * * List is a combinartion of ProcessingLevel, MeasurementType and values in Keyword **/ var getKeywords = function(resource) { var keywords = getList(resource.Keyword); if(resource.ProcessingLevel) keywords.push(resource.ProcessingLevel); if(resource.MeasurementType) keywords.push(multiWord(resource.MeasurementType)); return keywords; }; /** * Retrieve the author list from a resource description * or return the default value from options. * * If PublicationInfo is present use the declared authorlist, * otherwise build up an author list based on contacts. * Contacts with a Role of PrincipalInvestigator, CoInvestigator * or Contributor are included in the author list. **/ var getAuthorList = function(resource, options) { var list = parseList(options.author); // If publicationInfo - use given author list if(resource.ResourceHeader.PublicationInfo) { if(resource.ResourceHeader.PublicationInfo.Authors) { list = parseList(resource.ResourceHeader.PublicationInfo.Authors) return list; } } // If contacts - use them if( ! resource.ResourceHeader.Contact) return list; // Start with Principal Investigator var contacts = getList(resource.ResourceHeader.Contact) for(var k = 0; k < contacts.length; k++) { var item = contacts[k]; var role = getList(item.Role); for(var n = 0; n < role.length; n++) { if(role[n] == "PrincipalInvestigator") { list.push(makeAuthorName(item.PersonID)); } } }; // Add Co-Investigators for(var k = 0; k < contacts.length; k++) { var item = contacts[k]; var role = getList(item.Role); for(var n = 0; n < role.length; n++) { if(role[n] == "CoInvestigator") { list.push(makeAuthorName(item.PersonID)); } } }; // Add InstrumentLead for(var k = 0; k < contacts.length; k++) { var item = contacts[k]; var role = getList(item.Role); for(var n = 0; n < role.length; n++) { if(role[n] == "InstrumentLead") { list.push(makeAuthorName(item.PersonID)); } } }; // Add Co-Investigators for(var k = 0; k < contacts.length; k++) { var item = contacts[k]; var role = getList(item.Role); for(var n = 0; n < role.length; n++) { if(role[n] == "CoInvestigator") { list.push(makeAuthorName(item.PersonID)); } } }; // Add TeamLeader for(var k = 0; k < contacts.length; k++) { var item = contacts[k]; var role = getList(item.Role); for(var n = 0; n < role.length; n++) { if(role[n] == "TeamLeader") { list.push(makeAuthorName(item.PersonID)); } } }; // Add TeamMember for(var k = 0; k < contacts.length; k++) { var item = contacts[k]; var role = getList(item.Role); for(var n = 0; n < role.length; n++) { if(role[n] == "TeamMember") { list.push(makeAuthorName(item.PersonID)); } } }; // Add DataProducer for(var k = 0; k < contacts.length; k++) { var item = contacts[k]; var role = getList(item.Role); for(var n = 0; n < role.length; n++) { if(role[n] == "DataProducer") { list.push(makeAuthorName(item.PersonID)); } } }; // Add Contributor for(var k = 0; k < contacts.length; k++) { var item = contacts[k]; var role = getList(item.Role); for(var n = 0; n < role.length; n++) { if(role[n] == "Contributor") { list.push(makeAuthorName(item.PersonID)); } } }; return list; }; /** * Retrieve the contributor list from a resource description * or return the default value from options. * * Contacts with a Role of Contributor are included in the author list. **/ var getContributorList = function(resource, options) { var list = []; var names = parseList(options.contributor); if(names) { for(var i = 0; i < names.length; i++) { list.push({name: names[i], role: 'ContactPerson'}); } } // If contacts - use them if( ! resource.ResourceHeader.Contact) return list; // Add Contributors var contacts = getList(resource.ResourceHeader.Contact) for(var k = 0; k < contacts.length; k++) { var item = contacts[k]; var role = getList(item.Role); for(var n = 0; n < role.length; n++) { var contribType = convertRole(role[n]); if(contribType) { list.push({ name: makeAuthorName(item.PersonID), role: contribType} ); } } }; return list; } /** Write DOI request for a SPASE resource description. **/ var writeRequest = function(pathname) { // XML Document if(options.verbose) { console.log('Parsing: ' + pathname); } var xmlDoc = fs.readFileSync(pathname, 'utf8'); var content = fastXmlParser.parse(xmlDoc); // Check syntax var resource = getResource(content); if( ! resource) { console.log('File is not a SPASE resource description: ' + pathname); return; } var resourceType = getResourceType(content); outputWrite(0, '<resource xmlns="http://datacite.org/schema/kernel-4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4/metadata.xsd">'); outputWrite(1, '<identifier identifierType="DOI">' + getDOI(resource, options) + '</identifier>'); outputWrite(1, '<creators>'); var list = getAuthorList(resource, options); for(var i = 0; i < list.length; i++) { outputWrite(2, '<creator>'); outputWrite(3, '<creatorName>' + list[i] + '</creatorName>'); outputWrite(2, '</creator>'); } outputWrite(1, '</creators>'); outputWrite(1, '<titles>'); outputWrite(2, '<title>' + getTitle(resource, options) + '</title>'); outputWrite(1, '</titles>'); outputWrite(1, '<publisher>' + getPublisher(resource, options) + '</publisher>'); outputWrite(1, '<publicationYear>' + getPubYear(resource, options) + '</publicationYear>'); var keywords = getKeywords(resource); if(keywords.length > 0) { outputWrite(1, '<subjects>'); for(var i = 0; i < keywords.length; i++) { if( keywords[i] ) { outputWrite(2, '<subject xml:lang="en">' + keywords[i] + '</subject>'); } } outputWrite(1, '</subjects>'); } var contrib = getContributorList(resource, options); if(contrib.length > 0) { outputWrite(1, '<contributors>'); for(var i = 0; i < contrib.length; i++) { outputWrite(2, '<contributor contributorType="' + contrib[i].role + '">'); outputWrite(3, '<contributorName>' + contrib[i].name + '</contributorName>'); outputWrite(2, '</contributor>'); } outputWrite(1, '</contributors>'); } outputWrite(1, '<language>en-US</language>'); outputWrite(1, '<resourceType resourceTypeGeneral="' + convertResourceType(resourceType) + '">' + convertResourceType(resourceType) + '</resourceType>'); outputWrite(1, '<alternateIdentifiers>'); outputWrite(2, '<alternateIdentifier alternateIdentifierType="SPASE">' + getResourceID(resource, options) + '</alternateIdentifier>'); outputWrite(1, '</alternateIdentifiers>'); outputWrite(1, '<descriptions>'); outputWrite(2, '<description descriptionType="Abstract">'); outputWrite(0, getDescription(resource, options)); outputWrite(2, '</description>'); outputWrite(1, '</descriptions>'); var funding = getFunding(resource); if(funding.lenght > 0) { outputWrite(1, '<fundingReferences>'); for(var i =0; i < funding.length; i++) { outputWrite(2, '<fundingReference>'); outputWrite(3, '<funderName>' + funding[i].Agency + '</funderName>'); outputWrite(3, '<awardNumber>' + funding[i].AwardNumber + '</awardNumber>'); outputWrite(2, '</fundingReference>'); } outputWrite(1, '</fundingReferences>'); } outputWrite(0, '</resource>'); } /** * Application entry point. **/ var main = function(args) { if (process.argv.length == 0) { yargs.showHelp(); return; } // Output if(options.output) { outputFile = fs.createWriteStream(options.output); // outputWrite(0, 'datacite:'); } var pathname = args[0]; // XML Document if(options.verbose) { console.log('Parsing: ' + pathname); } var xmlDoc = fs.readFileSync(pathname, 'utf8'); var content = fastXmlParser.parse(xmlDoc); // Check syntax var resource = getResource(content); if( ! resource) { console.log('File is not a SPASE resource description: ' + pathname); return; } var resourceID = getResourceID(resource, options); resourceID = resourceID.replace("spase://", ""); var target = "_target=" + options.landing; target = target.replace("%s", resourceID + ".html"); outputWrite(0, target); outputWrite(0, 'datacite: <?xml version="1.0"?>'); writeRequest(pathname); /* No longer process all args, just first // For all passed arguments for(var i = 0; i < args.length; i++) { walkSync(args[i], writeRequest); } */ outputEnd(); } main(args);