UNPKG

reflib

Version:
410 lines (372 loc) 14.6 kB
const _ = {
	mapValues: require('lodash/mapValues'),
	pickBy: require('lodash/pickBy'),
	throttle: require('lodash/throttle'),
};
const dateFns = {
	isValid: require('date-fns/isValid'),
	format: require('date-fns/format'),
	parse: require('date-fns/parse'),
};
const events = require('events');
const fs = require('fs');
const fsPath = require('path');
const promisify = require('util').promisify;

/**
 * RefLib - reference library parser / writer
 * Dispatches parsing and output to one of the format drivers listed in `reflib.supported`
 * and offers optional per-reference clean-up helpers under `reflib.fix`
 */
var reflib = module.exports = {
	// .supported - Supported file types {{{
	/**
	 * Collection of supported RefLib drivers and their details
	 * @type {array<Object>}
	 * @property {string} id The unique identifier of the driver
	 * @property {string} name Human readable description of the driver
	 * @property {array<string>} ext File extensions supported by the driver
	 * @property {string} filename Default filename to use when outputting
	 * @property {Object} driver NPM module of the driver
	 */
	supported: [
		{
			id: 'csv',
			name: 'Comma Seperated Values',
			ext: ['.csv'],
			filename: 'references.csv',
			driver: require('reflib-csv'),
		},
		{
			id: 'endnotexml',
			name: 'EndNote XML file',
			ext: ['.xml'],
			filename: 'endnote.xml',
			driver: require('reflib-endnotexml'),
		},
		{
			id: 'json',
			name: 'JSON file',
			ext: ['.json'],
			filename: 'library.json',
			driver: require('reflib-json'),
		},
		{
			id: 'medline',
			name: 'MEDLINE / PubMed file',
			ext: ['.nbib'],
			filename: 'medline.nbib',
			driver: require('reflib-medline'),
		},
		{
			id: 'ris',
			name: 'RIS file',
			ext: ['.ris'],
			filename: 'ris.ris',
			driver: require('reflib-ris'),
		},
		{
			id: 'tsv',
			name: 'Tab Seperated Values',
			ext: ['.tsv'],
			filename: 'references.tsv',
			driver: require('reflib-tsv'),
		},
	],
	// }}}

	// .refTypes - Supported reference types {{{
	/**
	 * A collection of supported RefLib reference types
	 * @type {array<Object>}
	 * @property {string} id The internal ID of the reference type
	 * @property {string} title The human readable description of the reference type
	 */
	refTypes: [
		{id: 'aggregatedDatabase', title: 'Aggregated Database'},
		{id: 'ancientText', title: 'Ancient Text'},
		{id: 'artwork', title: 'Artwork'},
		{id: 'audiovisualMaterial', title: 'Audiovisual Material'},
		{id: 'bill', title: 'Bill'},
		{id: 'blog', title: 'Blog'},
		{id: 'book', title: 'Book'},
		{id: 'bookSection', title: 'Book Section'},
		{id: 'case', title: 'Case'},
		{id: 'catalog', title: 'Catalog'},
		{id: 'chartOrTable', title: 'Chart or Table'},
		{id: 'classicalWork', title: 'Classical Work'},
		{id: 'computerProgram', title: 'Computer Program'},
		{id: 'conferencePaper', title: 'Conference Paper'},
		{id: 'conferenceProceedings', title: 'Conference Proceedings'},
		{id: 'dataset', title: 'Dataset'},
		{id: 'dictionary', title: 'Dictionary'},
		{id: 'editedBook', title: 'Edited Book'},
		{id: 'electronicArticle', title: 'Electronic Article'},
		{id: 'electronicBook', title: 'Electronic Book'},
		{id: 'electronicBookSection', title: 'Electronic Book Section'},
		{id: 'encyclopedia', title: 'Encyclopedia'},
		{id: 'equation', title: 'Equation'},
		{id: 'figure', title: 'Figure'},
		{id: 'filmOrBroadcast', title: 'Film or Broadcast'},
		{id: 'generic', title: 'Generic'},
		{id: 'governmentDocument', title: 'Government Document'},
		{id: 'grant', title: 'Grant'},
		{id: 'hearing', title: 'Hearing'},
		{id: 'journalArticle', title: 'Journal Article'},
		{id: 'legalRuleOrRegulation', title: 'Legal Rule or Regulation'},
		{id: 'magazineArticle', title: 'Magazine Article'},
		{id: 'manuscript', title: 'Manuscript'},
		{id: 'map', title: 'Map'},
		{id: 'music', title: 'Music'},
		{id: 'newspaperArticle', title: 'Newspaper Article'},
		{id: 'onlineDatabase', title: 'Online Database'},
		{id: 'onlineMultimedia', title: 'Online Multimedia'},
		{id: 'pamphlet', title: 'Pamphlet'},
		{id: 'patent', title: 'Patent'},
		{id: 'personalCommunication', title: 'Personal Communication'},
		{id: 'report', title: 'Report'},
		{id: 'serial', title: 'Serial'},
		{id: 'standard', title: 'Standard'},
		{id: 'statute', title: 'Statute'},
		{id: 'thesis', title: 'Thesis'},
		{id: 'unpublished', title: 'Unpublished Work'},
		{id: 'web', title: 'Web Page'},
	],
	// }}}

	/**
	 * Identify the RefLib driver to use from a filename
	 * The match is made on the lower-cased file extension only, the file is never opened
	 * @param {string} filename The filename (or path) to examine
	 * @returns {string|boolean} Either a RefLib driver ID from Reflib.supported or boolean False if the file is unrecognised
	 */
	identify: function(filename) {
		const ext = fsPath.extname(filename).toLowerCase();
		const found = reflib.supported.find(format => format.ext.includes(ext));
		return found ? found.id : false;
	},

	/**
	 * Parse an input stream, buffer or string into references
	 * If a callback is given references are collected and handed over in one go, otherwise they are emitted one at a time via the returned EventEmitter
	 * @param {string} format The Reflib driver to use, must conform to the ID of a member of Reflib.supported
	 * @param {string|Buffer|ReadableStream} input The input to parse
	 * @param {Object} [options] Additional options to use when parsing, the same object is also handed to each enabled fix
	 * @param {Object} [options.fixes] List of fixes to apply while parsing
	 * @param {boolean} [options.fixes.authors=false] Apply the behaviour of `reflib.fix.authors(ref)`
	 * @param {boolean} [options.fixes.dates=false] Apply the behaviour of `reflib.fix.dates(ref)`
	 * @param {boolean} [options.fixes.pages=false] Apply the behaviour of `reflib.fix.pages(ref)`
	 * @param {function} [callback] Callback to call as `(err, refs)` when done
	 * @returns {EventEmitter} An EventEmitter instance
	 * @throws {Error} If the arguments are malformed or the format is not in Reflib.supported
	 *
	 * @emits ref Emitted as `(ref)` for each reference parsed (only when no callback is given)
	 * @emits error Emitted as `(error)` if an error occurs (only when no callback is given)
	 * @emits progress Emitted as `(currentProgress, maxProgress)` while parsing to show progress (if known)
	 * @emits end Emitted as `()` when parsing has completed (only when no callback is given)
	 */
	parse: function(format, input, options, callback) {
		// Deal with arguments {{{
		if (format && typeof format === 'string' && typeof options === 'object' && typeof callback === 'function') {
			// All arguments given - no changes
		} else if (typeof format === 'string' && input && typeof options === 'function') { // Omitted options
			callback = options;
			options = {};
		} else if (typeof format === 'string' && input) { // Omitted options + callback
			// No changes
		} else {
			throw new Error('Parse must be called in the form: parse(format, input, [options], [callback])');
		}
		// }}}

		const supported = reflib.supported.find(s => s.id === format);
		if (!supported) throw new Error('Format is unsupported: ' + format);

		const settings = {
			...options,
			// Merge rather than replace so a partial / undefined `options.fixes` still leaves every flag defined
			fixes: {
				authors: false,
				dates: false,
				pages: false,
				...(options && options.fixes),
			},
		};

		const refs = [];
		const reflibEmitter = new events.EventEmitter();

		/**
		 * Emit progress, throttled to at most once every 200ms (leading edge only)
		 * @param {number} cur Number of refs parsed so far
		 * @param {number} max Total number of refs
		 */
		const emitProgress = _.throttle(function(cur, max) {
			reflibEmitter.emit('progress', cur, max);
		}, 200, {
			trailing: false,
		});

		supported.driver.parse(input)
			.on('error', function(err) {
				if (callback) {
					callback(err);
				} else {
					reflibEmitter.emit('error', err);
				}
			})
			.on('ref', function(ref) {
				// Apply fixes {{{
				// Referenced via `reflib` rather than `this` so parse() also works when detached or promisified
				if (settings.fixes.authors) ref = reflib.fix.authors(ref, options);
				if (settings.fixes.dates) ref = reflib.fix.dates(ref, options);
				if (settings.fixes.pages) ref = reflib.fix.pages(ref, options);
				// }}}

				if (callback) {
					refs.push(ref);
				} else {
					reflibEmitter.emit('ref', ref);
				}
			})
			.on('progress', function(cur, max) {
				emitProgress(cur, max);
			})
			.on('end', function() {
				if (callback) {
					callback(null, refs);
				} else {
					reflibEmitter.emit('end');
				}
			});

		return reflibEmitter;
	},

	/**
	 * Wrapper around parse() which opens a file as a stream and parses it automatically
	 * The driver is chosen from the file extension via `identify()`
	 * @param {string} path The path of the file to parse
	 * @param {Object} [options] Additional options to use when parsing, see `parse()` for full details
	 * @param {function} [callback] Callback to call as `(err, refs)` when done
	 * @returns {EventEmitter} An EventEmitter instance
	 * @throws {Error} If the file extension does not match any supported driver
	 * @see parse
	 * @see promises.parseFile
	 */
	parseFile: function(path, options, callback) {
		// Argument mangling {{{
		if (typeof options === 'function') { // path, callback
			callback = options;
			options = {};
		}
		// }}}

		const driver = reflib.identify(path);
		if (!driver) throw new Error('File type is unsupported');

		return reflib.parse(driver, fs.createReadStream(path), options, callback);
	},

	/**
	 * Output a reference library using the requested driver
	 * @param {Object} options Options to use while outputting
	 * @param {WritableStream} options.stream Writable stream used to output
	 * @param {string} options.format The Reflib driver to use, must conform to the ID of a member of Reflib.supported
	 * @param {array<Object>|function} options.content reference library to output. If an array each item is used in turn, if an object a single item is output, if a callback this is called with the arguments `(next, batchNo)` until it returns null. The callback function can return a single object or an array
	 * @param {string} [options.defaultType] If the driver requires a default reference type this value is used if that field is omitted from the input
	 * @param {function} [options.encode] Overridable callback to use on each reference output
	 * @param {function} [options.escape] Overridable callback to use when encoding text
	 * @param {string|array<String>|boolean} [options.fields] If undefined only supported fields are output, if an array only those specified fields are output, if true all fields even those not recognised are output. If the input is a string it is split into an array as a CSV
	 * @returns {WritableStream} The value returned by the driver's `output()`, expected to be a WritableStream which will fire `.on('end')` when writing has finished
	 * @throws {Error} If options is not an object, has no format or names an unsupported format
	 * @see promises.output
	 */
	output: function(options) {
		if (typeof options !== 'object') throw new Error('output(options) must be an object');
		if (!options.format) throw new Error('output(options) must specify a format');

		// Loose comparison retained: options.format is not type-checked above
		const supported = reflib.supported.find(s => s.id == options.format);
		if (!supported) throw new Error('Format is unsupported: ' + options.format);

		const settings = {
			...options,
			// Must come AFTER the spread, otherwise the raw CSV string would overwrite the split array
			fields: typeof options.fields === 'string'
				? options.fields.split(/\s*,\s*/) // Split field list into an array if given a CSV
				: options.fields,
		};

		return supported.driver.output(settings);
	},

	/**
	 * Output a reference library to a file using the requested driver
	 * The driver is chosen from the file extension via `identify()`
	 * @param {string} path The file path to write to
	 * @param {array<Object>} refs The array of references to write
	 * @param {Object} [options] Options to use while outputting, see `output()` for more details. Keys given here override the computed `format`, `stream` and `content`
	 * @param {function} [callback] Callback attached to the output's `error` event (called as `(err)`) and `finish` event (called with no arguments)
	 * @returns {WritableStream} A WritableStream instance as returned by `output()`
	 * @throws {Error} If the file extension does not match any supported driver
	 * @see output
	 * @see promises.outputFile
	 */
	outputFile: function(path, refs, options, callback) {
		// Argument mangling {{{
		if (typeof options === 'function') { // path, refs, callback
			callback = options;
			options = {};
		}
		// }}}

		const driver = reflib.identify(path);
		if (!driver) throw new Error('File type is unsupported for path: ' + path);

		const stream = fs.createWriteStream(path);
		const out = reflib.output({
			format: driver,
			stream: stream,
			content: refs,
			...options,
		});

		if (callback) { // If optional callback is specified attach it as a handler
			out.on('error', callback);
			out.on('finish', callback);
		}

		return out;
	},

	// Fixes {{{
	/**
	 * A collection of reference fixes
	 * Each fix mutates the reference it is given and returns that same object
	 * @type {Object}
	 */
	fix: {
		/**
		 * Attempt to split mangled author fields into an array of strings
		 * Only acts when `authors` is a single-element array whose entry contains a semi-colon
		 * @param {Object} ref The reference to fix
		 * @param {Object} [options] Unused, accepted for signature parity with the other fixes
		 * @returns {Object} The fixed reference
		 */
		authors: function(ref, options) {
			if (
				Array.isArray(ref.authors)
				&& ref.authors.length === 1
				&& /;/.test(ref.authors[0])
			) {
				ref.authors = ref.authors[0].split(/\s*;\s*/);
			}
			return ref;
		},

		/**
		 * Attempt to fix mangled date formats
		 * Each candidate format is tried in order, the first one producing a valid date wins.
		 * Depending on the winning format `ref.year` / `ref.month` are populated and `ref.date` is rewritten
		 * @param {Object} ref The reference to fix
		 * @param {Object} [options] Additional options
		 * @param {array<Object>} [options.dateFormats] Replacement list of candidate formats, each as `{format, year, month, day, output}`
		 * @returns {Object} The fixed reference
		 */
		dates: function(ref, options) {
			const settings = {
				dateFormats: [
					{format: 'MM-dd-yyyy', year: true, month: true, day: true, output: v => dateFns.format(v, 'yyyy-MM-dd')},
					{format: 'dd/MM/yyyy', year: true, month: true, day: true, output: v => dateFns.format(v, 'yyyy-MM-dd')},
					{format: 'dd-MM-yyyy', year: true, month: true, day: true, output: v => dateFns.format(v, 'yyyy-MM-dd')},
					{format: 'yyyy-MM-dd', year: true, month: true, day: true, output: v => dateFns.format(v, 'yyyy-MM-dd')},
					{format: 'do MMMM yy', year: true, month: true, day: true, output: v => dateFns.format(v, 'yyyy-MM-dd')},
					{format: 'do MMMM yyyy', year: true, month: true, day: true, output: v => dateFns.format(v, 'yyyy-MM-dd')},
					{format: 'MMM yyyy', year: true, month: true, day: false, output: v => dateFns.format(v, 'MMM yyyy')},
					{format: 'MMM', year: false, month: true, day: false, output: v => dateFns.format(v, 'MMM')},
					{format: 'MMMM', year: false, month: true, day: false, output: v => dateFns.format(v, 'MMM')},
					{format: 'yyyy', year: true, month: false, day: false, output: v => dateFns.format(v, 'yyyy')},
				],
				...options,
			};

			// Nothing to fix - skip handing a non-value to the date parser
			if (ref.date === undefined || ref.date === null) return ref;

			// Parse once per candidate and keep the winning Date rather than re-parsing for each output field
			let parsedDate;
			const matched = settings.dateFormats.find(attempt => {
				parsedDate = dateFns.parse(ref.date, attempt.format, new Date());
				return dateFns.isValid(parsedDate);
			});
			if (!matched) return ref;

			if (matched.year) ref.year = dateFns.format(parsedDate, 'yyyy');
			if (matched.month) ref.month = dateFns.format(parsedDate, 'MMM');
			ref.date = matched.output(parsedDate);

			return ref;
		},

		/**
		 * Attempt to fix mangled page formats
		 * Normalises `<digits> - <digits>` / `<digits>--<digits>` ranges to `left-right` and expands
		 * abbreviated right-hand sides (e.g. `123 - 4` becomes `123-124`)
		 * @param {Object} ref The reference to fix
		 * @returns {Object} The fixed reference
		 */
		pages: function(ref) {
			const range = /^\s*([0-9]+)\s*--?\s*([0-9]+)\s*$/.exec(ref.pages);
			if (!range) return ref;

			const numericLeft = parseInt(range[1], 10);
			let numericRight = parseInt(range[2], 10);

			if (numericRight < numericLeft) { // Relative number reference e.g. '123 - 4'
				const leftDigits = numericLeft.toString();
				const rightDigits = numericRight.toString();
				// Borrow the leading digits of the left page that the right page omitted
				numericRight = leftDigits.slice(0, leftDigits.length - rightDigits.length) + rightDigits;
			}

			ref.pages = numericLeft + '-' + numericRight;
			return ref;
		},
	},
	// }}}
};

// Compute promises {{{
/**
 * Promisified versions of every top-level function of `reflib` (via `util.promisify`)
 * NOTE(review): `identify` and `output` never invoke a trailing callback, so their promisified variants look like they would never settle - confirm before relying on them
 */
reflib.promises = _.mapValues(
	_.pickBy(reflib, v => typeof v === 'function'),
	v => promisify(v)
);
// }}}