UNPKG

marc4js

Version:

a node.js module for handling MARC data

153 lines (138 loc) 4.94 kB
'use strict';

var sax = require('sax');
var util = require('util');
var Transform = require('stream').Transform;

var Record = require('../marc/record');
var ControlField = require('../marc/control_field');
var DataField = require('../marc/data_field');
var Subfield = require('../marc/subfield');
var Leader = require('../marc/leader');

// Local names of the elements for which handleOpenTag() pushes an object
// onto the parse stack. Only their closing tags may pop from it.
var MARC_ELEMENTS = ['record', 'leader', 'controlfield', 'datafield', 'subfield'];

/**
 * Returns the local part of a possibly prefixed tag name,
 * e.g. 'marc:record' -> 'record' and 'record' -> 'record'.
 *
 * @param {string} tagName - tag name as reported by the sax 'closetag' event
 * @returns {string} the segment after the first colon, or the whole name
 */
function localName(tagName) {
    var parts = tagName.split(':');
    return parts.length > 1 ? parts[1] : tagName;
}

/**
 * Transform stream that takes MARCXML text on its writable side and pushes
 * one Record object per closed <record> element on its readable side.
 * May be called with or without `new`.
 *
 * @param {Object} [opts] - options forwarded to stream.Transform.
 *     `objectMode` and `highWaterMark` are always overridden.
 * @param {boolean} [opts.strict=false] - passed to sax.createStream as the
 *     strict-mode flag
 * @param {boolean} [opts.resume_saxerror=false] - when truthy, the sax parser
 *     is resumed after a sax error has been re-emitted on this stream
 */
function MarcxmlParser(opts) {
    if (!(this instanceof MarcxmlParser)) return new MarcxmlParser(opts);

    // Work on a copy so the caller's options object is never modified.
    // for-in is a no-op for null/undefined, which covers the "no options" case.
    var options = {};
    var key;
    for (key in opts) {
        options[key] = opts[key];
    }
    options.objectMode = true;  // this has to be true. Emit only Record objects
    options.highWaterMark = 16; // max. # of output records buffered
    Transform.call(this, options);

    this.init();

    var strict = options.strict || false;
    this.resume_saxerror = options.resume_saxerror || false;

    // See https://github.com/isaacs/sax-js for more info
    this.stream = sax.createStream(strict /* strict mode - no by default */, {lowercase: true, xmlns: true });

    // Keep the bound handlers: removeListener() only works when it is given
    // the very same function reference that was registered.
    this._saxHandlers = {
        error: this.handleSaxError.bind(this),
        opentag: this.handleOpenTag.bind(this),
        closetag: this.handleCloseTag.bind(this),
        text: this.handleText.bind(this),
        cdata: this.handleText.bind(this)
    };

    var stream = this.stream;
    var handlers = this._saxHandlers;
    Object.keys(handlers).forEach(function (event) {
        stream.on(event, handlers[event]);
    });
    // 'once' listeners remove themselves, so this one needs no bookkeeping.
    this.stream.once('end', this.handleEnd.bind(this));
}

util.inherits(MarcxmlParser, Transform);

/**
 * Resets the parse stack, which holds the objects for the currently open
 * record/leader/controlfield/datafield/subfield elements (innermost last).
 */
MarcxmlParser.prototype.init = function () {
    this.stack = [];
};

/**
 * Runs when the internal sax stream emits 'end': detaches the sax listeners
 * registered by the constructor and signals end-of-data on the readable side.
 *
 * NOTE(review): nothing in this module calls `this.stream.end()`, so this
 * appears to run only if outside code ends the sax stream - confirm.
 */
MarcxmlParser.prototype.handleEnd = function () {
    var stream = this.stream;
    var handlers = this._saxHandlers || {};
    Object.keys(handlers).forEach(function (event) {
        stream.removeListener(event, handlers[event]);
    });
    this.push(null);
};

/**
 * Re-emits a sax error on this stream. When `resume_saxerror` is set, the
 * parser's error state is cleared and parsing is resumed afterwards.
 *
 * @param {Error} e - error reported by the sax stream
 */
MarcxmlParser.prototype.handleSaxError = function (e) {
    this.emit('error', e);
    if (this.resume_saxerror && this.stream._parser) {
        this.stream._parser.error = null;
        this.stream._parser.resume();
    }
};

/**
 * Re-emits an arbitrary error on this stream.
 *
 * @param {Error} e - error to emit
 */
MarcxmlParser.prototype.handleError = function (e) {
    this.emit('error', e);
};

/**
 * Handles a sax 'opentag' event: creates the object matching the element's
 * local name, copies the relevant attributes onto it and pushes it onto the
 * parse stack. Elements with any other local name are ignored.
 *
 * @param {Object} node - sax tag node (reads `local` and `attributes`)
 */
MarcxmlParser.prototype.handleOpenTag = function (node) {
    var attrs = node.attributes;
    var obj;

    switch (node.local) {
    case 'record':
        obj = new Record();
        break;
    case 'leader':
        obj = new Leader();
        break;
    case 'controlfield':
        obj = new ControlField();
        if (attrs.tag) {
            obj.tag = attrs.tag.value;
        } else {
            // TODO: throw an error
        }
        break;
    case 'datafield':
        obj = new DataField();
        if (attrs.tag) {
            obj.tag = attrs.tag.value;
        } else {
            // TODO: throw an error
        }
        if (attrs.ind1) {
            obj.indicator1 = attrs.ind1.value;
        } else {
            // TODO: throw an error
        }
        if (attrs.ind2) {
            obj.indicator2 = attrs.ind2.value;
        } else {
            // TODO: throw an error
        }
        break;
    case 'subfield':
        obj = new Subfield();
        if (attrs.code) {
            obj.code = attrs.code.value;
        } else {
            // TODO: throw an error
        }
        break;
    default:
        break;
    }

    if (typeof obj !== 'undefined') this.stack.push(obj);
};

/**
 * Handles a sax 'closetag' event: pops the finished object off the parse
 * stack and attaches it to its parent (the new top of the stack), or pushes
 * it downstream when it is a complete Record.
 *
 * @param {string} el - name of the element being closed
 */
MarcxmlParser.prototype.handleCloseTag = function (el) {
    // Only elements that handleOpenTag() pushed an object for may pop one.
    // Otherwise the closing tag of an unrelated element nested in a record
    // would pop an object belonging to a still-open MARC element.
    // A non-string argument keeps the previous unconditional behaviour.
    if (typeof el === 'string' && MARC_ELEMENTS.indexOf(localName(el)) === -1) {
        return;
    }

    var obj = this.stack.pop();
    var parent = this.stack[this.stack.length - 1];

    if (obj instanceof Subfield) {
        parent.addSubfield(obj);
    } else if (obj instanceof ControlField || obj instanceof DataField) {
        parent.addVariableField(obj);
    } else if (obj instanceof Leader) {
        parent.leader = obj;
    } else if (obj instanceof Record) {
        this.push(obj);
    }
};

/**
 * Handles sax 'text' and 'cdata' events by routing the character data to
 * the innermost open element: it is appended to the data of a Subfield or
 * ControlField, or handed to Leader#unmarshal. Text inside any other
 * element, or outside all tracked elements, is ignored.
 *
 * @param {string} text - character data reported by sax
 */
MarcxmlParser.prototype.handleText = function (text) {
    // Peek instead of pop/push so an empty stack never gains an `undefined` entry.
    if (this.stack.length === 0) return;

    var obj = this.stack[this.stack.length - 1];
    if (obj instanceof Subfield || obj instanceof ControlField) {
        // One element's content can arrive in several events (e.g. text
        // followed by a CDATA section), so accumulate rather than overwrite.
        obj.data = (typeof obj.data === 'string' ? obj.data : '') + text;
    } else if (obj instanceof Leader) {
        obj.unmarshal(text);
    }
};

// Naive Stream API
/**
 * Transform implementation: feeds each incoming chunk to the sax stream.
 * Records are pushed from handleCloseTag() as their elements close.
 *
 * @param {Buffer|string} data - chunk of MARCXML
 * @param {string} encoding - unused
 * @param {Function} done - completion callback, given the error on failure
 */
MarcxmlParser.prototype._transform = function (data, encoding, done) {
    try {
        this.stream.write(data);
        done();
    } catch (e) {
        done(e);
        this.push(null); // Manually trigger an end, since we can't reliably do any more parsing
    }
};

module.exports = MarcxmlParser;