// cray
// Version:
// Epub parser
// 214 lines (172 loc) • 5.38 kB
// JavaScript
/**
* @author: Akshay
* @date: 12/13/2015
* @github: https://github.com/akshayKrSingh
* @npm: https://npmjs.com/~akshaysingh
*/
var Duplex = require('stream').Duplex;
var utils = require('util');
var fs = require('fs-extra');
var ZipStream = require('node-stream-zip');
var streamBuffers = require('stream-buffers');
var path = require('path');
var _ = require('lodash');
var EpubErrors = require('../epub/errors');
var EpubHelpers = require('../epub/helpers');
/**
 * Duplex stream constructor for reading an EPUB.
 *
 * May be invoked with or without `new`; a bare call is forwarded to `new Reader(options)`.
 *
 * @param {Object} [options] Forwarded to the Duplex constructor and kept on
 *   `this.options` (an empty object is stored when omitted). `_read` uses
 *   `options.path` as the location of the EPUB.
 * @returns {Reader} The new instance (only relevant when called without `new`).
 */
function Reader(options) {
  // Calling Reader(...) as a plain function still yields a fresh instance.
  var calledWithNew = this instanceof Reader;
  if (!calledWithNew) {
    return new Reader(options);
  }

  Duplex.call(this, options);

  // Work items consumed front-to-back by _read. The OPF path starts empty and
  // is filled in once the container file has been processed.
  var containerStep = {
    checkPoint: 'container',
    type: 'EPUB#container',
    path: 'META-INF/container.xml',
    processor: 'generateContainerInfo'
  };
  var opfStep = {
    checkPoint: 'opf',
    type: 'EPUB#opf',
    path: '',
    processor: 'generateOpfInfo'
  };

  // Public state.
  this.options = options || {};
  this.isFile = null;
  this.epub = {};
  this.error = null;

  // Internal bookkeeping (removed again by _autoDrain, except the flags below).
  this._$$stream = null;
  this._$$destroyed = false;
  this._$$OpfRoot = null;
  this._$$Processed = {container: false, opf: false, spine: false};
  this._$$ProcessQueue = [containerStep, opfStep];
}
// Make Reader a Duplex subclass so its instances get the stream API
// (pipe, push, emit, ...) used by the methods below.
utils.inherits(Reader, Duplex);
// Shorthand for attaching the instance methods defined in the rest of the file.
var proto = Reader.prototype;
/**
 * Starts parsing by piping the reader's readable side into its own writable
 * side, so every chunk produced by `_read` is consumed by `_write`.
 *
 * @returns {Reader} This reader, to allow chaining (e.g. attaching listeners).
 */
proto.parse = function() {
  this.pipe(this);
  return this;
};
/**
 * Tears the stream down: ends the readable side, marks the reader as
 * destroyed and then emits either 'error' (when `err` is given) or 'close'.
 *
 * @param {Error} [err] Failure that caused the teardown; also stored on `this.error`.
 * @returns {boolean|undefined} The result of `emit('error', err)` when an
 *   error was supplied, otherwise undefined.
 */
proto._destroy = function(err) {
  var stream = this;

  // Signal end-of-data first so nothing else is delivered to consumers.
  stream.push(null);
  stream.readable = false;
  stream._$$destroyed = true;

  if (!err) {
    stream.emit('close');
    return;
  }

  stream.error = err;
  return stream.emit('error', err);
};
/**
 * Readable-side implementation. Each call handles the item at the front of
 * `_$$ProcessQueue`: it loads that item's content (from the zip archive when
 * `options.path` is a file, otherwise from the directory at `options.path`),
 * runs the matching EpubHelpers processor on it and pushes the resulting
 * properties as a JSON string. When the queue is empty the internal state is
 * drained and the stream is ended.
 *
 * Failures are reported through `_destroy(err)`; nothing is thrown to the caller.
 */
proto._read = function() {
  var self = this;

  // Picks the "missing" error registered for an item type. Falls back to the
  // underlying I/O error when EpubErrors has no entry for that type, instead
  // of throwing a TypeError from inside an async callback.
  function missingError(type, fallback) {
    var known = EpubErrors[type];
    return known && known.missing ? known.missing : fallback;
  }

  // Releases the archive's file handle. A new ZipStream is opened for every
  // read, so each one has to be closed once its entry has been consumed.
  // NOTE(review): guarded because the installed node-stream-zip version is not
  // visible from here — confirm `close()` is available.
  function closeZip(zip) {
    if (zip && typeof zip.close === 'function') {
      zip.close();
    }
  }

  fs.lstat(self.options.path, function(err, stat) {
    // The stream may have been torn down while lstat was in flight; pushing
    // (or emitting a second error) after that point would be a bug.
    if (self._$$destroyed) {
      return null;
    }
    if (err) {
      return self._destroy(err);
    }
    self.isFile = stat.isFile();
    var props = {};

    // Nothing left to process: clean up and signal end-of-stream.
    if (!self._$$ProcessQueue.length) {
      self._autoDrain();
      return self.push(null);
    }

    /**
     * Runs the current queue item's processor over `string`, merges the helper
     * output into `props`, updates the queue/check-points, then calls `cb`.
     */
    function update(string, cb) {
      var epub = EpubHelpers();
      var current = self._$$ProcessQueue[0];
      // Only the container step may rewrite the OPF queue entry; a spine or
      // OPF document that merely contains "<container" must not.
      var isContainer = current.type === 'EPUB#container' &&
        /.*<(container).*/.test(string);

      if (epub[current.processor]) {
        epub[current.processor](string);
      }
      _.extend(props, epub);
      if (current.type === 'EPUB#spine') {
        props.href = current.href;
        props.idref = current.checkPoint;
      }
      props.base = self.options.path;
      if (epub.error) {
        return self._destroy(epub.error);
      }
      if (isContainer) {
        // The container tells us where the OPF lives; patch the next step.
        self._$$OpfRoot = epub.opfRoot;
        self._$$ProcessQueue[1].path = epub.opfPath;
      }
      self._$$Processed[current.checkPoint] = true;
      if (self._$$Processed.opf) {
        if (epub.spines) {
          props.spineLength = epub.spines.length;
        }
        // Spine items are only queued when reading from a directory.
        if (!self.isFile) {
          _.each(epub.spines, function(spine) {
            self._$$ProcessQueue.push({
              checkPoint: spine.id,
              type: 'EPUB#spine',
              href: spine.href,
              path: path.resolve(props.base, self._$$OpfRoot, spine.href),
              processor: ''
            });
          });
        }
      }
      self._$$ProcessQueue.shift();
      cb();
    }

    if (self.isFile) {
      // Archive mode: stream the current entry out of the zip into memory.
      var zip = new ZipStream({file: self.options.path, storeEntries: true});
      self._$$stream = zip;
      zip.on('error', function(err) {
        self._destroy(err);
      });
      zip.on('ready', function() {
        var entry = self._$$ProcessQueue[0];
        props.type = entry.type;
        zip.stream(entry.path, function(err, file) {
          if (err) {
            closeZip(zip);
            return self._destroy(missingError(entry.type, err));
          }
          var sink = new streamBuffers.WritableStreamBuffer();
          file.pipe(sink).on('finish', function() {
            // Entry fully buffered; the archive handle is no longer needed.
            closeZip(zip);
            props.file = sink.getContentsAsString();
            update(props.file, function() {
              self.push(JSON.stringify(props));
            });
          });
        });
      });
      return null;
    }

    // Directory mode: read the current item straight from disk.
    var item = self._$$ProcessQueue[0];
    var location = path.resolve(self.options.path, item.path);
    props.type = item.type;
    props.loc = location;
    fs.readFile(location, 'utf8', function(err, file) {
      if (err) {
        return self._destroy(missingError(item.type, err));
      }
      update(file, function() {
        props.file = file;
        self.push(JSON.stringify(props));
      });
    });
  });
};
/**
 * @name: _autoDrain()
 * @description:
 * Deletes the bookkeeping properties (`_$$stream`, `_$$ProcessQueue`,
 * `_$$Processed`) that are only needed while the stream is being read.
 * `_read` calls it once the process queue is empty, right before ending
 * the stream.
 */
proto._autoDrain = function() {
  var disposable = ['_$$stream', '_$$ProcessQueue', '_$$Processed'];
  for (var i = 0; i < disposable.length; i += 1) {
    delete this[disposable[i]];
  }
};
/**
 * Writable-side implementation. Each chunk is expected to be one of the JSON
 * strings pushed by `_read`; container and OPF chunks are re-processed with a
 * fresh EpubHelpers instance and the result is merged into `this.epub`.
 * Chunks of any other type only merge the untouched helper object.
 *
 * @param {Buffer|string} chunk JSON-encoded properties object with `type` and `file`.
 * @param {string} enc Encoding supplied by the stream machinery (unused).
 * @param {Function} next Completion callback; receives an Error when the
 *   chunk is not a valid JSON object, so the failure surfaces as a stream
 *   'error' event instead of an exception thrown out of the write call.
 */
proto._write = function(chunk, enc, next) {
  var epub = EpubHelpers();
  var self = this;
  var data;

  try {
    data = JSON.parse(chunk.toString());
  } catch (parseError) {
    return next(parseError);
  }
  // JSON such as `null` or `42` parses fine but carries no `type`/`file`.
  if (data === null || typeof data !== 'object') {
    return next(new TypeError('Reader expected a JSON object chunk'));
  }

  switch (data.type) {
    case "EPUB#container":
      epub.generateContainerInfo(data.file);
      break;
    case "EPUB#opf":
      epub.generateOpfInfo(data.file);
      break;
  }
  _.extend(self.epub, epub);
  next();
};
// The module's sole export is the Reader constructor (usable with or without `new`).
module.exports = Reader;