// ineed
// Version:
// Web scraping and HTML-reprocessing. The easy way.
// 70 lines (53 loc) • 1.93 kB
// JavaScript
var util = require('util'),
Pipeline = require('./pipeline'),
SerializationPlugin = require('./plugins/serialization');
//Reprocessor
var Reprocessor = module.exports = function (availablePlugins) {
Pipeline.call(this);
this.tokensToEmit = [];
this.plugins.push(SerializationPlugin);
var reprocessor = this;
availablePlugins.forEach(function (plugin) {
reprocessor[plugin.name] = function () {
this._enablePlugin(plugin, arguments);
return this;
};
});
};
util.inherits(Reprocessor, Pipeline);
//Internals
Reprocessor.prototype._onTokenProcessed = function () {
while (this.tokensToEmit.length) {
var entry = this.tokensToEmit.shift();
this[entry.handlerName](entry.token);
}
};
Reprocessor.prototype._createCtxObj = function (baseUrl) {
var emitMethods = {
'doctype': 'onDoctype',
'startTag': 'onStartTag',
'endTag': 'onEndTag',
'text': 'onText',
'comment': 'onComment'
},
tokensToEmit = this.tokensToEmit,
ctx = Pipeline.prototype._createCtxObj.call(this, baseUrl);
ctx.emit = {};
Object.keys(emitMethods).forEach(function (method) {
ctx.emit[method] = function (token) {
tokensToEmit.push({handlerName: emitMethods[method], token: token});
};
});
return ctx;
};
Reprocessor.prototype._aggregatePluginResults = function () {
return SerializationPlugin.getHtml();
};
Reprocessor.prototype._enablePlugin = function (plugin, pluginArgs) {
if (!this._isPluginEnabled(plugin)) {
var serializationPluginIdx = this.plugins.indexOf(SerializationPlugin);
//NOTE: SerializationPlugin should be the last in the chain
this.plugins.splice(serializationPluginIdx, 0, plugin);
this.pluginInitArgs[plugin.name] = Array.prototype.slice.call(pluginArgs);
}
};