zombie-globbies
Version:
A very quick fix for [**Zombie**](https://github.com/assaf/zombie) that lets it correctly crawl web pages whose `html` tag carries attributes (e.g. `<html lang="en">`).
258 lines (237 loc) • 6.64 kB
JavaScript
// Module-level bindings; declared once here and assigned further down.
var BULK, Context, ERROR, INTEGER, MULTI, Protocol, SINGLE, net;
// Node's networking module; Protocol uses it to create the server.
net = require("net");
// Reply type tags. Commands pass one of these as the `type` argument of
// their reply callback, and `respond` switches on it to pick the line
// prefix written to the stream.
ERROR = -1; // line starting with "-"
SINGLE = 0; // line starting with "+"
INTEGER = 1; // line starting with ":"
BULK = 2; // "$<length>" header followed by the value ("$-1" for a falsy value)
MULTI = 3; // "*<count>" header followed by one "$"-prefixed entry per item ("*-1" for a falsy value)
Context = (function() {
  /**
   * Looks up the handler for a command name received from the client.
   *
   * Only functions defined directly on Context.prototype (other than the
   * constructor) are commands. Instance members such as `process` or
   * `stream`, and anything inherited from Object.prototype, are never
   * dispatched to, because the name comes straight off the network.
   *
   * @param {string} name - Command name as sent by the client (any case).
   * @returns {?Function} The handler, or null when there is no such command.
   */
  function lookupCommand(name) {
    var key = name.toLowerCase();
    if (key !== "constructor" &&
        Object.prototype.hasOwnProperty.call(Context.prototype, key) &&
        typeof Context.prototype[key] === "function") {
      return Context.prototype[key];
    }
    return null;
  }

  /**
   * Length to announce in a "$<length>" header. Strings are measured in
   * UTF-8 bytes (what `stream.write` emits by default), not in UTF-16 code
   * units; anything else (e.g. a Buffer) reports its own `length`.
   */
  function byteLength(value) {
    return typeof value === "string" ? Buffer.byteLength(value) : value.length;
  }

  /**
   * Turns an error reply value (a message string or an Error-like object)
   * into a single line of text, so it cannot break the line-based framing.
   */
  function errorText(value) {
    var text = value;
    if (value !== null && typeof value === "object" && "message" in value) {
      text = value.message;
    }
    return String(text).replace(/[\r\n]+/g, " ");
  }

  /** Writes one bulk entry: "$<length>CRLF<value>CRLF", or "$-1CRLF" for a falsy value. */
  function writeBulk(stream, value) {
    if (value) {
      stream.write("$" + byteLength(value) + "\r\n");
      stream.write(value);
      return stream.write("\r\n");
    }
    return stream.write("$-1\r\n");
  }

  /**
   * Protocol state for one client connection: parses incoming requests of
   * the form "*<argc>CRLF" followed by argc arguments, each
   * "$<length>CRLF<data>" optionally terminated by CRLF, and runs the
   * resulting commands one at a time against `this.browser`.
   *
   * @param {Object} stream - Connection to write replies to (must provide `write`).
   * @param {*} debug - When truthy, parsing/execution is logged to the console
   *   and the flag is handed to every Browser created by `reset`.
   */
  function Context(stream, debug) {
    var self = this;
    var argc = 0; // arguments announced by the current "*" header; 0 = waiting for a header
    var argl = -1; // length announced by the current "$" header; -1 = waiting for one
    var argv = []; // arguments collected so far for the current request
    var input = ""; // received text that has not been consumed yet
    var last = null; // tail of the pending command queue
    var terminatorPending = false; // an argument was just read; its CRLF may follow
    var queue, respond;

    this.stream = stream;
    // Kept under its own name: an instance property called `debug` would
    // shadow the `debug` command defined on the prototype.
    this.debugEnabled = debug;
    this.reset();

    /**
     * Feeds received data into the parser and queues every request that
     * becomes complete. Incomplete trailing data is kept for the next call.
     *
     * NOTE(review): lengths are counted in string characters here, so
     * arguments containing multi-byte characters whose length the client
     * announces in bytes will be mis-framed — confirm against clients.
     *
     * @param {(string|Buffer)=} chunk - Newly received data, if any.
     * @throws {Error} When the input does not start with the expected header.
     */
    this.process = function(chunk) {
      var match, complete;
      if (chunk) {
        input += chunk;
      }
      // A pending zero-length argument can be completed without more input.
      while (input.length > 0 || (argc > 0 && argl === 0)) {
        if (argc > 0 && argl >= 0) {
          // Reading an argument body.
          if (input.length < argl) {
            return;
          }
          argv.push(input.slice(0, argl));
          input = input.slice(argl);
          argl = -1;
          terminatorPending = true;
          if (argv.length === argc) {
            // Reset parser state before dispatching so a throwing command
            // cannot leave a half-consumed request behind.
            complete = argv;
            argc = 0;
            argv = [];
            queue(complete);
          }
          continue;
        }
        if (terminatorPending) {
          // The CRLF that ends an argument is consumed when present but not
          // required, so clients that omit it keep working.
          if (input === "\r") {
            return; // half a terminator; wait for the rest
          }
          terminatorPending = false;
          if (input.slice(0, 2) === "\r\n") {
            input = input.slice(2);
            continue;
          }
        }
        if (argc > 0) {
          match = /^\$(\d+)\r\n/.exec(input);
          if (match) {
            argl = parseInt(match[1], 10);
            input = input.slice(match[0].length);
            if (self.debugEnabled) {
              console.log("Expecting argument of size " + argl);
            }
            continue;
          }
          if (input[0] !== "$") {
            throw new Error("Expecting $<argc>CRLF");
          }
          return; // header not complete yet
        }
        match = /^\*(\d+)\r\n/.exec(input);
        if (match) {
          argc = parseInt(match[1], 10);
          input = input.slice(match[0].length);
          if (self.debugEnabled) {
            console.log("Expecting " + argc + " arguments");
          }
          continue;
        }
        if (input[0] !== "*") {
          throw new Error("Expecting *<argc>CRLF");
        }
        return; // header not complete yet
      }
    };

    /**
     * Appends a parsed request to the command queue. A command is invoked
     * immediately when nothing is pending; otherwise it runs after the
     * previous command has replied, so replies keep request order.
     *
     * @param {string[]} request - Command name followed by its arguments.
     */
    queue = function(request) {
      var command = {};
      command.invoke = function() {
        // Remember the name: it is needed for error messages after the
        // handler's argument list has been built.
        var name = request[0];
        var fn;
        try {
          fn = lookupCommand(name);
          if (fn) {
            if (self.debugEnabled) {
              console.log("Executing " + (request.join(" ")));
            }
            // Handlers receive the reply callback followed by the arguments.
            return fn.apply(self, [command.reply].concat(request.slice(1)));
          }
          return command.reply(ERROR, "Unknown command " + name);
        } catch (error) {
          return command.reply(ERROR, "Failed on " + name + ": " + error.message);
        }
      };
      command.reply = function(type, value) {
        respond(self.stream, type, value);
        if (last === command) {
          last = command.next;
        }
        if (command.next) {
          return process.nextTick(function() {
            return command.next.invoke();
          });
        }
      };
      if (last) {
        last.next = command;
        return last = command;
      }
      last = command;
      return command.invoke();
    };

    /**
     * Serializes one reply onto the stream.
     *
     * @param {Object} stream - Destination; must provide `write`.
     * @param {number} type - One of ERROR, SINGLE, INTEGER, BULK, MULTI.
     * @param {*} value - Reply payload; for ERROR either a message string or
     *   an object with a `message` property.
     */
    respond = function(stream, type, value) {
      var i;
      switch (type) {
        case ERROR:
          return stream.write("-" + errorText(value) + "\r\n");
        case SINGLE:
          return stream.write("+" + value + "\r\n");
        case INTEGER:
          return stream.write(":" + value + "\r\n");
        case BULK:
          return writeBulk(stream, value);
        case MULTI:
          if (!value) {
            return stream.write("*-1\r\n");
          }
          stream.write("*" + value.length + "\r\n");
          for (i = 0; i < value.length; i++) {
            writeBulk(stream, value[i]);
          }
          return;
      }
    };
  }

  /**
   * DEBUG command: switches browser debugging off for "0" or "off" and on
   * for any other argument, then acknowledges with "OK".
   */
  Context.prototype.debug = function(reply, debug) {
    this.browser.debug = !(debug === "0" || debug === "off");
    // Every command must reply, otherwise the queue never advances.
    return reply(SINGLE, "OK");
  };

  /** ECHO command: replies with the given text as a single-line reply. */
  Context.prototype.echo = function(reply, text) {
    return reply(SINGLE, text);
  };

  /**
   * RESET command: replaces `this.browser` with a new Browser taken from the
   * parent module's exports. Also called without `reply` by the constructor.
   */
  Context.prototype.reset = function(reply) {
    this.browser = new module.parent.exports.Browser({
      debug: this.debugEnabled
    });
    if (reply) {
      return reply(SINGLE, "OK");
    }
  };

  /** STATUS command: replies with the browser's `statusCode`, or 0 when unset. */
  Context.prototype.status = function(reply) {
    return reply(INTEGER, this.browser.statusCode || 0);
  };

  /**
   * VISIT command: asks the browser to visit `url` and acknowledges right
   * away, without waiting for the visit to finish.
   */
  Context.prototype.visit = function(reply, url) {
    this.browser.visit(url);
    return reply(SINGLE, "OK");
  };

  /**
   * WAIT command: calls `browser.wait` and replies from its callback with
   * "OK", or with the error message if the callback received an error.
   */
  Context.prototype.wait = function(reply) {
    return this.browser.wait(function(error) {
      if (error) {
        return reply(ERROR, error.message);
      }
      return reply(SINGLE, "OK");
    });
  };

  return Context;
})();
Protocol = (function() {
  /**
   * TCP front end for the command protocol: accepts connections and gives
   * each one its own Context, which parses requests and writes replies.
   *
   * Instances expose `listen(callback)`, `close()` and a read-only `active`
   * property that is true between a successful listen and close.
   *
   * @param {(number|string)=} port - Port number (bound on 127.0.0.1 only),
   *   or any other value accepted by `server.listen`. Defaults to 8091.
   */
  function Protocol(port) {
    var active = false;
    var debug = false;
    var server = net.createServer(function(stream) {
      var context;
      stream.setNoDelay(true);
      context = new Context(stream, debug);
      // A socket 'error' event without a listener is thrown by Node and
      // would take the whole server down; drop just this connection.
      stream.on("error", function(error) {
        if (debug) {
          console.log("Connection error: " + error.message);
        }
        stream.destroy();
      });
      stream.on("data", function(chunk) {
        try {
          context.process(chunk);
        } catch (error) {
          // Malformed request: the parser cannot resynchronize, so report
          // the problem and close the connection instead of crashing.
          stream.end("-" + error.message + "\r\n");
        }
      });
    });

    port || (port = 8091);

    /**
     * Starts listening.
     *
     * @param {function(Error=)=} callback - Called with no argument once the
     *   server is listening, or with the error if it could not start. When
     *   omitted, a startup error is thrown, as an unhandled 'error' event
     *   would be.
     * @returns {Object} The value returned by `server.listen`.
     */
    this.listen = function(callback) {
      var onListening, onError;
      // Failures such as EADDRINUSE are reported through the 'error' event,
      // never through the 'listening' callback, so both are observed and
      // whichever fires first removes the other.
      onListening = function() {
        server.removeListener("error", onError);
        active = true;
        if (callback) {
          return callback();
        }
      };
      onError = function(err) {
        server.removeListener("listening", onListening);
        if (callback) {
          return callback(err);
        }
        throw err;
      };
      server.once("listening", onListening);
      server.once("error", onError);
      if (typeof port === "number") {
        return server.listen(port, "127.0.0.1");
      }
      return server.listen(port);
    };

    /** Stops accepting connections if the server is currently listening. */
    this.close = function() {
      if (active) {
        server.close();
        return active = false;
      }
    };

    // Read-only view of the listening state (standard replacement for the
    // deprecated __defineGetter__, with the same enumerable/configurable flags).
    Object.defineProperty(this, "active", {
      get: function() {
        return active;
      },
      enumerable: true,
      configurable: true
    });
  }
  return Protocol;
})();
exports.Protocol = Protocol;
exports.listen = function(port, callback) {
var protocol, _ref;
if (!callback) {
_ref = [8091, port], port = _ref[0], callback = _ref[1];
}
protocol = new Protocol(port);
return protocol.listen(callback);
};