zombie-globbies
Version:
A very quick fix for [**Zombie**](https://github.com/assaf/zombie) that lets it correctly crawl web pages whose html tag carries attributes (e.g. html lang="en").
224 lines (196 loc) • 7.45 kB
JavaScript
var Events, HTML, URL, XMLHttpRequest, raise;

// Shared reference so own-property checks work even on objects that shadow
// or lack `hasOwnProperty`.
var __hasProp = Object.prototype.hasOwnProperty;

/**
 * Classical-inheritance helper: makes `child` inherit from `parent`.
 *
 * Copies the parent's own enumerable properties (its "statics") onto the
 * child, chains the child's prototype to the parent's prototype without
 * invoking the parent constructor, and records the parent prototype on
 * `child.__super__`.
 *
 * @param {Function} child  Constructor that should inherit.
 * @param {Function} parent Constructor to inherit from.
 * @returns {Function} The same `child` constructor.
 */
var __extends = function(child, parent) {
  var name;

  // Intermediate constructor: its instances delegate to parent.prototype
  // and report `child` as their constructor.
  function Surrogate() {
    this.constructor = child;
  }

  for (name in parent) {
    if (!__hasProp.call(parent, name)) {
      continue;
    }
    child[name] = parent[name];
  }

  Surrogate.prototype = parent.prototype;
  child.prototype = new Surrogate();
  child.__super__ = parent.prototype;
  return child;
};
HTML = require("jsdom").defaultLevel;
Events = require("jsdom").level(3, 'events');
URL = require("url");
raise = require("./scripts");
XMLHttpRequest = (function(_super) {
  __extends(XMLHttpRequest, _super);

  /**
   * Asynchronous-only XMLHttpRequest bound to a window.
   *
   * Requests are handed to `window._eventQueue.http(...)`; progress is
   * reported by dispatching events on this object (it extends the event
   * target passed in as `_super`).
   *
   * @param {Object} window Window whose `location`, `document` and
   *   `_eventQueue` are used by this request.
   */
  function XMLHttpRequest(window) {
    this._window = window;
    // Request descriptor ({method, url, headers, ...}) created by open().
    this._pending = null;
    this._responseHeaders = null;
    // Origin string when the opened URL is cross-origin, otherwise null.
    this._cors = null;
    this.readyState = XMLHttpRequest.UNSENT;
    this.onreadystatechange = null;
    this.timeout = 0;
    this.status = null;
    this.statusText = null;
    this.responseText = null;
    this.responseXML = null;
    this._ownerDocument = window.document;
  }

  /**
   * Cancels the current request.
   *
   * If nothing has been sent yet (or there is no pending request at all),
   * the object is simply reset to UNSENT without firing events. Otherwise
   * the in-flight request is flagged as aborted; the response callback in
   * send() checks that flag and fires the "abort" event.
   *
   * @returns {boolean|undefined} `true` when an in-flight request was
   *   flagged, `undefined` otherwise.
   */
  XMLHttpRequest.prototype.abort = function() {
    var request = this._pending;
    var nothingInFlight =
      !request ||
      this.readyState === XMLHttpRequest.UNSENT ||
      (this.readyState === XMLHttpRequest.OPENED && !request.sent);

    // `_pending` is cleared once a response arrives, so `request` may be
    // null here (e.g. abort()/open() called on a finished request).
    if (nothingInFlight) {
      this.readyState = XMLHttpRequest.UNSENT;
      return;
    }
    return request.aborted = true;
  };

  /**
   * Returns all response headers as "name: value" lines joined by "\n".
   *
   * @returns {string|null} Header block, or null when no response headers
   *   have been received.
   */
  XMLHttpRequest.prototype.getAllResponseHeaders = function() {
    var headers = this._responseHeaders;
    var lines, name;
    if (!headers) {
      return null;
    }
    lines = [];
    for (name in headers) {
      lines.push("" + name + ": " + headers[name]);
    }
    return lines.join("\n");
  };

  /**
   * Looks up a single response header; the name is lower-cased before the
   * lookup.
   *
   * @param {string} header Header name.
   * @returns {*} The stored value, `undefined` when the header is absent,
   *   or null when no response headers have been received.
   */
  XMLHttpRequest.prototype.getResponseHeader = function(header) {
    if (this._responseHeaders) {
      return this._responseHeaders[header.toLowerCase()];
    } else {
      return null;
    }
  };

  /**
   * Prepares a new request and moves to the OPENED state.
   *
   * @param {string} method HTTP method (case-insensitive).
   * @param {string} url URL, resolved against the window's current location.
   * @param {boolean} [async] Must not be `false`; synchronous requests are
   *   not supported.
   * @param {string} [user] User name for URL credentials.
   * @param {string} [password] Password for URL credentials.
   * @throws {DOMException} NOT_SUPPORTED_ERR for synchronous requests or a
   *   non-HTTP/S protocol, SECURITY_ERR for CONNECT/TRACE/TRACK, SYNTAX_ERR
   *   for any other unsupported method.
   */
  XMLHttpRequest.prototype.open = function(method, url, async, user, password) {
    var headers, request, location;
    if (async === false) {
      throw new HTML.DOMException(HTML.NOT_SUPPORTED_ERR, "Zombie does not support synchronous XHR requests");
    }
    // Cancel whatever this object was doing before validating the new request.
    this.abort();

    method = method.toUpperCase();
    if (/^(CONNECT|TRACE|TRACK)$/.test(method)) {
      throw new HTML.DOMException(HTML.SECURITY_ERR, "Unsupported HTTP method");
    }
    if (!/^(DELETE|GET|HEAD|OPTIONS|POST|PUT)$/.test(method)) {
      throw new HTML.DOMException(HTML.SYNTAX_ERR, "Unsupported HTTP method");
    }

    headers = {};
    location = this._window.location;
    url = URL.parse(URL.resolve(location.href, url));
    // Drop default ports so the host comparison below is not fooled by an
    // explicit ":80" / ":443".
    if ((url.protocol === 'https:' && url.port === '443') || (url.protocol === 'http:' && url.port === '80')) {
      delete url.port;
    }
    if (!/^https?:$/i.test(url.protocol)) {
      throw new HTML.DOMException(HTML.NOT_SUPPORTED_ERR, "Only HTTP/S protocol supported");
    }
    url.hostname || (url.hostname = location.hostname);
    url.host = url.port ? "" + url.hostname + ":" + url.port : url.hostname;

    if (url.host !== location.host) {
      // Cross-origin: send an Origin header and remember it so send() can
      // check the response's access-control-allow-origin header.
      headers.origin = location.protocol + "//" + location.host;
      this._cors = headers.origin;
    } else {
      // Clear any origin left over from a previous cross-origin request on
      // this same object, otherwise send() would wrongly enforce CORS.
      this._cors = null;
    }

    url.hash = null;
    if (user) {
      url.auth = "" + user + ":" + password;
    }

    // Reset everything that described the previous response.
    this.status = null;
    this.statusText = null;
    this.responseText = null;
    this.responseXML = null;
    this._responseHeaders = null;

    request = {
      method: method,
      url: URL.format(url),
      headers: headers
    };
    this._pending = request;
    this._stateChanged(XMLHttpRequest.OPENED);
  };

  /**
   * Sends the request prepared by open().
   *
   * Fires "loadstart", then passes the request to the window's event queue.
   * When the response callback runs, the outcome is reported through
   * readystatechange plus "progress" and one of "abort", "timeout",
   * "error" or "load"; every path except abort also ends with "loadend".
   *
   * @param {*} [data] Request body.
   * @throws {DOMException} INVALID_STATE_ERR unless readyState is OPENED.
   */
  XMLHttpRequest.prototype.send = function(data) {
    var self = this;
    var request;
    if (this.readyState !== XMLHttpRequest.OPENED) {
      throw new HTML.DOMException(HTML.INVALID_STATE_ERR, "Invalid state");
    }
    this._fire("loadstart");

    request = this._pending;
    if (!request.headers["content-type"]) {
      request.headers["content-type"] = "text/plain";
    }
    request.body = data;
    request.timeout = this.timeout;

    this._window._eventQueue.http(request.method, request.url, request, function(error, response) {
      var allowedOrigin, wrappedError, body;

      // Only clear the slot if open() has not replaced it in the meantime.
      if (self._pending === request) {
        self._pending = null;
      }
      self.status = 0;
      self.responseText = "";

      if (request.aborted) {
        self._stateChanged(XMLHttpRequest.DONE);
        self._fire("progress");
        error = new HTML.DOMException(HTML.ABORT_ERR, "Request aborted");
        self._fire("abort", error);
        return;
      }

      if (error) {
        self._stateChanged(XMLHttpRequest.DONE);
        self._fire("progress");
        if (error.code === "ETIMEDOUT") {
          // The wrapped exception must be the one attached to the event;
          // listeners read it from `event.error`.
          wrappedError = new HTML.DOMException(HTML.TIMEOUT_ERR, "The request timed out");
          self._fire("timeout", wrappedError);
        } else {
          wrappedError = new HTML.DOMException(HTML.NETWORK_ERR, error.message);
          self._fire("error", wrappedError);
        }
        self._fire("loadend");
        return;
      }

      if (self._cors) {
        allowedOrigin = response.headers['access-control-allow-origin'];
        if (!(allowedOrigin === '*' || allowedOrigin === self._cors)) {
          error = new HTML.DOMException(HTML.SECURITY_ERR, "Cannot make request to different domain");
          self._stateChanged(XMLHttpRequest.DONE);
          self._fire("progress");
          self._fire("error", error);
          self._fire("loadend");
          // Also report the failure through the owner document.
          self.raise("error", error.message, {
            exception: error
          });
          return;
        }
      }

      self.status = response.statusCode;
      self.statusText = response.statusText;
      self._responseHeaders = response.headers;
      self._stateChanged(XMLHttpRequest.HEADERS_RECEIVED);

      body = response.body;
      self.responseText = (body != null ? body.toString() : void 0) || "";
      self._stateChanged(XMLHttpRequest.LOADING);

      self.responseXML = null;
      self._stateChanged(XMLHttpRequest.DONE);
      self._fire("progress");
      self._fire("load");
      return self._fire("loadend");
    });

    // Lets abort() distinguish "opened but never sent" from "in flight".
    request.sent = true;
  };

  /**
   * Sets a request header; the name is stored lower-cased and the value is
   * stringified.
   *
   * @param {string} header Header name.
   * @param {*} value Header value.
   * @throws {DOMException} INVALID_STATE_ERR unless readyState is OPENED.
   */
  XMLHttpRequest.prototype.setRequestHeader = function(header, value) {
    var request;
    if (this.readyState !== XMLHttpRequest.OPENED) {
      throw new HTML.DOMException(HTML.INVALID_STATE_ERR, "Invalid state");
    }
    request = this._pending;
    request.headers[header.toString().toLowerCase()] = value.toString();
  };

  /**
   * Updates readyState and fires "readystatechange".
   *
   * @param {number} newState One of the XMLHttpRequest state constants.
   * @returns {*} Result of dispatching the event.
   */
  XMLHttpRequest.prototype._stateChanged = function(newState) {
    this.readyState = newState;
    return this._fire("readystatechange");
  };

  /**
   * Creates an event named `eventName`, attaches `error` to it as
   * `event.error`, and dispatches it on this object.
   *
   * @param {string} eventName Event type to dispatch.
   * @param {*} [error] Value exposed to listeners as `event.error`.
   * @returns {*} Result of `dispatchEvent`.
   */
  XMLHttpRequest.prototype._fire = function(eventName, error) {
    var event;
    event = new Events.Event('xhr');
    event.initEvent(eventName, true, true);
    event.error = error;
    return this.dispatchEvent(event);
  };

  /**
   * Forwards an error report to the owner document's `raise`.
   *
   * @param {string} type
   * @param {string} message
   * @param {Object} [data]
   * @returns {*} Whatever the document's `raise` returns.
   */
  XMLHttpRequest.prototype.raise = function(type, message, data) {
    return this._ownerDocument.raise(type, message, data);
  };

  return XMLHttpRequest;
})(Events.EventTarget);
// readyState values. They are defined on the constructor (used throughout
// this file) and mirrored on the prototype so that instance lookups such as
// `xhr.DONE` resolve as well.
XMLHttpRequest.UNSENT = XMLHttpRequest.prototype.UNSENT = 0;
XMLHttpRequest.OPENED = XMLHttpRequest.prototype.OPENED = 1;
XMLHttpRequest.HEADERS_RECEIVED = XMLHttpRequest.prototype.HEADERS_RECEIVED = 2;
XMLHttpRequest.LOADING = XMLHttpRequest.prototype.LOADING = 3;
XMLHttpRequest.DONE = XMLHttpRequest.prototype.DONE = 4;

// DOMException codes used by the XMLHttpRequest methods above, assigned onto
// the DOM level object.
// NOTE(review): presumably the jsdom level in use does not define these itself — confirm.
HTML.SECURITY_ERR = 18;
HTML.NETWORK_ERR = 19;
HTML.ABORT_ERR = 20;
HTML.TIMEOUT_ERR = 23;

module.exports = XMLHttpRequest;