zombie-globbies
Version:
A very quick fix for [**Zombie**](https://github.com/assaf/zombie) that lets it correctly crawl web pages whose `html` tag carries attributes (e.g. `<html lang="en">`).
598 lines (566 loc) • 17 kB
JavaScript
var EventSource, Events, File, HTML, History, JSDOM, JSDOM_PATH, Path, Screen, URL, WebSocket, XMLHttpRequest, XPath, createDocument, createWindow, jsdomDispatchEvent, jsdomRaise, loadDocument,
__slice = [].slice;
Path = require("path");
JSDOM_PATH = require.resolve("jsdom");
createDocument = require("./document");
EventSource = require("eventsource");
History = require("./history");
JSDOM = require("jsdom");
WebSocket = require("ws");
URL = require("url");
XMLHttpRequest = require("./xhr");
createWindow = require("" + JSDOM_PATH + "/../jsdom/browser/index").createWindow;
XPath = require("" + JSDOM_PATH + "/../jsdom/level3/xpath");
Events = JSDOM.level(3, 'events');
HTML = JSDOM.defaultLevel;
module.exports = function(_arg) {
var browser, closed, document, encoding, eventQueue, global, history, method, name, opener, params, parent, plugins, referer, url, window, windowHistory;
browser = _arg.browser, params = _arg.params, encoding = _arg.encoding, history = _arg.history, method = _arg.method, name = _arg.name, opener = _arg.opener, parent = _arg.parent, referer = _arg.referer, url = _arg.url;
name || (name = "");
url || (url = "about:blank");
if (!/^(about|javascript|http|https|file):/i.test(url)) {
throw new Error("Cannot load resource " + url + ", unsupported protocol");
}
window = createWindow(HTML);
global = window.getGlobal();
closed = false;
Object.defineProperty(window, "browser", {
value: browser,
enumerable: true
});
document = createDocument(browser, window, referer || history.url);
Object.defineProperty(window, "document", {
value: document,
enumerable: true
});
Object.defineProperty(window, "name", {
value: name,
enumerable: true
});
if (parent) {
Object.defineProperty(window, "parent", {
value: parent,
enumerable: true
});
Object.defineProperty(window, "top", {
value: parent.top,
enumerable: true
});
} else {
Object.defineProperty(window, "parent", {
value: global,
enumerable: true
});
Object.defineProperty(window, "top", {
value: global,
enumerable: true
});
}
Object.defineProperty(window, "opener", {
value: opener && opener,
enumerable: true
});
Object.defineProperty(window, "title", {
get: function() {
return document.title;
},
set: function(title) {
return document.title = title;
},
enumerable: true
});
Object.defineProperty(window, "console", {
value: browser.console,
enumerable: true
});
Object.defineProperty(window, "requestAnimationFrame", {
get: function() {
return window.setImmediate;
}
});
plugins = [];
plugins.item = function() {};
plugins.namedItem = function() {};
Object.defineProperties(window.navigator, {
cookieEnabled: {
value: true
},
javaEnabled: {
value: function() {
return false;
}
},
language: {
value: browser.language
},
mimeTypes: {
value: plugins
},
platform: {
value: 'node'
},
plugins: {
value: plugins
},
userAgent: {
value: browser.userAgent
},
vendor: {
value: "Zombie Industries"
}
});
Object.defineProperty(window, "cookies", {
get: function() {
return browser.cookies.serialize(this.location.hostname, this.location.pathname);
}
});
browser._storages.extend(window);
browser._interact.extend(window);
Object.defineProperties(window, {
File: {
value: File
},
Event: {
value: Events.Event
},
screen: {
value: new Screen()
},
MouseEvent: {
value: Events.MouseEvent
},
MutationEvent: {
value: Events.MutationEvent
},
UIEvent: {
value: Events.UIEvent
}
});
window.atob = function(string) {
return new Buffer(string, "base64").toString("utf8");
};
window.btoa = function(string) {
return new Buffer(string, "utf8").toString("base64");
};
window.XMLHttpRequest = function() {
return new XMLHttpRequest(window);
};
window.WebSocket = function(url, protocol) {
var origin;
url = HTML.resourceLoader.resolve(document, url);
origin = "" + window.location.protocol + "//" + window.location.host;
return new WebSocket(url, {
origin: origin,
protocol: protocol
});
};
window.Image = function(width, height) {
var img;
img = new HTML.HTMLImageElement(window.document);
img.width = width;
img.height = height;
return img;
};
window.DataView = DataView;
window.XPathException = XPath.XPathException;
window.XPathExpression = XPath.XPathExpression;
window.XPathEvaluator = XPath.XPathEvaluator;
window.XPathResult = XPath.XPathResult;
window.resizeTo = function(width, height) {
window.outerWidth = window.innerWidth = width;
return window.outerHeight = window.innerHeight = height;
};
window.resizeBy = function(width, height) {
return window.resizeTo(window.outerWidth + width, window.outerHeight + height);
};
window.onhashchange = null;
window.postMessage = function(data, targetOrigin) {
var event, origin;
document = window.document;
event = document.createEvent("MessageEvent");
event.initEvent("message", false, false);
event.data = data;
event.source = (browser._windowInScope || window).getGlobal();
origin = event.source.location;
event.origin = URL.format({
protocol: origin.protocol,
host: origin.host
});
return window.dispatchEvent(event);
};
window._evaluate = function(code, filename) {
var error, originalInScope, result, _ref;
try {
_ref = [browser._windowInScope, window], originalInScope = _ref[0], browser._windowInScope = _ref[1];
if (typeof code === "string" || code instanceof String) {
result = global.run(code, filename);
} else if (code) {
result = code.call(global);
}
browser.emit("evaluated", code, result, filename);
return result;
} catch (_error) {
error = _error;
error.filename || (error.filename = filename);
throw error;
} finally {
browser._windowInScope = originalInScope;
}
};
eventQueue = browser.eventLoop.createEventQueue(window);
Object.defineProperties(window, {
_eventQueue: {
value: eventQueue
},
setTimeout: {
value: eventQueue.setTimeout.bind(eventQueue)
},
clearTimeout: {
value: eventQueue.clearTimeout.bind(eventQueue)
},
setInterval: {
value: eventQueue.setInterval.bind(eventQueue)
},
clearInterval: {
value: eventQueue.clearInterval.bind(eventQueue)
},
setImmediate: {
value: function(fn) {
return eventQueue.setTimeout(fn, 0);
}
},
clearImmediate: {
value: eventQueue.clearTimeout.bind(eventQueue)
}
});
window.EventSource = function(url) {
var eventSource;
url = HTML.resourceLoader.resolve(document, url);
eventSource = new EventSource(url);
eventQueue.addEventSource(eventSource);
return eventSource;
};
window.open = function(url, name, features) {
url = url && HTML.resourceLoader.resolve(document, url);
return browser.tabs.open({
name: name,
url: url,
opener: window
});
};
Object.defineProperty(window, "closed", {
get: function() {
return closed;
},
enumerable: true
});
window._destroy = function() {
var frame, _i, _len, _ref;
if (closed) {
return;
}
closed = true;
_ref = window.frames;
for (_i = 0, _len = _ref.length; _i < _len; _i++) {
frame = _ref[_i];
frame.close();
}
eventQueue.destroy();
document.close();
window.dispose();
};
window.close = function() {
if (parent || closed) {
return;
}
if (browser._windowInScope === opener || browser._windowInScope === null) {
browser.emit("closed", window);
window._destroy();
history.destroy();
} else {
browser.log("Scripts may not close windows that were not opened by script");
}
};
windowHistory = {
forward: function() {
return windowHistory.go(1);
},
back: function() {
return windowHistory.go(-1);
},
go: function(amount) {
return browser.eventLoop.next(function() {
return history.go(amount);
});
},
pushState: function() {
var args;
args = 1 <= arguments.length ? __slice.call(arguments, 0) : [];
return history.pushState.apply(history, args);
},
replaceState: function() {
var args;
args = 1 <= arguments.length ? __slice.call(arguments, 0) : [];
return history.replaceState.apply(history, args);
},
_submit: history.submit.bind(history),
dump: history.dump.bind(history)
};
Object.defineProperties(windowHistory, {
length: {
get: function() {
return history.length;
},
enumerable: true
},
state: {
get: function() {
return history.state;
},
enumerable: true
}
});
Object.defineProperties(window, {
history: {
value: windowHistory
},
location: {
get: function() {
return history.location;
},
set: function(url) {
return history.assign(url);
},
enumerable: true
}
});
browser.emit("opened", window);
window._submit = function(_arg1) {
var encoding, method, params, submitTo, target, url;
url = _arg1.url, method = _arg1.method, encoding = _arg1.encoding, params = _arg1.params, target = _arg1.target;
url = HTML.resourceLoader.resolve(document, url);
target || (target = "_self");
browser.emit("submit", url, target);
switch (target) {
case "_self":
submitTo = window;
break;
case "_parent":
submitTo = window.parent;
break;
case "_top":
submitTo = window.top;
break;
default:
submitTo = browser.tabs.open({
name: target
});
}
return submitTo.history._submit({
url: url,
method: method,
encoding: encoding,
params: params
});
};
setImmediate(function() {
return loadDocument({
document: document,
history: history,
url: url,
method: method,
encoding: encoding,
params: params
});
});
return window;
};
/**
 * Loads `url` into `document` and reports the outcome on the browser.
 *
 * - about:      writes an empty HTML page and emits "loaded".
 * - javascript: evaluates the URL's pathname in the window and emits "loaded",
 *               or "error" if evaluation throws.
 * - http:, https:, file: requests the resource through the window's event
 *               queue, writes the response body into the document and emits
 *               "loading" followed by "loaded" (or "error" if the document has
 *               no documentElement afterwards). When the request itself fails,
 *               the response body or error message is written into the
 *               document and nothing is emitted from here.
 * - anything else emits "error".
 *
 * Does nothing (beyond resetting `window._response`) if the window is closed.
 *
 * @param {Object} _arg - Options.
 * @param {Object} _arg.document - Document to load into; its `window` supplies
 *   the browser and event queue.
 * @param {Object} _arg.history - History used to update the window location
 *   and to follow meta refreshes.
 * @param {string} _arg.url - URL to load.
 * @param {string} [_arg.method="GET"] - HTTP method (case-insensitive).
 * @param {string} [_arg.encoding] - Content type for POST requests; defaults
 *   to "application/x-www-form-urlencoded".
 * @param {Object} [_arg.params] - Request parameters passed to the event
 *   queue's `http` method.
 */
loadDocument = function(_arg) {
  var browser, document, encoding, error, headers, history, method, params, pathname, protocol, url, window, _ref;
  document = _arg.document;
  history = _arg.history;
  url = _arg.url;
  method = _arg.method;
  encoding = _arg.encoding;
  params = _arg.params;
  window = document.window;
  browser = window.browser;
  window._response = {};
  if (window.closed) {
    return;
  }

  method = (method || "GET").toUpperCase();
  if (method === "POST") {
    headers = {
      "content-type": encoding || "application/x-www-form-urlencoded"
    };
  }

  _ref = URL.parse(url);
  protocol = _ref.protocol;
  pathname = _ref.pathname;
  switch (protocol) {
    case "about:":
      document.open();
      document.write("<html><body></body></html>");
      document.close();
      return browser.emit("loaded", document);
    case "javascript:":
      try {
        window._evaluate(pathname, "javascript:");
        return browser.emit("loaded", document);
      } catch (_error) {
        error = _error;
        return browser.emit("error", error);
      }
      break;
    case "http:":
    case "https:":
    case "file:":
      headers = headers || {};
      if (!headers.referer) {
        headers.referer = document.referrer;
      }
      headers.accept = "text/html";
      return window._eventQueue.http(method, url, {
        headers: headers,
        params: params,
        target: document
      }, function(error, response) {
        var body, contentLoaded, handleRefresh, message, windowLoaded;
        if (error) {
          // Show whatever the server (or the failure) gave us as the page body.
          if (response) {
            window._response = response;
            history.updateLocation(window, response.url);
          }
          message = (response && response.body) || error.message || error;
          document.open();
          document.write("<html><body>" + message + "</body></html>");
          document.close();
          return;
        }
        window._response = response;

        // Re-dispatch the document's first "load" event on the window.
        windowLoaded = function(event) {
          document.removeEventListener("load", windowLoaded);
          return window.dispatchEvent(event);
        };
        document.addEventListener("load", windowLoaded);

        // Follows <meta http-equiv='refresh' content='N[; url=...]'> by
        // replacing the current history entry from the event queue.
        handleRefresh = function() {
          var content, match, refresh, refreshTimeout, refreshURL;
          refresh = document.querySelector("meta[http-equiv='refresh']");
          if (!refresh) {
            return;
          }
          // A meta refresh without a content attribute yields null here;
          // treat it as empty so it is ignored instead of throwing.
          content = refresh.getAttribute("content") || "";
          match = content.match(/^\s*(\d+)(?:\s*;\s*url\s*=\s*(.*?))?\s*(?:;|$)/i);
          if (!match) {
            return;
          }
          refreshTimeout = parseInt(match[1], 10);
          refreshURL = match[2] || document.location.href;
          if (refreshTimeout >= 0) {
            return window._eventQueue.enqueue(function() {
              var newWindow;
              history.replace(refreshURL);
              newWindow = history.current.window;
              return newWindow.addEventListener("load", function() {
                return newWindow._response.redirects++;
              });
            });
          }
        };

        // Re-dispatch the first DOMContentLoaded on the window, then check for
        // a meta refresh.
        contentLoaded = function(event) {
          document.removeEventListener("DOMContentLoaded", contentLoaded);
          window.dispatchEvent(event);
          return handleRefresh();
        };
        document.addEventListener("DOMContentLoaded", contentLoaded);

        history.updateLocation(window, response.url);
        window.browser.emit("loading", document);

        // Wrap fragments that have no <html> tag. Tag names are
        // case-insensitive, so <HTML ...> must count as present too.
        body = response.body;
        if (!/<html/i.test(body)) {
          body = "<html><body>" + (body || "") + "</body></html>";
        }
        document.open();
        document.write(body);
        document.close();
        if (document.documentElement) {
          return browser.emit("loaded", document);
        } else {
          return browser.emit("error", new Error("Could not parse document at " + url));
        }
      });
    default:
      return browser.emit("error", new Error("Cannot load resource " + url + ", unsupported protocol"));
  }
};
// Keep jsdom's own implementation so the wrapper below can delegate to it.
jsdomDispatchEvent = Events.EventTarget.prototype.dispatchEvent;

/**
 * Wraps jsdom's dispatchEvent: emits "event" on the browser, then dispatches
 * with the target's window marked as the window in scope and the event exposed
 * as `window.event`. Both are undone afterwards, even if dispatch throws.
 *
 * @param {Object} event - Event to dispatch on this target.
 * @returns Whatever jsdom's dispatchEvent returns.
 */
Events.EventTarget.prototype.dispatchEvent = function(event) {
  // The target is either a node (has _ownerDocument), a window (has document)
  // or the document itself.
  var ownerDocument = this._ownerDocument || this.document || this;
  var targetWindow = ownerDocument.parentWindow;
  var browser = targetWindow.browser;
  var previousInScope;

  browser.emit("event", event, this);
  try {
    previousInScope = browser._windowInScope;
    browser._windowInScope = targetWindow;
    targetWindow.event = event;
    return jsdomDispatchEvent.call(this, event);
  } finally {
    delete targetWindow.event;
    browser._windowInScope = previousInScope;
  }
};
// Keep jsdom's own implementation so the wrapper below can delegate to it.
jsdomRaise = HTML.Document.prototype.raise;

/**
 * Wraps jsdom's Document#raise. After delegating, if `data` carries an
 * exception (or error), its stack is cut at the first contextify frame, the
 * document URL is appended, and the error is passed to the window's event
 * queue via `onerror`.
 *
 * @param {string} type - Passed through to jsdom.
 * @param {string} message - Passed through to jsdom.
 * @param {Object} [data] - May hold the error under `exception` or `error`.
 */
HTML.Document.prototype.raise = function(type, message, data) {
  var failure, i, kept, owner, stackLines;
  jsdomRaise.call(this, type, message, data);

  failure = data && (data.exception || data.error);
  if (!failure) {
    return;
  }

  owner = this.parentWindow;
  kept = [];
  if (failure.stack) {
    stackLines = failure.stack.split("\n");
    // Copy lines until the first one that mentions contextify.
    for (i = 0; i < stackLines.length; i++) {
      if (stackLines[i].indexOf("contextify/lib/contextify.js") !== -1) {
        break;
      }
      kept.push(stackLines[i]);
    }
  }
  kept.push(" in " + this.location.href);
  failure.stack = kept.join("\n");
  owner._eventQueue.onerror(failure);
};
/**
 * Stand-in for the browser `screen` object. Instances start at position 0,0
 * with a size of 1280x800; the avail* and depth values are fixed and read-only.
 */
Screen = (function() {
  // Constant values served by getters on the prototype, in definition order.
  var FIXED_VALUES = {
    availLeft: 0,
    availTop: 0,
    availWidth: 1280,
    availHeight: 800,
    colorDepth: 24,
    pixelDepth: 24
  };

  function Screen() {
    this.left = 0;
    this.top = 0;
    this.width = 1280;
    this.height = 800;
  }

  Object.keys(FIXED_VALUES).forEach(function(key) {
    var constant = FIXED_VALUES[key];
    // Enumerable and configurable, like a getter added with __defineGetter__.
    Object.defineProperty(Screen.prototype, key, {
      get: function() {
        return constant;
      },
      enumerable: true,
      configurable: true
    });
  });

  return Screen;
})();
// Empty placeholder constructor: instances carry no properties or methods.
File = function File() {};