UNPKG

zombie-globbies

Version:

A very quick fix for [**Zombie**](https://github.com/assaf/zombie) that lets it correctly crawl web pages whose `html` tag carries attributes (e.g. `<html lang="en">`).

598 lines (566 loc) 17 kB
var EventSource, Events, File, HTML, History, JSDOM, JSDOM_PATH, Path, Screen, URL, WebSocket, XMLHttpRequest, XPath, createDocument, createWindow, jsdomDispatchEvent, jsdomRaise, loadDocument, __slice = [].slice; Path = require("path"); JSDOM_PATH = require.resolve("jsdom"); createDocument = require("./document"); EventSource = require("eventsource"); History = require("./history"); JSDOM = require("jsdom"); WebSocket = require("ws"); URL = require("url"); XMLHttpRequest = require("./xhr"); createWindow = require("" + JSDOM_PATH + "/../jsdom/browser/index").createWindow; XPath = require("" + JSDOM_PATH + "/../jsdom/level3/xpath"); Events = JSDOM.level(3, 'events'); HTML = JSDOM.defaultLevel; module.exports = function(_arg) { var browser, closed, document, encoding, eventQueue, global, history, method, name, opener, params, parent, plugins, referer, url, window, windowHistory; browser = _arg.browser, params = _arg.params, encoding = _arg.encoding, history = _arg.history, method = _arg.method, name = _arg.name, opener = _arg.opener, parent = _arg.parent, referer = _arg.referer, url = _arg.url; name || (name = ""); url || (url = "about:blank"); if (!/^(about|javascript|http|https|file):/i.test(url)) { throw new Error("Cannot load resource " + url + ", unsupported protocol"); } window = createWindow(HTML); global = window.getGlobal(); closed = false; Object.defineProperty(window, "browser", { value: browser, enumerable: true }); document = createDocument(browser, window, referer || history.url); Object.defineProperty(window, "document", { value: document, enumerable: true }); Object.defineProperty(window, "name", { value: name, enumerable: true }); if (parent) { Object.defineProperty(window, "parent", { value: parent, enumerable: true }); Object.defineProperty(window, "top", { value: parent.top, enumerable: true }); } else { Object.defineProperty(window, "parent", { value: global, enumerable: true }); Object.defineProperty(window, "top", { value: global, enumerable: 
true }); } Object.defineProperty(window, "opener", { value: opener && opener, enumerable: true }); Object.defineProperty(window, "title", { get: function() { return document.title; }, set: function(title) { return document.title = title; }, enumerable: true }); Object.defineProperty(window, "console", { value: browser.console, enumerable: true }); Object.defineProperty(window, "requestAnimationFrame", { get: function() { return window.setImmediate; } }); plugins = []; plugins.item = function() {}; plugins.namedItem = function() {}; Object.defineProperties(window.navigator, { cookieEnabled: { value: true }, javaEnabled: { value: function() { return false; } }, language: { value: browser.language }, mimeTypes: { value: plugins }, platform: { value: 'node' }, plugins: { value: plugins }, userAgent: { value: browser.userAgent }, vendor: { value: "Zombie Industries" } }); Object.defineProperty(window, "cookies", { get: function() { return browser.cookies.serialize(this.location.hostname, this.location.pathname); } }); browser._storages.extend(window); browser._interact.extend(window); Object.defineProperties(window, { File: { value: File }, Event: { value: Events.Event }, screen: { value: new Screen() }, MouseEvent: { value: Events.MouseEvent }, MutationEvent: { value: Events.MutationEvent }, UIEvent: { value: Events.UIEvent } }); window.atob = function(string) { return new Buffer(string, "base64").toString("utf8"); }; window.btoa = function(string) { return new Buffer(string, "utf8").toString("base64"); }; window.XMLHttpRequest = function() { return new XMLHttpRequest(window); }; window.WebSocket = function(url, protocol) { var origin; url = HTML.resourceLoader.resolve(document, url); origin = "" + window.location.protocol + "//" + window.location.host; return new WebSocket(url, { origin: origin, protocol: protocol }); }; window.Image = function(width, height) { var img; img = new HTML.HTMLImageElement(window.document); img.width = width; img.height = height; return 
img; }; window.DataView = DataView; window.XPathException = XPath.XPathException; window.XPathExpression = XPath.XPathExpression; window.XPathEvaluator = XPath.XPathEvaluator; window.XPathResult = XPath.XPathResult; window.resizeTo = function(width, height) { window.outerWidth = window.innerWidth = width; return window.outerHeight = window.innerHeight = height; }; window.resizeBy = function(width, height) { return window.resizeTo(window.outerWidth + width, window.outerHeight + height); }; window.onhashchange = null; window.postMessage = function(data, targetOrigin) { var event, origin; document = window.document; event = document.createEvent("MessageEvent"); event.initEvent("message", false, false); event.data = data; event.source = (browser._windowInScope || window).getGlobal(); origin = event.source.location; event.origin = URL.format({ protocol: origin.protocol, host: origin.host }); return window.dispatchEvent(event); }; window._evaluate = function(code, filename) { var error, originalInScope, result, _ref; try { _ref = [browser._windowInScope, window], originalInScope = _ref[0], browser._windowInScope = _ref[1]; if (typeof code === "string" || code instanceof String) { result = global.run(code, filename); } else if (code) { result = code.call(global); } browser.emit("evaluated", code, result, filename); return result; } catch (_error) { error = _error; error.filename || (error.filename = filename); throw error; } finally { browser._windowInScope = originalInScope; } }; eventQueue = browser.eventLoop.createEventQueue(window); Object.defineProperties(window, { _eventQueue: { value: eventQueue }, setTimeout: { value: eventQueue.setTimeout.bind(eventQueue) }, clearTimeout: { value: eventQueue.clearTimeout.bind(eventQueue) }, setInterval: { value: eventQueue.setInterval.bind(eventQueue) }, clearInterval: { value: eventQueue.clearInterval.bind(eventQueue) }, setImmediate: { value: function(fn) { return eventQueue.setTimeout(fn, 0); } }, clearImmediate: { value: 
eventQueue.clearTimeout.bind(eventQueue) } }); window.EventSource = function(url) { var eventSource; url = HTML.resourceLoader.resolve(document, url); eventSource = new EventSource(url); eventQueue.addEventSource(eventSource); return eventSource; }; window.open = function(url, name, features) { url = url && HTML.resourceLoader.resolve(document, url); return browser.tabs.open({ name: name, url: url, opener: window }); }; Object.defineProperty(window, "closed", { get: function() { return closed; }, enumerable: true }); window._destroy = function() { var frame, _i, _len, _ref; if (closed) { return; } closed = true; _ref = window.frames; for (_i = 0, _len = _ref.length; _i < _len; _i++) { frame = _ref[_i]; frame.close(); } eventQueue.destroy(); document.close(); window.dispose(); }; window.close = function() { if (parent || closed) { return; } if (browser._windowInScope === opener || browser._windowInScope === null) { browser.emit("closed", window); window._destroy(); history.destroy(); } else { browser.log("Scripts may not close windows that were not opened by script"); } }; windowHistory = { forward: function() { return windowHistory.go(1); }, back: function() { return windowHistory.go(-1); }, go: function(amount) { return browser.eventLoop.next(function() { return history.go(amount); }); }, pushState: function() { var args; args = 1 <= arguments.length ? __slice.call(arguments, 0) : []; return history.pushState.apply(history, args); }, replaceState: function() { var args; args = 1 <= arguments.length ? 
__slice.call(arguments, 0) : []; return history.replaceState.apply(history, args); }, _submit: history.submit.bind(history), dump: history.dump.bind(history) }; Object.defineProperties(windowHistory, { length: { get: function() { return history.length; }, enumerable: true }, state: { get: function() { return history.state; }, enumerable: true } }); Object.defineProperties(window, { history: { value: windowHistory }, location: { get: function() { return history.location; }, set: function(url) { return history.assign(url); }, enumerable: true } }); browser.emit("opened", window); window._submit = function(_arg1) { var encoding, method, params, submitTo, target, url; url = _arg1.url, method = _arg1.method, encoding = _arg1.encoding, params = _arg1.params, target = _arg1.target; url = HTML.resourceLoader.resolve(document, url); target || (target = "_self"); browser.emit("submit", url, target); switch (target) { case "_self": submitTo = window; break; case "_parent": submitTo = window.parent; break; case "_top": submitTo = window.top; break; default: submitTo = browser.tabs.open({ name: target }); } return submitTo.history._submit({ url: url, method: method, encoding: encoding, params: params }); }; setImmediate(function() { return loadDocument({ document: document, history: history, url: url, method: method, encoding: encoding, params: params }); }); return window; }; loadDocument = function(_arg) { var browser, document, done, encoding, error, headers, history, method, params, pathname, protocol, url, window, _ref; document = _arg.document, history = _arg.history, url = _arg.url, method = _arg.method, encoding = _arg.encoding, params = _arg.params; window = document.window; browser = window.browser; window._response = {}; if (window.closed) { return; } done = function(error) { if (error) { return browser.emit("error", error); } else { return browser.emit("loaded", document); } }; method = (method || "GET").toUpperCase(); if (method === "POST") { headers = { 
"content-type": encoding || "application/x-www-form-urlencoded" }; } _ref = URL.parse(url), protocol = _ref.protocol, pathname = _ref.pathname; switch (protocol) { case "about:": document.open(); document.write("<html><body></body></html>"); document.close(); return browser.emit("loaded", document); case "javascript:": try { window._evaluate(pathname, "javascript:"); return browser.emit("loaded", document); } catch (_error) { error = _error; return browser.emit("error", error); } break; case "http:": case "https:": case "file:": headers = headers || {}; if (!headers.referer) { headers.referer = document.referrer; } headers.accept = "text/html"; return window._eventQueue.http(method, url, { headers: headers, params: params, target: document }, function(error, response) { var body, contentLoaded, handleRefresh, message, windowLoaded; if (error) { if (response) { window._response = response; history.updateLocation(window, response.url); } message = (response && response.body) || error.message || error; document.open(); document.write("<html><body>" + message + "</body></html>"); document.close(); return; } window._response = response; windowLoaded = function(event) { document.removeEventListener("load", windowLoaded); return window.dispatchEvent(event); }; document.addEventListener("load", windowLoaded); handleRefresh = function() { var content, match, nothing, refresh, refreshTimeout, refreshURL, refresh_timeout, refresh_url; refresh = document.querySelector("meta[http-equiv='refresh']"); if (refresh) { content = refresh.getAttribute("content"); match = content.match(/^\s*(\d+)(?:\s*;\s*url\s*=\s*(.*?))?\s*(?:;|$)/i); if (match) { nothing = match[0], refresh_timeout = match[1], refresh_url = match[2]; } else { return; } refreshTimeout = parseInt(refresh_timeout, 10); refreshURL = refresh_url || document.location.href; if (refreshTimeout >= 0) { return window._eventQueue.enqueue(function() { var newWindow; history.replace(refreshURL); newWindow = 
history.current.window; return newWindow.addEventListener("load", function() { return newWindow._response.redirects++; }); }); } } }; contentLoaded = function(event) { document.removeEventListener("DOMContentLoaded", contentLoaded); window.dispatchEvent(event); return handleRefresh(); }; document.addEventListener("DOMContentLoaded", contentLoaded); history.updateLocation(window, response.url); window.browser.emit("loading", document); body = response.body; if (!/<html/.test(body)) { body = "<html><body>" + (body || "") + "</body></html>"; } document.open(); document.write(body); document.close(); if (document.documentElement) { return browser.emit("loaded", document); } else { return browser.emit("error", new Error("Could not parse document at " + url)); } }); default: return browser.emit("error", new Error("Cannot load resource " + url + ", unsupported protocol")); } }; jsdomDispatchEvent = Events.EventTarget.prototype.dispatchEvent; Events.EventTarget.prototype.dispatchEvent = function(event) { var browser, document, originalInScope, window, _ref; document = this._ownerDocument || this.document || this; window = document.parentWindow; browser = window.browser; browser.emit("event", event, this); try { _ref = [browser._windowInScope, window], originalInScope = _ref[0], browser._windowInScope = _ref[1]; window.event = event; return jsdomDispatchEvent.call(this, event); } finally { delete window.event; browser._windowInScope = originalInScope; } }; jsdomRaise = HTML.Document.prototype.raise; HTML.Document.prototype.raise = function(type, message, data) { var document, error, line, partial, window, _i, _len, _ref; jsdomRaise.call(this, type, message, data); error = data && (data.exception || data.error); if (error) { document = this; window = document.parentWindow; partial = []; if (error.stack) { _ref = error.stack.split("\n"); for (_i = 0, _len = _ref.length; _i < _len; _i++) { line = _ref[_i]; if (~line.indexOf("contextify/lib/contextify.js")) { break; } 
partial.push(line); } } partial.push(" in " + document.location.href); error.stack = partial.join("\n"); window._eventQueue.onerror(error); } }; Screen = (function() { function Screen() { this.top = this.left = 0; this.width = 1280; this.height = 800; } Screen.prototype.__defineGetter__("availLeft", function() { return 0; }); Screen.prototype.__defineGetter__("availTop", function() { return 0; }); Screen.prototype.__defineGetter__("availWidth", function() { return 1280; }); Screen.prototype.__defineGetter__("availHeight", function() { return 800; }); Screen.prototype.__defineGetter__("colorDepth", function() { return 24; }); Screen.prototype.__defineGetter__("pixelDepth", function() { return 24; }); return Screen; })(); File = (function() { function File() {} return File; })();