unblocker
Version:
Web proxy for evading internet censorship & general-purpose library for rewriting remote websites.
314 lines (269 loc) • 10.4 kB
JavaScript
var url = require("url");
var _ = require("lodash");
var debug = require("debug")("unblocker:core");
var debugWS = require("debug")("unblocker:websocket");
var middlewareDebugger = require("./middleware-debugger.js");
// expose all built-in middleware
Unblocker.host = require("./host.js");
Unblocker.referer = require("./referer.js");
Unblocker.cookies = require("./cookies.js");
Unblocker.hsts = require("./hsts.js");
Unblocker.hpkp = require("./hpkp.js");
Unblocker.csp = require("./csp.js");
Unblocker.redirects = require("./redirects.js");
Unblocker.decompress = require("./decompress.js");
Unblocker.charsets = require("./charsets.js");
Unblocker.urlPrefixer = require("./url-prefixer.js");
Unblocker.clientScripts = require("./client-scripts.js");
Unblocker.metaRobots = require("./meta-robots.js");
Unblocker.contentLength = require("./content-length.js");
// these aren't middleware, but are still worth exposing
Unblocker.proxy = require("./proxy.js");
Unblocker.contentTypes = require("./content-types.js");
Unblocker.getRealUrl = require("./get-real-url.js");
Unblocker.websockets = require("./websockets.js");
var defaultConfig = {
prefix: "/proxy/",
host: null, // can be used to override the url used in redirects
requestMiddleware: [],
responseMiddleware: [],
standardMiddleware: true,
clientScripts: true, // note: disabling standardMiddleware also disables clientScripts. It's mostly in a separate setting for testing
processContentTypes: Unblocker.contentTypes.html.concat(
Unblocker.contentTypes.css
),
};
function Unblocker(config) {
_.defaults(config, defaultConfig);
if (config.prefix.substr(-1) != "/") {
config.prefix += "/";
}
if (config.standardMiddleware !== false) {
var host = Unblocker.host(config);
var referer = Unblocker.referer(config);
var cookies = Unblocker.cookies(config);
var hsts = Unblocker.hsts(config);
var hpkp = Unblocker.hpkp(config);
var csp = Unblocker.csp(config);
var redirects = Unblocker.redirects(config);
var decompress = Unblocker.decompress(config);
var charsets = Unblocker.charsets(config);
var urlPrefixer = Unblocker.urlPrefixer(config);
var metaRobots = Unblocker.metaRobots(config);
var contentLength = Unblocker.contentLength(config);
// this applies to every request that gets proxied
config.requestMiddleware = [
host,
referer,
decompress.handleRequest,
cookies.handleRequest,
].concat(config.requestMiddleware);
config.responseMiddleware = [
hsts,
hpkp,
csp,
redirects,
decompress.handleResponse,
charsets,
urlPrefixer,
cookies.handleResponse,
metaRobots,
].concat(config.responseMiddleware, [contentLength]);
var clientScripts;
if (config.clientScripts) {
// insert clientScripts after the urlPrefixer
clientScripts = Unblocker.clientScripts(config);
const position = config.responseMiddleware.indexOf(urlPrefixer) + 1;
config.responseMiddleware.splice(position, 0, clientScripts.injector);
}
}
// the middleware debugger logs details before/after each piece of middleware
if (middlewareDebugger.enabled) {
config.requestMiddleware = middlewareDebugger.debugMiddleware(
config.requestMiddleware,
"request"
);
config.responseMiddleware = middlewareDebugger.debugMiddleware(
config.responseMiddleware,
"response"
);
}
debug("Unblocker initialized, config: ", config);
var proxy = Unblocker.proxy(config);
var getRealUrl = Unblocker.getRealUrl(config);
/**
* This is what makes this server magic: if we get an unrecognized request that wasn't corrected by
* proxy's filter, this checks the referrer to determine what the path should be, and then issues a
* 307 redirect to a proxied url at that path
*
* 307 redirects cause the client to re-use the original method and body at the new location
*/
function recoverTargetUrl(request) {
if (request.url.indexOf(config.prefix) === 0) {
// handles /proxy/ and /proxy
if (
request.url == config.prefix ||
request.url == config.prefix.substr(0, config.prefix.length - 1)
) {
return null;
}
// handles cases like like /proxy/google.com and redirects to /proxy/http://google.com/
return "http://" + request.url.substr(config.prefix.length);
}
// if there is no referer, then either they just got here or we can't help them
if (!request.headers.referer) {
return null;
}
var ref = url.parse(request.headers.referer);
// if we couldn't parse the referrer or they came from another site, they send them to the home page
if (!ref || ref.host != thisHost(request)) {
return null;
}
// now we know where they came from, so we can do something for them
if (ref.pathname.indexOf(config.prefix + "http") === 0) {
var real_url = getRealUrl(ref.pathname);
var real_uri = url.parse(real_url);
var target_url = real_uri.protocol + "//" + real_uri.host + request.url;
debug("recovering broken link to %s", request.url);
// now, take the requested pat on the previous known host and send the user on their way
return target_url;
}
// fallback - there was a referer, but it wasn't one that we could use to determine the correct path
return null;
}
// returns the configured host if one exists, otherwise the host that the current request came in on
function thisHost(request) {
if (config.host) {
return config.host;
} else {
return request.headers.host; // normal case: include the hostname but assume we're either on a standard port or behind a reverse proxy
}
}
// returns the http://site.com/proxy
function thisSite(request) {
// default to express's more advanced version of this when available (handles X-Forwarded-Protocol headers)
const proto =
request.protocol ||
request.headers["X-Forwarded-Protocol"] ||
(request.connection.encrypted ? "https" : "http");
return proto + "://" + thisHost(request) + config.prefix;
}
function redirectTo(request, response, site, headers) {
site = site || "";
if (site.substr(0, 1) == "/") {
site = site.substr(1);
}
if (site.substr(0, config.prefix.length) == config.prefix) {
// no /proxy/proxy redirects
site = site.substr(config.prefix.length);
}
var location = request.thisSite() + site;
debug("redirecting to %s", location);
try {
response.writeHead(
307,
_.defaults(headers || {}, {
Location: location,
})
);
} catch (ex) {
// Most likely because the headers were already sent
console.error("Failed to send redirect", ex);
}
response.end();
}
function initData(clientRequest, clientResponse, clientSocket) {
// convenience methods
clientRequest.thisHost = thisHost.bind(thisHost, clientRequest);
clientRequest.thisSite = thisSite.bind(thisSite, clientRequest);
var uri,
formatted = null;
var url_data = url.parse(clientRequest.url);
var rawUrl = clientRequest.url.substr(config.prefix.length);
// todo: consider supporting ws here for websockets
if (url_data.pathname.indexOf(config.prefix + "http") === 0) {
uri = url.parse(getRealUrl(clientRequest.url));
formatted = url.format(uri);
} else {
var target = recoverTargetUrl(clientRequest);
if (target) {
// redirecting is dificult here, and doesn't add much value, so just use the recovered target url
uri = url.parse(target);
formatted = url.format(uri);
}
}
// todo: check the TLD to handle cases where a client requested ../img.jpg from the root and this gets translated to /proxy/http://img.jpg by the browser.
// (surviv.io does this)
// This is how api consumers can hook into requests.
// The data object is passed to all requestMiddleware before the request is sent to the remote server,
// and it is passed through all responseMiddleware before being sent to the client.
var data = {
url: formatted,
uri,
rawUrl,
clientRequest,
clientResponse,
headers: _.cloneDeep(clientRequest.headers),
stream: clientRequest,
isWebsocket: !!clientSocket,
clientSocket,
};
return data;
}
// todo: see if this can be synchronous
const clientScriptsServer = config.clientScripts
? clientScripts.server
: (req, res, next) => next();
// regular web requests
function handleRequest(clientRequest, clientResponse, next) {
const data = initData(clientRequest, clientResponse);
clientScriptsServer(clientRequest, clientResponse, (err) => {
if (err) return next(err);
clientResponse.redirectTo = redirectTo.bind(
redirectTo,
clientRequest,
clientResponse
);
if (!next) {
next = function () {
clientResponse.writeHead(400);
clientResponse.end("Unable to process request");
};
}
const formatted = data.url;
const raw = data.rawUrl;
if (formatted) {
// If the raw URL isn't quite right, but we can figure it out, redirect to the correct URL.
// Special exception for cases where routers collapsed slashes (see #130)
if (formatted !== raw && formatted.replace("://", ":/") !== raw) {
return clientResponse.redirectTo(formatted);
}
proxy(data, next);
} else {
next();
}
});
}
// websocket support
const proxyWebsocket = Unblocker.websockets(config);
handleRequest.onUpgrade = function onUpgrade(
clientRequest,
clientSocket,
clientHead
) {
debugWS("handling websocket req to", clientRequest.url);
const data = initData(clientRequest, null, clientSocket);
data.clientHead = clientHead;
if (data.url) {
proxyWebsocket(data);
} else {
// nothing else to do, we don't know where the websocket is supposed to go.
debugWS("unable to handle websocket upgrade", clientRequest.url);
clientSocket.end("HTTP/1.1 400 Bad Request\r\n\r\n");
}
};
return handleRequest;
}
module.exports = Unblocker;
module.exports.defaultConfig = defaultConfig;
;