unblocker
Version:
Web proxy for evading internet censorship & general-purpose library for rewriting remote websites.
181 lines (162 loc) • 6.88 kB
JavaScript
var URL = require("url");
var libCookie = require("cookie");
var setCookie = require("set-cookie-parser");
var TLD = require("tld");
var Transform = require("stream").Transform;
var contentTypes = require("./content-types.js");
var debug = require("debug")("unblocker:cookies");
var _ = require("lodash");
/**
* Forwards cookies on to client, rewriting domain and path to match site's "directory" on proxy server.
*
* Gets a bit hackey when switching protocols or subdomains - cookies are copied over to the new "directory" but flags such as httponly and expires are lost and path is reset to site root
*
* Todo: consider creating an extra cookie to hold flags for other cookies when switching protocols or subdomains
*
* @param config
*/
function cookies(config) {
var REDIRECT_QUERY_PARAM = "__proxy_cookies_to";
// normally we do nothing here, but when the user is switching protocols or subdomains, the handleResponse function
// will rewrite the links to start with the old protocol & domain (so that we get sent the cookies), and then it
// will copy the old cookies to the new path
function redirectCookiesWith(data) {
var uri = URL.parse(data.url, true); // true = parseQueryString
if (uri.query[REDIRECT_QUERY_PARAM]) {
var nextUri = URL.parse(uri.query[REDIRECT_QUERY_PARAM]);
debug(
"copying cookies from %s to %s",
data.url,
uri.query[REDIRECT_QUERY_PARAM]
);
var cookies = libCookie.parse(data.headers.cookie || "");
var setCookieHeaders = Object.keys(cookies).map(function (name) {
var value = cookies[name];
return libCookie.serialize(name, value, {
path: config.prefix + nextUri.protocol + "//" + nextUri.host + "/",
});
});
data.clientResponse.redirectTo(uri.query.__proxy_cookies_to, {
"set-cookie": setCookieHeaders,
});
}
// todo: copy cookies over from clientRequest when the remote server sends a 3xx redirect to a differnet protocol / subdomain
}
// normally libCookie.serialize passes values through encodeURIComponent, but a custom encoder may be provided to prevent that
// see https://www.npmjs.com/package/cookie#encode
function noChange(value) {
return value;
}
function rewriteCookiesAndLinks(data) {
var uri = URL.parse(data.url);
var nextUri;
// this is set by the redirect middleware in the case of a 3xx redirect
if (data.redirectUrl) {
nextUri = URL.parse(data.redirectUrl);
}
// first update any set-cookie headers to ensure the path is prefixed with the site
var cookies = setCookie.parse(data, {
decodeValues: false, // normally it calls decodeURIComponent on each value - but we want to just pass them along unchanged in this case.
});
if (cookies.length) {
debug("remaping set-cookie headers");
data.headers["set-cookie"] = cookies.map(function (cookie) {
var targetUri = nextUri || uri;
cookie.path =
config.prefix +
targetUri.protocol +
"//" +
targetUri.host +
(cookie.path || "/");
delete cookie.domain;
delete cookie.secure; // todo: maybe leave this if we know the proxy is being accessed over https?
cookie.encode = noChange;
return libCookie.serialize(cookie.name, cookie.value, cookie);
});
}
if (data.redirectUrl) {
var diffProto = nextUri.protocol != uri.protocol;
var diffHost = nextUri.hostname != uri.hostname;
// if protocol or hostname are changing, but the registered tld is the same, copy the cookies over to the new "path"
if (
(diffProto || diffHost) &&
TLD.registered(nextUri.hostname) == TLD.registered(uri.hostname)
) {
debug("copying cookies from %s to %s", data.url, data.redirectUrl);
// get all of the old cookies (from the request) indexed by name, and create set-cookie headers for each one
var oldCookies = libCookie.parse(
data.clientRequest.headers.cookie || ""
);
var oldSetCookieHeaders = _.mapValues(
oldCookies,
function (value, name) {
return libCookie.serialize(name, value, {
path:
config.prefix + nextUri.protocol + "//" + nextUri.host + "/",
});
}
);
// but, if we have a new cookie with the same name as an old one, delete the old one
cookies.forEach(function (cookie) {
delete oldSetCookieHeaders[cookie.name];
});
// finally, append the remaining old cookie headers to any existing set-cookie headers in the response
data.headers["set-cookie"] = (data.headers["set-cookie"] || []).concat(
_.values(oldSetCookieHeaders)
);
}
}
// takes a link that switches protocol and/or subdomain and makes it first go through the cookie handler on the current protocol/sub and then redirect with the cookies coppied over
function updateLink(proxiedUrl, url /*, subdomain*/) {
var next_uri = URL.parse(url);
if (next_uri.protocol != uri.protocol || next_uri.host != uri.host) {
// rewrite the url - we want the old proto and domain, but the new path just in case there are any cookies that are limited to that sub-path (although they won't be on the new protodomain...)
var cookieProxiedUrl =
config.prefix +
uri.protocol +
"//" +
uri.host +
next_uri.pathname +
"?" +
REDIRECT_QUERY_PARAM +
"=" +
encodeURIComponent(url);
debug(
"rewriting link from %s to %s in order to allow cookies to be copied over to new path",
proxiedUrl,
cookieProxiedUrl
);
return cookieProxiedUrl;
} else {
// if neither the proto nor the host have changed, just replace it with the same string
return proxiedUrl;
}
}
// next scan the links for anything that switches subdomain or protocol (if this is a content-type that we want to process
if (contentTypes.shouldProcess(config, data)) {
var tld = TLD.registered(uri.hostname);
var RE_PROTO_SUBDOMAIN_URL = new RegExp(
config.prefix + "(https?://([a-z0-9.-]+.)?" + tld + "[^'\") \\\\]*)",
"ig"
);
data.stream = data.stream.pipe(
new Transform({
decodeStrings: false,
transform: function (chunk, encoding, next) {
var updated = chunk
.toString()
.replace(RE_PROTO_SUBDOMAIN_URL, updateLink);
this.push(updated, "utf8");
next();
},
})
);
}
}
return {
handleRequest: redirectCookiesWith,
handleResponse: rewriteCookiesAndLinks,
};
}
module.exports = cookies;
;