unblocker
Version:
Web proxy for evading internet censorship & general-purpose library for rewriting remote websites.
142 lines (121 loc) • 7.04 kB
JavaScript
var URL = require('url');
var libCookie = require('cookie');
var setCookie = require('set-cookie-parser');
var TLD = require('tld');
var Transform = require('stream').Transform;
var contentTypes = require('./content-types.js');
var debug = require('debug')('unblocker:cookies');
var _ = require('lodash');
/**
 * Cookie middleware factory.
 *
 * Forwards cookies on to the client, rewriting each cookie's domain and path so that it is scoped to the
 * remote site's "directory" on the proxy server (config.prefix + protocol + '//' + host + path).
 *
 * Gets a bit hacky when switching protocols or subdomains: cookies are copied over to the new "directory",
 * but flags such as httponly and expires are lost and the path is reset to the site root.
 *
 * Cookies are only ever copied between two hosts of the same site (identical hostname, or identical
 * registered domain as reported by TLD.registered); anything else is treated as an unrelated site.
 *
 * Todo: consider creating an extra cookie to hold flags for other cookies when switching protocols or subdomains
 *
 * @param {Object} config - proxy configuration; `config.prefix` is read here and the whole object is handed to contentTypes.shouldProcess
 * @returns {{handleRequest: Function, handleResponse: Function}} request and response handlers
 */
function cookies(config) {
    var REDIRECT_QUERY_PARAM = '__proxy_cookies_to';

    // Escapes every character that has a special meaning in a regular expression so that `text` matches literally.
    function escapeRegExp(text) {
        return String(text).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    }

    // True when both hostnames are identical, or when both resolve to the same non-empty registered domain.
    // An unknown registered domain never matches anything, so two unrelated hosts are not treated as one site.
    function isSameSite(hostnameA, hostnameB) {
        if (!hostnameA || !hostnameB) {
            return false;
        }
        if (hostnameA === hostnameB) {
            return true;
        }
        var registeredA = TLD.registered(hostnameA);
        return Boolean(registeredA) && registeredA === TLD.registered(hostnameB);
    }

    // Cookie path covering the root of the given (parsed) remote site on the proxy.
    function siteRootPath(targetUri) {
        return config.prefix + targetUri.protocol + '//' + targetUri.host + '/';
    }

    /**
     * Request handler. Normally does nothing, but when the URL carries the REDIRECT_QUERY_PARAM query parameter
     * (added to links by the response handler) it re-issues the request's cookies for the target site's path
     * and redirects the client to that target.
     *
     * The target comes from the URL and is therefore untrusted: cookies are only copied when the target is on
     * the same site as the current request; otherwise the client is redirected without any cookies.
     *
     * @param {Object} data - request context; reads data.url and data.headers.cookie, calls data.clientResponse.redirectTo
     */
    function redirectCookiesWith(data) {
        var uri = URL.parse(data.url, true); // true = parseQueryString
        var target = uri.query[REDIRECT_QUERY_PARAM];
        if (Array.isArray(target)) {
            // a repeated parameter is parsed into an array, which URL.parse cannot handle - use the first value
            target = target[0];
        }
        if (!target) {
            return;
        }

        var nextUri = URL.parse(target);
        var setCookieHeaders = [];
        if (isSameSite(uri.hostname, nextUri.hostname)) {
            debug('copying cookies from %s to %s', data.url, target);
            var requestCookies = libCookie.parse(data.headers.cookie || '');
            setCookieHeaders = Object.keys(requestCookies).map(function(name) {
                return libCookie.serialize(name, requestCookies[name], {
                    path: siteRootPath(nextUri)
                });
            });
        } else {
            debug('not copying cookies from %s to unrelated site %s', data.url, target);
        }

        data.clientResponse.redirectTo(target, {
            'set-cookie': setCookieHeaders
        });
        // todo: copy cookies over from clientRequest when the remote server sends a 3xx redirect to a different protocol / subdomain
    }

    // normally libCookie.serialize passes values through encodeURIComponent, but a custom encoder may be provided to prevent that
    // see https://www.npmjs.com/package/cookie#encode
    function noChange(value) {
        return value;
    }

    /**
     * Response handler. Does three things:
     *  1. re-scopes every set-cookie header to the remote site's path on the proxy,
     *  2. on a redirect to another protocol/host of the same site, re-issues the request's cookies for the new path,
     *  3. rewrites links in the body that switch protocol/host within the same site so that they first pass
     *     through redirectCookiesWith.
     *
     * @param {Object} data - response context; reads data.url, data.redirectUrl, data.headers, data.clientRequest.headers.cookie
     *                        and replaces data.headers['set-cookie'] and data.stream
     */
    function rewriteCookiesAndLinks(data) {
        var uri = URL.parse(data.url);
        // data.redirectUrl is set by the redirect middleware in the case of a 3xx redirect
        var nextUri = data.redirectUrl ? URL.parse(data.redirectUrl) : undefined;

        // first update any set-cookie headers to ensure the path is prefixed with the site
        var responseCookies = setCookie.parse(data, {
            decodeValues: false // normally it calls decodeURIComponent on each value - but we want to just pass them along unchanged in this case.
        });
        if (responseCookies.length) {
            debug('remaping set-cookie headers');
            var targetUri = nextUri || uri;
            data.headers['set-cookie'] = responseCookies.map(function(cookie) {
                cookie.path = config.prefix + targetUri.protocol + '//' + targetUri.host + (cookie.path || '/');
                delete cookie.domain;
                delete cookie.secure; // todo: maybe leave this if we know the proxy is being accessed over https?
                cookie.encode = noChange;
                return libCookie.serialize(cookie.name, cookie.value, cookie);
            });
        }

        if (nextUri) {
            var diffProto = nextUri.protocol !== uri.protocol;
            var diffHost = nextUri.hostname !== uri.hostname;
            // if protocol or hostname are changing, but the site is the same, copy the cookies over to the new "path"
            if ((diffProto || diffHost) && isSameSite(uri.hostname, nextUri.hostname)) {
                debug('copying cookies from %s to %s', data.url, data.redirectUrl);
                // a cookie that the response itself sets takes precedence over the old one with the same name
                var replacedNames = Object.create(null);
                responseCookies.forEach(function(cookie) {
                    replacedNames[cookie.name] = true;
                });
                // create set-cookie headers for the remaining old cookies (from the request)
                var oldCookies = libCookie.parse(data.clientRequest.headers.cookie || '');
                var copiedHeaders = Object.keys(oldCookies).filter(function(name) {
                    return !replacedNames[name];
                }).map(function(name) {
                    return libCookie.serialize(name, oldCookies[name], {
                        path: siteRootPath(nextUri)
                    });
                });
                // append them to any existing set-cookie headers; [].concat also copes with a single string header
                data.headers['set-cookie'] = [].concat(data.headers['set-cookie'] || [], copiedHeaders);
            }
        }

        // takes a link that switches protocol and/or subdomain and makes it first go through the cookie handler
        // on the current protocol/sub and then redirect with the cookies copied over.
        // Invoked by String.prototype.replace: proxiedUrl is the whole match, url is the first capture group.
        function updateLink(proxiedUrl, url /*, subdomain*/ ) {
            var linkUri = URL.parse(url);
            var switchesOrigin = linkUri.protocol !== uri.protocol || linkUri.host !== uri.host;
            // the pattern's trailing character class can run past the host (e.g. "example.com.evil.net"),
            // so the parsed hostname is checked again before any cookies are routed to it
            if (!switchesOrigin || !isSameSite(uri.hostname, linkUri.hostname)) {
                // leave the link exactly as it was
                return proxiedUrl;
            }
            // rewrite the url - we want the old proto and domain, but the new path just in case there are any cookies
            // that are limited to that sub-path (although they won't be on the new protodomain...)
            var cookieProxiedUrl = config.prefix + uri.protocol + '//' + uri.host + (linkUri.pathname || '/') +
                '?' + REDIRECT_QUERY_PARAM + '=' + encodeURIComponent(url);
            debug('rewriting link from %s to %s in order to allow cookies to be copied over to new path', proxiedUrl, cookieProxiedUrl);
            return cookieProxiedUrl;
        }

        // next scan the links for anything that switches subdomain or protocol (if this is a content-type that we want to process)
        if (contentTypes.shouldProcess(config, data)) {
            var tld = TLD.registered(uri.hostname);
            // without a registered domain there is nothing meaningful to match against
            if (tld) {
                // prefix and domain are escaped so that they match literally ("." must not mean "any character")
                var RE_PROTO_SUBDOMAIN_URL = new RegExp(
                    escapeRegExp(config.prefix) + '(https?://([a-z0-9.-]+\\.)?' + escapeRegExp(tld) + '[^\'") \\\\]*)',
                    'ig'
                );
                // note: the replacement runs on each chunk separately, so a link split across two chunks is not rewritten
                data.stream = data.stream.pipe(new Transform({
                    decodeStrings: false,
                    transform: function(chunk, encoding, next) {
                        var updated = chunk.toString().replace(RE_PROTO_SUBDOMAIN_URL, updateLink);
                        this.push(updated, 'utf8');
                        next();
                    }
                }));
            }
        }
    }

    return {
        handleRequest: redirectCookiesWith,
        handleResponse: rewriteCookiesAndLinks
    };
}
// Export the middleware factory: calling it with the proxy config returns the { handleRequest, handleResponse } pair.
module.exports = cookies;