UNPKG

smart-private-npm

Version:

An intelligent routing proxy for npm with support for: private, whitelisted, and blacklisted packages

613 lines (539 loc) 18.4 kB
/*
 * npm-proxy.js: Smart, prototypal proxy for routing traffic between _the_ public npm and _a_ private npm.
 *
 * (C) 2013, Nodejitsu Inc.
 *
 */

var httpProxy = require('http-proxy'),
    EE = require('events').EventEmitter,
    hyperquest = require('hyperquest'),
    util = require('util'),
    url_ = require('url');

var hasOwn = Object.prototype.hasOwnProperty;

//
// ### function isListed (set, pkg)
// #### @set {Object} Lookup object keyed by package name (may be undefined).
// #### @pkg {string} Package name to look up.
//
// Returns true only when `pkg` is an OWN, truthy key of `set`. A plain
// `set[pkg]` lookup also sees inherited members, so a package called
// `constructor` or `toString` would otherwise look like it is listed.
//
function isListed(set, pkg) {
  return !!set && hasOwn.call(set, pkg) && !!set[pkg];
}

//
// ### function targetHost (target)
// #### @target {Object} Parsed registry URL, optionally carrying a `vhost`.
//
// Returns the value to send as the `Host` header for `target`,
// preferring an explicit `vhost` over the parsed host / hostname.
//
function targetHost(target) {
  return target.vhost || target.host || target.hostname;
}

//
// ### function NpmProxy (options)
// #### @options {Object} Options for initializing the proxy
// ####   @npm            {Array|string} Public npm CouchDBs we are proxying against.
// ####   @policy         {Object} Default policy
// ####     - npm         {url.parse} Private npm CouchDB we are proxying against.
// ####     - transparent {boolean}   If true: always behaves as a pass-thru to public npm(s).
// ####     - private     {Object}    Set of initial private modules.
// ####     - blacklist   {Object}    Set of initial blacklisted modules.
// ####     - whitelist   {Object}    Set of initial whitelisted modules.
// ####   @writePrivateOk {function} **Optional** Predicate for writing new private packages.
// ####   @log            {function} **Optional** Log function. Defaults to console.
//
// Constructor function for the NpmProxy object responsible for
// making proxy decisions between multiple npm registries.
//
var NpmProxy = module.exports = function (options) {
  if (!(this instanceof NpmProxy)) { return new NpmProxy(options) }
  EE.call(this);

  //
  // URL to CouchDB and the proxy instance to use.
  //
  this.npm = options.npm;
  this.log = options.log || console;

  //
  // Remark: if we dont have a specific read/write url,
  // assume we either have an array or an url.parsed object.
  // `this.npm` is guarded on every access so that an instance
  // without a public npm can still be constructed.
  //
  this.interval = options.interval || 60 * 15 * 1000;
  var readNpm = (this.npm && this.npm.read) || this.npm;
  this.currentNpm = readNpm;
  this.isUrlArray(readNpm);

  //
  // Default these values if there is no read/write
  //
  this.writeNpm = (this.npm && this.npm.write) || this.currentNpm;
  this.secure = options.secure || options.strictSSL || options.rejectUnauthorized || false;

  //
  // Setup the http-proxy instance to handle bad responses
  // and allow lax SSL if there is nothing passed in
  //
  this.proxy = httpProxy.createProxyServer({
    secure: this.secure,
    prependPath: false
  });
  this.proxy.on('error', this.onProxyError.bind(this));

  //
  // Proxy these events to the main prototype so we don't need to inspect
  // the internal http-proxy instance
  //
  this.proxy.on('start', this.emit.bind(this, 'start'));
  this.proxy.on('end', this.emit.bind(this, 'end'));

  //
  // Handler for decoupling any authorization logic
  // for new private packages from the proxy itself.
  //
  this.writePrivateOk = options.writePrivateOk;

  //
  // Set the policy
  //
  if (options.policy) {
    this.setPolicy(options.policy);
  }
};

util.inherits(NpmProxy, EE);

//
// ### function isUrlArray (urls)
// #### @urls {Array|Object} One parsed URL or an Array of them.
//
// Handles the case where we have an array of urls so its reusable.
// Does nothing unless `urls` is an Array. A single-element Array pins
// `currentNpm`; a longer one starts cycling on `this.interval`.
//
NpmProxy.prototype.isUrlArray = function (urls) {
  if (!Array.isArray(urls)) {
    return;
  }

  if (urls.length === 0) {
    throw new Error('At least one public npm URL is required');
  }

  if (urls.length === 1) {
    this.currentNpm = urls[0];
    return;
  }

  //
  // `nextPublicNpm` splices and pushes on the Array it is handed,
  // so rotate a private copy and leave the caller's Array alone.
  //
  var pool = urls.slice();

  this.currentNpm = null;
  this.nextPublicNpm(pool);
  this.intervalId = setInterval(
    this.nextPublicNpm.bind(this, pool),
    this.interval
  );
};

//
// ### function setPolicy (policy)
// #### @policy {Object} Policy to install on this instance.
//
// Sets the specified `policy` on this instance. The `private`,
// `blacklist` and `whitelist` members are used as Objects keyed by
// package name. `private` and `blacklist` default to `{}` because the
// read / write decision functions index both unconditionally.
//
NpmProxy.prototype.setPolicy = function (policy) {
  this.policy = policy;
  this.policy.private = this.policy.private || {};
  this.policy.blacklist = this.policy.blacklist || {};

  if (this.policy.transparent) {
    this.private = this.decide = this.merge = this.public;
  }
  else {
    //
    // Drop any pass-through overrides left by an earlier transparent
    // policy so the prototype methods are visible again.
    //
    delete this.private;
    delete this.decide;
    delete this.merge;
  }
};

//
// ### function nextPublicNpm (urls)
// #### @urls {Array} Pool of public npm URLs not currently in use.
//
// Sets the current public npm to a random selection (without
// replacement): the pick is removed from `urls` and the previously
// current npm, if any, is pushed back onto it.
//
NpmProxy.prototype.nextPublicNpm = function (urls) {
  var index = Math.random() * urls.length | 0,
      lastNpm = this.currentNpm;

  this.currentNpm = urls.splice(index, 1)[0];
  this.log.info('[public npm] %s --> %s', (lastNpm && lastNpm.href) || 'none', this.currentNpm.href);

  if (lastNpm) {
    urls.push(lastNpm);
  }
};

//
// ### function public (req, res)
// #### @req {ServerRequest}  Incoming Request to the npm registry
// #### @res {ServerResponse} Outgoing Response to the npm client
//
// Make a proxy request to `url` against the public
// npm registry and stream the response back to the `res`.
// PUT and DELETE go to `writeNpm`, everything else to `currentNpm`.
//
NpmProxy.prototype.public = function (req, res) {
  var address = req.connection.remoteAddress || req.socket.remoteAddress,
      method = req.method.toLowerCase(),
      isWrite = method === 'put' || method === 'delete',
      npm = isWrite ? this.writeNpm : this.currentNpm,
      host = targetHost(npm);

  this.log.info('[public] %s - %s %s %s %j', address, req.method, req.url, host, req.headers);
  this.emit('headers', req, req.headers, npm);

  req.headers.host = host;
  this.proxy.web(req, res, { target: npm.href });
};

//
// ### function private (req, res, policy)
// #### @req    {ServerRequest}  Incoming Request to the npm registry
// #### @res    {ServerResponse} Outgoing Response to the npm client
// #### @policy {Object}         Policy info with admin and private npm dbs.
//
// Make a proxy request to `url` against the private
// npm registry and stream the response back to the `res`.
//
NpmProxy.prototype.private = function (req, res, policy) {
  //
  // Always default to a set policy. This enables the
  // the enterprise case only one policy enforced.
  //
  policy = policy || this.policy;
  if (policy.transparent) {
    return this.public(req, res);
  }

  var address = req.connection.remoteAddress || req.socket.remoteAddress,
      host = targetHost(policy.npm);

  this.log.info('[private] %s - %s %s %s %j', address, req.method, req.url, host, req.headers);
  this.emit('headers', req, req.headers, policy.npm);

  req.headers.host = host;
  this.proxy.web(req, res, { target: policy.npm.href });
};

//
// ### function decide (req, res, policy)
// #### @req    {ServerRequest}  Incoming Request to the npm registry
// #### @res    {ServerResponse} Outgoing Response to the npm client
// #### @policy {Object}         Policy info with admin and private npm dbs.
//
// For the `pkg` requested, based on the:
//
// * Whitelist policy
// * Blacklist policy
// * Known private packages
//
// decide whether to proxy to the public or private npm
// registry and then stream the response back to the res
// from whatever registry was selected.
//
NpmProxy.prototype.decide = function (req, res, policy) {
  //
  // Always default to a set policy. This enables the
  // the enterprise case only one policy enforced.
  //
  policy = policy || this.policy;
  if (policy.transparent) {
    return this.public(req, res);
  }

  //
  // The package name is either the segment after `/-/package/`
  // or the first path segment of the URL (query string dropped).
  //
  var packageNamespaceMatch = /^\/-\/package\/(.*?)\//.exec(req.url);

  var address = req.connection.remoteAddress || req.socket.remoteAddress,
      url = req.url,
      method = req.method.toLowerCase(),
      pkg = packageNamespaceMatch
        ? packageNamespaceMatch[1]
        : url.slice(1).split('?').shift().split('/').shift(),
      proxy = this.proxy,
      self = this;

  //
  // Proxy or serve not found based on the decision
  //
  function onDecision(err, target) {
    //
    // If there was no target then this is a 404 by definition
    // even if it exists in the public registry because of a
    // potential whitelist.
    //
    if (err || !target) {
      return self.notFound(req, res, err || { message: 'Unknown pkg: ' + pkg });
    }

    // if X-Forwarded-Host is set, npm returns 404 {"error":"not_found","reason":"no_db_file"}
    if (req.headers['x-forwarded-host']) {
      delete req.headers['x-forwarded-host'];
    }

    //
    // If we get a valid target then we can proxy to it
    //
    self.log.info('[decide] %s - %s %s %s %j', address, req.method, req.url, targetHost(target), req.headers);
    self.emit('headers', req, req.headers, target);

    //
    // Read the host again after the `headers` event, as the
    // original flow did, in case a listener adjusted the target.
    //
    req.headers.host = targetHost(target);
    proxy.web(req, res, { target: target.href });
  }

  //
  // Calculate the decision function based on the HTTP
  // method. We could potentially optimize this by having two
  // decision functions since the readUrl method(s) do not
  // have an async-nature.
  //
  // The choice of `standard{Read,Write}Url` vs `whitelist{Read,Write}Url`
  // is an important distinction here because the logic is
  // so drastically different between whitelist and not.
  //
  if (method === 'get' || method === 'head') {
    return policy.whitelist
      ? this.whitelistReadUrl(pkg, policy, onDecision)
      : this.standardReadUrl(pkg, policy, onDecision);
  }

  return policy.whitelist
    ? this.whitelistWriteUrl(pkg, policy, onDecision)
    : this.standardWriteUrl(pkg, policy, onDecision);
};

//
// ### function notFound (req, res, err)
// #### @req {ServerRequest}  Incoming Request to the npm registry
// #### @res {ServerResponse} Outgoing Response to the npm client
// #### @err {Object}         **Optional** Reason; only `message` is read.
//
// Simple not-found handler. Responds with 400 when a reason is
// supplied and 404 (plus a console stack trace) when it is not.
//
NpmProxy.prototype.notFound = function (req, res, err) {
  var address = req.connection.remoteAddress || req.socket.remoteAddress,
      code = err ? 400 : 404,
      json;

  if (!err) {
    global.console.trace();
  }

  err = err || { message: 'Unknown error' };
  this.log.error('[not found] %s - %s %s %s %j', address, req.method, req.url, err.message, req.headers);

  res.writeHead(code, { 'content-type': 'application/json' });
  json = { error: 'not_found', reason: err.message };
  res.end(JSON.stringify(json));
};

//
// ### function standardReadUrl (pkg, policy, callback)
// #### @pkg      {string}   npm package to get the read URL for.
// #### @policy   {Object}   Policy info with admin and private npm dbs.
// #### @callback {function} Continuation called with `(err, target)`.
//
// Calculates the target read (i.e. GET or HEAD) URL based on the
// `pkg`, `this.policy` and `this.npm` targets.
//
NpmProxy.prototype.standardReadUrl = function (pkg, policy, callback) {
  //
  // Always default to a set policy. This enables the
  // the enterprise case only one policy enforced.
  //
  policy = policy || this.policy;

  //
  // There **IS NO WHITELIST** so if it is already a known private package
  // or part of a blacklist then proxy directly to the private npm.
  //
  if (isListed(policy.private, pkg) || isListed(policy.blacklist, pkg)) {
    return callback(null, policy.npm);
  }

  //
  // Otherwise send it to the public npm
  //
  return callback(null, this.currentNpm);
};

//
// ### function standardWriteUrl (pkg, policy, callback)
// #### @pkg      {string}   npm package to get the write URL for.
// #### @policy   {Object}   Policy info with admin and private npm dbs.
// #### @callback {function} Continuation called with `(err, target)`.
//
// Calculates the target write (i.e. PUT or POST) URL based on the
// `pkg`, `this.policy` and `this.npm` targets.
//
NpmProxy.prototype.standardWriteUrl = function (pkg, policy, callback) {
  //
  // Always default to a set policy. This enables the
  // the enterprise case only one policy enforced.
  //
  policy = policy || this.policy;

  var writeOk = this.writePrivateOk,
      self = this,
      err;

  //
  // There **IS NO WHITELIST** so if it is already a known private package
  // or part of a blacklist then proxy directly to the private npm.
  //
  if (isListed(policy.private, pkg) || isListed(policy.blacklist, pkg)) {
    return callback(null, policy.npm);
  }

  //
  // Otherwise we need to look this package in the public registry
  // - if it does not exist we proxy to the private registry
  // - if it does exist then we proxy to the public registry
  //
  hyperquest({
    uri: url_.resolve(this.writeNpm.href, pkg),
    rejectUnauthorized: this.secure
  })
    .on('error', callback)
    .on('response', function (res) {
      if (res.statusCode !== 404) {
        return callback(null, self.writeNpm);
      }

      //
      // Unknown publicly: give the optional predicate a chance to veto
      // before remembering the package as private.
      //
      if (writeOk) {
        err = writeOk(policy, self);
        if (err) {
          return callback(err);
        }
      }

      policy.private = policy.private || {};
      policy.private[pkg] = true;
      return callback(null, policy.npm);
    });
};

//
// ### function whitelistReadUrl (pkg, policy, callback)
// #### @pkg      {string}   npm package to get the read URL for.
// #### @policy   {Object}   Policy info with admin and private npm dbs.
// #### @callback {function} Continuation called with `(err, target)`.
//
// Calculates the target read (i.e. GET or HEAD) URL based on the
// `pkg`, `this.policy` and `this.npm` targets. Assumes there is
// a whitelist by default.
//
NpmProxy.prototype.whitelistReadUrl = function (pkg, policy, callback) {
  //
  // Always default to a set policy. This enables the
  // the enterprise case only one policy enforced.
  //
  policy = policy || this.policy;

  //
  // There **IS A WHITELIST** so if it is in the whitelist proxy to the
  // public registry
  //
  if (isListed(policy.whitelist, pkg)) {
    return callback(null, this.currentNpm);
  }

  //
  // If it is already a known private package or part of a blacklist
  // then proxy directly to the private npm.
  //
  if (isListed(policy.private, pkg) || isListed(policy.blacklist, pkg)) {
    return callback(null, policy.npm);
  }

  //
  // Otherwise it is FORBIDDEN!
  //
  return callback(new Error('Your whitelist policy prevents you from getting ' + pkg));
};

//
// ### function whitelistWriteUrl (pkg, policy, callback)
// #### @pkg      {string}   npm package to get the write URL for.
// #### @policy   {Object}   Policy info with admin and private npm dbs.
// #### @callback {function} Continuation called with `(err, target)`.
//
// Calculates the target write URL based on the `pkg`, `this.policy`
// and `this.npm` targets. Assumes there is a whitelist by default.
// Unlike `standardWriteUrl` this does not consult `writePrivateOk`;
// it only enforces `policy.limits.private` when that is set.
//
NpmProxy.prototype.whitelistWriteUrl = function (pkg, policy, callback) {
  //
  // Always default to a set policy. This enables the
  // the enterprise case only one policy enforced.
  //
  policy = policy || this.policy;

  var limits = policy && policy.limits;

  //
  // There **IS A WHITELIST** so if it is in the whitelist proxy to the
  // public registry
  //
  if (isListed(policy.whitelist, pkg)) {
    return callback(null, this.writeNpm);
  }

  //
  // If it is already a known private package or part of a blacklist
  // then proxy directly to the private npm.
  //
  if (isListed(policy.private, pkg) || isListed(policy.blacklist, pkg)) {
    return callback(null, policy.npm);
  }

  //
  // Otherwise we need to look this package in the public registry
  // - if it does not exist we proxy to the private registry
  // - if it does exist then we 404
  //
  hyperquest({
    uri: url_.resolve(this.writeNpm.href, pkg),
    rejectUnauthorized: this.secure
  })
    .on('error', callback)
    .on('response', function (res) {
      if (res.statusCode !== 404) {
        //
        // It exists publicly but is not whitelisted: FORBIDDEN.
        //
        return callback(new Error('Your whitelist policy prevents you from writing ' + pkg));
      }

      policy.private = policy.private || {};
      if (limits && limits.private && Object.keys(policy.private).length >= limits.private) {
        return callback(new Error('Out of private packages. Have you considered upgrading?'));
      }

      policy.private[pkg] = true;
      return callback(null, policy.npm);
    });
};

//
// ### function merge (req, res, policy)
// #### @req    {ServerRequest}  Incoming Request to the npm registry
// #### @res    {ServerResponse} Outgoing Response to the npm client
// #### @policy {Object}         Policy info with admin and private npm dbs.
//
// Concurrently request `/url` against the public
// and private npm registry and stream the JSON
// merged responses back to `res` as a single
// JSON object.
//
NpmProxy.prototype.merge = function (req, res, policy) {
  //
  // Always default to a set policy. This enables the
  // the enterprise case only one policy enforced.
  //
  policy = policy || this.policy;

  var address = req.connection.remoteAddress || req.socket.remoteAddress,
      method = req.method,
      url = req.url,
      self = this,
      contentTypes = {},
      responses = {},
      finished = false;

  //
  // ### function fail (code, type, body)
  // Ends `res` once with the given status and body; later
  // calls are ignored so two upstream failures cannot double-end.
  //
  function fail(code, type, body) {
    if (finished) {
      return;
    }

    finished = true;
    if (!res.headersSent) {
      res.writeHead(code, { 'content-type': type });
    }

    res.end(body);
  }

  //
  // ### function onError (type, err)
  // Reports a failed upstream request instead of letting the
  // unhandled `error` event take the process down.
  //
  function onError(type, err) {
    self.log.error('[merge error] %s - %s %s %s %s', address, req.method, req.url, type, err.message);
    fail(500, 'application/json', JSON.stringify({
      error: 'proxy_error',
      reason: err.message
    }));
  }

  //
  // ### function makeRequest (target)
  // Makes a request to `req.url` to the
  // specified target.
  //
  function makeRequest(target) {
    var host = targetHost(target),
        headers = Object.keys(req.headers)
          .reduce(function (all, key) {
            all[key] = req.headers[key];
            return all;
          }, {});

    //
    // Set the correct host header.
    //
    headers.host = host;

    self.log.info('[merge] %s - %s %s %s %j', address, req.method, req.url, host, req.headers);
    return hyperquest({
      url: url_.resolve(target.href, url),
      method: method,
      headers: headers
    });
  }

  //
  // ### function onResponse (type, pRes)
  // Sets the content type from the proxy
  // response.
  //
  function onResponse(type, pRes) {
    if (finished) {
      return;
    }

    //
    // A response without a content-type is recorded as ''.
    //
    contentTypes[type] = (pRes.headers['content-type'] || '').split(';')[0];
    responses[type] = pRes;

    //
    // Wait until we have both a public and a private response.
    //
    if (!responses.public || !responses.private) {
      return;
    }

    if (contentTypes.public !== contentTypes.private) {
      return fail(500, 'text/plain', 'Content-Type mismatch: ' + JSON.stringify(contentTypes));
    }

    var handlers = self.merge.handlers,
        handler = handlers && hasOwn.call(handlers, contentTypes.public)
          ? handlers[contentTypes.public]
          : null;

    if (typeof handler !== 'function') {
      return fail(500, 'text/plain', 'Unsupported Content-Type: ' + JSON.stringify(contentTypes));
    }

    finished = true;
    return handler(req, res, responses);
  }

  makeRequest(policy.npm)
    .on('error', onError.bind(null, 'private'))
    .on('response', onResponse.bind(null, 'private'));

  makeRequest(this.currentNpm)
    .on('error', onError.bind(null, 'public'))
    .on('response', onResponse.bind(null, 'public'));
};

//
// ### @merge.handlers {Object}
// Merge handlers for multiple proxy responses, keyed by content type.
// Every handler currently streams only the public response.
//
NpmProxy.prototype.merge.handlers = {
  'text/plain': function textPlain(req, res, responses) {
    //
    // TODO: Properly merge these together.
    //
    responses.public.pipe(res);
  },
  'text/xml': function textXml(req, res, responses) {
    //
    // TODO: Properly merge these together.
    //
    responses.public.pipe(res);
  },
  'application/json': function appJson(req, res, responses) {
    //
    // TODO: Properly merge these together.
    //
    responses.public.pipe(res);
  }
};

//
// ### function onProxyError (err, req, res)
// #### @err {Error}          Error raised by the http-proxy instance.
// #### @req {ServerRequest}  Request that was being proxied.
// #### @res {ServerResponse} Response to the npm client.
//
// `http-proxy` "error" event handler. Logs the failure and answers
// with a JSON error body.
//
NpmProxy.prototype.onProxyError = function (err, req, res) {
  //
  // A fresh response already carries statusCode 200, so a plain
  // `res.statusCode || 500` would report proxy failures as successes.
  // Keep the code only when it is already an error status.
  //
  var address = req.connection.remoteAddress || req.socket.remoteAddress,
      code = res.statusCode >= 400 ? res.statusCode : 500,
      json;

  this.log.error('[proxy error] %s - %s %s %s %j', address, req.method, req.url, err.message, req.headers);

  if (!res.headersSent) {
    res.writeHead(code, { 'content-type': 'application/json' });
  }

  json = { error: 'proxy_error', reason: err.message };
  res.end(JSON.stringify(json));
};