co-urllib
Version:
co version of urllib
824 lines (690 loc) • 24.6 kB
JavaScript
/**!
* co-urllib - lib/urllib.js
*
* MIT Licensed
*
* Authors:
* dead_horse <dead_horse@qq.com> (http://deadhorse.me)
* fengmk2 <fengmk2@gmail.com> (http://fengmk2.cnpmjs.org)
*/
(function(
// Reliable reference to the global object (i.e. window in browsers).
global,
// Dummy constructor that we use as the .constructor property for
// functions that return Generator objects.
GeneratorFunction,
// Undefined value, more compressible than void 0.
undefined
) {
// Cached reference so own-property checks can be written as
// hasOwn.call(obj, key).
var hasOwn = Object.prototype.hasOwnProperty;
// A wrapGenerator is already installed on the global object: skip the rest
// of this function body so the existing one is left in place.
if (global.wrapGenerator) {
return;
}
/**
 * Create a Generator object that drives `innerFn`.
 *
 * @param {Function} innerFn function the Generator calls with its Context
 * @param {Object} [self] `this` value for innerFn; a falsy value becomes null
 * @param {Array} [tryList] try-location entries handed to the Context;
 *   a falsy value becomes an empty array
 * @return {Generator}
 */
function wrapGenerator(innerFn, self, tryList) {
return new Generator(innerFn, self || null, tryList || []);
}
// Publish the factory on the global object and, when an `exports` binding is
// visible from here, on that object as well.
global.wrapGenerator = wrapGenerator;
if (typeof exports !== "undefined") {
exports.wrapGenerator = wrapGenerator;
}
// The four values taken by a Generator's internal `state` variable.
var GenStateSuspendedStart = "suspendedStart";
var GenStateSuspendedYield = "suspendedYield";
var GenStateExecuting = "executing";
var GenStateCompleted = "completed";
// Returning this object from the innerFn has the same effect as
// breaking out of the dispatch switch statement.
var ContinueSentinel = {};
/**
 * Tag `genFun` as a generator function by pointing its `constructor`
 * property at the shared GeneratorFunction dummy.
 *
 * @param {Function} genFun function to tag
 * @return {Function} the same function, for chaining
 */
wrapGenerator.mark = function(genFun) {
  genFun.constructor = GeneratorFunction;
  return genFun;
};

// Where Function#name is not supported the dummy constructor has no usable
// name, so give it one; isGeneratorFunction compares by name.
if ("GeneratorFunction" !== GeneratorFunction.name) {
  GeneratorFunction.name = "GeneratorFunction";
}

/**
 * Report whether `genFun` looks like a generator function, i.e. whether its
 * constructor carries the same name as GeneratorFunction.
 *
 * @param {*} genFun candidate value
 * @return {Boolean}
 */
wrapGenerator.isGeneratorFunction = function(genFun) {
  var ctor = genFun ? genFun.constructor : null;
  if (!ctor) {
    return false;
  }
  return GeneratorFunction.name === ctor.name;
};
/**
 * Generator object that steps a compiled state-machine function.
 *
 * Each instance owns a Context and a private `state` variable, and exposes
 * `next` and `throw`, both implemented by the inner `invoke` function.
 *
 * @param {Function} innerFn state-machine function, called as
 *   `innerFn.call(self, context)`
 * @param {Object} self `this` value used for every innerFn call
 * @param {Array} tryList try-location entries forwarded to the Context
 */
function Generator(innerFn, self, tryList) {
var generator = this;
var context = new Context(tryList);
var state = GenStateSuspendedStart;
/**
 * Shared implementation of `next` and `throw`.
 *
 * @param {String} method "next" or "throw"
 * @param {*} arg value to send in, or the exception to throw in
 * @return {Object} a `{ value, done }` record built here, or the record
 *   returned by the active delegate while it is not done
 * @throws {Error} when the generator is executing or has completed
 * @throws {TypeError} when a non-undefined value is sent before the first step
 */
function invoke(method, arg) {
if (state === GenStateExecuting) {
throw new Error("Generator is already running");
}
if (state === GenStateCompleted) {
throw new Error("Generator has already finished");
}
while (true) {
// While a delegate is registered (see Context#delegateYield), forward the
// call to it before touching innerFn.
var delegate = context.delegate;
if (delegate) {
try {
var info = delegate.generator[method](arg);
// Delegate generator ran and handled its own exceptions so
// regardless of what the method was, we continue as if it is
// "next" with an undefined arg.
method = "next";
arg = undefined;
} catch (uncaught) {
context.delegate = null;
// Like returning generator.throw(uncaught), but without the
// overhead of an extra function call.
method = "throw";
arg = uncaught;
continue;
}
if (info.done) {
// Delegate finished: store its result under the requested context
// property and resume innerFn at the recorded location.
context[delegate.resultName] = info.value;
context.next = delegate.nextLoc;
} else {
// Delegate yielded: pass its record straight through to the caller.
state = GenStateSuspendedYield;
return info;
}
context.delegate = null;
}
if (method === "next") {
if (state === GenStateSuspendedStart &&
typeof arg !== "undefined") {
// https://people.mozilla.org/~jorendorff/es6-draft.html#sec-generatorresume
throw new TypeError(
"attempt to send " + JSON.stringify(arg) + " to newborn generator"
);
}
// Only a generator paused at a yield receives the sent value.
if (state === GenStateSuspendedYield) {
context.sent = arg;
} else {
delete context.sent;
}
} else if (method === "throw") {
// Throwing into a generator that never started finishes it immediately.
if (state === GenStateSuspendedStart) {
state = GenStateCompleted;
throw arg;
}
if (context.dispatchException(arg)) {
// If the dispatched exception was caught by a catch block,
// then let that catch block handle the exception normally.
method = "next";
arg = undefined;
}
}
state = GenStateExecuting;
try {
var value = innerFn.call(self, context);
// If an exception is thrown from innerFn, we leave state ===
// GenStateExecuting and loop back for another invocation.
// NOTE(review): the catch clause below assigns GenStateCompleted rather
// than leaving GenStateExecuting, so the sentence above does not match
// the code — confirm which is intended.
state = context.done
? GenStateCompleted
: GenStateSuspendedYield;
var info = {
value: value,
done: context.done
};
if (value === ContinueSentinel) {
// innerFn asked to be re-entered (no value to hand out yet): loop again.
if (context.delegate && method === "next") {
// Deliberately forget the last sent value so that we don't
// accidentally pass it on to the delegate.
arg = undefined;
}
} else {
return info;
}
} catch (thrown) {
state = GenStateCompleted;
// Route the exception through the try-entry table when we were stepping
// normally; when already throwing, replace the pending exception so the
// next loop iteration dispatches this one instead.
if (method === "next") {
context.dispatchException(thrown);
} else {
arg = thrown;
}
}
}
}
// Public stepping API: both methods are `invoke` with the method name bound.
generator.next = invoke.bind(generator, "next");
generator.throw = invoke.bind(generator, "throw");
}
// String form used for every Generator instance.
Generator.prototype.toString = function() {
return "[object Generator]";
};
/**
 * Append one try-entry record to `this.tryEntries`.
 *
 * Intended to be invoked with a Context-like receiver (for example through
 * `Array.prototype.forEach(pushTryEntry, context)`).
 *
 * @param {Array} triple `[tryLoc, catchLoc, finallyLoc]`; positions 1 and 2
 *   are copied only when the index is present in the array
 */
function pushTryEntry(triple) {
  var record = {};
  record.tryLoc = triple[0];

  // Optional slots, in the order their properties must be created.
  var optionalNames = ["catchLoc", "finallyLoc"];
  for (var k = 0; k < optionalNames.length; k++) {
    var slot = k + 1;
    if (slot in triple) {
      record[optionalNames[k]] = triple[slot];
    }
  }

  this.tryEntries.push(record);
}
/**
 * Reset the completion record of a try entry, creating the record when the
 * entry has none yet.
 *
 * @param {Object} entry try entry whose `completion` is reset in place
 * @param {Number} i position of the entry; index 0 gets type "normal",
 *   every other index gets type "return"
 */
function resetTryEntry(entry, i) {
  var completion = entry.completion;
  if (!completion) {
    completion = {};
  }

  if (i === 0) {
    completion.type = "normal";
  } else {
    completion.type = "return";
  }
  delete completion.arg;

  entry.completion = completion;
}
/**
 * Mutable state shared between a Generator and its innerFn: the previous and
 * next locations, the last sent value, temporaries, the active delegate and
 * one completion record per try entry.
 *
 * @param {Array} tryList array of `[tryLoc, catchLoc, finallyLoc]` entries,
 *   each appended through pushTryEntry
 */
function Context(tryList) {
// The root entry object (effectively a try statement without a catch
// or a finally block) gives us a place to store values thrown from
// locations where there is no enclosing try statement.
this.tryEntries = [{ tryLoc: "root" }];
tryList.forEach(pushTryEntry, this);
this.reset();
}
Context.prototype = {
constructor: Context,
// Put the context back into its initial state: locations at 0, no sent
// value, not done, no delegate, every completion record reset.
reset: function() {
this.prev = 0;
this.next = 0;
this.sent = undefined;
this.done = false;
this.delegate = null;
this.tryEntries.forEach(resetTryEntry);
// Pre-initialize at least 20 temporary variables to enable hidden
// class optimizations for simple generators.
// The loop also keeps going past t19 for as long as the next `tN` is
// already an own property, so those are nulled as well.
for (var tempIndex = 0, tempName;
hasOwn.call(this, tempName = "t" + tempIndex) || tempIndex < 20;
++tempIndex) {
this[tempName] = null;
}
},
// Mark the context as done. Rethrows the exception recorded on the root
// entry if there is one, otherwise returns the stored return value `rval`.
stop: function() {
this.done = true;
var rootEntry = this.tryEntries[0];
var rootRecord = rootEntry.completion;
if (rootRecord.type === "throw") {
throw rootRecord.arg;
}
return this.rval;
},
// Build an iterator-like function over the `for...in` keys of `object`.
// Keys are snapshotted up front; a key that is no longer `in` the object by
// the time it is reached is skipped.
keys: function(object) {
var keys = [];
for (var key in object) {
keys.push(key);
}
// Reversed so that pop() below hands the keys out in enumeration order.
keys.reverse();
// Rather than returning an object with a next method, we keep
// things simple and return the next function itself.
return function next() {
while (keys.length) {
var key = keys.pop();
if (key in object) {
next.value = key;
next.done = false;
return next;
}
}
// To avoid creating an additional object, we just hang the .value
// and .done properties off the next function object itself. This
// also ensures that the minifier will not anonymize the function.
next.done = true;
return next;
};
},
// Record `exception` on the innermost applicable try entry and point `next`
// at the location that must run next. Returns true only when a catch
// location was selected; returns false for a finally location or for the
// root entry. Rethrows immediately if the context is already done.
dispatchException: function(exception) {
if (this.done) {
throw exception;
}
var context = this;
// Stores the exception in `record` (the loop variable declared below,
// hoisted by `var`) and jumps to `loc`.
function handle(loc, caught) {
record.type = "throw";
record.arg = exception;
context.next = loc;
return !!caught;
}
// Walk from the last registered entry back towards the root entry.
for (var i = this.tryEntries.length - 1; i >= 0; --i) {
var entry = this.tryEntries[i];
var record = entry.completion;
if (entry.tryLoc === "root") {
// Exception thrown outside of any try block that could handle
// it, so set the completion value of the entire function to
// throw the exception.
return handle("end");
}
// Only entries whose try location is at or before `prev` are candidates.
if (entry.tryLoc <= this.prev) {
var hasCatch = hasOwn.call(entry, "catchLoc");
var hasFinally = hasOwn.call(entry, "finallyLoc");
if (hasCatch && hasFinally) {
if (this.prev < entry.catchLoc) {
return handle(entry.catchLoc, true);
} else if (this.prev < entry.finallyLoc) {
return handle(entry.finallyLoc);
}
} else if (hasCatch) {
if (this.prev < entry.catchLoc) {
return handle(entry.catchLoc, true);
}
} else if (hasFinally) {
if (this.prev < entry.finallyLoc) {
return handle(entry.finallyLoc);
}
} else {
throw new Error("try statement without catch or finally");
}
}
}
},
// Find the innermost entry with a finally location that either equals
// `finallyLoc` or lies after `prev`. Returns undefined when none matches.
_findFinallyEntry: function(finallyLoc) {
for (var i = this.tryEntries.length - 1; i >= 0; --i) {
var entry = this.tryEntries[i];
if (entry.tryLoc <= this.prev &&
hasOwn.call(entry, "finallyLoc") && (
entry.finallyLoc === finallyLoc ||
this.prev < entry.finallyLoc)) {
return entry;
}
}
},
// Record an abrupt completion of the given `type` with `arg`. When a finally
// entry applies, jump to its finally location first; otherwise complete
// right away. Always returns ContinueSentinel.
abrupt: function(type, arg) {
// Called without an argument, so only the `prev < finallyLoc` test in
// _findFinallyEntry can match here.
var entry = this._findFinallyEntry();
var record = entry ? entry.completion : {};
record.type = type;
record.arg = arg;
if (entry) {
this.next = entry.finallyLoc;
} else {
this.complete(record);
}
return ContinueSentinel;
},
// Act on a completion record: rethrow for "throw", jump to `record.arg` for
// "break"/"continue", store the return value and jump to "end" for "return".
// Any other type leaves the context untouched.
complete: function(record) {
if (record.type === "throw") {
throw record.arg;
}
if (record.type === "break" ||
record.type === "continue") {
this.next = record.arg;
} else if (record.type === "return") {
this.rval = record.arg;
this.next = "end";
}
return ContinueSentinel;
},
// Apply the completion recorded on the entry owning `finallyLoc`.
// NOTE(review): `entry` is dereferenced without a check, so this appears to
// rely on _findFinallyEntry always finding a match here — confirm.
finish: function(finallyLoc) {
var entry = this._findFinallyEntry(finallyLoc);
return this.complete(entry.completion);
},
// Fetch the exception recorded for the try entry at `tryLoc` and reset that
// entry. When the entry's record is not of type "throw", `thrown` is never
// assigned and undefined is returned.
"catch": function(tryLoc) {
for (var i = this.tryEntries.length - 1; i >= 0; --i) {
var entry = this.tryEntries[i];
if (entry.tryLoc === tryLoc) {
var record = entry.completion;
if (record.type === "throw") {
var thrown = record.arg;
resetTryEntry(entry, i);
}
return thrown;
}
}
// The context.catch method must only be called with a location
// argument that corresponds to a known catch block.
throw new Error("illegal catch attempt");
},
// Register `generator` as the active delegate. Generator's invoke loop
// forwards calls to it, stores its final value in `this[resultName]` and
// then resumes at `nextLoc`.
delegateYield: function(generator, resultName, nextLoc) {
this.delegate = {
generator: generator,
resultName: resultName,
nextLoc: nextLoc
};
return ContinueSentinel;
}
};
}).apply(this, Function("return [this, function GeneratorFunction(){}]")());
// Tag `request` (the function declared later in this file and hoisted) via
// wrapGenerator.mark, which points its `constructor` at GeneratorFunction.
wrapGenerator.mark(request);
// NOTE(review): this directive comes after other statements in the visible
// file, so it presumably does not enable strict mode here — confirm intent.
'use strict';
/**
* Module dependencies.
*/
var debug = require('debug')('co-urllib');
var thunkify = require('thunkify');
var http = require('http');
var https = require('https');
var urlutil = require('url');
var qs = require('querystring');
var path = require('path');
var fs = require('fs');
var zlib = require('zlib');
var readall = require('co-readall');
var assertTimeout = require('co-assert-timeout');
var ua = require('default-user-agent');
// zlib.gunzip wrapped by thunkify so that `request` can yield its result
// (presumably a thunk-returning version of the callback API — see thunkify).
var gunzip = thunkify(zlib.gunzip);
// Package metadata, read synchronously from ../package.json at load time;
// only `pkg.version` is used below, for the default User-Agent.
var pkg = JSON.parse(fs.readFileSync(path.join(__dirname, '..', 'package.json')));
// Counter used to give every outgoing request a distinct id for debug output.
var REQUEST_ID = 0;

/**
 * Build a request starter bound to one client library.
 *
 * @param {Object} httplib module exposing `request(options, callback)`
 *   (the `http` or `https` module)
 * @return {Function} `function (options, args)` returning a thunk. The thunk
 *   takes `done(err, result)` and calls it at most once: with
 *   `{req, res}` when the response arrives, or with the request's error.
 *   `args.stream` is piped into the request when set; otherwise the request
 *   is ended with `args.body`.
 */
function createRequest(httplib) {
  // Starts the request and reports the first outcome through `done`.
  function send(options, args, reqId, done) {
    var finished = false;
    var finish = function (err, result) {
      if (!finished) {
        finished = true;
        done(err, result);
      }
    };

    var req = httplib.request(options, function (res) {
      finish(null, {req: req, res: res});
    });
    req.requestId = reqId;

    req.on('error', function onerror(err) {
      // Give plain errors a more specific name; keep any custom name.
      if (err.name === 'Error') {
        err.name = 'RequestError';
      }
      debug('Request#%d %s `req error` event emit, %s: %s', reqId, options.path, err.name, err.message);
      finish(err);
    });

    if (!args.stream) {
      req.end(args.body);
      return;
    }
    args.stream.pipe(req);
  }

  return function (options, args) {
    // The id is taken when the thunk is created, not when it is run.
    var reqId = ++REQUEST_ID;
    return function (done) {
      send(options, args, reqId, done);
    };
  };
}
// Request starters bound to the plain-HTTP and the TLS client modules.
var httpRequest = createRequest(http);
var httpsRequest = createRequest(https);
// Default User-Agent header value, built from the package version; it is
// also exported as `USER_AGENT`.
var USER_AGENT = exports.USER_AGENT = ua('node-co-urllib', pkg.version);
// Exported default agents for http and https, each with maxSockets raised
// to 1000. `request` uses them unless the caller supplies its own agent.
exports.agent = new http.Agent();
exports.agent.maxSockets = 1000;
exports.httpsAgent = new https.Agent();
exports.httpsAgent.maxSockets = 1000;
/**
 * The default request timeout, in milliseconds (5000).
 * `request` falls back to this value when `args.timeout` is falsy.
 * @type {Number}
 * @const
 */
exports.TIMEOUT = 5000;
/**
 * Handle an HTTP request; both http and https are supported.
 *
 * This is the compiled (state-machine) form of a generator function: calling
 * it returns a Generator created by `wrapGenerator`, so it is meant to be
 * consumed with `yield *`.
 *
 * @example
 *
 * // GET http://httptest.cnodejs.net
 * var result = yield *urllib.request('http://httptest.cnodejs.net/test/get');
 * // POST http://httptest.cnodejs.net
 * var args = { type: 'post', data: { foo: 'bar' } };
 * var result = yield *urllib.request('http://httptest.cnodejs.net/test/post', args);
 *
 * @param {String|Object} url URL string (parsed with `url.parse`) or an
 *   already parsed URL object
 * @param {Object} [args], optional
 * - {Object} [data]: request data; a non-string, non-Buffer value is
 *   serialized with `querystring.stringify` (or `JSON.stringify` when the
 *   Content-Type header is exactly `application/json` on a non-GET/HEAD request).
 * - {String|Buffer} [content]: optional; when set, `data` is ignored.
 * - {ReadStream} [stream]: readable stream to send as the request body.
 * - {WriteStream} [writeStream]: writable stream to save response data;
 *   it is passed to `co-readall` together with the response.
 * - {String} [method]: optional, could be GET | POST | DELETE | PUT, default is GET.
 *   `type` is accepted as an alias and takes precedence.
 * - {String} [dataType]: optional, `text` or `json`, default is buffer.
 *   `datatype` is accepted as an alias.
 * - {Object} [headers]: optional, request headers
 * - {Number} [timeout]: request timeout (in milliseconds), default is `exports.TIMEOUT`.
 *   The same value is used once while waiting for the response and once
 *   while reading the body.
 * - {Agent} [agent]: optional, http agent. Set `false` if you do not want to use an agent.
 * - {Agent} [httpsAgent]: optional, https agent. Set `false` if you do not want to use an agent.
 * - {String} [auth]: Basic authentication i.e. 'user:password' to compute an Authorization header.
 * - {String|Buffer|Array} [ca]: An array of strings or Buffers of trusted certificates.
 * If this is omitted several well known "root" CAs will be used, like VeriSign.
 * These are used to authorize connections.
 * Notes: This is necessary only if the server uses a self-signed certificate
 * - {Boolean} [rejectUnauthorized]: If true, the server certificate is verified against the list of supplied CAs.
 * An 'error' event is emitted if verification fails. Default: true.
 * - {String|Buffer} [pfx]: A string or Buffer containing the private key,
 * certificate and CA certs of the server in PFX or PKCS12 format.
 * - {String|Buffer} [key]: A string or Buffer containing the private key of the client in PEM format.
 * Notes: This is necessary only if using client certificate authentication
 * - {String|Buffer} [cert]: A string or Buffer containing the certificate key of the client in PEM format.
 * Notes: This is necessary only if using client certificate authentication
 * - {String} [passphrase]: A string of passphrase for the private key or pfx.
 * - {Boolean} [followRedirect]: Follow HTTP 301 and 302 responses as redirects. Defaults to false.
 * - {Number} [maxRedirects]: The maximum number of redirects to follow, defaults to 10.
 * - {Function(options)} [beforeRequest]: Hook called with the request options
 *   just before the request is sent; it may change anything on them.
 * - {Boolean} [gzip]: Accept gzip response content and decode it automatically, default is `false`.
 * @return {Object} result, contains `data`, `status` and `headers`
 * - {Buffer|String|Object} data
 * - {Number} status: the response status code
 * - {Object} headers: the response headers
 * @api public
 */
function request(url, args) {
var parsedUrl, method, port, _request, agent, options, sslNames, i, name, auth, body, isReadAction, contentType, length, enableGzip, acceptEncoding, r, req, res, reqId, err, _url, data, size, encoding, gzipLength;
return wrapGenerator(function request$($ctx0) {
while (1) switch ($ctx0.prev = $ctx0.next) {
case 0:
// NOTE(review): the defaults below are written back onto the caller's
// `args` object (timeout, maxRedirects, and later dataType and
// _followRedirectCount), so reusing one `args` across calls carries them over.
args = args || {};
args.timeout = args.timeout || exports.TIMEOUT;
args.maxRedirects = args.maxRedirects || 10;
parsedUrl = typeof url === 'string' ? urlutil.parse(url) : url;
method = (args.type || args.method || parsedUrl.method || 'GET').toUpperCase();
// Default to plain http on port 80; the https branch switches the request
// starter, the agent and (when no port is given) the port to 443.
port = parsedUrl.port || 80;
_request = httpRequest;
agent = args.agent || exports.agent;
if (parsedUrl.protocol === 'https:') {
_request = httpsRequest;
agent = args.httpsAgent || exports.httpsAgent;
if (args.httpsAgent === false) {
agent = false;
}
if (!parsedUrl.port) {
port = 443;
}
}
// `agent: false` disables the agent for both protocols.
if (args.agent === false) {
agent = false;
}
// NOTE(review): `headers` is the caller's object when provided, so the
// header assignments further down modify it in place.
options = {
host: parsedUrl.hostname || parsedUrl.host || 'localhost',
path: parsedUrl.path || '/',
method: method,
port: port,
agent: agent,
headers: args.headers || {}
};
// Copy the TLS-related settings that were supplied (truthy) onto options.
sslNames = ['ca', 'pfx', 'key', 'cert', 'passphrase'];
for (i = 0; i < sslNames.length; i++) {
name = sslNames[i];
if (args[name]) {
options[name] = args[name];
}
}
if (args.rejectUnauthorized !== undefined) {
options.rejectUnauthorized = args.rejectUnauthorized;
}
auth = args.auth || parsedUrl.auth;
if (auth) {
options.auth = auth;
}
// `content` wins over `data`; only `data` goes through serialization.
body = args.content || args.data;
isReadAction = method === 'GET' || method === 'HEAD';
if (!args.content) {
if (body && !(typeof body === 'string' || Buffer.isBuffer(body))) {
if (isReadAction) {
// read: GET, HEAD, use query string
body = qs.stringify(body);
} else {
// auto add application/x-www-form-urlencoded when using urlencode form request
if (!options.headers['Content-Type'] && !options.headers['content-type']) {
options.headers['Content-Type'] = 'application/x-www-form-urlencoded';
}
contentType = options.headers['Content-Type'] || options.headers['content-type'];
if (contentType === 'application/json') {
body = JSON.stringify(body);
} else {
// 'application/x-www-form-urlencoded'
body = qs.stringify(body);
}
}
}
}
// if it's a GET or HEAD request, data should be sent as query string
if (isReadAction && body) {
options.path += (parsedUrl.query ? '&' : '?') + body;
body = null;
}
// Content-Length is counted in bytes: Buffer length, or byteLength of a string.
if (body) {
length = body.length;
if (!Buffer.isBuffer(body)) {
length = Buffer.byteLength(body);
}
options.headers['Content-Length'] = length;
}
args.dataType = args.dataType || args.datatype;
if (args.dataType === 'json') {
options.headers.Accept = 'application/json';
}
if (typeof args.beforeRequest === 'function') {
// you can use this hook to change every thing.
args.beforeRequest(options);
}
// set user-agent
if (!options.headers['User-Agent'] && !options.headers['user-agent']) {
options.headers['User-Agent'] = USER_AGENT;
}
// Automatic gunzip is only enabled when the caller asked for gzip and did
// not set an Accept-Encoding header of their own.
enableGzip = args.gzip === true;
if (enableGzip) {
acceptEncoding = options.headers['Accept-Encoding'] || options.headers['accept-encoding'];
if (acceptEncoding) {
enableGzip = false; // the caller set Accept-Encoding, so decoding is left to them
} else {
options.headers['Accept-Encoding'] = 'gzip';
}
}
debug('%s %s, headers: %j', method, url, options.headers);
// Enter try region [28, 34]: start the request and yield until the response
// arrives, wrapped by assertTimeout with args.timeout.
$ctx0.prev = 28;
$ctx0.next = 31;
return assertTimeout(_request(options, {body: body, stream: args.stream}), args.timeout);
case 31:
// Resumed with the `{req, res}` pair produced by createRequest.
r = $ctx0.sent;
$ctx0.next = 41;
break;
case 34:
// Catch handler for region [28, 34]: decorate the error and rethrow.
// A status of 408 is presumably what co-assert-timeout sets on timeout —
// TODO confirm against that module.
$ctx0.prev = 34;
$ctx0.t0 = $ctx0.catch(28);
$ctx0.t0.status = $ctx0.t0.status || -1;
if ($ctx0.t0.status === 408) {
$ctx0.t0.name = 'ConnectionTimeoutError';
}
$ctx0.t0.message += ' (' + method + ' ' + url + ')';
$ctx0.t0.headers = {};
throw $ctx0.t0;
case 41:
req = r.req;
res = r.res;
reqId = req.requestId;
debug('Request#%d %s `req response` event emit: status %d, headers: %j',
reqId, options.path, res.statusCode, res.headers);
// Only 301/302 with followRedirect enabled take the redirect path;
// everything else jumps to reading the body at case 58.
if (!((res.statusCode === 302 || res.statusCode === 301) && args.followRedirect)) {
$ctx0.next = 58;
break;
}
// The redirect counter lives on `args`, which is passed unchanged to the
// recursive call below.
args._followRedirectCount = (args._followRedirectCount || 0) + 1;
err = null;
if (!res.headers.location) {
err = new Error('Got statusCode ' + res.statusCode + ' but cannot resolve next location from headers');
err.name = 'FollowRedirectError';
} else if (args._followRedirectCount > args.maxRedirects) {
err = new Error('Exceeded ' + args.maxRedirects + ' maxRedirects. Probably stuck in a redirect loop ' + url);
err.name = 'MaxRedirectError';
}
if (!err) {
$ctx0.next = 54;
break;
}
err.message += ' (' + method + ' ' + url + ')';
err.status = res.statusCode;
err.headers = res.headers;
throw err;
case 54:
// Resolve the Location header against the current URL and delegate to a
// nested request; its result is stored in $ctx0.t1.
_url = urlutil.resolve(url, res.headers.location);
debug('Request#%d %s: `redirected` from %s to %s', reqId, options.path, url, _url);
return $ctx0.delegateYield(request(_url, args), "t1", 57);
case 57:
// Return whatever the redirected request returned.
return $ctx0.abrupt("return", $ctx0.t1);
case 58:
// These two listeners only emit debug output.
res.on('aborted', function () {
debug('response was aborted');
});
req.on('close', function () {
debug('Request#%d %s: `req close` event emit', reqId, options.path);
});
// Enter try region [60, 66]: read the response (optionally into
// args.writeStream), again guarded by args.timeout.
$ctx0.prev = 60;
$ctx0.next = 63;
return assertTimeout(readall(res, args.writeStream), args.timeout);
case 63:
data = $ctx0.sent;
$ctx0.next = 73;
break;
case 66:
// Catch handler for region [60, 66]: attach request/response details to
// the error; on status 408 also abort the request and rename the error.
$ctx0.prev = 66;
$ctx0.t2 = $ctx0.catch(60);
$ctx0.t2.requestId = reqId;
$ctx0.t2.status = $ctx0.t2.status || res.statusCode;
if ($ctx0.t2.status === 408) {
req.abort(); // try to abort response handle
$ctx0.t2.message += ' (' + method + ' ' + url + ')';
$ctx0.t2.name = 'ResponseTimeoutError';
}
$ctx0.t2.headers = res.headers;
throw $ctx0.t2;
case 73:
size = data && data.length || 0;
debug('Request#%d %s %s %s got %d bytes body',
reqId, method, url, res.statusCode, size);
encoding = res.headers['content-encoding'];
// An empty body skips decoding and parsing altogether.
if (!(size > 0)) {
$ctx0.next = 102;
break;
}
if (!enableGzip) {
$ctx0.next = 85;
break;
}
if (!(encoding && encoding.toLowerCase() === 'gzip')) {
$ctx0.next = 85;
break;
}
// Gzip was requested by this function and the response is gzip-encoded:
// decompress, then clear `encoding` so dataType parsing can proceed.
gzipLength = data.length;
$ctx0.next = 82;
return gunzip(data);
case 82:
data = $ctx0.sent;
encoding = null;
debug('gunzip %d bytes body to %d bytes', gzipLength, data.length);
case 85:
// dataType conversion only applies when no content-encoding remains.
if (!(!encoding && args.dataType)) {
$ctx0.next = 102;
break;
}
if (!(args.dataType === 'json')) {
$ctx0.next = 101;
break;
}
// Enter try region [87, 91]: parse the body as JSON.
$ctx0.prev = 87;
data = JSON.parse(data);
$ctx0.next = 99;
break;
case 91:
// Catch handler for region [87, 91]: rethrow the parse error with the
// response status, headers and the unparsed body attached.
$ctx0.prev = 91;
$ctx0.t3 = $ctx0.catch(87);
$ctx0.t3.message += ' (' + method + ' ' + url + ')';
$ctx0.t3.name = 'JSONResponseFormatError';
$ctx0.t3.status = res.statusCode;
$ctx0.t3.headers = res.headers;
$ctx0.t3.data = data;
throw $ctx0.t3;
case 99:
$ctx0.next = 102;
break;
case 101:
if (args.dataType === 'text') {
data = data.toString();
}
case 102:
// Final result handed back to the caller.
return $ctx0.abrupt("return", {
data: data,
status: res.statusCode,
headers: res.headers
});
case 103:
case "end":
return $ctx0.stop();
}
// Try-entry table: [tryLoc, catchLoc] for the three regions handled above.
}, this, [[28, 34], [60, 66], [87, 91]]);
}
exports.request = request;