urllib
Version:
Help in opening URLs (mostly HTTP) in a complex world — basic and digest authentication, redirections, timeouts and more. Based on the undici API.
655 lines • 58.8 kB
JavaScript
"use strict";
// Interop helper emitted by the TypeScript compiler: a module flagged with
// `__esModule` is returned untouched, anything else is wrapped so that it is
// reachable through a `default` property.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// Flag this CommonJS module as a transpiled ES module, then pre-declare every
// named export as undefined; the real values are assigned further down.
Object.defineProperty(exports, "__esModule", { value: true });
exports.HttpClient = exports.channels = exports.HEADER_USER_AGENT = exports.VERSION = exports.PROTO_RE = void 0;
const node_diagnostics_channel_1 = __importDefault(require("node:diagnostics_channel"));
const node_events_1 = require("node:events");
const node_http_1 = require("node:http");
const node_util_1 = require("node:util");
const node_zlib_1 = require("node:zlib");
const node_stream_1 = require("node:stream");
const promises_1 = require("node:stream/promises");
const node_path_1 = require("node:path");
const node_fs_1 = require("node:fs");
const node_url_1 = require("node:url");
const node_perf_hooks_1 = require("node:perf_hooks");
const node_querystring_1 = __importDefault(require("node:querystring"));
const promises_2 = require("node:timers/promises");
const undici_1 = require("undici");
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
const symbols_js_1 = __importDefault(require("undici/lib/core/symbols.js"));
const mime_types_1 = __importDefault(require("mime-types"));
const qs_1 = __importDefault(require("qs"));
// Compatible with old style formstream
const formstream_1 = __importDefault(require("formstream"));
const FormData_js_1 = require("./FormData.js");
const HttpAgent_js_1 = require("./HttpAgent.js");
const utils_js_1 = require("./utils.js");
const symbols_js_2 = __importDefault(require("./symbols.js"));
const diagnosticsChannel_js_1 = require("./diagnosticsChannel.js");
const HttpClientError_js_1 = require("./HttpClientError.js");
// Matches a leading `http://` or `https://` scheme, case-insensitively.
// Used to decide whether a string URL needs a default scheme prepended.
exports.PROTO_RE = /^https?:\/\//i;
/**
 * Do-nothing callback for APIs that require a function argument whose
 * result is deliberately ignored.
 */
function noop() {
    // intentionally empty
}
// Debug logger created with node:util `debuglog` under the 'urllib:HttpClient' section.
const debug = (0, node_util_1.debuglog)('urllib:HttpClient');
// Library version string; it is embedded in the default User-Agent below.
exports.VERSION = '4.8.2';
// Default User-Agent header value, built from the library version, the Node.js
// version (without the leading "v"), the platform and the CPU architecture.
// Example: 'node-urllib/4.0.0 Node.js/18.19.0 (darwin; x64)'
exports.HEADER_USER_AGENT = `node-urllib/${exports.VERSION} Node.js/${process.version.substring(1)} (${process.platform}; ${process.arch})`;
/**
 * Derive an upload file name from a stream's `path` property.
 *
 * @param {object} stream - Any object; only its optional `path` property is read.
 *   The path may be a string or a Buffer.
 * @returns {string} The base name of the path, or '' when the stream has no
 *   usable path (missing, empty, or neither a string nor a Buffer).
 */
function getFileName(stream) {
    const filePath = stream.path;
    if (typeof filePath === 'string') {
        return filePath ? (0, node_path_1.basename)(filePath) : '';
    }
    // basename() only accepts strings and would throw on a Buffer path,
    // so decode it first.
    if (Buffer.isBuffer(filePath) && filePath.length > 0) {
        return (0, node_path_1.basename)(filePath.toString());
    }
    return '';
}
/**
 * Default retry predicate: a response is retried when its status is a
 * server error (500 or above).
 *
 * @param {object} response - Object exposing a numeric `status`.
 * @returns {boolean} true when `response.status >= 500`.
 */
function defaultIsRetry(response) {
    const { status } = response;
    return status >= 500;
}
exports.channels = {
request: node_diagnostics_channel_1.default.channel('urllib:request'),
response: node_diagnostics_channel_1.default.channel('urllib:response'),
fetchRequest: node_diagnostics_channel_1.default.channel('urllib:fetch:request'),
fetchResponse: node_diagnostics_channel_1.default.channel('urllib:fetch:response'),
};
// HTTP status codes treated as redirects by the client; a response with one of
// these codes is followed when it carries a `location` header and the redirect
// limit has not been reached.
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Redirections
const RedirectStatusCodes = [
301, // Moved Permanently
302, // Found
303, // See Other
307, // Temporary Redirect
308, // Permanent Redirect
];
class HttpClient extends node_events_1.EventEmitter {
#defaultArgs;
#dispatcher;
constructor(clientOptions) {
super();
this.#defaultArgs = clientOptions?.defaultArgs;
if (clientOptions?.lookup || clientOptions?.checkAddress) {
this.#dispatcher = new HttpAgent_js_1.HttpAgent({
lookup: clientOptions.lookup,
checkAddress: clientOptions.checkAddress,
connect: clientOptions.connect,
allowH2: clientOptions.allowH2,
});
}
else if (clientOptions?.connect) {
this.#dispatcher = new undici_1.Agent({
connect: clientOptions.connect,
allowH2: clientOptions.allowH2,
});
}
else if (clientOptions?.allowH2) {
// Support HTTP2
this.#dispatcher = new undici_1.Agent({
allowH2: clientOptions.allowH2,
});
}
(0, diagnosticsChannel_js_1.initDiagnosticsChannel)();
}
getDispatcher() {
return this.#dispatcher ?? (0, undici_1.getGlobalDispatcher)();
}
/**
 * Replace the dispatcher stored on this client.
 *
 * @param dispatcher - The dispatcher later returned by getDispatcher() and
 *   used for requests that do not pass their own `dispatcher` option.
 */
setDispatcher(dispatcher) {
this.#dispatcher = dispatcher;
}
getDispatcherPoolStats() {
const agent = this.getDispatcher();
// origin => Pool Instance
const clients = Reflect.get(agent, symbols_js_1.default.kClients);
const poolStatsMap = {};
if (!clients) {
return poolStatsMap;
}
for (const [key, ref] of clients) {
const pool = (typeof ref.deref === 'function' ? ref.deref() : ref);
// NOTE: pool become to { dispatcher: Pool } in undici@v7
const stats = pool?.stats ?? pool?.dispatcher?.stats;
if (!stats)
continue;
poolStatsMap[key] = {
connected: stats.connected,
free: stats.free,
pending: stats.pending,
queued: stats.queued,
running: stats.running,
size: stats.size,
};
}
return poolStatsMap;
}
async request(url, options) {
return await this.#requestInternal(url, options);
}
// alias to request, keep compatible with urllib@2 HttpClient.curl
async curl(url, options) {
return await this.request(url, options);
}
async #requestInternal(url, options, requestContext) {
const requestId = (0, utils_js_1.globalId)('HttpClientRequest');
let requestUrl;
if (typeof url === 'string') {
if (!exports.PROTO_RE.test(url)) {
// Support `request('www.server.com')`
url = 'http://' + url;
}
requestUrl = new URL(url);
}
else {
if (!url.searchParams) {
// url maybe url.parse(url) object in urllib2
requestUrl = new URL((0, node_url_1.format)(url));
}
else {
// or even if not, we clone to avoid mutating it
requestUrl = new URL(url.toString());
}
}
const method = (options?.type || options?.method || 'GET').toUpperCase();
const originalHeaders = options?.headers;
const headers = {};
const args = {
retry: 0,
socketErrorRetry: 1,
timing: true,
...this.#defaultArgs,
...options,
// keep method and headers exists on args for request event handler to easy use
method,
headers,
};
requestContext = {
retries: 0,
socketErrorRetries: 0,
redirects: 0,
history: [],
...requestContext,
};
if (!requestContext.requestStartTime) {
requestContext.requestStartTime = node_perf_hooks_1.performance.now();
}
requestContext.history.push(requestUrl.href);
const requestStartTime = requestContext.requestStartTime;
// https://developer.chrome.com/docs/devtools/network/reference/?utm_source=devtools#timing-explanation
const timing = {
// socket assigned
queuing: 0,
// dns lookup time
dnslookup: 0,
// socket connected
connected: 0,
// request headers sent
requestHeadersSent: 0,
// request sent, including headers and body
requestSent: 0,
// Time to first byte (TTFB), the response headers have been received
waiting: 0,
// the response body and trailers have been received
contentDownload: 0,
};
const originalOpaque = args.opaque;
// using opaque to diagnostics channel, binding request and socket
const internalOpaque = {
[symbols_js_2.default.kRequestId]: requestId,
[symbols_js_2.default.kRequestStartTime]: requestStartTime,
[symbols_js_2.default.kEnableRequestTiming]: !!args.timing,
[symbols_js_2.default.kRequestTiming]: timing,
[symbols_js_2.default.kRequestOriginalOpaque]: originalOpaque,
};
const reqMeta = {
requestId,
url: requestUrl.href,
args,
ctx: args.ctx,
retries: requestContext.retries,
};
const socketInfo = {
id: 0,
localAddress: '',
localPort: 0,
remoteAddress: '',
remotePort: 0,
remoteFamily: '',
bytesWritten: 0,
bytesRead: 0,
handledRequests: 0,
handledResponses: 0,
};
// keep urllib createCallbackResponse style
const resHeaders = {};
let res = {
status: -1,
statusCode: -1,
statusText: '',
statusMessage: '',
headers: resHeaders,
size: 0,
aborted: false,
rt: 0,
keepAliveSocket: true,
requestUrls: requestContext.history,
timing,
socket: socketInfo,
retries: requestContext.retries,
socketErrorRetries: requestContext.socketErrorRetries,
};
let headersTimeout = 5000;
let bodyTimeout = 5000;
if (args.timeout) {
if (Array.isArray(args.timeout)) {
headersTimeout = args.timeout[0] ?? headersTimeout;
bodyTimeout = args.timeout[1] ?? bodyTimeout;
}
else {
// compatible with urllib@2 timeout string format
headersTimeout = bodyTimeout = typeof args.timeout === 'string' ? parseInt(args.timeout) : args.timeout;
}
}
if (originalHeaders) {
// convert headers to lower-case
for (const name in originalHeaders) {
headers[name.toLowerCase()] = originalHeaders[name];
}
}
// hidden user-agent
const hiddenUserAgent = 'user-agent' in headers && !headers['user-agent'];
if (hiddenUserAgent) {
delete headers['user-agent'];
}
else if (!headers['user-agent']) {
// need to set user-agent
headers['user-agent'] = exports.HEADER_USER_AGENT;
}
// Alias to dataType = 'stream'
if (args.streaming || args.customResponse) {
args.dataType = 'stream';
}
if (args.dataType === 'json' && !headers.accept) {
headers.accept = 'application/json';
}
// gzip alias to compressed
if (args.gzip && args.compressed !== false) {
args.compressed = true;
}
if (args.compressed && !headers['accept-encoding']) {
headers['accept-encoding'] = 'gzip, br';
}
if (requestContext.retries > 0) {
headers['x-urllib-retry'] = `${requestContext.retries}/${args.retry}`;
}
if (requestContext.socketErrorRetries > 0) {
headers['x-urllib-retry-on-socket-error'] = `${requestContext.socketErrorRetries}/${args.socketErrorRetry}`;
}
if (args.auth && !headers.authorization) {
headers.authorization = `Basic ${Buffer.from(args.auth).toString('base64')}`;
}
// streaming request should disable socketErrorRetry and retry
let isStreamingRequest = false;
let isStreamingResponse = false;
if (args.dataType === 'stream' || args.writeStream) {
isStreamingResponse = true;
}
let maxRedirects = args.maxRedirects ?? 10;
try {
const requestOptions = {
method,
// disable undici auto redirect handler
// maxRedirections: 0,
headersTimeout,
headers,
bodyTimeout,
opaque: internalOpaque,
dispatcher: args.dispatcher ?? this.#dispatcher,
signal: args.signal,
reset: false,
};
if (typeof args.highWaterMark === 'number') {
requestOptions.highWaterMark = args.highWaterMark;
}
if (typeof args.reset === 'boolean') {
requestOptions.reset = args.reset;
}
if (args.followRedirect === false) {
maxRedirects = 0;
}
const isGETOrHEAD = requestOptions.method === 'GET' || requestOptions.method === 'HEAD';
// alias to args.content
if (args.stream && !args.content) {
// convert old style stream to new stream
// https://nodejs.org/dist/latest-v18.x/docs/api/stream.html#readablewrapstream
if ((0, utils_js_1.isReadable)(args.stream) && !(args.stream instanceof node_stream_1.Readable)) {
debug('Request#%d convert old style stream to Readable', requestId);
args.stream = new node_stream_1.Readable().wrap(args.stream);
isStreamingRequest = true;
}
else if (args.stream instanceof formstream_1.default) {
debug('Request#%d convert formstream to Readable', requestId);
args.stream = new node_stream_1.Readable().wrap(args.stream);
isStreamingRequest = true;
}
args.content = args.stream;
}
if (args.files) {
if (isGETOrHEAD) {
requestOptions.method = 'POST';
}
const formData = new FormData_js_1.FormData();
const uploadFiles = [];
if (Array.isArray(args.files)) {
for (const [index, file] of args.files.entries()) {
const field = index === 0 ? 'file' : `file${index}`;
uploadFiles.push([field, file]);
}
}
else if (args.files instanceof node_stream_1.Readable || (0, utils_js_1.isReadable)(args.files)) {
uploadFiles.push(['file', args.files]);
}
else if (typeof args.files === 'string' || Buffer.isBuffer(args.files)) {
uploadFiles.push(['file', args.files]);
}
else if (typeof args.files === 'object') {
const files = args.files;
for (const field in files) {
// set custom fileName
const file = files[field];
uploadFiles.push([field, file, field]);
}
}
// set normal fields first
if (args.data) {
for (const field in args.data) {
formData.append(field, args.data[field]);
}
}
for (const [index, [field, file, customFileName]] of uploadFiles.entries()) {
let fileName = '';
let value;
if (typeof file === 'string') {
fileName = (0, node_path_1.basename)(file);
value = (0, node_fs_1.createReadStream)(file);
}
else if (Buffer.isBuffer(file)) {
fileName = customFileName || `bufferfile${index}`;
value = file;
}
else if (file instanceof node_stream_1.Readable || (0, utils_js_1.isReadable)(file)) {
fileName = getFileName(file) || customFileName || `streamfile${index}`;
isStreamingRequest = true;
value = file;
}
const mimeType = mime_types_1.default.lookup(fileName) || '';
formData.append(field, value, {
filename: fileName,
contentType: mimeType,
});
debug('formData append field: %s, mimeType: %s, fileName: %s', field, mimeType, fileName);
}
Object.assign(headers, formData.getHeaders());
requestOptions.body = formData;
}
else if (args.content) {
if (!isGETOrHEAD) {
// handle content
requestOptions.body = args.content;
if (args.contentType) {
headers['content-type'] = args.contentType;
}
else if (typeof args.content === 'string' && !headers['content-type']) {
headers['content-type'] = 'text/plain;charset=UTF-8';
}
isStreamingRequest = (0, utils_js_1.isReadable)(args.content);
}
}
else if (args.data) {
const isStringOrBufferOrReadable = typeof args.data === 'string'
|| Buffer.isBuffer(args.data)
|| (0, utils_js_1.isReadable)(args.data);
if (isGETOrHEAD) {
if (!isStringOrBufferOrReadable) {
let query;
if (args.nestedQuerystring) {
query = qs_1.default.stringify(args.data);
}
else {
query = node_querystring_1.default.stringify(args.data);
}
// reset the requestUrl
const href = requestUrl.href;
requestUrl = new URL(href + (href.includes('?') ? '&' : '?') + query);
}
}
else {
if (isStringOrBufferOrReadable) {
requestOptions.body = args.data;
isStreamingRequest = (0, utils_js_1.isReadable)(args.data);
}
else {
if (args.contentType === 'json'
|| args.contentType === 'application/json'
|| headers['content-type']?.startsWith('application/json')) {
requestOptions.body = JSON.stringify(args.data);
if (!headers['content-type']) {
headers['content-type'] = 'application/json';
}
}
else {
headers['content-type'] = 'application/x-www-form-urlencoded;charset=UTF-8';
if (args.nestedQuerystring) {
requestOptions.body = qs_1.default.stringify(args.data);
}
else {
requestOptions.body = new URLSearchParams(args.data).toString();
}
}
}
}
}
if (isStreamingRequest) {
args.retry = 0;
args.socketErrorRetry = 0;
maxRedirects = 0;
}
if (isStreamingResponse) {
args.retry = 0;
args.socketErrorRetry = 0;
}
debug('Request#%d %s %s, headers: %j, headersTimeout: %s, bodyTimeout: %s, isStreamingRequest: %s, isStreamingResponse: %s, maxRedirections: %s, redirects: %s', requestId, requestOptions.method, requestUrl.href, headers, headersTimeout, bodyTimeout, isStreamingRequest, isStreamingResponse, maxRedirects, requestContext.redirects);
requestOptions.headers = headers;
exports.channels.request.publish({
request: reqMeta,
});
if (this.listenerCount('request') > 0) {
this.emit('request', reqMeta);
}
let response = await (0, undici_1.request)(requestUrl, requestOptions);
if (response.statusCode === 401 && (response.headers['www-authenticate'] || response.headers['x-www-authenticate']) &&
!requestOptions.headers.authorization && args.digestAuth) {
// handle digest auth
const authenticateHeaders = response.headers['www-authenticate'] ?? response.headers['x-www-authenticate'];
const authenticate = Array.isArray(authenticateHeaders)
? authenticateHeaders.find(authHeader => authHeader.startsWith('Digest '))
: authenticateHeaders;
if (authenticate && authenticate.startsWith('Digest ')) {
debug('Request#%d %s: got digest auth header WWW-Authenticate: %s', requestId, requestUrl.href, authenticate);
requestOptions.headers.authorization = (0, utils_js_1.digestAuthHeader)(requestOptions.method, `${requestUrl.pathname}${requestUrl.search}`, authenticate, args.digestAuth);
debug('Request#%d %s: auth with digest header: %s', requestId, url, requestOptions.headers.authorization);
if (Array.isArray(response.headers['set-cookie'])) {
// FIXME: merge exists cookie header
requestOptions.headers.cookie = response.headers['set-cookie'].join(';');
}
// Ensure the previous response is consumed as we re-use the same variable
await response.body.arrayBuffer();
response = await (0, undici_1.request)(requestUrl, requestOptions);
}
}
const contentEncoding = response.headers['content-encoding'];
const isCompressedContent = contentEncoding === 'gzip' || contentEncoding === 'br';
res.headers = response.headers;
res.status = res.statusCode = response.statusCode;
res.statusMessage = res.statusText = node_http_1.STATUS_CODES[res.status] || '';
if (res.headers['content-length']) {
res.size = parseInt(res.headers['content-length']);
}
// https://developer.mozilla.org/en-US/docs/Web/HTTP/Redirections
if (RedirectStatusCodes.includes(res.statusCode) && maxRedirects > 0 && requestContext.redirects < maxRedirects) {
if (res.headers.location) {
requestContext.redirects++;
const nextUrl = new URL(res.headers.location, requestUrl.href);
// Ensure the response is consumed
await response.body.arrayBuffer();
debug('Request#%d got response, status: %s, headers: %j, timing: %j, redirect to %s', requestId, res.status, res.headers, res.timing, nextUrl.href);
return await this.#requestInternal(nextUrl.href, options, requestContext);
}
}
let data = null;
if (args.dataType === 'stream') {
// only auto decompress on request args.compressed = true
if (args.compressed === true && isCompressedContent) {
// gzip or br
const decoder = contentEncoding === 'gzip' ? (0, node_zlib_1.createGunzip)() : (0, node_zlib_1.createBrotliDecompress)();
res = Object.assign((0, node_stream_1.pipeline)(response.body, decoder, noop), res);
}
else {
res = Object.assign(response.body, res);
}
}
else if (args.writeStream) {
if (args.compressed === true && isCompressedContent) {
const decoder = contentEncoding === 'gzip' ? (0, node_zlib_1.createGunzip)() : (0, node_zlib_1.createBrotliDecompress)();
await (0, promises_1.pipeline)(response.body, decoder, args.writeStream);
}
else {
await (0, promises_1.pipeline)(response.body, args.writeStream);
}
}
else {
// buffer
data = Buffer.from(await response.body.arrayBuffer());
if (isCompressedContent && data.length > 0) {
try {
data = contentEncoding === 'gzip' ? (0, node_zlib_1.gunzipSync)(data) : (0, node_zlib_1.brotliDecompressSync)(data);
}
catch (err) {
if (err.name === 'Error') {
err.name = 'UnzipError';
}
throw err;
}
}
if (args.dataType === 'text' || args.dataType === 'html') {
data = data.toString();
}
else if (args.dataType === 'json') {
if (data.length === 0) {
data = null;
}
else {
data = (0, utils_js_1.parseJSON)(data.toString(), args.fixJSONCtlChars);
}
}
}
res.rt = (0, utils_js_1.performanceTime)(requestStartTime);
// get real socket info from internalOpaque
(0, utils_js_1.updateSocketInfo)(socketInfo, internalOpaque);
const clientResponse = {
opaque: originalOpaque,
data,
status: res.status,
statusCode: res.status,
statusText: res.statusText,
headers: res.headers,
url: requestUrl.href,
redirected: requestContext.history.length > 1,
requestUrls: res.requestUrls,
res,
};
debug('Request#%d got response, status: %s, headers: %j, timing: %j, socket: %j', requestId, res.status, res.headers, res.timing, res.socket);
if (args.retry > 0 && requestContext.retries < args.retry) {
const isRetry = args.isRetry ?? defaultIsRetry;
if (isRetry(clientResponse)) {
if (args.retryDelay) {
await (0, promises_2.setTimeout)(args.retryDelay);
}
requestContext.retries++;
return await this.#requestInternal(url, options, requestContext);
}
}
exports.channels.response.publish({
request: reqMeta,
response: res,
});
if (this.listenerCount('response') > 0) {
this.emit('response', {
requestId,
error: null,
ctx: args.ctx,
req: {
...reqMeta,
options: args,
},
res,
});
}
return clientResponse;
}
catch (rawError) {
debug('Request#%d throw error: %s, socketErrorRetry: %s, socketErrorRetries: %s', requestId, rawError, args.socketErrorRetry, requestContext.socketErrorRetries);
let err = rawError;
if (err.name === 'HeadersTimeoutError') {
err = new HttpClientError_js_1.HttpClientRequestTimeoutError(headersTimeout, { cause: err });
}
else if (err.name === 'BodyTimeoutError') {
err = new HttpClientError_js_1.HttpClientRequestTimeoutError(bodyTimeout, { cause: err });
}
else if (err.name === 'InformationalError' && err.message.includes('stream timeout')) {
err = new HttpClientError_js_1.HttpClientRequestTimeoutError(bodyTimeout, { cause: err });
}
else if (err.code === 'UND_ERR_CONNECT_TIMEOUT') {
err = new HttpClientError_js_1.HttpClientConnectTimeoutError(err.message, err.code, { cause: err });
}
else if (err.code === 'UND_ERR_SOCKET' || err.code === 'ECONNRESET') {
// auto retry on socket error, https://github.com/node-modules/urllib/issues/454
if (args.socketErrorRetry > 0 && requestContext.socketErrorRetries < args.socketErrorRetry) {
requestContext.socketErrorRetries++;
debug('Request#%d retry on socket error, socketErrorRetries: %d', requestId, requestContext.socketErrorRetries);
return await this.#requestInternal(url, options, requestContext);
}
}
err.opaque = originalOpaque;
err.status = res.status;
err.headers = res.headers;
err.res = res;
if (err.socket) {
// store rawSocket
err._rawSocket = err.socket;
}
err.socket = socketInfo;
res.rt = (0, utils_js_1.performanceTime)(requestStartTime);
(0, utils_js_1.updateSocketInfo)(socketInfo, internalOpaque, rawError);
exports.channels.response.publish({
request: reqMeta,
response: res,
error: err,
});
if (this.listenerCount('response') > 0) {
this.emit('response', {
requestId,
error: err,
ctx: args.ctx,
req: {
...reqMeta,
options: args,
},
res,
});
}
throw err;
}
}
}
// Publish the client class as the module's `HttpClient` export.
exports.HttpClient = HttpClient;
//# sourceMappingURL=data:application/json;base64,