// curlconverter — convert curl commands to Python, JavaScript, Go, PHP and more.
// Generated JavaScript module for the Python (Requests) target.
import { CCError, has, UTF8encoder } from "../../utils.js";
import { Word, eq } from "../../shell/Word.js";
import { parse, COMMON_SUPPORTED_ARGS } from "../../parse.js";
import { wordDecodeURIComponent, percentEncode } from "../../Query.js";
import { parse as jsonParseLossless, stringify as jsonStringifyLossless, isSafeNumber, isInteger, isLosslessNumber, } from "lossless-json";
// TODO: partiallySupportedArgs
// curl command-line arguments this generator understands, beyond the
// common set shared by all generators. Some entries are not actually
// translated to Python — they are listed only so the parser accepts them
// and can print a more specific warning (see the inline notes).
export const supportedArgs = new Set([
    ...COMMON_SUPPORTED_ARGS,
    "compressed",
    // "no-compressed",
    // "anyauth",
    // "no-anyauth",
    "digest",
    "no-digest",
    "aws-sigv4",
    "negotiate",
    "no-negotiate",
    "delegation", // GSS/kerberos
    // "service-name", // GSS/kerberos, not supported
    "ntlm",
    "no-ntlm",
    "ntlm-wb",
    "no-ntlm-wb",
    "http1.1",
    // These are not supported, just better warning message
    "http2",
    "http2-prior-knowledge",
    "http3",
    "http3-only",
    "cookie-jar",
    "cert",
    "cacert",
    "key",
    "capath",
    "form",
    "form-string",
    "location",
    "no-location",
    "location-trusted", // not exactly supported, just better warning message
    "no-location-trusted",
    "max-redirs",
    "max-time",
    "connect-timeout",
    "insecure",
    "no-insecure",
    "output",
    "upload-file",
    "next",
    "proxy",
    "proxy-user",
]);
// https://peps.python.org/pep-3138/
// https://www.unicode.org/reports/tr44/#GC_Values_Table
// https://unicode.org/Public/UNIDATA/UnicodeData.txt
// https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
// Characters that must be backslash-escaped inside a Python string literal:
// the quote character itself, backslash, every "Other" character
// (control/format/surrogate/unassigned) and every separator except the
// ordinary space.
const regexSingleEscape = /'|\\|\p{C}|[^ \P{Z}]/gu;
const regexDoubleEscape = /"|\\|\p{C}|[^ \P{Z}]/gu;
// Python's short escape sequences. Having both quote entries in one table is
// harmless: each regex above only ever matches its own quote character.
const SHORT_ESCAPES = {
    "\x07": "\\a",
    "\b": "\\b",
    "\f": "\\f",
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
    "\v": "\\v",
    "\\": "\\\\",
    "'": "\\'",
    '"': '\\"',
};
// Render a JavaScript string as a Python string literal.
// Also used for Go and R
export function reprStr(s, quote) {
    // Default to single quotes, but switch to double quotes when that
    // avoids escaping an apostrophe.
    if (quote === undefined) {
        quote = s.includes("'") && !s.includes('"') ? '"' : "'";
    }
    const pattern = quote === "'" ? regexSingleEscape : regexDoubleEscape;
    const escaped = s.replace(pattern, (ch) => {
        if (ch in SHORT_ESCAPES) {
            return SHORT_ESCAPES[ch];
        }
        // Fall back to \xXX, \uXXXX or \UXXXXXXXX by codepoint width.
        const hex = ch.codePointAt(0).toString(16);
        if (hex.length <= 2) {
            return "\\x" + hex.padStart(2, "0");
        }
        if (hex.length <= 4) {
            return "\\u" + hex.padStart(4, "0");
        }
        return "\\U" + hex.padStart(8, "0");
    });
    return quote + escaped + quote;
}
// TODO: use this if string contains unmatched surrogates?
// It just replaces them with the replacement character, but at least that code would run.
// Render a string as a Python bytes literal (b'...'), escaping every byte
// outside printable ASCII.
// TODO: use this if string contains unmatched surrogates?
// It just replaces them with the replacement character, but at least that code would run.
export function pybescComplex(s) {
    const quote = s.includes("'") && !s.includes('"') ? '"' : "'";
    const quoteCode = quote.charCodeAt(0);
    // TODO: using UTF-8 here is overly simplistic and how to encode here
    // is a pretty complicated decision.
    // For starters, it would be more correct to use the same encoding as
    // the terminal when running from the command line.
    const bytes = UTF8encoder.encode(s);
    // Python short escapes, keyed by byte value.
    const byteEscapes = {
        0x07: "\\a",
        0x08: "\\b",
        0x09: "\\t",
        0x0a: "\\n",
        0x0b: "\\v",
        0x0c: "\\f",
        0x0d: "\\r",
        0x5c: "\\\\",
    };
    let body = "";
    for (const byte of bytes) {
        if (byte === quoteCode) {
            body += "\\" + quote;
        }
        else if (byte in byteEscapes) {
            body += byteEscapes[byte];
        }
        else if (byte >= 0x20 && byte < 0x7f) {
            // Printable ASCII passes through unchanged.
            body += String.fromCharCode(byte);
        }
        else {
            body += "\\x" + byte.toString(16).padStart(2, "0");
        }
    }
    return "b" + quote + body + quote;
}
// Render a string as a Python expression that evaluates to bytes: a bytes
// literal when the string is pure ASCII, otherwise a str with .encode().
export function reprStrBinary(s) {
    const escaped = reprStr(s);
    // We check until 0x7F instead of 0xFF because curl (running in a UTF-8 terminal) when it gets
    // bytes sends them as is, but if we pass b'\x80' to Requests, it will encode that byte as
    // Latin-1 (presumably for backwards compatibility) instead of UTF-8.
    const isAscii = /^[\x00-\x7f]*$/.test(s);
    if (isAscii) {
        return "b" + escaped;
    }
    // TODO: unmatched surrogates will generate code that throws an error
    // e.g.: '\uDC00'.encode()
    return escaped + ".encode()";
}
/**
 * Render a shell Word as a Python expression evaluating to its value.
 *
 * A Word's tokens are plain strings, environment-variable references, or
 * command substitutions; each is rendered and the pieces joined with " + ".
 *
 * @param word the Word to render
 * @param osVars mutated: maps generated Python variable names (command1, ...)
 *   to the subprocess.run(...) expression assigned to them
 * @param imports mutated: Python modules the generated code needs ("os",
 *   "subprocess")
 * @param binary render a bytes expression instead of str
 * @param errorOk see inline comment below
 * @returns the Python expression as a string
 */
export function repr(word, osVars, imports, binary = false,
// os.getenv('MYVAR') returns None if MYVAR is not set
// os.getenv('MYVAR', '') returns '' if MYVAR is not set but it's a bit more verbose,
// so setting errorOk to true will use the shorter version
errorOk = false) {
    const reprFn = binary ? reprStrBinary : reprStr;
    const reprs = [];
    for (const t of word.tokens) {
        if (typeof t === "string") {
            reprs.push(reprFn(t));
        }
        else if (t.type === "variable") {
            // TODO: getenvb() is not available on Windows
            const fn = binary ? "os.getenvb" : "os.getenv";
            let getEnvCall = fn + "(" + reprFn(t.value);
            // Always pass a '' default when this piece is concatenated with
            // others: a None result would make the generated '+' fail.
            if (!errorOk || word.tokens.length > 1) {
                getEnvCall += ", " + reprFn("");
            }
            getEnvCall += ")";
            reprs.push(getEnvCall);
            // TODO: if the result of a repr() is discarded, this adds an unused import
            imports.add("os");
        }
        else if (t.type === "command") {
            // TODO: warn that shell=True is a bad idea
            // or properly parse the subcommand and render it as an array
            let subprocessCall = "subprocess.run(" +
                reprStr(t.value) +
                ", shell=True, capture_output=True";
            if (!binary) {
                subprocessCall += ", text=True";
            }
            subprocessCall += ").stdout";
            // TODO: generate a descriptive command name with ChatGPT
            // TODO: if there's only one command, name the variable "command" instead of "command1"
            let i = 1;
            let pyVar = "command" + i;
            // We need to check because we often try to represent the same
            // token twice and discard one of the attempts.
            // This is linear time but hopefully there's not that many subcommands.
            while (pyVar in osVars && osVars[pyVar] !== subprocessCall) {
                i++;
                pyVar = "command" + i;
                if (i > Number.MAX_SAFE_INTEGER) {
                    throw new CCError("lol");
                }
            }
            osVars[pyVar] = subprocessCall;
            reprs.push(pyVar);
            // TODO: if the result of a repr() is discarded, this adds an unused import
            imports.add("subprocess");
        }
    }
    return reprs.join(" + ");
}
// Shorthand for repr() in binary (bytes) mode.
function reprb(word, osVars, imports) {
    const binary = true;
    return repr(word, osVars, imports, binary);
}
// Render a Word as a Python float expression: literal strings are emitted
// as-is, anything with shell expansions is wrapped in float().
export function asFloat(word, osVars, imports) {
    if (word.isString()) {
        // TODO: check it's actually a valid float
        return word.toString();
    }
    const inner = repr(word, osVars, imports, false, true);
    return "float(" + inner + ")";
}
// Render a Word as a Python int expression: literal strings are emitted
// as-is, anything with shell expansions is wrapped in int().
export function asInt(word, osVars, imports) {
    if (word.isString()) {
        // TODO: check it's actually a valid int
        return word.toString();
    }
    const inner = repr(word, osVars, imports, false, true);
    return "int(" + inner + ")";
}
// Port of Python's json.dumps() with its default options, which is what Requests uses
// https://github.com/psf/requests/blob/b0e025ade7ed30ed53ab61f542779af7e024932e/requests/models.py#L473
// It's different from JSON.stringify(). Namely, it adds spaces after ',' and ':'
// and all non-ASCII characters in strings are escaped:
// > JSON.stringify('\xFF')
// '"ÿ"'
// >>> json.dumps('\xFF')
// '"\\u00ff"'
const pythonJsonEscape = /"|\\|[^\x20-\x7E]/g;
// Render a string the way Python's json.dumps() would: double-quoted, with
// every character outside printable ASCII escaped.
function jsonRepr(s) {
    // https://tc39.es/ecma262/#table-json-single-character-escapes
    const shortEscapes = {
        "\b": "\\b",
        "\t": "\\t",
        "\n": "\\n",
        "\f": "\\f",
        "\r": "\\r",
        "\\": "\\\\",
        '"': '\\"',
    };
    const body = s.replace(pythonJsonEscape, (ch) => {
        if (ch in shortEscapes) {
            return shortEscapes[ch];
        }
        return "\\u" + ch.charCodeAt(0).toString(16).padStart(4, "0");
    });
    return '"' + body + '"';
}
// Pad a single-digit exponent with a leading zero, matching Python's float
// formatting: ensure_minimum_exponent_length('1e-7') => '1e-07'.
// Assumes n contains an 'e' followed by a sign (as produced by
// Number.prototype.toString/toExponential).
function ensure_minimum_exponent_length(n) {
    const [mantissa, exponent] = n.split("e");
    const sign = exponent[0];
    const magnitude = exponent.slice(1);
    return magnitude.length === 1
        ? mantissa + "e" + sign + "0" + magnitude
        : n;
}
/**
 * Format a JavaScript number the way Python's str() formats a float.
 * @param value a finite JavaScript number representing a float
 * @returns the Python repr of that float as a string
 */
function floatAsPython(value) {
    // JSON.stringify() and lossless-json's stringify() don't stringify floats like Python.
    // Notably, JavaScript doesn't add a trailing '.0' to floats that are integers but Python does
    // JSON.stringify(10.0) => '10'
    // str(10.0) => '10.0'
    //
    // Python adds a leading 0 to exponent notation numbers with 1 exponent digit
    // JSON.stringify(1e-7) => '1e-7'
    // str(1e-7) => '1e-07'
    //
    // Finally, Python will switch to scientific notation if the number has more than
    // 17 digits not in scientific notation.
    //
    // Python's float formatting starts here:
    // https://github.com/python/cpython/blob/bdc93b8a3563b4a3adb25fa902c0c879ccf427f6/Python/pystrtod.c#L915-L918
    // and is ultimately this code:
    // snprintf(buf, buf_size, "%.17g", val)
    // change_decimal_from_locale_to_dot(buffer); // not important
    // ensure_minimum_exponent_length(buffer, buf_size);
    // ensure_decimal_point(buffer, buf_size, 17); // can switch to exponent notation
    //
    // And JavaScript's formatting is specified here:
    // https://tc39.es/ecma262/multipage/ecmascript-data-types-and-values.html#sec-numeric-types-number-tostring
    let asJsStr = value.toString();
    if (asJsStr.includes("e")) {
        asJsStr = ensure_minimum_exponent_length(asJsStr);
    }
    else {
        // Integral floats get Python's trailing ".0".
        if (isInteger(asJsStr)) {
            asJsStr += ".0";
        }
        // If there's more than 17 digits of precision, switch to scientific notation
        const significantDigits = asJsStr
            .replace(/^-/, "")
            .replace(".", "")
            .replace(/^0+/, "");
        const asExponential = ensure_minimum_exponent_length(value.toExponential());
        // NOTE: at this point asJsStr always contains a "." (either it had one
        // already or ".0" was appended above), so split(".")[1] is safe.
        if (significantDigits.length > 17 ||
            (asExponential.length < asJsStr.length &&
                asJsStr.split(".")[1].length > 4)) {
            asJsStr = asExponential;
        }
    }
    return asJsStr;
}
/**
 * Serialize a parsed JSON value the way Python's json.dumps() would with its
 * default options (spaces after ',' and ':', non-ASCII escaped). Used to check
 * whether the generated Python will reproduce the original JSON exactly.
 * @param obj a value from lossless-json's parse() or JSON.parse()
 * @returns the json.dumps()-style string
 * @throws CCError for floats Python can't represent, for Infinity, and for
 *   values that shouldn't appear in JSON
 */
function jsonDumps(obj) {
    // lossless-json numbers keep their original source text.
    if (isLosslessNumber(obj)) {
        const numAsStr = jsonStringifyLossless(obj);
        if (isInteger(numAsStr)) {
            return numAsStr;
        }
        if (!isSafeNumber(numAsStr)) {
            throw new CCError("float unrepresentable in Python: " + numAsStr);
        }
        // Can't be bigint because it's !isInteger and isSafeNumber
        return floatAsPython(obj.valueOf());
    }
    switch (typeof obj) {
        case "string":
            return jsonRepr(obj);
        case "number":
            // If the number in the JSON file is very large, it'll turn into Infinity
            // (fixed typo: this used to say "Infitiny")
            if (!Number.isFinite(obj)) {
                throw new CCError("found Infinity in JSON");
            }
            // TODO: If the number in the JSON file is too big for JavaScript, we will lose information
            // TODO: JavaScript and Python serialize floats differently.
            // JSON.stringify(2e2) => 200
            // json.dumps(2e2) => 200.0
            return obj.toString();
        case "boolean":
            return obj.toString();
        case "object":
            if (obj === null) {
                return "null";
            }
            if (Array.isArray(obj)) {
                return "[" + obj.map(jsonDumps).join(", ") + "]";
            }
            // Python puts a space after ':' and ',' by default.
            return ("{" +
                Object.entries(obj)
                    .map((e) => jsonRepr(e[0]) + ": " + jsonDumps(e[1]))
                    .join(", ") +
                "}");
        default:
            throw new CCError("unexpected object type that shouldn't appear in JSON: " + typeof obj);
    }
}
/**
 * Render a parsed JSON value as a Python literal (dict/list/str/number,
 * True/False/None), pretty-printed with 4-space indentation.
 * @param obj a value from lossless-json's parse() or JSON.parse()
 * @param indent current indentation level in spaces
 * @throws CCError for floats Python can't represent or non-JSON types
 */
function objToPython(obj, indent = 0) {
    if (isLosslessNumber(obj)) {
        const numAsStr = jsonStringifyLossless(obj);
        // If the number is a large float, it might not be representable in Python
        // Both JavaScript and Python use f64 so we check if the float
        // is representable in JavaScript.
        if (!isInteger(numAsStr) && !isSafeNumber(numAsStr)) {
            throw new CCError("float unrepresentable in Python: " + numAsStr);
        }
        // Displaying floats as they will be serialized in Python would help users
        // understand why they're getting the "JSON won't be serialized as it was originally"
        // message, but I think displaying them as they appear in the JSON is likely
        // to be more convenient if you need to edit the value.
        return numAsStr;
    }
    if (typeof obj === "string") {
        return reprStr(obj);
    }
    if (typeof obj === "number") {
        // TODO: there are differences in number serialization between Python and JavaScript
        // TODO: if the number in the JSON file is too big for JavaScript, we will lose information
        return obj.toString();
    }
    if (typeof obj === "boolean") {
        return obj ? "True" : "False";
    }
    if (typeof obj === "object") {
        if (obj === null) {
            return "None";
        }
        const inner = " ".repeat(indent + 4);
        const outer = " ".repeat(indent);
        if (Array.isArray(obj)) {
            if (obj.length === 0) {
                return "[]";
            }
            const items = obj
                .map((item) => inner + objToPython(item, indent + 4) + ",\n")
                .join("");
            return "[\n" + items + outer + "]";
        }
        const entries = Object.entries(obj);
        if (entries.length === 0) {
            return "{}";
        }
        // reprStr() because JSON keys must be strings.
        const items = entries
            .map(([k, v]) => inner + reprStr(k) + ": " + objToPython(v, indent + 4) + ",\n")
            .join("");
        return "{\n" + items + outer + "}";
    }
    throw new CCError("unexpected object type that shouldn't appear in JSON: " + typeof obj);
}
/**
 * Build the Python "headers = {...}" source string.
 * Headers whose lowercased name appears in commentedOutHeaders are emitted
 * commented out, optionally preceded by an explanatory comment line.
 */
export function formatHeaders(headers, commentedOutHeaders, osVars, imports) {
    // TODO: what if there are repeat headers
    const parts = ["headers = {\n"];
    for (const [name, value] of headers) {
        // A null value means the header was removed — skip it entirely.
        if (value === null) {
            continue;
        }
        const lowerName = name.toLowerCase().toString();
        let lineStart = "    ";
        if (has(commentedOutHeaders, lowerName)) {
            const reason = commentedOutHeaders[lowerName];
            if (reason) {
                parts.push("    # " + reason + "\n");
            }
            lineStart = "    # ";
        }
        parts.push(lineStart +
            repr(name, osVars, imports) +
            ": " +
            repr(value, osVars, imports) +
            ",\n");
    }
    parts.push("}\n");
    return parts.join("");
}
// Decode a percent-encoded Word, returning null when it can't be decoded or
// wouldn't round-trip through Requests' re-encoding.
function decodePercentEncoding(s) {
    let decoded;
    try {
        // https://url.spec.whatwg.org/#urlencoded-parsing recommends replacing + with space
        // before decoding.
        decoded = wordDecodeURIComponent(s.replace(/\+/g, " "));
    }
    catch (err) {
        if (err instanceof URIError) {
            // String contains invalid percent encoded characters
            return null;
        }
        throw err;
    }
    // If the query string doesn't round-trip, we cannot properly convert it.
    const reEncoded = percentEncode(decoded);
    if (eq(reEncoded, s)) {
        return decoded;
    }
    // If the original data used %20 instead of + (what requests will send), that's close enough
    if (eq(reEncoded.replace(/%20/g, "+"), s)) {
        return decoded;
    }
    return null;
}
// Group [key, value] entries into {key: [values...]}, preserving order.
// Returns null when a key repeats with a different key in between, because
// then the entries can't be represented as a dictionary.
// TODO: because keys can be code that reads from a file, those should not be considered the
// same key, for example what if that file is /dev/urandom.
// TODO: would we need to distinguish if /dev/urandom came from @/dev/urandom or from @-?
function dataEntriesToDict(dataEntries) {
    const grouped = {};
    let previousKey = null;
    for (const [key, value] of dataEntries) {
        if (key === previousKey) {
            // Consecutive repeat: extend the existing list.
            grouped[key].push(value);
        }
        else if (Object.prototype.hasOwnProperty.call(grouped, key)) {
            // A repeated key with a different key between one of its repetitions
            // means we can't represent these entries as a dictionary.
            return null;
        }
        else {
            grouped[key] = [value];
        }
        previousKey = key;
    }
    return grouped;
}
// Render [key, value] entries as Python source: a dict when possible
// (values become lists on repeated keys), otherwise a list of tuples.
// Keys and values are already Python expression strings.
function dataEntriesToPython(dataEntries) {
    if (dataEntries.length === 0) {
        return "''"; // This shouldn't happen
    }
    const asDict = dataEntriesToDict(dataEntries);
    if (asDict === null) {
        // Not representable as a dict — fall back to a list of tuples.
        const tuples = dataEntries
            .map(([key, val]) => "    (" + key + ", " + val + "),\n")
            .join("");
        return "[\n" + tuples + "]";
    }
    if (Object.keys(asDict).length === 0) {
        return "''"; // This shouldn't happen
    }
    const parts = ["{\n"];
    for (const [key, vals] of Object.entries(asDict)) {
        let rendered;
        if (vals.length === 0) {
            rendered = "''"; // This shouldn't happen
        }
        else if (vals.length === 1) {
            rendered = vals[0];
        }
        else {
            // Repeated key: emit the values as a list.
            const items = vals.map((val) => "        " + val + ",\n").join("");
            rendered = "[\n" + items + "    ]";
        }
        parts.push("    " + key + ": " + rendered + ",\n");
    }
    parts.push("}");
    return parts.join("");
}
/**
 * Try to render --data/--data-urlencode/--json arguments as Python key/value
 * entries (a dict or a list of tuples) for Requests' data=/params=.
 *
 * @param dataArray mix of Words and {filetype: "urlencode", name, filename}
 *   file-reading entries
 * @param osVars / imports mutated as in repr()
 * @param variableName name of the generated Python variable
 * @returns null when the data can't round-trip through this representation,
 *   otherwise [pythonSource, percentWarn] where percentWarn is the first
 *   key/value containing "%20" (empty string when none) for warning purposes.
 */
function formatDataAsEntries(dataArray, osVars, imports, variableName = "data") {
    // This code is more complicated than you might expect because it needs
    // to handle a --data-urlencode that reads from a file followed by --json
    // because --json doesn't add an '&' before its value. Specifically, we
    // have these cases:
    //
    // --data-urlencode @filename --json =value
    // {open('filename').read(): 'value'}
    //
    // --data-urlencode @filename --json key=value
    // {open('filename').read() + 'key': 'value'}
    //
    // --data-urlencode @filename --json key
    // error
    //
    // --data-urlencode name@filename --json value
    // {'name': open('filename').read() + 'value'}
    //
    // --data-urlencode name@filename --json key=value
    // error
    //
    // --data-urlencode name@filename --json =blah
    // error
    //
    // --data-urlencode adds an '&' before its value, so we don't have to worry about
    // --json <foo> --data-urlencode <bar>
    for (const d of dataArray) {
        // Only plain Words and --data-urlencode file reads are representable.
        if (!(d instanceof Word) && d.filetype !== "urlencode") {
            return null;
        }
    }
    const dataEntries = [];
    let percentWarn = new Word();
    for (const [i, d] of dataArray.entries()) {
        if (d instanceof Word) {
            let newEntries = d.split("&");
            const prevEntry = i > 0 ? dataEntries[dataEntries.length - 1] : null;
            if (prevEntry !== null) {
                // If there's a prevEntry, we can assume it came from --data-urlencode
                // because it would be part of the current `d` string if it didn't.
                const [first, ...rest] = newEntries;
                if (first.includes("=") && prevEntry[1] === null) {
                    // prevEntry is a bare file read: this piece completes its
                    // key and supplies the value.
                    const [key, val] = first.split("=", 2);
                    const decodedKey = decodePercentEncoding(key);
                    if (decodedKey === null) {
                        return null;
                    }
                    const decodedVal = decodePercentEncoding(val);
                    if (decodedVal === null) {
                        return null;
                    }
                    if (key.toBool()) {
                        prevEntry[0] += " + " + repr(decodedKey, osVars, imports);
                    }
                    prevEntry[1] = repr(decodedVal, osVars, imports);
                    // Remember the first "%20" sighting for the caller's warning.
                    if (!percentWarn.toBool()) {
                        if (key.includes("%20")) {
                            percentWarn = key;
                        }
                        if (val.includes("%20")) {
                            percentWarn = val;
                        }
                    }
                }
                else if (!first.includes("=") && prevEntry[1] !== null) {
                    // prevEntry already has a value: this piece extends it.
                    if (first.toBool()) {
                        const decodedVal = decodePercentEncoding(first);
                        if (decodedVal === null) {
                            return null;
                        }
                        prevEntry[1] += " + " + repr(decodedVal, osVars, imports);
                        if (!percentWarn.toBool() && first.includes("%20")) {
                            percentWarn = first;
                        }
                    }
                }
                else {
                    return null;
                }
                newEntries = rest;
            }
            for (const [j, entry] of newEntries.entries()) {
                if (entry.isEmpty() &&
                    j === newEntries.length - 1 &&
                    i !== dataArray.length - 1) {
                    // A --data-urlencoded should come next
                    continue;
                }
                if (!entry.includes("=")) {
                    return null;
                }
                const [key, val] = entry.split("=", 2);
                const decodedKey = decodePercentEncoding(key);
                if (decodedKey === null) {
                    return null;
                }
                const decodedVal = decodePercentEncoding(val);
                if (decodedVal === null) {
                    return null;
                }
                dataEntries.push([
                    repr(decodedKey, osVars, imports),
                    repr(decodedVal, osVars, imports),
                ]);
                if (!percentWarn.toBool()) {
                    if (key.includes("%20")) {
                        percentWarn = key;
                    }
                    if (val.includes("%20")) {
                        percentWarn = val;
                    }
                }
            }
            continue;
        }
        // A --data-urlencode file read.
        // TODO: I bet Python doesn't treat file paths identically to curl
        const readFile = eq(d.filename, "-")
            ? "sys.stdin.read()"
            : "open(" + repr(d.filename, osVars, imports, false, true) + ").read()";
        if (!d.name) {
            // Value is null until a following Word completes the entry.
            dataEntries.push([readFile, null]);
        }
        else {
            // Curl doesn't percent encode the name but Requests will
            if (!eq(d.name, percentEncode(d.name))) {
                return null;
            }
            dataEntries.push([repr(d.name, osVars, imports), readFile]);
        }
    }
    // Any entry still missing a value can't be represented.
    if (dataEntries.some((e) => e[1] === null)) {
        return null;
    }
    return [
        variableName +
            " = " +
            dataEntriesToPython(dataEntries) +
            "\n",
        percentWarn.toString(),
    ];
}
/**
 * Render --data arguments as Python source assigning a str (or bytes) to
 * variableName, concatenating literal pieces and file reads line by line.
 *
 * @returns [pythonSource, shouldEncode] — shouldEncode is true when the
 *   caller must append .encode() to the finished value (non-ASCII content
 *   that couldn't be encoded inline).
 */
function formatDataAsStr(dataArray, imports, osVars, variableName = "data") {
    // If one of the arguments has to be binary, then they all have to be binary
    // because we can't mix bytes and str.
    // An argument has to be binary when the input command has
    // --data-binary @filename
    // otherwise we could generate code that will try to read an image file as text and error.
    const binary = dataArray.some((d) => !(d instanceof Word) && d.filetype === "binary");
    const reprFunc = binary ? reprb : repr;
    const prefix = binary ? "b" : "";
    const mode = binary ? ", 'rb'" : "";
    // If we see a string with non-ASCII characters, or read from a file (which may contain
    // non-ASCII characters), we convert the entire string to bytes at the end.
    // curl sends bytes as-is, which is presumably in UTF-8, whereas Requests sends
    // 0x80-0xFF as Latin-1 (as-is) and throws an error if it sees codepoints
    // above 0xFF.
    // TODO: is this actually helpful?
    let encode = false;
    let encodeOnSeparateLine = false;
    const lines = [];
    // `extra` carries a separator/prefix from the previous piece onto the
    // next generated line.
    let extra = new Word();
    let i, d;
    for ([i, d] of dataArray.entries()) {
        const op = i === 0 ? "=" : "+=";
        let line = variableName + " " + op + " ";
        if (i < dataArray.length - 1 && d instanceof Word && d.endsWith("&")) {
            // Put the trailing '&' on the next line so that we don't have single '&'s on their own lines
            extra = new Word("&");
            d = d.slice(0, -1);
        }
        if (d instanceof Word) {
            if (d.length) {
                line += reprFunc(d, osVars, imports);
                lines.push(line);
                encode || (encode = d.test(/[^\x00-\x7F]/));
            }
            continue;
        }
        // d is a file-reading entry: {filetype, name, filename}.
        const { filetype, name, filename } = d;
        if (filetype === "urlencode" && name) {
            line += reprFunc(extra.add(name).append("="), osVars, imports) + " + ";
            encodeOnSeparateLine = true; // we would need to add parentheses because of the +
        }
        else if (extra.toBool()) {
            line += reprFunc(extra, osVars, imports) + " + ";
            encodeOnSeparateLine = true;
        }
        if (extra.toBool()) {
            encodeOnSeparateLine = true; // we would need to add parentheses because of the +
        }
        let readFile = "";
        if (eq(filename, "-")) {
            readFile += binary ? "sys.stdin.buffer" : "sys.stdin";
            imports.add("sys");
        }
        else {
            // TODO: if filename is a command, this won't work because unlike bash,
            // Python won't remove the trailing newline from the result of a command
            // we need to add .trim()
            line =
                "with open(" +
                    repr(filename, osVars, imports, false, true) +
                    mode +
                    ") as f:\n    " +
                    line;
            readFile += "f";
        }
        readFile += ".read()";
        if (!["binary", "json", "urlencode"].includes(filetype)) {
            // Plain --data strips newlines from file contents, like curl does.
            readFile += `.replace(${prefix}'\\n', ${prefix}'').replace(${prefix}'\\r', ${prefix}'')`;
        }
        if (filetype === "urlencode") {
            readFile = "quote_plus(" + readFile + ")";
            if (binary) {
                // quote_plus() always returns a string
                readFile += ".encode()";
            }
            imports.add("urllib.parse.quote_plus");
        }
        else {
            // --data-urlencode files don't need to be encoded because
            // they'll be percent-encoded and therefore ASCII-only
            encode = true;
        }
        line += readFile;
        lines.push(line);
        extra = new Word();
    }
    if (binary) {
        encode = false;
    }
    else if (encode && lines.length === 1 && !encodeOnSeparateLine) {
        // Single simple line: append .encode() inline instead of asking the
        // caller to do it.
        lines[lines.length - 1] += ".encode()";
        encode = false;
    }
    return [lines.join("\n") + "\n", encode];
}
/**
 * Try to render a --data argument as a Python object for Requests' json=.
 *
 * @param d a Word or a {filetype: "json", filename} file-reading entry
 * @returns [jsonDataString|null, roundtrips] — roundtrips is true when
 *   Python's json.dumps() of the object would reproduce the original input
 *   exactly, so data= and the Content-Type header can be dropped.
 */
export function formatDataAsJson(d, imports, osVars) {
    if (d instanceof Word) {
        if (!d.isString()) {
            // TODO: a JSON parser that allows shell variables
            return [null, false];
        }
        // Try to parse using lossless-json first, then fall back to JSON.parse
        // TODO: repeated dictionary keys are discarded
        // https://github.com/josdejong/lossless-json/issues/244
        let dataAsJson;
        try {
            // TODO: types
            // https://github.com/josdejong/lossless-json/issues/245
            dataAsJson = jsonParseLossless(d.toString());
        }
        catch (_a) {
            try {
                dataAsJson = JSON.parse(d.toString());
            }
            catch (_b) {
                return [null, false];
            }
        }
        try {
            const jsonDataString = "json_data = " + objToPython(dataAsJson) + "\n";
            // JSON might not be serialized by Python exactly as it was originally
            // due to different whitespace, float formatting like extra + in exponent
            // (1e100 vs 1e+100), different escape sequences in strings
            // ("\/" vs "/" or "\u0008" vs "\b") or duplicate object keys.
            let jsonRoundtrips = false;
            try {
                jsonRoundtrips = jsonDumps(dataAsJson) === d.tokens[0];
            }
            catch (_c) { }
            return [jsonDataString, jsonRoundtrips];
        }
        catch (_d) { }
    }
    else if (d.filetype === "json") {
        // --json @file: load it with Python's json module at runtime.
        let jsonDataString = "";
        jsonDataString +=
            "with open(" +
                repr(d.filename, osVars, imports, false, true) +
                ") as f:\n";
        jsonDataString += "    json_data = json.load(f)\n";
        imports.add("json");
        return [jsonDataString, false];
    }
    return [null, false];
}
/**
 * Decide how to render the request body for Requests.
 *
 * @returns [dataString|null, shouldEncode, jsonDataString|null, imports] —
 *   dataString is the data=/params= source, jsonDataString the json= source,
 *   shouldEncode as in formatDataAsStr(). May delete the content-type header
 *   from request.headers when Requests would add it itself.
 */
function getDataString(request, osVars, warnings) {
    const imports = new Set();
    if (!request.data || !request.dataArray) {
        return [null, false, null, imports];
    }
    // There's 4 ways to pass data to Requests (in descending order of preference):
    // a or dictionary/list as the json= argument
    // a dictionary, or a list of tuples (if the dictionary would have duplicate keys) as the data= argument
    // a string as data=
    // bytes as data=
    // We can pass json= if the data is valid JSON and we've specified json in the
    // Content-Type header because passing json= will set that header.
    //
    // However, if there will be a mismatch between how the JSON is formatted
    // we need to output a commented out version of the request with data= as well.
    // This can happen when there's extra whitespace in the original data or
    // because the JSON contains numbers that are too big to be stored in
    // JavaScript or because there's objects with duplicate keys.
    const contentType = request.headers.get("content-type");
    let dataAsJson = null;
    let jsonRoundtrips = false;
    if (request.dataArray.length === 1 &&
        contentType &&
        contentType.split(";")[0].toString().trim() === "application/json") {
        [dataAsJson, jsonRoundtrips] = formatDataAsJson(request.dataArray[0], imports, osVars);
    }
    if (jsonRoundtrips) {
        // json= alone reproduces the request exactly; no data= needed.
        return [null, false, dataAsJson, imports];
    }
    // data= can't be a dict or a list of tuples (i.e. entries) when
    // there is a @file from --data, --data-binary or --json (because they can contain an '&' which would get escaped)
    // there is a --data-urlencode without a name= or name@
    // if you split the input on & and there's a value that doesn't contain an = (e.g. --data "foo=bar&" or simply --data "&")
    // there is a name or value that doesn't roundtrip through percent encoding
    const dataAsEntries = formatDataAsEntries(request.dataArray, osVars, imports);
    if (dataAsEntries !== null) {
        const [dataEntries, percentWarn] = dataAsEntries;
        if (eq(request.headers.get("content-type"), "application/x-www-form-urlencoded") &&
            request.headers.length === 1) {
            // Requests adds the header when you include a body
            // so if it's the only header, don't print the headers
            request.headers.delete("content-type");
        }
        if (percentWarn) {
            warnings.push([
                "percent-encoded-spaces-in-data",
                'data contains spaces encoded by curl as "%20" which will be sent as "+" instead: ' +
                    JSON.stringify(percentWarn),
            ]);
        }
        return [dataEntries, false, dataAsJson, imports];
    }
    // Fall back to a plain str/bytes body.
    const [dataAsString, shouldEncode] = formatDataAsStr(request.dataArray, imports, osVars);
    return [dataAsString, shouldEncode, dataAsJson, imports];
}
/**
 * Build the Python "files = ..." source for multipart (-F/--form) uploads:
 * a dict when all part names are unique, otherwise a list of tuples.
 *
 * @returns [filesString, usesStdin] — usesStdin is true when the generated
 *   code reads sys.stdin (caller is responsible for the "sys" import).
 */
function getFilesString(request, osVars, imports) {
    let usesStdin = false;
    if (!request.multipartUploads) {
        return ["", usesStdin];
    }
    const multipartUploads = request.multipartUploads.map((m) => {
        // https://github.com/psf/requests/blob/2d5517682b3b38547634d153cea43d48fbc8cdb5/requests/models.py#L117
        //
        // Requests's multipart syntax looks like this:
        // name/filename: content
        // name: open(filename/contentFile)
        // name: (filename, open(contentFile)[, contentType[, headers]]))
        const name = m.name ? repr(m.name, osVars, imports) : "None";
        // Simple cases that don't need the tuple syntax.
        if (!("contentType" in m) && !("headers" in m) && !("encoder" in m)) {
            if ("contentFile" in m &&
                eq(m.contentFile, m.filename) &&
                !eq(m.contentFile, "-")) {
                return [
                    name,
                    "open(" +
                        repr(m.contentFile, osVars, imports, false, true) +
                        ", 'rb')",
                ];
            }
            // NOTE(review): comparing m.name to m.filename here looks
            // suspicious (name vs filename) — confirm this is intended.
            if ("content" in m && "filename" in m && eq(m.name, m.filename)) {
                return [name, repr(m.content, osVars, imports)];
            }
        }
        const sentFilename = "filename" in m && m.filename
            ? repr(m.filename, osVars, imports)
            : "None";
        const tuple = [sentFilename];
        if ("contentFile" in m) {
            if (eq(m.contentFile, "-")) {
                // TODO: use piped stdin if we have it
                usesStdin = true;
                // (fixed: this string used to contain a stray ')' which made
                // the generated tuple's parentheses unbalanced)
                tuple.push("sys.stdin.buffer.read()");
            }
            else {
                tuple.push("open(" +
                    repr(m.contentFile, osVars, imports, false, true) +
                    ", 'rb')");
            }
        }
        else {
            tuple.push(repr(m.content, osVars, imports));
        }
        let addedContentType = false;
        if ("contentType" in m && m.contentType) {
            tuple.push(repr(m.contentType, osVars, imports));
            addedContentType = true;
        }
        if (("headers" in m && m.headers) ||
            ("headerFiles" in m && m.headerFiles)) {
            // The headers slot comes after contentType; pad with None if absent.
            if (!addedContentType) {
                tuple.push("None");
            }
            const headerArg = [];
            if (m.headers) {
                let headerDict = "{";
                for (const header of m.headers) {
                    // TODO: can have multiple headers in one headers= argument?
                    const [name, value] = header.split(":", 2);
                    // (fixed: guard against a header with no ':' — value would
                    // be undefined and .trimStart() would throw)
                    headerDict +=
                        repr(name, osVars, imports) +
                            ": " +
                            repr(value ? value.trimStart() : new Word(), osVars, imports) +
                            ", ";
                }
                if (headerDict.endsWith(", ")) {
                    headerDict = headerDict.slice(0, -2);
                }
                headerDict += "}";
                headerArg.push(headerDict);
            }
            if (m.headerFiles && m.headerFiles.length) {
                for (const headerFile of m.headerFiles) {
                    headerArg.push("read_headerfile(" +
                        repr(headerFile, osVars, imports, false, true) +
                        ")");
                }
                imports.add("read_headerfile");
            }
            tuple.push(headerArg.join(" | "));
        }
        return [name, "(" + tuple.join(", ") + ")"];
    });
    // Use a dict only when no part name repeats; otherwise a list of tuples.
    const multipartUploadsAsDict = Object.fromEntries(multipartUploads);
    let filesString = "files = ";
    if (Object.keys(multipartUploadsAsDict).length === multipartUploads.length) {
        filesString += "{\n";
        for (const [multipartKey, multipartValue] of multipartUploads) {
            filesString += "    " + multipartKey + ": " + multipartValue + ",\n";
        }
        filesString += "}\n";
    }
    else {
        filesString += "[\n";
        for (const [multipartKey, multipartValue] of multipartUploads) {
            filesString += "    (" + multipartKey + ", " + multipartValue + "),\n";
        }
        filesString += "]\n";
    }
    return [filesString, usesStdin];
}
// Don't add indent/comment characters to empty lines, most importantly the last line
// which will be empty when there's a trailing newline.
function indent(s, level) {
    if (level === 0) {
        return s;
    }
    const prefix = " ".repeat(level);
    const lines = s.split("\n");
    const indented = lines.map((line) => (line.trim() === "" ? line : prefix + line));
    return indented.join("\n");
}
// Prefix every non-empty line with '#' to comment out generated Python.
// Empty lines (including the trailing one) are left untouched.
function commentOut(s) {
    const lines = s.split("\n");
    const commented = lines.map((line) => (line.trim() === "" ? line : "#" + line));
    return commented.join("\n");
}
// Push a [type, message] warning, emitting each warning type at most once.
function uniqueWarn(seenWarnings, warnings, warning) {
    const warningType = warning[0];
    if (seenWarnings.has(warningType)) {
        return;
    }
    seenWarnings.add(warningType);
    warnings.push(warning);
}
// Join already-rendered Python arguments into a call's parenthesized
// argument list: one line when short, one argument per line otherwise.
function joinArgs(args) {
    const fitsOnOneLine = args.join("").length < 100;
    if (fitsOnOneLine) {
        return "(" + args.join(", ") + ")";
    }
    let out = "(\n";
    for (const arg of args) {
        out += "    " + arg + ",\n";
    }
    return out + ")";
}
function requestToPython(request, warnings = [], imports, thirdPartyImports) {
const osVars = {};
const commentedOutHeaders = {
// TODO: add a warning why this should be commented out?
"accept-encoding": "",
"content-length": "",
};
// https://github.com/icing/blog/blob/main/curl_on_a_weekend.md
if (eq(request.headers.get("te"), "trailers")) {
commentedOutHeaders.te = "Requests doesn't support trailers";
}
let cookieStr;
let cookieFile = null;
if (request.cookies) {
// TODO: handle duplicate cookie names
cookieStr = "cookies = {\n";
for (const [cookieName, cookieValue] of request.cookies) {
cookieStr +=
" " +
repr(cookieName, osVars, imports) +
": " +
repr(cookieValue, osVars, imports) +
",\n";
}
cookieStr += "}\n";
// Before Python 3.11, cookies= was sorted alphabetically
// https://github.com/python/cpython/issues/86232
commentedOutHeaders.cookie = "";
if (request.cookieFiles) {
warnings.push([
"cookie-files",
"passing both cookies and cookie files with --cookie/-b is not supported",
]);
}
if (request.cookieJar) {
warnings.push([
"cookie-files",
"passing both cookies and --cookie-jar/-c is not supported",
]);
}
}
else if ((request.cookieFiles && request.cookieFiles.length) ||
request.cookieJar) {
imports.add("http.cookiejar.MozillaCookieJar");
if (request.cookieFiles && request.cookieFiles.length) {
// TODO: what if user passes multiple cookie files?
// TODO: what if user passes cookies and cookie files?
cookieFile = request.cookieFiles[request.cookieFiles.length - 1];
if (request.cookieFiles.length > 1) {
warnings.push([
"cookie-files",
// TODO: curl reads all of them.
"multiple cookie files are not supported, using the last one: " +
JSON.stringify(cookieFile.toString()),
]);
}
// TODO: do we need to .load()?
cookieStr =
"cookies = MozillaCookieJar(" +
repr(cookieFile, osVars, imports) +
")\n";
}
else if (request.cookieJar) {
cookieStr = "cookies = MozillaCookieJar()\n";
}
}
let proxyDict;
if (request.proxy) {
let proxy = request.proxy.includes("://")
? request.proxy
: request.proxy.prepend("http://");
const protocol = proxy.split("://")[0].toLowerCase();
if (eq(protocol, "socks")) {
// https://github.com/curl/curl/blob/curl-7_86_0/lib/url.c#L2418-L2419
proxy = proxy.replace("socks", "socks4");
}
proxyDict = "proxies = {\n";
proxyDict += " 'http': " + repr(proxy, osVars, imports) + ",\n";
// TODO: if (protocol !== "http") { ?
proxyDict += " 'https': " + repr(proxy, osVars, imports) + ",\n";
proxyDict += "}\n";
}
// Build the Python `cert = ...` line for --cert/--key.
// Requests accepts either a single path or a (cert, key) tuple.
let certStr;
if (request.cert || request.key) {
certStr = "cert = ";
let certPart = "None";
if (request.cert) {
const [cert, password] = request.cert;
certPart = repr(cert, osVars, imports);
// Requests' cert= has no password argument, so it is dropped with a warning.
if (password) {
warnings.push([
"cert-password",
"passing a password to --cert is not supported: " +
JSON.stringify(password.toString()),
]);
}
}
if (request.key) {
// Both cert and key -> a Python 2-tuple ("None" cert is possible here).
certStr +=
"(" + certPart + ", " + repr(request.key, osVars, imports) + ")";
}
else {
certStr += certPart;
}
certStr += "\n";
}
// if there's only 1 URL, put params all together here, unless it's just one string.
// if there's more than 1, if we have params that are added to each URL from
// --get --data or --url-query that need to read a file, put just the shared part
// here, then keep the rest of the query in the URL.
// If there's no --get --data or --url-query, then
// put params (if it can be rendered as a list or dict) right before the requests line
// Otherwise, keep the query in the URL.
let paramsStr;
let shouldEncodeParams; // TODO: necessary?
// True when at least one entry is not a plain Word, i.e. its value has to
// be read from a file (or stdin) at runtime rather than being a literal.
function readsFile(paramArray) {
    return !paramArray.every((p) => p instanceof Word);
}
// With one URL, use its own query; with several, only the shared query parts.
const paramArray = request.urls.length === 1 ? request.urls[0].queryArray : request.queryArray;
// Only pull the query out into params= for a single URL, or for multiple
// URLs when a shared param has to be read from a file at runtime.
if (paramArray &&
(request.urls.length === 1 ||
(request.urls.length > 1 && readsFile(paramArray)))) {
// Prefer rendering the query as a Python list/dict literal.
const queryAsEntries = formatDataAsEntries(paramArray, osVars, imports, "params");
if (queryAsEntries !== null) {
let percentWarn;
[paramsStr, percentWarn] = queryAsEntries;
// Requests re-encodes params=, turning "%20" into "+".
if (percentWarn) {
warnings.push([
"percent-encoded-spaces-in-query",
// TODO: will they?
'URL querystring contains spaces encoded as "%20" which will be sent by Requests as "+" instead: ' +
JSON.stringify(percentWarn),
]);
}
}
else if (readsFile(paramArray)) {
// Can't use a literal, but file reads force params= to be generated anyway.
[paramsStr, shouldEncodeParams] = formatDataAsStr(paramArray, imports, osVars, "params");
}
}
const contentType = request.headers.get("content-type");
// The request body is exactly one of: --upload-file, multipart --form, or --data.
let dataString;
let jsonDataString;
let filesString;
let shouldEncode;
if (request.urls[0].uploadFile && request.urls.length === 1) {
// TODO: https://docs.python-requests.org/en/latest/user/advanced/#streaming-uploads
// "-" and "." mean read the upload body from stdin.
if (eq(request.urls[0].uploadFile, "-") ||
eq(request.urls[0].uploadFile, ".")) {
dataString = "data = sys.stdin.buffer.read()\n";
imports.add("sys");
}
else {
// Read the file in binary mode so the bytes are sent unmodified.
dataString =
"with open(" +
repr(request.urls[0].uploadFile, osVars, imports, false, true) +
", 'rb') as f:\n";
dataString += " data = f.read()\n";
}
}
else if (request.multipartUploads) {
let usesStdin = false;
[filesString, usesStdin] = getFilesString(request, osVars, imports);
if (usesStdin) {
imports.add("sys");
}
// If you pass files= then Requests adds this header and a `boundary`
// If you manually pass a Content-Type header it won't set a `boundary`
// whereas curl does, so the request will fail.
// https://github.com/curlconverter/curlconverter/issues/248
if (filesString &&
contentType &&
eq(contentType.trim(), "multipart/form-data") &&
!contentType.includes("boundary=")) {
// TODO: better wording
commentedOutHeaders["content-type"] =
"requests won't add a boundary if this header is set when you pass files=";
}
}
else if (request.data && !request.data.isEmpty()) {
// !isEmpty() because passing data='' is the same as not passing data=
// We need to set the Content-Type header in headers= and not set data=
let dataImports;
[dataString, shouldEncode, jsonDataString, dataImports] = getDataString(request, osVars, warnings);
dataImports.forEach(imports.add, imports);
// Remove "Content-Type" from the headers dict
// because Requests adds it automatically when you use json=
if (jsonDataString &&
!dataString &&
contentType &&
eq(contentType.trim(), "application/json")) {
commentedOutHeaders["content-type"] = "Already added when you pass json=";
}
}
// Render the Python `headers = {...}` dict (with any commented-out entries).
let headerDict;
if (request.headers.length) {
headerDict = formatHeaders(request.headers, commentedOutHeaders, osVars, imports);
}
// Assemble the generated Python preamble, one blank-line-separated
// section per variable, in a fixed order.
let pythonCode = "";
if (proxyDict) {
pythonCode += proxyDict + "\n";
}
if (cookieStr) {
pythonCode += cookieStr + "\n";
}
if (headerDict) {
pythonCode += headerDict + "\n";
}
if (paramsStr) {
pythonCode += paramsStr + "\n";
}
if (certStr) {
pythonCode += certStr + "\n";
}
// json=, data= and files= are mutually exclusive; emit at most one.
if (jsonDataString) {
pythonCode += jsonDataString + "\n";
}
else if (dataString) {
pythonCode += dataString + "\n";
}
else if (filesString) {
pythonCode += filesString + "\n";
}
// By default, curl doesn't follow redirects and Requests does.
// Unless redirect behavior has been explicitly set with -L/--location/--no-location
// or --max-redirs 0 we generate code that follows redirects,
// because adding allow_redirects=False to almost every command would be ugly
// and it only matters when the server responds with a redirect, which isn't
// that common.
let followRedirects = request.followRedirects;
let maxRedirects = undefined;
if (request.maxRedirects !== undefined) {
maxRedirects = asInt(request.maxRedirects, osVars, imports);
}
if (followRedirects === undefined) {
followRedirects = true;
// Users would see this warning for most commands
// warnings.push([
// "--location",
// "Requests defaults to following redirects, curl doesn't",
// ]);
}
// Only emit max_redirects when it differs from both curl's "off" (0)
// and Requests' default (30).
// NOTE(review): maxRedirects is compared as a string — presumably asInt()
// returns Python source text; confirm it can't be e.g. "00".
const hasMaxRedirects = followRedirects &&
maxRedirects &&
maxRedirects !== "0" &&
maxRedirects !== "30"; // Requests default
// Things that vary per-url:
// method (because --upload-file can make it PUT)
// data= (because of --upload-file)
// --output file
// params= (because of the query string)
// auth= (because the URL can have an auth string)
// Deduplicate per-URL warnings across iterations.
const seenWarnings = new Set();
const requestLines = [];
let extraEmptyLine = false;
for (const [urlObjIndex, urlObj] of request.urls.entries()) {
// Methods with a dedicated requests.<method>() helper.
const requestsMethods = [
"GET",
"HEAD",
"POST",
"PATCH",
"PUT",
"DELETE",
"OPTIONS", // undocumented
];
let fn;
const args = [];
const methodAsString = urlObj.method.toString();
if (urlObj.method.isString() && requestsMethods.includes(methodAsString)) {
// e.g. requests.get(...)
fn = methodAsString.toLowerCase();
}
else {
// Anything else goes through requests.request(method, ...)
fn = "request";
args.push(repr(urlObj.method, osVars, imports));
// Requests upper-cases some methods internally, so warn on
// non-upper-case custom methods.
if (urlObj.method.isString() &&
methodAsString !== methodAsString.toUpperCase()) {
warnings.push([
"method