// curlconverter — converts curl commands to Python, JavaScript, Go, PHP and more.
// This module generates Python (Requests) code.
import { CCError, has, UTF8encoder } from "../../utils.js";
import { Word, eq } from "../../shell/Word.js";
import { parse, COMMON_SUPPORTED_ARGS } from "../../parse.js";
import type { Request, Warnings } from "../../parse.js";
import { Headers } from "../../Headers.js";
import { wordDecodeURIComponent, percentEncode } from "../../Query.js";
import { DataParam } from "../../Request.js";
import {
parse as jsonParseLossless,
stringify as jsonStringifyLossless,
isSafeNumber,
isInteger,
isLosslessNumber,
} from "lossless-json";
// TODO: partiallySupportedArgs
// curl command-line arguments this generator understands (or at least
// recognizes well enough to emit a useful warning for).
export const supportedArgs = new Set([
  ...COMMON_SUPPORTED_ARGS,
  "compressed",
  // "no-compressed",
  // "anyauth",
  // "no-anyauth",
  "digest",
  "no-digest",
  "aws-sigv4",
  "negotiate",
  "no-negotiate",
  "delegation", // GSS/kerberos
  // "service-name", // GSS/kerberos, not supported
  "ntlm",
  "no-ntlm",
  "ntlm-wb",
  "no-ntlm-wb",
  "http1.1",
  // These are not supported, just better warning message
  "http2",
  "http2-prior-knowledge",
  "http3",
  "http3-only",
  "cookie-jar",
  "cert",
  "cacert",
  "key",
  "capath",
  "form",
  "form-string",
  "location",
  "no-location",
  "location-trusted", // not exactly supported, just better warning message
  "no-location-trusted",
  "max-redirs",
  "max-time",
  "connect-timeout",
  "insecure",
  "no-insecure",
  "output",
  "upload-file",
  "next",
  "proxy",
  "proxy-user",
]);
// https://peps.python.org/pep-3138/
// https://www.unicode.org/reports/tr44/#GC_Values_Table
// https://unicode.org/Public/UNIDATA/UnicodeData.txt
// https://en.wikipedia.org/wiki/Plane_(Unicode)#Overview
// Characters that must be escaped inside a single-/double-quoted Python
// string: the quote itself, backslash, Unicode "Other" (controls etc.)
// and separators other than a plain space.
const regexSingleEscape = /'|\\|\p{C}|[^ \P{Z}]/gu;
const regexDoubleEscape = /"|\\|\p{C}|[^ \P{Z}]/gu;

// Renders a JavaScript string as a Python string literal.
// Also used for Go and R
export function reprStr(s: string, quote?: '"' | "'"): string {
  // Default to single quotes; switch to double quotes only when that
  // avoids escaping (string has ' but no ").
  if (quote === undefined) {
    quote = s.includes("'") && !s.includes('"') ? '"' : "'";
  }
  // Well-known short escapes; anything else falls through to \xXX/\uXXXX/\UXXXXXXXX.
  const namedEscapes: { [c: string]: string } = {
    "\x07": "\\a",
    "\b": "\\b",
    "\f": "\\f",
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
    "\v": "\\v",
    "\\": "\\\\",
    "'": "\\'",
    '"': '\\"',
  };
  const escapeChar = (c: string): string => {
    const named = namedEscapes[c];
    if (named !== undefined) {
      return named;
    }
    const hex = (c.codePointAt(0) as number).toString(16);
    if (hex.length <= 2) {
      return "\\x" + hex.padStart(2, "0");
    }
    if (hex.length <= 4) {
      return "\\u" + hex.padStart(4, "0");
    }
    return "\\U" + hex.padStart(8, "0");
  };
  const pattern = quote === "'" ? regexSingleEscape : regexDoubleEscape;
  return quote + s.replace(pattern, escapeChar) + quote;
}
// TODO: use this if string contains unmatched surrogates?
// It just replaces them with the replacement character, but at least that code would run.
// Renders a string as a Python bytes literal (b'...') by UTF-8 encoding it
// and escaping every byte that isn't printable ASCII.
export function pybescComplex(s: string): string {
  // Prefer single quotes; use double quotes when that avoids escaping.
  const quote = s.includes("'") && !s.includes('"') ? '"' : "'";
  const quoteCode = quote.charCodeAt(0);
  // Short escapes for the usual control bytes and the backslash.
  const byteEscapes: { [code: number]: string } = {
    0x07: "\\a",
    0x08: "\\b",
    0x0c: "\\f",
    0x0a: "\\n",
    0x0d: "\\r",
    0x09: "\\t",
    0x0b: "\\v",
    0x5c: "\\\\",
  };
  // TODO: using UTF-8 here is overly simplistic and how to encode here
  // is a pretty complicated decision.
  // For starters, it would be more correct to use the same encoding as
  // the terminal when running from the command line.
  const pieces: string[] = [];
  for (const byte of UTF8encoder.encode(s)) {
    if (byte === quoteCode) {
      pieces.push("\\" + quote);
    } else if (byte in byteEscapes) {
      pieces.push(byteEscapes[byte]);
    } else if (byte >= 0x20 && byte < 0x7f) {
      // Printable ASCII passes through unchanged.
      pieces.push(String.fromCharCode(byte));
    } else {
      pieces.push("\\x" + byte.toString(16).padStart(2, "0"));
    }
  }
  return "b" + quote + pieces.join("") + quote;
}
// Renders a string as a Python expression that evaluates to bytes:
// a b'...' literal when it's pure ASCII, otherwise '...'.encode().
export function reprStrBinary(s: string): string {
  const escaped = reprStr(s);
  // We check until 0x7F instead of 0xFF because curl (running in a UTF-8 terminal) when it gets
  // bytes sends them as is, but if we pass b'\x80' to Requests, it will encode that byte as
  // Latin-1 (presumably for backwards compatibility) instead of UTF-8.
  const isAscii = /^[\x00-\x7f]*$/.test(s);
  // TODO: unmatched surrogates will generate code that throws an error
  // e.g.: '\uDC00'.encode()
  return isAscii ? "b" + escaped : escaped + ".encode()";
}
// Maps generated Python variable names (e.g. "command1") to the Python
// expression that populates them.
export type OSVars = { [key: string]: string };

/**
 * Converts a parsed shell Word into a Python expression string.
 *
 * Literal tokens become Python string literals, environment-variable tokens
 * become os.getenv()/os.getenvb() calls and subcommand tokens become
 * subprocess.run(...).stdout expressions stored in numbered "commandN"
 * variables. Multiple pieces are joined with " + ".
 *
 * @param word the shell word to render
 * @param osVars out-param: generated variable name → subprocess expression
 * @param imports out-param: Python modules the generated code needs
 * @param binary render as Python bytes instead of str
 * @param errorOk when true (and the word is a single token), allow
 *   os.getenv() to return None for an unset variable instead of
 *   defaulting to '' — shorter output for callers that tolerate None
 */
export function repr(
  word: Word,
  osVars: OSVars,
  imports: Set<string>,
  binary = false,
  // os.getenv('MYVAR') returns None if MYVAR is not set
  // os.getenv('MYVAR', '') returns '' if MYVAR is not set but it's a bit more verbose,
  // so setting errorOk to true will use the shorter version
  errorOk = false,
): string {
  const reprFn = binary ? reprStrBinary : reprStr;
  const reprs = [];
  for (const t of word.tokens) {
    if (typeof t === "string") {
      reprs.push(reprFn(t));
    } else if (t.type === "variable") {
      // TODO: getenvb() is not available on Windows
      const fn = binary ? "os.getenvb" : "os.getenv";
      let getEnvCall = fn + "(" + reprFn(t.value);
      if (!errorOk || word.tokens.length > 1) {
        // Default to '' so concatenation with other pieces can't fail on None.
        getEnvCall += ", " + reprFn("");
      }
      getEnvCall += ")";
      reprs.push(getEnvCall);
      // TODO: if the result of a repr() is discarded, this adds an unused import
      imports.add("os");
    } else if (t.type === "command") {
      // TODO: warn that shell=True is a bad idea
      // or properly parse the subcommand and render it as an array
      let subprocessCall =
        "subprocess.run(" +
        reprStr(t.value) +
        ", shell=True, capture_output=True";
      if (!binary) {
        subprocessCall += ", text=True";
      }
      subprocessCall += ").stdout";
      // TODO: generate a descriptive command name with ChatGPT
      // TODO: if there's only one command, name the variable "command" instead of "command1"
      let i = 1;
      let pyVar = "command" + i;
      // We need to check because we often try to represent the same
      // token twice and discard one of the attempts.
      // This is linear time but hopefully there's not that many subcommands.
      while (pyVar in osVars && osVars[pyVar] !== subprocessCall) {
        i++;
        pyVar = "command" + i;
        if (i > Number.MAX_SAFE_INTEGER) {
          // Practically unreachable; give a real message instead of the
          // old placeholder ("lol") in case it ever fires.
          throw new CCError("ran out of unique names for subcommand results");
        }
      }
      osVars[pyVar] = subprocessCall;
      reprs.push(pyVar);
      // TODO: if the result of a repr() is discarded, this adds an unused import
      imports.add("subprocess");
    }
  }
  return reprs.join(" + ");
}
// Shorthand for repr() in binary mode: renders the word as a Python bytes
// expression (used for binary request bodies).
function reprb(word: Word, osVars: OSVars, imports: Set<string>): string {
  return repr(word, osVars, imports, true);
}
// Renders a word as a Python float expression: literal strings pass through
// as-is, dynamic words (env vars, subcommands) are wrapped in float().
export function asFloat(
  word: Word,
  osVars: OSVars,
  imports: Set<string>,
): string {
  // TODO: check it's actually a valid float
  return word.isString()
    ? word.toString()
    : "float(" + repr(word, osVars, imports, false, true) + ")";
}
// Renders a word as a Python int expression: literal strings pass through
// as-is, dynamic words (env vars, subcommands) are wrapped in int().
export function asInt(
  word: Word,
  osVars: OSVars,
  imports: Set<string>,
): string {
  // TODO: check it's actually a valid int
  return word.isString()
    ? word.toString()
    : "int(" + repr(word, osVars, imports, false, true) + ")";
}
// Port of Python's json.dumps() with its default options, which is what Requests uses
// https://github.com/psf/requests/blob/b0e025ade7ed30ed53ab61f542779af7e024932e/requests/models.py#L473
// It's different from JSON.stringify(). Namely, it adds spaces after ',' and ':'
// and all non-ASCII characters in strings are escaped:
// > JSON.stringify('\xFF')
// '"ÿ"'
// >>> json.dumps('\xFF')
// '"\\u00ff"'
const pythonJsonEscape = /"|\\|[^\x20-\x7E]/g;

// Renders a string the way Python's json.dumps() would (always double-quoted,
// non-ASCII escaped as \uXXXX).
function jsonRepr(s: string): string {
  // https://tc39.es/ecma262/#table-json-single-character-escapes
  const jsonEscapes: { [c: string]: string } = {
    "\b": "\\b",
    "\t": "\\t",
    "\n": "\\n",
    "\f": "\\f",
    "\r": "\\r",
    "\\": "\\\\",
    '"': '\\"',
  };
  const escaped = s.replace(pythonJsonEscape, (c: string): string => {
    const named = jsonEscapes[c];
    if (named !== undefined) {
      return named;
    }
    return "\\u" + c.charCodeAt(0).toString(16).padStart(4, "0");
  });
  return '"' + escaped + '"';
}
// Pads a single-digit exponent with a leading zero, matching Python's float
// formatting: ensure_minimum_exponent_length('1e-7') => '1e-07'.
// Assumes the input contains an 'e' followed by a sign character.
function ensure_minimum_exponent_length(n: string): string {
  const [mantissa, exponent] = n.split("e");
  const sign = exponent[0];
  const digits = exponent.slice(1);
  if (digits.length !== 1) {
    return n;
  }
  return mantissa + "e" + sign + "0" + digits;
}
// Formats a JavaScript number the way Python's str() formats a float.
function floatAsPython(value: number): string {
  // JSON.stringify() and lossless-json's stringify() don't stringify floats like Python.
  // Notably, JavaScript doesn't add a trailing '.0' to floats that are integers but Python does
  // JSON.stringify(10.0) => '10'
  // str(10.0) => '10.0'
  //
  // Python adds a leading 0 to exponent notation numbers with 1 exponent digit
  // JSON.stringify(1e-7) => '1e-7'
  // str(1e-7) => '1e-07'
  //
  // Finally, Python will switch to scientific notation if the number has more than
  // 17 digits not in scientific notation.
  //
  // Python's float formatting starts here:
  // https://github.com/python/cpython/blob/bdc93b8a3563b4a3adb25fa902c0c879ccf427f6/Python/pystrtod.c#L915-L918
  // and is ultimately this code:
  // snprintf(buf, buf_size, "%.17g", val)
  // change_decimal_from_locale_to_dot(buffer); // not important
  // ensure_minimum_exponent_length(buffer, buf_size);
  // ensure_decimal_point(buffer, buf_size, 17); // can switch to exponent notation
  //
  // And JavaScript's formatting is specified here:
  // https://tc39.es/ecma262/multipage/ecmascript-data-types-and-values.html#sec-numeric-types-number-tostring
  let asJsStr = value.toString();
  if (asJsStr.includes("e")) {
    // Already in exponent notation: only the exponent padding may differ.
    asJsStr = ensure_minimum_exponent_length(asJsStr);
  } else {
    if (isInteger(asJsStr)) {
      // Python floats always show a decimal point.
      asJsStr += ".0";
    }
    // If there's more than 17 digits of precision, switch to scientific notation
    const significantDigits = asJsStr
      .replace(/^-/, "")
      .replace(".", "")
      .replace(/^0+/, "");
    const asExponential = ensure_minimum_exponent_length(value.toExponential());
    // NOTE(review): the second condition (prefer exponent form when it's
    // shorter and there are >4 fractional digits) approximates CPython's
    // ensure_decimal_point() heuristic — confirm against CPython if edited.
    if (
      significantDigits.length > 17 ||
      (asExponential.length < asJsStr.length &&
        asJsStr.split(".")[1].length > 4)
    ) {
      asJsStr = asExponential;
    }
  }
  return asJsStr;
}
/**
 * Serializes a parsed JSON value the way Python's json.dumps() would
 * (spaces after ',' and ':', Python-style float formatting, \uXXXX escapes),
 * so we can tell whether Requests' json= argument will reproduce the
 * original body byte-for-byte.
 *
 * @throws CCError for non-finite numbers, floats unrepresentable in Python,
 *   or unexpected value types
 */
function jsonDumps(obj: string | number | boolean | object | null): string {
  if (isLosslessNumber(obj)) {
    const numAsStr = jsonStringifyLossless(obj) as string;
    if (isInteger(numAsStr)) {
      return numAsStr;
    }
    if (!isSafeNumber(numAsStr)) {
      throw new CCError("float unrepresentable in Python: " + numAsStr);
    }
    // Can't be bigint because it's !isInteger and isSafeNumber
    return floatAsPython(obj.valueOf() as number);
  }
  switch (typeof obj) {
    case "string":
      return jsonRepr(obj);
    case "number":
      // If the number in the JSON file is very large, it'll turn into Infinity
      if (!isFinite(obj)) {
        // Fixed typo in the error message ("Infitiny" -> "Infinity").
        throw new CCError("found Infinity in JSON");
      }
      // TODO: If the number in the JSON file is too big for JavaScript, we will lose information
      // TODO: JavaScript and Python serialize floats differently.
      // JSON.stringify(2e2) => 200
      // json.dumps(2e2) => 200.0
      return obj.toString();
    case "boolean":
      return obj.toString();
    case "object":
      if (obj === null) {
        return "null";
      }
      if (Array.isArray(obj)) {
        return "[" + obj.map(jsonDumps).join(", ") + "]";
      }
      return (
        "{" +
        Object.entries(obj)
          .map((e) => jsonRepr(e[0]) + ": " + jsonDumps(e[1]))
          .join(", ") +
        "}"
      );
    default:
      throw new CCError(
        "unexpected object type that shouldn't appear in JSON: " + typeof obj,
      );
  }
}
// Renders a parsed JSON value as Python source (True/False/None, reprStr'd
// strings, 4-space-indented lists and dicts).
function objToPython(
  obj: string | number | boolean | object | null,
  indent = 0,
): string {
  if (isLosslessNumber(obj)) {
    const numAsStr = jsonStringifyLossless(obj) as string;
    // If the number is a large float, it might not be representable in Python
    // Both JavaScript and Python use f64 so we check if the float
    // is representable in JavaScript.
    if (!isInteger(numAsStr) && !isSafeNumber(numAsStr)) {
      throw new CCError("float unrepresentable in Python: " + numAsStr);
    }
    // Displaying floats as they will be serialized in Python would help users
    // understand why they're getting the "JSON won't be serialized as it was originally"
    // message, but I think displaying them as they appear in the JSON is likely
    // to be more convenient if you need to edit the value.
    return numAsStr;
  }
  if (typeof obj === "string") {
    return reprStr(obj);
  }
  if (typeof obj === "number") {
    // TODO: there are differences in number serialization between Python and JavaScript
    // TODO: if the number in the JSON file is too big for JavaScript, we will lose information
    return obj.toString();
  }
  if (typeof obj === "boolean") {
    return obj ? "True" : "False";
  }
  if (typeof obj === "object") {
    if (obj === null) {
      return "None";
    }
    const innerPad = " ".repeat(indent + 4);
    const closePad = " ".repeat(indent);
    if (Array.isArray(obj)) {
      if (!obj.length) {
        return "[]";
      }
      const items = obj.map(
        (item) => innerPad + objToPython(item, indent + 4) + ",\n",
      );
      return "[\n" + items.join("") + closePad + "]";
    }
    const entries = Object.entries(obj);
    if (!entries.length) {
      return "{}";
    }
    // reprStr() because JSON keys must be strings.
    const items = entries.map(
      ([k, v]) =>
        innerPad + reprStr(k) + ": " + objToPython(v, indent + 4) + ",\n",
    );
    return "{\n" + items.join("") + closePad + "}";
  }
  throw new CCError(
    "unexpected object type that shouldn't appear in JSON: " + typeof obj,
  );
}
// Renders the request headers as a Python "headers = {...}" assignment.
// Headers listed in commentedOutHeaders are emitted as comments, optionally
// preceded by an explanatory comment line.
export function formatHeaders(
  headers: Headers,
  commentedOutHeaders: { [key: string]: string },
  osVars: OSVars,
  imports: Set<string>,
): string {
  // TODO: what if there are repeat headers
  const lines: string[] = ["headers = {"];
  for (const [headerName, headerValue] of headers) {
    if (headerValue === null) {
      continue;
    }
    const headerNameLower = headerName.toLowerCase().toString();
    let lineStart = " ";
    if (has(commentedOutHeaders, headerNameLower)) {
      const reason = commentedOutHeaders[headerNameLower];
      if (reason) {
        lines.push(" # " + reason);
      }
      lineStart = " # ";
    }
    lines.push(
      lineStart +
        repr(headerName, osVars, imports) +
        ": " +
        repr(headerValue, osVars, imports) +
        ",",
    );
  }
  lines.push("}");
  return lines.join("\n") + "\n";
}
// Percent-decodes a query-string component, returning null when the string
// can't be decoded or wouldn't round-trip through re-encoding.
function decodePercentEncoding(s: Word): Word | null {
  // https://url.spec.whatwg.org/#urlencoded-parsing recommends replacing + with space
  // before decoding.
  const plusAsSpace = s.replace(/\+/g, " ");
  let decoded;
  try {
    decoded = wordDecodeURIComponent(plusAsSpace);
  } catch (e) {
    // String contains invalid percent encoded characters
    if (e instanceof URIError) {
      return null;
    }
    throw e;
  }
  // If the query string doesn't round-trip, we cannot properly convert it.
  const roundTripKey = percentEncode(decoded);
  // If the original data used %20 instead of + (what requests will send), that's close enough
  if (eq(roundTripKey, s) || eq(roundTripKey.replace(/%20/g, "+"), s)) {
    return decoded;
  }
  return null;
}
// Groups consecutive repeats of the same key into one entry whose value is a
// list. Returns null when a key repeats non-consecutively, since then the
// entries can't be represented as an (order-preserving) dictionary.
function dataEntriesToDict(
  dataEntries: Array<[string, string]>,
): { [key: string]: Array<string> } | null {
  // TODO: because keys can be code that reads from a file, those should not be considered the
  // same key, for example what if that file is /dev/urandom.
  // TODO: would we need to distinguish if /dev/urandom came from @/dev/urandom or from @-?
  const asDict: { [key: string]: Array<string> } = {};
  let prevKey: string | null = null;
  for (const [key, val] of dataEntries) {
    if (key === prevKey) {
      // Same key as the previous entry: extend its value list.
      asDict[key].push(val);
      continue;
    }
    if (Object.prototype.hasOwnProperty.call(asDict, key)) {
      // A repeated key with a different key between one of its repetitions
      // means we can't represent these entries as a dictionary.
      return null;
    }
    asDict[key] = [val];
    prevKey = key;
  }
  return asDict;
}
// Renders already-repr()'d key/value entries as a Python dict literal, or as
// a list of tuples when the keys can't form a dict (non-consecutive repeats).
function dataEntriesToPython(dataEntries: Array<[string, string]>): string {
  if (!dataEntries.length) {
    return "''"; // This shouldn't happen
  }
  const entriesDict = dataEntriesToDict(dataEntries);
  if (entriesDict === null) {
    // Fall back to a list of (key, value) tuples.
    const tupleLines = dataEntries.map(
      ([key, val]) => " (" + key + ", " + val + "),\n",
    );
    return "[\n" + tupleLines.join("") + "]";
  }
  if (Object.keys(entriesDict).length === 0) {
    return "''"; // This shouldn't happen
  }
  let out = "{\n";
  for (const [key, vals] of Object.entries(entriesDict)) {
    out += " " + key + ": ";
    if (vals.length === 0) {
      out += "''"; // This shouldn't happen
    } else if (vals.length === 1) {
      out += vals[0];
    } else {
      // Repeated key: render its values as a Python list.
      out += "[\n";
      for (const val of vals) {
        out += " " + val + ",\n";
      }
      out += " ]";
    }
    out += ",\n";
  }
  return out + "}";
}
// Tries to render the request body as a Python dict/list of entries
// (suitable for Requests' data=/params=). Returns [generated assignment,
// string to warn about for %20→+ conversion] or null when the body can't be
// represented as entries.
function formatDataAsEntries(
  dataArray: DataParam[],
  osVars: OSVars,
  imports: Set<string>,
  variableName: "data" | "params" = "data",
): [string, string] | null {
  // This code is more complicated than you might expect because it needs
  // to handle a --data-urlencode that reads from a file followed by --json
  // because --json doesn't add an '&' before its value. Specifically, we
  // have these cases:
  //
  // --data-urlencode @filename --json =value
  // {open('filename').read(): 'value'}
  //
  // --data-urlencode @filename --json key=value
  // {open('filename').read() + 'key': 'value'}
  //
  // --data-urlencode @filename --json key
  // error
  //
  // --data-urlencode name@filename --json value
  // {'name': open('filename').read() + 'value'}
  //
  // --data-urlencode name@filename --json key=value
  // error
  //
  // --data-urlencode name@filename --json =blah
  // error
  //
  // --data-urlencode adds an '&' before its value, so we don't have to worry about
  // --json <foo> --data-urlencode <bar>
  //
  // Only plain words and --data-urlencode file reads can be split into entries.
  for (const d of dataArray) {
    if (!(d instanceof Word) && d.filetype !== "urlencode") {
      return null;
    }
  }
  // Each entry is [rendered key, rendered value]; a null value marks an
  // incomplete entry from a file read that still needs its value/merge.
  const dataEntries: Array<[string, string | null]> = [];
  // First string seen containing "%20" (for the caller's warning), if any.
  let percentWarn = new Word();
  for (const [i, d] of dataArray.entries()) {
    if (d instanceof Word) {
      let newEntries = d.split("&");
      const prevEntry = i > 0 ? dataEntries[dataEntries.length - 1] : null;
      if (prevEntry !== null) {
        // If there's a prevEntry, we can assume it came from --data-urlencode
        // because it would be part of the current `d` string if it didn't.
        const [first, ...rest] = newEntries;
        if (first.includes("=") && prevEntry[1] === null) {
          // The file read was a bare key; this piece completes the key and
          // supplies the value.
          const [key, val] = first.split("=", 2);
          const decodedKey = decodePercentEncoding(key);
          if (decodedKey === null) {
            return null;
          }
          const decodedVal = decodePercentEncoding(val);
          if (decodedVal === null) {
            return null;
          }
          if (key.toBool()) {
            prevEntry[0] += " + " + repr(decodedKey, osVars, imports);
          }
          prevEntry[1] = repr(decodedVal, osVars, imports);
          if (!percentWarn.toBool()) {
            if (key.includes("%20")) {
              percentWarn = key;
            }
            if (val.includes("%20")) {
              percentWarn = val;
            }
          }
        } else if (!first.includes("=") && prevEntry[1] !== null) {
          // The file read already had a value; this piece extends that value.
          if (first.toBool()) {
            const decodedVal = decodePercentEncoding(first);
            if (decodedVal === null) {
              return null;
            }
            prevEntry[1] += " + " + repr(decodedVal, osVars, imports);
            if (!percentWarn.toBool() && first.includes("%20")) {
              percentWarn = first;
            }
          }
        } else {
          // Ambiguous combination (see the case table above).
          return null;
        }
        newEntries = rest;
      }
      for (const [j, entry] of newEntries.entries()) {
        if (
          entry.isEmpty() &&
          j === newEntries.length - 1 &&
          i !== dataArray.length - 1
        ) {
          // A --data-urlencoded should come next
          continue;
        }
        if (!entry.includes("=")) {
          return null;
        }
        const [key, val] = entry.split("=", 2);
        const decodedKey = decodePercentEncoding(key);
        if (decodedKey === null) {
          return null;
        }
        const decodedVal = decodePercentEncoding(val);
        if (decodedVal === null) {
          return null;
        }
        dataEntries.push([
          repr(decodedKey, osVars, imports),
          repr(decodedVal, osVars, imports),
        ]);
        if (!percentWarn.toBool()) {
          if (key.includes("%20")) {
            percentWarn = key;
          }
          if (val.includes("%20")) {
            percentWarn = val;
          }
        }
      }
      continue;
    }
    // TODO: I bet Python doesn't treat file paths identically to curl
    const readFile = eq(d.filename, "-")
      ? "sys.stdin.read()"
      : "open(" + repr(d.filename, osVars, imports, false, true) + ").read()";
    if (!d.name) {
      // Bare @filename: the read is the key, value to be filled in later.
      dataEntries.push([readFile, null]);
    } else {
      // Curl doesn't percent encode the name but Requests will
      if (!eq(d.name, percentEncode(d.name))) {
        return null;
      }
      dataEntries.push([repr(d.name, osVars, imports), readFile]);
    }
  }
  // Any entry still missing its value means the input was malformed for this
  // representation.
  if (dataEntries.some((e) => e[1] === null)) {
    return null;
  }
  return [
    variableName +
      " = " +
      dataEntriesToPython(dataEntries as [string, string][]) +
      "\n",
    percentWarn.toString(),
  ];
}
// Renders the request body as Python string/bytes concatenation statements
// ("data = ..." / "data += ..."), including `with open(...)` blocks for file
// reads. Returns the generated lines and whether the caller still needs to
// append ".encode()".
function formatDataAsStr(
  dataArray: DataParam[],
  imports: Set<string>,
  osVars: OSVars,
  variableName: "data" | "params" = "data",
): [string, boolean] {
  // If one of the arguments has to be binary, then they all have to be binary
  // because we can't mix bytes and str.
  // An argument has to be binary when the input command has
  // --data-binary @filename
  // otherwise we could generate code that will try to read an image file as text and error.
  const binary = dataArray.some(
    (d) => !(d instanceof Word) && d.filetype === "binary",
  );
  const reprFunc = binary ? reprb : repr;
  const prefix = binary ? "b" : "";
  const mode = binary ? ", 'rb'" : "";
  // If we see a string with non-ASCII characters, or read from a file (which may contain
  // non-ASCII characters), we convert the entire string to bytes at the end.
  // curl sends bytes as-is, which is presumably in UTF-8, whereas Requests sends
  // 0x80-0xFF as Latin-1 (as-is) and throws an error if it sees codepoints
  // above 0xFF.
  // TODO: is this actually helpful?
  let encode = false;
  let encodeOnSeparateLine = false;
  const lines = [];
  // Text carried over from the previous piece (a trailing '&' or a
  // --data-urlencode name=), prepended to the next line.
  let extra = new Word();
  let i, d;
  for ([i, d] of dataArray.entries()) {
    const op = i === 0 ? "=" : "+=";
    let line = variableName + " " + op + " ";
    if (i < dataArray.length - 1 && d instanceof Word && d.endsWith("&")) {
      // Put the trailing '&' on the next line so that we don't have single '&'s on their own lines
      extra = new Word("&");
      d = d.slice(0, -1);
    }
    if (d instanceof Word) {
      if (d.length) {
        line += reprFunc(d, osVars, imports);
        lines.push(line);
        // Non-ASCII content means the final value must be .encode()'d.
        encode ||= d.test(/[^\x00-\x7F]/);
      }
      continue;
    }
    // d is a file read (@filename form).
    const { filetype, name, filename } = d;
    if (filetype === "urlencode" && name) {
      line += reprFunc(extra.add(name).append("="), osVars, imports) + " + ";
      encodeOnSeparateLine = true; // we would need to add parentheses because of the +
    } else if (extra.toBool()) {
      line += reprFunc(extra, osVars, imports) + " + ";
      encodeOnSeparateLine = true;
    }
    if (extra.toBool()) {
      encodeOnSeparateLine = true; // we would need to add parentheses because of the +
    }
    let readFile = "";
    if (eq(filename, "-")) {
      readFile += binary ? "sys.stdin.buffer" : "sys.stdin";
      imports.add("sys");
    } else {
      // TODO: if filename is a command, this won't work because unlike bash,
      // Python won't remove the trailing newline from the result of a command
      // we need to add .trim()
      line =
        "with open(" +
        repr(filename, osVars, imports, false, true) +
        mode +
        ") as f:\n " +
        line;
      readFile += "f";
    }
    readFile += ".read()";
    if (!["binary", "json", "urlencode"].includes(filetype)) {
      // Plain --data@file: curl strips newlines from the file contents.
      readFile += `.replace(${prefix}'\\n', ${prefix}'').replace(${prefix}'\\r', ${prefix}'')`;
    }
    if (filetype === "urlencode") {
      readFile = "quote_plus(" + readFile + ")";
      if (binary) {
        // quote_plus() always returns a string
        readFile += ".encode()";
      }
      imports.add("urllib.parse.quote_plus");
    } else {
      // --data-urlencode files don't need to be encoded because
      // they'll be percent-encoded and therefore ASCII-only
      encode = true;
    }
    line += readFile;
    lines.push(line);
    extra = new Word();
  }
  if (binary) {
    // Bytes mode: already binary, nothing to encode.
    encode = false;
  } else if (encode && lines.length === 1 && !encodeOnSeparateLine) {
    // Single simple line: append .encode() inline instead of a separate statement.
    lines[lines.length - 1] += ".encode()";
    encode = false;
  }
  return [lines.join("\n") + "\n", encode];
}
// Tries to render the body as a Python "json_data = ..." assignment for
// Requests' json= argument. Returns [generated code or null, whether
// Python's json.dumps() would reproduce the original body exactly].
export function formatDataAsJson(
  d: DataParam,
  imports: Set<string>,
  osVars: OSVars,
): [string | null, boolean] {
  if (d instanceof Word) {
    if (!d.isString()) {
      // TODO: a JSON parser that allows shell variables
      return [null, false];
    }
    // Try to parse using lossless-json first, then fall back to JSON.parse
    // TODO: repeated dictionary keys are discarded
    // https://github.com/josdejong/lossless-json/issues/244
    let dataAsJson;
    try {
      // TODO: types
      // https://github.com/josdejong/lossless-json/issues/245
      dataAsJson = jsonParseLossless(d.toString()) as any;
    } catch {
      try {
        dataAsJson = JSON.parse(d.toString());
      } catch {
        // Not valid JSON at all.
        return [null, false];
      }
    }
    try {
      const jsonDataString = "json_data = " + objToPython(dataAsJson) + "\n";
      // JSON might not be serialized by Python exactly as it was originally
      // due to different whitespace, float formatting like extra + in exponent
      // (1e100 vs 1e+100), different escape sequences in strings
      // ("\/" vs "/" or "\u0008" vs "\b") or duplicate object keys.
      let jsonRoundtrips = false;
      try {
        // d is a single-token string here, so tokens[0] is the raw body text.
        jsonRoundtrips = jsonDumps(dataAsJson) === d.tokens[0];
      } catch {}
      return [jsonDataString, jsonRoundtrips];
    } catch {}
    // objToPython threw (e.g. unrepresentable float): fall through to [null, false].
  } else if (d.filetype === "json") {
    // --json @file: load the file with Python's json module at runtime.
    let jsonDataString = "";
    jsonDataString +=
      "with open(" +
      repr(d.filename, osVars, imports, false, true) +
      ") as f:\n";
    jsonDataString += " json_data = json.load(f)\n";
    imports.add("json");
    return [jsonDataString, false];
  }
  return [null, false];
}
// Picks the best representation for the request body and returns
// [data= code, whether .encode() is still needed, json= code, imports].
// May delete the content-type header from `request` when Requests would add
// it automatically.
function getDataString(
  request: Request,
  osVars: OSVars,
  warnings: Warnings,
): [string | null, boolean | null, string | null, Set<string>] {
  const imports = new Set<string>();
  if (!request.data || !request.dataArray) {
    return [null, false, null, imports];
  }
  // There's 4 ways to pass data to Requests (in descending order of preference):
  // a or dictionary/list as the json= argument
  // a dictionary, or a list of tuples (if the dictionary would have duplicate keys) as the data= argument
  // a string as data=
  // bytes as data=
  // We can pass json= if the data is valid JSON and we've specified json in the
  // Content-Type header because passing json= will set that header.
  //
  // However, if there will be a mismatch between how the JSON is formatted
  // we need to output a commented out version of the request with data= as well.
  // This can happen when there's extra whitespace in the original data or
  // because the JSON contains numbers that are too big to be stored in
  // JavaScript or because there's objects with duplicate keys.
  const contentType = request.headers.get("content-type");
  let dataAsJson: string | null = null;
  let jsonRoundtrips = false;
  if (
    request.dataArray.length === 1 &&
    contentType &&
    contentType.split(";")[0].toString().trim() === "application/json"
  ) {
    [dataAsJson, jsonRoundtrips] = formatDataAsJson(
      request.dataArray[0],
      imports,
      osVars,
    );
  }
  if (jsonRoundtrips) {
    // json= alone reproduces the body exactly; no data= fallback needed.
    return [null, false, dataAsJson, imports];
  }
  // data= can't be a dict or a list of tuples (i.e. entries) when
  // there is a @file from --data, --data-binary or --json (because they can contain an '&' which would get escaped)
  // there is a --data-urlencode without a name= or name@
  // if you split the input on & and there's a value that doesn't contain an = (e.g. --data "foo=bar&" or simply --data "&")
  // there is a name or value that doesn't roundtrip through percent encoding
  const dataAsEntries = formatDataAsEntries(request.dataArray, osVars, imports);
  if (dataAsEntries !== null) {
    const [dataEntries, percentWarn] = dataAsEntries;
    if (
      eq(
        request.headers.get("content-type"),
        "application/x-www-form-urlencoded",
      ) &&
      request.headers.length === 1
    ) {
      // Requests adds the header when you include a body
      // so if it's the only header, don't print the headers
      request.headers.delete("content-type");
    }
    if (percentWarn) {
      warnings.push([
        "percent-encoded-spaces-in-data",
        'data contains spaces encoded by curl as "%20" which will be sent as "+" instead: ' +
          JSON.stringify(percentWarn),
      ]);
    }
    return [dataEntries, false, dataAsJson, imports];
  }
  // Last resort: render as string/bytes concatenation.
  const [dataAsString, shouldEncode] = formatDataAsStr(
    request.dataArray,
    imports,
    osVars,
  );
  return [dataAsString, shouldEncode, dataAsJson, imports];
}
/**
 * Renders request.multipartUploads as a Python "files = ..." assignment for
 * Requests' files= argument: a dict when all names are unique, otherwise a
 * list of tuples.
 *
 * @returns the generated assignment and whether the code reads stdin
 *   (caller is responsible for the corresponding "import sys" —
 *   NOTE(review): confirm the caller handles usesStdin)
 */
function getFilesString(
  request: Request,
  osVars: OSVars,
  imports: Set<string>,
): [string, boolean] {
  let usesStdin = false;
  if (!request.multipartUploads) {
    return ["", usesStdin];
  }
  const multipartUploads = request.multipartUploads.map((m) => {
    // https://github.com/psf/requests/blob/2d5517682b3b38547634d153cea43d48fbc8cdb5/requests/models.py#L117
    //
    // Requests's multipart syntax looks like this:
    // name/filename: content
    // name: open(filename/contentFile)
    // name: (filename, open(contentFile)[, contentType[, headers]]))
    const name = m.name ? repr(m.name, osVars, imports) : "None";
    if (!("contentType" in m) && !("headers" in m) && !("encoder" in m)) {
      // Simple cases that don't need the full tuple form.
      if (
        "contentFile" in m &&
        eq(m.contentFile, m.filename) &&
        !eq(m.contentFile, "-")
      ) {
        return [
          name,
          "open(" +
            repr(m.contentFile, osVars, imports, false, true) +
            ", 'rb')",
        ];
      }
      if ("content" in m && "filename" in m && eq(m.name, m.filename)) {
        return [name, repr(m.content, osVars, imports)];
      }
    }
    // Full tuple form: (filename, content[, contentType[, headers]]).
    const sentFilename =
      "filename" in m && m.filename
        ? repr(m.filename, osVars, imports)
        : "None";
    const tuple = [sentFilename];
    if ("contentFile" in m) {
      if (eq(m.contentFile, "-")) {
        // TODO: use piped stdin if we have it
        usesStdin = true;
        // Fixed: this string contained a stray ')' ("...read())") which
        // produced unbalanced parentheses in the generated tuple.
        tuple.push("sys.stdin.buffer.read()");
      } else {
        tuple.push(
          "open(" +
            repr(m.contentFile, osVars, imports, false, true) +
            ", 'rb')",
        );
      }
    } else {
      tuple.push(repr(m.content, osVars, imports));
    }
    let addedContentType = false;
    if ("contentType" in m && m.contentType) {
      tuple.push(repr(m.contentType, osVars, imports));
      addedContentType = true;
    }
    if (
      ("headers" in m && m.headers) ||
      ("headerFiles" in m && m.headerFiles)
    ) {
      if (!addedContentType) {
        // Placeholder so the headers land in the 4th tuple slot.
        tuple.push("None");
      }
      const headerArg = [];
      if (m.headers) {
        let headerDict = "{";
        for (const header of m.headers) {
          // TODO: can have multiple headers in one headers= argument?
          const [name, value] = header.split(":", 2);
          headerDict +=
            repr(name, osVars, imports) +
            ": " +
            repr(value.trimStart() || new Word(), osVars, imports) +
            ", ";
        }
        if (headerDict.endsWith(", ")) {
          headerDict = headerDict.slice(0, -2);
        }
        headerDict += "}";
        headerArg.push(headerDict);
      }
      if (m.headerFiles && m.headerFiles.length) {
        for (const headerFile of m.headerFiles) {
          headerArg.push(
            "read_headerfile(" +
              repr(headerFile, osVars, imports, false, true) +
              ")",
          );
        }
        imports.add("read_headerfile");
      }
      tuple.push(headerArg.join(" | "));
    }
    return [name, "(" + tuple.join(", ") + ")"];
  });
  const multipartUploadsAsDict = Object.fromEntries(multipartUploads);
  let filesString = "files = ";
  if (Object.keys(multipartUploadsAsDict).length === multipartUploads.length) {
    filesString += "{\n";
    for (const [multipartKey, multipartValue] of multipartUploads) {
      filesString += " " + multipartKey + ": " + multipartValue + ",\n";
    }
    filesString += "}\n";
  } else {
    // Duplicate names: a dict would silently drop entries, so emit a list.
    filesString += "[\n";
    for (const [multipartKey, multipartValue] of multipartUploads) {
      filesString += " (" + multipartKey + ", " + multipartValue + "),\n";
    }
    filesString += "]\n";
  }
  return [filesString, usesStdin];
}
// Don't add indent/comment characters to empty lines, most importantly the last line
// which will be empty when there's a trailing newline.
function indent(s: string, level: number) {
  if (level === 0) {
    return s;
  }
  const prefix = " ".repeat(level);
  const indented = s
    .split("\n")
    .map((line) => (line.trim() === "" ? line : prefix + line));
  return indented.join("\n");
}
// Prefixes every non-blank line with '#', turning generated Python into a
// commented-out alternative (blank lines are left untouched).
function commentOut(s: string) {
  const commented = s
    .split("\n")
    .map((line) => (line.trim() === "" ? line : "#" + line));
  return commented.join("\n");
}
// Pushes a warning only once per warning ID, tracking seen IDs in
// `seenWarnings`.
function uniqueWarn(
  seenWarnings: Set<string>,
  warnings: Warnings,
  warning: [string, string],
) {
  const [warningId] = warning;
  if (seenWarnings.has(warningId)) {
    return;
  }
  seenWarnings.add(warningId);
  warnings.push(warning);
}
// Joins rendered Python arguments into a call's parenthesized argument list,
// one per line when the combined length would be too long.
function joinArgs(args: string[]) {
  if (args.join("").length < 100) {
    return "(" + args.join(", ") + ")";
  }
  let out = "(\n";
  for (const arg of args) {
    out += " " + arg + ",\n";
  }
  return out + ")";
}
function requestToPython(
request: Request,
warnings: Warnings = [],
imports: Set<string>,
thirdPartyImports: Set<string>,
): string {
const osVars: OSVars = {};
const commentedOutHeaders: { [key: string]: string } = {
// TODO: add a warning why this should be commented out?
"accept-encoding": "",
"content-length": "",
};
// https://github.com/icing/blog/blob/main/curl_on_a_weekend.md
if (eq(request.headers.get("te"), "trailers")) {
commentedOutHeaders.te = "Requests doesn't support trailers";
}
let cookieStr;
let cookieFile: Word | null = null;
if (request.cookies) {
// TODO: handle duplicate cookie names
cookieStr = "cookies = {\n";
for (const [cookieName, cookieValue] of request.cookies) {
cookieStr +=
" " +
repr(cookieName, osVars, imports) +
": " +
repr(cookieValue, osVars, imports) +
",\n";
}
cookieStr += "}\n";
// Before Python 3.11, cookies= was sorted alphabetically
// https://github.com/python/cpython/issues/86232
commentedOutHeaders.cookie = "";
if (request.cookieFiles) {
warnings.push([
"cookie-files",
"passing both cookies and cookie files with --cookie/-b is not supported",
]);
}
if (request.cookieJar) {
warnings.push([
"cookie-files",
"passing both cookies and --cookie-jar/-c is not supported",
]);
}
} else if (
(request.cookieFiles && request.cookieFiles.length) ||
request.cookieJar
) {
imports.add("http.cookiejar.MozillaCookieJar");
if (request.cookieFiles && request.cookieFiles.length) {
// TODO: what if user passes multiple cookie files?
// TODO: what if user passes cookies and cookie files?
cookieFile = request.cookieFiles[request.cookieFiles.length - 1];
if (request.cookieFiles.length > 1) {
warnings.push([
"cookie-files",
// TODO: curl reads all of them.
"multiple cookie files are not supported, using the last one: " +
JSON.stringify(cookieFile.toString()),
]);
}
// TODO: do we need to .load()?
cookieStr =
"cookies = MozillaCookieJar(" +
repr(cookieFile, osVars, imports) +
")\n";
} else if (request.cookieJar) {
cookieStr = "cookies = MozillaCookieJar()\n";
}
}
let proxyDict;
if (request.proxy) {
let proxy = request.proxy.includes("://")
? request.proxy
: request.proxy.prepend("http://");
const protocol = proxy.split("://")[0].toLowerCase();
if (eq(protocol, "socks")) {
// https://github.com/curl/curl/blob/curl-7_86_0/lib/url.c#L2418-L2419
proxy = proxy.replace("socks", "socks4");
}
proxyDict = "proxies = {\n";
proxyDict += " 'http': " + repr(proxy, osVars, imports) + ",\n";
// TODO: if (protocol !== "http") { ?
proxyDict += " 'https': " + repr(proxy, osVars, imports) + ",\n";
proxyDict += "}\n";
}
let certStr;
if (request.cert || request.key) {
certStr = "cert = ";
let certPart = "None";
if (request.cert) {
const [cert, password] = request.cert;
certPart = repr(cert, osVars, imports);
if (password) {
warnings.push([
"cert-password",
"passing a password to --cert is not supported: " +
JSON.stringify(password.toString()),
]);
}
}
if (request.key) {
certStr +=
"(" + certPart + ", " + repr(request.key, osVars, imports) + ")";
} else {
certStr += certPart;
}
certStr += "\n";
}
// if there's only 1 URL, put params all together here, unless it's just one string.
// if there's more than 1, if we have params that are added to each URL from
// --get --data or --url-query that need to read a file, put just the shared part
// here, then keep the query in the URL, in the URL.
// If there's no --get --data or --url-query, then
// put params (if it can be rendered as a list or dict) right before the requests line
// Otherwise, keep the query in the URL.
let paramsStr;
let shouldEncodeParams; // TODO: necessary?
function readsFile(paramArray: DataParam[]) {
return paramArray.some((p) => !(p instanceof Word));
}
const paramArray =
request.urls.length === 1 ? request.urls[0].queryArray : request.queryArray;
if (
paramArray &&
(request.urls.length === 1 ||
(request.urls.length > 1 && readsFile(paramArray)))
) {
const queryAsEntries = formatDataAsEntries(
paramArray,
osVars,
imports,
"params",
);
if (queryAsEntries !== null) {
let percentWarn;
[paramsStr, percentWarn] = queryAsEntries;
if (percentWarn) {
warnings.push([
"percent-encoded-spaces-in-query",
// TODO: will they?
'URL querystring contains spaces encoded as "%20" which will be sent by Requests as "+" instead: ' +
JSON.stringify(percentWarn),
]);
}
} else if (readsFile(paramArray)) {
[paramsStr, shouldEncodeParams] = formatDataAsStr(
paramArray,
imports,
osVars,
"params",
);
}
}
const contentType = request.headers.get("content-type");
let dataString;
let jsonDataString;
let filesString;
let shouldEncode;
if (request.urls[0].uploadFile && request.urls.length === 1) {
// TODO: https://docs.python-requests.org/en/latest/user/advanced/#streaming-uploads
if (
eq(request.urls[0].uploadFile, "-") ||
eq(request.urls[0].uploadFile, ".")
) {
dataString = "data = sys.stdin.buffer.read()\n";
imports.add("sys");
} else {
dataString =
"with open(" +
repr(request.urls[0].uploadFile, osVars, imports, false, true) +
", 'rb') as f:\n";
dataString += " data = f.read()\n";
}
} else if (request.multipartUploads) {
let usesStdin = false;
[filesString, usesStdin] = getFilesString(request, osVars, imports);
if (usesStdin) {
imports.add("sys");
}
// If you pass files= then Requests adds this header and a `boundary`
// If you manually pass a Content-Type header it won't set a `boundary`
// wheras curl does, so the request will fail.
// https://github.com/curlconverter/curlconverter/issues/248
if (
filesString &&
contentType &&
eq(contentType.trim(), "multipart/form-data") &&
!contentType.includes("boundary=")
) {
// TODO: better wording
commentedOutHeaders["content-type"] =
"requests won't add a boundary if this header is set when you pass files=";
}
} else if (request.data && !request.data.isEmpty()) {
// !isEmpty() because passing data='' is the same as not passing data=
// We need to set the Content-Type header in headers= and not set data=
let dataImports: Set<string>;
[dataString, shouldEncode, jsonDataString, dataImports] = getDataString(
request,
osVars,
warnings,
);
dataImports.forEach(imports.add, imports);
// Remove "Content-Type" from the headers dict
// because Requests adds it automatically when you use json=
if (
jsonDataString &&
!dataString &&
contentType &&
eq(contentType.trim(), "application/json")
) {
commentedOutHeaders["content-type"] = "Already added when you pass json=";
}
}
let headerDict;
if (request.headers.length) {
headerDict = formatHeaders(
request.headers,
commentedOutHeaders,
osVars,
imports,
);
}
let pythonCode = "";
if (proxyDict) {
pythonCode += proxyDict + "\n";
}
if (cookieStr) {
pythonCode += cookieStr + "\n";
}
if (headerDict) {
pythonCode += headerDict + "\n";
}
if (paramsStr) {
pythonCode += paramsStr + "\n";
}
if (certStr) {
pythonCode += certStr + "\n";
}
if (jsonDataString) {
pythonCode += jsonDataString + "\n";
} else if (dataString) {
pythonCode += dataString + "\n";
} else if (filesString) {
pythonCode += filesString + "\n";
}
// By default, curl doesn't follow redirects and Requests does.
// Unless redirect behavior has been explicitly set with -L/--location/--no-location
// or --max-redirs 0 we pretend generate code that follows redirects,
// because adding allow_redirects=False to almost every command would be ugly
// and it only matters when the server responds with a redirect, which isn't
// that common.
let followRedirects = request.followRedirects;
let maxRedirects = undefined;
if (request.maxRedirects !== undefined) {
maxRedirects = asInt(request.maxRedirects, osVars, imports);
}
if (followRedirects === undefined) {
followRedirects = true;
// Users would see this warning for most commands
// warnings.push([
// "--location",
// "Requests defaults to following redirects, curl doesn't",
// ]);
}
const hasMaxRedirects =
followRedirects &&
maxRedirects &&
maxRedirects !== "0" &&
maxRedirects !== "30"; // Requests default
// Things that vary per-url:
// method (because --upload-file can make it PUT)
// data= (because of --upload-file)
// --output file
// params= (because of the query string)
// auth= (because the URL can have an auth string)
const seenWarnings: Set<string> = new Set();
const requestLines = [];
let extraEmptyLine = false;
for (const [urlObjIndex, urlObj] of request.urls.entries()) {
const requestsMethods = [
"GET",
"HEAD",
"POST",
"PATCH",
"PUT",
"DELETE",
"OPTIONS", // undocumented
];
let fn;
const args = [];
const methodAsString = urlObj.method.toString();
if (urlObj.method.isString() && requestsMethods.includes(methodAsString)) {
fn = methodAsString.toLowerCase();
} else {
fn = "request";
args.push(repr(urlObj.method, osVars, imports));
if (
urlObj.method.isString() &&
methodAsString !== methodAsString.toUpperCase()
) {
warnings.push([
"method",
"Requests will uppercase the HTTP method: " +
JSON.stringify(methodAsString),
]);
}
}
let urlParamsStr;
let url = urlObj.url;
if (request.urls.length === 1) {
if (paramsStr) {
url = urlObj.urlWithoutQueryArray;
} else {
url = urlObj.url;
}
} else {
if (paramsStr) {
url = urlObj.urlWithOriginalQuery;
} else {
if (urlObj.queryArray && urlObj.queryArray.length > 0) {
const urlQueryAsEntries = formatDataAsEntries(
urlObj.queryArray,
osVars,
imports,
"params",
);
if (urlQueryAsEntries !== null) {
let percentWarn;
[urlParamsStr, percentWarn] = urlQueryAsEntries;
url = urlObj.urlWithoutQueryArray;
if (percentWarn) {
warnings.push([
"percent-encoded-spaces-in-query",
// TODO: will they?
'query contains spaces encoded by curl as "%20" which will be sent as "+" instead: ' +
JSON.stringify(percentWarn),
]);
}
} else if (readsFile(urlObj.queryArray)) {
[urlParamsStr, shouldEncodeParams] = formatDataAsStr(
urlObj.queryArray,
imports,
osVars,
"params",
);
url = urlObj.urlWithoutQueryArray;
}
}
// url = urlObj.url
}
}
args.push(repr(url, osVars, imports));
if (paramsStr || urlParamsStr) {
args.push("params=params" + (shouldEncodeParams ? ".encode()" : ""));
}
if (cookieStr && !request.cookieJar) {
args.push("cookies=cookies");
}
if (headerDict) {
args.push("headers=headers");
}
if (urlObj.uploadFile) {
if (request.urls.length > 1) {
// If there's more than one URL we could have --data for all
// of them and --upload-file for just one of them and we can't
// overwrite the `data` variable in that case.
args.push("data=file_contents");
} else {
args.push("data=data");
}
} else if (filesString) {
args.push("files=files");
} else if (request.data && !request.data.isEmpty()) {
if (jsonDataString) {
args.push("json=json_data");
} else {
args.push("data=data" + (shouldEncode ? ".encode()" : ""));
}
}
if (proxyDict) {
args.push("proxies=proxies");
}
if (certStr) {
args.push("cert=cert");
}
const certOrPath = request.cacert || request.capath;
if (request.insecure) {
args.push("verify=False");
} else if (certOrPath) {
args.push("verify=" + repr(certOrPath, osVars, imports));
}
// TODO: does this header check apply to all auth methods?
if (urlObj.auth && !request.headers.has("Authorization")) {
const [user, password] = urlObj.auth;
let auth =
"(" +
repr(user, osVars, imports) +
", " +
repr(password, osVars, imports) +
")";
switch (request.authType) {
case "basic":
break;
case "digest":
thirdPartyImports.add("requests.auth.HTTPDigestAuth");
auth = "HTTPDigestAuth" + auth;
break;
case "ntlm":
case "ntlm-wb":
thirdPartyImports.add("requests_ntlm.HttpNtlmAuth");
auth = "HttpNtlmAuth" + auth;
// TODO: this could stop being true
uniqueWarn(seenWarnings, warnings, [
"ntlm",
"requests-ntlm is unmaintained",
]);
break;
case "negotiate":
thirdPartyImports.add("requests_gssapi.HTTPSPNEGOAuth");
auth = "HTTPSPNEGOAuth(";
if (request.delegation) {
if (eq(request.delegation, "always")) {
auth += "delegate=True";
} else if (eq(request.delegation, "none")) {
auth += "delegate=False";