UNPKG

@clickup/ent-framework

Version:

A PostgreSQL graph-database-alike library with microsharding and row-level security

1,146 lines (1,066 loc) 37.1 kB
import assert from "assert";
import difference from "lodash/difference";
import last from "lodash/last";
import random from "lodash/random";
import uniq from "lodash/uniq";
import type { QueryAnnotation } from "../abstract/QueryAnnotation";
import { Runner } from "../abstract/Runner";
import type { Schema } from "../abstract/Schema";
import { hasKey } from "../internal/misc";
import type {
  Field,
  FieldAliased,
  Hints,
  Literal,
  Table,
  Value,
  Where,
} from "../types";
import { ID } from "../types";
import { escapeIdent } from "./helpers/escapeIdent";
import { escapeLiteral } from "./helpers/escapeLiteral";
import { escapeBoolean } from "./internal/escapeBoolean";
import { escapeComposite } from "./internal/escapeComposite";
import { escapeDate } from "./internal/escapeDate";
import { escapeID } from "./internal/escapeID";
import { escapeIdentComposite } from "./internal/escapeIdentComposite";
import { escapeString } from "./internal/escapeString";
import { escapeStringify } from "./internal/escapeStringify";
import { parseCompositeRow } from "./internal/parseCompositeRow";
import type { PgClient } from "./PgClient";
import { PgError } from "./PgError";

// Bounds (in milliseconds) of the random delay returned for a retry after a
// deadlock error.
const DEADLOCK_RETRY_MS_MIN = 2000;
const DEADLOCK_RETRY_MS_MAX = 5000;

// Substrings looked up in PgError messages to classify the failure.
const ERROR_DEADLOCK = "deadlock detected";
const ERROR_FK = "violates foreign key constraint ";
const ERROR_CONFLICT_RECOVERY =
  "canceling statement due to conflict with recovery";

// "Class 22 — Data Exception" errors are typically caused by invalid
// input values (e.g. invalid date format or type cast). See for details:
// https://www.postgresql.org/docs/14/errcodes-appendix.html
const ERROR_CODE_PREFIX_DATA_EXCEPTION = "22";

/**
 * For codegen: returns a piece of JS source which reads property `prop` of the
 * `objCode` expression. Bracket notation with a JSON-quoted key is used, so the
 * generated code stays syntactically valid even when `prop` is not a valid JS
 * identifier (e.g. a column named "my-col").
 */
function propAccessCode(objCode: string, prop: string): string {
  return `${objCode}[${JSON.stringify(prop)}]`;
}

/**
 * A convenient pile of helper methods usable by most of PgQuery* classes. In
 * some sense it's an anti-pattern, but still reduces the boilerplate.
 *
 * PgRunner is also responsible for stringifying the values passed to the
 * queries and parsing values returned from the DB according to the field types
 * specs.
 */
export abstract class PgRunner<
  TTable extends Table,
  TInput,
  TOutput,
> extends Runner<TInput, TOutput> {
  /** Per-field compiled value escapers (filled in the constructor). */
  private escapers: Partial<Record<string, (v: unknown) => string>> = {};

  /** Per-field compiled "one of" (=ANY or IN) clause builders. */
  private oneOfBuilders: Partial<
    Record<string, (v: readonly unknown[]) => string>
  > = {};

  /** DB-to-JS value converters for fields whose type is a custom one. */
  private dbValueToJs: Array<[string, (v: unknown) => unknown]> = [];

  /** JS-to-string converters for fields whose type is a custom one. */
  private stringify: Partial<Record<string, (v: never) => string>> = {};

  override ["constructor"]!: typeof PgRunner;

  /**
   * Sends the SQL query to the client and, for the returned rows, converts the
   * values of fields with custom types using their dbValueToJs() functions.
   * The rows are modified in place and then returned.
   */
  protected async clientQuery<TOutput extends object>(
    sql: string,
    annotations: QueryAnnotation[],
    batchFactor: number,
    hints?: Hints,
  ): Promise<TOutput[]> {
    const rows = await this.client.query<TOutput>({
      query: [sql],
      hints,
      isWrite: this.constructor.IS_WRITE,
      annotations,
      op: this.op,
      table: this.name,
      batchFactor,
    });

    // Apply parsers only for known field names. Notice that TOutput is not
    // necessarily a type of the table's row, it can be something else (in e.g.
    // INSERT or DELETE operations).
    if (rows.length > 0) {
      for (const [field, dbValueToJs] of this.dbValueToJs) {
        // Only the first row is probed for the field presence.
        if (field in rows[0]) {
          for (const row of rows) {
            const dbValue = row[field as keyof TOutput];
            if (dbValue !== null && dbValue !== undefined) {
              (row as Record<string, unknown>)[field] = dbValueToJs(dbValue);
            }
          }
        }
      }
    }

    return rows;
  }

  /**
   * Formats prefixes/suffixes of various compound SQL clauses. Don't use on
   * performance-critical path!
   *
   * Supported specs (an optional `(arg)` may follow, where `%T` inside the arg
   * is replaced with the raw table name):
   * - `%T`: escaped table name
   * - `%SELECT_FIELDS`: all table fields plus ID
   * - `%FIELDS` / `%FIELDS(alias)`: args.fields, optionally table-qualified
   * - `%UPDATE_FIELD_VALUE_PAIRS(alias)`: `f1=alias.f1, f2=alias.f2, ...`
   * - `%PK` / `%PK(alias)`: primary key (simple or composite)
   */
  protected fmt(
    template: string,
    args: { fields?: Array<FieldAliased<TTable>>; normalize?: boolean } = {},
  ): string {
    return template.replace(
      /%(?:T|SELECT_FIELDS|FIELDS|UPDATE_FIELD_VALUE_PAIRS|PK)(?:\(([%\w]+)\))?/g,
      (c: string, a?: string) => {
        a = a?.replace(/%T/g, this.name);

        // Table name.
        if (c === "%T") {
          return escapeIdent(this.name);
        }

        // Comma-separated list of ALL fields in the table to be used in SELECT
        // clauses (always includes ID field).
        if (c === "%SELECT_FIELDS") {
          return uniq([...Object.keys(this.schema.table), ID])
            .map((f) => this.escapeField(f) + (f === ID ? ` AS ${ID}` : ""))
            .join(", ");
        }

        // Comma-separated list of the passed fields (never with AS clause).
        if (c.startsWith("%FIELDS")) {
          assert(args.fields, `BUG: no args.fields passed in ${template}`);
          return args.fields
            .map((field) =>
              this.escapeField(field, {
                withTable: a,
                normalize: args.normalize,
              }),
            )
            .join(", ");
        }

        // field1=X.field1, field2=X.field2, ...
        if (c.startsWith("%UPDATE_FIELD_VALUE_PAIRS")) {
          assert(args.fields, `BUG: no args.fields passed in ${template}`);
          assert(a, "BUG: you must pass an argument, source table alias name");
          return args.fields
            .map(
              (field) =>
                `${this.escapeField(field)}=` +
                this.escapeField(field, { withTable: a }),
            )
            .join(", ");
        }

        // Primary key (simple or composite).
        if (c.startsWith("%PK")) {
          return this.escapeField(ID, { withTable: a });
        }

        throw Error(`Unknown format spec "${c}" in "${template}"`);
      },
    );
  }

  /**
   * Escapes a value at runtime using the codegen functions created above. We
   * use escapers table and the codegen for the following reasons:
   * 1. We want to be sure that we know in advance, how to escape all table
   *    fields (and not fail at runtime).
   * 2. We want to make createEscapeCode() the single source of truth about
   *    fields escaping, even at runtime.
   *
   * Throws if there is no escaper registered for the field.
   */
  protected escapeValue(field: Field<TTable>, value: unknown): string {
    const escaper = this.nullThrowsUnknownField(this.escapers[field], field);
    return escaper(value);
  }

  /**
   * Escapes field name identifier.
   * - In case it's a composite primary key, returns its `ROW(f1,f2,...)`
   *   representation.
   * - A field may be aliased, e.g. if `{ field: "abc", alias: "$cas.abc" }` is
   *   passed, then the returned value will be `"$cas.abc"`. Basically, `field`
   *   name is used only to verify that such field is presented in the schema.
   * - Throws if the field is neither in the schema nor the ID field.
   */
  protected escapeField(
    info: FieldAliased<TTable>,
    { withTable, normalize }: { withTable?: string; normalize?: boolean } = {},
  ): string {
    const [field, alias] =
      typeof info === "string" ? [info, info] : [info.field, info.alias];

    if (this.schema.table[field]) {
      const sql = withTable
        ? `${escapeIdent(withTable)}.` + escapeIdent(alias)
        : escapeIdent(alias);
      return normalize ? this.normalizeSQLExpr(field, sql) : sql;
    }

    // ID is not in the schema: it is a composite primary key.
    if (field === ID) {
      return escapeIdentComposite(this.schema.uniqueKey, withTable);
    }

    return this.nullThrowsUnknownField(null, field);
  }

  /**
   * Returns a newly created JS function which, when called with a row set,
   * returns the following SQL clause:
   *
   * ```
   * WITH rows(id, a, b, _key) AS (VALUES
   *   ((NULL::tbl).id, (NULL::tbl).a, (NULL::tbl).b, 'k0'),
   *   ('123', 'xyz', 'nn', 'kSome'),
   *   ('456', 'abc', 'nn', 'kOther'),
   *   ...
   * )
   * {suffix}
   * ```
   *
   * The set of columns is exactly the passed `fields` followed by the `_key`
   * column.
   */
  protected createWithBuilder({
    fields,
    suffix,
  }: {
    fields: ReadonlyArray<FieldAliased<TTable>>;
    suffix: string;
  }): {
    prefix: string;
    func: (entries: Iterable<[key: string, input: object]>) => string;
    suffix: string;
  } {
    // Loop-invariant: the table name doesn't depend on the field.
    const nullOfTable = this.fmt("(NULL::%T).");
    const cols = [
      ...fields.map((info) => {
        const [field, alias] =
          typeof info === "string" ? [info, info] : [info.field, info.alias];
        return {
          field: escapeIdent(alias),
          escapedValue: nullOfTable + this.escapeField(field),
        };
      }),
      { field: "_key", escapedValue: "'k0'" },
    ];

    // We prepend VALUES with a row which consists of all NULL values, but typed
    // to the actual table's columns types. This hints PG how to cast input.
    return this.createValuesBuilder({
      prefix:
        `WITH rows(${cols.map(({ field }) => field).join(", ")}) AS (VALUES\n` +
        ` (${cols.map(({ escapedValue }) => escapedValue).join(", ")}),`,
      indent: " ",
      fields,
      withKey: true,
      suffix: ")\n" + suffix.replace(/^/gm, " "),
    });
  }

  /**
   * Returns a newly created JS function which, when called with a row set,
   * returns the following SQL clause (when called with withKey=true):
   *
   * ```
   *   ('123', 'xyz', 'nn', 'kSome'),
   *   ('456', 'abc', 'nn', 'kOther'),
   *   ...
   * )
   * ```
   *
   * or (when called without withKey):
   *
   * ```
   *   ('123', 'xyz', 'nn'),
   *   ('456', 'abc', 'nn'),
   *   ...
   * ```
   *
   * The set of columns is passed in fields.
   *
   * When the builder func is called, the actual value for some field in a row
   * is extracted from the same-named prop of the row, but if a `{ field,
   * alias }` object is passed in `fields` array, then the value is extracted
   * by the `alias` path of the row. This is used to e.g. access
   * `row.$cas.blah` value for a field named blah (in this case,
   * `alias="$cas.blah"`).
   *
   * If the table has no explicit ID column and the row carries a string ID,
   * that ID is treated as a composite primary key and is unfolded into the
   * unique key fields before the row is serialized (see unfoldCompositePK()).
   */
  protected createValuesBuilder<TInput extends object>({
    prefix,
    indent,
    fields,
    withKey,
    skipSorting,
    suffix,
  }: {
    prefix: string;
    indent: string;
    fields: ReadonlyArray<FieldAliased<TTable>>;
    withKey?: boolean;
    skipSorting?: boolean;
    suffix: string;
  }): {
    prefix: string;
    func: (entries: Iterable<[key: string, input: TInput]>) => string;
    suffix: string;
  } {
    const cols = fields.map((info) => {
      // A plain field name is accessed via a quoted key (so any column name is
      // safe in the generated code), whilst an alias is an intentional dotted
      // path inside the row and is thus emitted as is.
      const [field, fieldValCode] =
        typeof info === "string"
          ? [info, propAccessCode("$input", info)]
          : [info.field, `$input.${info.alias}`];
      const spec = this.nullThrowsUnknownField(this.schema.table[field], field);
      return this.createEscapeCode(
        field,
        fieldValCode,
        spec.autoInsert !== undefined ? spec.autoInsert : spec.autoUpdate,
      );
    });
    const rowFunc = this.newFunction(
      "$key",
      "$input",
      "return " +
        (indent ? `${JSON.stringify("\n" + indent)} +` : "") +
        '"(" + ' +
        cols.join(" + ', ' + ") +
        (withKey ? '+ ", " + this.escapeString($key)' : "") +
        '+ ")"',
    );

    return {
      prefix,
      func: (entries: Iterable<[key: string, input: TInput]>) => {
        const parts: string[] = [];
        for (const [key, input] of entries) {
          parts.push(rowFunc(key, this.unfoldCompositePK(input)));
        }

        // To eliminate deadlocks in parallel batched inserts, we sort rows.
        // This prevents deadlocks when two batched queries are running in
        // different connections, and the table has some unique key.
        if (!skipSorting) {
          parts.sort();
        }

        return parts.join(",");
      },
      suffix,
    };
  }

  /**
   * Returns a newly created JS function which, when called with an object,
   * returns the following SQL clause:
   *
   * id='123', a='xyz', b='nnn' [, {literal}]
   *
   * The set of columns is passed in fields, all other columns are ignored.
   */
  protected createUpdateKVsBuilder(
    fields: Array<Field<TTable>>,
  ): (input: object, literal?: Literal) => string {
    const parts = fields.map(
      (field) =>
        JSON.stringify(this.escapeField(field) + "=") +
        " + " +
        this.createEscapeCode(
          field,
          propAccessCode("$input", field),
          this.schema.table[field].autoUpdate,
        ),
    );
    const func = this.newFunction(
      "$input",
      "return " + (parts.length ? parts.join(" + ', ' + ") : '""'),
    );

    return (input: object, literal?: Literal): string => {
      const kvs = func(input);
      const custom = literal ? escapeLiteral(literal) : "";
      return kvs && custom ? `${kvs}, ${custom}` : kvs ? kvs : custom;
    };
  }

  /**
   * Prefers to utilize createAnyBuilder() if it can (i.e. build
   * a=ANY('{...}') clause). Otherwise, builds an IN(...) clause.
   */
  protected createOneOfBuilder(
    field: Field<TTable>,
    fieldValCode = "$value",
  ): (values: Iterable<unknown>) => string {
    const specType = this.schema.table[field]?.type;
    return specType === Boolean ||
      specType === ID ||
      specType === Number ||
      specType === String
      ? this.createAnyBuilder(field, fieldValCode)
      : this.createInBuilder(field, fieldValCode);
  }

  /**
   * Given a list of fields, returns two builders:
   *
   * 1. "Optimized": a newly created JS function which, when called with a row
   *    set, returns one of the following SQL clauses:
   *
   *    ```
   *    WHERE (field1, field2) IN(VALUES
   *      ((NULL::tbl).field1, (NULL::tbl).field2),
   *      ('aa', 'bb'),
   *      ('cc', 'dd'))
   *
   *    or
   *
   *    WHERE (field1='a' AND field2='b' AND field3 IN('a', 'b', 'c', ...)) OR (...)
   *           ^^^^^^^^^^prefix^^^^^^^^^                ^^^^^^^^ins^^^^^^^
   *    ```
   *
   * 2. "Plain": the last one builder mentioned above (good to always use for
   *    non-batched queries for instance).
   *
   * The tuples-based form is chosen for "optimized" only when there is more
   * than one field, and none of the fields allows NULL.
   */
  protected createWhereBuildersFieldsEq<TInput extends object>(args: {
    prefix: string;
    fields: ReadonlyArray<Field<TTable>>;
    suffix: string;
  }): {
    plain: {
      prefix: string;
      func: (inputs: Iterable<[key: string, input: TInput]>) => string;
      suffix: string;
    };
    optimized: {
      prefix: string;
      func: (inputs: Iterable<[key: string, input: TInput]>) => string;
      suffix: string;
    };
  } {
    const plain = this.createWhereBuilderFieldsEqOrBased<TInput>(args);
    return {
      plain,
      optimized:
        args.fields.length > 1 &&
        args.fields.every((field) => !this.schema.table[field].allowNull)
          ? this.createWhereBuilderFieldsEqTuplesBased<TInput>(args)
          : plain,
    };
  }

  /**
   * Returns a newly created JS function which, when called with a Where object,
   * returns the generated SQL WHERE clause.
   *
   * - The building is relatively expensive, since it traverses the Where object
   *   at run-time and doesn't know the shape beforehand.
   * - An empty Where object produces `WHERE true`.
   */
  protected createWhereBuilder({
    prefix,
    suffix,
  }: {
    prefix: string;
    suffix: string;
  }): {
    prefix: string;
    func: (where: Where<TTable>) => string;
    suffix: string;
  } {
    return {
      prefix: prefix + "WHERE ",
      func: (where: Where<TTable>) =>
        this.buildWhere(this.schema.table, where, true),
      suffix,
    };
  }

  /**
   * Prepends or appends a primary key to the list of fields. In case the
   * primary key is plain (i.e. "id" field), it's just added as a field;
   * otherwise, the unique key fields are added.
   *
   * For INSERT/UPSERT operations, we want to append the primary key, since it's
   * often pre-generated as a random-looking value. In many places, we
   * sort batched lists of rows before e.g. inserting them, so we order them by
   * their natural data order which prevents deadlocks on unique key conflict
   * when multiple concurrent transactions try to insert the same set of rows in
   * different order ("while inserting index tuple").
   *
   * For UPDATE operations though, we want to prepend the primary key, to make
   * sure we run batched updates in the same order in multiple concurrent
   * transactions. This lowers the chances of deadlocks too.
   */
  protected addPK(
    fields: ReadonlyArray<Field<TTable>>,
    mode: "prepend" | "append",
  ): string[] {
    const pkFields = this.schema.table[ID] ? [ID] : this.schema.uniqueKey;
    // Remove the PK fields from the list first, so they never appear twice.
    fields = difference(fields, pkFields);
    return mode === "prepend"
      ? [...pkFields, ...fields]
      : [...fields, ...pkFields];
  }

  /**
   * Pre-compiles per-field escapers and "one of" builders, and collects the
   * custom type converters (dbValueToJs/stringify) of the schema's fields.
   */
  constructor(
    public readonly schema: Schema<TTable>,
    private client: PgClient,
  ) {
    super(schema.name);

    // For tables with composite primary key and no explicit "id" column, we
    // still need an ID escaper (where id looks like "(1,2)" anonymous row).
    for (const field of [ID, ...Object.keys(this.schema.table)]) {
      const body = "return " + this.createEscapeCode(field, "$value");
      this.escapers[field] = this.newFunction("$value", body);
      this.oneOfBuilders[field] = this.createOneOfBuilder(field);
    }

    // Only the types which define both converters are treated as custom.
    for (const [field, { type }] of Object.entries(this.schema.table)) {
      if (hasKey("dbValueToJs", type) && hasKey("stringify", type)) {
        this.dbValueToJs.push([field, type.dbValueToJs.bind(type)]);
        this.stringify[field] = type.stringify.bind(type);
      }
    }
  }

  /**
   * Classifies an error of a single (non-batched) query:
   * - deadlock: returns a random delay in milliseconds before the retry;
   * - conflict with recovery: returns "immediate_retry";
   * - anything else (including non-PgError values): returns "no_retry".
   */
  delayForSingleQueryRetryOnError(
    e: unknown,
  ): number | "immediate_retry" | "no_retry" {
    if (!(e instanceof PgError)) {
      return "no_retry";
    }

    // Deadlocks may happen when a simple query involves multiple rows (e.g.
    // deleting a row by ID, but this row has foreign keys, especially with ON
    // DELETE CASCADE).
    if (e.message.includes(ERROR_DEADLOCK)) {
      return random(DEADLOCK_RETRY_MS_MIN, DEADLOCK_RETRY_MS_MAX);
    }

    return e.message.includes(ERROR_CONFLICT_RECOVERY)
      ? "immediate_retry"
      : "no_retry";
  }

  /**
   * Returns true if the error is a PgError for which a batched query should be
   * split into individual queries: deadlocks, foreign key violations,
   * conflicts with recovery and "Class 22" data exceptions.
   */
  shouldDebatchOnError(e: unknown): boolean {
    if (!(e instanceof PgError)) {
      return false;
    }

    return (
      // Debatch some of SQL WRITE query errors.
      e.message.includes(ERROR_DEADLOCK) ||
      e.message.includes(ERROR_FK) ||
      // Debatch "conflict with recovery" errors (we support retries only after
      // debatching, so have to return true here).
      e.message.includes(ERROR_CONFLICT_RECOVERY) ||
      !!e.cause?.code?.startsWith(ERROR_CODE_PREFIX_DATA_EXCEPTION)
    );
  }

  /**
   * Given a list of fields, returns a newly created JS function which, when
   * called with a row set, returns the following SQL clause:
   *
   * ```
   * WHERE (field1='a' AND field2='b' AND field3 IN('a', 'b', 'c', ...)) OR (...)
   *        ^^^^^^^^^^prefix^^^^^^^^^                ^^^^^^^^ins^^^^^^^
   * ```
   *
   * The assumption is that the last field in the list is the most variable,
   * whilst all previous fields compose a more or less static prefix
   *
   * - ATTENTION: if at least one OR is produced, it will likely result in a
   *   slower Bitmap Index Scan.
   * - Used in runSingle() (no ORs there) or when optimized builder is not
   *   available (e.g. when unique key contains nullable fields).
   */
  private createWhereBuilderFieldsEqOrBased<TInput extends object>({
    prefix,
    fields,
    suffix,
  }: {
    prefix: string;
    fields: ReadonlyArray<Field<TTable>>;
    suffix: string;
  }): {
    prefix: string;
    func: (inputs: Iterable<[key: string, input: TInput]>) => string;
    suffix: string;
  } {
    const lastField = last(fields);
    assert(
      lastField !== undefined,
      "BUG: at least one field must be passed to build a WHERE clause",
    );

    // fieldN IN('aa', 'bb', 'cc', ...)
    const lastFieldOneOf = this.createOneOfBuilder(
      lastField,
      propAccessCode("$value[1]", lastField),
    );

    if (fields.length === 1) {
      // If we have only one field, we can use the plain oneOfBuilder (which is
      // either an IN(...) or =ANY(...) clause).
      return {
        prefix: prefix + "WHERE ",
        func: lastFieldOneOf,
        suffix,
      };
    }

    // All fields except the last one form the "prefix" part of the clause.
    // Their SQL identifiers are escaped once here (not per input row), the
    // same way as it's done in all other WHERE builders.
    const prefixFields = fields.slice(0, -1).map((field) => ({
      field,
      escapedField: this.escapeField(field),
    }));

    return {
      prefix: prefix + "WHERE ",
      func: (inputs: Iterable<[key: string, input: TInput]>) => {
        // Group the inputs by their SQL prefix; Map keeps the insertion order.
        const insByPrefix = new Map<
          string,
          Array<[key: string, input: TInput]>
        >();
        for (const input of inputs) {
          let prefix = "";
          for (const { field, escapedField } of prefixFields) {
            if (prefix !== "") {
              prefix += " AND ";
            }

            const value = (input[1] as Record<string, unknown>)[field];
            prefix +=
              value !== null
                ? escapedField + "=" + this.escapeValue(field, value)
                : escapedField + " IS NULL";
          }

          let ins = insByPrefix.get(prefix);
          if (!ins) {
            ins = [];
            insByPrefix.set(prefix, ins);
          }

          ins.push(input);
        }

        let sql = "";
        for (const [prefix, ins] of insByPrefix) {
          if (sql !== "") {
            sql += " OR ";
          }

          const inClause = lastFieldOneOf(ins);
          if (prefix !== "") {
            sql += "(" + prefix + " AND " + inClause + ")";
          } else {
            sql += inClause;
          }
        }

        return sql;
      },
      suffix,
    };
  }

  /**
   * Given a list of fields, returns a newly created JS function which, when
   * called with a row set, returns the following SQL clause:
   *
   * ```
   * WHERE (field1, field2) IN(VALUES
   *   ((NULL::tbl).field1, (NULL::tbl).field2),
   *   ('aa', 'bb'),
   *   ('cc', 'dd'))
   * ```
   *
   * The assumption is that all fields are non-nullable.
   *
   * - This clause always produces an Index Scan (not Bitmap Index Scan).
   * - Used in most of the cases in runBatch(), e.g. when unique key has >1
   *   fields, and they are all non-nullable.
   */
  private createWhereBuilderFieldsEqTuplesBased<TInput extends object>({
    prefix,
    fields,
    suffix,
  }: {
    prefix: string;
    fields: ReadonlyArray<Field<TTable>>;
    suffix: string;
  }): {
    prefix: string;
    func: (entries: Iterable<[key: string, input: TInput]>) => string;
    suffix: string;
  } {
    const escapedFields = fields.map((f) => this.escapeField(f));
    // Escaped identifiers are appended AFTER fmt() runs, so an identifier
    // which happens to contain a format spec (like %T) is never re-expanded.
    const nullOfTable = this.fmt("(NULL::%T).");
    return this.createValuesBuilder<TInput>({
      prefix:
        prefix +
        `WHERE (${escapedFields.join(", ")}) IN(VALUES\n` +
        " (" +
        escapedFields.map((f) => nullOfTable + f).join(", ") +
        "),",
      indent: " ",
      fields,
      skipSorting: true, // for JS perf
      suffix: ")" + suffix,
    });
  }

  /**
   * Recursively converts a Where object to an SQL boolean expression.
   * - Keys starting with "$" are treated as operators, all others as fields.
   * - All the pieces are joined with AND; the result is wrapped with () when
   *   it consists of more than one piece and is not at the top level.
   */
  private buildWhere(
    specs: TTable,
    where: Where<TTable>,
    isTopLevel = false,
  ): string {
    const pieces: string[] = [];

    for (const key of Object.keys(where)) {
      const value = where[key];
      if (value === undefined) {
        continue;
      }

      // Operators ($and, $or, $not, $literal) are processed below.
      if (key[0] === "$") {
        continue;
      }

      // Several per-field operators may be combined in one object, e.g.
      // { $gte: 1, $lt: 10 }; each of them produces its own piece.
      let foundOp = false;

      if (hasKey("$gte", value)) {
        pieces.push(this.buildFieldBinOp(key, ">=", value.$gte));
        foundOp = true;
      }

      if (hasKey("$gt", value)) {
        pieces.push(this.buildFieldBinOp(key, ">", value.$gt));
        foundOp = true;
      }

      if (hasKey("$lte", value)) {
        pieces.push(this.buildFieldBinOp(key, "<=", value.$lte));
        foundOp = true;
      }

      if (hasKey("$lt", value)) {
        pieces.push(this.buildFieldBinOp(key, "<", value.$lt));
        foundOp = true;
      }

      if (hasKey("$ne", value)) {
        pieces.push(this.buildFieldNe(key, value.$ne));
        foundOp = true;
      }

      if (hasKey("$isDistinctFrom", value)) {
        pieces.push(this.buildFieldIsDistinctFrom(key, value.$isDistinctFrom));
        foundOp = true;
      }

      if (hasKey("$overlap", value)) {
        pieces.push(this.buildFieldBinOp(key, "&&", value.$overlap));
        foundOp = true;
      }

      // No operators: it's a plain equality (or a "one of" for arrays).
      if (!foundOp) {
        pieces.push(this.buildFieldEq(key, value));
      }
    }

    if (hasKey("$and", where)) {
      const clause = this.buildLogical(specs, "AND", where.$and);
      if (clause.length) {
        pieces.push(clause);
      }
    }

    if (hasKey("$or", where)) {
      const clause = this.buildLogical(specs, "OR", where.$or);
      if (clause.length) {
        pieces.push(clause);
      }
    }

    if (hasKey("$not", where)) {
      pieces.push(this.buildNot(specs, where.$not));
    }

    if (hasKey("$literal", where)) {
      // $literal clause in WHERE may look like "abc OR def", and to make sure
      // this OR doesn't interfere with priorities of other operators around, we
      // always wrap the literal with (). We must wrap in WHERE only, not in
      // e.g. ORDER BY or CTEs.
      pieces.push("(" + escapeLiteral(where.$literal) + ")");
    }

    if (!pieces.length) {
      // This is for cases like { [$and]: [{}, {}] }
      pieces.push("true");
    }

    const sql = pieces.join(" AND ");
    return pieces.length > 1 && !isTopLevel ? "(" + sql + ")" : sql;
  }

  /**
   * Builds `field{binOp}value` SQL expression.
   */
  private buildFieldBinOp<TField extends Field<TTable>>(
    field: TField,
    binOp: string,
    value: NonNullable<Value<TTable[TField]>>,
  ): string {
    return this.escapeField(field) + binOp + this.escapeValue(field, value);
  }

  /**
   * Builds `field IS DISTINCT FROM value` SQL expression.
   */
  private buildFieldIsDistinctFrom<TField extends Field<TTable>>(
    field: TField,
    value: Value<TTable[TField]>,
  ): string {
    return (
      this.escapeField(field) +
      " IS DISTINCT FROM " +
      this.escapeValue(field, value)
    );
  }

  /**
   * Builds an equality expression: `field IS NULL` for null, a "one of"
   * clause for an array, and `field=value` for everything else.
   */
  private buildFieldEq<TField extends Field<TTable>>(
    field: TField,
    value: Where<TTable>[TField],
  ): string {
    if (value === null) {
      return this.escapeField(field) + " IS NULL";
    } else if (value instanceof Array) {
      const inBuilder = this.nullThrowsUnknownField(
        this.oneOfBuilders[field],
        field,
      );
      return inBuilder(value);
    } else {
      return this.escapeField(field) + "=" + this.escapeValue(field, value);
    }
  }

  /**
   * Joins the sub-clauses built for each of the items with OR or AND. An empty
   * list of items produces a `false` expression with an explanatory comment.
   */
  private buildLogical(
    specs: TTable,
    op: "OR" | "AND",
    items: ReadonlyArray<Where<TTable>>,
  ): string {
    const clause = op === "OR" ? " OR " : " AND ";
    if (items.length === 0) {
      return ` false /* Empty${clause}*/ `;
    }

    const sql = items.map((item) => this.buildWhere(specs, item)).join(clause);
    return items.length > 1 ? "(" + sql + ")" : sql;
  }

  /**
   * Builds `NOT {where}` SQL expression.
   */
  private buildNot(specs: TTable, where: Where<TTable>): string {
    return "NOT " + this.buildWhere(specs, where);
  }

  /**
   * Builds a "not equal" expression:
   * - `field IS NOT NULL` for null;
   * - `field NOT IN(...)` for an array (plus `AND field IS NOT NULL` if the
   *   array contains a null; an array without non-null values produces `true`
   *   in place of NOT IN);
   * - `field<>value` for everything else.
   */
  private buildFieldNe<TField extends Field<TTable>>(
    field: TField,
    value: Value<TTable[TField]> | ReadonlyArray<Value<TTable[TField]>>,
  ): string {
    if (value === null) {
      return this.escapeField(field) + " IS NOT NULL";
    } else if (value instanceof Array) {
      let andIsNotNull = false;
      const pieces: string[] = [];
      for (const v of value) {
        if (v === null) {
          andIsNotNull = true;
        } else {
          pieces.push(this.escapeValue(field, v));
        }
      }

      const sql = pieces.length
        ? this.escapeField(field) + " NOT IN(" + pieces.join(",") + ")"
        : "true/*empty_NOT_IN*/";
      return andIsNotNull
        ? "(" + sql + " AND " + this.escapeField(field) + " IS NOT NULL)"
        : sql;
    } else {
      return this.escapeField(field) + "<>" + this.escapeValue(field, value);
    }
  }

  /**
   * Returns a newly created JS function which, when called with an array of
   * values, returns one of following SQL clauses:
   *
   * - $field=ANY('{aaa,bbb,ccc}')
   * - ($field=ANY('{aaa,bbb}') OR $field IS NULL)
   * - $field='aaa' (see below, why)
   * - ($field='aaa' OR $field IS NULL)
   * - $field IS NULL
   * - false
   */
  private createAnyBuilder(
    field: Field<TTable>,
    fieldValCode = "$value",
  ): ($values: Iterable<unknown>) => string {
    // Notes:
    //
    // - See arrayfuncs.c, array_out() function (needquote logic):
    //   https://github.com/postgres/postgres/blob/4ddfbd2/src/backend/utils/adt/arrayfuncs.c#L1136-L1156
    // - Why will it work not worse (index wise) than multi-value IN():
    //   https://www.postgresql.org/message-id/1761901.1668657080%40sss.pgh.pa.us
    // - We can't easily use a general-purpose quoting function here, because we
    //   must exclude nulls from the values, to add an explicit "OR IS NULL"
    //   clause instead.
    // - We sacrifice performance a little and not quote everything blindly.
    //   This is to gain the generated SQL queries some more readability.
    //
    // Also one more thing. Imagine we have a `btree(a, b)` index. Compare two
    // queries for one-element use case:
    //
    // 1. `a='aaa' AND b=ANY('{bbb}')`
    // 2. `a='aaa' AND b IN('bbb')`
    //
    // They may produce different plans: IN() always coalesces to `b='bbb'` in
    // the plan (and thus, to an btree index scan), whilst =ANY() always remains
    // =ANY(). This causes PG to choose a "post-filtering" plan for one-element
    // use case sometimes:
    //
    // 1. For =ANY: Index Cond: (a='aaa'); Filter: b=ANY('{bbb}') - BAD!
    // 2. For IN(): Index Cond: (a='aaa') AND (b='bbb')
    //
    // So to be on a safe side, we never emit a one-element =ANY(); instead, we
    // turn `b=ANY('{bbb}')` into `b='bbb'`.
    //
    const escapedFieldCode = JSON.stringify(this.escapeField(field));
    const body = `
      let sql = '';
      let lastValue = null;
      let nonNullCount = 0;
      let hasIsNull = false;
      for (const $value of $values) {
        if (${fieldValCode} != null) {
          if (sql) sql += ',';
          nonNullCount++;
          lastValue = "" + ${fieldValCode};
          sql += lastValue.match(/^$|^NULL$|[ \\t\\n\\r\\v\\f]|["\\\\{},]/is)
            ? '"' + lastValue.replace(/\\\\/g, '\\\\\\\\').replace(/"/g, '\\\\"') + '"'
            : lastValue;
        } else {
          hasIsNull = true;
        }
      }
      if (sql) {
        if (nonNullCount > 1) {
          sql = '{' + sql + '}';
          sql = ${escapedFieldCode} + '=ANY(' + this.escapeString(sql) + ')';
        } else {
          sql = ${escapedFieldCode} + '=' + this.escapeString(lastValue);
        }
      }
      return sql && hasIsNull
        ? '(' + sql + ' OR ' + ${escapedFieldCode} + ' IS NULL)'
        : hasIsNull
          ? ${escapedFieldCode} + ' IS NULL'
          : sql
            ? sql
            : 'false/*empty_ANY*/';
    `;
    return this.newFunction("$values", body);
  }

  /**
   * Returns a newly created JS function which, when called with an array of
   * values, returns one of following SQL clauses:
   *
   * - $field IN('aaa', 'bbb', 'ccc')
   * - ($field IN('aaa', 'bbb') OR $field IS NULL)
   * - $field IS NULL
   * - false
   *
   * This only works for primitive types.
   */
  private createInBuilder(
    field: Field<TTable>,
    fieldValCode = "$value",
  ): ($values: Iterable<unknown>) => string {
    const escapedFieldCode = JSON.stringify(this.escapeField(field));
    const valueCode = this.createEscapeCode(field, fieldValCode);
    const body = `
      let sql = '';
      let hasIsNull = false;
      for (const $value of $values) {
        if (${fieldValCode} != null) {
          if (sql) sql += ',';
          sql += ${valueCode};
        } else {
          hasIsNull = true;
        }
      }
      if (sql) {
        sql = ${escapedFieldCode} + ' IN(' + sql + ')';
      }
      return sql && hasIsNull
        ? '(' + sql + ' OR ' + ${escapedFieldCode} + ' IS NULL)'
        : hasIsNull
          ? ${escapedFieldCode} + ' IS NULL'
          : sql
            ? sql
            : 'false/*empty_IN*/';
    `;
    return this.newFunction("$values", body);
  }

  /**
   * For codegen, returns the following piece of JS code:
   *
   * '($fieldValCode !== undefined ? this.escapeXyz($fieldValCode) : "$defSQL")'
   *
   * (or just 'this.escapeXyz($fieldValCode)' when defSQL is not passed).
   *
   * It's expected that, while running the generated code, `this` points to an
   * object with a) `escapeXyz()` functions, b) `stringify` object containing
   * the table fields custom to-string converters.
   *
   * Throws if the field is not in the schema (and is not ID), or if the type
   * of the field is not supported.
   */
  private createEscapeCode(
    field: Field<TTable>,
    fieldValCode: string,
    defSQL?: string,
  ): string {
    const specType = this.schema.table[field]?.type;
    if (!specType && field !== ID) {
      throw Error(`BUG: cannot find the field "${field}" in the schema`);
    }

    const escapeCode =
      specType === undefined && field === ID
        ? `this.escapeComposite(${fieldValCode})`
        : specType === Boolean
          ? `this.escapeBoolean(${fieldValCode})`
          : specType === Date
            ? `this.escapeDate(${fieldValCode}, ${JSON.stringify(field)})`
            : specType === ID
              ? `this.escapeID(${fieldValCode})`
              : specType === Number
                ? `this.escapeString(${fieldValCode})`
                : specType === String
                  ? `this.escapeString(${fieldValCode})`
                  : hasKey("stringify", specType)
                    ? // The converter is looked up by a quoted key, so field
                      // names which are not valid JS identifiers work too.
                      `this.escapeStringify(${fieldValCode}, ` +
                      `${propAccessCode("this.stringify", field)})`
                    : (() => {
                        throw Error(
                          `BUG: unknown spec type ${specType} for field ${field}`,
                        );
                      })();
    if (defSQL !== undefined) {
      return (
        `(${fieldValCode} !== undefined ` +
        `? ${escapeCode} ` +
        `: ${JSON.stringify(defSQL)})`
      );
    } else {
      return escapeCode;
    }
  }

  /**
   * Compiles a function body with `this` bound to some well-known properties
   * which are available in the body.
   *
   * For each table, we compile frequently accessible pieces of code which
   * serialize data in SQL format. This allows to remove lots of logic and "ifs"
   * from runtime and speed up hot code paths.
   *
   * The last item of argsAndBody is the function body, all previous items are
   * the names of its arguments (as in the Function constructor).
   */
  private newFunction(...argsAndBody: string[]): (...args: unknown[]) => never {
    return new Function(...argsAndBody).bind({
      escapeComposite,
      escapeBoolean,
      escapeDate,
      escapeID,
      escapeString,
      escapeStringify,
      stringify: this.stringify,
    });
  }

  /**
   * The problem: PG is not working fine with queries like:
   *
   * ```
   * WITH rows(composite_id, c) AS (
   *   VALUES
   *   ( ROW((NULL::tbl).x, (NULL::tbl).y), (NULL::tbl).c ),
   *   ( ROW(1,2), 3 ),
   *   ( ROW(3,4), 5 )
   * )
   * UPDATE tbl SET c=rows.c
   * FROM rows WHERE ROW(tbl.x, tbl.y)=composite_id
   * ```
   *
   * It cannot match the type of composite_id with the row, and even the trick
   * with NULLs doesn't help it to infer types. It's a limitation of WITH clause
   * (because in INSERT ... VALUES, there is no such problem).
   *
   * So the only solution is to parse/decompose the row string into individual
   * unique key columns at runtime for batched UPDATEs. And yes, it's slow.
   *
   * ```
   * WITH rows(x, y, c) AS (
   *   VALUES
   *   ( (NULL::tbl).x, (NULL::tbl).y, (NULL::tbl).c ),
   *   ( 1, 2, 3 ),
   *   ( 3, 4, 5 )
   * )
   * UPDATE tbl SET c=rows.c
   * FROM rows WHERE ROW(tbl.x, tbl.y)=ROW(rows.x, rows.y)
   * ```
   *
   * The passed object is never mutated: when unfolding happens, a shallow copy
   * is returned.
   */
  private unfoldCompositePK<TInput extends object>(inputIn: TInput): TInput {
    let input = inputIn as Record<string, string | null>;
    if (!this.schema.table[ID] && typeof input[ID] === "string") {
      const compositePK = parseCompositeRow(input[ID]);
      input = { ...input };
      for (const [i, field] of this.schema.uniqueKey.entries()) {
        input[field] = compositePK[i];
      }
    }

    return input as TInput;
  }

  /**
   * Some data types are different between PG and JS. Here we have a chance to
   * "normalize" them. E.g. in JS, Date is truncated to milliseconds (3 digits),
   * whilst in PG, it's by default of 6 digits precision (so if we didn't
   * normalize, then JS Date would've been never equal to a PG timestamp).
   */
  private normalizeSQLExpr(field: Field<TTable>, sql: string): string {
    const spec = this.nullThrowsUnknownField(this.schema.table[field], field);
    if (spec.type === Date) {
      // Notice that `CAST(x AS timestamptz(3))` does ROUNDING, and we need
      // TRUNCATION, since it's the default behavior of postgres-date (they
      // changed it to rounding once, but then reverted intentionally) and
      // node-postgres. See https://github.com/brianc/node-postgres/issues/1200
      sql = `date_trunc('ms', ${sql})`;
    }

    return sql;
  }

  /**
   * Throws an exception about some field being not mentioned in the table
   * schema if the passed data is null or undefined; otherwise, returns the
   * data. Notice that ID is treated as always available in this message.
   */
  private nullThrowsUnknownField<T>(
    data: T,
    field: Field<TTable>,
  ): Exclude<T, null | undefined> {
    if (data === null || data === undefined) {
      throw Error(
        `Unknown field: ${field}; allowed fields: ` +
          [ID, ...Object.keys(this.schema.table)],
      );
    } else {
      return data as Exclude<T, null | undefined>;
    }
  }
}