@cocalc/database
Version:
CoCalc: code for working with our PostgreSQL database
1,140 lines (1,030 loc) • 49.4 kB
text/coffeescript
#########################################################################
# This file is part of CoCalc: Copyright © 2020 Sagemath, Inc.
# License: AGPLv3 s.t. "Commons Clause" – see LICENSE.md for details
#########################################################################
# PostgreSQL -- basic queries and database interface
# Default value of the `debug` option of the PostgreSQL constructor below.
exports.DEBUG = true
# If database connection is non-responsive but no error raised directly
# by db client, then we will know and fix, rather than just sitting there...
# NOTE: the identifier is misspelled ("TIMEOUS"); it is left as is because
# other lines in this file refer to it by this name.
DEFAULT_TIMEOUS_MS = 60000
# Do not test for non-responsiveness until a while after initial connection
# established, since things tend to work initially, *but* may also be much
# slower, due to tons of clients simultaneously connecting to DB.
DEFAULT_TIMEOUT_DELAY_MS = DEFAULT_TIMEOUS_MS * 4
# A query that takes at least this many milliseconds gets an additional
# "QUERY_ALERT_THRESH" debug log entry (see __do_query).
QUERY_ALERT_THRESH_MS=5000
consts = require('./consts')
DEFAULT_STATEMENT_TIMEOUT_MS = consts.STATEMENT_TIMEOUT_MS
EventEmitter = require('events')
fs = require('fs')
async = require('async')
escapeString = require('sql-string-escape')
validator = require('validator')
{callback2} = require('@cocalc/util/async-utils')
LRU = require('lru-cache')
pg = require('pg')
winston = require('@cocalc/backend/logger').getLogger('postgres')
{do_query_with_pg_params} = require('./postgres/set-pg-params')
{ syncSchema } = require('./postgres/schema')
{ pgType } = require('./postgres/schema/pg-type')
{ quoteField } = require('./postgres/schema/util')
{ primaryKey, primaryKeys } = require('./postgres/schema/table')
misc_node = require('@cocalc/backend/misc_node')
data = require("@cocalc/backend/data")
{defaults} = misc = require('@cocalc/util/misc')
required = defaults.required
{SCHEMA, client_db} = require('@cocalc/util/schema')
metrics = require('./metrics')
# Exported list of project column names.
exports.PUBLIC_PROJECT_COLUMNS = ['project_id', 'last_edited', 'title', 'description', 'deleted', 'created', 'env']
# The same list with 'users' prepended.
exports.PROJECT_COLUMNS = ['users'].concat(exports.PUBLIC_PROJECT_COLUMNS)
dbPassword = require('@cocalc/database/pool/password').default;
class exports.PostgreSQL extends EventEmitter # emits a 'connect' event whenever we successfully connect to the database and 'disconnect' when connection to postgres fails
# Set up the database interface object: record the connection parameters,
# start the periodic test query, create the optional query cache and --
# unless opts.connect is false -- start trying to connect.
#
# All options are optional; see the defaults (and their comments) below.
constructor: (opts) ->
    super()
    opts = defaults opts,
        host            : data.pghost  # DEPRECATED: or 'hostname:port' or 'host1,host2,...' (multiple hosts) -- TODO -- :port only works for one host.
        database        : data.pgdatabase
        user            : data.pguser
        debug           : exports.DEBUG
        connect         : true
        password        : undefined
        cache_expiry    : 5000  # expire cached queries after this many milliseconds
                                # keep this very short; it's just meant to reduce impact of a bunch of
                                # identical permission checks in a single user query.
        cache_size      : 300   # cache this many queries; use @_query(cache:true, ...) to cache result
        concurrent_warn : 500
        concurrent_heavily_loaded : 70 # when concurrent hits this, consider load "heavy"; this changes how some queries behave to be faster but provide less info
        ensure_exists   : true  # ensure database exists on startup (runs psql in a shell)
        timeout_ms      : DEFAULT_TIMEOUS_MS # **IMPORTANT: if *any* query takes this long, entire connection is terminated and recreated!**
        timeout_delay_ms : DEFAULT_TIMEOUT_DELAY_MS # Only reconnect on timeout this many ms after connect.  Motivation: on initial startup queries may take much longer due to competition with other clients.
    @setMaxListeners(0)  # because of a potentially large number of changefeeds
    @_state = 'init'
    @_debug = opts.debug
    @_timeout_ms = opts.timeout_ms
    @_timeout_delay_ms = opts.timeout_delay_ms
    @_ensure_exists = opts.ensure_exists
    @_init_test_query()
    dbg = @_dbg("constructor")  # must be after setting @_debug above
    dbg(opts)
    # Split an optional ':port' suffix off the host (first ':' wins).
    i = opts.host.indexOf(':')
    if i != -1
        @_host = opts.host.slice(0, i)
        # Always parse the port as decimal; without an explicit radix a
        # value such as '0x1538' would be read as hexadecimal.
        @_port = parseInt(opts.host.slice(i+1), 10)
    else
        @_host = opts.host
        @_port = 5432
    @_concurrent_warn = opts.concurrent_warn
    @_concurrent_heavily_loaded = opts.concurrent_heavily_loaded
    @_user = opts.user
    @_database = opts.database
    # An explicitly given password wins; otherwise use dbPassword().
    @_password = opts.password ? dbPassword()
    @_init_metrics()
    # The cache is only created when both its size and its expiry are nonzero.
    if opts.cache_expiry and opts.cache_size
        @_query_cache = new LRU({max:opts.cache_size, maxAge: opts.cache_expiry})
    if opts.connect
        @connect()  # start trying to connect
# Drop everything from the query cache; a no-op when no cache was created.
clear_cache: =>
    if @_query_cache?
        return @_query_cache.reset()
    return undefined
# Shut this object down: stop the periodic test query, mark the state as
# 'closed', emit 'close', drop all listeners and end every client.
# Calling it again once closed does nothing.
close: =>
    return if @_state == 'closed'   # nothing to do
    @_close_test_query()
    @_state = 'closed'
    @emit('close')
    @removeAllListeners()
    return unless @_clients?
    for conn in @_clients
        conn.removeAllListeners()
        conn.end()
    delete @_clients
###
When @_timeout_ms is set, a trivial query is issued periodically (see
_init_test_query) to check that the database connection still responds.
If that query times out, the connection gets recreated.
###
_do_test_query: =>
    log = @_dbg('test_query')
    log('starting')
    on_done = (err, result) =>
        log("finished", err, result)
    @_query({query: 'SELECT NOW()', cb: on_done})
# Start running _do_test_query every @_timeout_ms milliseconds; does
# nothing when @_timeout_ms is not set.
_init_test_query: =>
    if @_timeout_ms
        @_test_query = setInterval(@_do_test_query, @_timeout_ms)
# Stop the periodic test query, if one is running.
_close_test_query: =>
    return unless @_test_query?
    clearInterval(@_test_query)
    delete @_test_query
# Name of the database engine behind this object.
engine: ->
    return 'postgresql'
# Connect to the database, retrying (via misc.retry_until_success) until it
# works or opts.max_time has passed.
#
#   opts.max_time - optional upper bound in ms; only the max_time of the
#                   call that actually starts the attempt is used.
#   opts.cb       - optional; called with an error, or with nothing once
#                   connected.
#
# Calls made while an attempt is already running just queue their callback.
# The queue is bounded: when it overflows, the oldest callbacks are removed
# and -- so that their callers are not left waiting forever -- invoked with
# an error.
connect: (opts) =>
    opts = defaults opts,
        max_time : undefined   # set to something shorter to not try forever
                               # Only first max_time is used.
        cb       : undefined
    if @_state == 'closed'
        opts.cb?("closed")
        return
    dbg = @_dbg("connect")
    if @_clients?
        dbg("already connected")
        opts.cb?()
        return
    if @_connecting?
        dbg('already trying to connect')
        @_connecting.push(opts.cb)
        # keep several times the db-concurrent-warn limit of callbacks
        max_connecting = 5 * @_concurrent_warn
        while @_connecting.length > max_connecting
            dropped = @_connecting.shift()
            dbg("WARNING: still no DB available, dropping old callbacks (limit: #{max_connecting})")
            # Tell the dropped caller that it failed instead of silently
            # forgetting about it.
            dropped?("database is down -- too many pending connection attempts")
        return
    dbg('will try to connect')
    @_state = 'init'
    if opts.max_time
        dbg("for up to #{opts.max_time}ms")
    else
        dbg("until successful")
    @_connecting = [opts.cb]
    misc.retry_until_success
        f           : @_connect
        max_delay   : 10000
        max_time    : opts.max_time
        start_delay : 500 + 500*Math.random()
        log         : dbg
        cb          : (err) =>
            # Take the queue first, so that callbacks which call connect
            # again start from a clean slate.
            v = @_connecting
            delete @_connecting
            for cb in v
                cb?(err)
            if not err
                @_state = 'connected'
                @emit('connect')
# End every client connection and forget about them; a no-op when there
# are no clients.
disconnect: () =>
    return unless @_clients?
    for conn in @_clients
        conn.end()
        conn.removeAllListeners()
    delete @_clients
# True exactly when at least one client is currently held.
is_connected: () =>
    if not @_clients?
        return false
    return @_clients.length > 0
# One connection attempt (connect() retries this via misc.retry_until_success).
# Runs these steps in series:
#   1. optionally make sure the database exists,
#   2. work out the list of hosts (DNS lookup of every comma separated name),
#   3. create one pg.Client per host and connect them; the attempt goes on
#      with whichever clients connected, and fails if none did,
#   4. run "SELECT NOW()" on every client, with a 15s guard against hanging,
#   5. check whether any server is in recovery and record that in @is_standby.
# On success @_clients is set and cb(undefined, @) is called; on failure
# cb(err) is called.
# NOTE(review): on failure the clients created during this attempt
# (locals.clients) are not ended anywhere in this function -- this looks like
# a possible connection leak across retries; confirm.
_connect: (cb) =>
dbg = @_dbg("_connect")
dbg("connect to #{@_host}")
@_clear_listening_state() # definitely not listening
if @_clients?
@disconnect()
locals =
clients : []
hosts : []
@_connect_time = 0
@_concurrent_queries = 0 # can't be any going on now.
async.series([
(cb) =>
if @_ensure_exists
dbg("first make sure db exists")
@_ensure_database_exists(cb)
else
dbg("assuming database exists")
cb()
(cb) =>
if not @_host # undefined if @_host=''
locals.hosts = [undefined]
cb()
return
if @_host.indexOf('/') != -1
dbg("using a local socket file (not a hostname)")
locals.hosts = [@_host]
cb()
return
# Resolve one hostname and append all of its addresses to locals.hosts.
f = (host, cb) =>
hostname = host.split(':')[0]
winston.debug("Looking up ip addresses of #{hostname}")
require('dns').lookup hostname, {all:true}, (err, ips) =>
if err
winston.debug("Got #{hostname} --> err=#{err}")
# NON-FATAL -- we just don't include these and hope to
# have at least one total working host...
cb()
else
winston.debug("Got #{hostname} --> #{JSON.stringify(ips)}")
# In kubernetes the stateful set service just has
# lots of ip address. We connect to *all* of them,
# and spread queries across them equally.
for x in ips
locals.hosts.push(x.address)
cb()
async.map(@_host.split(','), f, (err) => cb(err))
(cb) =>
dbg("connecting to #{JSON.stringify(locals.hosts)}...")
if locals.hosts.length == 0
dbg("locals.hosts has length 0 -- no available db")
cb("no databases available")
return
dbg("create client and start connecting...")
locals.clients = []
# Use a function to initialize the client, to avoid any issues with scope of "client" below.
# Ref: https://node-postgres.com/apis/client
init_client = (host) =>
client = new pg.Client
user : @_user
host : host
port : @_port
password : @_password
database : @_database
statement_timeout: DEFAULT_STATEMENT_TIMEOUT_MS # we set a statement_timeout, to avoid queries locking up PG
if @_notification?
client.on('notification', @_notification)
onError = (err) =>
# only listen once for error; after that we've
# killed connection and don't care.
client.removeListener('error', onError)
if @_state == 'init'
# already started connecting
return
@emit('disconnect')
dbg("error -- #{err}")
@disconnect()
@connect() # start trying to reconnect
client.on('error', onError)
client.setMaxListeners(0) # there is one emitter for each concurrent query... (see query_cb)
locals.clients.push(client)
for host in locals.hosts
init_client(host)
# Connect the clients. If at least one succeeds, we use this.
# If none succeed, we declare failure.
# Obviously, this is NOT optimal -- it's just hopefully sufficiently robust/works.
# I'm going to redo this with experience.
locals.clients_that_worked = []
locals.errors = []
# Never reports an error to async.map: failures are collected in locals.errors.
f = (client, c) =>
try
await client.connect()
locals.clients_that_worked.push(client)
catch err
locals.errors.push(err)
c()
async.map locals.clients, f, () =>
if locals.clients_that_worked.length == 0
console.warn("ALL clients failed", locals.errors)
dbg("ALL clients failed", locals.errors)
cb("ALL clients failed to connect")
else
# take what we got
if locals.clients.length == locals.clients_that_worked.length
dbg("ALL clients worked")
else
dbg("ONLY #{locals.clients_that_worked.length} clients worked")
locals.clients = locals.clients_that_worked
dbg("cb = ", cb)
cb()
(cb) =>
@_connect_time = new Date()
locals.i = 0
# Weird and unfortunate fact -- this query can and does **HANG** never returning
# in some edge cases. That's why we have to be paranoid about this entire _connect
# function...
f = (client, cb) =>
# Report "hung" once; clearing cb makes a late query result a no-op.
it_hung = =>
cb?("hung")
cb = undefined
timeout = setTimeout(it_hung, 15000)
dbg("now connected; checking if we can actually query the DB via client #{locals.i}")
locals.i += 1
client.query "SELECT NOW()", (err) =>
clearTimeout(timeout)
cb?(err)
async.map(locals.clients, f, cb)
(cb) =>
dbg("checking if ANY db server is in recovery, i.e., we are doing standby queries only")
@is_standby = false
f = (client, cb) =>
# Is this a read/write or read-only connection?
client.query "SELECT pg_is_in_recovery()", (err, resp) =>
if err
cb(err)
else
# True if ANY db connection is read only.
if resp.rows[0].pg_is_in_recovery
@is_standby = true
cb()
async.map(locals.clients, f, cb)
], (err) =>
if err
mesg = "Failed to connect to database -- #{err}"
dbg(mesg)
console.warn(mesg) # make it clear for interactive users with debugging off -- common mistake with env not setup right.
cb?(err)
else
# Only now do the new clients become visible to the rest of the object.
@_clients = locals.clients
@_concurrent_queries = 0
dbg("connected!")
cb?(undefined, @)
)
# Hand out a native pg client connection, cycling round robin through all
# of them.  Returns undefined when there are no connections.
_client: =>
    pool = @_clients
    return unless pool?
    return pool[0] if pool.length <= 1
    next = (@_client_index ? -1) + 1
    next = 0 if next >= pool.length   # wrap around
    @_client_index = next
    return pool[next]
# The `query` function of one database connection, bound to that
# connection; undefined when there is no connection.
get_db_query: =>
    conn = @_client()
    if conn?
        return conn.query.bind(conn)
    return undefined
# Make a logging function for the method named f.  With debugging off the
# returned function does nothing; otherwise it logs its (single) argument
# as JSON, shortened to 250 characters, through winston.debug.
_dbg: (f) =>
    if not @_debug
        return ->
    return (m) =>
        text = misc.trunc_middle(JSON.stringify(m), 250)
        winston.debug("PostgreSQL.#{f}: #{text}")
# Create the query time histogram and the started/ended query counter.
# Any failure is only logged as a warning.
_init_metrics: =>
    try
        histogram_opts =
            buckets : [1, 5, 10, 20, 50, 100, 200, 500, 1000, 5000, 10000]
            labels  : ['table']
        @query_time_histogram = metrics.newHistogram('db_query_ms_histogram', 'db queries', histogram_opts)
        @concurrent_counter = metrics.newCounter('db_concurrent_total', 'Concurrent queries (started and finished)', ['state'])
    catch err
        @_dbg("_init_metrics")("WARNING -- #{err}")
# Awaitable form of @_query: takes the same options (without cb) and goes
# through callback2.
async_query: (opts) =>
    run_query = @_query.bind(@)
    result = await callback2(run_query, opts)
    return result
# Entry point for running a query.  Fills in the option defaults documented
# below and then
#   - rejects set/jsonb_set/jsonb_merge queries when @is_standby is true,
#   - hands over to @_query_retry_until_success when retry_until_success is given,
#   - connects first when not connected,
# before the actual work is done by @__do_query.  The outcome is reported
# through opts.cb.
# NOTE(review): the standby check does not look at opts.values (INSERT) or at
# the text of opts.query -- confirm this is intended.
_query: (opts) =>
opts = defaults opts,
query : undefined # can give select and table instead
select : undefined # if given, should be string or array of column names -| can give these
table : undefined # if given, name of table -| two instead of query
params : []
cache : false # Will cache results for a few seconds or use cache. Use this
# when speed is very important, and results that are a few seconds
# out of date are fine.
where : undefined # Used for SELECT: If given, can be
# - a map with keys clauses with $::TYPE (not $1::TYPE!) and values
# the corresponding params. Also, WHERE must not be in the query already.
# If where[cond] is undefined, then cond is completely **ignored**.
# - a string, which is inserted as is as a normal WHERE condition.
# - an array of maps or strings.
set : undefined # Appends a SET clause to the query; same format as values.
values : undefined # Used for INSERT: If given, then params and where must not be given. Values is a map
# {'field1::type1':value, , 'field2::type2':value2, ...} which gets converted to
# ' (field1, field2, ...) VALUES ($1::type1, $2::type2, ...) '
# with corresponding params set. Undefined valued fields are ignored and types may
# be omitted. Javascript null is not ignored and converts to PostgreSQL NULL.
conflict : undefined # If given, then values must also be given; appends this to query:
# ON CONFLICT (name) DO UPDATE SET value=EXCLUDED.value'
# Or, if conflict starts with "ON CONFLICT", then just include as is, e.g.,
# "ON CONFLICT DO NOTHING"
jsonb_set : undefined # Used for setting a field that contains a JSONB javascript map.
# Give as input an object
#
# { field1:{key1:val1, key2:val2, ...}, field2:{key3:val3,...}, ...}
#
# In each field, every key has the corresponding value set, unless val is undefined/null, in which
# case that key is deleted from the JSONB object fieldi. Simple as that! This is much, much
# cleaner to use than SQL. Also, if the value in fieldi itself is NULL, it gets
# created automatically.
jsonb_merge : undefined # Exactly like jsonb_set, but when val1 (say) is an object, it merges that object in,
# *instead of* setting field1[key1]=val1. So after this field1[key1] has what was in it
# and also what is in val1. Obviously field1[key1] had better have been an array or NULL.
order_by : undefined
limit : undefined
offset : undefined
safety_check: true
retry_until_success : undefined # if given, should be options to misc.retry_until_success
pg_params : undefined # key/value map of postgres parameters, which will be set for the query in a single transaction
timeout_s : undefined # by default, there is a "statement_timeout" set. set to 0 to disable or a number in seconds
cb : undefined
# quick check for write query against read-only connection
if @is_standby and (opts.set? or opts.jsonb_set? or opts.jsonb_merge?)
opts.cb?("set queries against standby not allowed")
return
if opts.retry_until_success
@_query_retry_until_success(opts)
return
if not @is_connected()
dbg = @_dbg("_query")
dbg("connecting first...")
# 2022-06: below there was {max_time: 45000} set with the note
# "don't try forever; queries could pile up."
# but I think this is rather harmful, since the hub could stop
# trying to connect to the database altogether.
# Rather, hub/health-checks::checkDBConnectivity will
# mark the hub as being bad if it can't connect to the database.
@connect
cb : (err) =>
if err
dbg("FAILED to connect -- #{err}")
# The caller gets a generic message; the underlying error is only logged.
opts.cb?("database is down (please try later)")
else
dbg("connected, now doing query")
@__do_query(opts)
else
@__do_query(opts)
# Run the query described by opts again and again, as directed by
# opts.retry_until_success (options for misc.retry_until_success), until an
# attempt succeeds or the retry options give up.
#
# opts.cb is finally called with the error from misc.retry_until_success, or
# with exactly the arguments the successful query attempt produced.
_query_retry_until_success: (opts) =>
    orig_cb = opts.cb
    # Work on a copy of the retry options: f and cb get added below, and the
    # object passed in belongs to the caller, who may well share it between
    # several queries.
    retry_opts = Object.assign({}, opts.retry_until_success)
    # Remove the option so that the @_query call below runs a plain query
    # rather than coming back here.
    delete opts.retry_until_success
    # Arguments of the most recent attempt's callback.
    args = undefined
    # f just calls @_query, but with a different cb (same opts)
    retry_opts.f = (cb) =>
        opts.cb = (args0...) =>
            args = args0
            cb(args[0])
        @_query(opts)
    # When misc.retry_until_success finishes, it calls this, which just
    # calls the original cb.
    retry_opts.cb = (err) =>
        if err
            orig_cb?(err)
        else
            orig_cb?(args...)
    # OK, now start it attempting.
    misc.retry_until_success(retry_opts)
# Build the final SQL text and parameter list from opts (see _query for the
# meaning of each option) and run it on one client.  In order:
#   1. sanity checks; make a SELECT from table/select when no query is given,
#   2. append the pieces for jsonb_set/jsonb_merge, values, set, conflict,
#      where, order_by, limit and offset,
#   3. refuse an UPDATE/DELETE without WHERE (unless safety_check is off),
#   4. answer from the query cache if allowed and possible,
#   5. run the query, guarded by a timeout that kills the whole connection,
#      and report through opts.cb (at most once via query_cb).
# Every failure is reported by calling opts.cb with an error.
__do_query: (opts) =>
dbg = @_dbg("__do_query('#{misc.trunc(opts.query?.replace(/\n/g, " "),250)}',id='#{misc.uuid().slice(0,6)}')")
if not @is_connected()
# TODO: should also check that client is connected.
opts.cb?("client not yet initialized")
return
if opts.params? and not misc.is_array(opts.params)
opts.cb?("params must be an array")
return
if not opts.query?
if not opts.table?
opts.cb?("if query not given, then table must be given")
return
if not opts.select?
opts.select = '*'
if misc.is_array(opts.select)
opts.select = (quote_field(field) for field in opts.select).join(',')
opts.query = "SELECT #{opts.select} FROM \"#{opts.table}\""
delete opts.select
# Append param to opts.params and return its 1-based position, i.e., the n in $n.
push_param = (param, type) ->
if type?.toUpperCase() == 'JSONB'
param = misc.to_json(param) # I don't understand why this is needed by the driver....
opts.params.push(param)
return opts.params.length
if opts.jsonb_merge?
if opts.jsonb_set?
opts.cb?("if jsonb_merge is set then jsonb_set must not be set")
return
opts.jsonb_set = opts.jsonb_merge
# Collects every "field=..." assignment; emitted as one SET clause further down.
SET = []
if opts.jsonb_set?
# This little piece of very hard to write (and clever?) code
# makes it so we can set or **merge in at any nested level** (!)
# arbitrary JSON objects. We can also delete any key at any
# level by making the value null or undefined! This is amazingly
# easy to use in queries -- basically making JSONB with postgres
# as expressive as RethinkDB REQL (even better in some ways).
# NOTE(review): keys and paths are interpolated into the SQL text below
# without escaping -- presumably they are never user controlled; confirm.
set = (field, data, path) =>
obj = "COALESCE(#{field}#>'{#{path.join(',')}}', '{}'::JSONB)"
for key, val of data
if not val?
# remove key from object
obj = "(#{obj} - '#{key}')"
else
if opts.jsonb_merge? and (typeof(val) == 'object' and not misc.is_date(val))
subobj = set(field, val, path.concat([key]))
obj = "JSONB_SET(#{obj}, '{#{key}}', #{subobj})"
else
# completely replace field[key] with val.
obj = "JSONB_SET(#{obj}, '{#{key}}', $#{push_param(val, 'JSONB')}::JSONB)"
return obj
v = ("#{field}=#{set(field, data, [])}" for field, data of opts.jsonb_set)
SET.push(v...)
if opts.values?
#dbg("values = #{misc.to_json(opts.values)}")
if opts.where?
opts.cb?("where must not be defined if opts.values is defined")
return
if misc.is_array(opts.values)
# An array of numerous separate object that we will insert all at once.
# Determine the fields, which as the union of the keys of all values.
fields = {}
for x in opts.values
if not misc.is_object(x)
opts.cb?("if values is an array, every entry must be an object")
return
for k, p of x
fields[k] = true
# convert to array
fields = misc.keys(fields)
fields_to_index = {}
n = 0
for field in fields
fields_to_index[field] = n
n += 1
values = []
for x in opts.values
value = []
for field, param of x
if field.indexOf('::') != -1
# NOTE(review): fields_to_index is keyed by the full 'name::type' key, but
# field is reduced to the bare name here, so the lookup two lines below
# looks like it misses for typed keys (and the column list is built from
# the full keys as well) -- typed keys in the array form appear broken; confirm.
[field, type] = field.split('::')
type = type.trim()
y = "$#{push_param(param, type)}::#{type}"
else
y = "$#{push_param(param)}"
value[fields_to_index[field]] = y
values.push(value)
else
# A single entry that we'll insert.
fields = []
values = []
for field, param of opts.values
if param == undefined
# ignore undefined fields -- makes code cleaner (and makes sense)
continue
if field.indexOf('::') != -1
[field, type] = field.split('::')
fields.push(quote_field(field.trim()))
type = type.trim()
values.push("$#{push_param(param, type)}::#{type}")
continue
else
fields.push(quote_field(field))
values.push("$#{push_param(param)}")
values = [values] # just one
if values.length > 0
opts.query += " (#{(quote_field(field) for field in fields).join(',')}) VALUES " + (" (#{value.join(',')}) " for value in values).join(',')
if opts.set?
v = []
for field, param of opts.set
if field.indexOf('::') != -1
[field, type] = field.split('::')
type = type.trim()
v.push("#{quote_field(field.trim())}=$#{push_param(param, type)}::#{type}")
continue
else
v.push("#{quote_field(field.trim())}=$#{push_param(param)}")
if v.length > 0
SET.push(v...)
if opts.conflict?
if misc.is_string(opts.conflict) and misc.startswith(opts.conflict.toLowerCase().trim(), 'on conflict')
# Straight string inclusion
opts.query += ' ' + opts.conflict + ' '
else
if not opts.values?
opts.cb?("if conflict is specified then values must also be specified")
return
if not misc.is_array(opts.conflict)
if typeof(opts.conflict) != 'string'
opts.cb?("conflict (='#{misc.to_json(opts.conflict)}') must be a string (the field name), for now")
return
else
conflict = [opts.conflict]
else
conflict = opts.conflict
# On conflict, every inserted field other than the conflict fields is
# overwritten with the new (EXCLUDED) value.
v = ("#{quote_field(field)}=EXCLUDED.#{field}" for field in fields when field not in conflict)
SET.push(v...)
if SET.length == 0
opts.query += " ON CONFLICT (#{conflict.join(',')}) DO NOTHING "
else
opts.query += " ON CONFLICT (#{conflict.join(',')}) DO UPDATE "
if SET.length > 0
opts.query += " SET " + SET.join(' , ')
WHERE = []
# Turn a where specification (string, map or array of these) into entries of WHERE.
push_where = (x) =>
if typeof(x) == 'string'
WHERE.push(x)
else if misc.is_array(x)
for v in x
push_where(v)
else if misc.is_object(x)
for cond, param of x
if typeof(cond) != 'string'
opts.cb?("each condition must be a string but '#{cond}' isn't")
return
if not param? # *IGNORE* where conditions where value is explicitly undefined
continue
if cond.indexOf('$') == -1
# where condition is missing its $ parameter -- default to equality
cond += " = $"
# Only the first '$' of the condition is replaced by the numbered parameter.
WHERE.push(cond.replace('$', "$#{push_param(param)}"))
if opts.where?
push_where(opts.where)
if WHERE.length > 0
if opts.values?
opts.cb?("values must not be given if where clause given")
return
opts.query += " WHERE #{WHERE.join(' AND ')}"
if opts.order_by?
# order_by is put into the SQL text as is; an apostrophe is the only thing rejected.
if opts.order_by.indexOf("'") >= 0
err = "ERROR -- detected ' apostrophe in order_by='#{opts.order_by}'"
dbg(err)
opts.cb?(err)
return
opts.query += " ORDER BY #{opts.order_by}"
if opts.limit?
if not validator.isInt('' + opts.limit, min:0)
err = "ERROR -- opts.limit = '#{opts.limit}' is not an integer"
dbg(err)
opts.cb?(err)
return
opts.query += " LIMIT #{opts.limit} "
if opts.offset?
if not validator.isInt('' + opts.offset, min:0)
err = "ERROR -- opts.offset = '#{opts.offset}' is not an integer"
dbg(err)
opts.cb?(err)
return
opts.query += " OFFSET #{opts.offset} "
if opts.safety_check
safety_check = opts.query.toLowerCase().trim()
if (safety_check.startsWith('update') or safety_check.startsWith('delete')) and (safety_check.indexOf('where') == -1 and safety_check.indexOf('trigger') == -1 and safety_check.indexOf('insert') == -1 and safety_check.indexOf('create') == -1)
# This is always a bug.
err = "ERROR -- Dangerous UPDATE or DELETE without a WHERE, TRIGGER, or INSERT: query='#{opts.query}'"
dbg(err)
opts.cb?(err)
return
if opts.cache and @_query_cache?
# check for cached result
# The cache key is the final query text together with its parameters.
full_query_string = JSON.stringify([opts.query, opts.params])
if (x = @_query_cache.get(full_query_string))?
dbg("using cache for '#{opts.query}'")
opts.cb?(x...)
return
# params can easily be huge, e.g., a blob. But this may be
# needed at some point for debugging.
#dbg("query='#{opts.query}', params=#{misc.to_json(opts.params)}")
client = @_client()
if not client?
opts.cb?("not connected")
return
@_concurrent_queries ?= 0
@_concurrent_queries += 1
dbg("query='#{opts.query} (concurrent=#{@_concurrent_queries})'")
@concurrent_counter?.labels('started').inc(1)
try
start = new Date()
if @_timeout_ms and @_timeout_delay_ms
# Create a timer, so that if the query doesn't return within
# timeout_ms time, then the entire connection is destroyed.
# It then gets recreated automatically. I tested
# and all outstanding queries also get an error when this happens.
timeout_error = =>
# Only disconnect with timeout error if it has been sufficiently long
# since connecting. This way when an error is triggered, all the
# outstanding timers at the moment of the error will just get ignored
# when they fire (since @_connect_time is 0 or too recent).
if @_connect_time and new Date() - @_connect_time > @_timeout_delay_ms
client.emit('error', 'timeout')
timer = setTimeout(timeout_error, @_timeout_ms)
# PAINFUL FACT: In client.query below, if the client is closed/killed/errored
# (especially via client.emit above), then none of the callbacks from
# client.query are called!
finished = false
error_listener = ->
dbg("error_listener fired")
query_cb('error')
client.once('error', error_listener)
query_cb = (err, result) =>
if finished # ensure no matter what that query_cb is called at most once.
dbg("called when finished (ignoring)")
return
finished = true
client.removeListener('error', error_listener)
if @_timeout_ms
clearTimeout(timer)
query_time_ms = new Date() - start
@_concurrent_queries -= 1
@query_time_histogram?.observe({table:opts.table ? ''}, query_time_ms)
@concurrent_counter?.labels('ended').inc(1)
if err
dbg("done (concurrent=#{@_concurrent_queries}), (query_time_ms=#{query_time_ms}) -- error: #{err}")
## DANGER
# Only uncomment this for low level debugging!
#### dbg("params = #{JSON.stringify(opts.params)}")
##
err = 'postgresql ' + err
else
dbg("done (concurrent=#{@_concurrent_queries}) (query_time_ms=#{query_time_ms}) -- success")
# Errors get cached as well: the stored pair is [err, result].
if opts.cache and @_query_cache?
@_query_cache.set(full_query_string, [err, result])
opts.cb?(err, result)
if query_time_ms >= QUERY_ALERT_THRESH_MS
dbg("QUERY_ALERT_THRESH: query_time_ms=#{query_time_ms}\nQUERY_ALERT_THRESH: query='#{opts.query}'\nQUERY_ALERT_THRESH: params='#{misc.to_json(opts.params)}'")
# set a timeout for one specific query (there is a default when creating the pg.Client, see @_connect)
if opts.timeout_s? and typeof opts.timeout_s == 'number' and opts.timeout_s >= 0
dbg("set query timeout to #{opts.timeout_s}secs")
opts.pg_params ?= {}
# the actual param is in milliseconds
# https://postgresqlco.nf/en/doc/param/statement_timeout/
opts.pg_params.statement_timeout = 1000 * opts.timeout_s
if opts.pg_params?
dbg("run query with specific postgres parameters in a transaction")
do_query_with_pg_params(client: client, query: opts.query, params: opts.params, pg_params:opts.pg_params, cb: query_cb)
else
client.query(opts.query, opts.params, query_cb)
catch e
# this should never ever happen
# NOTE(review): this path neither clears the timer nor removes error_listener,
# and it does not go through query_cb -- confirm that a later timer or 'error'
# event cannot report this query a second time.
dbg("EXCEPTION in client.query: #{e}")
opts.cb?(e)
@_concurrent_queries -= 1
@concurrent_counter?.labels('ended').inc(1)
return
# Count the entries of a table, optionally restricted by a where
# specification (same format as for _query).  cb receives (err, count).
_count: (opts) =>
    spec = defaults(opts, {
        table : required
        where : undefined   # as in _query
        cb    : required
    })
    sql = "SELECT COUNT(*) FROM #{spec.table}"
    @_query({query: sql, where: spec.where, cb: count_result(spec.cb)})
# Check the values in opts purely by the names of their keys:
#   - a key ending in 'id' must, if set, be a valid uuid string,
#   - a key ending in 'ids' must, if set, hold valid uuid strings,
#   - 'group' must be one of misc.PROJECT_GROUPS,
#   - 'groups' must, if set, hold only entries of misc.PROJECT_GROUPS.
# Returns true when everything is fine.  Otherwise opts.cb (if any) is
# called with a message about the first problem and false is returned.
_validate_opts: (opts) =>
    for k, v of opts
        if k.slice(k.length-2) == 'id'
            if v? and not misc.is_valid_uuid_string(v)
                opts.cb?("invalid #{k} -- #{v}")
                return false
        if k.slice(k.length-3) == 'ids'
            # An unset list is fine, exactly like an unset single id above;
            # iterating over undefined/null would throw.
            for w in v ? []
                if not misc.is_valid_uuid_string(w)
                    opts.cb?("invalid uuid #{w} in #{k} -- #{misc.to_json(v)}")
                    return false
        if k == 'group' and v not in misc.PROJECT_GROUPS
            opts.cb?("unknown project group '#{v}'"); return false
        if k == 'groups'
            for w in v ? []
                if w not in misc.PROJECT_GROUPS
                    opts.cb?("unknown project group '#{w}' in groups"); return false
    return true
# Make sure the database @_database exists: list the existing databases
# with psql and run createdb when ours is not among them.  cb is called
# with an error if one of the commands fails.
_ensure_database_exists: (cb) =>
    log = @_dbg("_ensure_database_exists")
    log("ensure database '#{@_database}' exists")
    first_host = @_host.split(',')[0]
    args = ['--user', @_user, '--host', first_host, '--port', @_port, '--list', '--tuples-only']
    log("psql #{args.join(' ')}")

    create_database = =>
        log("creating database '#{@_database}'")
        misc_node.execute_code
            command : 'createdb'
            args    : ['--host', @_host, '--port', @_port, @_database]
            env     :
                PGPASSWORD : @_password
            cb      : cb

    on_listed = (err, output) =>
        if err
            cb(err)
            return
        # The database name is the first '|' separated column of each
        # nonempty output line.
        names = []
        for line in output.stdout.split('\n') when line
            names.push(line.split('|')[0].trim())
        if @_database in names
            log("database '#{@_database}' already exists")
            cb()
            return
        create_database()

    misc_node.execute_code
        command : 'psql'
        args    : args
        env     :
            PGPASSWORD : @_password
        cb      : on_listed
# Guard for destructive operations: returns true only when opts.confirm is
# exactly 'yes'.  Otherwise opts.cb is called with an explanation and false
# is returned.
_confirm_delete: (opts) =>
    opts = defaults opts,
        confirm : 'no'
        cb      : required
    log = @_dbg("confirm")
    if opts.confirm == 'yes'
        return true
    message = "Really delete all data? -- you must explicitly pass in confirm='yes' (but confirm:'#{opts.confirm}')"
    log(message)
    opts.cb(message)
    return false
# Not implemented: always throws.
set_random_password: (opts) =>
    not_implemented = Error("NotImplementedError")
    throw not_implemented
# Drop the whole database (via dropdb) after disconnecting from it.
# Requires opts.confirm == 'yes'; the outcome is reported through opts.cb.
# This will fail if any other clients have the db open.
# This function is very important for automated testing.
delete_entire_database: (opts) =>
    log = @_dbg("delete_entire_database")
    log("deleting database '#{@_database}'")
    if not @_confirm_delete(opts)
        log("failed confirmation")
        return

    close_connections = (cb) =>
        log("disconnect from db")
        @disconnect()
        cb()

    drop_database = (cb) =>
        misc_node.execute_code
            command : 'dropdb'
            args    : ['--host', @_host, '--port', @_port, @_database]
            cb      : cb

    async.series([close_connections, drop_database], opts.cb)
# Remove every row from every table of the database.  The schema itself
# (tables, indexes) stays as it is.  Requires opts.confirm == 'yes'; the
# outcome is reported through opts.cb.
delete_all: (opts) =>
    log = @_dbg("delete_all")
    log("deleting all contents of tables in '#{@_database}'")
    return unless @_confirm_delete(opts)
    # If the cache is enabled, be sure to also clear it.
    @clear_cache()
    tables = undefined
    # Also forget what is cached on the db object itself.  Anything missing
    # here is a likely source of bugs -- though of the "test suite fails"
    # kind, so they will be found.
    delete @_stats_cached

    list_tables = (cb) =>
        @_get_tables (err, names) =>
            tables = names
            cb(err)

    # Actually delete the contents of each table.
    empty_tables = (cb) =>
        empty_one = (table, done) =>
            @_query
                query        : "DELETE FROM #{table}"
                safety_check : false
                cb           : done
        async.map(tables, empty_one, cb)

    async.series([list_tables, empty_tables], opts.cb)
# List the names of the tables in the 'public' schema: cb(err, names).
_get_tables: (cb) =>
    on_result = (err, result) =>
        if err
            cb(err)
        else
            cb(undefined, result.rows.map((row) -> row.table_name))
    @_query
        query : "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public'"
        cb    : on_result
# List the names of the columns of the given table: cb(err, names).
_get_columns: (table, cb) =>
    on_result = (err, result) =>
        if err
            cb(err)
        else
            cb(undefined, result.rows.map((row) -> row.column_name))
    @_query
        query : "SELECT column_name FROM information_schema.columns"
        where : {"table_name = $::text" : table}
        cb    : on_result
# The primary keys of the given table, as computed by primaryKeys.
_primary_keys: (table) =>
    keys = primaryKeys(table)
    return keys
# *The* primary key of the given table, assuming it is unique; otherwise
# an exception is raised.
_primary_key: (table) =>
    key = primaryKey(table)
    return key
# Rate limiting helper.  Returns true when the same (name, key...) was
# already seen during the last time_s seconds; otherwise remembers it for
# time_s seconds and returns false.
_throttle: (name, time_s, key...) =>
    key = misc.to_json(key)
    slot = "_throttle_#{name}"
    seen = (@[slot] ?= {})
    if seen[key]
        return true
    seen[key] = true
    forget = => delete @[slot]?[key]
    setTimeout(forget, time_s*1000)
    return false
# Ensure that the actual schema in the database matches the one defined in SCHEMA.
# This creates the initial schema, adds new columns, and in a VERY LIMITED
# range of cases, *might* be able to change the data type of a column.
#
# opts.cb (optional) is called exactly once: with the error if syncing
# failed, and without arguments otherwise.
update_schema: (opts) =>
    try
        await syncSchema(SCHEMA)
    catch err
        opts?.cb?(err)
        return
    # Deliberately outside of the try: an exception thrown by the callback
    # itself must not lead to the callback being called a second time.
    opts?.cb?()
# Number of queries that are currently outstanding (0 if none were counted yet).
concurrent: =>
    if @_concurrent_queries?
        return @_concurrent_queries
    return 0
# True when the number of outstanding queries has reached the
# concurrent_heavily_loaded threshold given to the constructor.
is_heavily_loaded: =>
    active = @_concurrent_queries
    threshold = @_concurrent_heavily_loaded
    return active >= threshold
# sha1 hash (in hex) of the arguments: each one that is not a string is
# converted to JSON, and everything is concatenated before hashing.
# Used for compound primary keys in a relatively safe way, in situations
# where a (highly unlikely) collision wouldn't be the end of the world.
# There is a similar, slower, client-only version of this function (in
# schema.coffee), so don't change it willy nilly.
sha1: (args...) ->
    parts = []
    for x in args
        parts.push(if typeof(x) == 'string' then x else JSON.stringify(x))
    return misc_node.sha1(parts.join(''))
# Delete every row whose "expire" column is <= now, either from the one
# table given by opts.table or from every non-virtual table in SCHEMA that
# has an "expire" field.  With count_only the rows are only counted (and
# the count is logged), not deleted.
delete_expired: (opts) =>
    opts = defaults opts,
        count_only : false      # if true, only count the number of rows that would be deleted
        table      : undefined  # only delete from this table
        cb         : required
    log = @_dbg("delete_expired(...)")
    log()

    count_expired = (table, cb) =>
        @_query
            query : "SELECT COUNT(*) FROM #{table} WHERE expire <= NOW()"
            cb    : (err, result) =>
                if not err
                    log("COUNT for table #{table} is #{result.rows[0].count}")
                cb(err)

    remove_expired = (table, cb) =>
        log("deleting expired entries from '#{table}'")
        @_query
            query : "DELETE FROM #{table} WHERE expire <= NOW()"
            cb    : (err) =>
                log("finished deleting expired entries from '#{table}' -- #{err}")
                cb(err)

    handle_table = (table, cb) =>
        log("table='#{table}'")
        if opts.count_only
            count_expired(table, cb)
        else
            remove_expired(table, cb)

    if opts.table
        tables = [opts.table]
    else
        tables = (name for name, spec of SCHEMA when spec.fields?.expire? and not spec.virtual)
    async.map(tables, handle_table, opts.cb)
# Count all entries of the given table: opts.cb receives (err, count).
count: (opts) =>
    spec = defaults(opts, {table: required, cb: required})
    sql = "SELECT COUNT(*) FROM #{spec.table}"
    @_query({query: sql, cb: count_result(spec.cb)})
# Escape a string (with escapeString) before it is put into a query string.
sanitize: (s) =>
    escaped = escapeString(s)
    return escaped
###
Other misc functions
###
# Thin exported wrapper around pgType.
pg_type = (info) -> pgType(info)
exports.pg_type = pg_type
# Thin exported wrapper around quoteField.
quote_field = (field) -> quoteField(field)
exports.quote_field = quote_field
# Date that lies ttl seconds **in the future**; undefined when ttl is
# not given (or 0).
exports.expire_time = expire_time = (ttl) ->
    return undefined unless ttl
    now_ms = new Date() - 0
    return new Date(now_ms + ttl*1000)
# Make a callback for a SQL query that is expected to give at most one row.
#
#   one_result(cb) or one_result(pattern, cb)
#
# The function that is returned takes (err, result) from the query and calls cb:
#   - with the error, if there was one;
#   - with nothing, if there is no result or no row;
#   - with "more than one result", if there are several rows;
#   - for exactly one row (with its undefined/null fields removed), depending
#     on pattern:
#        no pattern       --> the row itself
#        'a_field'        --> the value of this field; nothing when the field
#                             is not set or the row has an expire time that
#                             has passed
#        array of fields  --> an object with just these fields (those set)
# Without any cb, a function that ignores its input is returned.
exports.one_result = one_result = (pattern, cb) ->
    if not cb? and typeof(pattern) == 'function'
        [cb, pattern] = [pattern, undefined]
    if not cb?
        return ->  # do nothing -- return function that ignores result
    return (err, result) ->
        if err
            cb(err)
            return
        rows = result?.rows
        if not rows?
            cb()
            return
        if rows.length == 0
            return cb()
        if rows.length != 1
            return cb("more than one result")
        obj = misc.map_without_undefined_and_null(rows[0])
        if not pattern?
            cb(undefined, obj)
            return
        kind = typeof(pattern)
        if kind == 'string'
            value = obj[pattern]
            # null or undefined -- SQL returns null, but we want undefined;
            # an expired row counts as not there.
            if value? and not (obj.expire? and new Date() >= obj.expire)
                return cb(undefined, value)
            return cb()
        if kind == 'object'
            picked = {}
            for p in pattern when obj[p]?
                picked[p] = obj[p]
            return cb(undefined, picked)
        return cb("BUG: unknown pattern -- #{pattern}")
# Make a callback for a SQL query that may give any number of rows.
#
#   all_results(cb) or all_results(pattern, cb)
#
# The function that is returned takes (err, result) from the query and calls cb:
#   - with the error, if there was one;
#   - without pattern: with a copy (misc.copy) of every row;
#   - with a string pattern: with the value of that field for every row
#     (undefined where the field is null or missing);
#   - with any other pattern: with an "unsupported pattern type" error.
# A missing result (or one without rows) is treated as zero rows, in line
# with one_result and count_result, rather than throwing in the callback.
# Without any cb, a function that ignores its input is returned.
exports.all_results = all_results = (pattern, cb) ->
    if not cb? and typeof(pattern) == 'function'
        cb = pattern
        pattern = undefined
    if not cb?
        return ->  # do nothing -- return function that ignores result
    return (err, result) ->
        if err
            cb(err)
        else
            rows = result?.rows ? []
            if not pattern?
                # TODO: we use stupid (?) misc.copy to unwrap from pg driver type -- investigate better!
                # Maybe this is fine.  I don't know.
                cb(undefined, (misc.copy(x) for x in rows))
            else if typeof(pattern) == 'string'
                cb(undefined, ((x[pattern] ? undefined) for x in rows))
            else
                cb("unsupported pattern type '#{typeof(pattern)}'")
# Make a callback for a "SELECT COUNT(*) ..." query: the function that is
# returned takes (err, result) from the query and calls cb with the error,
# or with the count of the first row as a number (NaN when there is no such
# row).  Without cb, a function that ignores its input is returned.
exports.count_result = count_result = (cb) ->
    if not cb?
        return ->  # do nothing -- return function that ignores result
    return (err, result) ->
        if err
            cb(err)
        else
            # The count arrives as a string; always read it as decimal.
            cb(undefined, parseInt(result?.rows?[0]?.count, 10))