@sap/cds
SAP Cloud Application Programming Model - CDS for Node.js
const cds = require('../_runtime/cds')
const LOG = cds.log('persistent-queue|queue|persistent-outbox|outbox')
const { inspect } = require('util')
const TaskRunner = require('./TaskRunner')
const taskRunner = new TaskRunner()
const { expBkfFix: waitingTime } = require('../_runtime/common/utils/waitingTime') // backoff helper: maps an attempt count to a waiting time in ms
const PROCESSING = 'processing'
const INTERNAL_USER = 'cds.internal.user'
const $taskProcessorRegistered = Symbol('task processor registered')
const $queued = Symbol('queued')
const $unqueued = Symbol('unqueued')
const $stored_reqs = Symbol('stored_reqs')
const $error = Symbol('error')
const _get100NanosecondTimestampISOString = (offset = 0) => {
const [now, nanoseconds] = [new Date(Date.now() + offset), process.hrtime()[1]]
return now.toISOString().replace('Z', `${nanoseconds}`.padStart(9, '0').substring(3, 7) + 'Z')
}
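// Worked example (values illustrative): with Date.now() at 2024-05-01T12:00:00.123Z
// and process.hrtime()[1] === 456789012, padStart(9, '0') yields '456789012',
// substring(3, 7) yields '7890', and the result is '2024-05-01T12:00:00.1237890Z'.
// The extra digits give 100-nanosecond resolution, so multiple emits within the
// same millisecond still get distinct, ordered timestamps.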
const _getTasksEntity = () => {
const tasksDbName = 'cds.outbox.Messages'
const tasksEntity = cds.model.definitions[tasksDbName]
if (!tasksEntity) throw new Error(`The entity '${tasksDbName}' is missing but needed for persistent tasks.`)
return tasksEntity
}
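// For orientation, a sketch of the fields this module reads and writes on
// 'cds.outbox.Messages' (the authoritative definition ships with @sap/cds;
// this lists only what the queries below imply):
//
//   entity cds.outbox.Messages {
//     key ID                   : UUID;
//         timestamp            : Timestamp;   // when the task becomes due
//         target               : String;      // service name, incl. optional prefix
//         msg                  : LargeString; // serialized event/request, see _createTask
//         status               : String;      // null or 'processing'
//         attempts             : Integer;     // failed attempts so far
//         lastAttemptTimestamp : Timestamp;
//         lastError            : LargeString;
//   }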
// REVISIT: Is this always a reliable way to identify the provider tenant?
// Are there scenarios where the credentials have a different format?
const _isProviderTenant = tenant =>
(cds.requires.auth && cds.requires.auth.credentials && cds.requires.auth.credentials.identityzoneid === tenant) ||
cds.requires.multitenancy.t0 === tenant
const _hasPersistentQueue = tenant => {
if (!cds.db) return false // no persistence configured
if (cds.requires.multitenancy && tenant && _isProviderTenant(tenant)) return false // no persistence for provider account
return true
}
const _safeJSONParse = string => {
try {
return string && JSON.parse(string)
} catch {
// Don't throw
}
}
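// e.g. _safeJSONParse('{"a":1}') → { a: 1 }, _safeJSONParse('not json') → undefined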
const _targetName = (name, opts) => (opts.targetPrefix ? opts.targetPrefix + name : name)
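// e.g. _targetName('ReviewsService', { targetPrefix: 'my/' }) === 'my/ReviewsService'
// (names are illustrative; without a targetPrefix the service name is used as-is)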
// Note: This function can also run for each tenant on startup
//
// tx1: Fetch messages which are not in process and are not locked (SELECT FOR UPDATE)
// tx1: Set those which are processable (not 'processing' or timed out) to 'processing'
// Process messages (in parallel or sequentially)
// tx2 (legacyLocking: tx1): Update/Delete messages based on outcome, set status to null
//
const processTasks = (service, tenant, _opts = {}) => {
const opts = Object.assign({ attempt: 0 }, _opts)
if (!opts.parallel) opts.chunkSize = 1
const name = service.name
const tasksEntity = _getTasksEntity()
let letAppCrash = false
const __done = () => {
if (letAppCrash) cds.exit(1)
taskRunner.end({ name, tenant }, () => processTasks(service, tenant, opts))
}
const _done = () => {
if (!opts.legacyLocking) __done()
// else will be handled in spawn
}
return taskRunner.run({ name, tenant }, () => {
const config = tenant ? { tenant, user: cds.User.privileged } : { user: cds.User.privileged }
config.after = 1 // make sure spawn puts its cb on the `timer` queue (via setTimeout), which is also used by `taskRunner`
const _begin = opts.legacyLocking ? cds.spawn.bind(cds) : cb => cb()
const _end = opts.legacyLocking ? s => s.on('done', __done) : () => {}
const _tx = opts.legacyLocking ? cb => cb() : cds.tx.bind(cds)
const spawn = _begin(async () => {
let selectedTasks
const currTime = Date.now()
const _timeout = cds.utils.ms4(opts.timeout)
let currMinWaitingTime
const _setWaitingTime = time => {
if (currMinWaitingTime === undefined) currMinWaitingTime = time
else currMinWaitingTime = Math.min(currMinWaitingTime, time)
}
const tasksQuery = SELECT.from(tasksEntity)
.where({ target: _targetName(name, opts) })
.orderBy(opts.parallel ? ['status', 'timestamp', 'ID'] : ['timestamp', 'status', 'ID'])
.limit(opts.chunkSize)
.forUpdate()
if (opts.maxAttempts) tasksQuery.where({ attempts: { '<': opts.maxAttempts } })
if (opts.parallel) {
tasksQuery.SELECT.forUpdate.ignoreLocked = true
const shifted = new Date(Math.max(0, currTime - _timeout)).toISOString()
tasksQuery.where({ status: null, or: { lastAttemptTimestamp: { '<': shifted } } })
// Note: Messages that are still 'processing' and not yet timed out are filtered
// out here, even if they will eventually hang, so no follow-up processing is
// scheduled for them. One could remove that filter, but in the worst case it
// would select <chunkSize> messages which are all in process and not yet timed
// out, hence only <chunkSize> messages could be processed at any given time.
}
LOG._debug && LOG.debug(`${name}: Fetch messages`)
try {
// Use dedicated transaction to fetch relevant messages
// and _immediately_ set their status to 'processing' and commit
// thus keeping the database lock as short as possible
selectedTasks = await _tx(async () => {
const selectedTasks = await tasksQuery
const shifted = currTime - _timeout
const processableTasks = []
// filter those which must not be processed
for (const t of selectedTasks) {
// break at the first one which is in the future (they're ordered by timestamp)
const taskTimestamp = new Date(t.timestamp).getTime()
const _waitingTimePlanned = taskTimestamp - currTime
if (_waitingTimePlanned > 0) {
_setWaitingTime(_waitingTimePlanned)
break // everything afterwards is even further in the future
}
// ignore those which should have a longer waiting time
// remember the minimum waiting time to retrigger the processing (will be compared to failed messages later)
const lastAttemptTimestamp = t.lastAttemptTimestamp && new Date(t.lastAttemptTimestamp).getTime()
if (lastAttemptTimestamp && t.attempts) {
const _alreadyWaited = currTime - lastAttemptTimestamp
const _shouldHaveWaited = waitingTime(t.attempts)
const _remainingTime = _shouldHaveWaited - _alreadyWaited
if (_remainingTime > 0) {
_setWaitingTime(_remainingTime)
continue
}
}
if (t.status === null || new Date(t.lastAttemptTimestamp).getTime() < shifted) processableTasks.push(t)
}
// Note: There's also no scheduling for tasks which are not yet timed out.
if (!processableTasks.length) return [] // all in process
// prettier-ignore
LOG._debug && LOG.debug(`${name}: Process ${processableTasks.length} ${processableTasks.length > 1 ? 'messages' : 'message'}`)
if (!opts.legacyLocking) {
await UPDATE(tasksEntity)
.set({ status: PROCESSING })
.where({ ID: { in: processableTasks.filter(t => t.status === null).map(t => t.ID) } })
}
return processableTasks
})
} catch (e) {
// could potentially be a timeout
const _waitingTime = waitingTime(opts.attempt)
// prettier-ignore
LOG.error(`${name}: Message retrieval failed`, e, `Retry${_waitingTime > 0 ? ` in ${Math.round(_waitingTime / 1000)} s` : ''}`)
taskRunner.plan(
{
name,
tenant,
waitingTime: _waitingTime
},
() => processTasks(service, tenant, { ...opts, attempt: opts.attempt + 1 })
)
return _done()
}
const tasksGen = function* () {
for (const task of selectedTasks) {
const _msg = _safeJSONParse(task.msg)
const context = _msg.context || {}
const userId = _msg[INTERNAL_USER]
if (_msg.query) {
const q = (_msg.query = cds.ql(_msg.query))
q.bind(service)
_msg.target = cds.infer.target(q)
}
const msg = _msg._fromSend ? new cds.Request(_msg) : new cds.Event(_msg)
delete msg._fromSend
delete msg[INTERNAL_USER]
const user = new cds.User.Privileged(userId)
context.user = user
if (!msg) continue
const res = {
ID: task.ID,
msg,
context,
attempts: task.attempts || 0
}
yield res
}
}
const toBeDeleted = []
const toBeUpdated = []
const toBeCreated = []
// Remember the failed message with the minimum current attempts (-> determines when the next processing shall be planned)
let minAttemptFailed
try {
const _handleWithErr = async task => {
try {
// REVISIT: Shouldn't that work like a standard inbound adapter? I.e. either of:
// - cds._with({...}, ()=> srv.dispatch(task.msg)) // instead of srv.handle(task.msg)
// - cds.tx({...}, ()=> srv.dispatch(task.msg)) // instead of srv.handle(task.msg)
// Problem: If task involves db, dedicated transactions will block on SQLite if an outer transaction is open
const _run = opts.legacyLocking && cds.db?.kind === 'sqlite' ? cds._with : service.tx.bind(service)
const result = await _run({ ...task.context, tenant }, async () => {
return opts.handle ? await opts.handle.call(service, task.msg) : await service.handle(task.msg)
})
task.results = result
toBeDeleted.push(task)
} catch (e) {
if (!minAttemptFailed) minAttemptFailed = task.attempts
else minAttemptFailed = Math.min(minAttemptFailed, task.attempts)
task[$error] = e
if (cds.error.isSystemError(e)) {
LOG.error(`${service.name}: Programming error detected:`, e)
task.updateData = { attempts: opts.maxAttempts }
toBeUpdated.push(task)
throw new Error(`${service.name}: Programming error detected.`)
}
if (e.unrecoverable) {
LOG.error(`${service.name}: Unrecoverable error:`, e)
if (opts.maxAttempts) {
task.updateData = { attempts: opts.maxAttempts }
toBeUpdated.push(task)
} else toBeDeleted.push(task)
} else {
cds.repl || LOG.error(`${service.name}: Emit failed:`, e)
task.updateData = { attempts: task.attempts + 1 }
toBeUpdated.push(task)
return false
}
}
}
const tasks = tasksGen()
// REVISIT: Maybe we can also support handleMany and provide the iterator (for batch processing)
if (opts.parallel) {
const res = await Promise.allSettled([...tasks].map(_handleWithErr))
const errors = res.filter(r => r.status === 'rejected').map(r => r.reason)
if (errors.length) {
throw new Error(`${service.name}: Programming errors detected.`)
}
} else {
// In principle, this branch is not needed: for `parallel == false` there's only
// one chunk at a time, hence the `Promise.allSettled` above would be sufficient.
// Let's keep it in case we want to change that in the future.
for (const task of tasks) {
if ((await _handleWithErr(task)) === false) break
}
}
} catch (e) {
LOG.error(e)
letAppCrash = true
}
const queries = []
if (toBeDeleted.length)
queries.push(
DELETE.from(tasksEntity).where(
'ID in',
toBeDeleted.map(msg => msg.ID)
)
)
// There can be tasks which are neither updated nor deleted; their status must be set back to `null`
const updateTasks = selectedTasks.filter(
task => !toBeDeleted.some(t => t.ID === task.ID) && !toBeUpdated.some(t => t.ID === task.ID)
)
if (updateTasks.length) {
queries.push(
UPDATE(tasksEntity)
.where({ ID: { in: updateTasks.map(t => t.ID) } })
.set({ status: null })
)
}
for (const each of toBeUpdated) {
if (toBeDeleted.some(d => d.ID === each.ID)) continue
each.updateData.status = null
if (opts.storeLastError !== false) each.updateData.lastError = inspect(each[$error])
if (each.updateData.lastError && typeof each.updateData.lastError !== 'string') {
each.updateData.lastError = inspect(each.updateData.lastError)
}
queries.push(UPDATE(tasksEntity).where({ ID: each.ID }).set(each.updateData))
}
const _newMsgFrom = msg => {
const _fromSend = msg instanceof cds.Request
const newMsg = { ...msg }
if (msg.entity) newMsg.entity = msg.entity // needed for proper `h.for(msg)` handling
newMsg._fromSend = _fromSend
if (!newMsg.queue) return newMsg
if (!newMsg.queue.after && !newMsg.queue.every) return newMsg
newMsg.queue = { ...newMsg.queue }
delete newMsg.queue.every
delete newMsg.queue.after
return newMsg
}
const _failed = task => {
const msg = _newMsgFrom(task.msg)
msg.event = msg.event + '/#failed'
const _errorToObj = error => {
if (typeof error === 'string') return { message: error }
return {
name: error.name,
message: error.message,
stack: error.stack,
code: error.code,
...error
}
}
msg.results = _errorToObj(task[$error])
if (service.handlers.on.some(h => h.for(msg)) || service.handlers.after.some(h => h.for(msg))) {
toBeCreated.push(_createTask(service.name, msg, task.context, opts))
}
}
const _succeeded = task => {
const msg = _newMsgFrom(task.msg)
msg.event = msg.event + '/#succeeded'
if (service.handlers.on.some(h => h.for(msg)) || service.handlers.after.some(h => h.for(msg))) {
toBeCreated.push(_createTask(service.name, msg, task.context, opts))
}
}
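// Outcome hooks (sketch): apps can react to queue outcomes by registering
// handlers for '<event>/#succeeded' or '<event>/#failed' as composed above, e.g.:
//   srv.on('reviewed/#failed', msg => console.warn('giving up on', msg.data))
// For '#failed', msg.results carries the serialized error, see _errorToObj above.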
for (const task of toBeDeleted) {
// invoke succeeded handlers
if (!task.msg.event.endsWith('/#succeeded') && !task.msg.event.endsWith('/#failed')) {
if (!task[$error]) { // the error is stored under the $error symbol, not as a plain property
// skip programming errors & unrecoverable without maxAttempts
_succeeded(task)
}
}
// handle `every`
if (task.msg.queue?.every) {
const _m = { ...task.msg }
_m._fromSend = task.msg instanceof cds.Request
const _task = _createTask(service.name, _m, task.context, opts)
_task.timestamp = _get100NanosecondTimestampISOString(task.msg.queue.every)
toBeCreated.push(_task)
}
}
// invoke failed handlers (only if max attempts is reached)
for (const task of toBeUpdated) {
if (
!task.msg.event.endsWith('/#succeeded') &&
!task.msg.event.endsWith('/#failed') &&
opts.maxAttempts &&
task.updateData.attempts >= opts.maxAttempts
) {
_failed(task)
}
}
if (toBeCreated.length) queries.push(INSERT.into(tasksEntity).entries(toBeCreated))
if (queries.length) {
await _tx(() => Promise.all(queries))
// prettier-ignore
LOG._debug && LOG.debug(`${name}: Messages modified (-${toBeDeleted.length}, ~${toBeUpdated.length + updateTasks.length}, +${toBeCreated.length})`)
}
if (letAppCrash) return _done()
if (toBeUpdated.length) {
LOG.error(`${name}: Some messages could not be processed`)
_setWaitingTime(waitingTime(minAttemptFailed + 1))
}
if (toBeDeleted.length === opts.chunkSize || toBeCreated.length) {
_setWaitingTime(0)
}
if (currMinWaitingTime !== undefined) {
// prettier-ignore
LOG._debug && LOG.debug(`${name}: Process${currMinWaitingTime > 0 ? ` in ${Math.round(currMinWaitingTime / 1000)} s` : ''}`)
taskRunner.plan(
{
name,
tenant,
waitingTime: currMinWaitingTime
},
() => processTasks(service, tenant, opts)
)
return _done()
}
LOG._debug && LOG.debug(`${name}: Done`)
return _done()
}, config)
_end(spawn)
})
}
const registerTaskProcessor = (name, context) => {
const registry = context[$taskProcessorRegistered] || (context[$taskProcessorRegistered] = new Set())
if (!registry.has(name)) {
registry.add(name)
return true
}
return false
}
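// e.g., within one request context (service name illustrative):
//   registerTaskProcessor('ReviewsService', ctx) // → true, first registration
//   registerTaskProcessor('ReviewsService', ctx) // → false, already registered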
const _createTask = (name, msg, context, taskOpts) => {
const _msg = { [INTERNAL_USER]: context.user.id }
const _newContext = {}
for (const key in context) {
if (!taskOpts.ignoredContext.includes(key)) _newContext[key] = context[key]
}
_msg.context = _newContext
if (msg._fromSend || msg.reply) _msg._fromSend = true // send or emit?
for (const prop of ['inbound', 'event', 'data', 'headers', 'queue', 'results']) {
if (msg[prop]) _msg[prop] = msg[prop]
}
if (msg.query) {
_msg.query = typeof msg.query.flat === 'function' ? msg.query.flat() : msg.query
delete _msg.query._target
delete _msg.query.__target
delete _msg.query.target
delete _msg.data // `req.data` should be a getter to whatever is in `req.query`
}
const taskMsg = {
ID: cds.utils.uuid(),
target: _targetName(name, taskOpts),
timestamp: _get100NanosecondTimestampISOString(msg.queue?.after), // needs to be different for each emit
msg: JSON.stringify(_msg)
}
return taskMsg
}
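// Resulting row (shape only, values illustrative):
//   {
//     ID: '8f7c…',                               // cds.utils.uuid()
//     target: 'ReviewsService',                  // or '<targetPrefix>ReviewsService'
//     timestamp: '2024-05-01T12:00:00.1237890Z', // due date, 100ns resolution
//     msg: '{"cds.internal.user":"alice","context":{…},"event":"reviewed","data":{…}}'
//   }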
const writeInQueue = async (name, msg, context, taskOpts) => {
const taskMsg = _createTask(name, msg, context, taskOpts)
const tasksEntity = _getTasksEntity()
LOG._debug && LOG.debug(`${name}: Write message to queue`)
return INSERT.into(tasksEntity).entries(taskMsg)
}
exports.unqueued = function unqueued(srv) {
return srv[$unqueued] || srv
}
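// Usage sketch (assuming the public cds.queued/cds.unqueued facades delegate here):
//   const srv = await cds.connect.to('ReviewsService') // name is illustrative
//   const q = cds.queued(srv)      // deferred delivery via the queue
//   const direct = cds.unqueued(q) // back to immediate, synchronous delivery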
exports.queued = function queued(srv, customOpts) {
// wrap each service at most once
if (!new.target) {
const former = srv[$queued]
if (former) return former
}
const originalSrv = srv[$unqueued] || srv
const queuedSrv = Object.create(originalSrv)
queuedSrv[$unqueued] = originalSrv
if (!new.target) Object.defineProperty(srv, $queued, { value: queuedSrv })
let requiresOpts = cds.requires.queue
if (cds.requires.outbox !== undefined) {
cds.utils.deprecated({ old: 'cds.requires.outbox', use: 'cds.requires.queue' })
requiresOpts = cds.requires.outbox ? Object.assign({}, requiresOpts, cds.requires.outbox) : false
}
let serviceOpts = srv.options?.queued ?? srv.options?.outboxed
if (srv.options?.outbox !== undefined) {
// REVISIT: cds.utils.deprecated({ old: 'cds.requires.<srv>.outbox', use: 'cds.requires.<srv>.queued or cds.requires.<srv>.outboxed' })
serviceOpts = srv.options.outbox ? Object.assign({}, serviceOpts, srv.options.outbox) : false
}
if (typeof requiresOpts === 'string') requiresOpts = { kind: requiresOpts }
if (typeof serviceOpts === 'string') serviceOpts = { kind: serviceOpts }
const queueOpts = Object.assign(
{},
(typeof requiresOpts === 'object' && requiresOpts) || {},
(typeof serviceOpts === 'object' && serviceOpts) || {},
customOpts || {}
)
queuedSrv.outboxed = queueOpts // Store effective outbox configuration (e.g. used in telemetry)
queuedSrv.handle = async function (req) {
const context = req.context || cds.context
if (
(queueOpts.kind === 'persistent-queue' || queueOpts.kind === 'persistent-outbox') &&
_hasPersistentQueue(context.tenant)
) {
// returns true if not yet registered
if (registerTaskProcessor(srv.name, context)) {
// NOTE: What if there are different queue options for the same service?!
// There could be tasks for srv1 with { maxAttempts: 1 }
// and tasks for srv1 with { maxAttempts: 9 }.
// How would they be processed? I'd rather not have dedicated
// service names or store serialized options for each task.
context.on('succeeded', () => processTasks(originalSrv, context.tenant, queueOpts))
}
await writeInQueue(srv.name, req, context, queueOpts)
return
}
if (!context[$stored_reqs]) {
context[$stored_reqs] = []
context.on('succeeded', async () => {
// REVISIT: Also allow maxAttempts for in-memory queue?
for (const _req of context[$stored_reqs]) {
try {
if (_req.reply) await originalSrv.send(_req)
else await originalSrv.emit(_req)
} catch (e) {
LOG.error('Emit failed', { event: _req.event, cause: e })
if (cds.error.isSystemError(e)) {
await cds.shutdown(e)
return
}
}
}
delete context[$stored_reqs]
})
}
context[$stored_reqs].push(req)
}
queuedSrv.flush = function flush(tenant = cds.context?.tenant, opts) {
return processTasks(originalSrv, tenant, Object.assign({}, queueOpts, opts))
}
return queuedSrv
}
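// Usage sketch (event name and payload are illustrative):
//   const srv = cds.queued(await cds.connect.to('ReviewsService'))
//   await srv.emit('reviewed', { subject: '42', rating: 5 })
//   // With kind 'persistent-queue', the message is stored in cds.outbox.Messages
//   // and processed asynchronously once the current transaction succeeded.
//   // Stored tasks can also be processed on demand, e.g. in tests:
//   await srv.flush()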