UNPKG

@flowfuse/flowfuse

Version:

An open source low-code development platform

395 lines (377 loc) • 18.5 kB
const { default: axios } = require('axios') const { LRUCache } = require('lru-cache') /** * Assistant api routes * * - /api/v1/assistant * * @namespace assistant * @memberof forge.routes.api */ module.exports = async function (app) { const assetCache = new LRUCache({ max: app.config.assistant?.assetCache?.max || 100, ttl: app.config.assistant?.assetCache?.ttl || 30 * 60 * 1000, // Defaults to 1/2 hour cache for assets - enough to handle bursts updateAgeOnGet: false // do not update the age on get, we want it to expire after the original ttl }) const tablesSchemaCache = new LRUCache({ max: app.config.assistant?.tablesSchemaCache?.max || 100, ttl: app.config.assistant?.tablesSchemaCache?.ttl || 5 * 60 * 1000, // Defaults to 5 mins updateAgeOnGet: false // do not update the age on get, we want it to expire after the original ttl }) // decorate the app with the asset cache app.decorate('assistant', { assetCache, tablesSchemaCache }) // Get the assistant service configuration const serviceUrl = app.config.assistant?.service?.url const serviceToken = app.config.assistant?.service?.token const serviceEnabled = app.config.assistant?.enabled !== false && serviceUrl const requestTimeout = app.config.assistant?.service?.requestTimeout || 60000 app.addHook('preHandler', app.verifySession) app.addHook('preHandler', app.needsPermission('assistant:call')) app.addHook('preHandler', async (request, reply) => { if (!serviceEnabled) { return reply.code(501).send({ code: 'service_disabled', error: 'Assistant service is not enabled' }) } if (request.session.ownerType === 'device') { request.owner = await app.db.models.Device.byId(+request.session.ownerId) request.ownerType = 'device' request.ownerId = request.owner.hashid request.team = await app.db.models.Team.byId(request.owner.Team.id) } else if (request.session.ownerType === 'project') { request.owner = await app.db.models.Project.byId(request.session.ownerId) request.ownerType = 'project' request.ownerId = request.owner.id request.team = await app.db.models.Team.byId(request.owner.Team.id) } else if (request.session.ownerType === 'user') { request.ownerType = 'user' request.ownerId = request.session.ownerId } }) /** * Endpoint to serve static assets * This is used to serve static assets required by the assistant plugin * namely, the models and vocabulary files (typically < 1MB in total). * Assets are cached in memory to reduce load during bursts. * The assets are fetched from the assistant service and cached for 30 minutes. */ app.get('/assets/*', { schema: { hide: true // dont show in swagger } }, async (request, reply) => { const upstreamPath = request.params['*'] // the path to the asset if (!upstreamPath) { return reply.code(400).send({ code: 'invalid_path', error: 'Invalid asset path' }) } else if (upstreamPath.startsWith('/')) { return reply.code(400).send({ code: 'invalid_path', error: 'Asset paths must not start with a slash' }) } const targetUrl = new URL(`assets/${upstreamPath}`, serviceUrl).toString() // construct the full URL to the asset const cacheKey = upstreamPath const cachedAsset = assetCache.get(cacheKey) // check if the asset is cached if (cachedAsset) { // FUTURE: add an etag to the cached asset and if the client has an If-None-Match header, check if it matches the cached asset's ETag // If it does, return a 304 Not Modified response // For now, the requester (ff-assistant) does not store the file or ETag so there is no point at this time. // However, if we have a WAF or similar in front of this service, it may add an ETag header to the response // and we can use that to send a 304 Not Modified response reply.code(cachedAsset.statusCode) const responseHeaders = buildAssetResponseHeaders(cachedAsset) for (const headerName in responseHeaders) { reply.header(headerName, responseHeaders[headerName]) } return reply.send(cachedAsset.body) } // Make a get request for asset to the assistant service try { const headers = await buildRequestHeaders(request) const response = await axios.get(targetUrl, { headers, responseType: 'arraybuffer', // Always get response as a buffer to handle both binary and text/json timeout: requestTimeout, validateStatus: (status) => true // Accept all status codes, we will handle them manually }) const responseBody = Buffer.from(response.data) // Store the response in cache, including status, headers, and body const assetToCache = { statusCode: response.status, // Axios uses 'status' for status code headers: { ...response.headers }, body: responseBody // This will be a Node.js Buffer } // cache the asset assetCache.set(upstreamPath, assetToCache) const responseHeaders = buildAssetResponseHeaders(response) for (const headerName in responseHeaders) { reply.header(headerName, responseHeaders[headerName]) } reply.send(response.data) } catch (error) { if (!reply.sent) { reply.code(error.response?.status || 500).send({ code: error.response?.data?.code || 'unexpected_error', error: error.response?.data?.error || error.message }) } } }) /** * Endpoint for FIM (fill-in-the-middle) code completion requests * For now, this is simply a relay to an external assistant service * In the future, we may decide to bring that service inside the core or * use an alternative means of accessing it. * @name /api/v1/assistant/fim/:nodeModule/:nodeType * @static * @memberof forge.routes.api.assistant */ app.post('/fim/:nodeModule/:nodeType', { config: { rateLimit: app.config.rate_limits ? { hook: 'preHandler', // apply the rate as a preHandler so that session is available max: 60, // max requests per window. Since the assistant plugin debounces requests, this should minimise risk of overuse without impacting user experience timeWindow: 30000, // 30 seconds window keyGenerator: (request) => { return request.ownerId || request.ip } } : false }, schema: { hide: true, // dont show in swagger params: { type: 'object', properties: { nodeModule: { type: 'string' }, nodeType: { type: 'string' } } }, body: { type: 'object', properties: { // The prompt to send to the assistant (required) prompt: { type: 'string' }, // A correlation id for the transaction (required) transactionId: { type: 'string' }, // Additional context for the completion (optional) context: { type: 'object', additionalProperties: true } }, required: ['prompt', 'transactionId'] }, response: { 200: { type: 'object', additionalProperties: true }, '4xx': { $ref: 'APIError' } } } }, async (request, reply) => { const inlineDisabled = app.config.assistant?.completions?.inlineEnabled === false const featureEnabled = app.config.features.enabled('assistantInlineCompletions') const featureEnabledForTeam = request.team?.getFeatureProperty('assistantInlineCompletions', false) const isStandaloneSessionUser = request.session.ownerType === 'user' if (inlineDisabled || !featureEnabled || !(isStandaloneSessionUser || featureEnabledForTeam)) { reply.code(404).send({ code: 'not_found', error: 'Not Found - feature not enabled for team' }) return } const nodeModule = request.params.nodeModule const nodeType = request.params.nodeType const supported = [ { nodeModule: 'node-red', nodeName: 'function' }, { nodeModule: '@flowfuse/node-red-dashboard', nodeName: 'ui-template' }, { nodeModule: '@flowfuse/nr-tables-nodes', nodeName: 'tables-query' } ] if (supported.findIndex(item => item.nodeModule === nodeModule && item.nodeName === nodeType) === -1) { // unsupported node return reply.code(400).send({ code: 'not_supported', error: 'Not Supported' }) } // if this is a `flowfuse-tables-query` lets see if tables are enabled and try to get the schema hints let tablesCacheKey = null if (nodeModule === '@flowfuse/nr-tables-nodes' && nodeType === 'tables-query') { const tablesFeatureEnabled = !isStandaloneSessionUser && app.config.features.enabled('tables') && request.team.getFeatureProperty('tables', false) tablesCacheKey = tablesFeatureEnabled && request.team.hashid + '/tables/schema' if (tablesCacheKey) { if (!tablesSchemaCache.has(tablesCacheKey)) { const { getTablesHints } = require('../../lib/assistant.js') const creds = await app.tables.getDatabases(request.team) if (creds && creds.length) { const database = creds[0] // Get the first database try { // Get the DDL const schemaData = await getTablesHints(app, request.team, database.hashid) tablesSchemaCache.set(tablesCacheKey, schemaData) } catch (error) { tablesSchemaCache.set(tablesCacheKey, '') } } } } } // post to the assistant service /fim/:nodeModule/:nodeType endpoint try { const requestMetadata = { instanceType: request.ownerType, instanceId: request.ownerId, additionalHeaders: request.headers } if (request.team) { requestMetadata.teamHashId = request.team.hashid if (app.billing && request.team.getSubscription) { const subscription = await request.team.getSubscription() requestMetadata.isTeamOnTrial = subscription ? subscription.isTrial() : null } } const data = { ...request.body } if (tablesCacheKey) { data.context = data.context || {} data.context.tablesSchema = tablesSchemaCache.get(tablesCacheKey) } const method = `fim/${encodeURIComponent(nodeModule)}/${encodeURIComponent(nodeType)}` const response = await app.db.controllers.Assistant.invokeLLM(method, data, requestMetadata) reply.send(response.data) } catch (error) { reply.code(error.response?.status || 500).send({ code: error.response?.data?.code || 'unexpected_error', error: error.response?.data?.error || error.message }) } }) /** * Endpoint for assistant methods * For now, this is simply a relay to an external assistant service * In the future, we may decide to bring that service inside the core or * use an alternative means of accessing it. */ app.post('/:method', { config: { rateLimit: app.config.rate_limits ? { hook: 'preHandler', // apply the rate as a preHandler so that session is available max: 5, // max requests per window timeWindow: 30000, // 30 seconds keyGenerator: (request) => { return request.ownerId || request.ip } } : false }, schema: { hide: true, // dont show in swagger body: { type: 'object', properties: { // The prompt to send to the assistant (required) prompt: { type: 'string' }, // A correlation id for the transaction (required) transactionId: { type: 'string' }, // Additional context for the function (optional) context: { type: 'object', additionalProperties: true } }, required: ['prompt', 'transactionId'] }, response: { 200: { type: 'object', additionalProperties: true }, '4xx': { $ref: 'APIError' } } } }, async (request, reply) => { const method = request.params.method // the method to call at the assistant service if (/^[a-z0-9_-]+$/.test(method) === false) { return reply.code(400).send({ code: 'invalid_method', error: 'Invalid method name' }) } // if this is a `flowfuse-tables-query` lets see if tables are enabled and try to get the schema hints const tablesFeatureEnabled = app.config.features.enabled('tables') && request.team?.getFeatureProperty('tables', false) const isTablesQuery = tablesFeatureEnabled && method === 'flowfuse-tables-query' const tablesCacheKey = request.team?.hashid + '/tables/schema' if (isTablesQuery) { const { getTablesHints } = require('../../lib/assistant.js') if (!tablesSchemaCache.has(tablesCacheKey)) { const creds = await app.tables.getDatabases(request.team) if (creds && creds.length) { const database = creds[0] // Get the first database try { // Get the DDL const schemaData = await getTablesHints(app, request.team, database.hashid) tablesSchemaCache.set(tablesCacheKey, schemaData) } catch (error) { tablesSchemaCache.set(tablesCacheKey, '') } } } } // post to the assistant service try { let isTeamOnTrial if (app.billing && request.team?.getSubscription) { const subscription = await request.team.getSubscription() isTeamOnTrial = subscription ? subscription.isTrial() : null } const data = { ...request.body } if (isTablesQuery) { data.context = data.context || {} data.context.tablesSchema = tablesSchemaCache.get(tablesCacheKey) } const response = await app.db.controllers.Assistant.invokeLLM( method, data, { teamHashId: request.team?.hashid, instanceType: request.ownerType, instanceId: request.ownerId, additionalHeaders: request.headers, isTeamOnTrial }) reply.send(response.data) } catch (error) { reply.code(error.response?.status || 500).send({ code: error.response?.data?.code || 'unexpected_error', error: error.response?.data?.error || error.message }) } }) async function buildRequestHeaders (request) { const headers = { 'ff-owner-type': request.ownerType, 'ff-owner-id': request.ownerId, 'ff-team-id': request.team?.hashid } // include license information, team id and trial status so that we can make decisions in the assistant service const isLicensed = app.license?.active() || false const licenseType = isLicensed ? (app.license.get('dev') ? 'DEV' : 'EE') : 'CE' const tier = isLicensed ? app.license.get('tier') : null headers['ff-license-active'] = isLicensed headers['ff-license-type'] = licenseType headers['ff-license-tier'] = tier if (app.billing && request.team?.getSubscription) { const subscription = await request.team.getSubscription() headers['ff-team-trial'] = subscription ? subscription.isTrial() : null } if (serviceToken) { headers.Authorization = `Bearer ${serviceToken}` } if (request.headers.accept) { headers.Accept = request.headers.accept } if (request.headers['user-agent']) { headers['User-Agent'] = request.headers['user-agent'] } return headers } function buildAssetResponseHeaders (response) { // A list of headers to pass through from the backend assistant asset service to the client. // They are only added if they are present in the response. // This is to ensure we only pass through headers that are relevant (and avoid passing through sensitive headers). const ASSET_HEADERS_TO_PROXY = [ 'content-type', 'content-length', 'date', 'expires', 'last-modified', 'cache-control', 'vary', 'accept-ranges', 'age' ] const headers = {} const sourceHeaders = response.headers || response // handles both Axios and cached object headers for (const headerName of ASSET_HEADERS_TO_PROXY) { if (sourceHeaders[headerName]) { headers[headerName] = sourceHeaders[headerName] } } return headers } }