UNPKG

@comunica/actor-init-query

Version:
519 lines 25.2 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.HttpServiceSparqlEndpoint = void 0;
const http = require("node:http");
const querystring = require("node:querystring");
const url = require("node:url");
const context_entries_1 = require("@comunica/context-entries");
const core_1 = require("@comunica/core");
const asynciterator_1 = require("asynciterator");
const yargs_1 = require("yargs");
const __1 = require("..");
const CliArgsHandlerBase_1 = require("./cli/CliArgsHandlerBase");
const CliArgsHandlerHttp_1 = require("./cli/CliArgsHandlerHttp");
// Use require instead of import for default exports, to be compatible with variants of esModuleInterop in tsconfig.
const clusterUntyped = require('node:cluster');
const process = require('process/');
const quad = require('rdf-quad');
// Force type on Cluster, because there are issues with the Node.js typings since v18
const cluster = clusterUntyped;
/**
 * An HTTP service that exposes a Comunica engine as a SPARQL endpoint.
 */
class HttpServiceSparqlEndpoint {
    /**
     * @param args Constructor options. The fields read here are `context`, `timeout` (milliseconds,
     *             defaults to 60000), `port` (defaults to 3000), `workers` (defaults to 1),
     *             `freshWorkerPerQuery`, `contextOverride`, `moduleRootPath` and `defaultConfigPath`.
     *             The full object is also forwarded to the engine factory's `create` call.
     */
    constructor(args) {
        this.lastQueryId = 0;
        this.context = args.context || {};
        this.timeout = args.timeout ?? 60_000;
        this.port = args.port ?? 3_000;
        this.workers = args.workers ?? 1;
        this.freshWorkerPerQuery = Boolean(args.freshWorkerPerQuery);
        this.contextOverride = Boolean(args.contextOverride);
        // Note: this is the value returned by `create`; it is awaited in runWorker before use.
        this.engine = new __1.QueryEngineFactoryBase(args.moduleRootPath, args.defaultConfigPath, actorInitQuery => new __1.QueryEngineBase(actorInitQuery)).create(args);
    }
    /**
     * Starts the server
     * @param {string[]} argv The commandline arguments that the script was called with
     * @param {module:stream.internal.Writable} stdout The output stream to log to.
     * @param {module:stream.internal.Writable} stderr The error stream to log errors to.
     * @param {string} moduleRootPath The path to the invoking module.
     * @param {NodeJS.ProcessEnv} env The process env to get constants from.
     * @param {string} defaultConfigPath The path to get the config from if none is defined in the environment.
     * @param {(code: number) => void} exit The callback to invoke to stop the script.
     * @param {ICliArgsHandler[]} cliArgsHandlers Enables manipulation of the CLI arguments and their processing.
     * @return {Promise<void>} A promise that resolves when the server has been started.
     */
    static async runArgsInProcess(argv, stdout, stderr, moduleRootPath, env, defaultConfigPath, exit, cliArgsHandlers = []) {
        const options = await HttpServiceSparqlEndpoint
            .generateConstructorArguments(argv, moduleRootPath, env, defaultConfigPath, stderr, exit, cliArgsHandlers);
        // Constructed outside the try block so that constructor failures still reject the returned promise.
        const endpoint = new HttpServiceSparqlEndpoint(options || {});
        try {
            await endpoint.run(stdout, stderr);
        }
        catch (error) {
            // Streams only accept strings/buffers; writing the raw Error object would throw
            // and prevent `exit` from ever being invoked.
            stderr.write(`${error?.stack ?? error?.message ?? String(error)}\n`);
            exit(1);
        }
    }
    /**
     * Takes parsed commandline arguments and turns them into an object used in the HttpServiceSparqlEndpoint constructor
     * @param {args: string[]} argv The commandline arguments that the script was called with
     * @param {string} moduleRootPath The path to the invoking module.
     * @param {NodeJS.ProcessEnv} env The process env to get constants from.
     * @param {string} defaultConfigPath The path to get the config from if none is defined in the environment.
     * @param stderr The error stream.
     * @param exit An exit process callback.
     * @param {ICliArgsHandler[]} cliArgsHandlers Enables manipulation of the CLI arguments and their processing.
     * @return The constructor options, or the return value of `exit(1)` if the arguments could not be parsed.
     */
    static async generateConstructorArguments(argv, moduleRootPath, env, defaultConfigPath, stderr, exit, cliArgsHandlers) {
        // Populate yargs arguments object
        cliArgsHandlers = [
            new CliArgsHandlerBase_1.CliArgsHandlerBase(),
            new CliArgsHandlerHttp_1.CliArgsHandlerHttp(),
            ...cliArgsHandlers,
        ];
        let argumentsBuilder = (0, yargs_1.default)([]);
        for (const cliArgsHandler of cliArgsHandlers) {
            argumentsBuilder = cliArgsHandler.populateYargs(argumentsBuilder);
        }
        // Extract raw argument values from parsed yargs object, so that we can handle each of them hereafter
        let args;
        try {
            args = await argumentsBuilder.parse(argv);
        }
        catch (error) {
            stderr.write(`${await argumentsBuilder.getHelp()}\n\n${error.message}\n`);
            return exit(1);
        }
        // Invoke args handlers to process any remaining args
        const context = {};
        try {
            for (const cliArgsHandler of cliArgsHandlers) {
                await cliArgsHandler.handleArgs(args, context);
            }
        }
        catch (error) {
            stderr.write(`${error.message}\n`);
            // If `exit` returns (e.g. it is a non-terminating callback), processing continues below.
            exit(1);
        }
        const freshWorkerPerQuery = args.freshWorker;
        const contextOverride = args.contextOverride;
        const port = args.port;
        // The CLI timeout is multiplied by 1000 to obtain the millisecond value used by the constructor.
        const timeout = args.timeout * 1_000;
        const workers = args.workers;
        context[context_entries_1.KeysQueryOperation.readOnly.name] = !args.u;
        const configPath = env.COMUNICA_CONFIG ? env.COMUNICA_CONFIG : defaultConfigPath;
        return {
            defaultConfigPath,
            configPath,
            context,
            freshWorkerPerQuery,
            contextOverride,
            moduleRootPath,
            mainModulePath: moduleRootPath,
            port,
            timeout,
            workers,
        };
    }
    /**
     * Start the HTTP service.
     * @param {module:stream.internal.Writable} stdout The output stream to log to.
     * @param {module:stream.internal.Writable} stderr The error stream to log errors to.
     */
    run(stdout, stderr) {
        if (cluster.isMaster) {
            return this.runMaster(stdout, stderr);
        }
        return this.runWorker(stdout, stderr);
    }
    /**
     * Start the HTTP service as master.
     * @param {module:stream.internal.Writable} stdout The output stream to log to.
     * @param {module:stream.internal.Writable} stderr The error stream to log errors to.
     */
    async runMaster(stdout, stderr) {
        stderr.write(`Server running on http://localhost:${this.port}/sparql\n`);
        // Create workers
        for (let i = 0; i < this.workers; i++) {
            cluster.fork();
        }
        // Attach listeners to each new worker
        cluster.on('listening', (worker) => {
            // Respawn crashed workers
            worker.once('exit', (code, signal) => {
                if (!worker.exitedAfterDisconnect) {
                    if (code === 9 || signal === 'SIGKILL') {
                        stderr.write(`Worker ${worker.process.pid} forcefully killed with ${code || signal}. Killing main process as well.\n`);
                        cluster.disconnect();
                    }
                    else {
                        stderr.write(`Worker ${worker.process.pid} died with ${code || signal}. Starting new worker.\n`);
                        cluster.fork();
                    }
                }
            });
            // Handle worker timeouts: one timer per running query id of this worker
            const workerTimeouts = {};
            worker.on('message', ({ type, queryId }) => {
                if (type === 'start') {
                    stderr.write(`Worker ${worker.process.pid} got assigned a new query (${queryId}).\n`);
                    workerTimeouts[queryId] = setTimeout(() => {
                        try {
                            if (worker.isConnected()) {
                                stderr.write(`Worker ${worker.process.pid} timed out for query ${queryId}.\n`);
                                worker.send('shutdown');
                            }
                        }
                        catch (error) {
                            stderr.write(`Unable to timeout worker ${worker.process.pid}: ${error.message}.\n`);
                        }
                        delete workerTimeouts[queryId];
                    }, this.timeout);
                }
                else if (type === 'end' && workerTimeouts[queryId]) {
                    stderr.write(`Worker ${worker.process.pid} has completed query ${queryId}.\n`);
                    clearTimeout(workerTimeouts[queryId]);
                    delete workerTimeouts[queryId];
                }
            });
        });
        // Disconnect from cluster on SIGINT, so that the process can cleanly terminate
        process.once('SIGINT', () => {
            cluster.disconnect();
        });
    }
    /**
     * Start the HTTP service as worker.
     * @param {module:stream.internal.Writable} stdout The output stream to log to.
     * @param {module:stream.internal.Writable} stderr The error stream to log errors to.
     */
    async runWorker(stdout, stderr) {
        const engine = await this.engine;
        // Determine the allowed media types for requests
        const mediaTypes = await engine.getResultMediaTypes();
        const variants = Object.keys(mediaTypes)
            .map(type => ({ type, quality: mediaTypes[type] }));
        // Start the server
        // eslint-disable-next-line ts/no-misused-promises
        const server = http.createServer(this.handleRequest.bind(this, engine, variants, stdout, stderr));
        server.listen(this.port);
        stderr.write(`Server worker (${process.pid}) running on http://localhost:${this.port}/sparql\n`);
        // Keep track of all open connections
        const openConnections = new Set();
        server.on('request', (request, response) => {
            openConnections.add(response);
            response.on('close', () => {
                openConnections.delete(response);
            });
        });
        // Shared shutdown sequence: refuse new connections, end each open response
        // with the given marker (one after the other), then exit the worker with code 15.
        const closeConnectionsAndExit = async (finalChunk) => {
            // Stop new connections from being accepted
            server.close();
            // Close all open connections
            for (const connection of openConnections) {
                await new Promise(resolve => connection.end(finalChunk, resolve));
            }
            // Kill the worker once the connections have been closed
            process.exit(15);
        };
        // Subscribe to shutdown messages
        // eslint-disable-next-line ts/no-misused-promises
        process.on('message', async (message) => {
            if (message === 'shutdown') {
                stderr.write(`Shutting down worker ${process.pid} with ${openConnections.size} open connections.\n`);
                await closeConnectionsAndExit('!TIMEDOUT!');
            }
        });
        // Catch global errors, and cleanly close open connections
        // eslint-disable-next-line ts/no-misused-promises
        process.on('uncaughtException', async (error) => {
            stderr.write(`Terminating worker ${process.pid} with ${openConnections.size} open connections due to uncaught exception.\n`);
            // A thrown value is not guaranteed to be an Error; never pass undefined to write()
            stderr.write(error?.stack ?? String(error));
            await closeConnectionsAndExit('!ERROR!');
        });
    }
    /**
     * Handles an HTTP request.
     * @param {QueryEngineBase} engine A SPARQL engine.
     * @param {{type: string; quality: number}[]} variants Allowed variants.
     * @param {module:stream.internal.Writable} stdout Output stream.
     * @param {module:stream.internal.Writable} stderr Error output stream.
     * @param {module:http.IncomingMessage} request Request object.
     * @param {module:http.ServerResponse} response Response object.
     */
    async handleRequest(engine, variants, stdout, stderr, request, response) {
        const negotiated = require('negotiate').choose(variants, request)
            .sort((first, second) => second.qts - first.qts);
        const variant = request.headers.accept ? negotiated[0] : null;
        // Require qts strictly larger than 2, as 1 and 2 respectively allow * and */* matching.
        // For qts 0, 1, and 2, we fallback to our built-in media type defaults, for which we pass null.
        const mediaType = variant && variant.qts > 2 ? variant.type : null;
        // Verify the path
        // eslint-disable-next-line node/no-deprecated-api
        const requestUrl = url.parse(request.url ?? '', true);
        if (requestUrl.pathname === '/' || request.url === '/') {
            stdout.write('[301] Permanently moved. Redirected to /sparql.\n');
            response.writeHead(301, {
                'content-type': HttpServiceSparqlEndpoint.MIME_JSON,
                'Access-Control-Allow-Origin': '*',
                Location: `http://localhost:${this.port}/sparql${requestUrl.search ?? ''}`,
            });
            response.end(JSON.stringify({ message: 'Queries are accepted on /sparql. Redirected.' }));
            return;
        }
        if (requestUrl.pathname !== '/sparql') {
            stdout.write('[404] Resource not found. Queries are accepted on /sparql.\n');
            response.writeHead(404, { 'content-type': HttpServiceSparqlEndpoint.MIME_JSON, 'Access-Control-Allow-Origin': '*' });
            response.end(JSON.stringify({ message: 'Resource not found. Queries are accepted on /sparql.' }));
            return;
        }
        // Parse the query, depending on the HTTP method
        let queryBody;
        switch (request.method) {
            case 'POST':
                try {
                    queryBody = await this.parseBody(request);
                }
                catch (error) {
                    // A malformed body is a client error; answer it here instead of letting the
                    // rejection escape the request handler, where nothing would catch it.
                    stdout.write('[400] Bad request\n');
                    response.writeHead(400, { 'content-type': HttpServiceSparqlEndpoint.MIME_PLAIN, 'Access-Control-Allow-Origin': '*' });
                    response.end(error.message);
                    return;
                }
                await this.writeQueryResult(engine, stdout, stderr, request, response, queryBody, mediaType, false, false, this.lastQueryId++);
                break;
            case 'HEAD':
            case 'GET': {
                const queryValue = requestUrl.query.query;
                queryBody = queryValue ? { type: 'query', value: queryValue, context: undefined } : undefined;
                const headOnly = request.method === 'HEAD';
                await this.writeQueryResult(engine, stdout, stderr, request, response, queryBody, mediaType, headOnly, true, this.lastQueryId++);
                break;
            }
            default:
                stdout.write(`[405] ${request.method} to ${request.url}\n`);
                response.writeHead(405, { 'content-type': HttpServiceSparqlEndpoint.MIME_JSON, 'Access-Control-Allow-Origin': '*' });
                response.end(JSON.stringify({ message: 'Incorrect HTTP method' }));
        }
    }
    /**
     * Writes the result of the given SPARQL query.
     * @param {QueryEngineBase} engine A SPARQL engine.
     * @param {module:stream.internal.Writable} stdout Output stream.
     * @param {module:stream.internal.Writable} stderr Error output stream.
     * @param {module:http.IncomingMessage} request Request object.
     * @param {module:http.ServerResponse} response Response object.
     * @param {IQueryBody | undefined} queryBody The query body.
     * @param {string} mediaType The requested response media type.
     * @param {boolean} headOnly If only the header should be written.
     * @param {boolean} readOnly If only data can be read, but not updated. (i.e., if we're in a GET request)
     * @param queryId The unique id of this query.
     */
    async writeQueryResult(engine, stdout, stderr, request, response, queryBody, mediaType, headOnly, readOnly, queryId) {
        // Without a query, answer with the service description instead
        if (!queryBody || !queryBody.value) {
            return this.writeServiceDescription(engine, stdout, stderr, request, response, mediaType, headOnly);
        }
        // Log the start of the query execution
        stdout.write(`[200] ${request.method} to ${request.url}\n`);
        stdout.write(` Requested media type: ${mediaType}\n`);
        stdout.write(` Received ${queryBody.type} query: ${queryBody.value}\n`);
        // Send message to master process to indicate the start of an execution
        process.send({ type: 'start', queryId });
        // Determine context; the per-request context is only merged in when contextOverride is enabled
        let context = {
            ...this.context,
            ...this.contextOverride ? queryBody.context : undefined,
        };
        if (readOnly) {
            context = { ...context, [context_entries_1.KeysQueryOperation.readOnly.name]: readOnly };
        }
        let result;
        try {
            result = await engine.query(queryBody.value, context);
            // For update queries, also await the result
            if (result.resultType === 'void') {
                await result.execute();
            }
        }
        catch (error) {
            stdout.write('[400] Bad request\n');
            response.writeHead(400, { 'content-type': HttpServiceSparqlEndpoint.MIME_PLAIN, 'Access-Control-Allow-Origin': '*' });
            response.end(error.message);
            return;
        }
        // Default to SPARQL JSON for bindings and boolean
        if (!mediaType) {
            switch (result.resultType) {
                case 'quads':
                    mediaType = 'application/trig';
                    break;
                case 'void':
                    mediaType = 'simple';
                    break;
                default:
                    mediaType = 'application/sparql-results+json';
                    break;
            }
        }
        // Write header of response
        response.writeHead(200, { 'content-type': mediaType, 'Access-Control-Allow-Origin': '*' });
        stdout.write(` Resolved to result media type: ${mediaType}\n`);
        // Stop further processing for HEAD requests
        if (headOnly) {
            response.end();
            return;
        }
        let eventEmitter;
        try {
            const { data } = await engine.resultToString(result, mediaType);
            data.on('error', (error) => {
                stdout.write(`[500] Server error in results: ${error.message} \n`);
                if (!response.writableEnded) {
                    response.end('An internal server error occurred.\n');
                }
            });
            data.pipe(response);
            eventEmitter = data;
        }
        catch {
            stdout.write('[400] Bad request, invalid media type\n');
            // The 200 header has normally been sent above; calling writeHead a second time throws,
            // so the status can only be changed when the headers are still pending.
            if (!response.headersSent) {
                response.writeHead(400, { 'content-type': HttpServiceSparqlEndpoint.MIME_PLAIN, 'Access-Control-Allow-Origin': '*' });
            }
            response.end('The response for the given query could not be serialized for the requested media type\n');
        }
        // Send message to master process to indicate the end of an execution
        response.on('close', () => {
            process.send({ type: 'end', queryId });
        });
        this.stopResponse(response, queryId, process.stderr, eventEmitter);
    }
    /**
     * Writes a SPARQL service description for this endpoint, serialized in the given media type.
     * @param {QueryEngineBase} engine A SPARQL engine.
     * @param {module:stream.internal.Writable} stdout Output stream.
     * @param {module:stream.internal.Writable} stderr Error output stream.
     * @param {module:http.IncomingMessage} request Request object.
     * @param {module:http.ServerResponse} response Response object.
     * @param {string} mediaType The requested response media type.
     * @param {boolean} headOnly If only the header should be written.
     */
    async writeServiceDescription(engine, stdout, stderr, request, response, mediaType, headOnly) {
        stdout.write(`[200] ${request.method} to ${request.url}\n`);
        stdout.write(` Requested media type: ${mediaType}\n`);
        stdout.write(' Received query for service description.\n');
        response.writeHead(200, { 'content-type': mediaType, 'Access-Control-Allow-Origin': '*' });
        if (headOnly) {
            response.end();
            return;
        }
        // The request URL is used as subject of all description triples
        const s = request.url;
        const sd = 'http://www.w3.org/ns/sparql-service-description#';
        const quads = [
            // Basic metadata
            quad(s, 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', `${sd}Service`),
            quad(s, `${sd}endpoint`, '/sparql'),
            quad(s, `${sd}url`, '/sparql'),
            // Features
            quad(s, `${sd}feature`, `${sd}BasicFederatedQuery`),
            quad(s, `${sd}supportedLanguage`, `${sd}SPARQL10Query`),
            quad(s, `${sd}supportedLanguage`, `${sd}SPARQL11Query`),
        ];
        let eventEmitter;
        try {
            // Append result formats
            const formats = await engine.getResultMediaTypeFormats(new core_1.ActionContext(this.context));
            for (const format of Object.keys(formats)) {
                quads.push(quad(s, `${sd}resultFormat`, formats[format]));
            }
            // Flush results
            const { data } = await engine.resultToString({
                resultType: 'quads',
                execute: async () => new asynciterator_1.ArrayIterator(quads),
                metadata: undefined,
            }, mediaType);
            data.on('error', (error) => {
                stdout.write(`[500] Server error in results: ${error.message} \n`);
                if (!response.writableEnded) {
                    response.end('An internal server error occurred.\n');
                }
            });
            data.pipe(response);
            eventEmitter = data;
        }
        catch {
            stdout.write('[400] Bad request, invalid media type\n');
            // The 200 header has already been sent above; a second writeHead would throw.
            if (!response.headersSent) {
                response.writeHead(400, { 'content-type': HttpServiceSparqlEndpoint.MIME_PLAIN, 'Access-Control-Allow-Origin': '*' });
            }
            response.end('The response for the given query could not be serialized for the requested media type\n');
            return;
        }
        this.stopResponse(response, 0, process.stderr, eventEmitter);
    }
    /**
     * Stop after timeout or if the connection is terminated
     * @param {module:http.ServerResponse} response Response object.
     * @param queryId The unique query id.
     * @param stderr Error stream to write to.
     * @param {NodeJS.ReadableStream} eventEmitter Query result stream.
     */
    stopResponse(response, queryId, stderr, eventEmitter) {
        const killClient = () => {
            if (eventEmitter) {
                // Remove all listeners so we are sure no more write calls are made
                eventEmitter.removeAllListeners();
                eventEmitter.on('error', () => {
                    // Void any errors that may still occur
                });
                eventEmitter.emit('end');
            }
            try {
                response.end();
            }
            catch {
                // Do nothing
            }
            // Kill the worker if we want fresh workers per query
            if (this.freshWorkerPerQuery) {
                stderr.write(`Killing fresh worker ${process.pid} after query ${queryId}.\n`);
                // eslint-disable-next-line unicorn/no-process-exit
                process.exit(15);
            }
        };
        response.on('close', killClient);
    }
    /**
     * Parses the body of a SPARQL POST request
     * @param {module:http.IncomingMessage} request Request object.
     * @return {Promise<IQueryBody>} A promise resolving to a query body object.
     *         It rejects on request stream errors, on an unparsable `context` form field,
     *         and when the query type cannot be derived from the content type and body.
     */
    parseBody(request) {
        return new Promise((resolve, reject) => {
            let body = '';
            request.setEncoding('utf8');
            request.on('error', reject);
            request.on('data', (chunk) => {
                body += chunk;
            });
            request.on('end', () => {
                const contentType = request.headers['content-type'];
                if (contentType) {
                    if (contentType.includes('application/sparql-query')) {
                        return resolve({ type: 'query', value: body, context: undefined });
                    }
                    if (contentType.includes('application/sparql-update')) {
                        return resolve({ type: 'void', value: body, context: undefined });
                    }
                    if (contentType.includes('application/x-www-form-urlencoded')) {
                        const bodyStructure = querystring.parse(body);
                        let context;
                        if (bodyStructure.context) {
                            try {
                                context = JSON.parse(bodyStructure.context);
                            }
                            catch (error) {
                                // Stop here: the promise is settled, nothing below may run anymore
                                return reject(new Error(`Invalid POST body with context received ('${bodyStructure.context}'): ${error.message}`));
                            }
                        }
                        if (bodyStructure.query) {
                            return resolve({ type: 'query', value: bodyStructure.query, context });
                        }
                        if (bodyStructure.update) {
                            return resolve({ type: 'void', value: bodyStructure.update, context });
                        }
                    }
                }
                reject(new Error(`Invalid POST body received, query type could not be determined`));
            });
        });
    }
}
exports.HttpServiceSparqlEndpoint = HttpServiceSparqlEndpoint;
HttpServiceSparqlEndpoint.MIME_PLAIN = 'text/plain';
HttpServiceSparqlEndpoint.MIME_JSON = 'application/json';
/* eslint-enable import/no-nodejs-modules */
//# sourceMappingURL=HttpServiceSparqlEndpoint.js.map