UNPKG

plyql

Version:

A SQL-like interface for Plywood

340 lines (339 loc) 20.6 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
var tslib_1 = require("tslib");
var fs = require("fs");
var path = require("path");
var hasOwnProp = require("has-own-prop");
var nopt = require("nopt");
var chronoshift_1 = require("chronoshift");
var plywood_1 = require("plywood");
var requester_1 = require("./requester");
var plyql_executor_1 = require("./plyql-executor");
var output_transform_1 = require("./output-transform");
var variables_1 = require("./variables");
var status_1 = require("./status");
var schema_1 = require("./schema");
var datasets_1 = require("./datasets");

/**
 * Parses a JSON value given on the command line.
 *
 * A value starting with '@' is treated as a file reference: the remainder is
 * used as a path and the file's contents are parsed instead.
 *
 * @param {string|undefined} json - raw option value, or undefined when the option was not given
 * @returns {*} the parsed JSON value, or null when `json` is undefined
 * @throws {TypeError} when `json` is defined but not a string
 * @throws {Error} when the referenced file can not be read, or the text is not valid JSON
 */
function loadOrParseJSON(json) {
    if (typeof json === 'undefined')
        return null;
    if (typeof json !== 'string')
        throw new TypeError("load or parse must get a string");
    if (json[0] === '@') {
        try {
            // slice(1) drops the leading '@' (replaces the deprecated substr(1)).
            json = fs.readFileSync(json.slice(1), 'utf-8');
        }
        catch (e) {
            // `json` still holds the '@path' form here, so the message names the file.
            throw new Error("can not load: " + json);
        }
    }
    try {
        return JSON.parse(json);
    }
    catch (e) {
        throw new Error("can not parse: " + json);
    }
}

/**
 * Prints the command line usage text to stdout.
 *
 * The text is assembled from one entry per output line; the leading and
 * trailing empty entries produce the blank line before and the newline after
 * the text.
 */
function printUsage() {
    var usageLines = [
        "",
        "Usage: plyql [options]",
        "",
        "Examples:",
        " plyql -h 10.20.30.40 -q 'SHOW TABLES'",
        "",
        " plyql -h 10.20.30.40 -q 'DESCRIBE twitterstream'",
        "",
        " plyql -h 10.20.30.40 -q 'SELECT MAX(__time) AS maxTime FROM twitterstream'",
        "",
        " plyql -h 10.20.30.40 -s twitterstream -i P5D -q \\",
        " 'SELECT SUM(tweet_length) as TotalTweetLength WHERE first_hashtag = \"#imply\"'",
        "",
        "Arguments:",
        "",
        " --help print this help message",
        " --version display the version number",
        " -v, --verbose display the queries that are being made",
        " -h, --host the host to connect to",
        " -s, --source use this source for the query (supersedes FROM clause)",
        " -i, --interval add (AND) a __time filter between NOW-INTERVAL and NOW",
        " -Z, --timezone the default timezone",
        " -o, --output the output format. Possible values: table (default), json, csv, tsv, flat, plywood, plywood-stream",
        " -t, --timeout the time before a query is timed out in ms (default: 180000)",
        " -r, --retry the number of tries a query should be attempted on error, 0 = unlimited, (default: 2)",
        " -c, --concurrent the limit of concurrent queries that could be made simultaneously, 0 = unlimited, (default: 2)",
        " --rollup use rollup mode [COUNT() -> SUM(count)]",
        "",
        " -q, --query the query to run",
        " --json-server the port on which to start the json server",
        " --experimental-mysql-gateway [Experimental] the port on which to start the MySQL gateway server",
        "",
        " --druid-version Assume this is the Druid version and do not query it",
        " --custom-aggregations A JSON string defining custom aggregations",
        " --custom-transforms A JSON string defining custom transforms",
        " --druid-context A JSON string representing the Druid context to use",
        " --skip-cache disable Druid caching",
        " --group-by-v2 Set groupByStrategy to 'v2' in the context to ensure use of the V2 GroupBy engine",
        " --introspection-strategy Druid introspection strategy",
        " Possible values:",
        " * segment-metadata-fallback - (default) use the segmentMetadata and fallback to GET route",
        " * segment-metadata-only - only use the segmentMetadata query",
        " * datasource-get - only use GET /druid/v2/datasources/DATASOURCE route",
        "",
        " --socks-host use this socks host to facilitate a Druid connection",
        " --socks-username the username for the socks proxy",
        " --socks-password the password for the socks proxy",
        "",
        " --force-time force a column to be interpreted as a time column",
        " --force-string force a column to be interpreted as a string column",
        " --force-boolean force a column to be interpreted as a boolean",
        " --force-number force a column to be interpreted as a number",
        " --force-unique force a column to be interpreted as a hyperLogLog uniques",
        " --force-theta force a column to be interpreted as a theta sketch",
        " --force-histogram force a column to be interpreted as an approximate histogram",
        ""
    ];
    console.log(usageLines.join("\n"));
}

/**
 * Prints the plyql version (read from ../package.json relative to this file)
 * together with the plywood version. If the package file can not be read or
 * parsed, a message is logged and nothing else is printed.
 */
function printVersion() {
    var cliPackageFilename = path.join(__dirname, '..', 'package.json');
    var cliPackage;
    try {
        cliPackage = JSON.parse(fs.readFileSync(cliPackageFilename, 'utf8'));
    }
    catch (e) {
        console.log("could not read cli package", e.message);
        return;
    }
    console.log("plyql version " + cliPackage.version + " (plywood version " + plywood_1.version + ")");
}

/**
 * Parses process.argv with nopt using the known option types and the
 * single-letter shorthands.
 *
 * @returns {Object} the object produced by nopt (option values keyed by long option name)
 */
function parseArguments() {
    var knownOptions = {
        "host": String,
        "druid": String,
        "source": String,
        "data-source": String,
        "help": Boolean,
        "query": String,
        "json-server": Number,
        "experimental-mysql-gateway": Number,
        "interval": String,
        "timezone": String,
        "version": Boolean,
        "verbose": Boolean,
        "timeout": Number,
        "retry": Number,
        "concurrent": Number,
        "output": String,
        "force-time": [String, Array],
        "force-string": [String, Array],
        "force-boolean": [String, Array],
        "force-number": [String, Array],
        "force-unique": [String, Array],
        "force-theta": [String, Array],
        "force-histogram": [String, Array],
        "druid-version": String,
        "custom-aggregations": String,
        "custom-transforms": String,
        "druid-context": String,
        "druid-time-attribute": String,
        "rollup": Boolean,
        "skip-cache": Boolean,
        "group-by-v2": Boolean,
        "introspection-strategy": String,
        "socks-host": String,
        "socks-user": String,
        "socks-username": String,
        "socks-password": String
    };
    var shortHands = {
        "v": ["--verbose"],
        "h": ["--host"],
        "s": ["--source"],
        "i": ["--interval"],
        "Z": ["--timezone"],
        "o": ["--output"],
        "t": ["--timeout"],
        "r": ["--retry"],
        "c": ["--concurrent"],
        "q": ["--query"]
    };
    return nopt(knownOptions, shortHands, process.argv);
}
exports.parseArguments = parseArguments;

// Output formats accepted by --output. 'plywood' is included because both the
// usage text and the validation error message advertise it.
// NOTE(review): confirm that getOutputTransform in ./output-transform handles 'plywood'.
var VALID_OUTPUTS = ['table', 'json', 'csv', 'tsv', 'flat', 'plywood', 'plywood-stream'];

/**
 * Builds the attribute override list from the --force-* options.
 *
 * Overrides are emitted in a fixed order (time, string, boolean, number,
 * unique, theta, histogram); within one option the order given on the command
 * line is kept.
 *
 * @param {Object} parsed - parsed command line options
 * @returns {Array<Object>} override descriptors: { name, type } or { name, nativeType }
 */
function collectAttributeOverrides(parsed) {
    var overrides = [];
    function addOverrides(optionName, key, value) {
        var attributeNames = parsed[optionName] || [];
        for (var i = 0; i < attributeNames.length; i++) {
            var override = { name: attributeNames[i] };
            override[key] = value;
            overrides.push(override);
        }
    }
    addOverrides('force-time', 'type', 'TIME');
    addOverrides('force-string', 'type', 'STRING');
    addOverrides('force-boolean', 'type', 'BOOLEAN');
    addOverrides('force-number', 'type', 'NUMBER');
    addOverrides('force-unique', 'nativeType', 'hyperUnique');
    addOverrides('force-theta', 'nativeType', 'thetaSketch');
    addOverrides('force-histogram', 'nativeType', 'approximateHistogram');
    return overrides;
}

/**
 * Runs plyql for an already parsed command line.
 *
 * Depending on the options this prints usage/version, executes a single query
 * and pipes the formatted result to stdout, or starts the JSON server or the
 * MySQL gateway.
 *
 * The body is a tslib generator state machine (compiled async function).
 * Instruction tuples returned from the step function follow the tslib
 * convention: [2, v] returns v, [3, n] jumps to label n, [4, p] awaits p and
 * resumes at the next label with the result available via state.sent().
 *
 * @param {Object} parsed - options as returned by parseArguments()
 * @returns {Promise<null>} resolves once the query stream has been wired up or the server has been started
 * @throws rejects with an Error on invalid options, an unparsable interval or query, or an unsupported SQL verb
 */
function run(parsed) {
    return tslib_1.__awaiter(this, void 0, void 0, function () {
        var verbose, attributeOverrides, output, host, druidVersion, timezone, timeout, retry, concurrent,
            customAggregations, customTransforms, druidContext, timeAttribute, filter, intervalString,
            interval, parsedInterval, masterSource, mode, sqlParse, serverPort, query, socksHost,
            socksUsername, socksPassword, requester, onlyDataSource, sources, context, variablesDataset,
            statusDataset, introspectedExternals, valueStream;
        return tslib_1.__generator(this, function (state) {
            switch (state.label) {
                case 0:
                    // ---- informational modes ----
                    if (parsed.argv.original.length === 0 || parsed.help) {
                        printUsage();
                        return [2, null];
                    }
                    if (parsed['version']) {
                        printVersion();
                        return [2, null];
                    }
                    verbose = parsed['verbose'];
                    if (verbose)
                        printVersion();

                    // ---- option validation and defaults ----
                    attributeOverrides = collectAttributeOverrides(parsed);
                    output = (parsed['output'] || 'table').toLowerCase();
                    if (VALID_OUTPUTS.indexOf(output) === -1) {
                        throw new Error("output must be one of table, json, csv, tsv, flat, plywood, or plywood-stream (is " + output + ")");
                    }
                    // --druid takes precedence over --host when both are given.
                    host = parsed['druid'] || parsed['host'];
                    if (!host) {
                        throw new Error("must have a host");
                    }
                    druidVersion = parsed['druid-version'];
                    timezone = chronoshift_1.Timezone.UTC;
                    if (parsed['timezone']) {
                        timezone = chronoshift_1.Timezone.fromJS(parsed['timezone']);
                    }
                    // hasOwnProp (rather than ||) so that an explicit 0 is kept.
                    timeout = hasOwnProp(parsed, 'timeout') ? parsed['timeout'] : 180000;
                    retry = hasOwnProp(parsed, 'retry') ? parsed['retry'] : 2;
                    concurrent = hasOwnProp(parsed, 'concurrent') ? parsed['concurrent'] : 2;
                    customAggregations = loadOrParseJSON(parsed['custom-aggregations']);
                    customTransforms = loadOrParseJSON(parsed['custom-transforms']);

                    // ---- Druid context ----
                    druidContext = loadOrParseJSON(parsed['druid-context']) || {};
                    druidContext.timeout = timeout;
                    if (parsed['skip-cache']) {
                        druidContext.useCache = false;
                        druidContext.populateCache = false;
                    }
                    if (parsed['group-by-v2']) {
                        druidContext['groupByStrategy'] = 'v2';
                    }

                    // ---- optional time filter from --interval ----
                    timeAttribute = parsed['druid-time-attribute'] || '__time';
                    filter = null;
                    intervalString = parsed['interval'];
                    if (intervalString) {
                        interval = void 0;
                        try {
                            parsedInterval = chronoshift_1.parseInterval(intervalString, timezone);
                            interval = plywood_1.TimeRange.fromJS({
                                start: parsedInterval.computedStart,
                                end: parsedInterval.computedEnd
                            });
                        }
                        catch (e) {
                            throw new Error("Could not parse interval: " + intervalString);
                        }
                        filter = plywood_1.$(timeAttribute).overlap(interval);
                    }
                    masterSource = parsed['source'] || parsed['data-source'] || null;

                    // ---- mode selection: at most one of the three may be set ----
                    if (Number(!!parsed['query']) + Number(!!parsed['json-server']) + Number(!!parsed['experimental-mysql-gateway']) > 1) {
                        throw new Error("must set exactly one of --query (-q), --json-server, or --experimental-mysql-gateway");
                    }
                    if (parsed['query']) {
                        mode = 'query';
                        query = parsed['query'];
                        if (verbose) {
                            console.log('Received query:');
                            console.log(query);
                            console.log('---------------------------');
                        }
                        try {
                            sqlParse = plywood_1.Expression.parseSQL(query, timezone);
                        }
                        catch (e) {
                            throw new Error("Could not parse query: " + e.message);
                        }
                        if (sqlParse.verb && sqlParse.verb !== 'SELECT') {
                            throw new Error("Unsupported SQL verb " + sqlParse.verb + " must be SELECT, DESCRIBE, SHOW, or a raw expression");
                        }
                        if (verbose && sqlParse.expression) {
                            console.log('Parsed query as the following plywood expression (as JSON):');
                            console.log(JSON.stringify(sqlParse.expression, null, 2));
                            console.log('---------------------------');
                        }
                    }
                    else if (parsed['json-server']) {
                        mode = 'server';
                        serverPort = parsed['json-server'];
                    }
                    else if (parsed['experimental-mysql-gateway']) {
                        mode = 'gateway';
                        serverPort = parsed['experimental-mysql-gateway'];
                    }
                    else {
                        throw new Error("must set one of --query (-q), --json-server, or --experimental-mysql-gateway");
                    }

                    // ---- requester ----
                    socksHost = parsed['socks-host'];
                    if (socksHost) {
                        // Socks credentials are only read when a socks host is configured.
                        socksUsername = parsed['socks-username'] || parsed['socks-user'];
                        socksPassword = parsed['socks-password'];
                    }
                    requester = requester_1.properDruidRequesterFactory({
                        druidHost: host,
                        retry: retry,
                        timeout: timeout,
                        verbose: verbose,
                        concurrentLimit: concurrent,
                        socksHost: socksHost,
                        socksUsername: socksUsername,
                        socksPassword: socksPassword
                    });
                    // Skip the version lookup when --druid-version was given.
                    if (!!druidVersion)
                        return [3, 2];
                    return [4, plywood_1.DruidExternal.getVersion(requester)];
                case 1:
                    druidVersion = state.sent();
                    state.label = 2;
                case 2:
                    // A single known source (from --source or the query's table) avoids listing all sources.
                    onlyDataSource = masterSource || (sqlParse ? sqlParse.table : null);
                    if (!onlyDataSource)
                        return [3, 3];
                    sources = [onlyDataSource];
                    return [3, 5];
                case 3:
                    return [4, plywood_1.DruidExternal.getSourceList(requester)];
                case 4:
                    sources = state.sent();
                    state.label = 5;
                case 5:
                    if (verbose && !onlyDataSource) {
                        console.log("Found sources [" + sources.join(',') + "]");
                    }
                    context = {};
                    if (mode === 'gateway') {
                        // Extra datasets that are only registered in gateway mode.
                        variablesDataset = variables_1.getVariablesDataset();
                        context['GLOBAL_VARIABLES'] = variablesDataset;
                        context['SESSION_VARIABLES'] = variablesDataset;
                        statusDataset = status_1.getStatusDataset();
                        context['GLOBAL_STATUS'] = statusDataset;
                        context['SESSION_STATUS'] = statusDataset;
                        context['CHARACTER_SETS'] = datasets_1.getCharacterSetsDataset();
                        context['COLLATIONS'] = datasets_1.getCollationsDataset();
                        context['KEY_COLUMN_USAGE'] = datasets_1.getKeyColumnUsageDataset();
                        context['INDEX'] = datasets_1.getIndexDataset();
                        context['WARNINGS'] = datasets_1.getWarningsDataset();
                    }
                    // Introspect every source concurrently.
                    return [4, Promise.all(sources.map(function (source) {
                            return plywood_1.External.fromJS({
                                engine: 'druid',
                                version: druidVersion,
                                source: source,
                                rollup: parsed['rollup'],
                                timeAttribute: timeAttribute,
                                allowEternity: true,
                                allowSelectQueries: true,
                                introspectionStrategy: parsed['introspection-strategy'],
                                context: druidContext,
                                customAggregations: customAggregations,
                                customTransforms: customTransforms,
                                filter: filter,
                                attributeOverrides: attributeOverrides
                            }, requester).introspect();
                        }))];
                case 6:
                    introspectedExternals = state.sent();
                    introspectedExternals.forEach(function (introspectedExternal) {
                        var source = introspectedExternal.source;
                        context[source] = introspectedExternal;
                        schema_1.addExternal(source, introspectedExternal, mode === 'gateway');
                    });
                    context['SCHEMATA'] = schema_1.getSchemataDataset();
                    context['TABLES'] = schema_1.getTablesDataset();
                    context['COLUMNS'] = schema_1.getColumnsDataset();
                    // With --source and a query that names no table (and is not a rewrite),
                    // expose the source under the name 'data'.
                    if (mode === 'query' && masterSource && !sqlParse.table && !sqlParse.rewrite) {
                        context['data'] = context[masterSource];
                    }
                    if (verbose)
                        console.log("introspection complete");
                    switch (mode) {
                        case 'query':
                            valueStream = plyql_executor_1.executeSQLParseStream(sqlParse, context, timezone);
                            valueStream.on('error', function (e) {
                                console.error("Could not compute query due to error: " + e.message);
                            });
                            valueStream
                                .pipe(output_transform_1.getOutputTransform(output, timezone))
                                .pipe(process.stdout);
                            return [2, null];
                        case 'gateway':
                            // Required lazily so the gateway module is only loaded when used.
                            require('./plyql-mysql-gateway').plyqlMySQLGateway(serverPort, context, timezone, null);
                            return [2, null];
                        case 'server':
                            // Required lazily so the server module is only loaded when used.
                            require('./plyql-json-server').plyqlJSONServer(serverPort, context, timezone, null);
                            return [2, null];
                        default:
                            throw new Error("unsupported mode " + mode);
                    }
            }
        });
    });
}
exports.run = run;