/*
 * UNPKG
 *
 * clickhouse
 *
 * Version:
 * 1,048 lines (833 loc) 21.9 kB
 */
'use strict'; const zlib = require('zlib'); const _ = require('lodash'); const request = require('request'); const { Transform, Readable, } = require('stream'); const JSONStream = require('JSONStream'); const through = require('through'); const stream2asynciter = require('stream2asynciter'); const { URL } = require('url'); const tsv = require('tsv'); const uuidv4 = require('uuid/v4'); const INSERT_FIELDS_MASK = /^INSERT\sINTO\s(.+?)\s*\(((\n|.)+?)\)/i; /** * Content-Encoding: gzip * Accept-Encoding: gzip * и включить настройку ClickHouse enable_http_compression. * * session_id * * session_timeout */ const SEPARATORS = { TSV: "\t", CSV: ",", Values: "," }; const ALIASES = { TabSeparated: "TSV" }; var ESCAPE_STRING = { /** * @return {string} */ TSV: function (value) { return value .replace(/\\/g, '\\\\') .replace(/\'/g, '\\\'') .replace(/\t/g, '\\t') .replace(/\n/g, '\\n'); }, CSV: function (value) { return value.replace (/\"/g, '""'); }, }; var ESCAPE_NULL = { TSV: "\\N", CSV: "\\N", Values: "\\N", JSONEachRow: "\\N", }; const R_ERROR = new RegExp('(Code|Error): ([0-9]{2})[,.] 
.*Exception: (.+?)$', 'm'); const URI = 'localhost'; const PORT = 8123; const DATABASE = 'default'; const FORMAT_NAMES = { JSON: 'json', TSV: 'tsv', CSV: 'csv' } const FORMATS = { [FORMAT_NAMES.JSON]: 'JSON', [FORMAT_NAMES.TSV]: 'TabSeparatedWithNames', [FORMAT_NAMES.CSV]: 'CSVWithNames', }; const REVERSE_FORMATS = Object.keys(FORMATS).reduce( function(obj, format) { obj[FORMATS[format]] = format; return obj; }, {} ); const R_FORMAT_PARSER = new RegExp( `FORMAT (${Object.keys(FORMATS).map(k => FORMATS[k]).join('|')})`, 'mi' ); function parseCSV(body, options = { header: true }) { const data = new tsv.Parser(SEPARATORS.CSV, options).parse(body); data.splice(data.length - 1, 1); return data; } function parseTSV(body, options = { header: true }) { const data = new tsv.Parser(SEPARATORS.TSV, options).parse(body); data.splice(data.length - 1, 1); return data; } function parseCSVStream(s = new Set()) { let isFirst = true; let ref = { fields: [] }; return through(function (chunk) { let str = chunk.toString(); let parsed = parseCSV(str, {header: isFirst}); let strarr = str.split("\n"); let plen = (isFirst && strarr.length - 1 || strarr.length) - parsed.length; if (!isFirst) { chunk = Buffer.concat([Buffer.from([...s].join("\n")), chunk]).toString(); parsed = parseCSV(str, {header: isFirst}); s = new Set(); } strarr.splice(strarr.length - plen).forEach((value => s.add(value))); chunkBuilder.call(this, isFirst, ref, str, parsed); isFirst = false; }) } function parseJSONStream() { return JSONStream.parse(['data', true]); } function parseTSVStream(s = new Set()) { let isFirst = true; let ref = { fields: [] }; return through(function (chunk) { let str = chunk.toString(); let parsed = parseTSV(str, {header: isFirst}); let strarr = str.split("\n"); let plen = (isFirst && strarr.length - 1 || strarr.length) - parsed.length; if (!isFirst) { chunk = Buffer.concat([Buffer.from([...s].join("\n")), chunk]).toString(); parsed = parseTSV(str, {header: isFirst}); s = new Set(); } 
strarr.splice(strarr.length - plen).forEach((value => s.add(value))); chunkBuilder.call(this, isFirst, ref, str, parsed); isFirst = false; }); } function chunkBuilder(isFirst, ref, chunk, parsed) { if (isFirst) { ref.fields = Object.keys(parsed[0]); parsed.forEach((value) => { this.queue(value); }); } else { parsed.forEach((value) => { let result = {}; ref.fields.forEach((field, index) => (result[field] = value[index])); this.queue(result); result = null; }); } } function encodeValue(quote, v, _format, isArray) { const format = ALIASES[_format] || _format; switch (typeof v) { case 'string': if (isArray) { return `'${ESCAPE_STRING[format] ? ESCAPE_STRING[format](v, quote) : v}'`; } return ESCAPE_STRING[format] ? ESCAPE_STRING[format](v, quote) : v; case 'number': if (isNaN(v)) { return 'nan'; } if (v === +Infinity) { return '+inf'; } if (v === -Infinity) { return '-inf'; } if (v === Infinity) { return 'inf'; } return v; case 'object': // clickhouse allows to use unix timestamp in seconds if (v instanceof Date) { return Math.round(v.getTime() / 1000); } // you can add array items if (v instanceof Array) { return '[' + v.map(function (i) { return encodeValue(true, i, format, true); }).join(',') + ']'; } // TODO: tuples support if (!format) { console.trace(); } if (v === null) { return format in ESCAPE_NULL ? ESCAPE_NULL[format] : v; } return format in ESCAPE_NULL ? ESCAPE_NULL[format] : v; case 'boolean': return v === true ? 
1 : 0; default: return v; } } function getErrorObj(res) { const err = new Error(`${res.statusCode}: ${res.body || res.statusMessage}`); if (res.body) { const m = res.body.match(R_ERROR); if (m) { if (m[2] && isNaN(parseInt(m[2])) === false) { err.code = parseInt(m[2]); } if (m[3]) { err.message = m[3]; } } } return err; } function isObject(obj) { return Object.prototype.toString.call(obj) === '[object Object]'; } class Rs extends Transform { constructor(reqParams) { super(); const me = this; me.ws = request.post(reqParams); me.isPiped = false; // Без этого обработчика и вызова read Transform не отрабатывает до конца // https://nodejs.org/api/stream.html#stream_implementing_a_transform_stream // Writing data while the stream is not draining is particularly problematic for a Transform, // because the Transform streams are paused by default until they are piped or // an 'data' or 'readable' event handler is added. me.on('readable', function () { let data = me.read(); }); me.pipe(me.ws); me.on('pipe', function () { me.isPiped = true; }); } _transform(chunk, encoding, cb) { cb(null, chunk); } writeRow(data) { let row = ''; if (typeof data === 'string') { row = data; } else if (Array.isArray(data)) { row = ClickHouse.mapRowAsArray(data); } else if (isObject(data)) { throw new Error('Error: Inserted data must be an array, not an object.'); } let isOk = this.write( row + '\n' ); this.rowCount++; if (isOk) { return Promise.resolve(); } else { return new Promise((resolve, reject) => { const fn = err => reject(err); this.ws.once('error', fn); this.ws.once('drain', err => { this.ws.removeListener('error', fn); if (err) { reject(err); } else { resolve(); } }); }); } } exec() { let me = this; return new Promise((resolve, reject) => { me.ws .on('error', function(err) { reject(err); }) .on('response', function (res) { if (res.statusCode === 200) { return resolve({ r: 1 }); } let body = ''; res .on('data', data => body += data) .on('end', () => { res.body = body; return reject( 
getErrorObj(res) ); }); }); if ( ! me.isPiped) { me.end(); } }); } } class QueryCursor { constructor(connection, query, data, opts = {}) { this.connection = connection; this.query = query; this.data = data; this.opts = _.merge({}, opts, { format: this.connection.opts.format, raw: this.connection.opts.raw }); // Sometime needs to override format by query const formatFromQuery = ClickHouse.getFormatFromQuery(this.query); if (formatFromQuery && formatFromQuery !== this.format) { this.opts.format = formatFromQuery; } this.useTotals = false; this._request = null; this.queryId = opts.queryId || uuidv4(); if (this.isDebug) { console.log('QueryCursor', {query: this.query, data: this.data, opts: this.opts}); } } get isInsert() { return !!this.query.match(/^insert/i); } get isDebug() { return this.connection.opts.debug; } get format() { return this.opts.format; } // TODO Add check for white list of formats set format(newFormat) { this.opts.format = newFormat; } _getBodyForInsert() { const me = this; let query = me.query; let data = me.data; let values = [], fieldList = [], isFirstElObject = false; if(Array.isArray(data) && data.every(d => typeof d === 'string')) { values = data; } else if (Array.isArray(data) && Array.isArray(data[0])) { values = data; } else if (Array.isArray(data) && isObject(data[0])) { values = data; isFirstElObject = true; } else if (isObject(data)) { values = [data]; isFirstElObject = true; } else { throw new Error('ClickHouse._getBodyForInsert: data is invalid format'); } if (isFirstElObject) { let m = query.match(INSERT_FIELDS_MASK); if (m) { fieldList = m[2].split(',').map(s => s.trim()); } else { throw new Error('insert query wasnt parsed field list after TABLE_NAME'); } } return values.map(row => { if (typeof row === 'string') { return row; } if (isFirstElObject) { return ClickHouse.mapRowAsObject(fieldList, row); } else { return ClickHouse.mapRowAsArray(row); } }).join('\n'); } _getReqParams() { const me = this; const { reqParams, config, 
username, password, database, } = me.connection.opts; const params = _.merge({ headers: { 'Content-Type': 'text/plain' }, }, reqParams); const configQS = _.merge({}, config, { query_id: me.queryId, }); if (me.connection.opts.isSessionPerQuery) { configQS.session_id = uuidv4(); } if (database) { configQS.database = database; } const url = new URL(me.connection.url); if (username) { url.searchParams.append('user', username); } if (password) { url.searchParams.append('password', password); } Object.keys(configQS).forEach(k => { url.searchParams.append(k, configQS[k]); }); let data = me.data; let query = me.query; // check for any query params passed for interpolation // https://clickhouse.com/docs/en/interfaces/http/#cli-queries-with-parameters if (data && data.params) { // each variable used in the query is expected to be prefixed with `param_` // when passed in the request. Object.keys(data.params).forEach(k => { let value = encodeValue(false, data.params[k], 'TabSeparated'); url.searchParams.append( `param_${k}`, value ); }); } if (typeof query === 'string') { if (/with totals/i.test(query)) { me.useTotals = true; } // Hack for Sequelize ORM query = query.trim().trimEnd().replace(/;$/gm, ''); if (me.connection.trimQuery) { // Remove comments from the SQL // replace multiple white spaces with one white space query = query.replace(/(--[^\n]*)/g, '').replace(/\s+/g, ' ') } if (query.match(/^(with|select|show|exists|create|drop)/i)) { if ( ! 
R_FORMAT_PARSER.test(query)) { query += ` FORMAT ${ClickHouse.getFullFormatName(me.format)}`; } query += ';'; if (data && data.external) { params['formData'] = data.external.reduce( function(formData, external) { url.searchParams.append( `${external.name}_structure`, external.structure || 'str String' ); formData[external.name] = { value: external.data.join('\n'), options: { filename: external.name, contentType: 'text/plain' } }; return formData; }, {} ); } } else if (me.isInsert) { if (query.match(/values/i)) { if (data && Array.isArray(data) && data.every(d => typeof d === 'string')) { params['body'] = me._getBodyForInsert(); } } else { query += ' FORMAT TabSeparated'; if (data) { params['body'] = me._getBodyForInsert(); } } } } if (me.opts.sessionId !== undefined && typeof me.opts.sessionId === 'string') { url.searchParams.append('session_id', me.opts.sessionId); } if (me.connection.usePost) { // use formData transfer query body for long sql if (typeof params['formData'] === 'undefined') { params['formData'] = {} } params['formData']['query'] = query; } else { url.searchParams.append('query', query); } if (me.connection.isUseGzip) { params.headers['Accept-Encoding'] = 'gzip'; } params['url'] = url.toString(); if (me.isDebug) { console.log('QueryCursor._getReqParams: params', me.query, params); } return params; } exec(cb) { const me = this; const reqParams = me._getReqParams(); me._request = request.post(reqParams, (err, res) => { if (me.isDebug) { console.log('QueryCursor.exec: result', me.query, err, _.pick(res, [ 'statusCode', 'body', 'statusMessage', 'headers' ])); } if (err) { return cb(err); } else if (res.statusCode !== 200) { return cb( getErrorObj(res) ); } if ( ! res.body) { return cb(null, {r: 1}); } try { const data = this.opts.raw ? res.body : me.getBodyParser()(res.body); if (me.format === FORMAT_NAMES.JSON) { if (me.useTotals) { return cb(null, data); } return this.opts.raw ? 
cb(null, data) : cb(null, data.data); } if (me.useTotals) { return cb(null, { meta: {}, data, totals: {}, rows: {}, statistics: {}, }); } return cb(null, data); } catch (err) { cb(err); } }); } getBodyParser() { if (this.format === FORMAT_NAMES.JSON) { return JSON.parse; } if (this.format === FORMAT_NAMES.TSV) { return parseTSV; } if (this.format === FORMAT_NAMES.CSV) { return parseCSV; } throw new Error(`CursorQuery.getBodyParser: unknown format "${this.format}"`); }; getStreamParser() { if (this.format === FORMAT_NAMES.JSON) { return parseJSONStream; } if (this.format === FORMAT_NAMES.TSV) { return parseTSVStream; } if (this.format === FORMAT_NAMES.CSV) { return parseCSVStream; } throw new Error(`CursorQuery.getStreamParser: unknown format "${this.format}"`); } withTotals() { this.useTotals = true; return this; } toPromise() { let me = this; return new Promise((resolve, reject) => { me.exec(function (err, data) { if (err) return reject(err); resolve(data); }) }); } stream() { const me = this; const reqParams = me._getReqParams(); if (me.isInsert) { const rs = new Rs(reqParams); rs.query = me.query; me._request = rs; return rs; } else { const streamParser = this.getStreamParser()(); const rs = new Readable({ objectMode: true }); rs._read = () => {}; rs.query = me.query; const tf = new Transform({ objectMode: true }); let isFirstChunk = true; tf._transform = function (chunk, encoding, cb) { // В независимости от формата, в случае ошибки, в теле ответа будет текс, // подпадающий под регулярку R_ERROR. 
if (isFirstChunk) { isFirstChunk = false; if (R_ERROR.test(chunk.toString())) { streamParser.emit('error', new Error(chunk.toString())); rs.emit('close'); return cb(); } } cb(null, chunk); }; let metaData = {}; const requestStream = request.post(reqParams); // handle network socket errors to avoid uncaught error requestStream.on('error', function (err) { rs.emit('error', err); }); // Не делаем .pipe(rs) потому что rs - Readable, // а для pipe нужен Writable let s; if (me.connection.isUseGzip) { const z = zlib.createGunzip(); s = requestStream.pipe(z).pipe(tf).pipe(streamParser) } else { s = requestStream.pipe(tf).pipe(streamParser) } s .on('error', function (err) { rs.emit('error', err); }) .on('header', header => { metaData = _.merge({}, header); }) .on('footer', footer => { rs.emit('meta', _.merge(metaData, footer)); }) .on('data', function (data) { rs.emit('data', data); }) .on('close', function () { rs.emit('close'); }) .on('end', function () { rs.emit('end'); }); rs.__pause = rs.pause; rs.pause = () => { rs.__pause(); requestStream.pause(); streamParser.pause(); }; rs.__resume = rs.resume; rs.resume = () => { rs.__resume(); requestStream.resume(); streamParser.resume(); }; me._request = rs; return stream2asynciter(rs); } } destroy() { const me = this; let isCallDestroy = false; if (me._request instanceof Readable) { isCallDestroy = true; me._request.destroy(); } else if (me._request) { isCallDestroy = true; me._request.abort(); } // To trying to kill query by query id if (me.queryId) { // Because this realesation work with session witout any ideas, // we need use this hack me.connection.query( `KILL QUERY WHERE query_id = '${me.queryId}' SYNC`, {}, { sessionId: uuidv4(), } ).exec(() => {}); } if (isCallDestroy) { return ; } throw new Error('QueryCursor.destroy error: private field _request is invalid'); } } class ClickHouse { constructor(opts = {}) { this.opts = _.merge( { debug: false, database: DATABASE, password: '', basicAuth: null, isUseGzip: false, 
config: { session_timeout : 60, output_format_json_quote_64bit_integers : 0, enable_http_compression : 0 }, format: FORMAT_NAMES.JSON, raw: false, isSessionPerQuery: false, trimQuery: false, usePost: false, }, opts ); let url = opts.url || opts.host || URI, port = opts.port || PORT; if ( ! url.match(/^https?/)) { url = 'http://' + url; } const u = new URL(url); if (u.protocol === 'https:' && (port === 443 || !opts.port)) { u.port = ''; } else if (! u.port && port) { u.port = port; } this.opts.url = u.toString(); if (this.opts.user || this.opts.username) { this.opts.username = this.opts.user || this.opts.username; } if (this.opts.config) { const { database } = this.opts.config; if (database && database !== this.opts.database) { this.opts.database = database; } } } get sessionId() { return this.opts.config.session_id; } set sessionId(sessionId) { this.opts.config.session_id = '' + sessionId; return this; } noSession() { delete this.opts.config.session_id; return this; } get sessionPerQuery() { return this.opts.isSessionPerQuery; } setSessionPerQuery(value) { this.opts.isSessionPerQuery = !!value; return this; } get sessionTimeout() { return this.opts.config.session_timeout; } set sessionTimeout(timeout) { this.opts.config.session_timeout = timeout; return this; } get url() { if (this.opts.basicAuth) { const u = new URL(this.opts.url); u.username = this.opts.basicAuth.username || ''; u.password = this.opts.basicAuth.password || ''; return u.toString(); } return this.opts.url; } set url(url) { this.opts.url = url; return this; } get port() { return this.opts.port; } set port(port) { this.opts.port = port; return this; } get isUseGzip() { return this.opts.isUseGzip; } set isUseGzip(val) { this.opts.isUseGzip = !!val; this.opts.config.enable_http_compression = this.opts.isUseGzip ? 
1 : 0; } get bodyParser() { if (this.opts.format === FORMAT_NAMES.CSV) { return parseCSV; } else if (this.opts.format === FORMAT_NAMES.TSV) { return parseTSV; } else { return JSON.parse; } } get trimQuery() { return this.opts.trimQuery; } set trimQuery(val) { this.opts.trimQuery = !!val; return this; } get usePost() { return this.opts.usePost; } set usePost(val) { this.opts.usePost = !!val; return this; } static mapRowAsArray(row) { return row .map(value => encodeValue(false, value, 'TabSeparated')) .join('\t'); } static mapRowAsObject(fieldList, row) { return fieldList .map(f => { return encodeValue(false, row[f] != null ? row[f] : '', 'TabSeparated'); }) .join('\t'); } static getFullFormatName(format = '') { if ( ! FORMATS[format]) { throw new Error(`Clickhouse.getFullFormatName: unknown format "${format}`); } return FORMATS[format]; } static getFormatFromQuery(query = '') { if ( ! query) { throw new Error(`Clickhouse.getFormatFromQuery: query is empty!`); } // We use regexp with "g" flag then match doen't return first group. // So, use exec. const m = R_FORMAT_PARSER.exec(query); if (m) { const format = m[1]; if ( ! REVERSE_FORMATS[format]) { throw new Error(`Clickhouse.getFormatFromQuery: unknown format "${format}"!`); } return REVERSE_FORMATS[format]; } return ''; } static getFormats() { return Object.keys(FORMATS).map(k => ({ format: k, fullFormatExpr: FORMATS[k], })); } query(...args) { if (typeof args[args.length - 1] === 'function') { const newArgs = args.slice(0, args.length); const cb = args[args.length - 1]; return new QueryCursor(this, ...newArgs).exec(cb); } return new QueryCursor(this, ...args); } insert(query, data) { return new QueryCursor(this, query, data); } } module.exports = { ClickHouse, };