UNPKG

@databricks/sql

Version:

Driver for connecting to Databricks SQL via the Thrift API.

524 lines 23.7 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k); __setModuleDefault(result, mod); return result; }; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? 
mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.numberToInt64 = void 0; const fs = __importStar(require("fs")); const path = __importStar(require("path")); const node_stream_1 = __importDefault(require("node:stream")); const node_util_1 = __importDefault(require("node:util")); const uuid_1 = require("uuid"); const node_int64_1 = __importDefault(require("node-int64")); const node_fetch_1 = __importDefault(require("node-fetch")); const TCLIService_types_1 = require("../thrift/TCLIService_types"); const DBSQLOperation_1 = __importDefault(require("./DBSQLOperation")); const Status_1 = __importDefault(require("./dto/Status")); const InfoValue_1 = __importDefault(require("./dto/InfoValue")); const utils_1 = require("./utils"); const CloseableCollection_1 = __importDefault(require("./utils/CloseableCollection")); const IDBSQLLogger_1 = require("./contracts/IDBSQLLogger"); const HiveDriverError_1 = __importDefault(require("./errors/HiveDriverError")); const StagingError_1 = __importDefault(require("./errors/StagingError")); const DBSQLParameter_1 = require("./DBSQLParameter"); const ParameterError_1 = __importDefault(require("./errors/ParameterError")); // Explicitly promisify a callback-style `pipeline` because `node:stream/promises` is not available in Node 14 const pipeline = node_util_1.default.promisify(node_stream_1.default.pipeline); function numberToInt64(value) { if (value instanceof node_int64_1.default) { return value; } if (typeof value === 'bigint') { const buffer = new ArrayBuffer(BigInt64Array.BYTES_PER_ELEMENT); const view = new DataView(buffer); view.setBigInt64(0, value, false); // `false` to use big-endian order return new node_int64_1.default(Buffer.from(buffer)); } return new node_int64_1.default(value); } exports.numberToInt64 = numberToInt64; function getDirectResultsOptions(maxRows, config) { if (maxRows === null) { return {}; } return { getDirectResults: { maxRows: numberToInt64(maxRows !== null 
&& maxRows !== void 0 ? maxRows : config.directResultsDefaultMaxRows), }, }; } function getArrowOptions(config, serverProtocolVersion) { const { arrowEnabled = true, useArrowNativeTypes = true } = config; if (!arrowEnabled || !utils_1.ProtocolVersion.supportsArrowMetadata(serverProtocolVersion)) { return { canReadArrowResult: false, }; } return { canReadArrowResult: true, useArrowNativeTypes: { timestampAsArrow: useArrowNativeTypes, decimalAsArrow: useArrowNativeTypes, complexTypesAsArrow: useArrowNativeTypes, // TODO: currently unsupported by `apache-arrow` (see https://github.com/streamlit/streamlit/issues/4489) intervalTypesAsArrow: false, }, }; } function getQueryParameters(namedParameters, ordinalParameters) { const namedParametersProvided = namedParameters !== undefined && Object.keys(namedParameters).length > 0; const ordinalParametersProvided = ordinalParameters !== undefined && ordinalParameters.length > 0; if (namedParametersProvided && ordinalParametersProvided) { throw new ParameterError_1.default('Driver does not support both ordinal and named parameters.'); } if (!namedParametersProvided && !ordinalParametersProvided) { return []; } const result = []; if (namedParameters !== undefined) { for (const name of Object.keys(namedParameters)) { const value = namedParameters[name]; const param = value instanceof DBSQLParameter_1.DBSQLParameter ? value : new DBSQLParameter_1.DBSQLParameter({ value }); result.push(param.toSparkParameter({ name })); } } if (ordinalParameters !== undefined) { for (const value of ordinalParameters) { const param = value instanceof DBSQLParameter_1.DBSQLParameter ? 
value : new DBSQLParameter_1.DBSQLParameter({ value }); result.push(param.toSparkParameter()); } } return result; } class DBSQLSession { /** * Helper method to determine if runAsync should be set for metadata operations * @private * @returns true if supported by protocol version, undefined otherwise */ getRunAsyncForMetadataOperations() { return utils_1.ProtocolVersion.supportsAsyncMetadataOperations(this.serverProtocolVersion) ? true : undefined; } constructor({ handle, context, serverProtocolVersion }) { this.isOpen = true; this.operations = new CloseableCollection_1.default(); this.sessionHandle = handle; this.context = context; // Get the server protocol version from the provided parameter (from TOpenSessionResp) this.serverProtocolVersion = serverProtocolVersion; this.context.getLogger().log(IDBSQLLogger_1.LogLevel.debug, `Session created with id: ${this.id}`); this.context.getLogger().log(IDBSQLLogger_1.LogLevel.debug, `Server protocol version: ${this.serverProtocolVersion}`); } get id() { var _a, _b; const sessionId = (_b = (_a = this.sessionHandle) === null || _a === void 0 ? void 0 : _a.sessionId) === null || _b === void 0 ? void 0 : _b.guid; return sessionId ? 
(0, uuid_1.stringify)(sessionId) : uuid_1.NIL; } /** * Fetches info * @public * @param infoType - One of the values TCLIService_types.TGetInfoType * @returns Value corresponding to info type requested * @example * const response = await session.getInfo(thrift.TCLIService_types.TGetInfoType.CLI_DBMS_VER); */ async getInfo(infoType) { await this.failIfClosed(); const driver = await this.context.getDriver(); const operationPromise = driver.getInfo({ sessionHandle: this.sessionHandle, infoType, }); const response = await this.handleResponse(operationPromise); Status_1.default.assert(response.status); return new InfoValue_1.default(response.infoValue); } /** * Executes statement * @public * @param statement - SQL statement to be executed * @param options - maxRows field is used to specify Direct Results * @returns DBSQLOperation * @example * const operation = await session.executeStatement(query); */ async executeStatement(statement, options = {}) { var _a, _b; await this.failIfClosed(); const driver = await this.context.getDriver(); const clientConfig = this.context.getConfig(); const request = new TCLIService_types_1.TExecuteStatementReq({ sessionHandle: this.sessionHandle, statement, queryTimeout: options.queryTimeout ? numberToInt64(options.queryTimeout) : undefined, runAsync: true, ...getDirectResultsOptions(options.maxRows, clientConfig), ...getArrowOptions(clientConfig, this.serverProtocolVersion), }); if (utils_1.ProtocolVersion.supportsParameterizedQueries(this.serverProtocolVersion)) { request.parameters = getQueryParameters(options.namedParameters, options.ordinalParameters); } if (utils_1.ProtocolVersion.supportsCloudFetch(this.serverProtocolVersion)) { request.canDownloadResult = (_a = options.useCloudFetch) !== null && _a !== void 0 ? 
_a : clientConfig.useCloudFetch; } if (utils_1.ProtocolVersion.supportsArrowCompression(this.serverProtocolVersion) && request.canDownloadResult !== true) { request.canDecompressLZ4Result = ((_b = options.useLZ4Compression) !== null && _b !== void 0 ? _b : clientConfig.useLZ4Compression) && Boolean(utils_1.LZ4); } const operationPromise = driver.executeStatement(request); const response = await this.handleResponse(operationPromise); const operation = this.createOperation(response); // If `stagingAllowedLocalPath` is provided - assume that operation possibly may be a staging operation. // To know for sure, fetch metadata and check a `isStagingOperation` flag. If it happens that it wasn't // a staging operation - not a big deal, we just fetched metadata earlier, but operation is still usable // and user can get data from it. // If `stagingAllowedLocalPath` is not provided - don't do anything to the operation. In a case of regular // operation, everything will work as usual. In a case of staging operation, it will be processed like any // other query - it will be possible to get data from it as usual, or use other operation methods. if (options.stagingAllowedLocalPath !== undefined) { const metadata = await operation.getMetadata(); if (metadata.isStagingOperation) { const allowedLocalPath = Array.isArray(options.stagingAllowedLocalPath) ? 
options.stagingAllowedLocalPath : [options.stagingAllowedLocalPath]; return this.handleStagingOperation(operation, allowedLocalPath); } } return operation; } async handleStagingOperation(operation, allowedLocalPath) { const rows = await operation.fetchAll(); if (rows.length !== 1) { throw new StagingError_1.default('Staging operation: expected only one row in result'); } const row = rows[0]; // For REMOVE operation local file is not available, so no need to validate it if (row.localFile !== undefined) { let allowOperation = false; for (const filepath of allowedLocalPath) { const relativePath = path.relative(filepath, row.localFile); if (!relativePath.startsWith('..') && !path.isAbsolute(relativePath)) { allowOperation = true; } } if (!allowOperation) { throw new StagingError_1.default('Staging path not a subset of allowed local paths.'); } } const { localFile, presignedUrl, headers } = row; switch (row.operation) { case 'GET': await this.handleStagingGet(localFile, presignedUrl, headers); return operation; case 'PUT': await this.handleStagingPut(localFile, presignedUrl, headers); return operation; case 'REMOVE': await this.handleStagingRemove(presignedUrl, headers); return operation; default: throw new StagingError_1.default(`Staging query operation is not supported: ${row.operation}`); } } async handleStagingGet(localFile, presignedUrl, headers) { if (localFile === undefined) { throw new StagingError_1.default('Local file path not provided'); } const connectionProvider = await this.context.getConnectionProvider(); const agent = await connectionProvider.getAgent(); const response = await (0, node_fetch_1.default)(presignedUrl, { method: 'GET', headers, agent }); if (!response.ok) { throw new StagingError_1.default(`HTTP error ${response.status} ${response.statusText}`); } const fileStream = fs.createWriteStream(localFile); // `pipeline` will do all the dirty job for us, including error handling and closing all the streams properly return pipeline(response.body, 
fileStream); } async handleStagingRemove(presignedUrl, headers) { const connectionProvider = await this.context.getConnectionProvider(); const agent = await connectionProvider.getAgent(); const response = await (0, node_fetch_1.default)(presignedUrl, { method: 'DELETE', headers, agent }); // Looks that AWS and Azure have a different behavior of HTTP `DELETE` for non-existing files. // AWS assumes that - since file already doesn't exist - the goal is achieved, and returns HTTP 200. // Azure, on the other hand, is somewhat stricter and check if file exists before deleting it. And if // file doesn't exist - Azure returns HTTP 404. // // For us, it's totally okay if file didn't exist before removing. So when we get an HTTP 404 - // just ignore it and report success. This way we can have a uniform library behavior for all clouds if (!response.ok && response.status !== 404) { throw new StagingError_1.default(`HTTP error ${response.status} ${response.statusText}`); } } async handleStagingPut(localFile, presignedUrl, headers) { if (localFile === undefined) { throw new StagingError_1.default('Local file path not provided'); } const connectionProvider = await this.context.getConnectionProvider(); const agent = await connectionProvider.getAgent(); const fileStream = fs.createReadStream(localFile); const fileInfo = fs.statSync(localFile, { bigint: true }); const response = await (0, node_fetch_1.default)(presignedUrl, { method: 'PUT', headers: { ...headers, // This header is required by server 'Content-Length': fileInfo.size.toString(), }, agent, body: fileStream, }); if (!response.ok) { throw new StagingError_1.default(`HTTP error ${response.status} ${response.statusText}`); } } /** * Information about supported data types * @public * @param request * @returns DBSQLOperation */ async getTypeInfo(request = {}) { await this.failIfClosed(); const driver = await this.context.getDriver(); const clientConfig = this.context.getConfig(); const operationPromise = driver.getTypeInfo({ 
sessionHandle: this.sessionHandle, runAsync: this.getRunAsyncForMetadataOperations(), ...getDirectResultsOptions(request.maxRows, clientConfig), }); const response = await this.handleResponse(operationPromise); return this.createOperation(response); } /** * Get list of catalogs * @public * @param request * @returns DBSQLOperation */ async getCatalogs(request = {}) { await this.failIfClosed(); const driver = await this.context.getDriver(); const clientConfig = this.context.getConfig(); const operationPromise = driver.getCatalogs({ sessionHandle: this.sessionHandle, runAsync: this.getRunAsyncForMetadataOperations(), ...getDirectResultsOptions(request.maxRows, clientConfig), }); const response = await this.handleResponse(operationPromise); return this.createOperation(response); } /** * Get list of schemas * @public * @param request * @returns DBSQLOperation */ async getSchemas(request = {}) { await this.failIfClosed(); const driver = await this.context.getDriver(); const clientConfig = this.context.getConfig(); const operationPromise = driver.getSchemas({ sessionHandle: this.sessionHandle, catalogName: request.catalogName, schemaName: request.schemaName, runAsync: this.getRunAsyncForMetadataOperations(), ...getDirectResultsOptions(request.maxRows, clientConfig), }); const response = await this.handleResponse(operationPromise); return this.createOperation(response); } /** * Get list of tables * @public * @param request * @returns DBSQLOperation */ async getTables(request = {}) { await this.failIfClosed(); const driver = await this.context.getDriver(); const clientConfig = this.context.getConfig(); const operationPromise = driver.getTables({ sessionHandle: this.sessionHandle, catalogName: request.catalogName, schemaName: request.schemaName, tableName: request.tableName, tableTypes: request.tableTypes, runAsync: this.getRunAsyncForMetadataOperations(), ...getDirectResultsOptions(request.maxRows, clientConfig), }); const response = await 
this.handleResponse(operationPromise); return this.createOperation(response); } /** * Get list of supported table types * @public * @param request * @returns DBSQLOperation */ async getTableTypes(request = {}) { await this.failIfClosed(); const driver = await this.context.getDriver(); const clientConfig = this.context.getConfig(); const operationPromise = driver.getTableTypes({ sessionHandle: this.sessionHandle, runAsync: this.getRunAsyncForMetadataOperations(), ...getDirectResultsOptions(request.maxRows, clientConfig), }); const response = await this.handleResponse(operationPromise); return this.createOperation(response); } /** * Get full information about columns of the table * @public * @param request * @returns DBSQLOperation */ async getColumns(request = {}) { await this.failIfClosed(); const driver = await this.context.getDriver(); const clientConfig = this.context.getConfig(); const operationPromise = driver.getColumns({ sessionHandle: this.sessionHandle, catalogName: request.catalogName, schemaName: request.schemaName, tableName: request.tableName, columnName: request.columnName, runAsync: this.getRunAsyncForMetadataOperations(), ...getDirectResultsOptions(request.maxRows, clientConfig), }); const response = await this.handleResponse(operationPromise); return this.createOperation(response); } /** * Get information about function * @public * @param request * @returns DBSQLOperation */ async getFunctions(request) { await this.failIfClosed(); const driver = await this.context.getDriver(); const clientConfig = this.context.getConfig(); const operationPromise = driver.getFunctions({ sessionHandle: this.sessionHandle, catalogName: request.catalogName, schemaName: request.schemaName, functionName: request.functionName, runAsync: this.getRunAsyncForMetadataOperations(), ...getDirectResultsOptions(request.maxRows, clientConfig), }); const response = await this.handleResponse(operationPromise); return this.createOperation(response); } async getPrimaryKeys(request) { 
await this.failIfClosed(); const driver = await this.context.getDriver(); const clientConfig = this.context.getConfig(); const operationPromise = driver.getPrimaryKeys({ sessionHandle: this.sessionHandle, catalogName: request.catalogName, schemaName: request.schemaName, tableName: request.tableName, runAsync: this.getRunAsyncForMetadataOperations(), ...getDirectResultsOptions(request.maxRows, clientConfig), }); const response = await this.handleResponse(operationPromise); return this.createOperation(response); } /** * Request information about foreign keys between two tables * @public * @param request * @returns DBSQLOperation */ async getCrossReference(request) { await this.failIfClosed(); const driver = await this.context.getDriver(); const clientConfig = this.context.getConfig(); const operationPromise = driver.getCrossReference({ sessionHandle: this.sessionHandle, parentCatalogName: request.parentCatalogName, parentSchemaName: request.parentSchemaName, parentTableName: request.parentTableName, foreignCatalogName: request.foreignCatalogName, foreignSchemaName: request.foreignSchemaName, foreignTableName: request.foreignTableName, runAsync: this.getRunAsyncForMetadataOperations(), ...getDirectResultsOptions(request.maxRows, clientConfig), }); const response = await this.handleResponse(operationPromise); return this.createOperation(response); } /** * Closes the session * @public * @returns Operation status */ async close() { var _a; if (!this.isOpen) { return Status_1.default.success(); } // Close owned operations one by one, removing successfully closed ones from the list await this.operations.closeAll(); const driver = await this.context.getDriver(); const response = await driver.closeSession({ sessionHandle: this.sessionHandle, }); // check status for being successful Status_1.default.assert(response.status); // notify owner connection (_a = this.onClose) === null || _a === void 0 ? 
void 0 : _a.call(this); this.isOpen = false; this.context.getLogger().log(IDBSQLLogger_1.LogLevel.debug, `Session closed with id: ${this.id}`); return new Status_1.default(response.status); } createOperation(response) { Status_1.default.assert(response.status); const handle = (0, utils_1.definedOrError)(response.operationHandle); const operation = new DBSQLOperation_1.default({ handle, directResults: response.directResults, context: this.context, }); this.operations.add(operation); return operation; } async failIfClosed() { if (!this.isOpen) { throw new HiveDriverError_1.default('The session was closed or has expired'); } } async handleResponse(requestPromise) { // Currently, after being closed sessions remains usable - server will not // error out when trying to run operations on closed session. So it's // basically useless to process any errors here const result = await requestPromise; await this.failIfClosed(); return result; } } exports.default = DBSQLSession; //# sourceMappingURL=DBSQLSession.js.map