/*
 * @databricks/sql
 * Version:
 * Driver for connection to Databricks SQL via Thrift API.
 * 524 lines • 23.7 kB
 * JavaScript
 */
"use strict";
// TypeScript interop helper: re-exports property `k` of module `m` on namespace
// object `o` (optionally under a different name `k2`). When property descriptors
// are available (`Object.create` feature check), a live getter is installed so
// the binding stays in sync with the source module; otherwise the value is
// copied once by plain assignment.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    // Replace the descriptor with a forwarding getter unless the source module
    // already exposes a suitable accessor of its own
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
// TypeScript interop helper: attaches the original module as the `default`
// property of a namespace object. When descriptor support is available (via
// the `Object.create` feature check) the property is created non-writable;
// otherwise it falls back to a plain assignment.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(target, mod) {
    Object.defineProperty(target, "default", { enumerable: true, value: mod });
}) : function(target, mod) {
    target["default"] = mod;
});
// TypeScript interop helper: converts a CommonJS module into an ES-module-like
// namespace object. ES modules (marked with `__esModule`) are returned
// unchanged; otherwise every own property except `default` is re-exported onto
// a fresh object via `__createBinding`, and the original module itself is
// attached as the `default` export via `__setModuleDefault`.
var __importStar = (this && this.__importStar) || function (mod) {
    if (mod && mod.__esModule) return mod;
    var result = {};
    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
    __setModuleDefault(result, mod);
    return result;
};
// TypeScript interop helper: wraps a CommonJS export in a `{ default: ... }`
// shim unless the module is already an ES module (marked with `__esModule`),
// in which case it is returned as-is.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.numberToInt64 = void 0;
const fs = __importStar(require("fs"));
const path = __importStar(require("path"));
const node_stream_1 = __importDefault(require("node:stream"));
const node_util_1 = __importDefault(require("node:util"));
const uuid_1 = require("uuid");
const node_int64_1 = __importDefault(require("node-int64"));
const node_fetch_1 = __importDefault(require("node-fetch"));
const TCLIService_types_1 = require("../thrift/TCLIService_types");
const DBSQLOperation_1 = __importDefault(require("./DBSQLOperation"));
const Status_1 = __importDefault(require("./dto/Status"));
const InfoValue_1 = __importDefault(require("./dto/InfoValue"));
const utils_1 = require("./utils");
const CloseableCollection_1 = __importDefault(require("./utils/CloseableCollection"));
const IDBSQLLogger_1 = require("./contracts/IDBSQLLogger");
const HiveDriverError_1 = __importDefault(require("./errors/HiveDriverError"));
const StagingError_1 = __importDefault(require("./errors/StagingError"));
const DBSQLParameter_1 = require("./DBSQLParameter");
const ParameterError_1 = __importDefault(require("./errors/ParameterError"));
// Explicitly promisify a callback-style `pipeline` because `node:stream/promises` is not available in Node 14.
// Used by `handleStagingGet` below to stream a downloaded staging file to disk.
const pipeline = node_util_1.default.promisify(node_stream_1.default.pipeline);
/**
 * Converts a JS number or bigint into a `node-int64` Int64 instance.
 * Values that are already Int64 are passed through unchanged.
 * @param value - number, bigint, or node-int64 Int64
 * @returns node-int64 Int64 representation of `value`
 */
function numberToInt64(value) {
    if (value instanceof node_int64_1.default) {
        return value;
    }
    if (typeof value !== 'bigint') {
        // Plain numbers are handled by the node-int64 constructor directly
        return new node_int64_1.default(value);
    }
    // Serialize the bigint into 8 big-endian bytes — the byte order node-int64
    // expects for its Buffer-based constructor
    const buffer = new ArrayBuffer(BigInt64Array.BYTES_PER_ELEMENT);
    new DataView(buffer).setBigInt64(0, value, false);
    return new node_int64_1.default(Buffer.from(buffer));
}
exports.numberToInt64 = numberToInt64;
/**
 * Builds the direct-results portion of a Thrift operation request.
 * A `null` maxRows explicitly disables direct results; `undefined` means
 * "not specified" and falls back to the configured default row limit.
 * @param maxRows - requested row limit, `null` to disable, or `undefined` for default
 * @param config - client config providing `directResultsDefaultMaxRows`
 * @returns object to spread into the Thrift request (possibly empty)
 */
function getDirectResultsOptions(maxRows, config) {
    if (maxRows === null) {
        return {};
    }
    const effectiveMaxRows = maxRows === undefined ? config.directResultsDefaultMaxRows : maxRows;
    return {
        getDirectResults: {
            maxRows: numberToInt64(effectiveMaxRows),
        },
    };
}
/**
 * Builds Arrow-related result-format options for a statement request.
 * Arrow results are requested only when enabled in the client config AND
 * supported by the server protocol version; the native-type flags are
 * toggled as a group by `useArrowNativeTypes`.
 * @param config - client config (`arrowEnabled`, `useArrowNativeTypes`, both default true)
 * @param serverProtocolVersion - protocol version negotiated with the server
 * @returns object to spread into the Thrift request
 */
function getArrowOptions(config, serverProtocolVersion) {
    const { arrowEnabled = true, useArrowNativeTypes = true } = config;
    const arrowSupported = arrowEnabled && utils_1.ProtocolVersion.supportsArrowMetadata(serverProtocolVersion);
    if (!arrowSupported) {
        return { canReadArrowResult: false };
    }
    return {
        canReadArrowResult: true,
        useArrowNativeTypes: {
            timestampAsArrow: useArrowNativeTypes,
            decimalAsArrow: useArrowNativeTypes,
            complexTypesAsArrow: useArrowNativeTypes,
            // TODO: currently unsupported by `apache-arrow` (see https://github.com/streamlit/streamlit/issues/4489)
            intervalTypesAsArrow: false,
        },
    };
}
/**
 * Converts user-supplied query parameters into Thrift Spark parameter objects.
 * Named and ordinal parameters are mutually exclusive; raw values and
 * pre-constructed `DBSQLParameter` instances are both accepted.
 * @param namedParameters - map of parameter name to value (or DBSQLParameter)
 * @param ordinalParameters - positional list of values (or DBSQLParameter)
 * @returns array of Spark parameters (empty when none were provided)
 * @throws ParameterError when both named and ordinal parameters are given
 */
function getQueryParameters(namedParameters, ordinalParameters) {
    const hasNamed = namedParameters !== undefined && Object.keys(namedParameters).length > 0;
    const hasOrdinal = ordinalParameters !== undefined && ordinalParameters.length > 0;
    if (hasNamed && hasOrdinal) {
        throw new ParameterError_1.default('Driver does not support both ordinal and named parameters.');
    }
    if (!hasNamed && !hasOrdinal) {
        return [];
    }
    // Wrap raw values so both raw values and DBSQLParameter instances are handled uniformly
    const toParameter = (value) => (value instanceof DBSQLParameter_1.DBSQLParameter ? value : new DBSQLParameter_1.DBSQLParameter({ value }));
    const result = [];
    if (namedParameters !== undefined) {
        for (const [name, value] of Object.entries(namedParameters)) {
            result.push(toParameter(value).toSparkParameter({ name }));
        }
    }
    if (ordinalParameters !== undefined) {
        for (const value of ordinalParameters) {
            result.push(toParameter(value).toSparkParameter());
        }
    }
    return result;
}
/**
 * A single session against a Databricks SQL endpoint over the Thrift API.
 * Wraps the Thrift session handle, tracks every operation created through the
 * session (so they can be closed together with it), and exposes statement
 * execution, metadata lookups and staging (file ingestion) helpers.
 */
class DBSQLSession {
    /**
     * Helper method to determine if runAsync should be set for metadata operations
     * @private
     * @returns true if supported by protocol version, undefined otherwise
     */
    getRunAsyncForMetadataOperations() {
        return utils_1.ProtocolVersion.supportsAsyncMetadataOperations(this.serverProtocolVersion) ? true : undefined;
    }
    /**
     * @param handle - Thrift session handle from the open-session response
     * @param context - client context providing driver, config, logger and connection provider
     * @param serverProtocolVersion - protocol version reported by the server (from TOpenSessionResp)
     */
    constructor({ handle, context, serverProtocolVersion }) {
        this.isOpen = true;
        // Operations created by this session; closed in bulk when the session closes (see `close()`)
        this.operations = new CloseableCollection_1.default();
        this.sessionHandle = handle;
        this.context = context;
        // Get the server protocol version from the provided parameter (from TOpenSessionResp)
        this.serverProtocolVersion = serverProtocolVersion;
        this.context.getLogger().log(IDBSQLLogger_1.LogLevel.debug, `Session created with id: ${this.id}`);
        this.context.getLogger().log(IDBSQLLogger_1.LogLevel.debug, `Server protocol version: ${this.serverProtocolVersion}`);
    }
    // Session id (the handle's GUID) rendered as a UUID string; NIL UUID when no guid is available
    get id() {
        var _a, _b;
        const sessionId = (_b = (_a = this.sessionHandle) === null || _a === void 0 ? void 0 : _a.sessionId) === null || _b === void 0 ? void 0 : _b.guid;
        return sessionId ? (0, uuid_1.stringify)(sessionId) : uuid_1.NIL;
    }
    /**
     * Fetches info
     * @public
     * @param infoType - One of the values TCLIService_types.TGetInfoType
     * @returns Value corresponding to info type requested
     * @example
     * const response = await session.getInfo(thrift.TCLIService_types.TGetInfoType.CLI_DBMS_VER);
     */
    async getInfo(infoType) {
        await this.failIfClosed();
        const driver = await this.context.getDriver();
        const operationPromise = driver.getInfo({
            sessionHandle: this.sessionHandle,
            infoType,
        });
        const response = await this.handleResponse(operationPromise);
        Status_1.default.assert(response.status);
        return new InfoValue_1.default(response.infoValue);
    }
    /**
     * Executes statement
     * @public
     * @param statement - SQL statement to be executed
     * @param options - maxRows field is used to specify Direct Results
     * @returns DBSQLOperation
     * @example
     * const operation = await session.executeStatement(query);
     */
    async executeStatement(statement, options = {}) {
        var _a, _b;
        await this.failIfClosed();
        const driver = await this.context.getDriver();
        const clientConfig = this.context.getConfig();
        const request = new TCLIService_types_1.TExecuteStatementReq({
            sessionHandle: this.sessionHandle,
            statement,
            queryTimeout: options.queryTimeout ? numberToInt64(options.queryTimeout) : undefined,
            // Statements are always submitted asynchronously; results are retrieved via the operation
            runAsync: true,
            ...getDirectResultsOptions(options.maxRows, clientConfig),
            ...getArrowOptions(clientConfig, this.serverProtocolVersion),
        });
        // Query parameters are attached only when the server protocol supports them
        if (utils_1.ProtocolVersion.supportsParameterizedQueries(this.serverProtocolVersion)) {
            request.parameters = getQueryParameters(options.namedParameters, options.ordinalParameters);
        }
        if (utils_1.ProtocolVersion.supportsCloudFetch(this.serverProtocolVersion)) {
            request.canDownloadResult = (_a = options.useCloudFetch) !== null && _a !== void 0 ? _a : clientConfig.useCloudFetch;
        }
        // LZ4 decompression is requested only when results will NOT be downloaded via
        // cloud fetch, and only if the optional LZ4 module is actually available
        if (utils_1.ProtocolVersion.supportsArrowCompression(this.serverProtocolVersion) && request.canDownloadResult !== true) {
            request.canDecompressLZ4Result = ((_b = options.useLZ4Compression) !== null && _b !== void 0 ? _b : clientConfig.useLZ4Compression) && Boolean(utils_1.LZ4);
        }
        const operationPromise = driver.executeStatement(request);
        const response = await this.handleResponse(operationPromise);
        const operation = this.createOperation(response);
        // If `stagingAllowedLocalPath` is provided - assume that operation possibly may be a staging operation.
        // To know for sure, fetch metadata and check a `isStagingOperation` flag. If it happens that it wasn't
        // a staging operation - not a big deal, we just fetched metadata earlier, but operation is still usable
        // and user can get data from it.
        // If `stagingAllowedLocalPath` is not provided - don't do anything to the operation. In a case of regular
        // operation, everything will work as usual. In a case of staging operation, it will be processed like any
        // other query - it will be possible to get data from it as usual, or use other operation methods.
        if (options.stagingAllowedLocalPath !== undefined) {
            const metadata = await operation.getMetadata();
            if (metadata.isStagingOperation) {
                const allowedLocalPath = Array.isArray(options.stagingAllowedLocalPath)
                    ? options.stagingAllowedLocalPath
                    : [options.stagingAllowedLocalPath];
                return this.handleStagingOperation(operation, allowedLocalPath);
            }
        }
        return operation;
    }
    /**
     * Performs the local side of a staging operation (GET/PUT/REMOVE) described by
     * the operation's single result row, after validating that any local file
     * involved is located under one of the allowed local paths.
     * @throws StagingError on unexpected row count, disallowed local path, or unknown operation type
     */
    async handleStagingOperation(operation, allowedLocalPath) {
        const rows = await operation.fetchAll();
        if (rows.length !== 1) {
            throw new StagingError_1.default('Staging operation: expected only one row in result');
        }
        const row = rows[0];
        // For REMOVE operation local file is not available, so no need to validate it
        if (row.localFile !== undefined) {
            let allowOperation = false;
            for (const filepath of allowedLocalPath) {
                // The local file is allowed when it resolves inside one of the allowed
                // directories, i.e. the relative path neither escapes via `..` nor is absolute
                const relativePath = path.relative(filepath, row.localFile);
                if (!relativePath.startsWith('..') && !path.isAbsolute(relativePath)) {
                    allowOperation = true;
                }
            }
            if (!allowOperation) {
                throw new StagingError_1.default('Staging path not a subset of allowed local paths.');
            }
        }
        const { localFile, presignedUrl, headers } = row;
        switch (row.operation) {
            case 'GET':
                await this.handleStagingGet(localFile, presignedUrl, headers);
                return operation;
            case 'PUT':
                await this.handleStagingPut(localFile, presignedUrl, headers);
                return operation;
            case 'REMOVE':
                await this.handleStagingRemove(presignedUrl, headers);
                return operation;
            default:
                throw new StagingError_1.default(`Staging query operation is not supported: ${row.operation}`);
        }
    }
    /**
     * Downloads the file behind `presignedUrl` and streams it into `localFile`.
     * @throws StagingError when no local path is given or the HTTP request fails
     */
    async handleStagingGet(localFile, presignedUrl, headers) {
        if (localFile === undefined) {
            throw new StagingError_1.default('Local file path not provided');
        }
        const connectionProvider = await this.context.getConnectionProvider();
        const agent = await connectionProvider.getAgent();
        const response = await (0, node_fetch_1.default)(presignedUrl, { method: 'GET', headers, agent });
        if (!response.ok) {
            throw new StagingError_1.default(`HTTP error ${response.status} ${response.statusText}`);
        }
        const fileStream = fs.createWriteStream(localFile);
        // `pipeline` will do all the dirty job for us, including error handling and closing all the streams properly
        return pipeline(response.body, fileStream);
    }
    /**
     * Deletes the remote file behind `presignedUrl` via HTTP DELETE.
     * A 404 response is treated as success (file already absent) — see the cloud
     * behavior note below.
     * @throws StagingError on any other non-OK HTTP response
     */
    async handleStagingRemove(presignedUrl, headers) {
        const connectionProvider = await this.context.getConnectionProvider();
        const agent = await connectionProvider.getAgent();
        const response = await (0, node_fetch_1.default)(presignedUrl, { method: 'DELETE', headers, agent });
        // Looks that AWS and Azure have a different behavior of HTTP `DELETE` for non-existing files.
        // AWS assumes that - since file already doesn't exist - the goal is achieved, and returns HTTP 200.
        // Azure, on the other hand, is somewhat stricter and check if file exists before deleting it. And if
        // file doesn't exist - Azure returns HTTP 404.
        //
        // For us, it's totally okay if file didn't exist before removing. So when we get an HTTP 404 -
        // just ignore it and report success. This way we can have a uniform library behavior for all clouds
        if (!response.ok && response.status !== 404) {
            throw new StagingError_1.default(`HTTP error ${response.status} ${response.statusText}`);
        }
    }
    /**
     * Uploads `localFile` to the remote location behind `presignedUrl` via HTTP PUT.
     * @throws StagingError when no local path is given or the HTTP request fails
     */
    async handleStagingPut(localFile, presignedUrl, headers) {
        if (localFile === undefined) {
            throw new StagingError_1.default('Local file path not provided');
        }
        const connectionProvider = await this.context.getConnectionProvider();
        const agent = await connectionProvider.getAgent();
        const fileStream = fs.createReadStream(localFile);
        // `bigint: true` avoids precision loss in the reported size for very large files
        const fileInfo = fs.statSync(localFile, { bigint: true });
        const response = await (0, node_fetch_1.default)(presignedUrl, {
            method: 'PUT',
            headers: {
                ...headers,
                // This header is required by server
                'Content-Length': fileInfo.size.toString(),
            },
            agent,
            body: fileStream,
        });
        if (!response.ok) {
            throw new StagingError_1.default(`HTTP error ${response.status} ${response.statusText}`);
        }
    }
    /**
     * Information about supported data types
     * @public
     * @param request
     * @returns DBSQLOperation
     */
    async getTypeInfo(request = {}) {
        await this.failIfClosed();
        const driver = await this.context.getDriver();
        const clientConfig = this.context.getConfig();
        const operationPromise = driver.getTypeInfo({
            sessionHandle: this.sessionHandle,
            runAsync: this.getRunAsyncForMetadataOperations(),
            ...getDirectResultsOptions(request.maxRows, clientConfig),
        });
        const response = await this.handleResponse(operationPromise);
        return this.createOperation(response);
    }
    /**
     * Get list of catalogs
     * @public
     * @param request
     * @returns DBSQLOperation
     */
    async getCatalogs(request = {}) {
        await this.failIfClosed();
        const driver = await this.context.getDriver();
        const clientConfig = this.context.getConfig();
        const operationPromise = driver.getCatalogs({
            sessionHandle: this.sessionHandle,
            runAsync: this.getRunAsyncForMetadataOperations(),
            ...getDirectResultsOptions(request.maxRows, clientConfig),
        });
        const response = await this.handleResponse(operationPromise);
        return this.createOperation(response);
    }
    /**
     * Get list of schemas
     * @public
     * @param request
     * @returns DBSQLOperation
     */
    async getSchemas(request = {}) {
        await this.failIfClosed();
        const driver = await this.context.getDriver();
        const clientConfig = this.context.getConfig();
        const operationPromise = driver.getSchemas({
            sessionHandle: this.sessionHandle,
            catalogName: request.catalogName,
            schemaName: request.schemaName,
            runAsync: this.getRunAsyncForMetadataOperations(),
            ...getDirectResultsOptions(request.maxRows, clientConfig),
        });
        const response = await this.handleResponse(operationPromise);
        return this.createOperation(response);
    }
    /**
     * Get list of tables
     * @public
     * @param request
     * @returns DBSQLOperation
     */
    async getTables(request = {}) {
        await this.failIfClosed();
        const driver = await this.context.getDriver();
        const clientConfig = this.context.getConfig();
        const operationPromise = driver.getTables({
            sessionHandle: this.sessionHandle,
            catalogName: request.catalogName,
            schemaName: request.schemaName,
            tableName: request.tableName,
            tableTypes: request.tableTypes,
            runAsync: this.getRunAsyncForMetadataOperations(),
            ...getDirectResultsOptions(request.maxRows, clientConfig),
        });
        const response = await this.handleResponse(operationPromise);
        return this.createOperation(response);
    }
    /**
     * Get list of supported table types
     * @public
     * @param request
     * @returns DBSQLOperation
     */
    async getTableTypes(request = {}) {
        await this.failIfClosed();
        const driver = await this.context.getDriver();
        const clientConfig = this.context.getConfig();
        const operationPromise = driver.getTableTypes({
            sessionHandle: this.sessionHandle,
            runAsync: this.getRunAsyncForMetadataOperations(),
            ...getDirectResultsOptions(request.maxRows, clientConfig),
        });
        const response = await this.handleResponse(operationPromise);
        return this.createOperation(response);
    }
    /**
     * Get full information about columns of the table
     * @public
     * @param request
     * @returns DBSQLOperation
     */
    async getColumns(request = {}) {
        await this.failIfClosed();
        const driver = await this.context.getDriver();
        const clientConfig = this.context.getConfig();
        const operationPromise = driver.getColumns({
            sessionHandle: this.sessionHandle,
            catalogName: request.catalogName,
            schemaName: request.schemaName,
            tableName: request.tableName,
            columnName: request.columnName,
            runAsync: this.getRunAsyncForMetadataOperations(),
            ...getDirectResultsOptions(request.maxRows, clientConfig),
        });
        const response = await this.handleResponse(operationPromise);
        return this.createOperation(response);
    }
    /**
     * Get information about function
     * @public
     * @param request
     * @returns DBSQLOperation
     */
    async getFunctions(request) {
        await this.failIfClosed();
        const driver = await this.context.getDriver();
        const clientConfig = this.context.getConfig();
        const operationPromise = driver.getFunctions({
            sessionHandle: this.sessionHandle,
            catalogName: request.catalogName,
            schemaName: request.schemaName,
            functionName: request.functionName,
            runAsync: this.getRunAsyncForMetadataOperations(),
            ...getDirectResultsOptions(request.maxRows, clientConfig),
        });
        const response = await this.handleResponse(operationPromise);
        return this.createOperation(response);
    }
    /**
     * Get information about primary keys of a table
     * @public
     * @param request
     * @returns DBSQLOperation
     */
    async getPrimaryKeys(request) {
        await this.failIfClosed();
        const driver = await this.context.getDriver();
        const clientConfig = this.context.getConfig();
        const operationPromise = driver.getPrimaryKeys({
            sessionHandle: this.sessionHandle,
            catalogName: request.catalogName,
            schemaName: request.schemaName,
            tableName: request.tableName,
            runAsync: this.getRunAsyncForMetadataOperations(),
            ...getDirectResultsOptions(request.maxRows, clientConfig),
        });
        const response = await this.handleResponse(operationPromise);
        return this.createOperation(response);
    }
    /**
     * Request information about foreign keys between two tables
     * @public
     * @param request
     * @returns DBSQLOperation
     */
    async getCrossReference(request) {
        await this.failIfClosed();
        const driver = await this.context.getDriver();
        const clientConfig = this.context.getConfig();
        const operationPromise = driver.getCrossReference({
            sessionHandle: this.sessionHandle,
            parentCatalogName: request.parentCatalogName,
            parentSchemaName: request.parentSchemaName,
            parentTableName: request.parentTableName,
            foreignCatalogName: request.foreignCatalogName,
            foreignSchemaName: request.foreignSchemaName,
            foreignTableName: request.foreignTableName,
            runAsync: this.getRunAsyncForMetadataOperations(),
            ...getDirectResultsOptions(request.maxRows, clientConfig),
        });
        const response = await this.handleResponse(operationPromise);
        return this.createOperation(response);
    }
    /**
     * Closes the session
     * @public
     * @returns Operation status
     */
    async close() {
        var _a;
        // Closing an already-closed session is a no-op reported as success
        if (!this.isOpen) {
            return Status_1.default.success();
        }
        // Close owned operations one by one, removing successfully closed ones from the list
        await this.operations.closeAll();
        const driver = await this.context.getDriver();
        const response = await driver.closeSession({
            sessionHandle: this.sessionHandle,
        });
        // check status for being successful
        Status_1.default.assert(response.status);
        // notify owner connection
        (_a = this.onClose) === null || _a === void 0 ? void 0 : _a.call(this);
        this.isOpen = false;
        this.context.getLogger().log(IDBSQLLogger_1.LogLevel.debug, `Session closed with id: ${this.id}`);
        return new Status_1.default(response.status);
    }
    // Wraps a successful Thrift response into a DBSQLOperation and registers it
    // in the session's owned-operations collection so it is closed with the session
    createOperation(response) {
        Status_1.default.assert(response.status);
        const handle = (0, utils_1.definedOrError)(response.operationHandle);
        const operation = new DBSQLOperation_1.default({
            handle,
            directResults: response.directResults,
            context: this.context,
        });
        this.operations.add(operation);
        return operation;
    }
    // Throws if the session was closed locally via `close()`
    async failIfClosed() {
        if (!this.isOpen) {
            throw new HiveDriverError_1.default('The session was closed or has expired');
        }
    }
    async handleResponse(requestPromise) {
        // Currently, after being closed sessions remains usable - server will not
        // error out when trying to run operations on closed session. So it's
        // basically useless to process any errors here
        const result = await requestPromise;
        // Re-check after the request completes: the session may have been closed
        // locally while the request was in flight
        await this.failIfClosed();
        return result;
    }
}
exports.default = DBSQLSession;
//# sourceMappingURL=DBSQLSession.js.map