UNPKG

@databricks/sql

Version:

Driver for connection to Databricks SQL via Thrift API.

362 lines 17.8 kB
"use strict";
// ---------------------------------------------------------------------------
// Module interop helpers emitted by the TypeScript compiler. They are kept
// verbatim (apart from formatting) so `__importStar` / `__importDefault`
// behave exactly as in the compiled output.
// ---------------------------------------------------------------------------
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
  if (k2 === undefined) k2 = k;
  var desc = Object.getOwnPropertyDescriptor(m, k);
  if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
    desc = { enumerable: true, get: function() { return m[k]; } };
  }
  Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
  if (k2 === undefined) k2 = k;
  o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
  Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
  o["default"] = v;
});
var __importStar = (this && this.__importStar) || function (mod) {
  if (mod && mod.__esModule) return mod;
  var result = {};
  if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
  __setModuleDefault(result, mod);
  return result;
};
var __importDefault = (this && this.__importDefault) || function (mod) {
  return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const uuid_1 = require("uuid");
const node_stream_1 = require("node:stream");
const TCLIService_types_1 = require("../thrift/TCLIService_types");
const Status_1 = __importDefault(require("./dto/Status"));
const IDBSQLLogger_1 = require("./contracts/IDBSQLLogger");
const OperationStateError_1 = __importStar(require("./errors/OperationStateError"));
const RowSetProvider_1 = __importDefault(require("./result/RowSetProvider"));
const JsonResultHandler_1 = __importDefault(require("./result/JsonResultHandler"));
const ArrowResultHandler_1 = __importDefault(require("./result/ArrowResultHandler"));
const CloudFetchResultHandler_1 = __importDefault(require("./result/CloudFetchResultHandler"));
const ArrowResultConverter_1 = __importDefault(require("./result/ArrowResultConverter"));
const ResultSlicer_1 = __importDefault(require("./result/ResultSlicer"));
const utils_1 = require("./utils");
const OperationIterator_1 = require("./utils/OperationIterator");
const HiveDriverError_1 = __importDefault(require("./errors/HiveDriverError"));

/**
 * Resolves (with no value) after roughly `ms` milliseconds.
 * @param ms - delay passed to `setTimeout`
 * @returns Promise that settles once the timer fires
 */
function delay(ms) {
  return new Promise((resolve) => {
    setTimeout(() => {
      resolve();
    }, ms);
  });
}

/**
 * Client-side wrapper around a single server operation handle. Tracks the
 * operation state, lazily fetches result metadata, builds the result handler
 * matching the result format, and exposes fetch / status / cancel / close.
 */
class DBSQLOperation {
  /**
   * @param params.handle - operation handle; its `hasResultSet` flag and `operationId.guid` are read here
   * @param params.directResults - optional prefetched data: `operationStatus`, `resultSetMetadata`,
   *   `resultSet` and `closeOperation` are used when present
   * @param params.context - provides `getLogger()`, `getDriver()` and `getConfig()`
   */
  constructor({ handle, directResults, context }) {
    this.closed = false;
    this.cancelled = false;
    this.state = TCLIService_types_1.TOperationState.INITIALIZED_STATE;
    this.operationHandle = handle;
    this.context = context;

    // `directResults` may be null/undefined - treat it as "nothing prefetched"
    const { operationStatus, resultSetMetadata, resultSet, closeOperation } =
      directResults == null ? {} : directResults;

    // When a close response arrived with the direct results, only the prefetched
    // rows are handed to the row set provider as usable data
    const useOnlyPrefetchedResults = Boolean(closeOperation);

    if (operationStatus) {
      this.processOperationStatusResponse(operationStatus);
    }

    this.metadata = resultSetMetadata;
    this._data = new RowSetProvider_1.default(
      this.context,
      this.operationHandle,
      [resultSet],
      useOnlyPrefetchedResults,
    );
    this.closeOperation = closeOperation;

    this.context.getLogger().log(IDBSQLLogger_1.LogLevel.debug, `Operation created with id: ${this.id}`);
  }

  /**
   * Creates an iterator over result chunks of this operation.
   * @param options - passed through to `OperationChunksIterator`
   */
  iterateChunks(options) {
    return new OperationIterator_1.OperationChunksIterator(this, options);
  }

  /**
   * Creates an iterator over individual result rows of this operation.
   * @param options - passed through to `OperationRowsIterator`
   */
  iterateRows(options) {
    return new OperationIterator_1.OperationRowsIterator(this, options);
  }

  /**
   * Wraps the operation results into a Node.js `Readable` stream.
   * @param options.mode - `'chunks'` (default) or `'rows'`
   * @param options.iteratorOptions - passed to the underlying iterator
   * @param options.streamOptions - passed to `Readable.from`
   * @throws {Error} when `options.mode` is neither `'chunks'` nor `'rows'`
   */
  toNodeStream(options) {
    const mode = options == null || options.mode == null ? 'chunks' : options.mode;
    const iteratorOptions = options == null ? undefined : options.iteratorOptions;
    const streamOptions = options == null ? undefined : options.streamOptions;

    let iterable;
    switch (mode) {
      case 'chunks':
        iterable = this.iterateChunks(iteratorOptions);
        break;
      case 'rows':
        iterable = this.iterateRows(iteratorOptions);
        break;
      default:
        throw new Error(`IOperation.toNodeStream: unsupported mode ${mode}`);
    }
    return node_stream_1.Readable.from(iterable, streamOptions);
  }

  /**
   * Operation id formatted as a UUID string; the NIL UUID when the handle
   * carries no `operationId.guid`.
   */
  get id() {
    const handle = this.operationHandle;
    const operationId = handle == null || handle.operationId == null ? undefined : handle.operationId.guid;
    return operationId ? (0, uuid_1.stringify)(operationId) : uuid_1.NIL;
  }

  /**
   * Fetches all data
   * @public
   * @param options - forwarded to `fetchChunk`; `maxRows` limits the size of each requested chunk
   * @returns Array with all fetched chunks flattened one level into a single array
   * @throws {StatusError}
   * @example
   * const result = await queryOperation.fetchAll();
   */
  async fetchAll(options) {
    const data = [];
    const fetchChunkOptions = {
      ...options,
      // Tell slicer to return raw chunks. We're going to process all of them anyway,
      // so no need to additionally buffer and slice chunks returned by server
      disableBuffering: true,
    };
    do {
      // eslint-disable-next-line no-await-in-loop
      const chunk = await this.fetchChunk(fetchChunkOptions);
      data.push(chunk);
    } while (await this.hasMoreRows()); // eslint-disable-line no-await-in-loop
    this.context.getLogger().log(IDBSQLLogger_1.LogLevel.debug, `Fetched all data from operation with id: ${this.id}`);
    return data.flat();
  }

  /**
   * Fetches chunk of data
   * @public
   * @param options - `maxRows` sets the requested chunk size (defaults to the
   *   `fetchChunkDefaultMaxRows` config value); `disableBuffering` is forwarded to the result handler
   * @returns Chunk returned by the result handler, or an empty array when the
   *   operation has no result set
   * @throws {StatusError}
   * @throws {OperationStateError} when the operation is (or becomes) closed or cancelled
   * @example
   * const result = await queryOperation.fetchChunk({maxRows: 1000});
   */
  async fetchChunk(options) {
    await this.failIfClosed();
    if (!this.operationHandle.hasResultSet) {
      return [];
    }
    await this.waitUntilReady(options);
    const resultHandler = await this.getResultHandler();
    await this.failIfClosed();

    // All the library code is Promise-based, however, since Promises are microtasks,
    // enqueueing a lot of promises may block macrotasks execution for a while.
    // Usually, there are not many microtasks scheduled, however, when fetching query
    // results (especially CloudFetch ones) it's quite easy to block event loop for
    // long enough to break a lot of things. For example, with CloudFetch, after first
    // set of files are downloaded and being processed immediately one by one, event
    // loop easily gets blocked for enough time to break connection pool. `http.Agent`
    // stops receiving socket events, and marks all sockets invalid on the next attempt
    // to use them. See these similar issues that helped to debug this particular case -
    // https://github.com/nodejs/node/issues/47130 and https://github.com/node-fetch/node-fetch/issues/1735
    // This simple fix allows to clean up a microtasks queue and allow Node to process
    // macrotasks as well, allowing the normal operation of other code. Also, this
    // fix is added to `fetchChunk` method because, unlike other methods, `fetchChunk` is
    // a potential source of issues described above
    await new Promise((resolve) => {
      setTimeout(resolve, 0);
    });

    const defaultMaxRows = this.context.getConfig().fetchChunkDefaultMaxRows;
    const limit = options == null || options.maxRows == null ? defaultMaxRows : options.maxRows;

    // Await the fetch BEFORE re-checking the state and logging: otherwise the
    // closed/cancelled check and the "Fetched chunk" message run while the fetch
    // is still in flight, and a failing check would leave the fetch promise unhandled.
    const result = await resultHandler.fetchNext({
      limit,
      disableBuffering: options == null ? undefined : options.disableBuffering,
    });
    await this.failIfClosed();

    // Note: the logged size is the requested limit, not the number of rows received
    this.context
      .getLogger()
      .log(IDBSQLLogger_1.LogLevel.debug, `Fetched chunk of size: ${limit} from operation with id: ${this.id}`);
    return result;
  }

  /**
   * Requests operation status
   * @param progress - forwarded to the driver as `getProgressUpdate`
   * @returns The cached final status response if one was already recorded,
   *   otherwise a fresh response from the driver
   * @throws {StatusError}
   */
  async status(progress = false) {
    await this.failIfClosed();
    this.context.getLogger().log(IDBSQLLogger_1.LogLevel.debug, `Fetching status for operation with id: ${this.id}`);

    // `operationStatus` is only set once the operation left the in-progress states
    if (this.operationStatus) {
      return this.operationStatus;
    }

    const driver = await this.context.getDriver();
    const response = await driver.getOperationStatus({
      operationHandle: this.operationHandle,
      getProgressUpdate: progress,
    });
    return this.processOperationStatusResponse(response);
  }

  /**
   * Cancels operation
   * @returns Status of the cancel request; a success status without any server
   *   call when the operation is already closed or cancelled
   * @throws {StatusError}
   */
  async cancel() {
    if (this.closed || this.cancelled) {
      return Status_1.default.success();
    }
    this.context.getLogger().log(IDBSQLLogger_1.LogLevel.debug, `Cancelling operation with id: ${this.id}`);

    const driver = await this.context.getDriver();
    const response = await driver.cancelOperation({
      operationHandle: this.operationHandle,
    });
    Status_1.default.assert(response.status);
    this.cancelled = true;
    const result = new Status_1.default(response.status);

    // Cancelled operation becomes unusable, similarly to being closed
    if (this.onClose != null) {
      this.onClose();
    }
    return result;
  }

  /**
   * Closes operation
   * @returns Status of the close request; a success status without any server
   *   call when the operation is already closed or cancelled
   * @throws {StatusError}
   */
  async close() {
    if (this.closed || this.cancelled) {
      return Status_1.default.success();
    }
    this.context.getLogger().log(IDBSQLLogger_1.LogLevel.debug, `Closing operation with id: ${this.id}`);

    const driver = await this.context.getDriver();
    // Reuse the close response delivered with the direct results, if there was one
    const response =
      this.closeOperation != null
        ? this.closeOperation
        : await driver.closeOperation({
            operationHandle: this.operationHandle,
          });
    Status_1.default.assert(response.status);
    this.closed = true;
    const result = new Status_1.default(response.status);

    if (this.onClose != null) {
      this.onClose();
    }
    return result;
  }

  /**
   * Waits until the operation reaches the finished state.
   * @param options - forwarded to `waitUntilReady` (`progress`, `callback`)
   * @throws {OperationStateError}
   */
  async finished(options) {
    await this.failIfClosed();
    await this.waitUntilReady(options);
  }

  /**
   * Tells whether the result handler reports more data.
   * @returns `false` when the operation is closed or cancelled, otherwise
   *   whatever the result handler's `hasMore()` returns
   */
  async hasMoreRows() {
    // If operation is closed or cancelled - we should not try to get data from it
    if (this.closed || this.cancelled) {
      return false;
    }
    // If we fetched all the data from server - check if there's anything buffered in result handler
    const resultHandler = await this.getResultHandler();
    return resultHandler.hasMore();
  }

  /**
   * Returns the result schema.
   * @param options - forwarded to `waitUntilReady` (`progress`, `callback`)
   * @returns `metadata.schema`, or `null` when the operation has no result set
   *   or the metadata carries no schema
   * @throws {OperationStateError}
   */
  async getSchema(options) {
    await this.failIfClosed();
    if (!this.operationHandle.hasResultSet) {
      return null;
    }
    await this.waitUntilReady(options);
    this.context.getLogger().log(IDBSQLLogger_1.LogLevel.debug, `Fetching schema for operation with id: ${this.id}`);
    const metadata = await this.fetchMetadata();
    return metadata.schema != null ? metadata.schema : null;
  }

  /**
   * Waits for the operation to finish and returns its result set metadata.
   * @throws {OperationStateError}
   */
  async getMetadata() {
    await this.failIfClosed();
    await this.waitUntilReady();
    return this.fetchMetadata();
  }

  /**
   * Guard used before and between asynchronous steps.
   * @throws {OperationStateError} with code `Closed` or `Canceled`
   */
  async failIfClosed() {
    if (this.closed) {
      throw new OperationStateError_1.default(OperationStateError_1.OperationStateErrorCode.Closed);
    }
    if (this.cancelled) {
      throw new OperationStateError_1.default(OperationStateError_1.OperationStateErrorCode.Canceled);
    }
  }

  /**
   * Polls the operation status every 100 ms until it reports the finished state.
   * @param options.progress - truthy to request progress updates with each status call
   * @param options.callback - invoked (and awaited) with every status response
   * @throws {OperationStateError} for cancelled, closed, error, timed-out or unknown states
   */
  async waitUntilReady(options) {
    if (this.state === TCLIService_types_1.TOperationState.FINISHED_STATE) {
      return;
    }

    const withProgress = Boolean(options == null ? undefined : options.progress);
    let isReady = false;
    while (!isReady) {
      // eslint-disable-next-line no-await-in-loop
      const response = await this.status(withProgress);
      if (options != null && options.callback) {
        // eslint-disable-next-line no-await-in-loop
        await Promise.resolve(options.callback(response));
      }

      switch (response.operationState) {
        // For these states do nothing and continue waiting
        case TCLIService_types_1.TOperationState.INITIALIZED_STATE:
        case TCLIService_types_1.TOperationState.PENDING_STATE:
        case TCLIService_types_1.TOperationState.RUNNING_STATE:
          break;
        // Operation is completed, so exit the loop
        case TCLIService_types_1.TOperationState.FINISHED_STATE:
          isReady = true;
          break;
        // Operation was cancelled, so set a flag and exit the loop (throw an error)
        case TCLIService_types_1.TOperationState.CANCELED_STATE:
          this.cancelled = true;
          throw new OperationStateError_1.default(OperationStateError_1.OperationStateErrorCode.Canceled, response);
        // Operation was closed, so set a flag and exit the loop (throw an error)
        case TCLIService_types_1.TOperationState.CLOSED_STATE:
          this.closed = true;
          throw new OperationStateError_1.default(OperationStateError_1.OperationStateErrorCode.Closed, response);
        // Error states - throw and exit the loop
        case TCLIService_types_1.TOperationState.ERROR_STATE:
          throw new OperationStateError_1.default(OperationStateError_1.OperationStateErrorCode.Error, response);
        case TCLIService_types_1.TOperationState.TIMEDOUT_STATE:
          throw new OperationStateError_1.default(OperationStateError_1.OperationStateErrorCode.Timeout, response);
        // `UKNOWN_STATE` is the spelling used by the Thrift-generated enum
        case TCLIService_types_1.TOperationState.UKNOWN_STATE:
        default:
          throw new OperationStateError_1.default(OperationStateError_1.OperationStateErrorCode.Unknown, response);
      }

      // If not ready yet - make some delay before the next status requests
      if (!isReady) {
        // eslint-disable-next-line no-await-in-loop
        await delay(100);
      }
    }
  }

  /**
   * Returns result set metadata, requesting it from the driver on first use
   * and caching it on the instance afterwards.
   * @throws {StatusError}
   */
  async fetchMetadata() {
    if (!this.metadata) {
      const driver = await this.context.getDriver();
      const metadata = await driver.getResultSetMetadata({
        operationHandle: this.operationHandle,
      });
      Status_1.default.assert(metadata.status);
      this.metadata = metadata;
    }
    return this.metadata;
  }

  /**
   * Returns the result handler for this operation, creating it on first use
   * based on `metadata.resultFormat` and wrapping it into a `ResultSlicer`.
   * @throws {HiveDriverError} when the result format is not one of the supported row set types
   */
  async getResultHandler() {
    const metadata = await this.fetchMetadata();
    const resultFormat = (0, utils_1.definedOrError)(metadata.resultFormat);

    if (!this.resultHandler) {
      let resultSource;
      switch (resultFormat) {
        case TCLIService_types_1.TSparkRowSetType.COLUMN_BASED_SET:
          resultSource = new JsonResultHandler_1.default(this.context, this._data, metadata);
          break;
        case TCLIService_types_1.TSparkRowSetType.ARROW_BASED_SET:
          resultSource = new ArrowResultConverter_1.default(
            this.context,
            new ArrowResultHandler_1.default(this.context, this._data, metadata),
            metadata,
          );
          break;
        case TCLIService_types_1.TSparkRowSetType.URL_BASED_SET:
          resultSource = new ArrowResultConverter_1.default(
            this.context,
            new CloudFetchResultHandler_1.default(this.context, this._data, metadata),
            metadata,
          );
          break;
        // no default
      }
      if (resultSource) {
        this.resultHandler = new ResultSlicer_1.default(this.context, resultSource);
      }
    }

    if (!this.resultHandler) {
      throw new HiveDriverError_1.default(`Unsupported result format: ${TCLIService_types_1.TSparkRowSetType[resultFormat]}`);
    }
    return this.resultHandler;
  }

  /**
   * Validates a status response and updates the locally tracked state.
   * The response is cached as `operationStatus` only once the operation is no
   * longer initialized / pending / running, so `status()` keeps polling until then.
   * @param response - operation status response
   * @returns The same `response` object
   * @throws {StatusError}
   */
  processOperationStatusResponse(response) {
    Status_1.default.assert(response.status);

    // Keep the previous state when the response does not carry one
    this.state = response.operationState != null ? response.operationState : this.state;

    if (typeof response.hasResultSet === 'boolean') {
      this.operationHandle.hasResultSet = response.hasResultSet;
    }

    const isInProgress = [
      TCLIService_types_1.TOperationState.INITIALIZED_STATE,
      TCLIService_types_1.TOperationState.PENDING_STATE,
      TCLIService_types_1.TOperationState.RUNNING_STATE,
    ].includes(this.state);
    if (!isInProgress) {
      this.operationStatus = response;
    }
    return response;
  }
}
exports.default = DBSQLOperation;
//# sourceMappingURL=DBSQLOperation.js.map