
@databricks/sql


Driver for connecting to Databricks SQL via the Thrift API.
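
For orientation, the package's public entry point is `DBSQLClient`; the converter below sits inside its result-fetching pipeline. A minimal connect-and-query sketch, following the package README (the host, path, and token values are placeholders you must replace):

const { DBSQLClient } = require('@databricks/sql');

async function main() {
    const client = new DBSQLClient();
    // Placeholder credentials: use your workspace hostname, warehouse HTTP path, and access token
    await client.connect({
        host: 'your-workspace.cloud.databricks.com',
        path: '/sql/1.0/warehouses/your-warehouse-id',
        token: 'your-personal-access-token',
    });
    const session = await client.openSession();
    const operation = await session.executeStatement('SELECT 1 AS one');
    const rows = await operation.fetchAll(); // results pass through converters like the one below
    await operation.close();
    await session.close();
    await client.close();
    console.log(rows);
}

main().catch(console.error);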

"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); const buffer_1 = require("buffer"); const apache_arrow_1 = require("apache-arrow"); const utils_1 = require("./utils"); const { isArrowBigNumSymbol, bigNumToBigInt } = apache_arrow_1.util; class ArrowResultConverter { constructor(context, source, { schema }) { // Remaining rows in current Arrow batch (not the record batch!) this.remainingRows = 0; this.context = context; this.source = source; this.schema = (0, utils_1.getSchemaColumns)(schema); } async hasMore() { if (this.schema.length === 0) { return false; } if (this.prefetchedRecordBatch) { return true; } return this.source.hasMore(); } async fetchNext(options) { if (this.schema.length === 0) { return []; } // It's not possible to know if iterator has more items until trying to get the next item. // So each time we read one batch ahead and store it, but process the batch prefetched on // a previous `fetchNext` call. Because we actually already have the next item - it's easy // to tell if the subsequent `fetchNext` will be able to read anything, and `hasMore` logic // becomes trivial // This prefetch handles a first call to `fetchNext`, when all the internal fields are not initialized yet. // On subsequent calls to `fetchNext` it will do nothing await this.prefetch(options); if (this.prefetchedRecordBatch) { // Consume a record batch fetched during previous call to `fetchNext` const table = new apache_arrow_1.Table(this.prefetchedRecordBatch); this.prefetchedRecordBatch = undefined; // Get table rows, but not more than remaining count const arrowRows = table.toArray().slice(0, this.remainingRows); const result = this.getRows(table.schema, arrowRows); // Reduce remaining rows count by a count of rows we just processed. // If the remaining count reached zero - we're done with current arrow // batch, so discard the batch reader this.remainingRows -= result.length; if (this.remainingRows === 0) { this.recordBatchReader = undefined; } // Prefetch the next record batch await this.prefetch(options); return result; } return []; } // This method tries to read one more record batch and store it in `prefetchedRecordBatch` field. // If `prefetchedRecordBatch` is already non-empty - the method does nothing. // This method pulls the next item from source if needed, initializes a record batch reader and // gets the next item from it - until either reaches end of data or finds a non-empty record batch async prefetch(options) { var _a, _b; // This loop will be executed until a next non-empty record batch is retrieved // Another implicit loop condition (end of data) is checked in the loop body while (!this.prefetchedRecordBatch) { // First, try to fetch next item from source and initialize record batch reader. // If source has no more data - exit prematurely if (!this.recordBatchReader) { const sourceHasMore = await this.source.hasMore(); // eslint-disable-line no-await-in-loop if (!sourceHasMore) { return; } const arrowBatch = await this.source.fetchNext(options); // eslint-disable-line no-await-in-loop if (arrowBatch.batches.length > 0 && arrowBatch.rowCount > 0) { const reader = apache_arrow_1.RecordBatchReader.from(arrowBatch.batches); this.recordBatchReader = reader[Symbol.iterator](); this.remainingRows = arrowBatch.rowCount; } } // Try to get a next item from current record batch reader. The reader may be unavailable at this point - // in this case we fall back to a "done" state, and the `while` loop will do one more iteration attempting // to create a new reader. 
Eventually it will either succeed or reach end of source. This scenario also // handles readers which are already empty const item = (_b = (_a = this.recordBatchReader) === null || _a === void 0 ? void 0 : _a.next()) !== null && _b !== void 0 ? _b : { done: true, value: undefined }; if (item.done || item.value === undefined) { this.recordBatchReader = undefined; } else { // Skip empty batches // eslint-disable-next-line no-lonely-if if (item.value.numRows > 0) { this.prefetchedRecordBatch = item.value; } } } } getRows(schema, rows) { return rows.map((row) => { // First, convert native Arrow values to corresponding plain JS objects const record = this.convertArrowTypes(row, undefined, schema.fields); // Second, cast all the values to original Thrift types return this.convertThriftTypes(record); }); } convertArrowTypes(value, valueType, fields = []) { var _a; if (value === null) { return value; } const fieldsMap = {}; for (const field of fields) { fieldsMap[field.name] = field; } // Convert structures to plain JS object and process all its fields recursively if (value instanceof apache_arrow_1.StructRow) { const result = value.toJSON(); for (const key of Object.keys(result)) { const field = fieldsMap[key]; result[key] = this.convertArrowTypes(result[key], field === null || field === void 0 ? void 0 : field.type, (field === null || field === void 0 ? void 0 : field.type.children) || []); } return result; } if (value instanceof apache_arrow_1.MapRow) { const result = value.toJSON(); // Map type consists of its key and value types. We need only value type here, key will be cast to string anyway const field = (_a = fieldsMap.entries) === null || _a === void 0 ? void 0 : _a.type.children.find((item) => item.name === 'value'); for (const key of Object.keys(result)) { result[key] = this.convertArrowTypes(result[key], field === null || field === void 0 ? void 0 : field.type, (field === null || field === void 0 ? void 0 : field.type.children) || []); } return result; } // Convert lists to JS array and process items recursively if (value instanceof apache_arrow_1.Vector) { const result = value.toJSON(); // Array type contains the only child which defines a type of each array's element const field = fieldsMap.element; return result.map((item) => this.convertArrowTypes(item, field === null || field === void 0 ? void 0 : field.type, (field === null || field === void 0 ? void 0 : field.type.children) || [])); } if (apache_arrow_1.DataType.isTimestamp(valueType)) { return new Date(value); } // Convert big number values to BigInt // Decimals are also represented as big numbers in Arrow, so additionally process them (convert to float) if (value instanceof Object && value[isArrowBigNumSymbol]) { const result = bigNumToBigInt(value); if (apache_arrow_1.DataType.isDecimal(valueType)) { return Number(result) / 10 ** valueType.scale; } return result; } // Convert binary data to Buffer if (value instanceof Uint8Array) { return buffer_1.Buffer.from(value); } // Return other values as is return typeof value === 'bigint' ? Number(value) : value; } convertThriftTypes(record) { const result = {}; this.schema.forEach((column) => { var _a; const typeDescriptor = (_a = column.typeDesc.types[0]) === null || _a === void 0 ? void 0 : _a.primitiveEntry; const field = column.columnName; const value = record[field]; result[field] = value === null ? null : (0, utils_1.convertThriftValue)(typeDescriptor, value); }); return result; } } exports.default = ArrowResultConverter; //# sourceMappingURL=ArrowResultConverter.js.map
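
The one-item read-ahead that `prefetch` implements is a general pattern for pull-based sources that cannot report emptiness without being read. A standalone sketch of the same idea, reduced to its essentials (all names here are illustrative, not part of this module):

// Wraps an async iterator so that `hasMore` can answer without consuming data:
// one item is always read ahead and handed out on the next `fetchNext` call.
class ReadAheadIterator {
    constructor(iterator) {
        this.iterator = iterator; // anything with an async next() -> { done, value }
        this.prefetched = undefined;
    }

    async prefetch() {
        if (this.prefetched === undefined) {
            const item = await this.iterator.next();
            if (!item.done) {
                this.prefetched = item.value;
            }
        }
    }

    async hasMore() {
        await this.prefetch();
        return this.prefetched !== undefined;
    }

    async fetchNext() {
        await this.prefetch();
        const value = this.prefetched; // undefined once the source is exhausted
        this.prefetched = undefined;
        return value;
    }
}

// Usage: works with any async generator, e.g.
//   const it = (async function* () { yield 1; yield 2; })();
//   const source = new ReadAheadIterator(it);
//   while (await source.hasMore()) { console.log(await source.fetchNext()); }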