UNPKG

@databricks/sql

Version:

Driver for connection to Databricks SQL via Thrift API.

133 lines 6.45 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.getColumnValue = exports.hiveSchemaToArrowSchema = exports.convertThriftValue = exports.getSchemaColumns = void 0; const node_int64_1 = __importDefault(require("node-int64")); const apache_arrow_1 = require("apache-arrow"); const TCLIService_types_1 = require("../../thrift/TCLIService_types"); const HiveDriverError_1 = __importDefault(require("../errors/HiveDriverError")); function getSchemaColumns(schema) { if (!schema) { return []; } return [...schema.columns].sort((c1, c2) => c1.position - c2.position); } exports.getSchemaColumns = getSchemaColumns; function isString(value) { return typeof value === 'string' || value instanceof String; } function convertJSON(value, defaultValue) { if (!isString(value)) { return value; } try { return JSON.parse(value); } catch (e) { return defaultValue; } } function convertBigInt(value) { if (typeof value === 'bigint') { return Number(value); } if (value instanceof node_int64_1.default) { return value.toNumber(); } return value; } function convertThriftValue(typeDescriptor, value) { if (!typeDescriptor) { return value; } switch (typeDescriptor.type) { case TCLIService_types_1.TTypeId.DATE_TYPE: case TCLIService_types_1.TTypeId.TIMESTAMP_TYPE: return value; case TCLIService_types_1.TTypeId.UNION_TYPE: case TCLIService_types_1.TTypeId.USER_DEFINED_TYPE: return String(value); case TCLIService_types_1.TTypeId.DECIMAL_TYPE: return Number(value); case TCLIService_types_1.TTypeId.STRUCT_TYPE: case TCLIService_types_1.TTypeId.MAP_TYPE: return convertJSON(value, {}); case TCLIService_types_1.TTypeId.ARRAY_TYPE: return convertJSON(value, []); case TCLIService_types_1.TTypeId.BIGINT_TYPE: return convertBigInt(value); case TCLIService_types_1.TTypeId.NULL_TYPE: case TCLIService_types_1.TTypeId.BINARY_TYPE: case TCLIService_types_1.TTypeId.INTERVAL_YEAR_MONTH_TYPE: case TCLIService_types_1.TTypeId.INTERVAL_DAY_TIME_TYPE: case TCLIService_types_1.TTypeId.FLOAT_TYPE: case TCLIService_types_1.TTypeId.DOUBLE_TYPE: case TCLIService_types_1.TTypeId.INT_TYPE: case TCLIService_types_1.TTypeId.SMALLINT_TYPE: case TCLIService_types_1.TTypeId.TINYINT_TYPE: case TCLIService_types_1.TTypeId.BOOLEAN_TYPE: case TCLIService_types_1.TTypeId.STRING_TYPE: case TCLIService_types_1.TTypeId.CHAR_TYPE: case TCLIService_types_1.TTypeId.VARCHAR_TYPE: default: return value; } } exports.convertThriftValue = convertThriftValue; // This type map corresponds to Arrow without native types support (most complex types are serialized as strings) const hiveTypeToArrowType = { [TCLIService_types_1.TTypeId.BOOLEAN_TYPE]: new apache_arrow_1.Bool(), [TCLIService_types_1.TTypeId.TINYINT_TYPE]: new apache_arrow_1.Int8(), [TCLIService_types_1.TTypeId.SMALLINT_TYPE]: new apache_arrow_1.Int16(), [TCLIService_types_1.TTypeId.INT_TYPE]: new apache_arrow_1.Int32(), [TCLIService_types_1.TTypeId.BIGINT_TYPE]: new apache_arrow_1.Int64(), [TCLIService_types_1.TTypeId.FLOAT_TYPE]: new apache_arrow_1.Float32(), [TCLIService_types_1.TTypeId.DOUBLE_TYPE]: new apache_arrow_1.Float64(), [TCLIService_types_1.TTypeId.STRING_TYPE]: new apache_arrow_1.Utf8(), [TCLIService_types_1.TTypeId.TIMESTAMP_TYPE]: new apache_arrow_1.Utf8(), [TCLIService_types_1.TTypeId.BINARY_TYPE]: new apache_arrow_1.Binary(), [TCLIService_types_1.TTypeId.ARRAY_TYPE]: new apache_arrow_1.Utf8(), [TCLIService_types_1.TTypeId.MAP_TYPE]: new apache_arrow_1.Utf8(), [TCLIService_types_1.TTypeId.STRUCT_TYPE]: new apache_arrow_1.Utf8(), [TCLIService_types_1.TTypeId.UNION_TYPE]: new apache_arrow_1.Utf8(), [TCLIService_types_1.TTypeId.USER_DEFINED_TYPE]: new apache_arrow_1.Utf8(), [TCLIService_types_1.TTypeId.DECIMAL_TYPE]: new apache_arrow_1.Utf8(), [TCLIService_types_1.TTypeId.NULL_TYPE]: null, [TCLIService_types_1.TTypeId.DATE_TYPE]: new apache_arrow_1.Date_(apache_arrow_1.DateUnit.DAY), [TCLIService_types_1.TTypeId.VARCHAR_TYPE]: new apache_arrow_1.Utf8(), [TCLIService_types_1.TTypeId.CHAR_TYPE]: new apache_arrow_1.Utf8(), [TCLIService_types_1.TTypeId.INTERVAL_YEAR_MONTH_TYPE]: new apache_arrow_1.Utf8(), [TCLIService_types_1.TTypeId.INTERVAL_DAY_TIME_TYPE]: new apache_arrow_1.Utf8(), }; function hiveSchemaToArrowSchema(schema) { if (!schema) { return undefined; } const columns = getSchemaColumns(schema); const arrowFields = columns.map((column) => { var _a, _b; const hiveType = (_b = (_a = column.typeDesc.types[0].primitiveEntry) === null || _a === void 0 ? void 0 : _a.type) !== null && _b !== void 0 ? _b : undefined; const arrowType = hiveType !== undefined ? hiveTypeToArrowType[hiveType] : undefined; if (!arrowType) { throw new HiveDriverError_1.default(`Unsupported column type: ${hiveType ? TCLIService_types_1.TTypeId[hiveType] : 'undefined'}`); } return new apache_arrow_1.Field(column.columnName, arrowType, true); }); const arrowSchema = new apache_arrow_1.Schema(arrowFields); const writer = new apache_arrow_1.RecordBatchWriter(); writer.reset(undefined, arrowSchema); writer.finish(); return Buffer.from(writer.toUint8Array(true)); } exports.hiveSchemaToArrowSchema = hiveSchemaToArrowSchema; function getColumnValue(column) { var _a, _b, _c, _d, _e, _f, _g; if (!column) { return undefined; } return ((_g = (_f = (_e = (_d = (_c = (_b = (_a = column.binaryVal) !== null && _a !== void 0 ? _a : column.boolVal) !== null && _b !== void 0 ? _b : column.byteVal) !== null && _c !== void 0 ? _c : column.doubleVal) !== null && _d !== void 0 ? _d : column.i16Val) !== null && _e !== void 0 ? _e : column.i32Val) !== null && _f !== void 0 ? _f : column.i64Val) !== null && _g !== void 0 ? _g : column.stringVal); } exports.getColumnValue = getColumnValue; //# sourceMappingURL=utils.js.map