nodejs-polars
Version:
Polars: Blazingly fast DataFrames in Rust, Python, Node.js, R and SQL
236 lines (235 loc) • 8.65 kB
JavaScript
// CommonJS/ES-module interop helper: reuse an already-installed helper when
// one is present on `this`, otherwise define it locally.
var __importDefault = (this && this.__importDefault) || function (mod) {
    // Modules flagged as ES modules are passed through untouched.
    if (mod && mod.__esModule) {
        return mod;
    }
    // Anything else is exposed under a synthetic `default` key.
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.jsTypeToPolarsType = void 0;
exports.arrayToJsSeries = arrayToJsSeries;
exports.arrayToJsDataFrame = arrayToJsDataFrame;
const types_1 = require("node:util/types");
const datatypes_1 = require("../datatypes");
const datatype_1 = require("../datatypes/datatype");
const field_1 = require("../datatypes/field");
const series_1 = require("../series");
const polars_internal_1 = __importDefault(require("./polars_internal"));
/**
 * Infer the polars `DataType` that corresponds to a JavaScript value.
 *
 * Resolution order:
 *  - `null`            -> `Float64`
 *  - plain array       -> the type inferred from `firstNonNull(value)`
 *  - typed array       -> the matching numeric type, selected by constructor name
 *  - `Date`            -> `Datetime` with millisecond time unit
 *  - plain object      -> `Struct` with one field per entry of `Object.entries(value)`
 *  - bigint            -> `UInt64`
 *  - number            -> `Float64`
 *  - string            -> `Utf8`
 *  - boolean           -> `Bool`
 *  - anything else (including `undefined`) -> `Float64`
 *
 * @param value - any JavaScript value
 * @returns the inferred `DataType`
 * @throws {Error} when `value` is a typed array whose constructor is not handled below
 */
const jsTypeToPolarsType = (value) => {
    if (value === null) {
        return datatypes_1.DataType.Float64;
    }
    if (Array.isArray(value)) {
        // Arrays are typed after their first non-null element.
        return (0, exports.jsTypeToPolarsType)(firstNonNull(value));
    }
    if ((0, types_1.isTypedArray)(value)) {
        switch (value.constructor.name) {
            case Int8Array.name:
                return datatypes_1.DataType.Int8;
            case Int16Array.name:
                return datatypes_1.DataType.Int16;
            case Int32Array.name:
                return datatypes_1.DataType.Int32;
            case BigInt64Array.name:
                return datatypes_1.DataType.Int64;
            case Uint8Array.name:
            // Uint8ClampedArray also stores unsigned 8-bit integers and is
            // accepted by `fromTypedArray`, so inference must not reject it.
            case Uint8ClampedArray.name:
                return datatypes_1.DataType.UInt8;
            case Uint16Array.name:
                return datatypes_1.DataType.UInt16;
            case Uint32Array.name:
                return datatypes_1.DataType.UInt32;
            case BigUint64Array.name:
                return datatypes_1.DataType.UInt64;
            case Float32Array.name:
                return datatypes_1.DataType.Float32;
            case Float64Array.name:
                return datatypes_1.DataType.Float64;
            default:
                throw new Error(`unknown typed array type: ${value.constructor.name}`);
        }
    }
    if (value instanceof Date) {
        return datatypes_1.DataType.Datetime(datatype_1.TimeUnit.Milliseconds);
    }
    // Only plain object literals become structs; class instances fall through.
    if (typeof value === "object" && value.constructor === Object) {
        const fields = Object.entries(value).map(([fieldName, fieldValue]) => {
            const fieldDtype = (0, exports.jsTypeToPolarsType)(fieldValue);
            return field_1.Field.from(fieldName, fieldDtype);
        });
        return datatypes_1.DataType.Struct(fields);
    }
    switch (typeof value) {
        case "bigint":
            // NOTE(review): negative bigints also map to UInt64 here — confirm this is intended.
            return datatypes_1.DataType.UInt64;
        case "number":
            return datatypes_1.DataType.Float64;
        case "string":
            return datatypes_1.DataType.Utf8;
        case "boolean":
            return datatypes_1.DataType.Bool;
        default:
            return datatypes_1.DataType.Float64;
    }
};
exports.jsTypeToPolarsType = jsTypeToPolarsType;
/**
 * __returns the first entry of the input that is neither `null` nor `undefined`__
 * ___
 * When that entry is itself an array, the input is flattened by one level and
 * searched again; the value found that way is returned wrapped in an array.
 *
 * @example
 * ```
 * > firstNonNull([null, [], [null, "a", "b"]])
 * ["a"]
 * > firstNonNull([null, 1])
 * 1
 * ```
 */
const firstNonNull = (arr) => {
    const isPresent = (item) => item != null;
    const candidate = arr.find(isPresent);
    // A nested array means the sample must be looked up one level deeper.
    return Array.isArray(candidate) ? [firstNonNull(arr.flat())] : candidate;
};
// Maps a typed-array constructor name to the `JsSeries` factory method used for it.
const TYPED_ARRAY_SERIES_FACTORIES = new Map([
    [Int8Array.name, "newInt8Array"],
    [Int16Array.name, "newInt16Array"],
    [Int32Array.name, "newInt32Array"],
    [BigInt64Array.name, "newBigint64Array"],
    [Uint8Array.name, "newUint8Array"],
    [Uint8ClampedArray.name, "newUint8ClampedArray"],
    [Uint16Array.name, "newUint16Array"],
    [Uint32Array.name, "newUint32Array"],
    [BigUint64Array.name, "newBiguint64Array"],
    [Float32Array.name, "newFloat32Array"],
    [Float64Array.name, "newFloat64Array"],
]);
/**
 * Build an internal `JsSeries` from a typed array by dispatching on the
 * array's constructor name.
 *
 * @param name - name passed to the series factory
 * @param value - typed array holding the data
 * @throws {Error} when the constructor name has no matching factory
 */
const fromTypedArray = (name, value) => {
    const ctorName = value.constructor.name;
    const factory = TYPED_ARRAY_SERIES_FACTORIES.get(ctorName);
    if (factory === undefined) {
        throw new Error(`unknown typed array type: ${ctorName}`);
    }
    return polars_internal_1.default.JsSeries[factory](name, value);
};
// Variants whose series is cast to the requested dtype after construction.
const CAST_AFTER_BUILD_VARIANTS = new Set([
    "Datetime",
    "Date",
    "Categorical",
    "Int8",
    "Int16",
    "UInt8",
    "UInt16",
    "Float32",
]);
/**
 * Create an internal `JsSeries` out of an array of values.
 *
 * @param name - series name (defaults to the empty string)
 * @param values - plain array or typed array with the data (defaults to `[]`)
 * @param dtype - optional target data type; inferred from the first non-null value when omitted
 * @param strict - forwarded to the underlying constructors and casts
 * @throws {Error} when the dtype variant is "Decimal" and the first non-null value is not a bigint
 */
function arrayToJsSeries(name = "", values = [], dtype, strict = false) {
    if ((0, types_1.isTypedArray)(values)) {
        return fromTypedArray(name, values);
    }
    let targetDtype = dtype;
    // An empty sequence without an explicit dtype becomes Float64.
    if (!values?.length && !targetDtype) {
        targetDtype = datatypes_1.DataType.Float64;
    }
    const sample = firstNonNull(values);
    const isNested = Array.isArray(sample) || (0, types_1.isTypedArray)(sample);
    if (isNested) {
        // List series: the inner type comes from the sampled element.
        const innerDtype = (0, exports.jsTypeToPolarsType)(sample);
        const buildList = (0, datatypes_1.polarsTypeToConstructor)(datatypes_1.DataType.List(innerDtype));
        const listSeries = buildList(name, values, strict, innerDtype);
        // TODO: build a FixedSizeList natively in Rust
        return targetDtype instanceof datatype_1.FixedSizeList
            ? listSeries.cast(targetDtype, strict)
            : listSeries;
    }
    if (targetDtype === null || targetDtype === undefined) {
        targetDtype = (0, exports.jsTypeToPolarsType)(sample);
    }
    switch (targetDtype?.variant) {
        case "Struct":
            return polars_internal_1.default.fromRows(values, null, 1).toStruct(name);
        case "Decimal":
            if (typeof sample !== "bigint") {
                throw new Error("Decimal type can only be constructed from BigInt");
            }
            return polars_internal_1.default.JsSeries.newAnyvalue(name, values, targetDtype, strict);
    }
    const built = sample instanceof Date
        ? polars_internal_1.default.JsSeries.newOptDate(name, values, strict)
        : (0, datatypes_1.polarsTypeToConstructor)(targetDtype)(name, values, strict);
    return CAST_AFTER_BUILD_VARIANTS.has(targetDtype.variant)
        ? built.cast(targetDtype, strict)
        : built;
}
/**
 * Build an internal data frame from an array of input data.
 *
 * The shape of the first element decides how `data` is interpreted:
 *  - empty array                -> a frame with no series
 *  - element with a truthy `_s` -> every element is treated as a series wrapper;
 *                                  unnamed ones are renamed to `column_<idx>` via `rename(name, true)`
 *  - plain object               -> rows, built with `fromRows(data, schema, inferSchemaLength)`
 *  - array                      -> rows when `orient` is "row", otherwise one column per inner array;
 *                                  when `orient` is not given but `columns` is, the orientation is
 *                                  "col" if `columns.length === data.length`, else "row"
 *  - anything else, including a `null`/`undefined` first element -> a single column named `column_0`
 *
 * @param data - array of series wrappers, row objects, arrays or scalars
 * @param options - optional `columns`, `orient`, `schema` and `inferSchemaLength`
 * @returns the result of `fromRows` for row-shaped input, otherwise a new `JsDataFrame`
 */
function arrayToJsDataFrame(data, options) {
    const columns = options?.columns;
    let orient = options?.orient;
    const schema = options?.schema;
    const inferSchemaLength = options?.inferSchemaLength;
    let dataSeries;
    if (!data.length) {
        dataSeries = [];
    }
    else if (data[0]?._s) {
        dataSeries = [];
        for (const [idx, series] of data.entries()) {
            if (!series.name) {
                series.rename(`column_${idx}`, true);
            }
            dataSeries.push(series._s);
        }
    }
    // Guarded like the `_s` check above so that a null/undefined first element
    // reaches the scalar fallback instead of throwing on `.constructor`.
    else if (data[0]?.constructor?.name === "Object") {
        const df = polars_internal_1.default.fromRows(data, schema, inferSchemaLength);
        if (columns) {
            df.columns = columns;
        }
        return df;
    }
    else if (Array.isArray(data[0])) {
        if (!orient && columns) {
            orient = columns.length === data.length ? "col" : "row";
        }
        if (orient === "row") {
            const df = polars_internal_1.default.fromRows(data);
            if (columns) {
                df.columns = columns;
            }
            return df;
        }
        dataSeries = data.map((s, idx) => (0, series_1.Series)(`column_${idx}`, s)._s);
    }
    else {
        dataSeries = [(0, series_1.Series)("column_0", data)._s];
    }
    dataSeries = handleColumnsArg(dataSeries, columns);
    return new polars_internal_1.default.JsDataFrame(dataSeries);
}
/**
 * Apply the user-supplied column names to a list of internal series.
 *
 * - without `columns`, `data` is returned untouched
 * - with `columns` but no data (missing or empty array), one empty series is
 *   created per column name
 * - otherwise every series is renamed positionally via `rename(name)`
 *
 * @param data - array of internal series objects; each must expose `rename(name)`
 * @param columns - optional array of column names
 * @returns the (renamed) series array, or freshly created empty series
 * @throws {TypeError} when `data` is non-empty and its length differs from `columns.length`
 */
function handleColumnsArg(data, columns) {
    if (!columns) {
        return data;
    }
    // An empty array is truthy, so emptiness has to be checked by length;
    // otherwise empty data with column names hits the dimension error below.
    if (!data || data.length === 0) {
        return columns.map((c) => series_1.Series.from(c, [])._s);
    }
    if (data.length !== columns.length) {
        throw new TypeError("Dimensions of columns arg must match data dimensions.");
    }
    for (const [idx, name] of columns.entries()) {
        data[idx].rename(name);
    }
    return data;
}
;