/*
 * semantic-ds-toolkit
 * Version: (not shown)
 * Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference
 * 421 lines • 13.7 kB
 * JavaScript
 */
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.dataFrameRegistry = exports.DataFrameAdapterRegistry = exports.ArrayOfObjectsAdapter = exports.PlainObjectAdapter = exports.DuckDBDataFrameAdapter = exports.PolarsDataFrameAdapter = exports.PandasDataFrameAdapter = void 0;
exports.adaptDataFrame = adaptDataFrame;
exports.registerAdapter = registerAdapter;
exports.getSupportedDataFrameTypes = getSupportedDataFrameTypes;
exports.getAdapterForType = getAdapterForType;
/**
 * Adapter for objects whose constructor is named `DataFrame` and that expose
 * `columns` and `dtypes` members. It produces a uniform view with
 * `columns`, `dtypes`, `shape`, `sample(n)` and `getColumn(name)`.
 */
class PandasDataFrameAdapter {
    name = 'pandas';

    /**
     * Duck-type check for a supported frame.
     * @param {*} obj - candidate value.
     * @returns {*} truthy when the adapter can handle `obj`; falsy inputs
     *   (and a falsy `constructor`) are returned as-is rather than coerced.
     */
    canHandle(obj) {
        if (!obj) {
            return obj;
        }
        if (typeof obj !== 'object') {
            return false;
        }
        const ctor = obj.constructor;
        if (!ctor) {
            return ctor;
        }
        if (ctor.name !== 'DataFrame') {
            return false;
        }
        if (typeof obj.columns === 'undefined') {
            return false;
        }
        return typeof obj.dtypes !== 'undefined';
    }

    /**
     * Builds the uniform view over `df`. Column names, dtypes and shape are
     * captured eagerly; `sample` and `getColumn` read from `df` on demand.
     * @param {object} df - frame accepted by `canHandle`.
     * @returns {{columns: *, dtypes: object, shape: Array, sample: Function, getColumn: Function}}
     */
    adapt(df) {
        const columns = this.getColumnNames(df);
        const dtypes = this.getDataTypes(df);
        const shape = this.getShape(df);
        return {
            columns,
            dtypes,
            shape,
            sample: (n = 100) => this.sampleData(df, n),
            getColumn: (name) => this.getColumn(df, name)
        };
    }

    /**
     * Resolves column names from `df.columns` (plain array, object with
     * `tolist()`, or object with `values`), else from the keys of `df.dtypes`.
     * @param {object} df
     * @returns {*} the column names.
     */
    getColumnNames(df) {
        const declared = df.columns;
        if (Array.isArray(declared)) {
            return declared;
        }
        if (declared && typeof declared.tolist === 'function') {
            return declared.tolist();
        }
        if (declared && typeof declared.values !== 'undefined') {
            return Array.from(declared.values);
        }
        return Object.keys(df.dtypes || {});
    }

    /**
     * Stringifies every entry of an object-valued `df.dtypes`.
     * @param {object} df
     * @returns {Object<string, string>} dtype name per key; empty when
     *   `df.dtypes` is missing or not an object.
     */
    getDataTypes(df) {
        const typeByColumn = {};
        const declared = df.dtypes;
        if (!declared || typeof declared !== 'object') {
            return typeByColumn;
        }
        Object.entries(declared).forEach(([column, dtype]) => {
            typeByColumn[column] = String(dtype);
        });
        return typeByColumn;
    }

    /**
     * Returns `[rows, cols]`, taken from `df.shape` when it is an array with
     * at least two entries, otherwise from `df.length` (0 when falsy) and the
     * number of column names.
     * @param {object} df
     * @returns {Array} two-element `[rows, cols]` array.
     */
    getShape(df) {
        const declared = df.shape;
        if (Array.isArray(declared) && declared.length >= 2) {
            const [rows, cols] = declared;
            return [rows, cols];
        }
        const width = this.getColumnNames(df).length;
        const height = df.length || 0;
        return [height, width];
    }

    /**
     * Collects the first `n` values of every column.
     * @param {object} df
     * @param {number} [n=100] - end index handed to `slice(0, n)`.
     * @returns {Object<string, Array>} values keyed by column name.
     */
    sampleData(df, n = 100) {
        const preview = {};
        for (const column of this.getColumnNames(df)) {
            const values = this.getColumn(df, column);
            preview[column] = values.slice(0, n);
        }
        return preview;
    }

    /**
     * Reads one column through `df[columnName]`, accepting a plain array, an
     * object with `tolist()`, or an object with a truthy `values` member.
     * @param {object} df
     * @param {string} columnName
     * @returns {*} the column values, or `[]` when none of the forms match.
     */
    getColumn(df, columnName) {
        const series = df[columnName];
        if (!series) {
            return [];
        }
        if (Array.isArray(series)) {
            return series;
        }
        if (typeof series.tolist === 'function') {
            return series.tolist();
        }
        if (series.values) {
            return Array.from(series.values);
        }
        return [];
    }
}
exports.PandasDataFrameAdapter = PandasDataFrameAdapter;
/**
 * Adapter for objects whose constructor is named `DataFrame` and that expose
 * `getColumns()` and `dtypes()` as functions. It produces a uniform view with
 * `columns`, `dtypes`, `shape`, `sample(n)` and `getColumn(name)`.
 */
class PolarsDataFrameAdapter {
    name = 'polars';

    /**
     * Duck-type check for a supported frame.
     * @param {*} obj - candidate value.
     * @returns {boolean} true when `obj` looks like a supported frame.
     */
    canHandle(obj) {
        return Boolean(obj) && typeof obj === 'object' &&
            Boolean(obj.constructor) && obj.constructor.name === 'DataFrame' &&
            typeof obj.getColumns === 'function' &&
            typeof obj.dtypes === 'function';
    }

    /**
     * Builds the uniform view over `df`. Column names, dtypes and shape are
     * captured eagerly; `sample` and `getColumn` read from `df` on demand.
     * @param {object} df - frame accepted by `canHandle`.
     * @returns {{columns: *, dtypes: object, shape: number[], sample: Function, getColumn: Function}}
     */
    adapt(df) {
        return {
            columns: this.getColumnNames(df),
            dtypes: this.getDataTypes(df),
            shape: this.getShape(df),
            sample: (n = 100) => this.sampleData(df, n),
            getColumn: (name) => this.getColumn(df, name)
        };
    }

    /**
     * Resolves column names from `df.getColumns()`, else from an array-valued
     * `df.columns`.
     * @param {object} df
     * @returns {*} the column names, or `[]` when neither source exists.
     */
    getColumnNames(df) {
        if (typeof df.getColumns === 'function') {
            return df.getColumns();
        }
        if (df.columns && Array.isArray(df.columns)) {
            return df.columns;
        }
        return [];
    }

    /**
     * Pairs column names positionally with the array returned by `df.dtypes()`.
     * @param {object} df
     * @returns {Object<string, string>} stringified dtype per column; columns
     *   whose dtype entry is falsy are omitted.
     */
    getDataTypes(df) {
        const result = {};
        if (typeof df.dtypes !== 'function') {
            return result;
        }
        const columns = this.getColumnNames(df);
        const dtypes = df.dtypes();
        if (Array.isArray(dtypes)) {
            columns.forEach((col, i) => {
                if (dtypes[i]) {
                    result[col] = String(dtypes[i]);
                }
            });
        }
        return result;
    }

    /**
     * Returns `[height, width]` from the numeric `df.height` / `df.width`
     * members, falling back to 0 rows and the column-name count.
     * @param {object} df
     * @returns {number[]} `[rows, cols]`.
     */
    getShape(df) {
        const height = typeof df.height === 'number' ? df.height : 0;
        const width = typeof df.width === 'number' ? df.width : this.getColumnNames(df).length;
        return [height, width];
    }

    /**
     * Extracts up to `n` rows per column, preferring `df.sample` and falling
     * back to `df.head`; when neither exists the full frame is read.
     * @param {object} df
     * @param {number} [n=100] - requested number of rows.
     * @returns {Object<string, Array>} values keyed by column name.
     */
    sampleData(df, n = 100) {
        const result = {};
        const columns = this.getColumnNames(df);
        // Never request more rows than the frame reports: sampling without
        // replacement may reject n > row count, which made the default n=100
        // fail on small frames.
        const limit = typeof df.height === 'number' ? Math.min(n, df.height) : n;
        let sampledDf = df;
        if (typeof df.sample === 'function') {
            sampledDf = df.sample(limit);
        }
        else if (typeof df.head === 'function') {
            sampledDf = df.head(limit);
        }
        for (const col of columns) {
            result[col] = this.getColumn(sampledDf, col);
        }
        return result;
    }

    /**
     * Reads one column through `df.getColumn(columnName)`, accepting either an
     * object with `toArray()` or a plain array.
     * @param {object} df
     * @param {string} columnName
     * @returns {*} the column values, or `[]` when unavailable.
     */
    getColumn(df, columnName) {
        if (typeof df.getColumn !== 'function') {
            return [];
        }
        const column = df.getColumn(columnName);
        if (column && typeof column.toArray === 'function') {
            return column.toArray();
        }
        return Array.isArray(column) ? column : [];
    }
}
exports.PolarsDataFrameAdapter = PolarsDataFrameAdapter;
/**
 * Adapter for query-result objects whose constructor is named `DuckDBResult`
 * or `QueryResult`, with an array `columns` (strings or `{ name, type }`
 * descriptors) and an array `data` of rows (arrays or keyed objects).
 */
class DuckDBDataFrameAdapter {
    name = 'duckdb';

    /**
     * Duck-type check for a supported result object.
     * @param {*} obj - candidate value.
     * @returns {boolean} true when `obj` looks like a supported result.
     */
    canHandle(obj) {
        if (!obj || typeof obj !== 'object') {
            return false;
        }
        // Objects without a constructor (e.g. Object.create(null)) must be
        // rejected, not crash the lookup, so later adapters can still be tried.
        const typeName = obj.constructor?.name;
        return (typeName === 'DuckDBResult' || typeName === 'QueryResult') &&
            Array.isArray(obj.columns);
    }

    /**
     * Builds the uniform view over `df`. Column names, dtypes and shape are
     * captured eagerly; `sample` and `getColumn` read from `df` on demand.
     * @param {object} df - result accepted by `canHandle`.
     * @returns {{columns: string[], dtypes: object, shape: number[], sample: Function, getColumn: Function}}
     */
    adapt(df) {
        return {
            columns: this.getColumnNames(df),
            dtypes: this.getDataTypes(df),
            shape: this.getShape(df),
            sample: (n = 100) => this.sampleData(df, n),
            getColumn: (name) => this.getColumn(df, name)
        };
    }

    /**
     * Resolves column names: strings are used directly, descriptors contribute
     * their `name`, anything else is stringified.
     * @param {object} df
     * @returns {string[]} column names, or `[]` when `df.columns` is not an array.
     */
    getColumnNames(df) {
        if (!Array.isArray(df.columns)) {
            return [];
        }
        // `col?.name` tolerates null/undefined descriptors instead of throwing.
        return df.columns.map((col) => typeof col === 'string' ? col : col?.name || String(col));
    }

    /**
     * Maps each column name to the stringified `type` of its descriptor.
     * @param {object} df
     * @returns {Object<string, string>} type per column; `'unknown'` when the
     *   descriptor is not an object or has no truthy `type`.
     */
    getDataTypes(df) {
        const result = {};
        if (!Array.isArray(df.columns)) {
            return result;
        }
        const columns = this.getColumnNames(df);
        df.columns.forEach((col, i) => {
            const hasType = col && typeof col === 'object' && col.type;
            result[columns[i]] = hasType ? String(col.type) : 'unknown';
        });
        return result;
    }

    /**
     * Returns `[rows, cols]` from the length of `df.data` and the column count.
     * @param {object} df
     * @returns {number[]} `[rows, cols]`; rows is 0 when `df.data` is not an array.
     */
    getShape(df) {
        const rows = Array.isArray(df.data) ? df.data.length : 0;
        const cols = this.getColumnNames(df).length;
        return [rows, cols];
    }

    /**
     * Collects the first `n` rows of every column. Rows are sliced once up
     * front so that only the sampled rows are traversed per column.
     * @param {object} df
     * @param {number} [n=100] - end index handed to `slice(0, n)`.
     * @returns {Object<string, Array>} values keyed by column name.
     */
    sampleData(df, n = 100) {
        const result = {};
        const columns = this.getColumnNames(df);
        const rows = Array.isArray(df.data) ? df.data.slice(0, n) : [];
        const seen = new Set();
        columns.forEach((col, i) => {
            // Duplicate names resolve to their first occurrence, as in getColumn.
            if (seen.has(col)) {
                return;
            }
            seen.add(col);
            result[col] = this.extractCells(rows, i, col);
        });
        return result;
    }

    /**
     * Returns every value of one column, one entry per row.
     * @param {object} df
     * @param {string} columnName
     * @returns {Array} column values; `[]` when the column is unknown or
     *   `df.data` is not an array.
     */
    getColumn(df, columnName) {
        const colIndex = this.getColumnNames(df).indexOf(columnName);
        if (colIndex === -1 || !Array.isArray(df.data)) {
            return [];
        }
        return this.extractCells(df.data, colIndex, columnName);
    }

    /**
     * Pulls one cell per row, by index for array rows and by key otherwise.
     * Missing cells become `null` instead of being dropped, so every column
     * keeps one entry per row and stays aligned with `getShape`.
     * @param {Array} rows
     * @param {number} colIndex - position of the column in array rows.
     * @param {string} columnName - key of the column in object rows.
     * @returns {Array} one value per row.
     */
    extractCells(rows, colIndex, columnName) {
        return rows.map((row) => {
            const cell = Array.isArray(row) ? row[colIndex] : (row && row[columnName]);
            return cell ?? null;
        });
    }
}
exports.DuckDBDataFrameAdapter = DuckDBDataFrameAdapter;
/**
 * Adapter for column-oriented plain objects: a non-array object whose own
 * enumerable values are all arrays, e.g. `{ id: [1, 2], name: ['a', 'b'] }`.
 */
class PlainObjectAdapter {
    name = 'plain_object';

    /**
     * Checks that `obj` is a non-array object whose values are all arrays.
     * @param {*} obj - candidate value.
     * @returns {*} truthy when supported; falsy inputs are returned as-is.
     */
    canHandle(obj) {
        if (!obj) {
            return obj;
        }
        if (typeof obj !== 'object' || Array.isArray(obj)) {
            return false;
        }
        return Object.values(obj).every((val) => Array.isArray(val));
    }

    /**
     * Builds the uniform view over `obj`. Column names, dtypes and shape are
     * captured eagerly; `sample` and `getColumn` read from `obj` on demand.
     * @param {object} obj - object accepted by `canHandle`.
     * @returns {{columns: string[], dtypes: object, shape: number[], sample: Function, getColumn: Function}}
     */
    adapt(obj) {
        const columns = this.getColumnNames(obj);
        const dtypes = this.getDataTypes(obj);
        const shape = this.getShape(obj);
        return {
            columns,
            dtypes,
            shape,
            sample: (n = 100) => this.sampleData(obj, n),
            getColumn: (name) => this.getColumn(obj, name)
        };
    }

    /**
     * @param {object} obj
     * @returns {string[]} the object's own enumerable keys.
     */
    getColumnNames(obj) {
        return Object.keys(obj);
    }

    /**
     * Infers a dtype per column from its first value that is neither `null`
     * nor `undefined`.
     * @param {object} obj
     * @returns {Object<string, string>} dtype per column; `'unknown'` when the
     *   column is not an array or holds no usable value.
     */
    getDataTypes(obj) {
        const typeByColumn = {};
        for (const [column, cells] of Object.entries(obj)) {
            const probe = Array.isArray(cells) ? cells.find(v => v != null) : undefined;
            typeByColumn[column] = probe === undefined ? 'unknown' : this.inferType(probe);
        }
        return typeByColumn;
    }

    /**
     * Returns `[rows, cols]`, taking the row count from the first column only.
     * @param {object} obj
     * @returns {number[]} `[rows, cols]`; `[0, 0]` when there are no columns.
     */
    getShape(obj) {
        const names = this.getColumnNames(obj);
        if (names.length === 0) {
            return [0, 0];
        }
        const [leadName] = names;
        const lead = obj[leadName];
        return [Array.isArray(lead) ? lead.length : 0, names.length];
    }

    /**
     * Collects the first `n` values of every column.
     * @param {object} obj
     * @param {number} [n=100] - end index handed to `slice(0, n)`.
     * @returns {Object<string, Array>} values keyed by column name; non-array
     *   columns map to `[]`.
     */
    sampleData(obj, n = 100) {
        const preview = {};
        for (const [column, cells] of Object.entries(obj)) {
            preview[column] = Array.isArray(cells) ? cells.slice(0, n) : [];
        }
        return preview;
    }

    /**
     * @param {object} obj
     * @param {string} columnName
     * @returns {Array} the stored array itself (not a copy), or `[]` when the
     *   property is not an array.
     */
    getColumn(obj, columnName) {
        const cells = obj[columnName];
        if (Array.isArray(cells)) {
            return cells;
        }
        return [];
    }

    /**
     * Maps a single value to a dtype label.
     * @param {*} value
     * @returns {string} `'int64'`, `'float64'`, `'boolean'`, `'datetime'`
     *   (Date instances and strings starting with `YYYY-MM-DD`), `'string'`
     *   or `'unknown'`.
     */
    inferType(value) {
        switch (typeof value) {
            case 'number':
                return Number.isInteger(value) ? 'int64' : 'float64';
            case 'boolean':
                return 'boolean';
            case 'string':
                return /^\d{4}-\d{2}-\d{2}/.test(value) ? 'datetime' : 'string';
            default:
                return value instanceof Date ? 'datetime' : 'unknown';
        }
    }
}
exports.PlainObjectAdapter = PlainObjectAdapter;
/**
 * Adapter for row-oriented data: a non-empty array whose items are all
 * non-array objects, e.g. `[{ id: 1 }, { id: 2, name: 'b' }]`.
 */
class ArrayOfObjectsAdapter {
    name = 'array_of_objects';

    /**
     * @param {*} obj - candidate value.
     * @returns {boolean} true for a non-empty array of non-array objects.
     */
    canHandle(obj) {
        if (!Array.isArray(obj) || obj.length === 0) {
            return false;
        }
        return obj.every(item => Boolean(item) && typeof item === 'object' && !Array.isArray(item));
    }

    /**
     * Builds the uniform view over `arr`. Column names, dtypes and shape are
     * captured eagerly; `sample` and `getColumn` read from `arr` on demand.
     * @param {object[]} arr - rows accepted by `canHandle`.
     * @returns {{columns: string[], dtypes: object, shape: number[], sample: Function, getColumn: Function}}
     */
    adapt(arr) {
        const columns = this.getColumnNames(arr);
        const dtypes = this.getDataTypes(arr);
        const shape = this.getShape(arr);
        return {
            columns,
            dtypes,
            shape,
            sample: (n = 100) => this.sampleData(arr, n),
            getColumn: (name) => this.getColumn(arr, name)
        };
    }

    /**
     * Gathers the union of keys across all rows.
     * @param {object[]} arr
     * @returns {string[]} distinct keys in default `sort()` order.
     */
    getColumnNames(arr) {
        if (arr.length === 0) {
            return [];
        }
        const keys = new Set();
        for (const record of arr) {
            for (const key of Object.keys(record)) {
                keys.add(key);
            }
        }
        return [...keys].sort();
    }

    /**
     * Infers a dtype per column from the first row whose value for that column
     * is neither `null` nor `undefined`.
     * @param {object[]} arr
     * @returns {Object<string, string>} dtype per column, `'unknown'` when no
     *   row supplies a value.
     */
    getDataTypes(arr) {
        const typeByColumn = {};
        for (const column of this.getColumnNames(arr)) {
            const firstFilled = arr.find(record => record[column] != null);
            if (firstFilled !== undefined) {
                typeByColumn[column] = this.inferType(firstFilled[column]);
            }
            if (!typeByColumn[column]) {
                typeByColumn[column] = 'unknown';
            }
        }
        return typeByColumn;
    }

    /**
     * @param {object[]} arr
     * @returns {number[]} `[rows, cols]` from the array length and key count.
     */
    getShape(arr) {
        const width = this.getColumnNames(arr).length;
        return [arr.length, width];
    }

    /**
     * Pivots the first `n` rows into per-column arrays. Column names come from
     * all rows, so a column absent from the sampled rows holds `undefined`s.
     * @param {object[]} arr
     * @param {number} [n=100] - end index handed to `slice(0, n)`.
     * @returns {Object<string, Array>} values keyed by column name.
     */
    sampleData(arr, n = 100) {
        const head = arr.slice(0, n);
        const preview = {};
        for (const column of this.getColumnNames(arr)) {
            preview[column] = head.map(record => record[column]);
        }
        return preview;
    }

    /**
     * @param {object[]} arr
     * @param {string} columnName
     * @returns {Array} one value per row (`undefined` where the key is absent).
     */
    getColumn(arr, columnName) {
        return arr.map(record => record[columnName]);
    }

    /**
     * Maps a single value to a dtype label.
     * @param {*} value
     * @returns {string} `'int64'`, `'float64'`, `'boolean'`, `'datetime'`
     *   (Date instances and strings starting with `YYYY-MM-DD`), `'string'`
     *   or `'unknown'`.
     */
    inferType(value) {
        switch (typeof value) {
            case 'number':
                return Number.isInteger(value) ? 'int64' : 'float64';
            case 'boolean':
                return 'boolean';
            case 'string':
                return /^\d{4}-\d{2}-\d{2}/.test(value) ? 'datetime' : 'string';
            default:
                return value instanceof Date ? 'datetime' : 'unknown';
        }
    }
}
exports.ArrayOfObjectsAdapter = ArrayOfObjectsAdapter;
/**
 * Ordered collection of adapters. Lookup walks the adapters in registration
 * order and uses the first one whose `canHandle` returns a truthy value.
 */
class DataFrameAdapterRegistry {
    adapters = [];

    /** Creates a registry pre-populated with the built-in adapters. */
    constructor() {
        this.registerDefaultAdapters();
    }

    /**
     * Registers the built-in adapters: pandas, polars, duckdb, plain object,
     * then array of objects. The order is significant because the first
     * matching adapter wins.
     */
    registerDefaultAdapters() {
        const builtIns = [
            PandasDataFrameAdapter,
            PolarsDataFrameAdapter,
            DuckDBDataFrameAdapter,
            PlainObjectAdapter,
            ArrayOfObjectsAdapter
        ];
        for (const Adapter of builtIns) {
            this.register(new Adapter());
        }
    }

    /**
     * Appends an adapter, giving it lower priority than those already registered.
     * @param {{name: string, canHandle: Function, adapt: Function}} adapter
     */
    register(adapter) {
        this.adapters.push(adapter);
    }

    /**
     * @param {*} obj - value to be adapted.
     * @returns {object|null} first adapter accepting `obj`, or `null`.
     */
    findAdapter(obj) {
        const match = this.adapters.find((candidate) => candidate.canHandle(obj));
        return match || null;
    }

    /**
     * @param {*} obj - value to be adapted.
     * @returns {object|null} the matching adapter's `adapt(obj)` result, or
     *   `null` when no adapter accepts `obj`.
     */
    adapt(obj) {
        const adapter = this.findAdapter(obj);
        return adapter ? adapter.adapt(obj) : null;
    }

    /**
     * @returns {string[]} names of all registered adapters, in registration order.
     */
    getSupportedTypes() {
        return this.adapters.map(({ name }) => name);
    }

    /**
     * @param {string} name - adapter name to look up.
     * @returns {object|null} first adapter with that exact name, or `null`.
     */
    getAdapter(name) {
        for (const candidate of this.adapters) {
            if (candidate.name === name) {
                return candidate;
            }
        }
        return null;
    }
}
exports.DataFrameAdapterRegistry = DataFrameAdapterRegistry;
// Module-wide registry instance, created once at load time. It backs the
// adaptDataFrame / registerAdapter / getSupportedDataFrameTypes /
// getAdapterForType helpers below and is also exported as `dataFrameRegistry`.
const globalRegistry = new DataFrameAdapterRegistry();
exports.dataFrameRegistry = globalRegistry;
/**
 * Adapts `obj` through the module-wide registry.
 * @param {*} obj - value to adapt.
 * @returns {*} whatever `globalRegistry.adapt(obj)` returns.
 */
function adaptDataFrame(obj) {
    const adapted = globalRegistry.adapt(obj);
    return adapted;
}
/**
 * Registers `adapter` with the module-wide registry.
 * @param {object} adapter - adapter passed straight to `globalRegistry.register`.
 * @returns {void}
 */
function registerAdapter(adapter) {
    const registry = globalRegistry;
    registry.register(adapter);
}
/**
 * Lists the supported types known to the module-wide registry.
 * @returns {*} whatever `globalRegistry.getSupportedTypes()` returns.
 */
function getSupportedDataFrameTypes() {
    const supported = globalRegistry.getSupportedTypes();
    return supported;
}
/**
 * Looks up an adapter by type name in the module-wide registry.
 * @param {string} typeName - name passed to `globalRegistry.getAdapter`.
 * @returns {*} whatever `globalRegistry.getAdapter(typeName)` returns.
 */
function getAdapterForType(typeName) {
    const adapter = globalRegistry.getAdapter(typeName);
    return adapter;
}
//# sourceMappingURL=dataframe-adapters.js.map