UNPKG

semantic-ds-toolkit

Version:

Performance-first semantic layer for modern data stacks - Stable Column Anchors & intelligent inference

421 lines 13.7 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.dataFrameRegistry = exports.DataFrameAdapterRegistry = exports.ArrayOfObjectsAdapter = exports.PlainObjectAdapter = exports.DuckDBDataFrameAdapter = exports.PolarsDataFrameAdapter = exports.PandasDataFrameAdapter = void 0;
exports.adaptDataFrame = adaptDataFrame;
exports.registerAdapter = registerAdapter;
exports.getSupportedDataFrameTypes = getSupportedDataFrameTypes;
exports.getAdapterForType = getAdapterForType;

/** Matches strings that start with a `YYYY-MM-DD` prefix. Stateless (no `g`/`y` flag). */
const ISO_DATE_PREFIX = /^\d{4}-\d{2}-\d{2}/;

/**
 * True for any non-null value whose `typeof` is `'object'`.
 * @param {*} value
 * @returns {boolean}
 */
function isObjectLike(value) {
    return value !== null && typeof value === 'object';
}

/**
 * Reads `obj.constructor.name` without throwing when the object has no
 * constructor (e.g. objects created with `Object.create(null)`).
 * @param {object} obj - a non-null object
 * @returns {string|undefined}
 */
function constructorNameOf(obj) {
    const ctor = obj.constructor;
    return ctor ? ctor.name : undefined;
}

/**
 * Maps a single JavaScript value to a dtype label.
 * Numbers become `'int64'`/`'float64'`, booleans `'boolean'`, `Date`
 * instances and strings starting with `YYYY-MM-DD` become `'datetime'`,
 * other strings `'string'`, and everything else `'unknown'`.
 * @param {*} value
 * @returns {string}
 */
function inferValueType(value) {
    if (typeof value === 'number') {
        return Number.isInteger(value) ? 'int64' : 'float64';
    }
    if (typeof value === 'boolean') {
        return 'boolean';
    }
    if (value instanceof Date) {
        return 'datetime';
    }
    if (typeof value === 'string') {
        return ISO_DATE_PREFIX.test(value) ? 'datetime' : 'string';
    }
    return 'unknown';
}

/**
 * Builds the common adapted view returned by every adapter's `adapt()`.
 * `columns`, `dtypes` and `shape` are computed eagerly (in that order);
 * `sample` and `getColumn` defer to the adapter on each call.
 * @param {object} adapter - adapter providing getColumnNames/getDataTypes/getShape/sampleData/getColumn
 * @param {*} source - the object being adapted
 * @returns {{columns: string[], dtypes: Object, shape: number[], sample: Function, getColumn: Function}}
 */
function createAdaptedFrame(adapter, source) {
    return {
        columns: adapter.getColumnNames(source),
        dtypes: adapter.getDataTypes(source),
        shape: adapter.getShape(source),
        sample: (n = 100) => adapter.sampleData(source, n),
        getColumn: (name) => adapter.getColumn(source, name)
    };
}

/**
 * Adapter for objects whose constructor is named `DataFrame` and that expose
 * `columns` and `dtypes` properties.
 */
class PandasDataFrameAdapter {
    name = 'pandas';

    /**
     * @param {*} obj
     * @returns {boolean} whether `obj` looks like a frame this adapter understands
     */
    canHandle(obj) {
        return isObjectLike(obj) &&
            constructorNameOf(obj) === 'DataFrame' &&
            typeof obj.columns !== 'undefined' &&
            typeof obj.dtypes !== 'undefined';
    }

    /** @returns {object} adapted view (columns, dtypes, shape, sample, getColumn) */
    adapt(df) {
        return createAdaptedFrame(this, df);
    }

    /**
     * Column names from `df.columns` (array, `.tolist()` or `.values`),
     * falling back to the keys of `df.dtypes`.
     */
    getColumnNames(df) {
        const { columns } = df;
        if (Array.isArray(columns)) {
            return columns;
        }
        if (columns && typeof columns.tolist === 'function') {
            return columns.tolist();
        }
        if (columns && typeof columns.values !== 'undefined') {
            return Array.from(columns.values);
        }
        return Object.keys(df.dtypes || {});
    }

    /** Stringifies every own enumerable entry of `df.dtypes` when it is an object. */
    getDataTypes(df) {
        const result = {};
        if (df.dtypes && typeof df.dtypes === 'object') {
            for (const [col, dtype] of Object.entries(df.dtypes)) {
                result[col] = String(dtype);
            }
        }
        return result;
    }

    /** `[rows, cols]` from `df.shape` when present, otherwise `[df.length || 0, columnCount]`. */
    getShape(df) {
        if (Array.isArray(df.shape) && df.shape.length >= 2) {
            return [df.shape[0], df.shape[1]];
        }
        const columns = this.getColumnNames(df);
        const rowCount = df.length || 0;
        return [rowCount, columns.length];
    }

    /** First `n` values of every column, keyed by column name. */
    sampleData(df, n = 100) {
        const result = {};
        for (const col of this.getColumnNames(df)) {
            result[col] = this.getColumn(df, col).slice(0, n);
        }
        return result;
    }

    /**
     * Values of `df[columnName]` as an array (array, `.tolist()` or `.values`);
     * `[]` when the column cannot be read.
     */
    getColumn(df, columnName) {
        const column = df[columnName];
        if (Array.isArray(column)) {
            return column;
        }
        if (column && typeof column.tolist === 'function') {
            return column.tolist();
        }
        if (column && column.values) {
            return Array.from(column.values);
        }
        return [];
    }
}
exports.PandasDataFrameAdapter = PandasDataFrameAdapter;

/**
 * Adapter for objects whose constructor is named `DataFrame` and that expose
 * `getColumns()` and `dtypes()` as functions.
 */
class PolarsDataFrameAdapter {
    name = 'polars';

    /**
     * @param {*} obj
     * @returns {boolean} whether `obj` looks like a frame this adapter understands
     */
    canHandle(obj) {
        return isObjectLike(obj) &&
            constructorNameOf(obj) === 'DataFrame' &&
            typeof obj.getColumns === 'function' &&
            typeof obj.dtypes === 'function';
    }

    /** @returns {object} adapted view (columns, dtypes, shape, sample, getColumn) */
    adapt(df) {
        return createAdaptedFrame(this, df);
    }

    /** Column names from `df.getColumns()`, else the `df.columns` array, else `[]`. */
    getColumnNames(df) {
        if (typeof df.getColumns === 'function') {
            return df.getColumns();
        }
        if (df.columns && Array.isArray(df.columns)) {
            return df.columns;
        }
        return [];
    }

    /** Pairs column names positionally with the array returned by `df.dtypes()`. */
    getDataTypes(df) {
        const result = {};
        const columns = this.getColumnNames(df);
        if (typeof df.dtypes === 'function') {
            const dtypes = df.dtypes();
            if (Array.isArray(dtypes)) {
                columns.forEach((col, i) => {
                    // Falsy dtype entries are skipped, leaving the column absent from the result.
                    if (dtypes[i]) {
                        result[col] = String(dtypes[i]);
                    }
                });
            }
        }
        return result;
    }

    /** `[height, width]`; height defaults to 0, width to the column count. */
    getShape(df) {
        const height = typeof df.height === 'number' ? df.height : 0;
        const width = typeof df.width === 'number' ? df.width : this.getColumnNames(df).length;
        return [height, width];
    }

    /**
     * Columns of `df.sample(n)` when available, else `df.head(n)`, else of the
     * frame itself (in which case nothing is truncated to `n`).
     */
    sampleData(df, n = 100) {
        const result = {};
        const columns = this.getColumnNames(df);
        let sampledDf = df;
        if (typeof df.sample === 'function') {
            sampledDf = df.sample(n);
        }
        else if (typeof df.head === 'function') {
            sampledDf = df.head(n);
        }
        for (const col of columns) {
            result[col] = this.getColumn(sampledDf, col);
        }
        return result;
    }

    /** Values of `df.getColumn(columnName)` via `.toArray()` or as-is when already an array; else `[]`. */
    getColumn(df, columnName) {
        if (typeof df.getColumn === 'function') {
            const column = df.getColumn(columnName);
            if (column && typeof column.toArray === 'function') {
                return column.toArray();
            }
            if (Array.isArray(column)) {
                return column;
            }
        }
        return [];
    }
}
exports.PolarsDataFrameAdapter = PolarsDataFrameAdapter;

/**
 * Adapter for result objects whose constructor is named `DuckDBResult` or
 * `QueryResult` and that carry a `columns` array (rows are read from `data`).
 */
class DuckDBDataFrameAdapter {
    name = 'duckdb';

    /**
     * @param {*} obj
     * @returns {boolean} whether `obj` looks like a result this adapter understands
     */
    canHandle(obj) {
        if (!isObjectLike(obj)) {
            return false;
        }
        // Objects without a constructor (null prototype) must be rejected, not
        // crash: this adapter is consulted before the plain-object adapter.
        const ctorName = constructorNameOf(obj);
        return (ctorName === 'DuckDBResult' || ctorName === 'QueryResult') &&
            Array.isArray(obj.columns);
    }

    /** @returns {object} adapted view (columns, dtypes, shape, sample, getColumn) */
    adapt(df) {
        return createAdaptedFrame(this, df);
    }

    /**
     * Column names from `df.columns`; entries may be strings or descriptor
     * objects with a `name`. Anything else is stringified.
     */
    getColumnNames(df) {
        if (!Array.isArray(df.columns)) {
            return [];
        }
        return df.columns.map((col) => {
            if (typeof col === 'string') {
                return col;
            }
            // Guard against null/undefined descriptors before reading `.name`.
            return (col && col.name) || String(col);
        });
    }

    /** Uses each descriptor's `type` when present; `'unknown'` otherwise. */
    getDataTypes(df) {
        const result = {};
        const columns = this.getColumnNames(df);
        if (Array.isArray(df.columns)) {
            df.columns.forEach((col, i) => {
                const hasType = col && typeof col === 'object' && col.type;
                result[columns[i]] = hasType ? String(col.type) : 'unknown';
            });
        }
        return result;
    }

    /** `[rows, cols]` where rows is `df.data.length` (0 when `data` is not an array). */
    getShape(df) {
        const rows = Array.isArray(df.data) ? df.data.length : 0;
        const cols = this.getColumnNames(df).length;
        return [rows, cols];
    }

    /** First `n` values of every column, keyed by column name. */
    sampleData(df, n = 100) {
        const result = {};
        for (const col of this.getColumnNames(df)) {
            result[col] = this.getColumn(df, col).slice(0, n);
        }
        return result;
    }

    /**
     * Reads one column out of `df.data`, whose rows may be arrays (indexed by
     * column position) or objects (indexed by column name).
     * NOTE: `undefined` cells are dropped, so the returned array can be
     * shorter than the row count.
     */
    getColumn(df, columnName) {
        const colIndex = this.getColumnNames(df).indexOf(columnName);
        if (colIndex === -1 || !Array.isArray(df.data)) {
            return [];
        }
        return df.data
            .map((row) => (Array.isArray(row) ? row[colIndex] : (row && row[columnName])))
            .filter((val) => val !== undefined);
    }
}
exports.DuckDBDataFrameAdapter = DuckDBDataFrameAdapter;

/**
 * Adapter for column dictionaries: non-array objects whose own enumerable
 * values are all arrays, e.g. `{ a: [1, 2], b: ['x', 'y'] }`.
 */
class PlainObjectAdapter {
    name = 'plain_object';

    /**
     * @param {*} obj
     * @returns {boolean} whether every own enumerable value of `obj` is an array
     */
    canHandle(obj) {
        return isObjectLike(obj) &&
            !Array.isArray(obj) &&
            Object.values(obj).every((val) => Array.isArray(val));
    }

    /** @returns {object} adapted view (columns, dtypes, shape, sample, getColumn) */
    adapt(obj) {
        return createAdaptedFrame(this, obj);
    }

    /** Own enumerable keys of the dictionary. */
    getColumnNames(obj) {
        return Object.keys(obj);
    }

    /** Infers each column's dtype from its first non-null value; `'unknown'` when there is none. */
    getDataTypes(obj) {
        const result = {};
        for (const [col, values] of Object.entries(obj)) {
            const firstValue = Array.isArray(values) ? values.find((v) => v != null) : undefined;
            result[col] = firstValue !== undefined ? this.inferType(firstValue) : 'unknown';
        }
        return result;
    }

    /** `[rows, cols]`, taking the row count from the first column only. */
    getShape(obj) {
        const columns = this.getColumnNames(obj);
        if (columns.length === 0) {
            return [0, 0];
        }
        const firstCol = obj[columns[0]];
        const rows = Array.isArray(firstCol) ? firstCol.length : 0;
        return [rows, columns.length];
    }

    /** First `n` values of every column; non-array entries yield `[]`. */
    sampleData(obj, n = 100) {
        const result = {};
        for (const [col, values] of Object.entries(obj)) {
            result[col] = Array.isArray(values) ? values.slice(0, n) : [];
        }
        return result;
    }

    /** The column array itself (not a copy), or `[]` when missing or not an array. */
    getColumn(obj, columnName) {
        const values = obj[columnName];
        return Array.isArray(values) ? values : [];
    }

    /** @returns {string} dtype label for a single value */
    inferType(value) {
        return inferValueType(value);
    }
}
exports.PlainObjectAdapter = PlainObjectAdapter;

/**
 * Adapter for row-oriented data: non-empty arrays whose items are all
 * non-array objects, e.g. `[{ a: 1 }, { a: 2, b: 'x' }]`.
 */
class ArrayOfObjectsAdapter {
    name = 'array_of_objects';

    /**
     * @param {*} obj
     * @returns {boolean} whether `obj` is a non-empty array of non-array objects
     */
    canHandle(obj) {
        return Array.isArray(obj) &&
            obj.length > 0 &&
            obj.every((item) => isObjectLike(item) && !Array.isArray(item));
    }

    /** @returns {object} adapted view (columns, dtypes, shape, sample, getColumn) */
    adapt(arr) {
        return createAdaptedFrame(this, arr);
    }

    /** Sorted union of the keys found across all rows. */
    getColumnNames(arr) {
        if (arr.length === 0) {
            return [];
        }
        const allKeys = new Set();
        for (const row of arr) {
            for (const key of Object.keys(row)) {
                allKeys.add(key);
            }
        }
        return Array.from(allKeys).sort();
    }

    /** Infers each column's dtype from the first row holding a non-null value for it. */
    getDataTypes(arr) {
        const result = {};
        for (const col of this.getColumnNames(arr)) {
            const holder = arr.find((row) => row[col] != null);
            result[col] = holder !== undefined ? this.inferType(holder[col]) : 'unknown';
        }
        return result;
    }

    /** `[rowCount, columnCount]`. */
    getShape(arr) {
        return [arr.length, this.getColumnNames(arr).length];
    }

    /** Column-oriented view of the first `n` rows; missing cells stay `undefined`. */
    sampleData(arr, n = 100) {
        const result = {};
        const columns = this.getColumnNames(arr);
        const sample = arr.slice(0, n);
        for (const col of columns) {
            result[col] = sample.map((row) => row[col]);
        }
        return result;
    }

    /** One value per row for `columnName`; missing cells stay `undefined`. */
    getColumn(arr, columnName) {
        return arr.map((row) => row[columnName]);
    }

    /** @returns {string} dtype label for a single value */
    inferType(value) {
        return inferValueType(value);
    }
}
exports.ArrayOfObjectsAdapter = ArrayOfObjectsAdapter;

/**
 * Ordered collection of adapters. Lookup walks the adapters in registration
 * order and the first one whose `canHandle` accepts the object wins. The
 * defaults are registered as: pandas, polars, duckdb, plain_object,
 * array_of_objects.
 */
class DataFrameAdapterRegistry {
    adapters = [];

    constructor() {
        this.registerDefaultAdapters();
    }

    /** Registers the five built-in adapters. */
    registerDefaultAdapters() {
        this.register(new PandasDataFrameAdapter());
        this.register(new PolarsDataFrameAdapter());
        this.register(new DuckDBDataFrameAdapter());
        this.register(new PlainObjectAdapter());
        this.register(new ArrayOfObjectsAdapter());
    }

    /** Appends an adapter; it is consulted after all previously registered ones. */
    register(adapter) {
        this.adapters.push(adapter);
    }

    /** @returns {object|null} first adapter accepting `obj`, or `null` */
    findAdapter(obj) {
        for (const adapter of this.adapters) {
            if (adapter.canHandle(obj)) {
                return adapter;
            }
        }
        return null;
    }

    /** @returns {object|null} adapted view of `obj`, or `null` when no adapter accepts it */
    adapt(obj) {
        const adapter = this.findAdapter(obj);
        return adapter ? adapter.adapt(obj) : null;
    }

    /** @returns {string[]} names of all registered adapters, in order */
    getSupportedTypes() {
        return this.adapters.map((adapter) => adapter.name);
    }

    /** @returns {object|null} first adapter with the given `name`, or `null` */
    getAdapter(name) {
        return this.adapters.find((adapter) => adapter.name === name) || null;
    }
}
exports.DataFrameAdapterRegistry = DataFrameAdapterRegistry;

const globalRegistry = new DataFrameAdapterRegistry();
exports.dataFrameRegistry = globalRegistry;

/** Adapts `obj` using the shared module-level registry; `null` when unsupported. */
function adaptDataFrame(obj) {
    return globalRegistry.adapt(obj);
}

/** Adds an adapter to the shared module-level registry. */
function registerAdapter(adapter) {
    globalRegistry.register(adapter);
}

/** Names of the adapters in the shared module-level registry. */
function getSupportedDataFrameTypes() {
    return globalRegistry.getSupportedTypes();
}

/** Looks up an adapter by name in the shared module-level registry; `null` when absent. */
function getAdapterForType(typeName) {
    return globalRegistry.getAdapter(typeName);
}
//# sourceMappingURL=dataframe-adapters.js.map