@future-agi/sdk
Version:
We help GenAI teams maintain high accuracy for their models in production.
870 lines • 40.7 kB
JavaScript
"use strict";
// Module-interop helper (appears to be compiler-emitted boilerplate).
// Exposes property `k` of module object `m` on object `o` under the name `k2`
// (which defaults to `k`). A helper already present on `this` is reused if one exists.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Substitute a live getter when there is no descriptor, when an accessor belongs to a
// module not flagged __esModule, or when a data property is writable or configurable.
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
// Fallback when Object.create is unavailable: copy the current value by plain assignment.
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
// Module-interop helper: stores `v` as the "default" member of namespace object `o`.
// With Object.create available the member is defined as an enumerable data property
// (non-writable, non-configurable by defineProperty defaults); otherwise it is assigned.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (o, v) {
            Object.defineProperty(o, "default", { value: v, enumerable: true });
        };
    }
    return function (o, v) {
        o["default"] = v;
    };
})();
// Module-interop helper: turns the result of require() into a namespace-style object.
// Relies on the sibling helpers __createBinding and __setModuleDefault.
var __importStar = (this && this.__importStar) || (function () {
// On first call, ownKeys replaces itself with Object.getOwnPropertyNames (or a
// hasOwnProperty-based for-in fallback) and then delegates to that implementation.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
// Modules already flagged __esModule are returned untouched.
if (mod && mod.__esModule) return mod;
var result = {};
// Bind every own key except "default"; "default" is then set to the module itself.
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
// Async helper: runs a generator function as an asynchronous routine.
//   thisArg / _arguments - receiver and argument list the generator is applied with
//   P                    - promise constructor to use (falls back to the global Promise)
//   generator            - generator function whose yielded values are awaited
// Returns a promise that resolves with the generator's return value, or rejects with
// any error thrown while stepping it.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a yielded value in a P instance unless it already is one.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Feed a settled value (or rejection) back into the generator and continue stepping.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator and start it.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
// Object-rest helper: returns a new object holding the own enumerable members of `s`
// (string-keyed and symbol-keyed) whose keys are not listed in `e`.
var __rest = (this && this.__rest) || function (s, e) {
    const remainder = {};
    for (const name in s) {
        if (!Object.prototype.hasOwnProperty.call(s, name)) {
            continue;
        }
        if (e.indexOf(name) >= 0) {
            continue;
        }
        remainder[name] = s[name];
    }
    const canListSymbols = typeof Object.getOwnPropertySymbols === "function";
    if (s != null && canListSymbols) {
        const symbols = Object.getOwnPropertySymbols(s);
        for (let idx = 0; idx < symbols.length; idx++) {
            const sym = symbols[idx];
            const excluded = e.indexOf(sym) >= 0;
            if (!excluded && Object.prototype.propertyIsEnumerable.call(s, sym)) {
                remainder[sym] = s[sym];
            }
        }
    }
    return remainder;
};
// Module-interop helper: modules flagged __esModule are returned as-is; anything else
// is wrapped so that it is reachable through a "default" member.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.Dataset = exports.DatasetResponseHandler = void 0;
const uuid_1 = require("uuid");
const fs = __importStar(require("fs"));
const form_data_1 = __importDefault(require("form-data"));
const auth_1 = require("../api/auth");
const types_1 = require("../api/types");
const routes_1 = require("../utils/routes");
const errors_1 = require("../utils/errors");
const types_2 = require("./types");
// Passed as the `timeout` option of every request issued from this module.
const DEFAULT_API_TIMEOUT = 30000; // 30 seconds in milliseconds
/**
 * Simple LRU (least-recently-used) cache.
 *
 * Recency is tracked through the insertion order of the backing Map: every
 * read or write re-inserts the key, so the first key in the Map is always the
 * least recently used one.
 */
class LRUCache {
    /**
     * @param {number} [capacity=100] Entry count at which inserting a new key
     *   evicts the least recently used entry first.
     */
    constructor(capacity = 100) {
        this.capacity = capacity;
        this.cache = new Map();
    }
    /**
     * Snapshot of the keys ordered from least to most recently used.
     * @returns {Array} A new array; mutating it does not affect the cache.
     */
    get accessOrder() {
        return Array.from(this.cache.keys());
    }
    /**
     * Look up a key and mark it as most recently used.
     * @param {*} key
     * @returns {*} The stored value (falsy values such as 0, '' and false are
     *   returned as stored), or null when the key is absent or its stored
     *   value is undefined.
     */
    get(key) {
        if (!this.cache.has(key)) {
            return null;
        }
        const value = this.cache.get(key);
        // Delete + set moves the key to the end of the Map's iteration order.
        this.cache.delete(key);
        this.cache.set(key, value);
        return value === undefined ? null : value;
    }
    /**
     * Insert or update an entry and mark it as most recently used.
     * A new key inserted at capacity evicts the least recently used entry.
     * @param {*} key
     * @param {*} value
     */
    put(key, value) {
        if (this.cache.has(key)) {
            this.cache.delete(key);
        }
        else if (this.cache.size >= this.capacity) {
            const oldest = this.cache.keys().next();
            if (!oldest.done) {
                this.cache.delete(oldest.value);
            }
        }
        this.cache.set(key, value);
    }
    /** Remove every entry. */
    clear() {
        this.cache.clear();
    }
}
/**
 * Response handler for dataset operations.
 *
 * Shapes successful responses according to the endpoint URL that produced
 * them and converts HTTP failures into the typed errors from ../utils/errors.
 */
class DatasetResponseHandler extends auth_1.ResponseHandler {
    /**
     * Convert a successful response into the object handed back to the caller.
     *
     * - dataset-name search: exactly one match is required and is returned as
     *   `{ id, name, modelType }`;
     * - dataset table: returned as `{ id, columns, rows, metadata }`;
     * - dataset creation (empty / local file / Hugging Face): `{ id, name, modelType }`;
     * - anything else: the raw response body.
     *
     * @param {object} response Response exposing `data` and `config.url`.
     * @throws {DatasetNotFoundError} When a name search returns no dataset.
     * @throws {Error} When a name search returns more than one dataset.
     */
    static _parseSuccess(response) {
        const data = response.data;
        const url = response.config.url || '';
        if (url.includes(routes_1.Routes.dataset_names)) {
            return DatasetResponseHandler._parseDatasetSearch(data);
        }
        if (url.includes('/get-dataset-table/')) {
            // The dataset ID is taken from the third-from-last path segment of the URL.
            const id = url.split('/').slice(-3, -2)[0];
            return DatasetResponseHandler._parseDatasetTable(id, data.result);
        }
        const isCreation = url.includes(routes_1.Routes.dataset_empty) ||
            url.includes(routes_1.Routes.dataset_local) ||
            url.includes(routes_1.Routes.dataset_huggingface);
        if (isCreation) {
            return {
                id: data.result.datasetId,
                name: data.result.datasetName,
                modelType: data.result.datasetModelType,
            };
        }
        return data;
    }
    /** Reduce a dataset-name search body to the single matching dataset. */
    static _parseDatasetSearch(data) {
        const result = data.result;
        const datasets = result === null || result === undefined ? undefined : result.datasets;
        if (!datasets || datasets.length === 0) {
            throw new errors_1.DatasetNotFoundError("No dataset found matching the criteria.");
        }
        if (datasets.length > 1) {
            throw new Error("Multiple datasets found. Please specify a dataset name.");
        }
        const match = datasets[0];
        return {
            id: match.datasetId,
            name: match.name,
            modelType: match.modelType,
        };
    }
    /** Build the `{ id, columns, rows, metadata }` table object from a table response body. */
    static _parseDatasetTable(id, result) {
        const columns = result.columnConfig.map((col) => {
            const frozen = col.isFrozen;
            return {
                id: col.id,
                name: col.name,
                dataType: col.dataType,
                source: col.originType,
                sourceId: col.sourceId,
                // The frozen flag is read from a nested `isFrozen.isFrozen` member.
                isFrozen: (frozen === null || frozen === undefined ? undefined : frozen.isFrozen) || false,
                isVisible: col.isVisible,
                evalTags: col.evalTag || [],
                averageScore: col.averageScore,
                orderIndex: col.orderIndex,
            };
        });
        const rows = result.table.map((row) => {
            const cells = [];
            for (const [columnId, value] of Object.entries(row)) {
                // `rowId` and `order` describe the row itself; every other key is a column ID.
                if (columnId === 'rowId' || columnId === 'order') {
                    continue;
                }
                const cell = value === null || value === undefined ? {} : value;
                cells.push((0, types_2.createCell)({
                    columnId,
                    rowId: row.rowId,
                    value: cell.cellValue,
                    valueInfos: cell.valueInfos ? [cell.valueInfos] : [],
                    metadata: cell.metadata,
                    status: cell.status,
                    failureReason: cell.failureReason,
                }));
            }
            return (0, types_2.createRow)({ cells, order: row.order });
        });
        return {
            id,
            columns,
            rows,
            metadata: result.metadata,
        };
    }
    /**
     * Throw the typed error matching a failed response.
     *
     * Known status codes map to dedicated error classes; any other 5xx status
     * becomes a ServerError and everything else a DatasetError.
     *
     * @param {object} response Response exposing `status`, `statusText` and `data`.
     * @throws Always throws.
     */
    static _handleError(response) {
        const errorMap = {
            400: errors_1.DatasetValidationError,
            401: errors_1.DatasetAuthError,
            403: errors_1.DatasetAuthError,
            404: errors_1.DatasetNotFoundError,
            429: errors_1.RateLimitError,
            500: errors_1.ServerError,
            503: errors_1.ServiceUnavailableError,
        };
        const ErrorClass = errorMap[response.status] || errors_1.DatasetError;
        const body = response.data;
        if (response.status > 500 && response.status < 600 && ErrorClass === errors_1.DatasetError) {
            const serverMessage = body === null || body === undefined ? undefined : body.message;
            throw new errors_1.ServerError(serverMessage || response.statusText);
        }
        throw new ErrorClass(DatasetResponseHandler._describeError(response));
    }
    /**
     * Derive a human-readable message from an error response.
     *
     * Preference order: a plain-string body, then the body's `detail`,
     * `message` or `error` member (serialised when it is not a string), then
     * the serialised body, then the status text, then a generic message that
     * includes the status code.
     *
     * @param {object} response
     * @returns {string}
     */
    static _describeError(response) {
        const fallback = response.statusText || `HTTP error ${response.status} with no descriptive message.`;
        const body = response.data;
        // JSON.stringify(null) yields the truthy string "null", so empty bodies are handled up front.
        if (body === null || body === undefined || body === '') {
            return fallback;
        }
        if (typeof body === 'string') {
            return body;
        }
        const detail = body.detail || body.message || body.error;
        if (typeof detail === 'string') {
            return detail;
        }
        try {
            return JSON.stringify(detail || body) || fallback;
        }
        catch (_err) {
            // Unserialisable body (e.g. circular structure): use the generic text.
            return fallback;
        }
    }
}
exports.DatasetResponseHandler = DatasetResponseHandler;
/**
 * Lightweight response handler for the evaluation-template endpoints used by
 * Dataset.addEvaluation (kept local to avoid cross-package dependencies).
 */
class EvalInfoResponseHandler extends auth_1.ResponseHandler {
    /** Return `data.result`, or throw DatasetError when the body carries no result. */
    static _parseSuccess(response) {
        const data = response.data;
        if (data && data.result) {
            return data.result;
        }
        throw new errors_1.DatasetError(`Failed to fetch evaluation info: ${JSON.stringify(data)}`);
    }
    /** Throw DatasetAuthError for 401/403 and DatasetError for any other failure. */
    static _handleError(response) {
        if (response.status === 403 || response.status === 401) {
            throw new errors_1.DatasetAuthError("Authentication failed while fetching evaluation info.");
        }
        throw new errors_1.DatasetError(`Failed to fetch evaluation info: ${response.status} ${response.statusText}`);
    }
}
/**
 * Dataset manager class for handling dataset operations.
 *
 * An instance wraps one dataset configuration (`_datasetConfig`). Most instance
 * methods require that configuration to carry an `id` and resolve to the
 * instance itself so that calls can be chained.
 */
class Dataset extends auth_1.APIKeyAuth {
    /**
     * @param {object} [config] Options forwarded to the parent constructor. An
     *   optional `datasetConfig` member becomes this instance's dataset
     *   configuration.
     */
    constructor(config = {}) {
        super(config);
        // Stored as given; the caller decides whether to call create().
        this._datasetConfig = config.datasetConfig || null;
    }
    /**
     * Return the configured dataset ID.
     * @param {string} message Error text used when no ID is configured.
     * @throws {DatasetError} When there is no configuration or it has no ID.
     */
    _requireDatasetId(message) {
        const id = this._datasetConfig ? this._datasetConfig.id : undefined;
        if (!id) {
            throw new errors_1.DatasetError(message);
        }
        return id;
    }
    /**
     * Replace the stored configuration with the one fetched by name; when the
     * dataset does not exist, keep the supplied configuration instead.
     * @throws {DatasetError} For any failure other than DatasetNotFoundError.
     */
    _initializeDatasetConfig(datasetConfig) {
        return __awaiter(this, void 0, void 0, function* () {
            try {
                this._datasetConfig = yield this._fetchDatasetConfig(datasetConfig.name);
            }
            catch (error) {
                if (!(error instanceof errors_1.DatasetNotFoundError)) {
                    throw new errors_1.DatasetError(`Failed to initialize dataset configuration for ${datasetConfig.name}: ${error}`);
                }
                this._datasetConfig = datasetConfig;
            }
        });
    }
    // Instance methods for chaining
    /**
     * Create the configured dataset and record the ID, name and model type
     * returned by the service.
     * @param {string|object} [source] Omitted for an empty dataset, a file
     *   path for a local upload, or a Hugging Face configuration object.
     * @returns {Promise<Dataset>} This instance.
     * @throws {DatasetError} When no configuration is set or it already has an ID.
     */
    create(source) {
        return __awaiter(this, void 0, void 0, function* () {
            if (!this._datasetConfig) {
                throw new errors_1.DatasetError("dataset_config must be set before creating a dataset.");
            }
            if (this._datasetConfig.id) {
                throw new errors_1.DatasetError(`Dataset '${this._datasetConfig.name}' appears to already exist with ID: ${this._datasetConfig.id}.`);
            }
            const created = yield this._createDataset(this._datasetConfig, source);
            this._datasetConfig = Object.assign({}, this._datasetConfig, {
                id: created.id,
                name: created.name,
                modelType: created.modelType,
            });
            return this;
        });
    }
    /**
     * Download the dataset table.
     * @param {string} [filePath] Destination CSV path; defaults to `<name>.csv`.
     * @param {boolean} [loadToMemory=false] Return the table instead of writing a file.
     * @returns {Promise<object|Dataset>} The table when `loadToMemory` is true,
     *   otherwise this instance.
     * @throws {DatasetError} When the name or ID is not configured.
     */
    download(filePath_1) {
        return __awaiter(this, arguments, void 0, function* (filePath, loadToMemory = false) {
            const cfg = this._datasetConfig;
            if (!(cfg && cfg.name)) {
                throw new errors_1.DatasetError("Dataset name must be configured to download.");
            }
            if (!cfg.id) {
                throw new errors_1.DatasetError(`Dataset '${cfg.name}' must have an ID to be downloaded. Fetch config first if ID is missing.`);
            }
            const result = yield this._downloadDataset(cfg.name, filePath, loadToMemory);
            return loadToMemory ? result : this;
        });
    }
    /**
     * Delete the dataset and clear this instance's configuration.
     * @throws {DatasetError} When no ID is configured.
     */
    delete() {
        return __awaiter(this, void 0, void 0, function* () {
            this._requireDatasetId("Dataset ID must be configured to delete.");
            yield this._deleteDataset();
            this._datasetConfig = null;
        });
    }
    /**
     * @returns {object} The current dataset configuration.
     * @throws {DatasetError} When none is set.
     */
    getConfig() {
        if (!this._datasetConfig) {
            throw new errors_1.DatasetError("No dataset configured for this instance.");
        }
        return this._datasetConfig;
    }
    /**
     * Add columns. Entries that already carry an `id` are sent unchanged; the
     * others are built with createColumn (name defaults to '', data type to TEXT).
     * @param {Array<object>} columns
     * @returns {Promise<Dataset>} This instance.
     * @throws {DatasetError} When no ID is configured.
     * @throws {DatasetValidationError} When `columns` is empty.
     */
    addColumns(columns) {
        return __awaiter(this, void 0, void 0, function* () {
            this._requireDatasetId("Dataset must be configured with an ID to add columns.");
            if (!columns || columns.length === 0) {
                throw new errors_1.DatasetValidationError("Columns list cannot be empty.");
            }
            const processedColumns = columns.map((col) => {
                if ('id' in col && col.id) {
                    // Already a complete Column
                    return col;
                }
                // Create column from partial data
                return (0, types_2.createColumn)({
                    name: col.name || '',
                    dataType: col.dataType || types_2.DataTypeChoices.TEXT,
                    source: col.source,
                    sourceId: col.sourceId,
                    metadata: col.metadata,
                    isFrozen: col.isFrozen,
                    isVisible: col.isVisible,
                    evalTags: col.evalTags,
                    averageScore: col.averageScore,
                    orderIndex: col.orderIndex,
                });
            });
            yield this._addColumns(processedColumns);
            return this;
        });
    }
    /**
     * Add rows. Rows that already carry an `id` are sent unchanged; the others
     * are rebuilt with createRow. Within such a row, cells that carry a
     * `columnId` are kept as given and the remaining cells are built with
     * createCell.
     * @param {Array<object>} rows
     * @returns {Promise<Dataset>} This instance.
     * @throws {DatasetError} When no ID is configured.
     * @throws {DatasetValidationError} When `rows` is empty.
     */
    addRows(rows) {
        return __awaiter(this, void 0, void 0, function* () {
            this._requireDatasetId("Dataset must be configured with an ID to add rows.");
            if (!rows || rows.length === 0) {
                throw new errors_1.DatasetValidationError("Rows list cannot be empty.");
            }
            const processedRows = rows.map((row) => {
                if ('id' in row && row.id) {
                    // Already a complete Row
                    return row;
                }
                // Generate the row ID once so every cell built for this row shares it
                // (a per-cell UUID would give cells of one row unrelated row IDs).
                const rowId = (0, uuid_1.v4)();
                const cells = (row.cells || []).map((cell) => {
                    if ('columnId' in cell && cell.columnId) {
                        return cell;
                    }
                    return (0, types_2.createCell)({
                        columnId: cell.columnId || '',
                        rowId,
                        columnName: cell.columnName,
                        value: cell.value,
                        valueInfos: cell.valueInfos,
                        metadata: cell.metadata,
                        status: cell.status,
                        failureReason: cell.failureReason,
                    });
                });
                return (0, types_2.createRow)({ cells, order: row.order });
            });
            yield this._addRows(processedRows);
            return this;
        });
    }
    /**
     * Look up a column's ID by name from the dataset table.
     * @param {string} columnName
     * @returns {Promise<string|null>} The column ID, or null when no column has that name.
     * @throws {DatasetError} When no ID is configured.
     * @throws {DatasetValidationError} When `columnName` is empty.
     */
    getColumnId(columnName) {
        return __awaiter(this, void 0, void 0, function* () {
            const datasetId = this._requireDatasetId("Dataset must be configured with an ID to get a column ID.");
            if (!columnName) {
                throw new errors_1.DatasetValidationError("Column name cannot be empty.");
            }
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_table.replace('{dataset_id}', datasetId)}`;
            // A single-row page is requested; only the column list of the response is used.
            const datasetTable = yield this.request({
                method: types_1.HttpMethod.POST,
                url,
                json: { page_size: 1, current_page_index: 0 },
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
            const column = datasetTable.columns.find((col) => col.name === columnName);
            return (column && column.id) || null;
        });
    }
    /**
     * Replace each `{{column name}}` reference in `text` with `{{column id}}`.
     * @param {string} text Text that may contain column references.
     * @param {Map<string, string>} resolvedIds Name-to-ID cache shared across one
     *   addRunPrompt call so a repeated reference is looked up only once.
     * @returns {Promise<string>} The text with references rewritten.
     * @throws {DatasetError} When a referenced column does not exist.
     */
    _replaceColumnReferences(text, resolvedIds) {
        return __awaiter(this, void 0, void 0, function* () {
            const columnRefs = text.match(/\{\{(.*?)\}\}/g) || [];
            let rewritten = text;
            for (const ref of columnRefs) {
                const colName = ref.slice(2, -2); // Remove {{ and }}
                let colId = resolvedIds.get(colName);
                if (colId === undefined) {
                    colId = yield this.getColumnId(colName);
                    if (!colId) {
                        throw new errors_1.DatasetError(`Referenced column '${ref}' not found in dataset '${this._datasetConfig.name}'`);
                    }
                    resolvedIds.set(colName, colId);
                }
                const replacement = `{{${colId}}}`;
                // A replacer function keeps any `$` in the ID from being read as a replacement pattern.
                rewritten = rewritten.replace(ref, () => replacement);
            }
            return rewritten;
        });
    }
    /**
     * Add a run-prompt column to the dataset.
     *
     * Messages default to role "user". String content is converted to
     * `[{ type: "text", text }]`; in list content, non-object items are wrapped
     * the same way. Column references in text are rewritten from names to IDs.
     *
     * @param {object} options `name`, `model` and `messages` are required.
     *   Defaults: outputFormat "string", concurrency 5, maxTokens 500,
     *   temperature 0.5, presencePenalty 1, frequencyPenalty 1, topP 1.
     *   `tools`, `toolChoice` and `responseFormat` are sent only when provided.
     * @returns {Promise<Dataset>} This instance.
     * @throws {DatasetError} When no ID is configured or a referenced column is missing.
     * @throws {DatasetValidationError} When name, model or messages are missing.
     */
    addRunPrompt(options) {
        return __awaiter(this, void 0, void 0, function* () {
            const datasetId = this._requireDatasetId("Dataset must be configured with an ID to add a run prompt column.");
            const { name, model, messages, outputFormat = "string", concurrency = 5, maxTokens = 500, temperature = 0.5, presencePenalty = 1, frequencyPenalty = 1, topP = 1, tools, toolChoice, responseFormat, } = options;
            if (!name) {
                throw new errors_1.DatasetValidationError("Run prompt column name cannot be empty.");
            }
            if (!model) {
                throw new errors_1.DatasetValidationError("Model cannot be empty for run prompt.");
            }
            if (!messages || messages.length === 0) {
                throw new errors_1.DatasetValidationError("Messages list cannot be empty for run prompt.");
            }
            // Process messages to handle column references and format conversion (parity with Python SDK)
            const resolvedIds = new Map();
            const processedMessages = [];
            for (const msg of messages) {
                const processedMsg = Object.assign({}, msg);
                // Set default role if not provided
                if (!processedMsg.role) {
                    processedMsg.role = "user";
                }
                const content = processedMsg.content;
                // Falsy content (missing or empty string) is passed through untouched.
                if (content) {
                    if (typeof content === "string") {
                        // Convert to expected format: list of dictionaries
                        const text = yield this._replaceColumnReferences(content, resolvedIds);
                        processedMsg.content = [{ type: "text", text }];
                    }
                    else if (Array.isArray(content)) {
                        // Handle list content (already in expected format)
                        const processedContent = [];
                        for (const contentItem of content) {
                            if (typeof contentItem === "object" && contentItem !== null) {
                                const processedItem = Object.assign({}, contentItem);
                                if (processedItem.text) {
                                    processedItem.text = yield this._replaceColumnReferences(processedItem.text, resolvedIds);
                                }
                                processedContent.push(processedItem);
                            }
                            else {
                                // If list item is not a dict, treat as text
                                processedContent.push({ type: "text", text: String(contentItem) });
                            }
                        }
                        processedMsg.content = processedContent;
                    }
                }
                processedMessages.push(processedMsg);
            }
            // Build payload in parity with Python SDK
            const payload = {
                dataset_id: datasetId,
                name,
                config: {
                    model,
                    output_format: outputFormat,
                    concurrency,
                    messages: processedMessages,
                    max_tokens: maxTokens,
                    temperature,
                    presence_penalty: presencePenalty,
                    frequency_penalty: frequencyPenalty,
                    top_p: topP,
                },
            };
            if (tools) {
                payload.config.tools = tools;
            }
            if (typeof toolChoice !== "undefined") {
                payload.config.tool_choice = toolChoice;
            }
            if (responseFormat) {
                payload.config.response_format = responseFormat;
            }
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_add_run_prompt_column}`;
            yield this.request({
                method: types_1.HttpMethod.POST,
                url,
                json: payload,
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
            return this;
        });
    }
    /**
     * Attach an evaluation to the dataset.
     *
     * The template is located by case-insensitive name among all templates,
     * its detail is fetched to obtain the template ID and required keys, and
     * each required key is mapped to the ID of the column named in
     * `requiredKeysToColumnNames`.
     *
     * @param {object} options `name`, `evalTemplate`, `requiredKeysToColumnNames`
     *   and `model` are required. Defaults: saveAsTemplate false, run true,
     *   reasonColumn false, errorLocalizer false. `config` and `kbId` are optional.
     * @returns {Promise<Dataset>} This instance.
     * @throws {DatasetError} When no ID is configured, or a template ID or column cannot be resolved.
     * @throws {DatasetValidationError} For missing inputs, an unknown template,
     *   or a required key without a column mapping.
     */
    addEvaluation(options) {
        return __awaiter(this, void 0, void 0, function* () {
            const datasetId = this._requireDatasetId("Dataset must be configured with an ID to add evaluation.");
            const { name, evalTemplate, requiredKeysToColumnNames, saveAsTemplate = false, run = true, reasonColumn = false, config, model, errorLocalizer = false, kbId, } = options;
            if (!name) {
                throw new errors_1.DatasetValidationError("Evaluation name cannot be empty.");
            }
            if (!evalTemplate) {
                throw new errors_1.DatasetValidationError("Evaluation template cannot be empty.");
            }
            if (!requiredKeysToColumnNames || Object.keys(requiredKeysToColumnNames).length === 0) {
                throw new errors_1.DatasetValidationError("Required keys to column names mapping cannot be empty.");
            }
            if (!model) {
                throw new errors_1.DatasetValidationError("Model cannot be empty for evaluation.");
            }
            // --- Fetch evaluation template details -------------------------------------------------
            // Fetch all templates once and filter locally (mirrors Python SDK logic)
            const allTemplates = yield this.request({
                method: types_1.HttpMethod.GET,
                url: `${this._baseUrl}/${routes_1.Routes.get_eval_templates}`,
                timeout: DEFAULT_API_TIMEOUT,
            }, EvalInfoResponseHandler);
            const wantedName = evalTemplate.toLowerCase();
            const matchedListItem = allTemplates.find((tpl) => {
                const tplName = tpl.name || tpl.eval_name || tpl.template_name || "";
                return tplName.toLowerCase() === wantedName;
            });
            if (!matchedListItem) {
                throw new errors_1.DatasetValidationError(`Unknown or unsupported evaluation template: ${evalTemplate}`);
            }
            const evalId = matchedListItem.eval_id || matchedListItem.evalId || matchedListItem.id;
            if (!evalId) {
                throw new errors_1.DatasetError(`Failed to determine eval_id for template '${evalTemplate}'.`);
            }
            // Now fetch detailed info for this template to obtain template_id & required_keys
            const templateDetail = yield this.request({
                method: types_1.HttpMethod.GET,
                url: `${this._baseUrl}/${routes_1.Routes.evaluate_template.replace('{eval_id}', evalId)}`,
                timeout: DEFAULT_API_TIMEOUT,
            }, EvalInfoResponseHandler);
            const templateId = templateDetail.id;
            const requiredKeys = (templateDetail.config && templateDetail.config.required_keys) || [];
            if (!templateId) {
                throw new errors_1.DatasetError(`template_id not found for evaluation template '${evalTemplate}'.`);
            }
            // --- Build column mapping -------------------------------------------------------------
            const mapping = {};
            for (const key of requiredKeys) {
                if (!(key in requiredKeysToColumnNames)) {
                    throw new errors_1.DatasetValidationError(`Required key '${key}' not found in requiredKeysToColumnNames for template '${evalTemplate}'.`);
                }
                const columnName = requiredKeysToColumnNames[key];
                if (!columnName) {
                    throw new errors_1.DatasetValidationError(`Column name mapping for key '${key}' cannot be empty.`);
                }
                const columnId = yield this.getColumnId(columnName);
                if (!columnId) {
                    throw new errors_1.DatasetError(`Column '${columnName}' (mapped from key '${key}') not found in dataset '${this._datasetConfig.name}'.`);
                }
                mapping[key] = columnId;
            }
            // --- Prepare payload ------------------------------------------------------------------
            const evalConfigPayload = {
                template_id: templateId,
                run,
                name,
                saveAsTemplate,
                config: {
                    mapping,
                    config: config || {},
                    reasonColumn: reasonColumn,
                },
            };
            // `model` was validated as non-empty above, so it is always sent.
            evalConfigPayload.model = model;
            if (errorLocalizer) {
                evalConfigPayload.error_localizer = errorLocalizer;
            }
            if (kbId) {
                evalConfigPayload.kb_id = kbId;
            }
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_add_evaluation.replace('{dataset_id}', datasetId)}`;
            yield this.request({
                method: types_1.HttpMethod.POST,
                url,
                json: evalConfigPayload,
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
            return this;
        });
    }
    /**
     * Fetch evaluation statistics for the dataset.
     * @returns {Promise<object>} The response's `result`, or `{}` when it is missing.
     * @throws {DatasetError} When no ID is configured.
     */
    getEvalStats() {
        return __awaiter(this, void 0, void 0, function* () {
            const datasetId = this._requireDatasetId("Dataset must be configured with an ID to get evaluation stats.");
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_eval_stats.replace('{dataset_id}', datasetId)}`;
            const response = yield this.request({
                method: types_1.HttpMethod.GET,
                url,
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
            return response.result || {};
        });
    }
    /**
     * Create an optimization for a prompt column.
     * @param {object} options `optimizationName` and `promptColumnName` are
     *   required; `optimizeType` defaults to "PROMPT_TEMPLATE"; `modelConfig`
     *   is forwarded as given.
     * @returns {Promise<Dataset>} This instance.
     * @throws {DatasetError} When no ID is configured.
     * @throws {DatasetValidationError} When a required option is missing.
     */
    addOptimization(options) {
        return __awaiter(this, void 0, void 0, function* () {
            const datasetId = this._requireDatasetId("Dataset must be configured with an ID to add optimization.");
            const { optimizationName, promptColumnName, optimizeType = "PROMPT_TEMPLATE", modelConfig, } = options;
            if (!optimizationName) {
                throw new errors_1.DatasetValidationError("Optimization name cannot be empty.");
            }
            if (!promptColumnName) {
                throw new errors_1.DatasetValidationError("Prompt column name cannot be empty.");
            }
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_optimization_create}`;
            yield this.request({
                method: types_1.HttpMethod.POST,
                url,
                json: {
                    dataset_id: datasetId,
                    optimization_name: optimizationName,
                    prompt_column_name: promptColumnName,
                    optimize_type: optimizeType,
                    model_config: modelConfig,
                },
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
            return this;
        });
    }
    // Private methods
    /** Search datasets by name and return the parsed `{ id, name, modelType }` match. */
    _fetchDatasetConfig(datasetName) {
        return __awaiter(this, void 0, void 0, function* () {
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_names}`;
            return yield this.request({
                method: types_1.HttpMethod.POST,
                url,
                json: { search_text: datasetName },
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
        });
    }
    /** Dispatch creation: no source -> empty dataset, string -> file upload, otherwise Hugging Face. */
    _createDataset(config, source) {
        return __awaiter(this, void 0, void 0, function* () {
            if (!source) {
                return this._createEmptyDataset(config);
            }
            if (typeof source === 'string') {
                return this._createFromFile(config, source);
            }
            return this._createFromHuggingface(config, source);
        });
    }
    /** Create an empty dataset with the configured name and model type. */
    _createEmptyDataset(config) {
        return __awaiter(this, void 0, void 0, function* () {
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_empty}`;
            return yield this.request({
                method: types_1.HttpMethod.POST,
                url,
                json: {
                    new_dataset_name: config.name,
                    model_type: config.modelType,
                },
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
        });
    }
    /**
     * Create a dataset by uploading a local file as multipart form data.
     * @throws {DatasetError} When `filePath` does not exist.
     */
    _createFromFile(config, filePath) {
        return __awaiter(this, void 0, void 0, function* () {
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_local}`;
            if (!fs.existsSync(filePath)) {
                throw new errors_1.DatasetError(`File not found at path: ${filePath}`);
            }
            const formData = new form_data_1.default();
            formData.append('file', fs.createReadStream(filePath));
            formData.append('new_dataset_name', config.name);
            formData.append('model_type', config.modelType);
            return yield this.request({
                method: types_1.HttpMethod.POST,
                url,
                data: formData,
                headers: formData.getHeaders(),
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
        });
    }
    /** Create a dataset from a Hugging Face dataset (`name`, `subset`, `split`, `numRows`). */
    _createFromHuggingface(config, hfConfig) {
        return __awaiter(this, void 0, void 0, function* () {
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_huggingface}`;
            return yield this.request({
                method: types_1.HttpMethod.POST,
                url,
                json: {
                    new_dataset_name: config.name,
                    model_type: config.modelType,
                    huggingface_dataset_name: hfConfig.name,
                    huggingface_dataset_config: hfConfig.subset,
                    huggingface_dataset_split: hfConfig.split,
                    num_rows: hfConfig.numRows,
                },
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
        });
    }
    /**
     * Fetch the dataset table and either return it or write it to disk as CSV.
     *
     * NOTE(review): a single request for page 0 with page_size 1000 is made, so
     * rows beyond the first page are not included - confirm whether larger
     * datasets need paging.
     *
     * @returns {Promise<object|string>} The table when `loadToMemory` is true,
     *   otherwise the path of the written file (`filePath` or `<name>.csv`).
     * @throws {DatasetError} When no ID is configured or the file cannot be written.
     */
    _downloadDataset(name_1, filePath_1) {
        return __awaiter(this, arguments, void 0, function* (name, filePath, loadToMemory = false) {
            const datasetId = this._requireDatasetId("Dataset ID is required for download.");
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_table.replace('{dataset_id}', datasetId)}`;
            const datasetTable = yield this.request({
                method: types_1.HttpMethod.POST,
                url,
                json: { page_size: 1000, current_page_index: 0 },
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
            if (loadToMemory) {
                return datasetTable;
            }
            const outputPath = filePath || `${name}.csv`;
            const csvContent = types_2.DatasetTableUtils.toCsv(datasetTable);
            try {
                fs.writeFileSync(outputPath, csvContent);
            }
            catch (error) {
                throw new errors_1.DatasetError(`Failed to write dataset to file: ${error}`);
            }
            return outputPath;
        });
    }
    /** Send the delete request for the configured dataset ID. */
    _deleteDataset() {
        return __awaiter(this, void 0, void 0, function* () {
            const datasetId = this._requireDatasetId("Dataset ID is required for deletion.");
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_delete}`;
            yield this.request({
                method: types_1.HttpMethod.DELETE,
                url,
                json: { dataset_ids: [datasetId] },
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
        });
    }
    /** Send already-processed columns to the add-columns endpoint. */
    _addColumns(columns) {
        return __awaiter(this, void 0, void 0, function* () {
            const datasetId = this._requireDatasetId("Dataset ID is required to add columns.");
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_add_columns.replace('{dataset_id}', datasetId)}`;
            yield this.request({
                method: types_1.HttpMethod.POST,
                url,
                json: { new_columns_data: columns },
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
        });
    }
    /** Send already-processed rows to the add-rows endpoint. */
    _addRows(rows) {
        return __awaiter(this, void 0, void 0, function* () {
            const datasetId = this._requireDatasetId("Dataset ID is required to add rows.");
            const url = `${this._baseUrl}/${routes_1.Routes.dataset_add_rows.replace('{dataset_id}', datasetId)}`;
            yield this.request({
                method: types_1.HttpMethod.POST,
                url,
                json: { rows },
                timeout: DEFAULT_API_TIMEOUT,
            }, DatasetResponseHandler);
        });
    }
    // Static methods
    /** Build an instance from `datasetConfig` (plus `options`) and create the dataset. */
    static createDataset(datasetConfig, source, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const instance = new Dataset(Object.assign({}, options, { datasetConfig }));
            return instance.create(source);
        });
    }
    /** Look a dataset up by name, then download it (see download()). */
    static downloadDataset(datasetName_1, filePath_1) {
        return __awaiter(this, arguments, void 0, function* (datasetName, filePath, loadToMemory = false, options) {
            const instance = new Dataset(options);
            instance._datasetConfig = yield instance._fetchDatasetConfig(datasetName);
            return instance.download(filePath, loadToMemory);
        });
    }
    /** Look a dataset up by name, then delete it. */
    static deleteDataset(datasetName, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const instance = new Dataset(options);
            instance._datasetConfig = yield instance._fetchDatasetConfig(datasetName);
            yield instance.delete();
        });
    }
    /** Fetch the `{ id, name, modelType }` configuration of a dataset by name. */
    static getDatasetConfig(datasetName, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const instance = new Dataset(options);
            return instance._fetchDatasetConfig(datasetName);
        });
    }
    /** Look a dataset up by name, then add columns to it. */
    static addDatasetColumns(datasetName, columns, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const instance = new Dataset(options);
            instance._datasetConfig = yield instance._fetchDatasetConfig(datasetName);
            yield instance.addColumns(columns);
        });
    }
    /** Look a dataset up by name, then add rows to it. */
    static addDatasetRows(datasetName, rows, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const instance = new Dataset(options);
            instance._datasetConfig = yield instance._fetchDatasetConfig(datasetName);
            yield instance.addRows(rows);
        });
    }
    /**
     * Unified helper that always returns a ready-to-use Dataset instance.
     * If the dataset already exists it is fetched; otherwise it is created
     * with model type GENERATIVE_LLM (unless createIfMissing === false, in
     * which case the DatasetNotFoundError is rethrown).
     */
    static open(datasetName_1) {
        return __awaiter(this, arguments, void 0, function* (datasetName, opts = {}) {
            const { createIfMissing = true } = opts, authOpts = __rest(opts, ["createIfMissing"]);
            try {
                // Try to fetch existing
                const cfg = yield Dataset.getDatasetConfig(datasetName, authOpts);
                return new Dataset(Object.assign({}, authOpts, { datasetConfig: cfg }));
            }
            catch (err) {
                if (!(err instanceof errors_1.DatasetNotFoundError && createIfMissing)) {
                    throw err;
                }
                // Create new dataset then return instance
                const dsConfig = {
                    name: datasetName,
                    modelType: types_2.ModelTypes.GENERATIVE_LLM,
                };
                const instance = new Dataset(Object.assign({}, authOpts, { datasetConfig: dsConfig }));
                yield instance.create();
                return instance;
            }
        });
    }
}
exports.Dataset = Dataset;
Dataset._datasetInstanceCache = new LRUCache(100);
//# sourceMappingURL=dataset.js.map