// parea-ai
// Client SDK library to connect to Parea AI.
// (Compiled JavaScript output.)
"use strict";
// Flags `exports` with `__esModule: true` (the conventional marker for a module
// that was transpiled from ES module syntax).
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the `Experiment` export slot as undefined up front; the class itself
// is assigned to it after its definition at the bottom of this file.
exports.Experiment = void 0;
const experimentRunner_1 = require("./experimentRunner");
const types_1 = require("./types");
const trial_1 = require("./trial");
const types_2 = require("../types");
const utils_1 = require("./utils");
const experimentContext_1 = require("./experimentContext");
const helpers_1 = require("../utils/helpers");
/**
 * Represents an experiment that can be run with multiple trials.
 * @template T - The type of the dataset elements
 * @template R - The type of the result returned by the traced function
 */
class Experiment {
    /**
     * Creates a new Experiment instance.
     * @param name The name of the experiment. Must be non-empty.
     * @param dataset The dataset to be used for the experiment: either a collection name (string) or an array of data.
     * @param func The function to be executed for each trial.
     * @param options Additional options for the experiment. The keys read by this class are
     *   `nWorkers`, `nTrials`, `maxRetries`, `metadata` and `datasetLevelEvalFuncs`. May be omitted.
     * @param parea The Parea client instance.
     * @throws Error if `name` is empty.
     */
    constructor(name, dataset, func, options, parea) {
        if (!name) {
            throw new Error('Experiment name is required');
        }
        this.name = name;
        this.dataset = dataset;
        this.func = func;
        // Work on a shallow copy: trySetDataset() rewrites `metadata`, and that must not
        // leak into an options object the caller may reuse for another experiment.
        // Spreading undefined/null yields {}, so omitted options are tolerated as well.
        this.options = { ...options };
        this.state = types_2.ExperimentStatus.PENDING;
        this.successRate = 0;
        this.errors = '';
        this.logs = [];
        this.runner = new experimentRunner_1.ExperimentRunner(this.options.nWorkers || 10);
        this.p = parea;
        this.trySetDataset(dataset);
    }
    /**
     * Runs the experiment and returns the results.
     * @param runNameOrOptions run name as a string or {runName?, prefix?}. If prefix is provided, it will be prepended to the final run name.
     * @example
     * ```typescript
     * run();
     * run("myRunName");
     * run({ prefix: "substep" });
     * run({ runName: "myRunName", prefix: "substep" })
     * ```
     * @returns A promise that resolves to the experiment results.
     * @throws Error if the experiment fails to run. The wrapping error carries the original failure as `cause`.
     */
    async run(runNameOrOptions) {
        let runName;
        let prefix;
        if (typeof runNameOrOptions === 'string') {
            runName = runNameOrOptions;
        }
        else if (runNameOrOptions !== null && typeof runNameOrOptions === 'object') {
            // `typeof null` is also 'object'; destructuring null would throw.
            ({ runName, prefix } = runNameOrOptions);
        }
        const name = runName || (0, utils_1.genRandomName)();
        this.runName = prefix ? `${prefix}_${name}` : name;
        this.state = types_2.ExperimentStatus.RUNNING;
        let experimentSchema;
        try {
            experimentSchema = await this.p.createExperiment({
                name: this.name,
                run_name: this.runName,
                metadata: this.options.metadata,
            });
        }
        catch (error) {
            // The run never started, so do not leave the state stuck at RUNNING.
            this.state = types_2.ExperimentStatus.FAILED;
            throw error;
        }
        const experimentUUID = experimentSchema.uuid;
        return experimentContext_1.experimentContext.runInContext(experimentUUID, async () => {
            console.log(`Experiment started: ${this.name} - ${this.runName}`);
            let experimentResult;
            let failure;
            try {
                this.dataset = await this.determineDataset(this.dataset);
                const maxRetries = typeof this.options.maxRetries === 'number' ? this.options.maxRetries : 60;
                const nTrials = this.options.nTrials || 1;
                // One Trial per (dataset element, repetition) pair.
                const trials = this.dataset.flatMap((data) => Array.from({ length: nTrials }, () => new trial_1.Trial(data, this.func, experimentUUID, maxRetries)));
                const results = await this.runner.runTrials(trials);
                this.state = this.determineState(results);
                experimentResult = new types_1.ExperimentResult(this.name, results, this.options.metadata);
                this.logs = experimentResult.getLogs();
                this.successRate = experimentResult.getSuccessRate();
                this.errors = experimentResult.getErrorsString();
            }
            catch (error) {
                this.state = types_2.ExperimentStatus.FAILED;
                // Keep the original error (and its stack) reachable through `cause`.
                failure = new Error(`Experiment failed: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
            }
            // Results are reported whether or not the trials succeeded.
            try {
                await this.logExperimentResults(experimentUUID);
            }
            catch (reportError) {
                if (!failure) {
                    throw reportError;
                }
                // A reporting problem must not hide the reason the experiment itself failed.
                console.error(`Failed to report results for experiment ${this.name}: ${reportError}`, reportError);
            }
            finally {
                // Always clear the environment marker, even when reporting throws.
                delete process.env.PAREA_OS_ENV_EXPERIMENT_UUID;
            }
            if (failure) {
                throw failure;
            }
            return experimentResult;
        });
    }
    /**
     * Gets the current state of the experiment.
     * @returns The current experiment status.
     */
    getState() {
        return this.state;
    }
    /**
     * Determines the overall state of the experiment based on trial results.
     * @param results An array of trial results.
     * @returns FAILED if any result's state is not COMPLETED, otherwise COMPLETED
     *   (an empty array therefore counts as COMPLETED).
     */
    determineState(results) {
        const allCompleted = results.every((result) => result.state === types_2.ExperimentStatus.COMPLETED);
        return allCompleted ? types_2.ExperimentStatus.COMPLETED : types_2.ExperimentStatus.FAILED;
    }
    /**
     * Calculates dataset-level statistics based on evaluation functions.
     * Each function in `options.datasetLevelEvalFuncs` is called with the collected logs;
     * a function that throws is reported via console.error and contributes nothing.
     * @returns A promise that resolves to a flat array of evaluation results (empty if
     *   there are no evaluation functions or all of them failed).
     */
    async getDatasetLevelStats() {
        const evalFuncs = this.options.datasetLevelEvalFuncs || [];
        const perFuncScores = await Promise.all(evalFuncs.map(async (func) => {
            try {
                const score = await func(this.logs);
                const scores = [];
                // presumably appends the normalized result(s) to `scores` — verify against helpers
                (0, helpers_1.processEvaluationResult)(func.name, score, scores);
                return scores;
            }
            catch (e) {
                console.error(`Error occurred calling '${func.name}', ${e}`, e);
                return null;
            }
        }));
        return perFuncScores.flat().filter((x) => x !== null);
    }
    /**
     * Determines the dataset to be used for the experiment.
     * @param dataset The input dataset, either as a string (collection name) or an array of data.
     * @returns A promise that resolves to the array of dataset elements.
     * @throws Error if the specified collection is not found.
     */
    async determineDataset(dataset) {
        if (typeof dataset !== 'string') {
            return dataset;
        }
        console.log(`Fetching test collection: ${dataset}`);
        const response = await this.p.getCollection(dataset);
        if (!response) {
            // Report the name that was actually looked up, not whatever this.dataset holds.
            throw new Error(`Collection ${dataset} not found`);
        }
        const testCollection = new types_2.TestCaseCollection(response.id, response.name, response.created_at, response.last_updated_at, response.column_names, response.test_cases);
        console.log(`Fetched ${testCollection.numTestCases()} test cases from collection: ${dataset} \n`);
        return testCollection.getAllTestInputsAndTargets();
    }
    /**
     * Logs the results of the experiment: computes dataset-level stats, marks the
     * experiment as finished on the Parea client and prints a summary to the console.
     * @param experimentUUID The UUID of the experiment.
     */
    async logExperimentResults(experimentUUID) {
        const dls = await this.getDatasetLevelStats();
        // sleep for 4 seconds for logs to flush
        await new Promise((resolve) => setTimeout(resolve, 4000));
        const experimentStats = await this.p.finishExperiment(experimentUUID, {
            dataset_level_stats: dls || undefined,
            status: this.getState(),
        });
        const statNameToAvgStd = (0, utils_1.calculateAvgStdForExperiment)(experimentStats);
        // Dataset-level scores are shown next to the per-trial averages.
        for (const result of dls || []) {
            statNameToAvgStd[result.name] = result.score.toFixed(2);
        }
        console.log(`Experiment ${this.name} Run ${this.runName} avg. stats:\n${JSON.stringify(statNameToAvgStd, null, 2)}`);
        console.log(`Success rate: ${this.successRate}%`);
        console.log(this.errors ? `Errors: ${this.errors}\n\n` : '\n\n');
        console.log(`View experiment & traces at: https://app.parea.ai/experiments/${encodeURIComponent(this.name)}/${experimentUUID}\n`);
    }
    /**
     * Sets the dataset name as metadata if the dataset is given by name.
     * Does nothing when `dataset` is not a string. Warns when an existing
     * `Dataset` metadata entry is about to be overwritten.
     * @param dataset The input dataset, either as a string (collection name) or an array of data.
     */
    trySetDataset(dataset) {
        if (typeof dataset !== 'string') {
            return;
        }
        const existingMetadata = this.options.metadata;
        if (existingMetadata && existingMetadata.Dataset) {
            console.warn('Metadata key "Dataset" is reserved for the dataset name. Overwriting it with the provided dataset name.');
        }
        // Build a fresh object so a caller-owned metadata object is never mutated.
        this.options.metadata = { ...existingMetadata, Dataset: dataset };
    }
}
// Publish the class as this module's `Experiment` export.
exports.Experiment = Experiment;