UNPKG

parea-ai

Version:

Client SDK library to connect to Parea AI.

192 lines (191 loc) 8.45 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Experiment = void 0;
const experimentRunner_1 = require("./experimentRunner");
const types_1 = require("./types");
const trial_1 = require("./trial");
const types_2 = require("../types");
const utils_1 = require("./utils");
const experimentContext_1 = require("./experimentContext");
const helpers_1 = require("../utils/helpers");

/** Worker count handed to the ExperimentRunner when `options.nWorkers` is not set. */
const DEFAULT_N_WORKERS = 10;
/** Retry budget handed to each Trial when `options.maxRetries` is not a number. */
const DEFAULT_MAX_RETRIES = 60;
/** Pause before finishing the experiment so that pending logs can flush. */
const LOG_FLUSH_DELAY_MS = 4000;

/**
 * Represents an experiment that can be run with multiple trials.
 * @template T - The type of the dataset elements
 * @template R - The type of the result returned by the traced function
 */
class Experiment {
    /**
     * Creates a new Experiment instance.
     * @param name The name of the experiment. Required.
     * @param dataset The dataset to be used for the experiment: either a collection
     *   name (string) or an array of data.
     * @param func The function to be executed for each trial.
     * @param options Additional options for the experiment. Fields read in this file:
     *   `nWorkers`, `nTrials`, `maxRetries`, `metadata`, `datasetLevelEvalFuncs`.
     *   May be omitted. Note: when `dataset` is a string, `options.metadata` is
     *   reassigned on this same object (see trySetDataset).
     * @param parea The Parea client instance.
     * @throws Error if `name` is empty.
     */
    constructor(name, dataset, func, options, parea) {
        if (!name) {
            throw new Error('Experiment name is required');
        }
        this.name = name;
        this.dataset = dataset;
        this.func = func;
        // Tolerate a missing options bag: every later access assumes an object.
        this.options = options ?? {};
        this.state = types_2.ExperimentStatus.PENDING;
        this.successRate = 0;
        this.errors = '';
        this.logs = [];
        this.runner = new experimentRunner_1.ExperimentRunner(this.options.nWorkers || DEFAULT_N_WORKERS);
        this.p = parea;
        this.trySetDataset(dataset);
    }

    /**
     * Runs the experiment and returns the results.
     * @param runNameOrOptions run name as a string or {runName?, prefix?}. If prefix is provided, it will be prepended to the final run name.
     * @example
     * ```typescript
     * run();
     * run("myRunName");
     * run({ prefix: "substep" });
     * run({ runName: "myRunName", prefix: "substep" })
     * ```
     * @returns A promise that resolves to the experiment results.
     * @throws Error if the experiment fails to run. The underlying error is
     *   available as `cause` on runtimes that support it.
     */
    async run(runNameOrOptions) {
        let runName;
        let prefix;
        if (typeof runNameOrOptions === 'string') {
            runName = runNameOrOptions;
        }
        else if (runNameOrOptions !== null && typeof runNameOrOptions === 'object') {
            // `typeof null` is 'object', hence the explicit null guard above.
            ({ runName, prefix } = runNameOrOptions);
        }
        const name = runName || (0, utils_1.genRandomName)();
        this.runName = prefix ? `${prefix}_${name}` : name;
        this.state = types_2.ExperimentStatus.RUNNING;

        let experimentSchema;
        try {
            experimentSchema = await this.p.createExperiment({
                name: this.name,
                run_name: this.runName,
                metadata: this.options.metadata,
            });
        }
        catch (error) {
            // Do not leave the experiment stuck in RUNNING when it never started.
            this.state = types_2.ExperimentStatus.FAILED;
            throw error;
        }
        const experimentUUID = experimentSchema.uuid;

        return experimentContext_1.experimentContext.runInContext(experimentUUID, async () => {
            console.log(`Experiment started: ${this.name} - ${this.runName}`);
            try {
                // A string dataset is resolved to its test cases here and cached on the instance.
                this.dataset = await this.determineDataset(this.dataset);
                const maxRetries = typeof this.options.maxRetries === 'number'
                    ? this.options.maxRetries
                    : DEFAULT_MAX_RETRIES;
                // One Trial per (dataset element, repetition).
                const trials = this.dataset.flatMap((data) => Array(this.options.nTrials || 1)
                    .fill(null)
                    .map(() => new trial_1.Trial(data, this.func, experimentUUID, maxRetries)));
                const results = await this.runner.runTrials(trials);
                this.state = this.determineState(results);
                const er = new types_1.ExperimentResult(this.name, results, this.options.metadata);
                this.logs = er.getLogs();
                this.successRate = er.getSuccessRate();
                this.errors = er.getErrorsString();
                return er;
            }
            catch (error) {
                this.state = types_2.ExperimentStatus.FAILED;
                const message = error instanceof Error ? error.message : String(error);
                // Keep the original error (and its stack) reachable via `cause`.
                throw new Error(`Experiment failed: ${message}`, { cause: error });
            }
            finally {
                try {
                    await this.logExperimentResults(experimentUUID);
                }
                finally {
                    // Always clear the env var, even if result logging throws.
                    // NOTE(review): the variable is set outside this file — confirm where.
                    delete process.env.PAREA_OS_ENV_EXPERIMENT_UUID;
                }
            }
        });
    }

    /**
     * Gets the current state of the experiment.
     * @returns The current experiment status.
     */
    getState() {
        return this.state;
    }

    /**
     * Determines the overall state of the experiment based on trial results.
     * @param results An array of trial results.
     * @returns FAILED if any trial's state is not COMPLETED, otherwise COMPLETED
     *   (an empty results array therefore yields COMPLETED).
     */
    determineState(results) {
        const allCompleted = results.every((result) => result.state === types_2.ExperimentStatus.COMPLETED);
        return allCompleted ? types_2.ExperimentStatus.COMPLETED : types_2.ExperimentStatus.FAILED;
    }

    /**
     * Calculates dataset-level statistics based on evaluation functions.
     *
     * Each function in `options.datasetLevelEvalFuncs` is called with the logs of
     * the last run. A function that throws is reported via `console.error` and
     * contributes no scores; it does not fail the others.
     * @returns A promise that resolves to a flat array of evaluation results
     *   (empty when no eval functions are configured).
     */
    async getDatasetLevelStats() {
        const evalFuncs = this.options.datasetLevelEvalFuncs || [];
        const datasetLevelEvalPromises = evalFuncs.map(async (func) => {
            try {
                const score = await func(this.logs);
                const scores = [];
                (0, helpers_1.processEvaluationResult)(func.name, score, scores);
                return scores;
            }
            catch (e) {
                console.error(`Error occurred calling '${func.name}', ${e}`, e);
                return null;
            }
        });
        const perFuncScores = await Promise.all(datasetLevelEvalPromises);
        // flat() unwraps each per-function score list; failed functions left a null behind.
        return perFuncScores.flat().filter((x) => x !== null);
    }

    /**
     * Determines the dataset to be used for the experiment.
     * @param dataset The input dataset, either as a string (collection name) or an array of data.
     * @returns A promise that resolves to the array of dataset elements.
     * @throws Error if the specified collection is not found.
     */
    async determineDataset(dataset) {
        if (typeof dataset !== 'string') {
            return dataset;
        }
        console.log(`Fetching test collection: ${dataset}`);
        const response = await this.p.getCollection(dataset);
        if (!response) {
            throw new Error(`Collection ${dataset} not found`);
        }
        const testCollection = new types_2.TestCaseCollection(response.id, response.name, response.created_at, response.last_updated_at, response.column_names, response.test_cases);
        console.log(`Fetched ${testCollection.numTestCases()} test cases from collection: ${dataset} \n`);
        return testCollection.getAllTestInputsAndTargets();
    }

    /**
     * Logs the results of the experiment.
     *
     * Computes dataset-level stats, waits for logs to flush, marks the experiment
     * as finished through the Parea client, and prints a summary to the console.
     * @param experimentUUID The UUID of the experiment.
     */
    async logExperimentResults(experimentUUID) {
        const dls = await this.getDatasetLevelStats();
        // Give pending logs time to flush before finishing the experiment.
        await new Promise((resolve) => setTimeout(resolve, LOG_FLUSH_DELAY_MS));
        const experimentStats = await this.p.finishExperiment(experimentUUID, {
            dataset_level_stats: dls || undefined,
            status: this.getState(),
        });
        const statNameToAvgStd = (0, utils_1.calculateAvgStdForExperiment)(experimentStats);
        // Dataset-level scores are reported alongside the per-trial averages.
        (dls || []).forEach((result) => {
            statNameToAvgStd[result.name] = result.score.toFixed(2);
        });
        console.log(`Experiment ${this.name} Run ${this.runName} avg. stats:\n${JSON.stringify(statNameToAvgStd, null, 2)}`);
        console.log(`Success rate: ${this.successRate}%`);
        console.log(this.errors ? `Errors: ${this.errors}\n\n` : '\n\n');
        console.log(`View experiment & traces at: https://app.parea.ai/experiments/${encodeURIComponent(this.name)}/${experimentUUID}\n`);
    }

    /**
     * Records the dataset name under the reserved metadata key "Dataset" when the
     * dataset is given as a collection name. Array datasets leave metadata untouched.
     * An existing "Dataset" entry is overwritten after a console warning.
     * @param dataset The input dataset, either as a string (collection name) or an array of data.
     */
    trySetDataset(dataset) {
        if (typeof dataset !== 'string') {
            return;
        }
        if (this.options.metadata?.Dataset) {
            console.warn('Metadata key "Dataset" is reserved for the dataset name. Overwriting it with the provided dataset name.');
        }
        // Spreading undefined/null is a no-op, so this also covers missing metadata.
        this.options.metadata = { ...this.options.metadata, Dataset: dataset };
    }
}
exports.Experiment = Experiment;