evalite
Version:
Test your LLM-powered apps with a TypeScript-native, Vitest-based eval runner. No API key required.
410 lines (406 loc) • 14.1 kB
JavaScript
import Database from "better-sqlite3";
import { max } from "./utils.js";
/**
 * Opens the SQLite database at `url`, switches it to WAL journal mode and
 * makes sure the schema (runs, evals, results, scores, traces) exists.
 *
 * Columns that were added after the initial schema (`evals.status`,
 * `results.status`, `results.rendered_columns`) are applied as idempotent
 * migrations.
 *
 * @param {string} url - Location passed straight to the `Database` constructor.
 * @returns The opened database handle.
 * @throws Re-throws any migration error other than "duplicate column name".
 */
export const createDatabase = (url) => {
    const db = new Database(url);
    db.pragma("journal_mode = WAL");
    db.exec(`
CREATE TABLE IF NOT EXISTS runs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
runType TEXT NOT NULL, -- full, partial
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS evals (
id INTEGER PRIMARY KEY AUTOINCREMENT,
run_id INTEGER NOT NULL,
name TEXT NOT NULL,
filepath TEXT NOT NULL,
duration INTEGER NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (run_id) REFERENCES runs(id)
);
CREATE TABLE IF NOT EXISTS results (
id INTEGER PRIMARY KEY AUTOINCREMENT,
eval_id INTEGER NOT NULL,
duration INTEGER NOT NULL,
input TEXT NOT NULL, -- JSON
output TEXT NOT NULL, -- JSON
expected TEXT, -- JSON
col_order INTEGER NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (eval_id) REFERENCES evals(id)
);
CREATE TABLE IF NOT EXISTS scores (
id INTEGER PRIMARY KEY AUTOINCREMENT,
result_id INTEGER NOT NULL,
name TEXT NOT NULL,
score FLOAT NOT NULL,
description TEXT,
metadata TEXT, -- JSON
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (result_id) REFERENCES results(id)
);
CREATE TABLE IF NOT EXISTS traces (
id INTEGER PRIMARY KEY AUTOINCREMENT,
result_id INTEGER NOT NULL,
input TEXT NOT NULL, -- JSON
output TEXT NOT NULL, -- JSON
start_time INTEGER NOT NULL,
end_time INTEGER NOT NULL,
prompt_tokens INTEGER,
completion_tokens INTEGER,
col_order INTEGER NOT NULL,
FOREIGN KEY (result_id) REFERENCES results(id)
);
`);
    // Runs an `ALTER TABLE ... ADD COLUMN` statement. The only failure that is
    // expected (and ignored) is the column already existing from a previous
    // start-up; every other error is surfaced instead of being swallowed.
    const addColumnIfMissing = (alterSql) => {
        try {
            db.exec(alterSql);
        }
        catch (e) {
            const message = String(e?.message ?? "");
            if (!/duplicate column name/i.test(message)) {
                throw e;
            }
        }
    };
    // Add status key to evals table
    addColumnIfMissing(`ALTER TABLE evals ADD COLUMN status TEXT NOT NULL DEFAULT 'success';`);
    // Add status key to results table
    addColumnIfMissing(`ALTER TABLE results ADD COLUMN status TEXT NOT NULL DEFAULT 'success';`);
    // Add rendered_columns key to results table
    addColumnIfMissing(`ALTER TABLE results ADD COLUMN rendered_columns TEXT`);
    return db;
};
/**
 * @deprecated
 *
 * Persists a whole run in one go: one `runs` row, then for every suite in
 * every file an `evals` row, and for every task that carries
 * `meta.evalite.result` a `results` row plus its `scores` and `traces`.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param {object} opts
 * @param opts.files - Files to persist; each has `filepath` and `tasks` (suites).
 * @param opts.runType - Value stored in `runs.runType`.
 * @throws {Error} When a suite has no `tasks` (evalite nested in a describe block).
 */
export const saveRun = (db, { files, runType, }) => {
    // Statements are prepared once and reused for every row. The named
    // parameters match the keys of the objects passed to `run()` below.
    const insertRunStmt = db.prepare(`
INSERT INTO runs (runType)
VALUES (@runType)
`);
    const insertEvalStmt = db.prepare(`
INSERT INTO evals (run_id, name, filepath, duration, status)
VALUES (@runId, @name, @filepath, @duration, @status)
`);
    const insertResultStmt = db.prepare(`
INSERT INTO results (eval_id, duration, input, output, expected, col_order)
VALUES (@evalId, @duration, @input, @output, @expected, @col_order)
`);
    const insertScoreStmt = db.prepare(`
INSERT INTO scores (result_id, name, score, description, metadata)
VALUES (@resultId, @name, @score, @description, @metadata)
`);
    const insertTraceStmt = db.prepare(`
INSERT INTO traces (result_id, input, output, start_time, end_time, prompt_tokens, completion_tokens, col_order)
VALUES (@resultId, @input, @output, @start_time, @end_time, @prompt_tokens, @completion_tokens, @col_order)
`);
    const runId = insertRunStmt.run({ runType }).lastInsertRowid;
    for (const file of files) {
        for (const suite of file.tasks) {
            if (!suite.tasks) {
                throw new Error("An unknown error occurred - did you nest evalite inside a describe block?");
            }
            const evalId = insertEvalStmt.run({
                runId,
                name: suite.name,
                filepath: file.filepath,
                // `max` comes from ./utils.js; presumably the largest per-task
                // duration, with tasks lacking evalite metadata counting as 0.
                duration: max(suite.tasks, (t) => t.meta.evalite?.duration ?? 0),
                status: suite.result?.state === "fail" ? "fail" : "success",
            }).lastInsertRowid;
            for (const task of suite.tasks) {
                const taskResult = task.meta.evalite?.result;
                if (!taskResult) {
                    // Tasks without a recorded result are not persisted.
                    continue;
                }
                const { duration, input, output, expected, scores, traces, order } = taskResult;
                const resultId = insertResultStmt.run({
                    evalId,
                    duration,
                    input: JSON.stringify(input),
                    output: JSON.stringify(output),
                    expected: JSON.stringify(expected),
                    col_order: order,
                }).lastInsertRowid;
                for (const score of scores) {
                    insertScoreStmt.run({
                        resultId,
                        name: score.name,
                        // scores.score is NOT NULL, so a missing score is stored as 0.
                        score: score.score ?? 0,
                        description: score.description,
                        metadata: JSON.stringify(score.metadata),
                    });
                }
                // Traces are numbered from 1 in iteration order.
                let traceOrder = 0;
                for (const trace of traces) {
                    traceOrder += 1;
                    insertTraceStmt.run({
                        resultId,
                        input: JSON.stringify(trace.input),
                        output: JSON.stringify(trace.output),
                        // start_time / end_time are INTEGER columns.
                        start_time: Math.round(trace.start),
                        end_time: Math.round(trace.end),
                        prompt_tokens: trace.usage?.promptTokens ?? null,
                        completion_tokens: trace.usage?.completionTokens ?? null,
                        col_order: traceOrder,
                    });
                }
            }
        }
    }
};
/**
 * @deprecated
 *
 * Used in existing tests, but in future code should be replaced
 * by more specific queries.
 *
 * Loads every eval together with its results, and each result together with
 * its scores and traces, keyed by eval name.
 *
 * @param db - Database handle exposing `prepare()`.
 * @returns {Promise<Record<string, object[]>>} Eval name -> evals with that
 *   name (in query order), each extended with a `results` array whose items
 *   carry `scores` and `traces` arrays.
 */
export const getEvalsAsRecord = async (db) => {
    const evals = db.prepare(`SELECT * FROM evals`).all();
    const allResults = getResults(db, evals.map((e) => e.id));
    const allScores = getScores(db, allResults.map((r) => r.id));
    const allTraces = getTraces(db, allResults.map((r) => r.id));
    // Bucket rows by their parent id in a single pass instead of re-scanning
    // the full arrays for every eval/result. Row order inside a bucket is the
    // order of the source array.
    const groupBy = (rows, column) => {
        const buckets = new Map();
        for (const row of rows) {
            const bucket = buckets.get(row[column]);
            if (bucket) {
                bucket.push(row);
            }
            else {
                buckets.set(row[column], [row]);
            }
        }
        return buckets;
    };
    const resultsByEvalId = groupBy(allResults, "eval_id");
    const scoresByResultId = groupBy(allScores, "result_id");
    const tracesByResultId = groupBy(allTraces, "result_id");
    // Collect into a Map first: indexing a plain object with an eval name such
    // as "toString" or "constructor" would hit the inherited member instead
    // of an own bucket and crash on `.push`.
    const evalsByName = new Map();
    for (const evaluation of evals) {
        const results = (resultsByEvalId.get(evaluation.id) ?? []).map((r) => ({
            ...r,
            scores: scoresByResultId.get(r.id) ?? [],
            traces: tracesByResultId.get(r.id) ?? [],
        }));
        const entry = { ...evaluation, results };
        const sameName = evalsByName.get(evaluation.name);
        if (sameName) {
            sameName.push(entry);
        }
        else {
            evalsByName.set(evaluation.name, [entry]);
        }
    }
    return Object.fromEntries(evalsByName);
};
/**
 * Fetches the evals that belong to any of the given runs and whose status is
 * one of `allowedStatuses`.
 *
 * Both lists are interpolated directly into the SQL text, so callers must
 * pass trusted values only.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param runIds - Run ids to match against `evals.run_id`.
 * @param allowedStatuses - Status strings to match against `evals.status`.
 * @returns All matching `evals` rows.
 */
export const getEvals = (db, runIds, allowedStatuses) => {
    const runIdList = runIds.join(",");
    const statusList = allowedStatuses.map((status) => `'${status}'`).join(",");
    const statement = db.prepare(`
SELECT * FROM evals
WHERE run_id IN (${runIdList})
AND status IN (${statusList})
`);
    return statement.all();
};
/**
 * Loads the `results` rows of the given evals, ordered by `col_order`, with
 * the JSON text columns (`input`, `output`, `expected`, `rendered_columns`)
 * run through `jsonParseFields`.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param evalIds - Eval ids, interpolated into the `IN (...)` list.
 * @returns One parsed object per row.
 */
export const getResults = (db, evalIds) => {
    const rows = db
        .prepare(`
SELECT * FROM results
WHERE eval_id IN (${evalIds.join(",")})
ORDER BY col_order ASC
`)
        .all();
    const parsedRows = [];
    for (const row of rows) {
        parsedRows.push(jsonParseFields(row, ["input", "output", "expected", "rendered_columns"]));
    }
    return parsedRows;
};
/**
 * Loads the `scores` rows attached to the given results, with the JSON text
 * column `metadata` run through `jsonParseFields`.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param resultIds - Result ids, interpolated into the `IN (...)` list.
 * @returns One parsed object per row.
 */
export const getScores = (db, resultIds) => {
    const idList = resultIds.join(",");
    const statement = db.prepare(`
SELECT * FROM scores
WHERE result_id IN (${idList})
`);
    const rows = statement.all();
    return rows.map((row) => jsonParseFields(row, ["metadata"]));
};
/**
 * Loads the `traces` rows attached to the given results, ordered by
 * `col_order`, with the JSON text columns `input` and `output` run through
 * `jsonParseFields`.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param resultIds - Result ids, interpolated into the `IN (...)` list.
 * @returns One parsed object per row.
 */
export const getTraces = (db, resultIds) => {
    const sql = `
SELECT * FROM traces
WHERE result_id IN (${resultIds.join(",")})
ORDER BY col_order ASC
`;
    const traceRows = db.prepare(sql).all();
    return traceRows.map((traceRow) => {
        return jsonParseFields(traceRow, ["input", "output"]);
    });
};
/**
 * Returns the newest `runs` row (by `created_at`) of the given run type.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param runType - Value compared against `runs.runType`.
 * @returns Whatever the statement's `get()` yields for the first matching row.
 */
export const getMostRecentRun = (db, runType) => {
    // `@runType` is bound from the object passed to `get()`.
    const run = db
        .prepare(`
SELECT * FROM runs
WHERE runType = @runType
ORDER BY created_at DESC
LIMIT 1
`)
        .get({ runType });
    return run;
};
/**
 * Returns the most recent eval with the given name that was created before
 * `startTime` and is no longer in the 'running' state.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param name - Value compared against `evals.name`.
 * @param startTime - Upper bound (exclusive) for `evals.created_at`; must be
 *   in a form comparable with that column.
 * @returns Whatever the statement's `get()` yields for the first matching row.
 */
export const getPreviousCompletedEval = (db, name, startTime) => {
    // `@name` and `@startTime` are bound from the object passed to `get()`.
    const evaluation = db
        .prepare(`
SELECT * FROM evals
WHERE name = @name AND created_at < @startTime
AND status != 'running'
ORDER BY created_at DESC
LIMIT 1
`)
        .get({ name, startTime });
    return evaluation;
};
/**
 * Computes the mean score of each of the given results.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param resultIds - Result ids, interpolated into the `IN (...)` list.
 * @returns Rows of `{ result_id, average }`, one per result that has scores.
 */
export const getAverageScoresFromResults = (db, resultIds) => {
    const idList = resultIds.join(",");
    const statement = db.prepare(`
SELECT result_id, AVG(score) as average
FROM scores
WHERE result_id IN (${idList})
GROUP BY result_id
`);
    return statement.all();
};
/**
 * Computes, for each of the given evals, the mean of all scores across that
 * eval's results.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param evalIds - Eval ids, interpolated into the `IN (...)` list.
 * @returns Rows of `{ eval_id, average }`, one per eval that has scores.
 */
export const getEvalsAverageScores = (db, evalIds) => {
    const sql = `
SELECT r.eval_id, AVG(s.score) as average
FROM scores s
JOIN results r ON s.result_id = r.id
WHERE r.eval_id IN (${evalIds.join(",")})
GROUP BY r.eval_id
`;
    return db.prepare(sql).all();
};
/**
 * Returns a shallow copy of `obj` in which every property named in `fields`
 * has been passed through `JSON.parse`; all other properties are copied
 * unchanged. Names in `fields` that `obj` does not have are ignored.
 *
 * @param {object} obj - Source object; it is not modified.
 * @param {string[]} fields - Names of the properties holding JSON text.
 * @returns {object} The new object.
 * @throws {SyntaxError} When a listed property does not contain valid JSON.
 */
export const jsonParseFields = (obj, fields) => {
    const parsed = {};
    for (const [name, raw] of Object.entries(obj)) {
        parsed[name] = fields.includes(name) ? JSON.parse(raw) : raw;
    }
    return parsed;
};
/**
 * Looks up a single eval by name.
 *
 * Defaults to most recent if timestamp not passed
 *
 * @param db - Database handle exposing `prepare()`.
 * @param {object} opts
 * @param opts.name - Value compared against `evals.name`.
 * @param [opts.timestamp] - When truthy, only an eval whose `created_at`
 *   equals this value matches.
 * @param {string[]} [opts.statuses] - When given, restricts `evals.status` to
 *   these values. They are interpolated into the SQL text, so pass trusted
 *   values only.
 * @returns Whatever the statement's `get()` yields for the first matching row.
 */
export const getEvalByName = (db, opts) => {
    // `@name` (and `@timestamp`, when that clause is present) are bound from
    // the object passed to `get()`.
    return db
        .prepare(`
SELECT * FROM evals
WHERE name = @name
${opts.timestamp ? "AND created_at = @timestamp" : ""}
${opts.statuses ? `AND status IN (${opts.statuses.map((s) => `'${s}'`).join(",")})` : ""}
ORDER BY created_at DESC
LIMIT 1
`)
        .get({ name: opts.name, timestamp: opts.timestamp });
};
/**
 * Lists every non-running eval with the given name, oldest first, each with
 * the mean of all its scores.
 *
 * Results and scores are LEFT JOINed, so evals without scores are still
 * returned.
 *
 * @param db - Database handle exposing `prepare()`.
 * @param name - Value compared against `evals.name`.
 * @returns `evals` rows extended with an `average_score` column.
 */
export const getHistoricalEvalsWithScoresByName = (db, name) => {
    // `@name` is bound from the object passed to `all()`.
    return db
        .prepare(`
SELECT evals.*, AVG(scores.score) as average_score
FROM evals
LEFT JOIN results ON evals.id = results.eval_id
LEFT JOIN scores ON results.id = scores.result_id
WHERE evals.name = @name
AND evals.status != 'running'
GROUP BY evals.id
ORDER BY evals.created_at ASC
`)
        .all({ name });
};
/**
 * Returns the id of the eval called `name` within run `runId`, inserting a
 * new row (duration 0, status 'running') when none exists yet.
 *
 * @param {object} opts
 * @param opts.db - Database handle exposing `prepare()`.
 * @param opts.runId - Run the eval belongs to (`evals.run_id`).
 * @param opts.name - Eval name (`evals.name`).
 * @param opts.filepath - Stored in `evals.filepath` when a row is inserted.
 * @returns The existing eval's id, or `lastInsertRowid` of the new row.
 */
export const createEvalIfNotExists = ({ db, runId, name, filepath, }) => {
    // Named parameters are bound from the objects passed to `get()` / `run()`.
    let evaluationId = db
        .prepare(`SELECT id FROM evals WHERE name = @name AND run_id = @runId`)
        .get({ name, runId })?.id;
    if (!evaluationId) {
        evaluationId = db
            .prepare(`INSERT INTO evals (run_id, name, filepath, duration, status)
VALUES (@runId, @name, @filepath, @duration, @status)`)
            .run({
            runId,
            name,
            filepath,
            duration: 0,
            status: "running",
        }).lastInsertRowid;
    }
    return evaluationId;
};
/**
 * Inserts a new row into `runs`.
 *
 * @param {object} opts
 * @param opts.db - Database handle exposing `prepare()`.
 * @param opts.runType - Value stored in `runs.runType`.
 * @returns `lastInsertRowid` of the inserted row.
 */
export const createRun = ({ db, runType, }) => {
    // `@runType` is bound from the object passed to `run()`.
    return db
        .prepare(`INSERT INTO runs (runType) VALUES (@runType)`)
        .run({ runType }).lastInsertRowid;
};
/**
 * Inserts a row into `results`.
 *
 * `input`, `expected`, `output` and `renderedColumns` are serialized with
 * `JSON.stringify` before being stored.
 *
 * @param {object} opts
 * @param opts.db - Database handle exposing `prepare()`.
 * @param opts.evalId - Parent eval (`results.eval_id`).
 * @param opts.order - Position of the result (`results.col_order`).
 * @param opts.input - Stored as JSON in `results.input`.
 * @param opts.expected - Stored as JSON in `results.expected`.
 * @param opts.output - Stored as JSON in `results.output`.
 * @param opts.duration - Stored in `results.duration`.
 * @param opts.status - Stored in `results.status`.
 * @param opts.renderedColumns - Stored as JSON in `results.rendered_columns`.
 * @returns `lastInsertRowid` of the inserted row.
 */
export const insertResult = ({ db, evalId, order, input, expected, output, duration, status, renderedColumns, }) => {
    // Each named parameter matches a key of the object passed to `run()`.
    return db
        .prepare(`INSERT INTO results (eval_id, col_order, input, expected, output, duration, status, rendered_columns)
VALUES (@eval_id, @col_order, @input, @expected, @output, @duration, @status, @rendered_columns)`)
        .run({
        eval_id: evalId,
        col_order: order,
        input: JSON.stringify(input),
        expected: JSON.stringify(expected),
        output: JSON.stringify(output),
        duration,
        status,
        rendered_columns: JSON.stringify(renderedColumns),
    }).lastInsertRowid;
};
/**
 * Overwrites the output, duration, input, expected value, status and
 * rendered columns of an existing `results` row.
 *
 * `output`, `input`, `expected` and `renderedColumns` are serialized with
 * `JSON.stringify` before being stored.
 *
 * @param {object} opts
 * @param opts.db - Database handle exposing `prepare()`.
 * @param opts.resultId - Id of the row to update.
 * @param opts.output - Stored as JSON in `results.output`.
 * @param opts.duration - Stored in `results.duration`.
 * @param opts.status - Stored in `results.status`.
 * @param opts.renderedColumns - Stored as JSON in `results.rendered_columns`.
 * @param opts.input - Stored as JSON in `results.input`.
 * @param opts.expected - Stored as JSON in `results.expected`.
 */
export const updateResult = ({ db, resultId, output, duration, status, renderedColumns, input, expected, }) => {
    // Each named parameter matches a key of the object passed to `run()`.
    db.prepare(`UPDATE results
SET
output = @output,
duration = @duration,
input = @input,
expected = @expected,
status = @status,
rendered_columns = @rendered_columns
WHERE id = @id`).run({
        id: resultId,
        output: JSON.stringify(output),
        duration,
        status,
        rendered_columns: JSON.stringify(renderedColumns),
        input: JSON.stringify(input),
        expected: JSON.stringify(expected),
    });
};
/**
 * Inserts a row into `scores`.
 *
 * @param {object} opts
 * @param opts.db - Database handle exposing `prepare()`.
 * @param opts.resultId - Parent result (`scores.result_id`).
 * @param opts.description - Stored in `scores.description`.
 * @param opts.name - Stored in `scores.name`.
 * @param opts.score - Stored in `scores.score`.
 * @param opts.metadata - Serialized with `JSON.stringify` into `scores.metadata`.
 */
export const insertScore = ({ db, resultId, description, name, score, metadata, }) => {
    // Each named parameter matches a key of the object passed to `run()`.
    db.prepare(`INSERT INTO scores (result_id, name, score, metadata, description)
VALUES (@result_id, @name, @score, @metadata, @description)`).run({
        result_id: resultId,
        description,
        name,
        score,
        metadata: JSON.stringify(metadata),
    });
};
/**
 * Inserts a row into `traces`.
 *
 * `input` and `output` are serialized with `JSON.stringify`; `start` and
 * `end` are rounded to the nearest integer because `start_time` and
 * `end_time` are INTEGER columns.
 *
 * @param {object} opts
 * @param opts.db - Database handle exposing `prepare()`.
 * @param opts.resultId - Parent result (`traces.result_id`).
 * @param opts.input - Stored as JSON in `traces.input`.
 * @param opts.output - Stored as JSON in `traces.output`.
 * @param {number} opts.start - Stored rounded in `traces.start_time`.
 * @param {number} opts.end - Stored rounded in `traces.end_time`.
 * @param opts.promptTokens - Stored in `traces.prompt_tokens`.
 * @param opts.completionTokens - Stored in `traces.completion_tokens`.
 * @param opts.order - Position of the trace (`traces.col_order`).
 */
export const insertTrace = ({ db, resultId, input, output, start, end, promptTokens, completionTokens, order, }) => {
    // Each named parameter matches a key of the object passed to `run()`.
    db.prepare(`INSERT INTO traces (result_id, input, output, start_time, end_time, prompt_tokens, completion_tokens, col_order)
VALUES (@result_id, @input, @output, @start_time, @end_time, @prompt_tokens, @completion_tokens, @col_order)`).run({
        result_id: resultId,
        input: JSON.stringify(input),
        output: JSON.stringify(output),
        start_time: Math.round(start),
        end_time: Math.round(end),
        prompt_tokens: promptTokens,
        completion_tokens: completionTokens,
        col_order: order,
    });
};
/**
 * Sets the status of an eval and recomputes its duration as the largest
 * `duration` among its results.
 *
 * @param {object} opts
 * @param opts.db - Database handle exposing `prepare()`.
 * @param opts.evalId - Id of the eval to update.
 * @param opts.status - New value for `evals.status`.
 */
export const updateEvalStatusAndDuration = ({ db, evalId, status, }) => {
    // `@id` is used twice: to pick the eval row and to select its results.
    // MAX() over zero rows is NULL, which `evals.duration` (NOT NULL) would
    // reject, so an eval without results falls back to a duration of 0.
    db.prepare(`UPDATE evals
SET status = @status,
duration = COALESCE((SELECT MAX(duration) FROM results WHERE eval_id = @id), 0)
WHERE id = @id`).run({
        id: evalId,
        status,
    });
};
/**
 * Looks up the id of the result at position `order` within eval `evalId`.
 *
 * @param {object} opts
 * @param opts.db - Database handle exposing `prepare()`.
 * @param opts.evalId - Value compared against `results.eval_id`.
 * @param opts.order - Value compared against `results.col_order`.
 * @returns The result id, or `undefined` when `get()` yields no row.
 */
export const findResultByEvalIdAndOrder = ({ db, evalId, order, }) => {
    // Named parameters match the keys of the object passed to `get()`.
    return db
        .prepare(`SELECT id FROM results WHERE eval_id = @eval_id AND col_order = @col_order`)
        .get({
        eval_id: evalId,
        col_order: order,
    })?.id;
};
/**
 * Lists the id and status of every result belonging to an eval.
 *
 * @param {object} opts
 * @param opts.db - Database handle exposing `prepare()`.
 * @param opts.evalId - Value compared against `results.eval_id`.
 * @returns Rows of `{ id, status }`.
 */
export const getAllResultsForEval = ({ db, evalId, }) => {
    // `@eval_id` is bound from the object passed to `all()`.
    return db
        .prepare(`SELECT id, status FROM results WHERE eval_id = @eval_id`)
        .all({ eval_id: evalId });
};
//# sourceMappingURL=db.js.map