langsmith
Version:
Client library to connect to the LangSmith LLM Tracing and Evaluation Platform.
257 lines (256 loc) • 9.33 kB
JavaScript
import { Table } from "console-table-printer";
import chalk from "chalk";
import * as os from "node:os";
import * as path from "node:path";
import * as fs from "node:fs/promises";
import { STRIP_ANSI_REGEX, TEST_ID_DELIMITER } from "./constants.js";
// When the combined character length of all feedback keys exceeds this value,
// the per-key feedback columns are merged into a single "Feedback" column.
const FEEDBACK_COLLAPSE_THRESHOLD = 48;
// Maximum length of one formatted "key: value" entry (longer entries are cut
// and suffixed with "..."); also used as the minimum width of the Inputs,
// Reference Outputs and Outputs columns.
const MAX_TEST_PARAMS_LENGTH = 18;
// Feedback keys listed here are ignored when collecting feedback scores, so
// they never become their own table column.
const RESERVED_KEYS = [
"Name",
"Result",
"Inputs",
"Reference Outputs",
"Outputs",
"pass",
];
/**
 * Builds the label for a test, appending its duration when one is known.
 *
 * @param {string} name - Test name.
 * @param {number | null | undefined} duration - Duration in milliseconds;
 *   omitted from the label when `null` or `undefined`.
 * @returns {string} `name`, or `name (<duration>ms)` when a duration is given.
 */
function formatTestName(name, duration) {
  return duration == null ? name : `${name} (${duration}ms)`;
}
/**
 * Converts a test status into the coloured label shown in the Status column.
 *
 * Matching is case-insensitive: "pending"/"skipped" must match exactly, while
 * pass/fail only need to appear somewhere in the status text.
 *
 * @param {string} status - Status reported for the test.
 * @returns {string} A chalk-coloured label, or `status` unchanged when it is
 *   not recognised.
 */
function getFormattedStatus(status) {
  const normalized = status.toLowerCase();
  if (["pending", "skipped"].includes(normalized)) {
    return chalk.yellow("○ Skipped");
  }
  if (normalized.includes("pass")) {
    return chalk.green("✓ Passed");
  }
  if (normalized.includes("fail")) {
    return chalk.red("✕ Failed");
  }
  return status;
}
/**
 * Maps a test status to the row-style options passed alongside a table row.
 *
 * Matching is case-insensitive and mirrors the status classification used for
 * the Status label: skipped/pending rows are yellow, passing rows grey and
 * failing rows red.
 *
 * @param {string} status - Status reported for the test.
 * @returns {{ color?: string }} Row options; empty for unrecognised statuses.
 */
function getColorParam(status) {
  const normalized = status.toLowerCase();
  let color;
  if (["pending", "skipped"].includes(normalized)) {
    color = "yellow";
  } else if (normalized.includes("pass")) {
    color = "grey";
  } else if (normalized.includes("fail")) {
    color = "red";
  }
  return color === undefined ? {} : { color };
}
/**
 * Renders a value for display inside a table cell.
 *
 * Objects (including arrays) become one `key: value` line per own enumerable
 * entry; each line is capped at MAX_TEST_PARAMS_LENGTH characters, with the
 * tail replaced by "..." when it is cut. String entry values are used as-is,
 * everything else goes through JSON.stringify.
 *
 * @param {unknown} value - Value to render.
 * @returns {string | undefined} The rendered text, or `undefined` for
 *   `null`/`undefined` input.
 */
function formatValue(value) {
  if (value == null) {
    return undefined;
  }
  if (typeof value !== "object") {
    return String(value);
  }
  const lines = [];
  for (const [key, entryValue] of Object.entries(value)) {
    const text = typeof entryValue === "string" ? entryValue : JSON.stringify(entryValue);
    const line = `${key}: ${text}`;
    if (line.length > MAX_TEST_PARAMS_LENGTH) {
      lines.push(line.slice(0, MAX_TEST_PARAMS_LENGTH - 3) + "...");
    } else {
      lines.push(line);
    }
  }
  return lines.join("\n");
}
/**
 * Picks the chalk style for a feedback score from its distance to the column
 * mean, expressed in standard deviations.
 *
 * @param {number} deviation - `(score - mean) / stdDev`; NaN when it cannot be
 *   computed (e.g. zero standard deviation).
 * @returns {Function} A chalk style function to apply to the key and score.
 */
function pickFeedbackStyle(deviation) {
  if (Number.isNaN(deviation) || deviation === 0) {
    return chalk.white;
  }
  if (deviation <= -1) {
    return chalk.redBright;
  }
  if (deviation < -0.5) {
    return chalk.red;
  }
  if (deviation < 0) {
    return chalk.yellow;
  }
  if (deviation <= 0.5) {
    return chalk.green;
  }
  return chalk.greenBright;
}

/**
 * Loads and parses one test's evaluation results file, then removes the file.
 *
 * @param {string} resultsPath - Path of the JSON results file.
 * @returns {Promise<object | undefined>} The parsed content, or `undefined`
 *   when the file cannot be read, is not valid JSON, or does not hold an
 *   object.
 */
async function readEvaluationResults(resultsPath) {
  let parsed;
  try {
    parsed = JSON.parse(await fs.readFile(resultsPath, "utf-8"));
  } catch {
    return undefined;
  }
  try {
    await fs.unlink(resultsPath);
  } catch {
    // Best-effort cleanup: a leftover file must not discard results that were
    // already read successfully.
  }
  return parsed !== null && typeof parsed === "object" ? parsed : undefined;
}

/**
 * Prints a results table for a test suite to the console.
 *
 * Every result gets at least a Test/Status row. For results whose title
 * carries a test id (the part after the last TEST_ID_DELIMITER) and that are
 * not pending/skipped, the matching `<tmpdir>/langsmith_test_results/<id>.json`
 * file is read to add inputs, reference outputs, outputs and feedback scores.
 * Feedback scores are coloured by how far they sit from the mean of their
 * key, and are merged into a single "Feedback" column when the keys are
 * collectively too long to show as separate columns.
 *
 * @param {string | undefined} testSuiteName - Suite name printed above the
 *   table when truthy.
 * @param {Iterable<{ title: string, duration?: number | null, status: string }>} results
 *   Per-test results.
 * @param {string} testStatus - Overall suite status; selects the colour of
 *   the suite name line.
 * @param {string | undefined} failureMessage - Printed above the table when
 *   truthy.
 * @returns {Promise<void>}
 */
export async function printReporterTable(testSuiteName, results, testStatus, failureMessage) {
  const rows = [];
  const feedbackKeys = new Set();
  let experimentUrl;
  for (const result of results) {
    const { title, duration, status } = result;
    const titleComponents = title.split(TEST_ID_DELIMITER);
    const testId = titleComponents.length > 1 ? titleComponents.at(-1) : undefined;
    const testName = testId !== undefined
      ? titleComponents.slice(0, -1).join(TEST_ID_DELIMITER).trim()
      : titleComponents.join(TEST_ID_DELIMITER);
    const buildBasicRow = () => [
      {
        Test: formatTestName(testName, duration),
        Status: getFormattedStatus(status),
      },
      getColorParam(status),
    ];
    // Non-LangSmith tests and skipped tests have no results file to load.
    if (testId === undefined || status === "pending" || status === "skipped") {
      rows.push(buildBasicRow());
      continue;
    }
    const resultsPath = path.join(os.tmpdir(), "langsmith_test_results", `${testId}.json`);
    const fileContent = await readEvaluationResults(resultsPath);
    if (fileContent === undefined) {
      console.log("[LANGSMITH]: Failed to read custom evaluation results. Please contact us for help.");
      rows.push(buildBasicRow());
      continue;
    }
    // A results file without a feedback array simply contributes no scores.
    const feedbackEntries = Array.isArray(fileContent.feedback)
      ? fileContent.feedback
      : [];
    const feedback = {};
    for (const current of feedbackEntries) {
      if (!RESERVED_KEYS.includes(current.key) && current.score !== undefined) {
        feedbackKeys.add(current.key);
        feedback[current.key] = current.score;
      }
    }
    experimentUrl = experimentUrl ?? fileContent.experimentUrl;
    rows.push([
      {
        Test: formatTestName(testName, duration),
        Inputs: formatValue(fileContent.inputs),
        "Reference Outputs": formatValue(fileContent.referenceOutputs),
        Outputs: formatValue(fileContent.outputs),
        Status: getFormattedStatus(status),
        ...feedback,
      },
      getColorParam(status),
    ]);
  }
  const feedbackKeysTotalLength = [...feedbackKeys].reduce((total, key) => total + key.length, 0);
  const collapseFeedbackColumn = feedbackKeysTotalLength > FEEDBACK_COLLAPSE_THRESHOLD;
  for (const key of feedbackKeys) {
    const scores = rows
      .map(([cells]) => cells[key])
      .filter((score) => score !== undefined);
    if (scores.length === 0) {
      continue;
    }
    const mean = scores.reduce((sum, score) => sum + score, 0) / scores.length;
    const variance = scores.reduce((sum, score) => sum + Math.pow(score - mean, 2), 0) / scores.length;
    const stdDev = Math.sqrt(variance);
    for (const [cells] of rows) {
      const score = cells[key];
      if (score === undefined) {
        continue;
      }
      // Zero spread yields 0 / 0 = NaN, which is rendered in the neutral style.
      const style = pickFeedbackStyle((score - mean) / stdDev);
      const coloredKey = style(`${key}:`);
      const coloredScore = style(score);
      if (collapseFeedbackColumn) {
        // Move the score out of its own column into the shared Feedback cell.
        delete cells[key];
        cells.Feedback = cells.Feedback === undefined
          ? `${coloredKey} ${coloredScore}`
          : `${cells.Feedback}\n${coloredKey} ${coloredScore}`;
      } else {
        cells[key] = coloredScore;
      }
    }
  }
  const defaultColumns = [
    { name: "Test", alignment: "left", maxLen: 36 },
    { name: "Inputs", alignment: "left", minLen: MAX_TEST_PARAMS_LENGTH },
    {
      name: "Reference Outputs",
      alignment: "left",
      minLen: MAX_TEST_PARAMS_LENGTH,
    },
    { name: "Outputs", alignment: "left", minLen: MAX_TEST_PARAMS_LENGTH },
    { name: "Status", alignment: "left" },
  ];
  if (collapseFeedbackColumn) {
    // Width is measured on the visible text, so colour codes are stripped.
    const feedbackColumnLength = rows.reduce((widest, [cells]) => {
      const feedbackLines = cells.Feedback?.split("\n") ?? [];
      const widestLine = feedbackLines.reduce((lineMax, feedbackLine) => Math.max(lineMax, feedbackLine.replace(STRIP_ANSI_REGEX, "").length), 0);
      return Math.max(widest, widestLine);
    }, 0);
    defaultColumns.push({
      name: "Feedback",
      alignment: "left",
      minLen: feedbackColumnLength + 8,
    });
  }
  console.log();
  const table = new Table({
    columns: defaultColumns,
    colorMap: {
      grey: "\x1b[90m",
    },
  });
  for (const [cells, rowOptions] of rows) {
    table.addRow(cells, rowOptions);
  }
  const testStatusColor = testStatus.includes("pass")
    ? chalk.green
    : testStatus.includes("fail")
      ? chalk.red
      : chalk.yellow;
  if (testSuiteName) {
    console.log(testStatusColor(`› ${testSuiteName}`));
  }
  if (failureMessage) {
    console.log(failureMessage);
  }
  table.printTable();
  if (experimentUrl) {
    console.log();
    console.log(` [LANGSMITH]: View full results in LangSmith at ${experimentUrl}`);
    console.log();
  }
}