You can now run Experiments using the Phoenix JS client! Use Experiments to test different iterations of your application against a set of test cases, then evaluate the results.
This release includes:
Native tracing of tasks and evaluators
Async concurrency queues
Support for any evaluator (including bring your own evals)
Code Implementation
import { createClient } from "@arizeai/phoenix-client";
import {
  asEvaluator,
  runExperiment,
} from "@arizeai/phoenix-client/experiments";
import type { Example } from "@arizeai/phoenix-client/types/datasets";
import { Factuality } from "autoevals";
import OpenAI from "openai";

const phoenix = createClient();
const openai = new OpenAI();

/** Your AI Task: send each dataset example's input to the model and return its reply */
const task = async (example: Example) => {
  const response = await openai.chat.completions.create({
    model: "gpt-4o",
    messages: [
      { role: "system", content: "You are a helpful assistant." },
      { role: "user", content: JSON.stringify(example.input, null, 2) },
    ],
  });
  return response.choices[0]?.message?.content ?? "No response";
};

// Run the task over every example in the dataset, then score each run
await runExperiment({
  dataset: "dataset_id",
  experimentName: "experiment_name",
  client: phoenix,
  task,
  evaluators: [
    // Wrap the autoevals Factuality scorer as a Phoenix evaluator
    asEvaluator({
      name: "Factuality",
      kind: "LLM",
      evaluate: async (params) => {
        const result = await Factuality({
          output: JSON.stringify(params.output, null, 2),
          input: JSON.stringify(params.input, null, 2),
          expected: JSON.stringify(params.expected, null, 2),
        });
        return {
          score: result.score,
          label: result.name,
          explanation: (result.metadata?.rationale as string) ?? "",
          metadata: result.metadata ?? {},
        };
      },
    }),
  ],
});
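Because an evaluator is just an async function wrapped with asEvaluator, you can also bring your own scoring logic with no external eval library. The sketch below reuses the phoenix client and task from the example above and adds a simple exact-match evaluator; the "CODE" kind, the experiment name, and the concurrency option are assumptions added to illustrate the async concurrency queues, so check the client's exported types for the exact names.

// Minimal bring-your-own evaluator sketch.
// NOTE: kind: "CODE" and the concurrency option are assumptions; verify them
// against the Evaluator and RunExperimentParams types exported by the client.
const exactMatch = asEvaluator({
  name: "Exact Match",
  kind: "CODE",
  evaluate: async (params) => {
    const matched =
      JSON.stringify(params.output) === JSON.stringify(params.expected);
    return {
      score: matched ? 1 : 0,
      label: matched ? "match" : "no_match",
      explanation: matched
        ? "Output matches the expected value exactly."
        : "Output differs from the expected value.",
      metadata: {},
    };
  },
});

await runExperiment({
  dataset: "dataset_id",
  experimentName: "exact_match_experiment",
  client: phoenix,
  task,
  evaluators: [exactMatch],
  concurrency: 5, // assumed option: process up to 5 examples at a time
});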