You can now run Experiments using the Phoenix JS client! Use Experiments to test different iterations of your application against a set of test cases, then evaluate the results.
This release includes:
Native tracing of tasks and evaluators
Async concurrency queues
Support for any evaluator (including bring your own evals)
Code Implementation
import { createClient } from "@arizeai/phoenix-client";
import {
  asEvaluator,
  runExperiment,
} from "@arizeai/phoenix-client/experiments";
import type { Example } from "@arizeai/phoenix-client/types/datasets";
import { Factuality } from "autoevals";
import OpenAI from "openai";

const phoenix = createClient();
const openai = new OpenAI();

/** Your AI Task: send each dataset example's input to the model and return its reply */
const task = async (example: Example) => {
  const response = await openai.chat.completions.create({
    model: "gpt-4o",
    messages: [
      { role: "system", content: "You are a helpful assistant." },
      { role: "user", content: JSON.stringify(example.input, null, 2) },
    ],
  });
  return response.choices[0]?.message?.content ?? "No response";
};

// Run the task over every example in the dataset, then score each run
await runExperiment({
  dataset: "dataset_id",
  experimentName: "experiment_name",
  client: phoenix,
  task,
  evaluators: [
    // Wrap the autoevals Factuality scorer as a Phoenix evaluator
    asEvaluator({
      name: "Factuality",
      kind: "LLM",
      evaluate: async (params) => {
        const result = await Factuality({
          output: JSON.stringify(params.output, null, 2),
          input: JSON.stringify(params.input, null, 2),
          expected: JSON.stringify(params.expected, null, 2),
        });
        return {
          score: result.score,
          label: result.name,
          explanation: (result.metadata?.rationale as string) ?? "",
          metadata: result.metadata ?? {},
        };
      },
    }),
  ],
});
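Because an evaluator is just an async function wrapped with asEvaluator, you can also bring your own scoring logic with no external eval library. The sketch below reuses the phoenix client and task from the example above and adds a simple exact-match evaluator; the "CODE" kind, the experiment name, and the concurrency option are assumptions added to illustrate the async concurrency queues, so check the client's exported types for the exact names.

// Minimal bring-your-own evaluator sketch.
// NOTE: kind: "CODE" and the concurrency option are assumptions; verify them
// against the Evaluator and RunExperimentParams types exported by the client.
const exactMatch = asEvaluator({
  name: "Exact Match",
  kind: "CODE",
  evaluate: async (params) => {
    const matched =
      JSON.stringify(params.output) === JSON.stringify(params.expected);
    return {
      score: matched ? 1 : 0,
      label: matched ? "match" : "no_match",
      explanation: matched
        ? "Output matches the expected value exactly."
        : "Output differs from the expected value.",
      metadata: {},
    };
  },
});

await runExperiment({
  dataset: "dataset_id",
  experimentName: "exact_match_experiment",
  client: phoenix,
  task,
  evaluators: [exactMatch],
  concurrency: 5, // assumed option: process up to 5 examples at a time
});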