Skip to content

Commit 2430655

Browse files
authored
docs: ts demo experiment (#10409)
* add ts experiment * fix output type * fix formatting * fixing lint error
1 parent 230a90c commit 2430655

File tree

3 files changed

+212
-0
lines changed

3 files changed

+212
-0
lines changed
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# TypeScript Experiments and Evals with Arize Phoenix
2+
3+
This demo application shows how to run experiments and evaluations in TypeScript with Arize Phoenix. It uses **[Phoenix Cloud](https://app.phoenix.arize.com/)** to manage datasets, run experiments, and evaluate LLM outputs.
4+
5+
## Overview
6+
7+
The demo consists of two main files:
8+
9+
- `app.ts`: Contains a space knowledge application that retrieves relevant context from a knowledge base using OpenAI
10+
- `experiment.ts`: Sets up and runs an experiment that evaluates the document relevancy of the retrieved context
11+
12+
## Required Packages
13+
14+
To run this demo, you'll need to install the following packages:
15+
16+
```bash
17+
npm install dotenv openai @arizeai/openinference-instrumentation-openai @ai-sdk/openai @arizeai/phoenix-client @arizeai/phoenix-evals
18+
```
19+
20+
## Setup
21+
22+
1. Install the required packages (see above)
23+
24+
2. Create a `.env` file in the root directory with your API keys:
25+
26+
```env
27+
OPENAI_API_KEY=your-openai-api-key
28+
PHOENIX_HOST=your-phoenix-cloud-hostname
29+
PHOENIX_API_KEY=your-phoenix-api-key
30+
```
31+
32+
## Running the Demo
33+
34+
To run the experiment:
35+
36+
```bash
37+
npx tsx experiment.ts
38+
```
39+
40+
## How It Works
41+
42+
1. **app.ts**: Implements a space knowledge retrieval application that:
43+
- Uses OpenAI's GPT-4o-mini model to retrieve relevant context from a knowledge base
44+
- Returns 1-3 most relevant pieces of information based on a query
45+
46+
2. **experiment.ts**:
47+
- Creates a dataset with space-related questions
48+
- Runs the `spaceKnowledgeApplication` function for each question in the dataset
49+
- Uses Phoenix Evals to evaluate the retrieved context using a document relevancy evaluator
50+
- Sends the experiment results to Phoenix Cloud for analysis
51+
52+
The experiment results will be available in your Phoenix Cloud account, where you can analyze the performance of your application and view evaluation metrics.
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import { OpenAIInstrumentation } from "@arizeai/openinference-instrumentation-openai";
2+
3+
import "dotenv/config";
4+
5+
import OpenAI from "openai";
6+
7+
const instrumentation = new OpenAIInstrumentation();
8+
instrumentation.manuallyInstrument(OpenAI);
9+
10+
const openai = new OpenAI({
11+
apiKey: process.env.OPENAI_API_KEY,
12+
});
13+
14+
const SPACE_KNOWLEDGE_BASE = [
15+
{
16+
id: 1,
17+
text: "Europa is one of Jupiter's moons believed to have a subsurface ocean beneath its icy crust.",
18+
},
19+
{
20+
id: 2,
21+
text: "Venus rotates in the opposite direction of most planets in the Solar System, a phenomenon called retrograde rotation.",
22+
},
23+
{
24+
id: 3,
25+
text: "The Sun accounts for approximately 99.8% of the Solar System's total mass.",
26+
},
27+
{
28+
id: 4,
29+
text: "The Kuiper Belt contains icy bodies and dwarf planets beyond Neptune's orbit, including Pluto.",
30+
},
31+
{
32+
id: 5,
33+
text: "Mars experiences planet-wide dust storms that can last for months and cover the entire planet.",
34+
},
35+
{
36+
id: 6,
37+
text: "No spacecraft has landed on Venus and survived for longer than a few hours due to extreme heat and pressure.",
38+
},
39+
{
40+
id: 7,
41+
text: "Saturn's moon Titan has lakes and rivers made of liquid methane and ethane, not water.",
42+
},
43+
{
44+
id: 8,
45+
text: "Jupiter's Great Red Spot is a massive storm that has been raging for at least 400 years.",
46+
},
47+
{
48+
id: 9,
49+
text: "Neptune has the fastest winds in the Solar System, reaching speeds up to 2,100 kilometers per hour.",
50+
},
51+
{
52+
id: 10,
53+
text: "Mercury has extreme temperature variations, ranging from 427°C during the day to -173°C at night.",
54+
},
55+
];
56+
57+
export async function spaceKnowledgeApplication(query: string) {
58+
const knowledgeBaseText = SPACE_KNOWLEDGE_BASE.map((item) => item.text).join(
59+
"\n"
60+
);
61+
62+
const response = await openai.chat.completions.create({
63+
model: "gpt-4o-mini",
64+
messages: [
65+
{
66+
role: "system",
67+
content: `You are a retrieval system. Given a query and a knowledge base, you MUST select and return 1-3 most relevant pieces of information from the knowledge base. CRITICAL: You must ALWAYS return at least 1 piece of information. If nothing seems directly relevant to the query, you must still return the most tangentially related piece from the knowledge base. Never return an empty context array. Return ONLY a JSON object with "context" (array of 1-3 knowledge base texts). Do not provide an answer, only return the retrieved context.`,
68+
},
69+
{
70+
role: "user",
71+
content: `Knowledge Base:\n${knowledgeBaseText}\n\nQuery: ${query}\n\nReturn JSON with only the "context" array containing 1-3 most relevant knowledge base texts. REQUIRED: The context array must contain at least 1 item. If no direct match exists, return the most related item from the knowledge base.`,
72+
},
73+
],
74+
response_format: { type: "json_object" },
75+
});
76+
77+
return JSON.parse(response.choices[0].message.content || "{}");
78+
}
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { createDataset } from "@arizeai/phoenix-client/datasets";
2+
import {
3+
asExperimentEvaluator,
4+
runExperiment,
5+
} from "@arizeai/phoenix-client/experiments";
6+
import { createDocumentRelevancyEvaluator } from "@arizeai/phoenix-evals/llm/createDocumentRelevancyEvaluator";
7+
8+
import "dotenv/config";
9+
10+
import { spaceKnowledgeApplication } from "./app";
11+
12+
import { openai } from "@ai-sdk/openai";
13+
14+
// Fifteen space-trivia queries used as the experiment inputs. The questions
// are phrased indirectly so the retriever must reason about relevance rather
// than match keywords; a few (e.g. the 2006 reclassification, Uranus's
// orbital characteristic) appear to have no direct match in the knowledge
// base in app.ts, exercising the "always return at least one item" fallback.
const DATASET = [
  "Which moon might harbor life due to its unique geological features?",
  "What theoretical region marks the outer boundary of the Solar System?",
  "Which planet defies the typical rotation pattern observed in most celestial bodies?",
  "Where in the Solar System would you experience the most extreme atmospheric conditions?",
  "How dominant is the Sun's gravitational influence compared to all other objects in our solar system?",
  "What region of the Solar System contains remnants from its early formation beyond the gas giants?",
  "What significant change occurred in our understanding of planetary classification in 2006?",
  "What environmental challenge would explorers face during certain seasons on Mars?",
  "What makes Venus one of the most hostile environments for robotic exploration?",
  "What unique liquid features exist on Saturn's largest moon?",
  "What is the duration of the longest-observed storm in our Solar System?",
  "Which celestial body experiences the most intense geological activity?",
  "Which planet experiences the most dramatic temperature swings between day and night?",
  "What region separates the inner and outer planets in our Solar System?",
  "What unusual orbital characteristic makes Uranus unique among the planets?",
];
31+
32+
async function main() {
33+
async function task(example) {
34+
const question = example.input.question;
35+
const result = await spaceKnowledgeApplication(question);
36+
return result.context || "";
37+
}
38+
39+
const dataset = await createDataset({
40+
name: "document-relevancy-eval",
41+
description:
42+
"Queries that are answered by extracting context from the space knowledge base",
43+
examples: DATASET.map((question) => ({
44+
input: {
45+
question: question,
46+
},
47+
})),
48+
});
49+
50+
const documentRelevancyEvaluator = createDocumentRelevancyEvaluator({
51+
model: openai("gpt-5"),
52+
});
53+
54+
const documentRelevancyCheck = asExperimentEvaluator({
55+
name: "document-relevancy",
56+
kind: "LLM",
57+
evaluate: async ({ input, output }) => {
58+
// Use the document relevancy evaluator from phoenix-evals
59+
const result = await documentRelevancyEvaluator.evaluate({
60+
input: String(input.question),
61+
documentText: String(output),
62+
});
63+
64+
return result;
65+
},
66+
});
67+
68+
await runExperiment({
69+
experimentName: "document-relevancy-experiment",
70+
experimentDescription:
71+
"Evaluate the relevancy of extracted context from a knowledge base",
72+
dataset: dataset,
73+
task,
74+
evaluators: [documentRelevancyCheck],
75+
});
76+
}
77+
78+
main().catch((error) => {
79+
// eslint-disable-next-line no-console
80+
console.error(error);
81+
process.exit(1);
82+
});

0 commit comments

Comments
 (0)