docs: ts demo experiment (#10409)

s-yeddula · web-flow · commit 24306555e595 · 2025-11-25T15:37:48.000-08:00
* add ts experiment

* fix output type

* fix formatting

* fixing lint error
diff --git a/js/examples/apps/demo-document-relevancy-experiment/README.md b/js/examples/apps/demo-document-relevancy-experiment/README.md
@@ -0,0 +1,52 @@
+# TypeScript Experiments and Evals with Arize Phoenix
+
+This demo application shows how to run experiments and evaluations in TypeScript with Arize Phoenix. It uses **[Phoenix Cloud](https://app.phoenix.arize.com/)** to manage datasets, run experiments, and evaluate LLM outputs.
+
+## Overview
+
+The demo consists of two main files:
+
+- `app.ts`: Contains a space knowledge application that retrieves relevant context from a knowledge base using OpenAI
+- `experiment.ts`: Sets up and runs an experiment that evaluates the document relevancy of the retrieved context
+
+## Required Packages
+
+To run this demo, you'll need to install the following packages:
+
+```bash
+npm install dotenv openai @arizeai/openinference-instrumentation-openai @ai-sdk/openai @arizeai/phoenix-client @arizeai/phoenix-evals
+```
+
+## Setup
+
+1. Install the required packages (see above)
+
+2. Create a `.env` file in this demo's directory (the directory you run the experiment from) with your API keys and Phoenix host:
+
+```env
+OPENAI_API_KEY=your-openai-api-key
+PHOENIX_HOST=your-phoenix-cloud-hostname
+PHOENIX_API_KEY=your-phoenix-api-key
+```
+
+## Running the Demo
+
+To run the experiment:
+
+```bash
+npx tsx experiment.ts
+```
+
+## How It Works
+
+1. **app.ts**: Implements a space knowledge retrieval application that:
+   - Uses OpenAI's GPT-4o-mini model to retrieve relevant context from a knowledge base
+   - Returns the 1-3 most relevant pieces of information for a query
+
+2. **experiment.ts**:
+   - Creates a dataset with space-related questions
+   - Runs the `spaceKnowledgeApplication` function for each question in the dataset
+   - Uses Phoenix Evals to evaluate the retrieved context using a document relevancy evaluator
+   - Sends the experiment results to Phoenix Cloud for analysis
+
+The experiment results will be available in your Phoenix Cloud account, where you can analyze the performance of your application and view evaluation metrics.
diff --git a/js/examples/apps/demo-document-relevancy-experiment/app.ts b/js/examples/apps/demo-document-relevancy-experiment/app.ts
@@ -0,0 +1,78 @@
+import { OpenAIInstrumentation } from "@arizeai/openinference-instrumentation-openai";
+
+import "dotenv/config";
+
+import OpenAI from "openai";
+
+// Apply the OpenInference instrumentation to the OpenAI SDK by hand, before
+// the client below is constructed.
+new OpenAIInstrumentation().manuallyInstrument(OpenAI);
+
+// Shared OpenAI client used by `spaceKnowledgeApplication`; the API key is
+// read from the OPENAI_API_KEY environment variable.
+const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
+
+/**
+ * In-memory knowledge base: ten short space facts, each with a numeric `id`
+ * and a `text`. `spaceKnowledgeApplication` sends only the `text` values to
+ * the model, joined by newlines.
+ */
+const SPACE_KNOWLEDGE_BASE = [
+  {
+    id: 1,
+    text: "Europa is one of Jupiter's moons believed to have a subsurface ocean beneath its icy crust.",
+  },
+  {
+    id: 2,
+    text: "Venus rotates in the opposite direction of most planets in the Solar System, a phenomenon called retrograde rotation.",
+  },
+  {
+    id: 3,
+    text: "The Sun accounts for approximately 99.8% of the Solar System's total mass.",
+  },
+  {
+    id: 4,
+    text: "The Kuiper Belt contains icy bodies and dwarf planets beyond Neptune's orbit, including Pluto.",
+  },
+  {
+    id: 5,
+    text: "Mars experiences planet-wide dust storms that can last for months and cover the entire planet.",
+  },
+  {
+    id: 6,
+    text: "No spacecraft has landed on Venus and survived for longer than a few hours due to extreme heat and pressure.",
+  },
+  {
+    id: 7,
+    text: "Saturn's moon Titan has lakes and rivers made of liquid methane and ethane, not water.",
+  },
+  {
+    id: 8,
+    text: "Jupiter's Great Red Spot is a massive storm that has been raging for at least 400 years.",
+  },
+  {
+    id: 9,
+    text: "Neptune has the fastest winds in the Solar System, reaching speeds up to 2,100 kilometers per hour.",
+  },
+  {
+    id: 10,
+    text: "Mercury has extreme temperature variations, ranging from 427°C during the day to -173°C at night.",
+  },
+];
+
+/**
+ * Retrieves the knowledge-base entries most relevant to `query`.
+ *
+ * The whole knowledge base is placed in the prompt and `gpt-4o-mini` is asked
+ * to reply with a JSON object of the form `{ "context": string[] }` holding
+ * 1-3 knowledge-base texts.
+ *
+ * @param query - The natural-language question to retrieve context for.
+ * @returns The retrieved texts. `context` is always an array of strings; it is
+ * empty when the model sends no content or a reply without a usable `context`
+ * array.
+ * @throws SyntaxError if the model reply is not valid JSON.
+ */
+export async function spaceKnowledgeApplication(
+  query: string
+): Promise<{ context: string[] }> {
+  const knowledgeBaseText = SPACE_KNOWLEDGE_BASE.map((item) => item.text).join(
+    "\n"
+  );
+
+  const response = await openai.chat.completions.create({
+    model: "gpt-4o-mini",
+    messages: [
+      {
+        role: "system",
+        content: `You are a retrieval system. Given a query and a knowledge base, you MUST select and return 1-3 most relevant pieces of information from the knowledge base. CRITICAL: You must ALWAYS return at least 1 piece of information. If nothing seems directly relevant to the query, you must still return the most tangentially related piece from the knowledge base. Never return an empty context array. Return ONLY a JSON object with "context" (array of 1-3 knowledge base texts). Do not provide an answer, only return the retrieved context.`,
+      },
+      {
+        role: "user",
+        content: `Knowledge Base:\n${knowledgeBaseText}\n\nQuery: ${query}\n\nReturn JSON with only the "context" array containing 1-3 most relevant knowledge base texts. REQUIRED: The context array must contain at least 1 item. If no direct match exists, return the most related item from the knowledge base.`,
+      },
+    ],
+    response_format: { type: "json_object" },
+  });
+
+  // `choices` may be empty and `content` may be null or "", none of which can
+  // be parsed, so fall back to an empty JSON object (`||` is deliberate here).
+  const rawReply = response.choices[0]?.message.content || "{}";
+  const parsed: unknown = JSON.parse(rawReply);
+
+  // The reply is model-generated, so narrow it instead of trusting its shape.
+  const candidate =
+    typeof parsed === "object" && parsed !== null && "context" in parsed
+      ? parsed.context
+      : undefined;
+  const context = Array.isArray(candidate)
+    ? candidate.filter((entry): entry is string => typeof entry === "string")
+    : [];
+
+  return { context };
+}
diff --git a/js/examples/apps/demo-document-relevancy-experiment/experiment.ts b/js/examples/apps/demo-document-relevancy-experiment/experiment.ts
@@ -0,0 +1,82 @@
+import { createDataset } from "@arizeai/phoenix-client/datasets";
+import {
+  asExperimentEvaluator,
+  runExperiment,
+} from "@arizeai/phoenix-client/experiments";
+import { createDocumentRelevancyEvaluator } from "@arizeai/phoenix-evals/llm/createDocumentRelevancyEvaluator";
+
+import "dotenv/config";
+
+import { spaceKnowledgeApplication } from "./app";
+
+import { openai } from "@ai-sdk/openai";
+
+/**
+ * Questions uploaded as the experiment dataset; each one becomes an example
+ * whose `input.question` holds the string.
+ *
+ * NOTE(review): some questions (e.g. the ones about the 2006 planetary
+ * classification change and about Uranus) have no matching entry in the
+ * knowledge base in `app.ts` — presumably to exercise low-relevancy
+ * retrievals; confirm.
+ */
+const DATASET = [
+  "Which moon might harbor life due to its unique geological features?",
+  "What theoretical region marks the outer boundary of the Solar System?",
+  "Which planet defies the typical rotation pattern observed in most celestial bodies?",
+  "Where in the Solar System would you experience the most extreme atmospheric conditions?",
+  "How dominant is the Sun's gravitational influence compared to all other objects in our solar system?",
+  "What region of the Solar System contains remnants from its early formation beyond the gas giants?",
+  "What significant change occurred in our understanding of planetary classification in 2006?",
+  "What environmental challenge would explorers face during certain seasons on Mars?",
+  "What makes Venus one of the most hostile environments for robotic exploration?",
+  "What unique liquid features exist on Saturn's largest moon?",
+  "What is the duration of the longest-observed storm in our Solar System?",
+  "Which celestial body experiences the most intense geological activity?",
+  "Which planet experiences the most dramatic temperature swings between day and night?",
+  "What region separates the inner and outer planets in our Solar System?",
+  "What unusual orbital characteristic makes Uranus unique among the planets?",
+];
+
+/**
+ * Creates the dataset from `DATASET`, runs the retrieval task over every
+ * example, and scores each task output with the document relevancy evaluator.
+ */
+async function main() {
+  /**
+   * Experiment task: retrieves context for the example's question.
+   *
+   * The parameter is typed structurally so the callback does not depend on an
+   * implicit `any`.
+   */
+  async function task(example: { input: Record<string, unknown> }) {
+    const question = String(example.input.question);
+    const result = await spaceKnowledgeApplication(question);
+    return result.context || "";
+  }
+
+  const dataset = await createDataset({
+    name: "document-relevancy-eval",
+    description:
+      "Queries that are answered by extracting context from the space knowledge base",
+    examples: DATASET.map((question) => ({
+      input: {
+        question,
+      },
+    })),
+  });
+
+  const documentRelevancyEvaluator = createDocumentRelevancyEvaluator({
+    model: openai("gpt-5"),
+  });
+
+  const documentRelevancyCheck = asExperimentEvaluator({
+    name: "document-relevancy",
+    kind: "LLM",
+    evaluate: async ({ input, output }) => {
+      // The task returns an array of retrieved texts. `String(array)` would
+      // join them with commas, which also occur inside the texts themselves,
+      // so put each retrieved text on its own line instead.
+      const documentText = Array.isArray(output)
+        ? output.join("\n")
+        : String(output);
+
+      // Use the document relevancy evaluator from phoenix-evals
+      const result = await documentRelevancyEvaluator.evaluate({
+        input: String(input.question),
+        documentText,
+      });
+
+      return result;
+    },
+  });
+
+  await runExperiment({
+    experimentName: "document-relevancy-experiment",
+    experimentDescription:
+      "Evaluate the relevancy of extracted context from a knowledge base",
+    dataset,
+    task,
+    evaluators: [documentRelevancyCheck],
+  });
+}
+
+/** Logs a failed run and terminates the process with exit code 1. */
+function reportFailure(failure: unknown): never {
+  // eslint-disable-next-line no-console
+  console.error(failure);
+  process.exit(1);
+}
+
+main().catch(reportFailure);