Commit a7e03e3

refactor: make ts default eval prompts to be in message format (#10385)
* refactor: make ts default eval prompts to be in message format
* document relevance
* clean types
1 parent f08730b commit a7e03e3

File tree

4 files changed: +64, -15 lines

js/packages/phoenix-evals/src/default_templates/DOCUMENT_RELEVANCY_TEMPLATE.ts

Lines changed: 9 additions & 2 deletions

@@ -1,4 +1,9 @@
-export const DOCUMENT_RELEVANCY_TEMPLATE = `
+import { PromptTemplate } from "../types";
+
+export const DOCUMENT_RELEVANCY_TEMPLATE: PromptTemplate = [
+  {
+    role: "user",
+    content: `
 You are comparing a document to a question and trying to determine if the document text
 contains information relevant to answering the question. Here is the data:

@@ -17,7 +22,9 @@ Your response must be single word, either "relevant" or "unrelated",
 and should not contain any text or characters aside from that word.
 "unrelated" means that the document text does not contain an answer to the Question.
 "relevant" means the document text contains an answer to the Question.
-`;
+`,
+  },
+];

 export const DOCUMENT_RELEVANCY_CHOICES = {
   relevant: 1,
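
For context, a sketch of the message-format shape these templates now conform to. The actual PromptTemplate type lives in the package's "../types" module and may differ; the declarations and the {question}/{document} placeholders below are illustrative assumptions only.

// Hypothetical sketch of the message-format shape; the real PromptTemplate
// type in phoenix-evals' "../types" module may differ.
type PromptMessage = {
  role: "system" | "user" | "assistant";
  content: string;
};

type PromptTemplate = PromptMessage[];

// A default template is now an array of chat messages rather than a bare
// string, so evaluators can hand it to a chat model as-is.
const EXAMPLE_TEMPLATE: PromptTemplate = [
  {
    role: "user",
    content: "Is this document relevant to the question? {question} {document}",
  },
];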

js/packages/phoenix-evals/src/default_templates/HALLUCINATION_TEMPLATE.ts

Lines changed: 9 additions & 2 deletions

@@ -1,4 +1,9 @@
-export const HALLUCINATION_TEMPLATE = `
+import { PromptTemplate } from "../types";
+
+export const HALLUCINATION_TEMPLATE: PromptTemplate = [
+  {
+    role: "user",
+    content: `
 In this task, you will be presented with a query, a reference text and an answer. The answer is
 generated to the question based on the reference text. The answer may contain false information. You
 must use the reference text to determine if the answer to the question contains false information,

@@ -23,7 +28,9 @@ your response.
 [END DATA]

 Is the answer above factual or hallucinated based on the query and reference text?
-`;
+`,
+  },
+];

 export const HALLUCINATION_CHOICES = {
   hallucinated: 1,
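
The tests further down verify that variables are interpolated into the prompt before classification is invoked. A minimal sketch of that step, assuming simple {placeholder} substitution; formatTemplate is a hypothetical helper, not the library's actual implementation.

type PromptMessage = { role: string; content: string };

// Hypothetical helper illustrating per-message interpolation; the actual
// phoenix-evals implementation may differ.
function formatTemplate(
  template: PromptMessage[],
  variables: Record<string, string>
): PromptMessage[] {
  return template.map((message) => ({
    ...message,
    // Replace {name} placeholders with the supplied value, leaving unknown
    // placeholders untouched.
    content: message.content.replace(
      /\{(\w+)\}/g,
      (match, name) => variables[name] ?? match
    ),
  }));
}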

js/packages/phoenix-evals/test/llm/createDocumentRelevancyEvaluator.test.ts

Lines changed: 20 additions & 5 deletions

@@ -48,9 +48,14 @@ describe("createDocumentRelevancyEvaluator", () => {
     expect(mockGenerateClassification).toHaveBeenCalledWith(
       expect.objectContaining({
         labels: ["relevant", "unrelated"],
-        prompt: expect.stringContaining(
-          "You are comparing a document to a question"
-        ),
+        prompt: expect.arrayContaining([
+          expect.objectContaining({
+            role: "user",
+            content: expect.stringContaining(
+              "You are comparing a document to a question"
+            ),
+          }),
+        ]),
       })
     );

@@ -239,12 +244,22 @@ describe("createDocumentRelevancyEvaluator", () => {
     // Verify that the prompt contains the interpolated values
     expect(mockGenerateClassification).toHaveBeenCalledWith(
       expect.objectContaining({
-        prompt: expect.stringContaining(testInput),
+        prompt: expect.arrayContaining([
+          expect.objectContaining({
+            role: "user",
+            content: expect.stringContaining(testInput),
+          }),
+        ]),
       })
     );
     expect(mockGenerateClassification).toHaveBeenCalledWith(
       expect.objectContaining({
-        prompt: expect.stringContaining(testOutput),
+        prompt: expect.arrayContaining([
+          expect.objectContaining({
+            role: "user",
+            content: expect.stringContaining(testOutput),
+          }),
+        ]),
       })
     );
   });
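
Taken together, the assertions above describe roughly the argument shape the evaluator now passes to the mocked generateClassification: a labels array plus a prompt that is an array of chat messages with the variables already interpolated. A schematic sketch of that shape; only the field names come from the test, the message text is illustrative.

// Approximate argument shape implied by the assertions above; the exact
// generateClassification signature is an assumption based on the mocked calls.
const exampleArgs = {
  labels: ["relevant", "unrelated"],
  prompt: [
    {
      role: "user",
      // Template text with the question and document already interpolated.
      content: "You are comparing a document to a question ...",
    },
  ],
};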

js/packages/phoenix-evals/test/llm/createHallucinationEvaluator.test.ts

Lines changed: 26 additions & 6 deletions

@@ -50,9 +50,14 @@ Is the answer hallucinated? Respond with "yes" or "no".
     expect(mockGenerateClassification).toHaveBeenCalledWith(
       expect.objectContaining({
         labels: ["hallucinated", "factual"],
-        prompt: expect.stringContaining(
-          "In this task, you will be presented with a query"
-        ),
+        prompt: expect.arrayContaining([
+          expect.objectContaining({
+            role: "user",
+            content: expect.stringContaining(
+              "In this task, you will be presented with a query"
+            ),
+          }),
+        ]),
       })
     );

@@ -257,17 +262,32 @@ Is the answer hallucinated? Respond with "yes" or "no".
     // Verify that the prompt contains the interpolated values
     expect(mockGenerateClassification).toHaveBeenCalledWith(
       expect.objectContaining({
-        prompt: expect.stringContaining(testInput),
+        prompt: expect.arrayContaining([
+          expect.objectContaining({
+            role: "user",
+            content: expect.stringContaining(testInput),
+          }),
+        ]),
       })
     );
     expect(mockGenerateClassification).toHaveBeenCalledWith(
       expect.objectContaining({
-        prompt: expect.stringContaining(testOutput),
+        prompt: expect.arrayContaining([
+          expect.objectContaining({
+            role: "user",
+            content: expect.stringContaining(testOutput),
+          }),
+        ]),
       })
     );
     expect(mockGenerateClassification).toHaveBeenCalledWith(
       expect.objectContaining({
-        prompt: expect.stringContaining(testReference),
+        prompt: expect.arrayContaining([
+          expect.objectContaining({
+            role: "user",
+            content: expect.stringContaining(testReference),
+          }),
+        ]),
       })
     );
   });
