Commit 0693c59

Merge branch 'main' into ldap
2 parents: 9252aa3 + f41c013

File tree

22 files changed: +2257 -706 lines changed

.release-please-manifest.json

Lines changed: 1 addition & 1 deletion

@@ -1,2 +1,2 @@
-{".":"12.19.0","packages/phoenix-evals":"2.6.1","packages/phoenix-otel":"0.14.0","packages/phoenix-client":"1.26.0"}
+{".":"12.19.0","packages/phoenix-evals":"2.7.0","packages/phoenix-otel":"0.14.0","packages/phoenix-client":"1.26.0"}

packages/phoenix-evals/CHANGELOG.md

Lines changed: 7 additions & 0 deletions

@@ -1,5 +1,12 @@
 # Changelog
 
+## [2.7.0](https://github.com/Arize-ai/phoenix/compare/arize-phoenix-evals-v2.6.1...arize-phoenix-evals-v2.7.0) (2025-12-04)
+
+
+### Features
+
+* **evals:** support prompt/template messages ([#10356](https://github.com/Arize-ai/phoenix/issues/10356)) ([7d3dc7d](https://github.com/Arize-ai/phoenix/commit/7d3dc7d2846807053da63a8aa9cb776283deb370))
+
 ## [2.6.1](https://github.com/Arize-ai/phoenix/compare/arize-phoenix-evals-v2.6.0...arize-phoenix-evals-v2.6.1) (2025-11-22)
 
 

packages/phoenix-evals/pyproject.toml

Lines changed: 3 additions & 1 deletion

@@ -22,7 +22,7 @@ classifiers = [
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
 ]
-version = "2.6.1"
+version = "2.7.0"
 dependencies = [
     "pandas",
     "tqdm",
@@ -126,6 +126,8 @@ module = [
     "litellm",
     "langchain_openai",
     "langchain_anthropic",
+    "langchain_core.*",
+    "langchain_community.*",
     "mistralai",
     "mistralai.*",
     "opentelemetry",

packages/phoenix-evals/src/phoenix/evals/evaluators.py

Lines changed: 30 additions & 17 deletions

@@ -35,9 +35,9 @@
     SummarizationEvaluator,
     ToxicityEvaluator,
 )
-from .llm import LLM
+from .llm import LLM, PromptLike
+from .llm.prompts import PromptTemplate, Template
 from .llm.types import ObjectGenerationMethod
-from .templating import Template
 from .utils import (
     _deprecate_positional_args,
     _deprecate_source_and_heuristic,
@@ -377,8 +377,9 @@ class LLMEvaluator(Evaluator):
     Args:
         name: Identifier for this evaluator and the name used in produced Scores.
         llm: The LLM instance to use for evaluation.
-        prompt_template: The prompt template (string or Template) with placeholders for
-            required fields; used to infer required variables.
+        prompt_template: The prompt template with placeholders for required fields; used to infer
+            required variables. Can be either a string template or a list of message dictionaries
+            (for chat-based models).
         schema: Optional tool/JSON schema for structured output when supported by the LLM.
         input_schema: Optional Pydantic model describing/validating inputs. If not provided,
             a model is dynamically created from the prompt variables (all str, required).
@@ -392,16 +393,19 @@ def __init__(
         *,
         name: str,
         llm: LLM,
-        prompt_template: Union[str, Template],
+        prompt_template: Union[PromptLike, PromptTemplate, Template],
         schema: Optional[ToolSchema] = None,
         input_schema: Optional[type[BaseModel]] = None,
         direction: DirectionType = "maximize",
         **kwargs: Any,
     ):
-        # Infer required fields from prompt_template
-        if isinstance(prompt_template, str):
-            prompt_template = Template(template=prompt_template)
-        required_fields = prompt_template.variables
+        # Convert to PromptTemplate for uniform handling
+        if isinstance(prompt_template, PromptTemplate):
+            self._prompt_template = prompt_template
+        else:
+            self._prompt_template = PromptTemplate(template=prompt_template)
+
+        required_fields = self._prompt_template.variables
 
         # If no explicit input_schema, create a Pydantic model with all fields as required str
         if input_schema is None:
@@ -423,9 +427,13 @@ def __init__(
             input_schema=input_schema,
         )
         self.llm = llm
-        self.prompt_template = prompt_template
         self.schema = schema
 
+    @property
+    def prompt_template(self) -> PromptTemplate:
+        """Get the prompt template."""
+        return self._prompt_template
+
     def _evaluate(self, eval_input: EvalInput) -> List[Score]:
         raise NotImplementedError("Subclasses must implement _evaluate")
 
@@ -455,8 +463,9 @@ class ClassificationEvaluator(LLMEvaluator):
         name: Identifier for this evaluator and the name used in produced Scores.
         llm: The LLM instance to use for evaluation. Must support tool calling or
             structured output for reliable classification.
-        prompt_template: The prompt template (string or Template) with placeholders for
-            required input fields. Template variables are inferred automatically.
+        prompt_template: The prompt template with placeholders for required input fields.
+            Can be either a string template or a list of message dictionaries (for chat-based
+            models). Template variables are inferred automatically.
         choices: Classification choices in one of three formats:
             a. List[str]: Simple list of label names (e.g., ["positive", "negative"]).
               Scores will be None.
@@ -544,7 +553,7 @@ def __init__(
         *,
         name: str,
         llm: LLM,
-        prompt_template: Union[str, Template],
+        prompt_template: Union[PromptLike, PromptTemplate, Template],
        choices: Union[
            List[str], Dict[str, Union[float, int]], Dict[str, Tuple[Union[float, int], str]]
        ],
@@ -586,10 +595,12 @@ def __init__(
         self.labels = labels
 
     def _evaluate(self, eval_input: EvalInput) -> List[Score]:
-        prompt_filled = self.prompt_template.render(variables=eval_input)
+        # Render template using PromptTemplate
+        prompt_filled = self._prompt_template.render(variables=eval_input)
+
         method = (
             ObjectGenerationMethod.TOOL_CALLING
-            if isinstance(self.labels, Dict)
+            if isinstance(self.labels, dict)
             else ObjectGenerationMethod.AUTO
         )
         response = self.llm.generate_classification(
@@ -626,10 +637,12 @@ def _evaluate(self, eval_input: EvalInput) -> List[Score]:
         ]
 
     async def _async_evaluate(self, eval_input: EvalInput) -> List[Score]:
-        prompt_filled = self.prompt_template.render(variables=eval_input)
+        # Render template using PromptTemplate
+        prompt_filled = self._prompt_template.render(variables=eval_input)
+
         method = (
             ObjectGenerationMethod.TOOL_CALLING
-            if isinstance(self.labels, Dict)
+            if isinstance(self.labels, dict)
             else ObjectGenerationMethod.AUTO
         )
         response = await self.llm.async_generate_classification(
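
For context, a minimal sketch of how the widened prompt_template parameter could be used after this change: per the updated docstrings above, an evaluator now accepts either a plain string template or a PromptLike list of message dictionaries. The import paths, the LLM(provider=..., model=...) constructor arguments, the message-dict keys, and the public evaluate() call are assumptions drawn from the surrounding package, not confirmed by this diff.

# Sketch only: constructor arguments, message-dict keys, and evaluate() are assumed.
from phoenix.evals.evaluators import ClassificationEvaluator
from phoenix.evals.llm import LLM

llm = LLM(provider="openai", model="gpt-4o-mini")  # assumed provider/model arguments

# A string template still works; its {placeholders} are inferred as required fields.
string_judge = ClassificationEvaluator(
    name="toxicity",
    llm=llm,
    prompt_template="Is the following text toxic?\n\n{text}",
    choices=["toxic", "non-toxic"],
)

# Per this commit, a list of message dictionaries (PromptLike) is also accepted,
# letting chat models receive a system message plus a templated user turn.
message_judge = ClassificationEvaluator(
    name="toxicity",
    llm=llm,
    prompt_template=[
        {"role": "system", "content": "You are a strict content-safety judge."},
        {"role": "user", "content": "Is the following text toxic?\n\n{text}"},
    ],
    choices={"toxic": 0.0, "non-toxic": 1.0},
)

scores = message_judge.evaluate({"text": "example input"})  # assumed public entry point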
packages/phoenix-evals/src/phoenix/evals/llm/__init__.py

Lines changed: 20 additions & 1 deletion

@@ -1,3 +1,22 @@
+from .prompts import (
+    Message,
+    MessageRole,
+    MessageTemplate,
+    PromptLike,
+    PromptTemplate,
+    Template,
+    TemplateFormat,
+)
 from .wrapper import LLM, show_provider_availability
 
-__all__ = ["LLM", "show_provider_availability"]
+__all__ = [
+    "LLM",
+    "Message",
+    "MessageRole",
+    "MessageTemplate",
+    "PromptLike",
+    "PromptTemplate",
+    "Template",
+    "TemplateFormat",
+    "show_provider_availability",
+]
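
The new exports above make the prompt helpers importable directly from the llm package. Below is a minimal sketch of using PromptTemplate on its own; only PromptTemplate(template=...), .variables, and .render(variables=...) appear in this diff, so the message-dict keys and the exact return shapes are assumptions.

# Sketch only: message-dict keys and the exact return shapes are assumptions.
from phoenix.evals.llm import PromptTemplate

template = PromptTemplate(
    template=[
        {"role": "system", "content": "You grade answers against {rubric}."},
        {"role": "user", "content": "Question: {question}\nAnswer: {answer}"},
    ],
)

# Variables are inferred from the {placeholders}, as LLMEvaluator does above.
print(template.variables)  # expected to contain: rubric, question, answer

rendered = template.render(
    variables={
        "rubric": "faithfulness",
        "question": "Who wrote Hamlet?",
        "answer": "Shakespeare",
    }
)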
