Commit 0693c59

Merge branch 'main' into ldap
2 parents: 9252aa3 + f41c013

File tree

22 files changed: +2257 -706 lines changed

.release-please-manifest.json

Lines changed: 1 addition & 1 deletion

@@ -1,2 +1,2 @@
-{".":"12.19.0","packages/phoenix-evals":"2.6.1","packages/phoenix-otel":"0.14.0","packages/phoenix-client":"1.26.0"}
+{".":"12.19.0","packages/phoenix-evals":"2.7.0","packages/phoenix-otel":"0.14.0","packages/phoenix-client":"1.26.0"}

packages/phoenix-evals/CHANGELOG.md

Lines changed: 7 additions & 0 deletions

@@ -1,5 +1,12 @@
 # Changelog
 
+## [2.7.0](https://github.com/Arize-ai/phoenix/compare/arize-phoenix-evals-v2.6.1...arize-phoenix-evals-v2.7.0) (2025-12-04)
+
+
+### Features
+
+* **evals:** support prompt/template messages ([#10356](https://github.com/Arize-ai/phoenix/issues/10356)) ([7d3dc7d](https://github.com/Arize-ai/phoenix/commit/7d3dc7d2846807053da63a8aa9cb776283deb370))
+
 ## [2.6.1](https://github.com/Arize-ai/phoenix/compare/arize-phoenix-evals-v2.6.0...arize-phoenix-evals-v2.6.1) (2025-11-22)
 
 

packages/phoenix-evals/pyproject.toml

Lines changed: 3 additions & 1 deletion

@@ -22,7 +22,7 @@ classifiers = [
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
 ]
-version = "2.6.1"
+version = "2.7.0"
 dependencies = [
     "pandas",
     "tqdm",
@@ -126,6 +126,8 @@ module = [
     "litellm",
     "langchain_openai",
     "langchain_anthropic",
+    "langchain_core.*",
+    "langchain_community.*",
     "mistralai",
     "mistralai.*",
     "opentelemetry",

packages/phoenix-evals/src/phoenix/evals/evaluators.py

Lines changed: 30 additions & 17 deletions

@@ -35,9 +35,9 @@
     SummarizationEvaluator,
     ToxicityEvaluator,
 )
-from .llm import LLM
+from .llm import LLM, PromptLike
+from .llm.prompts import PromptTemplate, Template
 from .llm.types import ObjectGenerationMethod
-from .templating import Template
 from .utils import (
     _deprecate_positional_args,
     _deprecate_source_and_heuristic,
@@ -377,8 +377,9 @@ class LLMEvaluator(Evaluator):
     Args:
         name: Identifier for this evaluator and the name used in produced Scores.
         llm: The LLM instance to use for evaluation.
-        prompt_template: The prompt template (string or Template) with placeholders for
-            required fields; used to infer required variables.
+        prompt_template: The prompt template with placeholders for required fields; used to infer
+            required variables. Can be either a string template or a list of message dictionaries
+            (for chat-based models).
         schema: Optional tool/JSON schema for structured output when supported by the LLM.
         input_schema: Optional Pydantic model describing/validating inputs. If not provided,
             a model is dynamically created from the prompt variables (all str, required).
@@ -392,16 +393,19 @@ def __init__(
         *,
         name: str,
         llm: LLM,
-        prompt_template: Union[str, Template],
+        prompt_template: Union[PromptLike, PromptTemplate, Template],
         schema: Optional[ToolSchema] = None,
         input_schema: Optional[type[BaseModel]] = None,
         direction: DirectionType = "maximize",
         **kwargs: Any,
     ):
-        # Infer required fields from prompt_template
-        if isinstance(prompt_template, str):
-            prompt_template = Template(template=prompt_template)
-        required_fields = prompt_template.variables
+        # Convert to PromptTemplate for uniform handling
+        if isinstance(prompt_template, PromptTemplate):
+            self._prompt_template = prompt_template
+        else:
+            self._prompt_template = PromptTemplate(template=prompt_template)
+
+        required_fields = self._prompt_template.variables
 
         # If no explicit input_schema, create a Pydantic model with all fields as required str
         if input_schema is None:
@@ -423,9 +427,13 @@ def __init__(
             input_schema=input_schema,
         )
         self.llm = llm
-        self.prompt_template = prompt_template
         self.schema = schema
 
+    @property
+    def prompt_template(self) -> PromptTemplate:
+        """Get the prompt template."""
+        return self._prompt_template
+
     def _evaluate(self, eval_input: EvalInput) -> List[Score]:
         raise NotImplementedError("Subclasses must implement _evaluate")
 
@@ -455,8 +463,9 @@ class ClassificationEvaluator(LLMEvaluator):
         name: Identifier for this evaluator and the name used in produced Scores.
         llm: The LLM instance to use for evaluation. Must support tool calling or
             structured output for reliable classification.
-        prompt_template: The prompt template (string or Template) with placeholders for
-            required input fields. Template variables are inferred automatically.
+        prompt_template: The prompt template with placeholders for required input fields.
+            Can be either a string template or a list of message dictionaries (for chat-based
+            models). Template variables are inferred automatically.
         choices: Classification choices in one of three formats:
             a. List[str]: Simple list of label names (e.g., ["positive", "negative"]).
               Scores will be None.
@@ -544,7 +553,7 @@ def __init__(
         *,
         name: str,
         llm: LLM,
-        prompt_template: Union[str, Template],
+        prompt_template: Union[PromptLike, PromptTemplate, Template],
        choices: Union[
            List[str], Dict[str, Union[float, int]], Dict[str, Tuple[Union[float, int], str]]
        ],
@@ -586,10 +595,12 @@ def __init__(
         self.labels = labels
 
     def _evaluate(self, eval_input: EvalInput) -> List[Score]:
-        prompt_filled = self.prompt_template.render(variables=eval_input)
+        # Render template using PromptTemplate
+        prompt_filled = self._prompt_template.render(variables=eval_input)
+
         method = (
             ObjectGenerationMethod.TOOL_CALLING
-            if isinstance(self.labels, Dict)
+            if isinstance(self.labels, dict)
             else ObjectGenerationMethod.AUTO
         )
         response = self.llm.generate_classification(
@@ -626,10 +637,12 @@ def _evaluate(self, eval_input: EvalInput) -> List[Score]:
         ]
 
     async def _async_evaluate(self, eval_input: EvalInput) -> List[Score]:
-        prompt_filled = self.prompt_template.render(variables=eval_input)
+        # Render template using PromptTemplate
+        prompt_filled = self._prompt_template.render(variables=eval_input)
+
         method = (
             ObjectGenerationMethod.TOOL_CALLING
-            if isinstance(self.labels, Dict)
+            if isinstance(self.labels, dict)
             else ObjectGenerationMethod.AUTO
         )
         response = await self.llm.async_generate_classification(
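
For context, a minimal sketch of how the widened prompt_template parameter could be used after this change: per the updated docstrings above, an evaluator now accepts either a plain string template or a PromptLike list of message dictionaries. The import paths, the LLM(provider=..., model=...) constructor arguments, the message-dict keys, and the public evaluate() call are assumptions drawn from the surrounding package, not confirmed by this diff.

# Sketch only: constructor arguments, message-dict keys, and evaluate() are assumed.
from phoenix.evals.evaluators import ClassificationEvaluator
from phoenix.evals.llm import LLM

llm = LLM(provider="openai", model="gpt-4o-mini")  # assumed provider/model arguments

# A string template still works; its {placeholders} are inferred as required fields.
string_judge = ClassificationEvaluator(
    name="toxicity",
    llm=llm,
    prompt_template="Is the following text toxic?\n\n{text}",
    choices=["toxic", "non-toxic"],
)

# Per this commit, a list of message dictionaries (PromptLike) is also accepted,
# letting chat models receive a system message plus a templated user turn.
message_judge = ClassificationEvaluator(
    name="toxicity",
    llm=llm,
    prompt_template=[
        {"role": "system", "content": "You are a strict content-safety judge."},
        {"role": "user", "content": "Is the following text toxic?\n\n{text}"},
    ],
    choices={"toxic": 0.0, "non-toxic": 1.0},
)

scores = message_judge.evaluate({"text": "example input"})  # assumed public entry point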
packages/phoenix-evals/src/phoenix/evals/llm/__init__.py

Lines changed: 20 additions & 1 deletion

@@ -1,3 +1,22 @@
+from .prompts import (
+    Message,
+    MessageRole,
+    MessageTemplate,
+    PromptLike,
+    PromptTemplate,
+    Template,
+    TemplateFormat,
+)
 from .wrapper import LLM, show_provider_availability
 
-__all__ = ["LLM", "show_provider_availability"]
+__all__ = [
+    "LLM",
+    "Message",
+    "MessageRole",
+    "MessageTemplate",
+    "PromptLike",
+    "PromptTemplate",
+    "Template",
+    "TemplateFormat",
+    "show_provider_availability",
+]
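
The new exports above make the prompt helpers importable directly from the llm package. Below is a minimal sketch of using PromptTemplate on its own; only PromptTemplate(template=...), .variables, and .render(variables=...) appear in this diff, so the message-dict keys and the exact return shapes are assumptions.

# Sketch only: message-dict keys and the exact return shapes are assumptions.
from phoenix.evals.llm import PromptTemplate

template = PromptTemplate(
    template=[
        {"role": "system", "content": "You grade answers against {rubric}."},
        {"role": "user", "content": "Question: {question}\nAnswer: {answer}"},
    ],
)

# Variables are inferred from the {placeholders}, as LLMEvaluator does above.
print(template.variables)  # expected to contain: rubric, question, answer

rendered = template.render(
    variables={
        "rubric": "faithfulness",
        "question": "Who wrote Hamlet?",
        "answer": "Shakespeare",
    }
)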
