     SummarizationEvaluator,
     ToxicityEvaluator,
 )
-from .llm import LLM
+from .llm import LLM, PromptLike
+from .llm.prompts import PromptTemplate, Template
 from .llm.types import ObjectGenerationMethod
-from .templating import Template
 from .utils import (
     _deprecate_positional_args,
     _deprecate_source_and_heuristic,
@@ -377,8 +377,9 @@ class LLMEvaluator(Evaluator):
     Args:
         name: Identifier for this evaluator and the name used in produced Scores.
         llm: The LLM instance to use for evaluation.
-        prompt_template: The prompt template (string or Template) with placeholders for
-            required fields; used to infer required variables.
+        prompt_template: The prompt template with placeholders for required fields; used to infer
+            required variables. Can be either a string template or a list of message dictionaries
+            (for chat-based models).
         schema: Optional tool/JSON schema for structured output when supported by the LLM.
         input_schema: Optional Pydantic model describing/validating inputs. If not provided,
             a model is dynamically created from the prompt variables (all str, required).
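Since the docstring now allows either form, here is a minimal sketch of the two accepted `prompt_template` shapes. The `role`/`content` keys in the message dictionaries are an assumption; this hunk only says "a list of message dictionaries".

```python
from typing import Any, Dict, List, Union

# Plain string template with placeholders.
string_prompt: str = "Rate the following answer for accuracy: {output}"

# Chat-style template as a list of message dictionaries (assumed role/content shape).
chat_prompt: List[Dict[str, Any]] = [
    {"role": "system", "content": "You are a strict grader."},
    {"role": "user", "content": "Rate the following answer for accuracy: {output}"},
]

# Per the updated docstring, either form can be passed as prompt_template;
# the constructor wraps non-PromptTemplate values in a PromptTemplate.
prompt_template: Union[str, List[Dict[str, Any]]] = chat_prompt
```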
@@ -392,16 +393,19 @@ def __init__(
         *,
         name: str,
         llm: LLM,
-        prompt_template: Union[str, Template],
+        prompt_template: Union[PromptLike, PromptTemplate, Template],
         schema: Optional[ToolSchema] = None,
         input_schema: Optional[type[BaseModel]] = None,
         direction: DirectionType = "maximize",
         **kwargs: Any,
     ):
-        # Infer required fields from prompt_template
-        if isinstance(prompt_template, str):
-            prompt_template = Template(template=prompt_template)
-        required_fields = prompt_template.variables
+        # Convert to PromptTemplate for uniform handling
+        if isinstance(prompt_template, PromptTemplate):
+            self._prompt_template = prompt_template
+        else:
+            self._prompt_template = PromptTemplate(template=prompt_template)
+
+        required_fields = self._prompt_template.variables
 
         # If no explicit input_schema, create a Pydantic model with all fields as required str
         if input_schema is None:
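The dynamically created input schema itself is not shown in this hunk. A sketch of the general idea using `pydantic.create_model`, assuming `required_fields` is a list of placeholder names inferred from the template (the library's actual helper may differ):

```python
from pydantic import BaseModel, create_model

# Hypothetical placeholder names inferred from a template's variables.
required_fields = ["output", "reference"]

# Build a model where every inferred field is a required str, mirroring the
# "all str, required" behavior described in the docstring above.
InputModel: type[BaseModel] = create_model(
    "EvalInputModel",
    **{name: (str, ...) for name in required_fields},
)

# Validates that all required fields are present.
InputModel(output="The sky is blue.", reference="What color is the sky?")
```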
@@ -423,9 +427,13 @@ def __init__(
             input_schema=input_schema,
         )
         self.llm = llm
-        self.prompt_template = prompt_template
         self.schema = schema
 
+    @property
+    def prompt_template(self) -> PromptTemplate:
+        """Get the prompt template."""
+        return self._prompt_template
+
     def _evaluate(self, eval_input: EvalInput) -> List[Score]:
         raise NotImplementedError("Subclasses must implement _evaluate")
 
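Because only a getter is defined, `prompt_template` becomes read-only from the outside: code that previously rebound the attribute will now raise `AttributeError`. A self-contained sketch of the pattern, independent of the library's classes:

```python
class _Holder:
    """Mirrors the @property-only pattern added above."""

    def __init__(self, template: str) -> None:
        self._prompt_template = template

    @property
    def prompt_template(self) -> str:
        # Getter only: no corresponding setter is defined.
        return self._prompt_template


holder = _Holder("Rate: {output}")
print(holder.prompt_template)        # reads fine
try:
    holder.prompt_template = "new"   # type: ignore[misc]
except AttributeError:
    print("read-only: assignment raises AttributeError")
```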
@@ -455,8 +463,9 @@ class ClassificationEvaluator(LLMEvaluator):
         name: Identifier for this evaluator and the name used in produced Scores.
         llm: The LLM instance to use for evaluation. Must support tool calling or
             structured output for reliable classification.
-        prompt_template: The prompt template (string or Template) with placeholders for
-            required input fields. Template variables are inferred automatically.
+        prompt_template: The prompt template with placeholders for required input fields.
+            Can be either a string template or a list of message dictionaries (for chat-based
+            models). Template variables are inferred automatically.
         choices: Classification choices in one of three formats:
             a. List[str]: Simple list of label names (e.g., ["positive", "negative"]).
                 Scores will be None.
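A hedged construction sketch combining the chat-style prompt form with the score-mapped `Dict[str, float]` choices format described above. The message keys are an assumption, and an actual `LLM` instance is required but not constructed in this diff, so the call itself is only shown in comments:

```python
# Chat-style prompt (assumed role/content keys) and score-mapped choices.
toxicity_prompt = [
    {"role": "system", "content": "You label text for toxicity."},
    {"role": "user", "content": "Is the following text toxic?\n\n{output}"},
]
choices = {"toxic": 0.0, "non-toxic": 1.0}

# Sketch of the call (an LLM instance is required; its construction is
# outside this diff):
#
# evaluator = ClassificationEvaluator(
#     name="toxicity",
#     llm=llm,
#     prompt_template=toxicity_prompt,
#     choices=choices,
# )
```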
@@ -544,7 +553,7 @@ def __init__(
         *,
         name: str,
         llm: LLM,
-        prompt_template: Union[str, Template],
+        prompt_template: Union[PromptLike, PromptTemplate, Template],
         choices: Union[
             List[str], Dict[str, Union[float, int]], Dict[str, Tuple[Union[float, int], str]]
         ],
@@ -586,10 +595,12 @@ def __init__(
         self.labels = labels
 
     def _evaluate(self, eval_input: EvalInput) -> List[Score]:
-        prompt_filled = self.prompt_template.render(variables=eval_input)
+        # Render template using PromptTemplate
+        prompt_filled = self._prompt_template.render(variables=eval_input)
+
         method = (
             ObjectGenerationMethod.TOOL_CALLING
-            if isinstance(self.labels, Dict)
+            if isinstance(self.labels, dict)
             else ObjectGenerationMethod.AUTO
         )
         response = self.llm.generate_classification(
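On the `isinstance(self.labels, Dict)` to `isinstance(self.labels, dict)` change: an unparameterized `typing.Dict` happens to pass `isinstance` at runtime, but the builtin `dict` is the idiomatic runtime check and `typing.Dict` has been deprecated since Python 3.9. A minimal sketch of the distinction:

```python
from typing import Dict

labels = {"positive": 1.0, "negative": 0.0}

# Both checks pass for an unparameterized typing.Dict at runtime...
assert isinstance(labels, dict)
assert isinstance(labels, Dict)

# ...but a parameterized alias cannot be used with isinstance at all,
# and typing.Dict itself is deprecated in favor of the builtin dict.
try:
    isinstance(labels, Dict[str, float])
except TypeError:
    print("subscripted generics cannot be used with isinstance")
```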
@@ -626,10 +637,12 @@ def _evaluate(self, eval_input: EvalInput) -> List[Score]:
         ]
 
     async def _async_evaluate(self, eval_input: EvalInput) -> List[Score]:
-        prompt_filled = self.prompt_template.render(variables=eval_input)
+        # Render template using PromptTemplate
+        prompt_filled = self._prompt_template.render(variables=eval_input)
+
         method = (
             ObjectGenerationMethod.TOOL_CALLING
-            if isinstance(self.labels, Dict)
+            if isinstance(self.labels, dict)
             else ObjectGenerationMethod.AUTO
         )
         response = await self.llm.async_generate_classification(