28 | 28 | from vllm.entrypoints.openai.serving_chat import OpenAIServingChat |
29 | 29 | from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion |
30 | 30 | from vllm.entrypoints.openai.serving_engine import RequestPrompt |
| 31 | +from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels |
31 | 32 | from vllm.inputs.data import TokensPrompt |
32 | 33 | from vllm.sampling_params import SamplingParams |
33 | | -from vllm.transformers_utils.tokenizer import AnyTokenizer |
| 34 | +from vllm.tokenizers import TokenizerLike as AnyTokenizer |
| 35 | + |
| 36 | + |
| 37 | +class StubEngineClient: |
| 38 | + """ |
| 39 | + Stub EngineClient for preprocessing-only use of OpenAIServingChat/Completion. |
| 40 | + Provides the minimal attributes required by OpenAIServingModels. |
| 41 | + """ |
| 42 | + |
| 43 | + def __init__(self, model_config: ModelConfig): |
| 44 | + self.model_config = model_config |
| 45 | + self.input_processor = None |
| 46 | + self.io_processor = None |
34 | 47 |
35 | 48 |
36 | 49 | @runtime_checkable |
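
For orientation, a minimal sketch of exercising the new stub on its own. The `ModelConfig(model=...)` call and its import path are assumptions (the constructor signature varies across vLLM versions), and the model name is only a placeholder.

```python
from vllm.config import ModelConfig  # assumed import path

# Hypothetical config; ModelConfig's constructor differs across vLLM versions.
model_config = ModelConfig(model="facebook/opt-125m")

stub = StubEngineClient(model_config)
# Per the docstring above, these are the minimal attributes that
# OpenAIServingModels needs, so no running engine is required.
assert stub.model_config is model_config
assert stub.input_processor is None and stub.io_processor is None
```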
@@ -120,12 +133,19 @@ class ChatProcessor: |
120 | 133 | def __init__(self, tokenizer: AnyTokenizer, model_config: ModelConfig): |
121 | 134 | self.tokenizer = tokenizer |
122 | 135 | self.model_config = model_config |
| 136 | + # Create stub engine client and models for preprocessing-only usage |
| 137 | + stub_engine = StubEngineClient(model_config) |
| 138 | + serving_models = OpenAIServingModels( |
| 139 | + engine_client=stub_engine, |
| 140 | + base_model_paths=[ |
| 141 | + BaseModelPath(name=model_config.model, model_path=model_config.model) |
| 142 | + ], |
| 143 | + ) |
123 | 144 | self.openai_serving = OpenAIServingChat( |
124 | | - engine_client=None, |
125 | | - model_config=model_config, |
126 | | - models=None, |
127 | | - request_logger=None, |
| 145 | + engine_client=stub_engine, |
| 146 | + models=serving_models, |
128 | 147 | response_role="assistant", |
| 148 | + request_logger=None, |
129 | 149 | chat_template=None, |
130 | 150 | chat_template_content_format="auto", |
131 | 151 | ) |
@@ -186,7 +206,6 @@ async def stream_response( |
186 | 206 | conversation, |
187 | 207 | self.tokenizer, |
188 | 208 | request_metadata, |
189 | | - enable_force_include_usage=False, |
190 | 209 | ): |
191 | 210 | if raw_response.startswith("data: [DONE]"): |
192 | 211 | yield raw_response |
@@ -220,7 +239,6 @@ async def stream_response( |
220 | 239 | conversation, |
221 | 240 | self.tokenizer, |
222 | 241 | request_metadata, |
223 | | - enable_force_include_usage=False, |
224 | 242 | ): |
225 | 243 | if raw_response.startswith("data: [DONE]"): |
226 | 244 | break |
@@ -267,10 +285,17 @@ class CompletionsProcessor: |
267 | 285 | def __init__(self, tokenizer: AnyTokenizer, model_config: ModelConfig): |
268 | 286 | self.tokenizer = tokenizer |
269 | 287 | self.model_config = model_config |
| 288 | + # Create stub engine client and models for preprocessing-only usage |
| 289 | + stub_engine = StubEngineClient(model_config) |
| 290 | + serving_models = OpenAIServingModels( |
| 291 | + engine_client=stub_engine, |
| 292 | + base_model_paths=[ |
| 293 | + BaseModelPath(name=model_config.model, model_path=model_config.model) |
| 294 | + ], |
| 295 | + ) |
270 | 296 | self.openai_serving = OpenAIServingCompletion( |
271 | | - engine_client=None, |
272 | | - model_config=model_config, |
273 | | - models=None, |
| 297 | + engine_client=stub_engine, |
| 298 | + models=serving_models, |
274 | 299 | request_logger=None, |
275 | 300 | ) |
276 | 301 |
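
Taken together, the change means both processors can be built without a live engine client. Below is a hedged sketch of how that might look; the tokenizer and `ModelConfig` setup are assumptions (any `TokenizerLike`-compatible tokenizer and a suitably constructed config), not part of this diff.

```python
from transformers import AutoTokenizer  # any TokenizerLike-compatible tokenizer
from vllm.config import ModelConfig     # assumed import path

model_name = "facebook/opt-125m"        # placeholder model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model_config = ModelConfig(model=model_name)  # constructor varies by vLLM version

# Each processor now wires up its own StubEngineClient and OpenAIServingModels
# internally, so callers only supply the tokenizer and model config.
chat = ChatProcessor(tokenizer=tokenizer, model_config=model_config)
completions = CompletionsProcessor(tokenizer=tokenizer, model_config=model_config)
```

The resulting `chat.openai_serving` / `completions.openai_serving` instances are backed by the stub, which should be enough for the preprocessing-only use described above, even though no engine is running.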