|
9 | 9 | from typing import Optional |
10 | 10 |
|
11 | 11 | import uvloop |
| 12 | +from prometheus_client import REGISTRY, CollectorRegistry, multiprocess |
12 | 13 | from vllm.distributed.kv_events import ZmqEventPublisher |
13 | 14 | from vllm.usage.usage_lib import UsageContext |
14 | 15 | from vllm.v1.engine.async_llm import AsyncLLM |
15 | 16 | from vllm.v1.metrics.prometheus import setup_multiprocess_prometheus |
16 | 17 |
|
17 | 18 | from dynamo.common.config_dump import dump_config |
18 | 19 | from dynamo.common.utils.endpoint_types import parse_endpoint_types |
| 20 | +from dynamo.common.utils.prometheus import register_engine_metrics_callback |
19 | 21 | from dynamo.llm import ( |
20 | 22 | ModelInput, |
21 | 23 | ModelRuntimeConfig, |
@@ -106,6 +108,64 @@ def signal_handler(): |
106 | 108 | logger.debug("Worker function completed, exiting...") |
107 | 109 |
|
108 | 110 |
|
| 111 | +def setup_metrics_collection(config: Config, generate_endpoint, logger): |
| 112 | + """Set up metrics collection for vLLM and LMCache metrics. |
| 113 | +
|
| 114 | + In multiprocess mode (PROMETHEUS_MULTIPROC_DIR set), metrics are stored: |
| 115 | + 1. In-memory: Metric objects in global REGISTRY |
| 116 | + 2. On-disk: Metric values in .db files (PROMETHEUS_MULTIPROC_DIR) |
| 117 | +
|
| 118 | + MultiProcessCollector reads from .db files, but adding it to REGISTRY can fail |
| 119 | + with "Duplicated timeseries" if PROMETHEUS_MULTIPROC_DIR was set before the process |
| 120 | + started (K8s deployments), because the metrics are already in REGISTRY. |
| 121 | +
|
| 122 | + Solution: Try adding MultiProcessCollector to REGISTRY. If that fails, use a |
| 123 | + separate registry for multiprocess collection and register callbacks for both |
| 124 | + registries to ensure all metrics (vllm, lmcache, dynamo_component) are collected. |
| 125 | + """ |
| 126 | + if config.engine_args.disable_log_stats is False: |
| 127 | + if os.environ.get("PROMETHEUS_MULTIPROC_DIR"): |
| 128 | + try: |
| 129 | + # MultiProcessCollector reads metrics from .db files in PROMETHEUS_MULTIPROC_DIR |
| 130 | + # Adding it to REGISTRY allows collecting both in-memory and .db file metrics |
| 131 | + multiprocess.MultiProcessCollector(REGISTRY) |
| 132 | + logger.debug("Added MultiProcessCollector to global REGISTRY") |
| 133 | + register_engine_metrics_callback( |
| 134 | + endpoint=generate_endpoint, |
| 135 | + registry=REGISTRY, |
| 136 | + metric_prefix_filters=["vllm:", "lmcache:"], |
| 137 | + ) |
| 138 | + except ValueError as e: |
| 139 | + # Conflict: metrics already in REGISTRY, MultiProcessCollector tries to add same metrics from .db files |
| 140 | + # Solution: Use separate registry that ONLY reads from .db files (no in-memory conflicts) |
| 141 | + logger.debug( |
| 142 | + f"Could not add MultiProcessCollector to REGISTRY ({e}), using separate registry" |
| 143 | + ) |
| 144 | + multiproc_registry = CollectorRegistry() |
| 145 | + multiprocess.MultiProcessCollector(multiproc_registry) |
| 146 | + |
| 147 | + # Register both registries to collect all metrics |
| 148 | + # Global REGISTRY has in-memory metrics (vllm, dynamo_component) |
| 149 | + register_engine_metrics_callback( |
| 150 | + endpoint=generate_endpoint, |
| 151 | + registry=REGISTRY, |
| 152 | + metric_prefix_filters=["vllm:", "dynamo_component:"], |
| 153 | + ) |
| 154 | + # Multiproc registry has .db file metrics (lmcache, possibly vllm duplicates) |
| 155 | + register_engine_metrics_callback( |
| 156 | + endpoint=generate_endpoint, |
| 157 | + registry=multiproc_registry, |
| 158 | + metric_prefix_filters=["vllm:", "lmcache:"], |
| 159 | + ) |
| 160 | + else: |
| 161 | + # No multiprocess mode |
| 162 | + register_engine_metrics_callback( |
| 163 | + endpoint=generate_endpoint, |
| 164 | + registry=REGISTRY, |
| 165 | + metric_prefix_filters=["vllm:", "lmcache:"], |
| 166 | + ) |
| 167 | + |
| 168 | + |
109 | 169 | def setup_kv_event_publisher( |
110 | 170 | config: Config, |
111 | 171 | component, |
@@ -176,11 +236,9 @@ def setup_kv_event_publisher( |
176 | 236 |
|
177 | 237 |
|
178 | 238 | def setup_vllm_engine(config, stat_logger=None): |
179 | | - # Existing vLLM v0.11.0 bug: vllm/v1/metrics/prometheus.py:79 passes TemporaryDirectory object instead of |
180 | | - # the .name string, causing a false error message when vLLM exits. Therefore, always set |
181 | | - # PROMETHEUS_MULTIPROC_DIR first, and we'll do the path cleanup. |
182 | | - |
183 | | - # This vLLM bug causes a false error message when vLLM exits. |
| 239 | + # vLLM v0.11.0 bug: vllm/v1/metrics/prometheus.py:79 passes a TemporaryDirectory object |
| 240 | + # instead of its .name string, causing a false error on exit. Set PROMETHEUS_MULTIPROC_DIR |
| 241 | + # ourselves to avoid this and handle cleanup properly. |
184 | 242 | prometheus_temp_dir = None |
185 | 243 | if "PROMETHEUS_MULTIPROC_DIR" not in os.environ: |
186 | 244 | prometheus_temp_dir = tempfile.TemporaryDirectory(prefix="vllm_prometheus_") |
@@ -356,31 +414,7 @@ async def init_prefill(runtime: DistributedRuntime, config: Config): |
356 | 414 | if kv_publishers: |
357 | 415 | handler.kv_publishers = kv_publishers |
358 | 416 |
|
359 | | - if config.engine_args.disable_log_stats is False: |
360 | | - # vLLM v1 registers its metrics with 'vllm:' prefix |
361 | | - from prometheus_client import REGISTRY, multiprocess |
362 | | - |
363 | | - from dynamo.common.utils.prometheus import register_engine_metrics_callback |
364 | | - |
365 | | - # Option 1: Try adding MultiProcessCollector to the global REGISTRY |
366 | | - # This would make REGISTRY collect from both its registered metrics AND multiprocess files |
367 | | - if os.environ.get("PROMETHEUS_MULTIPROC_DIR"): |
368 | | - try: |
369 | | - # Add MultiProcessCollector to global REGISTRY |
370 | | - # This makes REGISTRY collect from .db files in addition to its own metrics |
371 | | - multiprocess.MultiProcessCollector(REGISTRY) |
372 | | - logger.info("Added MultiProcessCollector to global REGISTRY") |
373 | | - except ValueError as e: |
374 | | - # Might already be registered or directory issues |
375 | | - logger.warning(f"Could not add MultiProcessCollector to REGISTRY: {e}") |
376 | | - |
377 | | - # Register callback with the global REGISTRY |
378 | | - # Now it should collect both its own metrics AND multiprocess metrics |
379 | | - register_engine_metrics_callback( |
380 | | - endpoint=generate_endpoint, |
381 | | - registry=REGISTRY, |
382 | | - metric_prefix_filters=["vllm:", "lmcache:"], |
383 | | - ) |
| 417 | + setup_metrics_collection(config, generate_endpoint, logger) |
384 | 418 |
|
385 | 419 | # Register prefill model with ModelType.Prefill |
386 | 420 | if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register |
@@ -493,31 +527,7 @@ async def init(runtime: DistributedRuntime, config: Config): |
493 | 527 | if kv_publishers: |
494 | 528 | handler.kv_publishers = kv_publishers |
495 | 529 |
|
496 | | - if config.engine_args.disable_log_stats is False: |
497 | | - # vLLM v1 registers its metrics with 'vllm:' prefix |
498 | | - from prometheus_client import REGISTRY, multiprocess |
499 | | - |
500 | | - from dynamo.common.utils.prometheus import register_engine_metrics_callback |
501 | | - |
502 | | - # Option 1: Try adding MultiProcessCollector to the global REGISTRY |
503 | | - # This would make REGISTRY collect from both its registered metrics AND multiprocess files |
504 | | - if os.environ.get("PROMETHEUS_MULTIPROC_DIR"): |
505 | | - try: |
506 | | - # Add MultiProcessCollector to global REGISTRY |
507 | | - # This makes REGISTRY collect from .db files in addition to its own metrics |
508 | | - multiprocess.MultiProcessCollector(REGISTRY) |
509 | | - logger.info("Added MultiProcessCollector to global REGISTRY") |
510 | | - except ValueError as e: |
511 | | - # Might already be registered or directory issues |
512 | | - logger.warning(f"Could not add MultiProcessCollector to REGISTRY: {e}") |
513 | | - |
514 | | - # Register callback with the global REGISTRY |
515 | | - # Now it should collect both its own metrics AND multiprocess metrics |
516 | | - register_engine_metrics_callback( |
517 | | - endpoint=generate_endpoint, |
518 | | - registry=REGISTRY, |
519 | | - metric_prefix_filters=["vllm:", "lmcache:"], |
520 | | - ) |
| 530 | + setup_metrics_collection(config, generate_endpoint, logger) |
521 | 531 |
|
522 | 532 | if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register |
523 | 533 | # Parse endpoint types from --dyn-endpoint-types flag |
|
0 commit comments