Skip to content

Commit c655585

Browse files
authored
fix: enable LMCache metrics visibility with PROMETHEUS_MULTIPROC_DIR (#4654)
Signed-off-by: Keiven Chang <[email protected]> Co-authored-by: Keiven Chang <[email protected]>
1 parent a6a4f36 commit c655585

File tree

6 files changed

+116
-57
lines changed

6 files changed

+116
-57
lines changed

components/src/dynamo/vllm/main.py

Lines changed: 65 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@
99
from typing import Optional
1010

1111
import uvloop
12+
from prometheus_client import REGISTRY, CollectorRegistry, multiprocess
1213
from vllm.distributed.kv_events import ZmqEventPublisher
1314
from vllm.usage.usage_lib import UsageContext
1415
from vllm.v1.engine.async_llm import AsyncLLM
1516
from vllm.v1.metrics.prometheus import setup_multiprocess_prometheus
1617

1718
from dynamo.common.config_dump import dump_config
1819
from dynamo.common.utils.endpoint_types import parse_endpoint_types
20+
from dynamo.common.utils.prometheus import register_engine_metrics_callback
1921
from dynamo.llm import (
2022
ModelInput,
2123
ModelRuntimeConfig,
@@ -106,6 +108,64 @@ def signal_handler():
106108
logger.debug("Worker function completed, exiting...")
107109

108110

111+
def setup_metrics_collection(config: Config, generate_endpoint, logger):
    """Register Prometheus callbacks that expose vLLM and LMCache metrics.

    Nothing is registered unless ``config.engine_args.disable_log_stats`` is
    exactly ``False``.

    In multiprocess mode (``PROMETHEUS_MULTIPROC_DIR`` set), metrics are stored:
    1. In-memory: metric objects in the global ``REGISTRY``
    2. On-disk: metric values in ``.db`` files under ``PROMETHEUS_MULTIPROC_DIR``

    ``MultiProcessCollector`` reads the ``.db`` files. Attaching it to
    ``REGISTRY`` can fail with ``ValueError`` ("Duplicated timeseries") when
    ``PROMETHEUS_MULTIPROC_DIR`` was set before the process started (K8s
    deployments), because the same metrics are already in ``REGISTRY``. In that
    case a separate ``CollectorRegistry`` is used for the ``.db`` files and a
    callback is registered for each registry so that vllm, lmcache and
    dynamo_component metrics are all collected.

    Args:
        config: Worker configuration; only ``engine_args.disable_log_stats``
            is read here.
        generate_endpoint: Endpoint handed to
            ``register_engine_metrics_callback``.
        logger: Logger used for the debug messages.

    Note:
        Only a ``ValueError`` raised while attaching the collector to
        ``REGISTRY`` triggers the fallback. Errors from the fallback collector
        or from callback registration are not caught here.
    """
    if config.engine_args.disable_log_stats is not False:
        return

    def _register(registry, prefixes):
        # Single place that wires a registry to the endpoint's metrics callback.
        register_engine_metrics_callback(
            endpoint=generate_endpoint,
            registry=registry,
            metric_prefix_filters=prefixes,
        )

    if not os.environ.get("PROMETHEUS_MULTIPROC_DIR"):
        # No multiprocess mode
        _register(REGISTRY, ["vllm:", "lmcache:"])
        return

    try:
        # MultiProcessCollector reads metrics from .db files in PROMETHEUS_MULTIPROC_DIR.
        # Adding it to REGISTRY allows collecting both in-memory and .db file metrics.
        # Keep the try body to this one call so that a ValueError raised by
        # callback registration is never mistaken for a collector conflict.
        multiprocess.MultiProcessCollector(REGISTRY)
    except ValueError as e:
        # Conflict: metrics already in REGISTRY, MultiProcessCollector tries to add
        # the same metrics from .db files.
        # Solution: use a separate registry that ONLY reads from .db files.
        logger.debug(
            f"Could not add MultiProcessCollector to REGISTRY ({e}), using separate registry"
        )
        multiproc_registry = CollectorRegistry()
        multiprocess.MultiProcessCollector(multiproc_registry)

        # Register both registries to collect all metrics.
        # Global REGISTRY has in-memory metrics (vllm, dynamo_component).
        _register(REGISTRY, ["vllm:", "dynamo_component:"])
        # Multiproc registry has .db file metrics (lmcache, possibly vllm duplicates).
        _register(multiproc_registry, ["vllm:", "lmcache:"])
    else:
        logger.debug("Added MultiProcessCollector to global REGISTRY")
        _register(REGISTRY, ["vllm:", "lmcache:"])
167+
168+
109169
def setup_kv_event_publisher(
110170
config: Config,
111171
component,
@@ -176,11 +236,9 @@ def setup_kv_event_publisher(
176236

177237

178238
def setup_vllm_engine(config, stat_logger=None):
179-
# Existing vLLM v0.11.0 bug: vllm/v1/metrics/prometheus.py:79 passes TemporaryDirectory object instead of
180-
# the .name string, causing a false error message when vLLM exits. Therefore, always set
181-
# PROMETHEUS_MULTIPROC_DIR first, and we'll do the path cleanup.
182-
183-
# This vLLM bug causes a false error message when vLLM exits.
239+
# vLLM v0.11.0 bug: vllm/v1/metrics/prometheus.py:79 passes TemporaryDirectory object
240+
# instead of .name string, causing false error on exit. Set PROMETHEUS_MULTIPROC_DIR
241+
# ourselves to avoid this and handle cleanup properly.
184242
prometheus_temp_dir = None
185243
if "PROMETHEUS_MULTIPROC_DIR" not in os.environ:
186244
prometheus_temp_dir = tempfile.TemporaryDirectory(prefix="vllm_prometheus_")
@@ -356,31 +414,7 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
356414
if kv_publishers:
357415
handler.kv_publishers = kv_publishers
358416

359-
if config.engine_args.disable_log_stats is False:
360-
# vLLM v1 registers its metrics with 'vllm:' prefix
361-
from prometheus_client import REGISTRY, multiprocess
362-
363-
from dynamo.common.utils.prometheus import register_engine_metrics_callback
364-
365-
# Option 1: Try adding MultiProcessCollector to the global REGISTRY
366-
# This would make REGISTRY collect from both its registered metrics AND multiprocess files
367-
if os.environ.get("PROMETHEUS_MULTIPROC_DIR"):
368-
try:
369-
# Add MultiProcessCollector to global REGISTRY
370-
# This makes REGISTRY collect from .db files in addition to its own metrics
371-
multiprocess.MultiProcessCollector(REGISTRY)
372-
logger.info("Added MultiProcessCollector to global REGISTRY")
373-
except ValueError as e:
374-
# Might already be registered or directory issues
375-
logger.warning(f"Could not add MultiProcessCollector to REGISTRY: {e}")
376-
377-
# Register callback with the global REGISTRY
378-
# Now it should collect both its own metrics AND multiprocess metrics
379-
register_engine_metrics_callback(
380-
endpoint=generate_endpoint,
381-
registry=REGISTRY,
382-
metric_prefix_filters=["vllm:", "lmcache:"],
383-
)
417+
setup_metrics_collection(config, generate_endpoint, logger)
384418

385419
# Register prefill model with ModelType.Prefill
386420
if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register
@@ -493,31 +527,7 @@ async def init(runtime: DistributedRuntime, config: Config):
493527
if kv_publishers:
494528
handler.kv_publishers = kv_publishers
495529

496-
if config.engine_args.disable_log_stats is False:
497-
# vLLM v1 registers its metrics with 'vllm:' prefix
498-
from prometheus_client import REGISTRY, multiprocess
499-
500-
from dynamo.common.utils.prometheus import register_engine_metrics_callback
501-
502-
# Option 1: Try adding MultiProcessCollector to the global REGISTRY
503-
# This would make REGISTRY collect from both its registered metrics AND multiprocess files
504-
if os.environ.get("PROMETHEUS_MULTIPROC_DIR"):
505-
try:
506-
# Add MultiProcessCollector to global REGISTRY
507-
# This makes REGISTRY collect from .db files in addition to its own metrics
508-
multiprocess.MultiProcessCollector(REGISTRY)
509-
logger.info("Added MultiProcessCollector to global REGISTRY")
510-
except ValueError as e:
511-
# Might already be registered or directory issues
512-
logger.warning(f"Could not add MultiProcessCollector to REGISTRY: {e}")
513-
514-
# Register callback with the global REGISTRY
515-
# Now it should collect both its own metrics AND multiprocess metrics
516-
register_engine_metrics_callback(
517-
endpoint=generate_endpoint,
518-
registry=REGISTRY,
519-
metric_prefix_filters=["vllm:", "lmcache:"],
520-
)
530+
setup_metrics_collection(config, generate_endpoint, logger)
521531

522532
if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register
523533
# Parse endpoint types from --dyn-endpoint-types flag

docs/backends/vllm/LMCache_Integration.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ When LMCache is enabled with `--connector lmcache` and `DYN_SYSTEM_PORT` is set,
156156
**Requirements to access LMCache metrics:**
157157
- `--connector lmcache` - Enables LMCache
158158
- `DYN_SYSTEM_PORT=8081` - Enables metrics HTTP endpoint
159+
- `PROMETHEUS_MULTIPROC_DIR` (optional) - If not set, Dynamo manages it internally. Only set explicitly if you need control over the metrics directory.
159160

160161
For detailed information on LMCache metrics, including the complete list of available metrics and how to access them, see the **[LMCache Metrics section](prometheus.md#lmcache-metrics)** in the vLLM Prometheus Metrics Guide.
161162

docs/backends/vllm/prometheus.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ curl -s localhost:8081/metrics | grep "^lmcache:"
136136
## Implementation Details
137137

138138
- vLLM v1 uses multiprocess metrics collection via `prometheus_client.multiprocess`
139-
- `PROMETHEUS_MULTIPROC_DIR`: vLLM sets this environment variable to a temporary directory where multiprocess metrics are stored as memory-mapped files. Each worker process writes its metrics to separate files in this directory, which are aggregated when `/metrics` is scraped.
139+
- `PROMETHEUS_MULTIPROC_DIR` (optional): By default, Dynamo manages this environment variable automatically, setting it to a temporary directory where multiprocess metrics are stored as memory-mapped files. Each worker process writes its metrics to separate files in this directory, which are aggregated when `/metrics` is scraped. Set it explicitly only when you need complete control over the metrics directory.
140140
- Dynamo uses `MultiProcessCollector` to aggregate metrics from all worker processes
141141
- Metrics are filtered by the `vllm:` and `lmcache:` prefixes before being exposed (when LMCache is enabled)
142142
- The integration uses Dynamo's `register_engine_metrics_callback()` function with the global `REGISTRY`

examples/backends/vllm/launch/agg_lmcache.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,13 @@
44
set -e
55
trap 'echo Cleaning up...; kill 0' EXIT
66

7+
# Explicitly unset PROMETHEUS_MULTIPROC_DIR to let LMCache or Dynamo manage it internally
8+
unset PROMETHEUS_MULTIPROC_DIR
9+
710
# run ingress
811
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
912
python -m dynamo.frontend &
1013

11-
# run worker with LMCache enabled
14+
# run worker with LMCache enabled (without PROMETHEUS_MULTIPROC_DIR set externally)
1215
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
1316
python -m dynamo.vllm --model Qwen/Qwen3-0.6B --connector lmcache
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
set -e

# K8s-style deployment: PROMETHEUS_MULTIPROC_DIR is already in the environment
# when the worker starts. A caller-provided value is honored; otherwise a
# per-run path (PID + $RANDOM) is used to avoid conflicts between runs.
: "${PROMETHEUS_MULTIPROC_DIR:=/tmp/prometheus_multiproc_$$_$RANDOM}"
export PROMETHEUS_MULTIPROC_DIR

# Start from an empty directory.
rm -rf "$PROMETHEUS_MULTIPROC_DIR"
mkdir -p "$PROMETHEUS_MULTIPROC_DIR"

# On exit: remove the metrics directory, then signal the whole process group
# so the background frontend is stopped as well.
trap 'echo "Cleaning up..."; rm -rf "$PROMETHEUS_MULTIPROC_DIR"; kill 0' EXIT

# run ingress
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
python -m dynamo.frontend &

# run worker with LMCache enabled and PROMETHEUS_MULTIPROC_DIR explicitly set
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
PROMETHEUS_MULTIPROC_DIR="$PROMETHEUS_MULTIPROC_DIR" \
    python -m dynamo.vllm --model Qwen/Qwen3-0.6B --connector lmcache
28+

tests/serve/test_vllm.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import base64
55
import logging
66
import os
7+
import random
78
from dataclasses import dataclass, field
89

910
import pytest
@@ -64,6 +65,22 @@ class VLLMConfig(EngineConfig):
6465
metric_payload_default(min_num_requests=6, backend="lmcache"),
6566
],
6667
),
68+
"aggregated_lmcache_multiproc": VLLMConfig(
69+
name="aggregated_lmcache_multiproc",
70+
directory=vllm_dir,
71+
script_name="agg_lmcache_multiproc.sh",
72+
marks=[pytest.mark.gpu_1],
73+
model="Qwen/Qwen3-0.6B",
74+
env={
75+
"PROMETHEUS_MULTIPROC_DIR": f"/tmp/prometheus_multiproc_test_{os.getpid()}_{random.randint(0, 10000)}"
76+
},
77+
request_payloads=[
78+
chat_payload_default(),
79+
completion_payload_default(),
80+
metric_payload_default(min_num_requests=6, backend="vllm"),
81+
metric_payload_default(min_num_requests=6, backend="lmcache"),
82+
],
83+
),
6784
"agg-request-plane-tcp": VLLMConfig(
6885
name="agg-request-plane-tcp",
6986
directory=vllm_dir,

0 commit comments

Comments
 (0)