Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
9fc116c
feat: add --enable-otel flag to SGLang launch scripts
ishandhanani Nov 11, 2025
d5f53e4
feat: add --enable-metrics flag with proper port allocation
ishandhanani Nov 11, 2025
705867b
feat: enable system metrics by default in launch scripts
ishandhanani Nov 11, 2025
85f04a5
refactor: remove unnecessary METRICS_ARGS and conditional logic
ishandhanani Nov 11, 2025
15b8edd
feat: add --enable-otel and metrics to remaining SGLang scripts
ishandhanani Nov 11, 2025
5ea0fa0
fix: remove DYN_SYSTEM_PORT from frontend and revert multimodal changes
ishandhanani Nov 11, 2025
b92cbb2
revert: remove --enable-otel changes from disagg_same_gpu.sh
ishandhanani Nov 11, 2025
ab16195
feat(sglang): add trace ID handling and tracing endpoint configuration
ishandhanani Nov 12, 2025
c3cf1a4
Merge branch 'main' into ishan/unify-trace
ishandhanani Nov 12, 2025
49c7af9
initial approach
ishandhanani Nov 12, 2025
2781adf
bump
ishandhanani Nov 12, 2025
fe4f656
bump
ishandhanani Nov 12, 2025
6f8296c
bump
ishandhanani Nov 12, 2025
d0631b5
wut
ishandhanani Nov 12, 2025
cf56c6a
bump
ishandhanani Nov 12, 2025
92a9ac8
bump
ishandhanani Nov 12, 2025
b4df20e
bump
ishandhanani Nov 12, 2025
ea70762
try
ishandhanani Nov 12, 2025
c276fa1
Merge branch 'main' into ishan/unify-trace
ishandhanani Nov 22, 2025
309ef06
Merge branch 'main' into ishan/unify-trace
ishandhanani Nov 23, 2025
71d8840
Merge branch 'main' into ishan/unify-trace
ishandhanani Nov 28, 2025
367024c
comments
ishandhanani Dec 4, 2025
0eff209
rebase
ishandhanani Dec 4, 2025
fe3c2a3
go
ishandhanani Dec 4, 2025
473ef21
Merge branch 'ishan/unify-trace' of github.com:ai-dynamo/dynamo into …
ishandhanani Dec 4, 2025
7331118
trace
ishandhanani Dec 4, 2025
bacbc59
Revert even though tempo and grafana are terrible for viewing traces
ishandhanani Dec 4, 2025
4c7b2c0
Merge branch 'main' into ishan/unify-trace
ishandhanani Dec 4, 2025
bda625f
fix
ishandhanani Dec 4, 2025
7f89bae
Merge branch 'ishan/unify-trace' of github.com:ai-dynamo/dynamo into …
ishandhanani Dec 4, 2025
87dd9f1
Merge branch 'main' into ishan/unify-trace
ishandhanani Dec 5, 2025
fb44cd0
Merge branch 'main' into ishan/unify-trace
ishandhanani Dec 8, 2025
3868e6b
Merge branch 'main' into ishan/unify-trace
ishandhanani Dec 9, 2025
22e8ead
lol
ishandhanani Dec 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions components/src/dynamo/sglang/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,8 @@ async def init(runtime: DistributedRuntime, config: Config):
server_args, dynamo_args = config.server_args, config.dynamo_args

# Prevent SGLang from blocking on non-leader nodes
# We can switch this to 0 and leverage our own metrics
# after https://github.com/sgl-project/sglang/pull/13686
# is merged in
if server_args.node_rank >= 1:
os.environ["SGLANG_BLOCK_NONZERO_RANK_CHILDREN"] = "1"
os.environ["SGLANG_BLOCK_NONZERO_RANK_CHILDREN"] = "0"

engine = sgl.Engine(server_args=server_args)

Expand Down Expand Up @@ -222,11 +219,8 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
server_args, dynamo_args = config.server_args, config.dynamo_args

# Prevent SGLang from blocking on non-leader nodes
# We can switch this to 0 and leverage our own metrics
# after https://github.com/sgl-project/sglang/pull/13686
# is merged in
if server_args.node_rank >= 1:
os.environ["SGLANG_BLOCK_NONZERO_RANK_CHILDREN"] = "1"
os.environ["SGLANG_BLOCK_NONZERO_RANK_CHILDREN"] = "0"

engine = sgl.Engine(server_args=server_args)

Expand Down
37 changes: 37 additions & 0 deletions components/src/dynamo/sglang/request_handlers/handler_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# SPDX-License-Identifier: Apache-2.0

import asyncio
import base64
import json
import logging
import random
import socket
Expand All @@ -10,6 +12,7 @@
from typing import Any, AsyncGenerator, Dict, Optional, Tuple

import sglang as sgl
from sglang.srt.tracing import trace as sglang_trace
from sglang.srt.utils import get_local_ip_auto

from dynamo._core import Client, Component, Context
Expand Down Expand Up @@ -49,6 +52,7 @@ def __init__(
self.prefill_client = prefill_client
self.serving_mode = config.serving_mode
self.skip_tokenizer_init = config.server_args.skip_tokenizer_init
self.enable_trace = config.server_args.enable_trace

@abstractmethod
async def generate(self, request: Dict[str, Any], context: Context):
Expand Down Expand Up @@ -117,6 +121,39 @@ def _get_bootstrap_info(engine: sgl.Engine) -> Tuple[str, int]:

return bootstrap_host, bootstrap_port

def _propagate_trace_context_to_sglang(
    self, context: Context, bootstrap_room: int = 0
):
    """Propagate Dynamo's trace context to SGLang for distributed tracing.

    The payload layout mirrors what SGLang produces in the ``to_dict()``
    method of
    https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/tracing/trace.py
    (derived by reading that module): a JSON object keyed by the bootstrap
    room, base64-encoded before being handed to SGLang.

    Propagation is best-effort. If the context carries no trace/span ID, or
    the IDs are not valid hexadecimal strings, nothing is propagated and the
    request proceeds untraced instead of failing.

    Args:
        context: Dynamo Context object containing trace information
            (``trace_id`` and ``span_id`` are read from it).
        bootstrap_room: Bootstrap room ID (0 for aggregated, actual room for
            disaggregated).
    """
    trace_id = context.trace_id
    span_id = context.span_id
    if not trace_id or not span_id:
        return

    # SGLang wants the IDs as integers. Parse them up front so a malformed
    # ID is reported and skipped rather than raising out of the request path.
    try:
        prev_span = {
            "span_id": int(span_id, 16),
            "trace_id": int(trace_id, 16),
        }
    except (TypeError, ValueError):
        logging.warning(
            "Skipping SGLang trace propagation: malformed trace_id=%r span_id=%r",
            trace_id,
            span_id,
        )
        return

    # Build trace context for SGLang, keyed by the bootstrap room as a string.
    # The traceparent follows the "version-traceid-spanid-flags" layout with
    # the version fixed to 00 and the flags fixed to 01.
    trace_context = {
        str(bootstrap_room): {
            "root_span": {"traceparent": f"00-{trace_id}-{span_id}-01"},
            "prev_span": prev_span,
        }
    }

    # Encode and propagate: JSON -> UTF-8 bytes -> base64 text.
    payload = json.dumps(trace_context, ensure_ascii=False).encode("utf-8")
    base64_context = base64.b64encode(payload).decode("utf-8")
    sglang_trace.trace_set_remote_propagate_context(base64_context)

async def _handle_cancellation(
self, request_id_future: asyncio.Future, context: Context
):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ async def generate(
RuntimeError: If no bootstrap info received from prefill worker.
"""
logging.debug(f"New Request ID: {context.id()}")
trace_id = context.trace_id
sampling_params = self._build_sampling_params(request)
input_param = self._get_input_param(request)

Expand Down Expand Up @@ -154,13 +155,19 @@ async def generate(
if not bootstrap_info:
raise RuntimeError("No bootstrap info received from prefill worker")

if self.enable_trace:
self._propagate_trace_context_to_sglang(
context, bootstrap_info["bootstrap_room"]
)

decode = await self.engine.async_generate(
**input_param,
sampling_params=sampling_params,
stream=True,
bootstrap_host=bootstrap_info["bootstrap_host"],
bootstrap_port=bootstrap_info["bootstrap_port"],
bootstrap_room=bootstrap_info["bootstrap_room"],
rid=trace_id,
)

if self.skip_tokenizer_init:
Expand All @@ -170,10 +177,14 @@ async def generate(
async for out in self._process_text_stream(decode, context):
yield out
else:
if self.enable_trace:
self._propagate_trace_context_to_sglang(context)

agg = await self.engine.async_generate(
**input_param,
sampling_params=sampling_params,
stream=True,
rid=trace_id,
)
if self.skip_tokenizer_init:
async for out in self._process_token_stream(agg, context):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ async def generate(
Bootstrap info dict with host, port, and room for decode worker connection.
"""
logging.debug(f"New Request ID: {context.id()}")
trace_id = context.trace_id
bootstrap_room = self._generate_bootstrap_room()

bootstrap_info = {
Expand All @@ -76,13 +77,18 @@ async def generate(

input_param = self._get_input_param(request["request"])

# Propagate trace context to SGLang
if self.enable_trace:
self._propagate_trace_context_to_sglang(context, bootstrap_room)

results = await self.engine.async_generate(
**input_param,
sampling_params=request["sampling_params"],
stream=True,
bootstrap_host=self.bootstrap_host,
bootstrap_port=self.bootstrap_port,
bootstrap_room=bootstrap_room,
rid=trace_id,
)

task = asyncio.create_task(self._consume_results(results, context))
Expand Down
2 changes: 1 addition & 1 deletion container/Dockerfile.sglang
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ ARG DEEPEP_COMMIT=9af0e0d0e74f3577af1979c9b9e1ac2cad0104ee
ARG DEEPEP_GB_COMMIT=1b14ad661c7640137fcfe93cccb2694ede1220b0
ARG CMAKE_BUILD_PARALLEL_LEVEL=2
ARG SGL_KERNEL_VERSION=0.3.16.post5
ARG SGLANG_COMMIT=0.5.4.post3
ARG SGLANG_COMMIT=0.5.6
ARG GDRCOPY_COMMIT=v2.4.4
ARG NVSHMEM_VERSION=3.3.9
ARG GRACE_BLACKWELL=false
Expand Down
2 changes: 1 addition & 1 deletion deploy/observability/tempo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ distributor:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
endpoint: 0.0.0.0:4317 # Receives from OTEL collector
http:
endpoint: 0.0.0.0:4318

Expand Down
5 changes: 4 additions & 1 deletion examples/backends/sglang/launch/agg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@ while [[ $# -gt 0 ]]; do
done

# Enable tracing if requested
TRACE_ARGS=()
if [ "$ENABLE_OTEL" = true ]; then
export DYN_LOGGING_JSONL=true
export OTEL_EXPORT_ENABLED=1
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}
TRACE_ARGS+=(--enable-trace --otlp-traces-endpoint localhost:4317)
fi

# run ingress
Expand All @@ -59,7 +61,7 @@ python3 -m dynamo.frontend &
DYNAMO_PID=$!

# run worker with metrics enabled
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
OTEL_SERVICE_NAME=dynamo-worker DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
python3 -m dynamo.sglang \
--model-path "$MODEL" \
--served-model-name "$MODEL" \
Expand All @@ -68,4 +70,5 @@ python3 -m dynamo.sglang \
--trust-remote-code \
--skip-tokenizer-init \
--enable-metrics \
"${TRACE_ARGS[@]}" \
"${EXTRA_ARGS[@]}"
5 changes: 4 additions & 1 deletion examples/backends/sglang/launch/agg_embed.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@ while [[ $# -gt 0 ]]; do
done

# Enable tracing if requested
TRACE_ARGS=()
if [ "$ENABLE_OTEL" = true ]; then
export DYN_LOGGING_JSONL=true
export OTEL_EXPORT_ENABLED=1
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}
TRACE_ARGS+=(--enable-trace --otlp-traces-endpoint localhost:4317)
fi

# run ingress
Expand All @@ -59,4 +61,5 @@ python3 -m dynamo.sglang \
--tp 1 \
--trust-remote-code \
--use-sglang-tokenizer \
--enable-metrics
--enable-metrics \
"${TRACE_ARGS[@]}"
8 changes: 6 additions & 2 deletions examples/backends/sglang/launch/agg_router.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@ while [[ $# -gt 0 ]]; do
done

# Enable tracing if requested
TRACE_ARGS=()
if [ "$ENABLE_OTEL" = true ]; then
export DYN_LOGGING_JSONL=true
export OTEL_EXPORT_ENABLED=1
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}
TRACE_ARGS+=(--enable-trace --otlp-traces-endpoint localhost:4317)
fi

# run ingress
Expand All @@ -58,7 +60,8 @@ python3 -m dynamo.sglang \
--tp 1 \
--trust-remote-code \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}' \
--enable-metrics &
--enable-metrics \
"${TRACE_ARGS[@]}" &
WORKER_PID=$!

OTEL_SERVICE_NAME=dynamo-worker-2 DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT_WORKER2:-8082} \
Expand All @@ -69,4 +72,5 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--tp 1 \
--trust-remote-code \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}' \
--enable-metrics
--enable-metrics \
"${TRACE_ARGS[@]}"
8 changes: 6 additions & 2 deletions examples/backends/sglang/launch/disagg.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,12 @@ while [[ $# -gt 0 ]]; do
done

# Enable tracing if requested
TRACE_ARGS=()
if [ "$ENABLE_OTEL" = true ]; then
export DYN_LOGGING_JSONL=true
export OTEL_EXPORT_ENABLED=1
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}
TRACE_ARGS+=(--enable-trace --otlp-traces-endpoint localhost:4317)
fi

# run ingress
Expand All @@ -65,7 +67,8 @@ python3 -m dynamo.sglang \
--host 0.0.0.0 \
--port 40000 \
--disaggregation-transfer-backend nixl \
--enable-metrics --log-level debug &
--enable-metrics \
"${TRACE_ARGS[@]}" &
PREFILL_PID=$!

# run decode worker
Expand All @@ -81,4 +84,5 @@ CUDA_VISIBLE_DEVICES=2,3 python3 -m dynamo.sglang \
--disaggregation-bootstrap-port 12345 \
--host 0.0.0.0 \
--disaggregation-transfer-backend nixl \
--enable-metrics --log-level debug
--enable-metrics \
"${TRACE_ARGS[@]}"
14 changes: 10 additions & 4 deletions examples/backends/sglang/launch/disagg_router.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,12 @@ while [[ $# -gt 0 ]]; do
done

# Enable tracing if requested
TRACE_ARGS=()
if [ "$ENABLE_OTEL" = true ]; then
export DYN_LOGGING_JSONL=true
export OTEL_EXPORT_ENABLED=1
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}
TRACE_ARGS+=(--enable-trace --otlp-traces-endpoint localhost:4317)
fi

# run ingress
Expand Down Expand Up @@ -74,7 +76,8 @@ python3 -m dynamo.sglang \
--host 0.0.0.0 \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}' \
--disaggregation-transfer-backend nixl \
--enable-metrics &
--enable-metrics \
"${TRACE_ARGS[@]}" &
PREFILL_PID=$!

# run prefill worker
Expand All @@ -89,7 +92,8 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--host 0.0.0.0 \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}' \
--disaggregation-transfer-backend nixl \
--enable-metrics &
--enable-metrics \
"${TRACE_ARGS[@]}" &
PREFILL_PID=$!

# run decode worker
Expand All @@ -104,7 +108,8 @@ CUDA_VISIBLE_DEVICES=3 python3 -m dynamo.sglang \
--host 0.0.0.0 \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5560"}' \
--disaggregation-transfer-backend nixl \
--enable-metrics &
--enable-metrics \
"${TRACE_ARGS[@]}" &
PREFILL_PID=$!

# run decode worker
Expand All @@ -119,4 +124,5 @@ CUDA_VISIBLE_DEVICES=2 python3 -m dynamo.sglang \
--host 0.0.0.0 \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5559"}' \
--disaggregation-transfer-backend nixl \
--enable-metrics
--enable-metrics \
"${TRACE_ARGS[@]}"
Loading