File tree Expand file tree Collapse file tree 1 file changed +4
-4
lines changed
Expand file tree Collapse file tree 1 file changed +4
-4
lines changed Original file line number Diff line number Diff line change @@ -185,7 +185,7 @@ Dynamo metrics include these labels for filtering:
185185
186186#### Example: Scale Decode Service Based on TTFT
187187
188- Using HPA with Prometheus Adapter requires configuring external metrics.
188+ Using HPA with Prometheus Adapter requires configuring external metrics.
189189
190190**Step 1: Configure Prometheus Adapter**
191191
@@ -208,7 +208,7 @@ rules:
208208 as: "dynamo_ttft_p95_seconds"
209209 metricsQuery: |
210210 histogram_quantile(0.95,
211- sum(rate(dynamo_frontend_time_to_first_token_seconds_bucket{<<.LabelMatchers>>}[5m]))
211+ sum(rate(dynamo_frontend_time_to_first_token_seconds_bucket{<<.LabelMatchers>>}[5m]))
212212 by (le, namespace, dynamo_namespace)
213213 )
214214```
@@ -383,7 +383,7 @@ spec:
383383 metricName: dynamo_ttft_p95
384384 query: |
385385 histogram_quantile(0.95,
386- sum(rate(dynamo_frontend_time_to_first_token_seconds_bucket{dynamo_namespace="default-sglang-agg"}[5m]))
386+ sum(rate(dynamo_frontend_time_to_first_token_seconds_bucket{dynamo_namespace="default-sglang-agg"}[5m]))
387387 by (le)
388388 )
389389 threshold: "0.5" # Scale up when TTFT p95 > 500ms (0.5 seconds)
@@ -519,7 +519,7 @@ spec:
519519 serverAddress: http://prometheus-kube-prometheus-prometheus.monitoring.svc:9090
520520 query: |
521521 histogram_quantile(0.95,
522- sum(rate(dynamo_frontend_time_to_first_token_seconds_bucket{dynamo_namespace="default-sglang-agg"}[5m]))
522+ sum(rate(dynamo_frontend_time_to_first_token_seconds_bucket{dynamo_namespace="default-sglang-agg"}[5m]))
523523 by (le)
524524 )
525525 threshold: "0.5"
You can’t perform that action at this time.
0 commit comments