From d924b5afe334f5785bee5ca5c8a617541efc4f14 Mon Sep 17 00:00:00 2001 From: Steve Flinchbaugh Date: Wed, 16 Apr 2025 12:45:51 -0400 Subject: [PATCH 1/2] removes authority and target_addr metric labels --- .github/workflows/release.yml | 4 +- linkerd/app/admin/src/stack.rs | 2 - linkerd/app/core/src/metrics.rs | 25 +-- linkerd/app/inbound/src/http/router.rs | 4 - .../app/integration/src/tests/discovery.rs | 2 - .../app/integration/src/tests/telemetry.rs | 167 +----------------- .../src/tests/telemetry/tcp_errors.rs | 10 +- linkerd/app/outbound/src/http/concrete.rs | 6 +- .../app/outbound/src/http/endpoint/tests.rs | 2 - linkerd/app/outbound/src/opaq/concrete.rs | 9 - 10 files changed, 19 insertions(+), 212 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 47a6e51d17..31ba0a88b7 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -49,7 +49,7 @@ env: CARGO_INCREMENTAL: 0 CARGO_NET_RETRY: 10 RUSTFLAGS: "-D warnings -A deprecated --cfg tokio_unstable" - RUSTUP_MAX_RETRIES: 10 + RUSTUP_MAX_RETRIES: 11 concurrency: group: ${{ github.workflow }}-${{ inputs.ref || github.head_ref }} @@ -135,7 +135,7 @@ jobs: - os: windows arch: arm64 - os: windows - arch: arm + arch: arm # If we're not actually building on a release tag, don't short-circuit on # errors. This helps us know whether a failure is platform-specific. diff --git a/linkerd/app/admin/src/stack.rs b/linkerd/app/admin/src/stack.rs index a198f9e629..618ebc5504 100644 --- a/linkerd/app/admin/src/stack.rs +++ b/linkerd/app/admin/src/stack.rs @@ -273,8 +273,6 @@ impl Param for Permitted { fn param(&self) -> metrics::EndpointLabels { metrics::InboundEndpointLabels { tls: self.http.tcp.tls.clone(), - authority: None, - target_addr: self.http.tcp.addr.into(), policy: self.permit.labels.clone(), } .into() diff --git a/linkerd/app/core/src/metrics.rs b/linkerd/app/core/src/metrics.rs index 3aac6ccc12..44e535fb77 100644 --- a/linkerd/app/core/src/metrics.rs +++ b/linkerd/app/core/src/metrics.rs @@ -18,7 +18,6 @@ use linkerd_proxy_server_policy as policy; use prometheus_client::encoding::{EncodeLabelSet, EncodeLabelValue}; use std::{ fmt::{self, Write}, - net::SocketAddr, sync::Arc, time::Duration, }; @@ -66,8 +65,6 @@ pub enum EndpointLabels { #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct InboundEndpointLabels { pub tls: tls::ConditionalServerTls, - pub authority: Option, - pub target_addr: SocketAddr, pub policy: RouteAuthzLabels, } @@ -99,10 +96,8 @@ pub struct RouteAuthzLabels { #[derive(Clone, Debug, PartialEq, Eq, Hash)] pub struct OutboundEndpointLabels { pub server_id: tls::ConditionalClientTls, - pub authority: Option, pub labels: Option, pub zone_locality: OutboundZoneLocality, - pub target_addr: SocketAddr, } #[derive(Debug, Copy, Clone, Default, Hash, Eq, PartialEq, EncodeLabelValue)] @@ -317,17 +312,7 @@ impl FmtLabels for EndpointLabels { impl FmtLabels for InboundEndpointLabels { fn fmt_labels(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Some(a) = self.authority.as_ref() { - Authority(a).fmt_labels(f)?; - write!(f, ",")?; - } - - ( - (TargetAddr(self.target_addr), TlsAccept::from(&self.tls)), - &self.policy, - ) - .fmt_labels(f)?; - + ((TlsAccept::from(&self.tls)), &self.policy).fmt_labels(f)?; Ok(()) } } @@ -409,14 +394,8 @@ impl svc::Param for OutboundEndpointLabels { impl FmtLabels for OutboundEndpointLabels { fn fmt_labels(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Some(a) = self.authority.as_ref() { - Authority(a).fmt_labels(f)?; - write!(f, ",")?; - } - - let ta = TargetAddr(self.target_addr); let tls = TlsConnect::from(&self.server_id); - (ta, tls).fmt_labels(f)?; + (tls).fmt_labels(f)?; if let Some(labels) = self.labels.as_ref() { write!(f, ",{}", labels)?; diff --git a/linkerd/app/inbound/src/http/router.rs b/linkerd/app/inbound/src/http/router.rs index ec38be71f5..0a0d730d48 100644 --- a/linkerd/app/inbound/src/http/router.rs +++ b/linkerd/app/inbound/src/http/router.rs @@ -396,10 +396,6 @@ fn endpoint_labels( move |t: &Logical| -> metrics::EndpointLabels { metrics::InboundEndpointLabels { tls: t.tls.clone(), - authority: unsafe_authority_labels - .then(|| t.logical.as_ref().map(|d| d.as_http_authority())) - .flatten(), - target_addr: t.addr.into(), policy: t.permit.labels.clone(), } .into() diff --git a/linkerd/app/integration/src/tests/discovery.rs b/linkerd/app/integration/src/tests/discovery.rs index 49604da6d3..91f8fa06eb 100644 --- a/linkerd/app/integration/src/tests/discovery.rs +++ b/linkerd/app/integration/src/tests/discovery.rs @@ -454,14 +454,12 @@ mod http2 { // Simulate the first server falling over without discovery // knowing about it... tracing::info!(%alpha.addr, "Stopping"); - let alpha_addr = alpha.addr; alpha.join().await; // Wait until the proxy has seen the `alpha` disconnect... metrics::metric("tcp_close_total") .label("peer", "dst") .label("direction", "outbound") - .label("target_addr", alpha_addr.to_string()) .value(1u64) .assert_in(&metrics) .await; diff --git a/linkerd/app/integration/src/tests/telemetry.rs b/linkerd/app/integration/src/tests/telemetry.rs index fc74455097..ec919f9b38 100644 --- a/linkerd/app/integration/src/tests/telemetry.rs +++ b/linkerd/app/integration/src/tests/telemetry.rs @@ -56,8 +56,8 @@ impl Fixture { let client = client::new(proxy.inbound, "tele.test.svc.cluster.local"); let tcp_dst_labels = metrics::labels().label("direction", "inbound"); - let tcp_src_labels = tcp_dst_labels.clone().label("target_addr", orig_dst); - let labels = tcp_dst_labels.clone().label("target_port", orig_dst.port()); + let tcp_src_labels = tcp_dst_labels.clone(); + let labels = tcp_dst_labels.clone(); let tcp_src_labels = tcp_src_labels.label("peer", "src"); let tcp_dst_labels = tcp_dst_labels.label("peer", "dst"); Fixture { @@ -94,9 +94,7 @@ impl Fixture { let metrics = client::http1(proxy.admin, "localhost"); let client = client::new(proxy.outbound, "tele.test.svc.cluster.local"); - let tcp_labels = metrics::labels() - .label("direction", "outbound") - .label("target_addr", orig_dst); + let tcp_labels = metrics::labels().label("direction", "outbound") let labels = tcp_labels.clone(); let tcp_src_labels = tcp_labels.clone().label("peer", "src"); let tcp_dst_labels = tcp_labels.label("peer", "dst"); @@ -151,7 +149,6 @@ impl TcpFixture { let src_labels = metrics::labels() .label("direction", "inbound") .label("peer", "src") - .label("target_addr", orig_dst) .label("srv_kind", "default") .label("srv_name", "all-unauthenticated"); @@ -191,8 +188,7 @@ impl TcpFixture { .label("direction", "outbound") .label("peer", "src") .label("tls", "no_identity") - .label("no_tls_reason", "loopback") - .label("target_addr", orig_dst); + .label("no_tls_reason", "loopback"); let dst_labels = metrics::labels() .label("direction", "outbound") .label("peer", "dst"); @@ -216,7 +212,6 @@ async fn admin_request_count() { let metrics = fixture.metrics; let metric = metrics::metric("request_total") .label("direction", "inbound") - .label("target_addr", metrics.target_addr()) .value(1usize); // We can't assert that the metric is not present, since `GET /metrics` @@ -231,7 +226,6 @@ async fn admin_transport_metrics() { let metrics = fixture.metrics; let labels = metrics::labels() .label("direction", "inbound") - .label("target_addr", metrics.target_addr()) .label("peer", "src"); let mut open_total = labels.metric("tcp_open_total").value(1usize); @@ -306,18 +300,13 @@ async fn test_http_count(metric_name: &str, fixture: impl Future metrics::MetricMatch { - metrics::metric(METRIC).label("target_addr", proxy.inbound_server.as_ref().unwrap().addr) +fn metric(_proxy: &proxy::Listening) -> metrics::MetricMatch { + metrics::metric(METRIC) } /// Tests that the detect metric is labeled and incremented on timeout. @@ -246,7 +246,7 @@ async fn inbound_direct_multi() { let (proxy, metrics) = Test::new(proxy).run().await; let client = crate::tcp::client(proxy.inbound); - let metric = metrics::metric(METRIC).label("target_addr", proxy.inbound); + let metric = metrics::metric(METRIC); let timeout_metric = metric.clone().label("error", "tls detection timeout"); let no_tls_metric = metric.clone().label("error", "unexpected"); @@ -293,8 +293,7 @@ async fn inbound_invalid_ip() { let client = crate::tcp::client(proxy.inbound); let metric = metric(&proxy) - .label("error", "unexpected") - .label("target_addr", fake_ip); + .label("error", "unexpected"); let tcp_client = client.connect().await; tcp_client.write(TcpFixture::HELLO_MSG).await; @@ -357,7 +356,6 @@ async fn inbound_direct_success() { let no_tls_client = crate::tcp::client(proxy1.inbound); let metric = metrics::metric(METRIC) - .label("target_addr", proxy1.inbound) .label("error", "tls detection timeout") .value(1u64); diff --git a/linkerd/app/outbound/src/http/concrete.rs b/linkerd/app/outbound/src/http/concrete.rs index 557e2fccc4..124a2ded33 100644 --- a/linkerd/app/outbound/src/http/concrete.rs +++ b/linkerd/app/outbound/src/http/concrete.rs @@ -9,7 +9,7 @@ use crate::{ }; use linkerd_app_core::{ config::{ConnectConfig, QueueConfig}, - metrics::{prefix_labels, EndpointLabels, OutboundEndpointLabels, OutboundZoneLocality}, + metrics::{prefix_outbound_endpoint_labels, EndpointLabels, OutboundEndpointLabels, OutboundZoneLocality}, profiles, proxy::{ api_resolve::{ConcreteAddr, Metadata, ProtocolHint}, @@ -219,11 +219,9 @@ where { fn param(&self) -> OutboundEndpointLabels { OutboundEndpointLabels { - authority: self.parent.param(), - labels: prefix_labels("dst", self.metadata.labels().iter()), + labels: prefix_outbound_endpoint_labels(self.metadata.labels().iter()), zone_locality: self.param(), server_id: self.param(), - target_addr: self.addr.into(), } } } diff --git a/linkerd/app/outbound/src/http/endpoint/tests.rs b/linkerd/app/outbound/src/http/endpoint/tests.rs index a65db50021..b80a2989f6 100644 --- a/linkerd/app/outbound/src/http/endpoint/tests.rs +++ b/linkerd/app/outbound/src/http/endpoint/tests.rs @@ -310,11 +310,9 @@ impl svc::Param for Endpoint { impl svc::Param for Endpoint { fn param(&self) -> metrics::OutboundEndpointLabels { metrics::OutboundEndpointLabels { - authority: None, labels: None, zone_locality: OutboundZoneLocality::Unknown, server_id: self.param(), - target_addr: self.addr.into(), } } } diff --git a/linkerd/app/outbound/src/opaq/concrete.rs b/linkerd/app/outbound/src/opaq/concrete.rs index b0f43806a7..35c70e83a7 100644 --- a/linkerd/app/outbound/src/opaq/concrete.rs +++ b/linkerd/app/outbound/src/opaq/concrete.rs @@ -352,19 +352,10 @@ where T: svc::Param, { fn param(&self) -> metrics::OutboundEndpointLabels { - let authority = self - .parent - .param() - .addr - .name_addr() - .map(|a| a.as_http_authority()); - metrics::OutboundEndpointLabels { - authority, labels: metrics::prefix_labels("dst", self.metadata.labels().iter()), zone_locality: self.param(), server_id: self.param(), - target_addr: self.addr.into(), } } } From 168b524acc3c8ebdc8c37fc4513281693372d8a2 Mon Sep 17 00:00:00 2001 From: Steve Flinchbaugh Date: Wed, 16 Apr 2025 12:49:11 -0400 Subject: [PATCH 2/2] reduces latency metric buckets --- .github/workflows/release.yml | 2 +- linkerd/metrics/src/latency.rs | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 31ba0a88b7..1d0fafb83f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -49,7 +49,7 @@ env: CARGO_INCREMENTAL: 0 CARGO_NET_RETRY: 10 RUSTFLAGS: "-D warnings -A deprecated --cfg tokio_unstable" - RUSTUP_MAX_RETRIES: 11 + RUSTUP_MAX_RETRIES: 12 concurrency: group: ${{ github.workflow }}-${{ inputs.ref || github.head_ref }} diff --git a/linkerd/metrics/src/latency.rs b/linkerd/metrics/src/latency.rs index af094cb23e..81a40f889e 100644 --- a/linkerd/metrics/src/latency.rs +++ b/linkerd/metrics/src/latency.rs @@ -6,14 +6,7 @@ use super::histogram::{Bounds, Bucket, Histogram}; /// milliseconds. pub const BOUNDS: &Bounds = &Bounds(&[ Bucket::Le(1.0), - Bucket::Le(2.0), - Bucket::Le(3.0), - Bucket::Le(4.0), - Bucket::Le(5.0), Bucket::Le(10.0), - Bucket::Le(20.0), - Bucket::Le(30.0), - Bucket::Le(40.0), Bucket::Le(50.0), Bucket::Le(100.0), Bucket::Le(200.0), @@ -21,15 +14,8 @@ pub const BOUNDS: &Bounds = &Bounds(&[ Bucket::Le(400.0), Bucket::Le(500.0), Bucket::Le(1_000.0), - Bucket::Le(2_000.0), - Bucket::Le(3_000.0), - Bucket::Le(4_000.0), Bucket::Le(5_000.0), Bucket::Le(10_000.0), - Bucket::Le(20_000.0), - Bucket::Le(30_000.0), - Bucket::Le(40_000.0), - Bucket::Le(50_000.0), // A final upper bound. Bucket::Inf, ]);