From 435dd9b18be85fc013ec1a6c72ace55b155212f2 Mon Sep 17 00:00:00 2001 From: Andrew Luka Date: Tue, 9 Dec 2025 14:13:41 +0200 Subject: [PATCH] apollo_network_benchmark: added cpu and memory metrics --- Cargo.lock | 101 +++++++++++++++++- Cargo.toml | 1 + crates/apollo_network_benchmark/Cargo.toml | 1 + .../main.rs | 7 ++ .../metrics.rs | 11 ++ .../system_metrics.rs | 67 ++++++++++++ 6 files changed, 187 insertions(+), 1 deletion(-) create mode 100644 crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/system_metrics.rs diff --git a/Cargo.lock b/Cargo.lock index 077f864cd9f..c59a5fa4233 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2117,6 +2117,7 @@ dependencies = [ "metrics-exporter-prometheus", "rstest", "serde", + "sysinfo", "tokio", "tokio-metrics", "tracing", @@ -7398,7 +7399,7 @@ dependencies = [ "rtnetlink", "system-configuration 0.6.1", "tokio", - "windows", + "windows 0.53.0", ] [[package]] @@ -9267,6 +9268,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ntapi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +dependencies = [ + "winapi", +] + [[package]] name = "nu-ansi-term" version = "0.50.3" @@ -9552,6 +9562,16 @@ dependencies = [ "objc2-core-foundation", ] +[[package]] +name = "objc2-io-kit" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" +dependencies = [ + "libc", + "objc2-core-foundation", +] + [[package]] name = "objc2-io-surface" version = "0.3.2" @@ -12669,6 +12689,20 @@ dependencies = [ "syn 2.0.110", ] +[[package]] +name = "sysinfo" +version = "0.37.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16607d5caffd1c07ce073528f9ed972d88db15dd44023fa57142963be3feb11f" +dependencies = [ + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows 0.61.3", +] + [[package]] name = "system-configuration" version = "0.5.1" @@ -14021,6 +14055,28 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows" +version = "0.61.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" +dependencies = [ + "windows-collections", + "windows-core 0.61.2", + "windows-future", + "windows-link 0.1.3", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" +dependencies = [ + "windows-core 0.61.2", +] + [[package]] name = "windows-core" version = "0.53.0" @@ -14031,6 +14087,19 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-core" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.1.3", + "windows-result 0.3.4", + "windows-strings 0.4.2", +] + [[package]] name = "windows-core" version = "0.62.2" @@ -14044,6 +14113,17 @@ dependencies = [ "windows-strings 0.5.1", ] +[[package]] +name = "windows-future" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" +dependencies = [ + "windows-core 0.61.2", + "windows-link 0.1.3", + "windows-threading", +] + [[package]] name = "windows-implement" version = "0.60.2" @@ -14078,6 +14158,16 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-numerics" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" +dependencies = [ + "windows-core 0.61.2", + "windows-link 0.1.3", +] + [[package]] name = "windows-registry" version = "0.5.3" @@ -14227,6 +14317,15 @@ dependencies = [ "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows-threading" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" +dependencies = [ + "windows-link 0.1.3", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" diff --git a/Cargo.toml b/Cargo.toml index cbcfed3b2fd..b5e9219a075 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -366,6 +366,7 @@ statistical = "1.0.0" strum = "0.25.0" strum_macros = "0.25.2" syn = "2.0.39" +sysinfo = "0.37.2" tempfile = "3.7.0" test-case = "3.2.1" test-log = "0.2.14" diff --git a/crates/apollo_network_benchmark/Cargo.toml b/crates/apollo_network_benchmark/Cargo.toml index d898ae3561e..ca921c1ac54 100644 --- a/crates/apollo_network_benchmark/Cargo.toml +++ b/crates/apollo_network_benchmark/Cargo.toml @@ -17,6 +17,7 @@ lazy_static.workspace = true libp2p = { workspace = true, features = ["identify"] } metrics-exporter-prometheus.workspace = true serde.workspace = true +sysinfo.workspace = true tokio = { workspace = true, features = ["full", "sync"] } tokio-metrics = { workspace = true, features = ["metrics-rs-integration", "rt"] } tracing.workspace = true diff --git a/crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/main.rs b/crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/main.rs index bf3cd9f11e2..80f013ca663 100644 --- a/crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/main.rs +++ b/crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/main.rs @@ -15,9 +15,11 @@ mod message; pub mod metrics; mod protocol; mod stress_test_node; +mod system_metrics; use apollo_network_benchmark::node_args::NodeArgs; use stress_test_node::BroadcastNetworkStressTestNode; +use system_metrics::monitor_process_metrics; #[tokio::main] async fn main() -> Result<(), Box> { @@ -53,6 +55,11 @@ async fn main() -> Result<(), Box> { .describe_and_run(), ); + // Start the process metrics monitoring task + tokio::spawn(async { + monitor_process_metrics(1).await; + }); + // Create and run the stress test node let stress_test_node = BroadcastNetworkStressTestNode::new(args).await; stress_test_node.run().await diff --git a/crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/metrics.rs b/crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/metrics.rs index 98fb6d0cef5..0b00208f494 100644 --- a/crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/metrics.rs +++ b/crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/metrics.rs @@ -7,5 +7,16 @@ define_metrics!( MetricCounter { RECEIVE_MESSAGE_BYTES_SUM, "receive_message_bytes_sum", "Sum of the stress test messages received via broadcast", init = 0 }, MetricHistogram { RECEIVE_MESSAGE_DELAY_SECONDS, "receive_message_delay_seconds", "Message delay in seconds" }, MetricHistogram { RECEIVE_MESSAGE_NEGATIVE_DELAY_SECONDS, "receive_message_negative_delay_seconds", "Negative message delay in seconds" }, + + // system metrics for the node + MetricGauge { SYSTEM_TOTAL_MEMORY_BYTES, "system_total_memory_bytes", "Total system memory in bytes" }, + MetricGauge { SYSTEM_AVAILABLE_MEMORY_BYTES, "system_available_memory_bytes", "Available system memory in bytes" }, + MetricGauge { SYSTEM_USED_MEMORY_BYTES, "system_used_memory_bytes", "Used system memory in bytes" }, + MetricGauge { SYSTEM_CPU_COUNT, "system_cpu_count", "Number of logical CPU cores in the system" }, + + // system metrics for the process + MetricGauge { SYSTEM_PROCESS_CPU_USAGE_PERCENT, "system_process_cpu_usage_percent", "CPU usage percentage of the current process" }, + MetricGauge { SYSTEM_PROCESS_MEMORY_USAGE_BYTES, "system_process_memory_usage_bytes", "Memory usage in bytes of the current process" }, + MetricGauge { SYSTEM_PROCESS_VIRTUAL_MEMORY_USAGE_BYTES, "system_process_virtual_memory_usage_bytes", "Virtual memory usage in bytes of the current process" }, }, ); diff --git a/crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/system_metrics.rs b/crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/system_metrics.rs new file mode 100644 index 00000000000..c59d9ac5c11 --- /dev/null +++ b/crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/system_metrics.rs @@ -0,0 +1,67 @@ +use std::time::Duration; + +use apollo_metrics::metrics::LossyIntoF64; +use sysinfo::{Pid, System}; +use tokio::time::interval; +use tracing::warn; + +use crate::metrics::{ + SYSTEM_AVAILABLE_MEMORY_BYTES, + SYSTEM_CPU_COUNT, + SYSTEM_PROCESS_CPU_USAGE_PERCENT, + SYSTEM_PROCESS_MEMORY_USAGE_BYTES, + SYSTEM_PROCESS_VIRTUAL_MEMORY_USAGE_BYTES, + SYSTEM_TOTAL_MEMORY_BYTES, + SYSTEM_USED_MEMORY_BYTES, +}; + +/// Collects system-wide and process-specific metrics (CPU, memory) +fn collect_system_and_process_metrics(system: &mut System, current_pid: Pid) { + system.refresh_all(); + let total_memory: f64 = system.total_memory().into_f64(); + let available_memory: f64 = system.available_memory().into_f64(); + let used_memory: f64 = system.used_memory().into_f64(); + let cpu_count: f64 = system.cpus().len().into_f64(); + + SYSTEM_TOTAL_MEMORY_BYTES.set(total_memory); + SYSTEM_AVAILABLE_MEMORY_BYTES.set(available_memory); + SYSTEM_USED_MEMORY_BYTES.set(used_memory); + SYSTEM_CPU_COUNT.set(cpu_count); + + if let Some(process) = system.process(current_pid) { + let cpu_usage: f64 = process.cpu_usage().into(); + let memory_usage: f64 = process.memory().into_f64(); + let virtual_memory_usage: f64 = process.virtual_memory().into_f64(); + + SYSTEM_PROCESS_CPU_USAGE_PERCENT.set(cpu_usage); + SYSTEM_PROCESS_MEMORY_USAGE_BYTES.set(memory_usage); + SYSTEM_PROCESS_VIRTUAL_MEMORY_USAGE_BYTES.set(virtual_memory_usage); + } else { + warn!("Could not find process information for PID: {}", current_pid); + } +} + +pub async fn monitor_process_metrics(interval_seconds: u64) { + let mut interval = interval(Duration::from_secs(interval_seconds)); + let current_pid = sysinfo::get_current_pid().expect("Failed to get current process PID"); + + struct State { + system: System, + } + + let mut state = Some(State { system: System::new_all() }); + + loop { + interval.tick().await; + + let mut passed_state = state.take().unwrap(); + // the metrics update need to be done in a blocking context to avoid slowing down tokio + // threads + state = tokio::task::spawn_blocking(move || { + collect_system_and_process_metrics(&mut passed_state.system, current_pid); + Some(passed_state) + }) + .await + .unwrap(); + } +}