Skip to content

Commit becd83d

Browse files
apollo_network_benchmark: added cpu and memory metrics
1 parent 6cefd55 commit becd83d

File tree

6 files changed

+187
-1
lines changed

6 files changed

+187
-1
lines changed

Cargo.lock

Lines changed: 100 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,7 @@ statistical = "1.0.0"
366366
strum = "0.25.0"
367367
strum_macros = "0.25.2"
368368
syn = "2.0.39"
369+
sysinfo = "0.37.2"
369370
tempfile = "3.7.0"
370371
test-case = "3.2.1"
371372
test-log = "0.2.14"

crates/apollo_network_benchmark/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ lazy_static.workspace = true
1717
libp2p = { workspace = true, features = ["identify"] }
1818
metrics-exporter-prometheus.workspace = true
1919
serde.workspace = true
20+
sysinfo.workspace = true
2021
tokio = { workspace = true, features = ["full", "sync"] }
2122
tokio-metrics = { workspace = true, features = ["metrics-rs-integration", "rt"] }
2223
tracing.workspace = true

crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/main.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ mod message;
1515
pub mod metrics;
1616
mod protocol;
1717
mod stress_test_node;
18+
mod system_metrics;
1819

1920
use apollo_network_benchmark::node_args::NodeArgs;
2021
use stress_test_node::BroadcastNetworkStressTestNode;
22+
use system_metrics::monitor_process_metrics;
2123

2224
#[tokio::main]
2325
async fn main() -> Result<(), Box<dyn std::error::Error>> {
@@ -53,6 +55,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
5355
.describe_and_run(),
5456
);
5557

58+
// Start the process metrics monitoring task
59+
tokio::spawn(async {
60+
monitor_process_metrics(1).await;
61+
});
62+
5663
// Create and run the stress test node
5764
let stress_test_node = BroadcastNetworkStressTestNode::new(args).await;
5865
stress_test_node.run().await

crates/apollo_network_benchmark/src/bin/broadcast_network_stress_test_node/metrics.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,16 @@ define_metrics!(
77
MetricCounter { RECEIVE_MESSAGE_BYTES_SUM, "receive_message_bytes_sum", "Sum of the stress test messages received via broadcast", init = 0 },
88
MetricHistogram { RECEIVE_MESSAGE_DELAY_SECONDS, "receive_message_delay_seconds", "Message delay in seconds" },
99
MetricHistogram { RECEIVE_MESSAGE_NEGATIVE_DELAY_SECONDS, "receive_message_negative_delay_seconds", "Negative message delay in seconds" },
10+
11+
// system metrics for the node
12+
MetricGauge { SYSTEM_TOTAL_MEMORY_BYTES, "system_total_memory_bytes", "Total system memory in bytes" },
13+
MetricGauge { SYSTEM_AVAILABLE_MEMORY_BYTES, "system_available_memory_bytes", "Available system memory in bytes" },
14+
MetricGauge { SYSTEM_USED_MEMORY_BYTES, "system_used_memory_bytes", "Used system memory in bytes" },
15+
MetricGauge { SYSTEM_CPU_COUNT, "system_cpu_count", "Number of logical CPU cores in the system" },
16+
17+
// system metrics for the process
18+
MetricGauge { SYSTEM_PROCESS_CPU_USAGE_PERCENT, "system_process_cpu_usage_percent", "CPU usage percentage of the current process" },
19+
MetricGauge { SYSTEM_PROCESS_MEMORY_USAGE_BYTES, "system_process_memory_usage_bytes", "Memory usage in bytes of the current process" },
20+
MetricGauge { SYSTEM_PROCESS_VIRTUAL_MEMORY_USAGE_BYTES, "system_process_virtual_memory_usage_bytes", "Virtual memory usage in bytes of the current process" },
1021
},
1122
);
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
use std::time::Duration;
2+
3+
use apollo_metrics::metrics::LossyIntoF64;
4+
use sysinfo::{Pid, System};
5+
use tokio::time::interval;
6+
use tracing::warn;
7+
8+
use crate::metrics::{
9+
SYSTEM_AVAILABLE_MEMORY_BYTES,
10+
SYSTEM_CPU_COUNT,
11+
SYSTEM_PROCESS_CPU_USAGE_PERCENT,
12+
SYSTEM_PROCESS_MEMORY_USAGE_BYTES,
13+
SYSTEM_PROCESS_VIRTUAL_MEMORY_USAGE_BYTES,
14+
SYSTEM_TOTAL_MEMORY_BYTES,
15+
SYSTEM_USED_MEMORY_BYTES,
16+
};
17+
18+
/// Collects system-wide and process-specific metrics (CPU, memory)
19+
fn collect_system_and_process_metrics(system: &mut System, current_pid: Pid) {
20+
system.refresh_all();
21+
let total_memory: f64 = system.total_memory().into_f64();
22+
let available_memory: f64 = system.available_memory().into_f64();
23+
let used_memory: f64 = system.used_memory().into_f64();
24+
let cpu_count: f64 = system.cpus().len().into_f64();
25+
26+
SYSTEM_TOTAL_MEMORY_BYTES.set(total_memory);
27+
SYSTEM_AVAILABLE_MEMORY_BYTES.set(available_memory);
28+
SYSTEM_USED_MEMORY_BYTES.set(used_memory);
29+
SYSTEM_CPU_COUNT.set(cpu_count);
30+
31+
if let Some(process) = system.process(current_pid) {
32+
let cpu_usage: f64 = process.cpu_usage().into();
33+
let memory_usage: f64 = process.memory().into_f64();
34+
let virtual_memory_usage: f64 = process.virtual_memory().into_f64();
35+
36+
SYSTEM_PROCESS_CPU_USAGE_PERCENT.set(cpu_usage);
37+
SYSTEM_PROCESS_MEMORY_USAGE_BYTES.set(memory_usage);
38+
SYSTEM_PROCESS_VIRTUAL_MEMORY_USAGE_BYTES.set(virtual_memory_usage);
39+
} else {
40+
warn!("Could not find process information for PID: {}", current_pid);
41+
}
42+
}
43+
44+
pub async fn monitor_process_metrics(interval_seconds: u64) {
45+
let mut interval = interval(Duration::from_secs(interval_seconds));
46+
let current_pid = sysinfo::get_current_pid().expect("Failed to get current process PID");
47+
48+
struct State {
49+
system: System,
50+
}
51+
52+
let mut state = Some(State { system: System::new_all() });
53+
54+
loop {
55+
interval.tick().await;
56+
57+
let mut passed_state = state.take().unwrap();
58+
// the metrics update need to be done in a blocking context to avoid slowing down tokio
59+
// threads
60+
state = tokio::task::spawn_blocking(move || {
61+
collect_system_and_process_metrics(&mut passed_state.system, current_pid);
62+
Some(passed_state)
63+
})
64+
.await
65+
.unwrap();
66+
}
67+
}

0 commit comments

Comments
 (0)