Skip to content

Commit 7df8c00

Browse files
authored
feat: add metrics server (#27)
This PR introduces a Prometheus metrics server. **It is not yet configurable; configuration support will be added as part of akash-network/support#306.** The metrics will help monitor incoming requests and the overall health of the various nodes on the network.

Metrics endpoint snapshot (2,000 requests made, with latency-based load balancing):

```
# HELP proxy_node_count Number of healthy nodes for each type
# TYPE proxy_node_count gauge
proxy_node_count{type="grpc"} 2
proxy_node_count{type="rest"} 11
proxy_node_count{type="rpc"} 9
# HELP proxy_request_count Number of requests per node for each type
# TYPE proxy_request_count counter
proxy_request_count{node="akash-api.kleomedes.network",type="rest"} 77
proxy_request_count{node="akash-api.polkachu.com",type="rest"} 97
proxy_request_count{node="akash-api.validatornode.com",type="rest"} 53
proxy_request_count{node="akash-api.w3coins.io",type="rest"} 97
proxy_request_count{node="akash-mainnet-rest.cosmonautstakes.com:443",type="rest"} 67
proxy_request_count{node="akash-mainnet-rpc.cosmonautstakes.com:443",type="rpc"} 86
proxy_request_count{node="akash-rest.publicnode.com",type="rest"} 111
proxy_request_count{node="akash-rpc.kleomedes.network",type="rpc"} 101
proxy_request_count{node="akash-rpc.polkachu.com",type="rpc"} 91
proxy_request_count{node="akash-rpc.publicnode.com:443",type="rpc"} 99
proxy_request_count{node="akash-rpc.w3coins.io",type="rpc"} 129
proxy_request_count{node="akash.api.arcturian.tech",type="rest"} 120
proxy_request_count{node="akash.c29r3.xyz:443",type="rest"} 91
proxy_request_count{node="akash.rpc.arcturian.tech",type="rpc"} 167
proxy_request_count{node="api-akash-01.stakeflow.io",type="rest"} 113
proxy_request_count{node="rest-akash.ecostake.com",type="rest"} 83
proxy_request_count{node="rest.lavenderfive.com:443",type="rest"} 91
proxy_request_count{node="rpc-akash.ecostake.com:443",type="rpc"} 94
proxy_request_count{node="rpc.akash.bronbro.io:443",type="rpc"} 127
proxy_request_count{node="rpc.lavenderfive.com:443",type="rpc"} 106
```

<img width="1074" alt="Screenshot 2025-04-21 at 20 45 16" src="https://github.com/user-attachments/assets/2fa1c177-a75e-4233-b4ba-99dd5e6df174" />
1 parent ce599c8 commit 7df8c00

File tree

21 files changed

+1442
-38
lines changed

21 files changed

+1442
-38
lines changed

README.md

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,8 +65,12 @@ go run cmd/main.go
6565
go run cmd/main.go --config=./testdata/local.yaml
6666
```
6767

68-
## Building
68+
## Monitoring
6969

70+
### Deploying locally
71+
You can deploy the monitoring stack locally by running docker compose.
7072
```bash
71-
go build -o akash-rpc-proxy
73+
docker-compose -f deploy/docker-compose.yml up -d
7274
```
75+
This starts one Grafana instance and one Prometheus instance, with the data sources pre-configured and the dashboards already provisioned.
76+
Access Grafana at [http://localhost:3000](http://localhost:3000) using the default username and password, which are both `admin`.

cmd/main.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"golang.org/x/sync/errgroup"
2525

2626
"github.com/akash-network/rpc-proxy/internal/config"
27+
"github.com/akash-network/rpc-proxy/internal/metrics"
2728
"github.com/akash-network/rpc-proxy/internal/proxy"
2829
"github.com/akash-network/rpc-proxy/internal/seed"
2930
"github.com/spf13/cobra"
@@ -79,7 +80,7 @@ func NewRootCmd(v *viper.Viper) *cobra.Command {
7980

8081
// Server configuration
8182
rootCmd.PersistentFlags().String("server.listen", ":25567", "Address to listen on for HTTP REST & RPC requests")
82-
rootCmd.PersistentFlags().String("server.listen-grpc", ":9090", "Address to listen on for gRPC requests")
83+
rootCmd.PersistentFlags().String("server.listen-grpc", ":25568", "Address to listen on for gRPC requests")
8384
rootCmd.PersistentFlags().Duration("server.timeouts.read", 10*time.Second, "Server read timeout")
8485
rootCmd.PersistentFlags().Duration("server.timeouts.write", 10*time.Second, "Server write timeout")
8586
rootCmd.PersistentFlags().Duration("server.timeouts.idle", 10*time.Second, "Server idle timeout")
@@ -94,6 +95,7 @@ func NewRootCmd(v *viper.Viper) *cobra.Command {
9495
rootCmd.PersistentFlags().String("seed.url", "https://raw.githubusercontent.com/cosmos/chain-registry/master/akash/chain.json", "URL to fetch initial node list")
9596
rootCmd.PersistentFlags().Duration("seed.refresh-interval", 5*time.Minute, "How often to refresh node list")
9697
rootCmd.PersistentFlags().String("seed.chain-id", "akashnet-2", "Expected chain ID")
98+
rootCmd.PersistentFlags().Bool("seed.enable-remote", true, "Enable remote seed fetching")
9799
rootCmd.PersistentFlags().StringSlice("seed.additional-nodes.rpc", []string{}, "Comma-separated list of additional RPC nodes")
98100
rootCmd.PersistentFlags().StringSlice("seed.additional-nodes.rest", []string{}, "Comma-separated list of additional REST nodes")
99101
rootCmd.PersistentFlags().StringSlice("seed.additional-nodes.grpc", []string{}, "Comma-separated list of additional gRPC nodes")
@@ -107,6 +109,11 @@ func NewRootCmd(v *viper.Viper) *cobra.Command {
107109
rootCmd.PersistentFlags().String("cors.allow-methods", "GET, POST, PUT, DELETE, OPTIONS", "CORS allowed methods")
108110
rootCmd.PersistentFlags().String("cors.allow-headers", "Content-Type, Authorization", "CORS allowed headers")
109111

112+
// Metrics configuration
113+
rootCmd.PersistentFlags().Bool("metrics.enabled", true, "Enable metrics server")
114+
rootCmd.PersistentFlags().String("metrics.listen", ":4000", "Address to listen on for metrics")
115+
rootCmd.PersistentFlags().String("metrics.path", "/metrics", "Path to expose metrics on")
116+
110117
// Configuration file support
111118
rootCmd.PersistentFlags().StringP("config", "c", "", "config file (default is $HOME/.akash-proxy/config.yaml)")
112119

@@ -116,6 +123,17 @@ func NewRootCmd(v *viper.Viper) *cobra.Command {
116123
func runProxy(cfg config.Config) {
117124
log := slog.New(slog.NewJSONHandler(os.Stdout, nil))
118125

126+
var metricsServer *http.Server
127+
if cfg.Metrics.Enabled {
128+
metricsServer = metrics.PrepareMetricsServer(cfg.Metrics.Listen, cfg.Metrics.Path)
129+
go func() {
130+
log.Info("metrics server", "addr", metricsServer.Addr, "path", cfg.Metrics.Path)
131+
if err := metricsServer.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
132+
panic(err)
133+
}
134+
}()
135+
}
136+
119137
rpcListener := make(chan seed.Seed, 1)
120138
restListener := make(chan seed.Seed, 1)
121139
grpcListener := make(chan seed.Seed, 1)
@@ -124,6 +142,7 @@ func runProxy(cfg config.Config) {
124142
SeedURL: cfg.Seed.URL,
125143
SeedRefreshInterval: cfg.Seed.RefreshInterval,
126144
ChainID: cfg.Seed.ChainID,
145+
EnableRemote: cfg.Seed.EnableRemote,
127146
AdditionalNodes: struct {
128147
RPC []string
129148
REST []string
@@ -168,6 +187,13 @@ func runProxy(cfg config.Config) {
168187
log.Error("could not close server", "err", err)
169188
os.Exit(1)
170189
}
190+
191+
if cfg.Metrics.Enabled && metricsServer != nil {
192+
if err := metricsServer.Shutdown(ctx); err != nil {
193+
log.Error("could not close metrics server", "err", err)
194+
os.Exit(1)
195+
}
196+
}
171197
}()
172198

173199
proxyGroup.Go(func() error {

deploy/docker-compose.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
version: '3.8'
2+
3+
services:
4+
prometheus:
5+
image: prom/prometheus:latest
6+
container_name: prometheus
7+
ports:
8+
- "9090:9090"
9+
volumes:
10+
- ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
11+
command:
12+
- '--config.file=/etc/prometheus/prometheus.yml'
13+
restart: unless-stopped
14+
15+
grafana:
16+
image: grafana/grafana:latest
17+
container_name: grafana
18+
ports:
19+
- "3000:3000"
20+
volumes:
21+
- ./grafana/provisioning:/etc/grafana/provisioning
22+
- ./grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards
23+
environment:
24+
- GF_SECURITY_ADMIN_PASSWORD=admin
25+
- GF_USERS_ALLOW_SIGN_UP=false
26+
restart: unless-stopped
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
apiVersion: 1
2+
3+
providers:
4+
- name: 'RPC Proxy Dashboards'
5+
orgId: 1
6+
folder: 'RPC Proxy'
7+
type: file
8+
disableDeletion: false
9+
editable: true
10+
options:
11+
path: /etc/grafana/provisioning/dashboards
12+
foldersFromFilesStructure: true

0 commit comments

Comments
 (0)