Skip to content

Commit 2ec000e

Browse files
committed
rebalance: adds -allowed-dsts to support backfilling nodes in imbalanced replicas
Sometimes we use rebalance command to backfill a storage node that replaces a broken node: bucky rebalance -h node-in-replica-1:4242 node-in-replica2:4242 This only works if replica 1 and replica 2 has the same number of nodes (assume using jump hash). With -allowed-dsts, we can backfill the node from a replica has higher or lower storage nodes. bucky rebalance -h node-in-replica-1:4242 -allowed-dsts node-in-replica-1:4242 node1-in-replica2:4242,node2-in-replica2:4242,...,nodeN-in-replica2:4242
1 parent 989a7df commit 2ec000e

File tree

3 files changed

+33
-4
lines changed

3 files changed

+33
-4
lines changed

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,10 @@ test_rebalance_health_check_setup:
4444
sudo ip addr add 10.0.1.9 dev lo
4545

4646
test_rebalance_health_check: clean bucky buckyd
47-
# go run -mod vendor testing/rebalance_health_check.go $(REBALANCE_FLAGS)
47+
go run -mod vendor testing/rebalance_health_check.go $(REBALANCE_FLAGS)
4848

4949
clean_test:
50+
rm -rf bucky buckyd
5051
rm -rf testdata_rebalance_*
5152
rm -rf testdata_copy_*
5253

cmd/bucky/rebalance.go

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,13 @@ import (
44
"fmt"
55
"log"
66
"sort"
7+
"strings"
78
)
89

10+
var rebalanceConfig struct {
11+
allowedDsts string
12+
}
13+
914
func init() {
1015
usage := "[options] [additional buckyd servers...]"
1116
short := "Rebalance a server or the entire cluster."
@@ -43,8 +48,8 @@ Set -offload=true to speed up rebalance.`
4348
SetupSingle(c)
4449

4550
msFlags.registerFlags(c.Flag)
46-
c.Flag.BoolVar(&listForce, "f", false,
47-
"Force the remote daemons to rebuild their cache.")
51+
c.Flag.BoolVar(&listForce, "f", false, "Force the remote daemons to rebuild their cache.")
52+
c.Flag.StringVar(&rebalanceConfig.allowedDsts, "allowed-dsts", "", "Only copy/rebanace metrics to the allowed destinations (ip1:port,ip2:port). By default (i.e. empty), all dsts are allowed.")
4853
}
4954

5055
// countMap returns the number of metrics in a server -> metrics mapping
@@ -109,6 +114,23 @@ func RebalanceMetrics(extraHostPorts []string) error {
109114
}
110115
}
111116

117+
if rebalanceConfig.allowedDsts != "" {
118+
allowm := map[string]bool{}
119+
for _, hostport := range strings.Split(rebalanceConfig.allowedDsts, ";") {
120+
allowm[strings.TrimSpace(hostport)] = true
121+
}
122+
123+
newJobs := map[string]map[string][]*syncJob{}
124+
for dst, srcm := range jobs {
125+
if allowm[dst] {
126+
newJobs[dst] = srcm
127+
}
128+
129+
}
130+
131+
jobs = newJobs
132+
}
133+
112134
sort.Strings(servers)
113135
for _, server := range servers {
114136
log.Printf("%d metrics on %s must be relocated", moves[server], server)

testing/rebalance.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,13 @@ func main() {
152152

153153
time.Sleep(time.Second * 3)
154154
rebalanceStart := time.Now()
155-
rebalanceCmd := exec.Command("./bucky", "rebalance", "-f", "-h", nodeStr(server0), "-offload", "-w", "3", "-ignore404")
155+
rebalanceCmd := exec.Command(
156+
"./bucky", "rebalance", "-f",
157+
"-h", nodeStr(server0), "-offload",
158+
"-w", "3", "-ignore404",
159+
// "-allowed-dsts", "localhost:40002",
160+
// "-allowed-dsts", "xxx:xxx",
161+
)
156162

157163
rebalanceCmd.Stdout = rebalanceLog
158164
rebalanceCmd.Stderr = rebalanceLog

0 commit comments

Comments
 (0)