Skip to content

Commit d127f64

Browse files
committed
initial commit: add another tablet load balancer algorithm, random
Signed-off-by: Nick Van Wiggeren <[email protected]>
1 parent 8fc596b commit d127f64

File tree

6 files changed

+340
-27
lines changed

6 files changed

+340
-27
lines changed

go/flags/endtoend/vtgate.txt

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,8 @@ Flags:
2828
--allow-kill-statement Allows the execution of kill statement
2929
--allowed-tablet-types strings Specifies the tablet types this vtgate is allowed to route queries to. Should be provided as a comma-separated set of tablet types.
3030
--alsologtostderr log to standard error as well as files
31-
--balancer-keyspaces strings When in balanced mode, a comma-separated list of keyspaces for which to use the balancer (optional)
32-
--balancer-vtgate-cells strings When in balanced mode, a comma-separated list of cells that contain vtgates (required)
31+
--balancer-keyspaces strings Comma-separated list of keyspaces for which to use the balancer (optional). If empty, applies to all keyspaces.
32+
--balancer-vtgate-cells strings Comma-separated list of cells that contain vtgates. For 'flow' mode, this is required. For 'random' mode, this is optional and filters tablets to those cells.
3333
--bind-address string Bind address for the server. If empty, the server will listen on all available unicast and anycast IP addresses of the local system.
3434
--buffer-drain-concurrency int Maximum number of requests retried simultaneously. More concurrency will increase the load on the PRIMARY vttablet when draining the buffer. (default 1)
3535
--buffer-keyspace-shards string If not empty, limit buffering to these entries (comma separated). Entry format: keyspace or keyspace/shard. Requires --enable_buffer=true.
@@ -56,7 +56,7 @@ Flags:
5656
--discovery-high-replication-lag-minimum-serving duration Threshold above which replication lag is considered too high when applying the min_number_serving_vttablets flag. (default 2h0m0s)
5757
--discovery-low-replication-lag duration Threshold below which replication lag is considered low enough to be healthy. (default 30s)
5858
--emit-stats If set, emit stats to push-based monitoring and stats backends
59-
--enable-balancer Enable the tablet balancer to evenly spread query load for a given tablet type
59+
--enable-balancer (DEPRECATED: use --vtgate-balancer-mode instead) Enable the tablet balancer to evenly spread query load for a given tablet type
6060
--enable-buffer Enable buffering (stalling) of primary traffic during failovers.
6161
--enable-buffer-dry-run Detect and log failover events, but do not actually buffer requests.
6262
--enable-direct-ddl Allow users to submit direct DDL statements (default true)
@@ -246,6 +246,7 @@ Flags:
246246
-v, --version print binary version
247247
--vmodule vModuleFlag comma-separated list of pattern=N settings for file-filtered logging
248248
--vschema-ddl-authorized-users string List of users authorized to execute vschema ddl operations, or '%' to allow all users.
249+
--vtgate-balancer-mode string Tablet balancer mode (options: cell, flow, random). Defaults to 'cell' which shuffles tablets in the local cell.
249250
--vtgate-config-terse-errors prevent bind vars from escaping in returned errors
250251
--warming-reads-concurrency int Number of concurrent warming reads allowed (default 500)
251252
--warming-reads-percent int Percentage of reads on the primary to forward to replicas. Useful for keeping buffer pools warm

go/vt/vtgate/balancer/balancer.go

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -96,15 +96,35 @@ type TabletBalancer interface {
9696
DebugHandler(w http.ResponseWriter, r *http.Request)
9797
}
9898

99-
func NewTabletBalancer(localCell string, vtGateCells []string) TabletBalancer {
100-
return &tabletBalancer{
99+
// NewTabletBalancer creates a new tablet balancer based on the specified mode.
100+
// Supported modes:
101+
// - "flow": Flow-based balancer that maintains cell affinity while balancing load
102+
// - "random": Random balancer that uniformly distributes load without cell affinity
103+
//
104+
// Note: "cell" mode is handled by the gateway and does not create a balancer instance.
105+
// Returns an error for unsupported modes.
106+
func NewTabletBalancer(mode, localCell string, vtGateCells []string) (TabletBalancer, error) {
107+
switch mode {
108+
case "flow":
109+
return newFlowBalancer(localCell, vtGateCells), nil
110+
case "random":
111+
return newRandomBalancer(localCell, vtGateCells), nil
112+
case "cell":
113+
return nil, fmt.Errorf("cell mode should be handled by the gateway, not the balancer factory")
114+
default:
115+
return nil, fmt.Errorf("unsupported balancer mode: %s (supported modes: cell, flow, random)", mode)
116+
}
117+
}
118+
119+
func newFlowBalancer(localCell string, vtGateCells []string) TabletBalancer {
120+
return &flowBalancer{
101121
localCell: localCell,
102122
vtGateCells: vtGateCells,
103123
allocations: map[discovery.KeyspaceShardTabletType]*targetAllocation{},
104124
}
105125
}
106126

107-
type tabletBalancer struct {
127+
type flowBalancer struct {
108128
//
109129
// Configuration
110130
//
@@ -145,13 +165,13 @@ type targetAllocation struct {
145165
TotalAllocation int
146166
}
147167

148-
func (b *tabletBalancer) print() string {
168+
func (b *flowBalancer) print() string {
149169
allocations, _ := json.Marshal(&b.allocations)
150170
return fmt.Sprintf("LocalCell: %s, VtGateCells: %s, allocations: %s",
151171
b.localCell, b.vtGateCells, string(allocations))
152172
}
153173

154-
func (b *tabletBalancer) DebugHandler(w http.ResponseWriter, _ *http.Request) {
174+
func (b *flowBalancer) DebugHandler(w http.ResponseWriter, _ *http.Request) {
155175
w.Header().Set("Content-Type", "text/plain")
156176
fmt.Fprintf(w, "Local Cell: %v\r\n", b.localCell)
157177
fmt.Fprintf(w, "Vtgate Cells: %v\r\n", b.vtGateCells)
@@ -167,7 +187,7 @@ func (b *tabletBalancer) DebugHandler(w http.ResponseWriter, _ *http.Request) {
167187
// Given the total allocation for the set of tablets, choose the best target
168188
// by a weighted random sample so that over time the system will achieve the
169189
// desired balanced allocation.
170-
func (b *tabletBalancer) Pick(target *querypb.Target, tablets []*discovery.TabletHealth) *discovery.TabletHealth {
190+
func (b *flowBalancer) Pick(target *querypb.Target, tablets []*discovery.TabletHealth) *discovery.TabletHealth {
171191

172192
numTablets := len(tablets)
173193
if numTablets == 0 {
@@ -191,7 +211,7 @@ func (b *tabletBalancer) Pick(target *querypb.Target, tablets []*discovery.Table
191211
// To stick with integer arithmetic, use 1,000,000 as the full load
192212
const ALLOCATION = 1000000
193213

194-
func (b *tabletBalancer) allocateFlows(allTablets []*discovery.TabletHealth) *targetAllocation {
214+
func (b *flowBalancer) allocateFlows(allTablets []*discovery.TabletHealth) *targetAllocation {
195215
// Initialization: Set up some data structures and derived values
196216
a := targetAllocation{
197217
Target: map[string]int{},
@@ -339,7 +359,7 @@ func (b *tabletBalancer) allocateFlows(allTablets []*discovery.TabletHealth) *ta
339359
}
340360

341361
// getAllocation builds the allocation map if needed and returns a copy of the map
342-
func (b *tabletBalancer) getAllocation(target *querypb.Target, tablets []*discovery.TabletHealth) (map[uint32]int, int) {
362+
func (b *flowBalancer) getAllocation(target *querypb.Target, tablets []*discovery.TabletHealth) (map[uint32]int, int) {
343363
b.mu.Lock()
344364
defer b.mu.Unlock()
345365

go/vt/vtgate/balancer/balancer_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ func TestAllocateFlows(t *testing.T) {
167167

168168
// Run the balancer over each vtgate cell
169169
for _, localCell := range vtGateCells {
170-
b := NewTabletBalancer(localCell, vtGateCells).(*tabletBalancer)
170+
b := newFlowBalancer(localCell, vtGateCells).(*flowBalancer)
171171
a := b.allocateFlows(tablets)
172172
b.allocations[discovery.KeyFromTarget(target)] = a
173173

@@ -295,7 +295,7 @@ func TestBalancedPick(t *testing.T) {
295295
// Run the algorithm a bunch of times to get a random enough sample
296296
N := 1000000
297297
for _, localCell := range vtGateCells {
298-
b := NewTabletBalancer(localCell, vtGateCells).(*tabletBalancer)
298+
b := newFlowBalancer(localCell, vtGateCells).(*flowBalancer)
299299

300300
for i := 0; i < N/len(vtGateCells); i++ {
301301
th := b.Pick(target, tablets)
@@ -327,7 +327,7 @@ func TestTopologyChanged(t *testing.T) {
327327
}
328328
target := &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_REPLICA}
329329

330-
b := NewTabletBalancer("b", []string{"a", "b"}).(*tabletBalancer)
330+
b := newFlowBalancer("b", []string{"a", "b"}).(*flowBalancer)
331331

332332
N := 1
333333

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
Copyright 2024 The Vitess Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package balancer
18+
19+
import (
20+
"fmt"
21+
"math/rand/v2"
22+
"net/http"
23+
24+
"vitess.io/vitess/go/vt/discovery"
25+
querypb "vitess.io/vitess/go/vt/proto/query"
26+
)
27+
28+
/*
29+
30+
The randomBalancer provides a simple, stateless load balancing strategy that
31+
uniformly distributes queries across all available tablets without considering
32+
cell affinity.
33+
34+
Unlike the flow-based balancer which attempts to maintain cell affinity while
35+
balancing load, the random balancer ignores cell boundaries entirely and picks
36+
tablets with uniform probability (1/N for N available tablets).
37+
38+
This is useful in scenarios where:
39+
- Traffic distribution across cells is unpredictable or uneven
40+
- Cell affinity optimization is not beneficial for the workload
41+
- Simpler, more predictable load distribution is desired
42+
43+
The balancer can optionally filter tablets to only those in specified cells
44+
via the vtGateCells parameter, but within that filtered set, selection is
45+
purely random without any cell preference.
46+
47+
*/
48+
49+
func newRandomBalancer(localCell string, vtGateCells []string) TabletBalancer {
50+
return &randomBalancer{
51+
localCell: localCell,
52+
vtGateCells: vtGateCells,
53+
}
54+
}
55+
56+
// randomBalancer is a TabletBalancer that selects tablets uniformly at random.
// It carries configuration only.
type randomBalancer struct {
	// localCell is the cell of the local vtgate. It is used for
	// debugging/logging output only.
	localCell string

	// vtGateCells is an optional list of cells to restrict tablet selection
	// to. When it is empty, tablets from every cell are considered.
	vtGateCells []string
}
63+
64+
// Pick returns a random tablet from the list with uniform probability (1/N).
65+
// If vtGateCells is configured, only tablets in those cells are considered.
66+
func (b *randomBalancer) Pick(target *querypb.Target, tablets []*discovery.TabletHealth) *discovery.TabletHealth {
67+
// Filter to tablets in configured cells if vtGateCells is specified
68+
if len(b.vtGateCells) > 0 {
69+
filtered := make([]*discovery.TabletHealth, 0, len(tablets))
70+
for _, tablet := range tablets {
71+
for _, cell := range b.vtGateCells {
72+
if tablet.Tablet.Alias.Cell == cell {
73+
filtered = append(filtered, tablet)
74+
break
75+
}
76+
}
77+
}
78+
tablets = filtered
79+
}
80+
81+
numTablets := len(tablets)
82+
if numTablets == 0 {
83+
return nil
84+
}
85+
86+
if numTablets == 1 {
87+
return tablets[0]
88+
}
89+
90+
// Uniform random selection
91+
return tablets[rand.IntN(numTablets)]
92+
}
93+
94+
func (b *randomBalancer) DebugHandler(w http.ResponseWriter, _ *http.Request) {
95+
w.Header().Set("Content-Type", "text/plain")
96+
fmt.Fprintf(w, "Balancer Mode: random\r\n")
97+
fmt.Fprintf(w, "Local Cell: %v\r\n", b.localCell)
98+
if len(b.vtGateCells) > 0 {
99+
fmt.Fprintf(w, "Filtered to Cells: %v\r\n", b.vtGateCells)
100+
} else {
101+
fmt.Fprintf(w, "Cells: all (no filter)\r\n")
102+
}
103+
fmt.Fprintf(w, "Strategy: Uniform random selection (1/N probability per tablet)\r\n")
104+
}
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
/*
2+
Copyright 2024 The Vitess Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package balancer
18+
19+
import (
20+
"testing"
21+
22+
"github.com/stretchr/testify/assert"
23+
"github.com/stretchr/testify/require"
24+
25+
"vitess.io/vitess/go/vt/discovery"
26+
querypb "vitess.io/vitess/go/vt/proto/query"
27+
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
28+
)
29+
30+
func TestRandomBalancerUniformDistribution(t *testing.T) {
31+
// Test with uneven distribution of tablets across cells to verify
32+
// that random mode ignores cell affinity and treats all tablets equally
33+
tablets := []*discovery.TabletHealth{
34+
createTestTablet("cell1"),
35+
createTestTablet("cell1"),
36+
createTestTablet("cell1"),
37+
createTestTablet("cell2"),
38+
createTestTablet("cell3"),
39+
createTestTablet("cell3"),
40+
}
41+
42+
target := &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_REPLICA}
43+
// Use localCell = "cell1" to verify it doesn't get preferential treatment
44+
b := newRandomBalancer("cell1", []string{})
45+
46+
const numPicks = 60000
47+
pickCounts := make(map[uint32]int)
48+
49+
for i := 0; i < numPicks; i++ {
50+
th := b.Pick(target, tablets)
51+
require.NotNil(t, th, "Pick should not return nil")
52+
pickCounts[th.Tablet.Alias.Uid]++
53+
}
54+
55+
// Each individual tablet should be picked with 1/N probability
56+
expectedPerTablet := numPicks / len(tablets)
57+
for _, tablet := range tablets {
58+
count := pickCounts[tablet.Tablet.Alias.Uid]
59+
assert.InEpsilon(t, expectedPerTablet, count, 0.05,
60+
"tablet %d in cell %s should receive uniform picks (expected ~%d, got %d)",
61+
tablet.Tablet.Alias.Uid, tablet.Tablet.Alias.Cell, expectedPerTablet, count)
62+
}
63+
}
64+
65+
func TestRandomBalancerPickEmpty(t *testing.T) {
66+
target := &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_REPLICA}
67+
b := newRandomBalancer("cell1", []string{})
68+
69+
th := b.Pick(target, []*discovery.TabletHealth{})
70+
assert.Nil(t, th, "Pick should return nil for empty tablet list")
71+
}
72+
73+
func TestRandomBalancerPickSingle(t *testing.T) {
74+
tablets := []*discovery.TabletHealth{
75+
createTestTablet("cell1"),
76+
}
77+
78+
target := &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_REPLICA}
79+
b := newRandomBalancer("cell1", []string{})
80+
81+
// Pick multiple times, should always return the same tablet
82+
for i := 0; i < 100; i++ {
83+
th := b.Pick(target, tablets)
84+
require.NotNil(t, th, "Pick should not return nil")
85+
assert.Equal(t, tablets[0].Tablet.Alias.Uid, th.Tablet.Alias.Uid,
86+
"Pick should return the only available tablet")
87+
}
88+
}
89+
90+
func TestRandomBalancerFactory(t *testing.T) {
91+
// Test that the factory creates a random balancer correctly
92+
b, err := NewTabletBalancer("random", "cell1", []string{"cell1", "cell2"})
93+
require.NoError(t, err)
94+
require.NotNil(t, b)
95+
96+
// Verify it's actually a randomBalancer
97+
_, ok := b.(*randomBalancer)
98+
assert.True(t, ok, "factory should create a randomBalancer")
99+
}
100+
101+
func TestBalancerFactoryInvalidModes(t *testing.T) {
102+
// Test that "cell" mode returns an error (should be handled by gateway)
103+
b, err := NewTabletBalancer("cell", "cell1", []string{})
104+
assert.Error(t, err)
105+
assert.Nil(t, b)
106+
assert.Contains(t, err.Error(), "cell mode should be handled by the gateway")
107+
108+
// Test that an invalid mode returns an error
109+
b, err = NewTabletBalancer("invalid", "cell1", []string{})
110+
assert.Error(t, err)
111+
assert.Nil(t, b)
112+
assert.Contains(t, err.Error(), "unsupported balancer mode")
113+
}
114+
115+
func TestRandomBalancerCellFiltering(t *testing.T) {
116+
// Create tablets in multiple cells
117+
tablets := []*discovery.TabletHealth{
118+
createTestTablet("cell1"),
119+
createTestTablet("cell1"),
120+
createTestTablet("cell2"),
121+
createTestTablet("cell2"),
122+
createTestTablet("cell3"),
123+
createTestTablet("cell3"),
124+
}
125+
126+
target := &querypb.Target{Keyspace: "k", Shard: "s", TabletType: topodatapb.TabletType_REPLICA}
127+
128+
// Create balancer that only considers cell1 and cell2
129+
b := newRandomBalancer("cell1", []string{"cell1", "cell2"})
130+
require.NotNil(t, b)
131+
132+
const numPicks = 10000
133+
pickCounts := make(map[string]int)
134+
135+
for i := 0; i < numPicks; i++ {
136+
th := b.Pick(target, tablets)
137+
require.NotNil(t, th)
138+
pickCounts[th.Tablet.Alias.Cell]++
139+
}
140+
141+
// Should only pick from cell1 and cell2, never cell3
142+
assert.Greater(t, pickCounts["cell1"], 0, "should pick from cell1")
143+
assert.Greater(t, pickCounts["cell2"], 0, "should pick from cell2")
144+
assert.Equal(t, 0, pickCounts["cell3"], "should never pick from cell3")
145+
146+
// Each filtered cell should get approximately half the picks
147+
expectedPerCell := numPicks / 2
148+
assert.InEpsilon(t, expectedPerCell, pickCounts["cell1"], 0.1,
149+
"cell1 should get ~50%% of picks")
150+
assert.InEpsilon(t, expectedPerCell, pickCounts["cell2"], 0.1,
151+
"cell2 should get ~50%% of picks")
152+
}

0 commit comments

Comments
 (0)