Skip to content

Commit 88cd33b

Browse files
committed
Add new target for issue20221 for the potential reproduction of #20573
Evolve the code from Marek (#20221 (comment)). Please see the discussion starting from #20349 (comment). Signed-off-by: Chun-Hung Tseng <[email protected]>
1 parent 614dd35 commit 88cd33b

File tree

3 files changed

+91
-9
lines changed

3 files changed

+91
-9
lines changed

tests/robustness/Makefile

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ test-robustness-issue19179: /tmp/etcd-v3.5.17-failpoints/bin
7575
GO_TEST_FLAGS='-v -run=TestRobustnessRegression/Issue19179 -count 200 -failfast --bin-dir=/tmp/etcd-v3.5.17-failpoints/bin' $(TOPLEVEL_MAKE) test-robustness && \
7676
echo "Failed to reproduce" || echo "Successful reproduction"
7777

78+
.PHONY: test-robustness-issue20221
79+
test-robustness-issue20221: /tmp/etcd-v3.6.4-failpoints/bin
80+
GO_TEST_FLAGS='-v -run=TestRobustnessRegression/issue20221 -count 200 -failfast --bin-dir=/tmp/etcd-v3.6.4-failpoints/bin' $(TOPLEVEL_MAKE) test-robustness && \
81+
echo "Failed to reproduce" || echo "Successful reproduction"
82+
7883
# Etcd API usage by Kubernetes
7984

8085
.PHONY: k8s-coverage
@@ -120,14 +125,6 @@ $(GOPATH)/bin/gofail: $(REPOSITORY_ROOT)/tools/mod/go.mod $(REPOSITORY_ROOT)/too
120125
$(MAKE) gofail-enable; \
121126
$(MAKE) build;
122127

123-
/tmp/etcd-v3.6.0-failpoints/bin: $(GOPATH)/bin/gofail
124-
rm -rf /tmp/etcd-v3.6.0-failpoints/
125-
mkdir -p /tmp/etcd-v3.6.0-failpoints/
126-
cd /tmp/etcd-v3.6.0-failpoints/; \
127-
git clone --depth 1 --branch main https://github.com/etcd-io/etcd.git .; \
128-
$(MAKE) gofail-enable; \
129-
$(MAKE) build;
130-
131128
/tmp/etcd-release-3.6-failpoints/bin: $(GOPATH)/bin/gofail
132129
rm -rf /tmp/etcd-release-3.6-failpoints/
133130
mkdir -p /tmp/etcd-release-3.6-failpoints/
@@ -136,6 +133,14 @@ $(GOPATH)/bin/gofail: $(REPOSITORY_ROOT)/tools/mod/go.mod $(REPOSITORY_ROOT)/too
136133
$(MAKE) gofail-enable; \
137134
$(MAKE) build;
138135

136+
/tmp/etcd-v3.6.4-failpoints/bin: $(GOPATH)/bin/gofail
137+
rm -rf /tmp/etcd-v3.6.4-failpoints/
138+
mkdir -p /tmp/etcd-v3.6.4-failpoints/
139+
cd /tmp/etcd-v3.6.4-failpoints/; \
140+
git clone --depth 1 --branch v3.6.4 https://github.com/etcd-io/etcd.git .; \
141+
$(MAKE) gofail-enable; \
142+
$(MAKE) build;
143+
139144
/tmp/etcd-v3.5.2-failpoints/bin:
140145
/tmp/etcd-v3.5.4-failpoints/bin:
141146
/tmp/etcd-v3.5.5-failpoints/bin:

tests/robustness/client/watch.go

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"context"
1919
"errors"
2020
"fmt"
21+
"time"
2122

2223
"go.uber.org/zap"
2324
"golang.org/x/sync/errgroup"
@@ -51,14 +52,28 @@ func CollectClusterWatchEvents(ctx context.Context, param CollectClusterWatchEve
5152
return err
5253
})
5354
}
54-
55+
finish := make(chan struct{})
5556
g.Go(func() error {
5657
maxRevision := <-param.MaxRevisionChan
5758
for _, memberChan := range memberMaxRevisionChans {
5859
memberChan <- maxRevision
5960
}
61+
close(finish)
6062
return nil
6163
})
64+
65+
for _, endpoint := range endpoints {
66+
g.Go(func() error {
67+
c, err := clientSet.NewClient([]string{endpoint})
68+
if err != nil {
69+
return err
70+
}
71+
defer c.Close()
72+
period := 10 * time.Millisecond
73+
return openWatchPeriodically(ctx, &g, c, period, finish)
74+
})
75+
}
76+
6277
return g.Wait()
6378
}
6479

@@ -130,3 +145,42 @@ resetWatch:
130145
}
131146
}
132147
}
148+
149+
func openWatchPeriodically(ctx context.Context, g *errgroup.Group, c *RecordingClient, period time.Duration, finish <-chan struct{}) error {
150+
for {
151+
select {
152+
case <-ctx.Done():
153+
return ctx.Err()
154+
case <-finish:
155+
return nil
156+
case <-time.After(period):
157+
}
158+
g.Go(func() error {
159+
// targeting commit 866bc0717
160+
resp, err := c.Get(ctx, "/key")
161+
if err != nil {
162+
return err
163+
}
164+
// rev := resp.Header.Revision + (rand.Int64N(20) - 10) // reproduce OK (<2 min on my machine)
165+
rev := resp.Header.Revision - 10 // reproduce OK (<1 min on my machine)
166+
// rev := resp.Header.Revision // reproduce TBD
167+
// rev := int64(0) // no reproduction
168+
169+
watchCtx, cancel := context.WithCancel(ctx)
170+
defer cancel()
171+
w := c.Watch(watchCtx, "", rev, true, true, true)
172+
for {
173+
select {
174+
case <-ctx.Done():
175+
return ctx.Err()
176+
case <-finish:
177+
return nil
178+
case _, ok := <-w:
179+
if !ok {
180+
return nil
181+
}
182+
}
183+
}
184+
})
185+
}
186+
}

tests/robustness/scenarios/scenarios.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ func Regression(t *testing.T) []TestScenario {
261261
e2e.WithGoFailEnabled(true),
262262
),
263263
})
264+
264265
scenarios = append(scenarios, TestScenario{
265266
Name: "Issue18089",
266267
Profile: traffic.LowTraffic.WithCompactionPeriod(100 * time.Millisecond), // Use frequent compaction for high reproduce rate
@@ -271,6 +272,7 @@ func Regression(t *testing.T) []TestScenario {
271272
e2e.WithGoFailEnabled(true),
272273
),
273274
})
275+
274276
if v.Compare(version.V3_5) >= 0 {
275277
opts := []e2e.EPClusterOption{
276278
e2e.WithSnapshotCount(100),
@@ -280,6 +282,7 @@ func Regression(t *testing.T) []TestScenario {
280282
if e2e.CouldSetSnapshotCatchupEntries(e2e.BinPath.Etcd) {
281283
opts = append(opts, e2e.WithSnapshotCatchUpEntries(100))
282284
}
285+
283286
scenarios = append(scenarios, TestScenario{
284287
Name: "Issue15271",
285288
Failpoint: failpoint.BlackholeUntilSnapshot,
@@ -288,5 +291,25 @@ func Regression(t *testing.T) []TestScenario {
288291
Cluster: *e2e.NewConfig(opts...),
289292
})
290293
}
294+
295+
scenarios = append(scenarios, TestScenario{
296+
Name: "issue20221",
297+
Profile: traffic.HighTrafficProfile.WithoutCompaction(),
298+
Failpoint: failpoint.RaftAfterSaveSnapPanic,
299+
// Failpoint: failpoint.RaftBeforeApplySnapPanic,
300+
// Failpoint: failpoint.RaftAfterApplySnapPanic,
301+
// Failpoint: failpoint.RaftAfterWALReleasePanic,
302+
Traffic: traffic.Kubernetes,
303+
Cluster: *e2e.NewConfig(
304+
e2e.WithClusterSize(3),
305+
e2e.WithCompactionBatchLimit(10),
306+
e2e.WithSnapshotCount(50),
307+
e2e.WithSnapshotCatchUpEntries(100),
308+
e2e.WithGoFailEnabled(true),
309+
e2e.WithPeerProxy(true),
310+
e2e.WithIsPeerTLS(true),
311+
),
312+
})
313+
291314
return scenarios
292315
}

0 commit comments

Comments
 (0)