Skip to content

Commit e5cd591

Browse files
committed
Implement robustness test target for 20693
Reference: - #20693 Signed-off-by: Chun-Hung Tseng <[email protected]>
1 parent 985e056 commit e5cd591

File tree

3 files changed

+50
-25
lines changed

3 files changed

+50
-25
lines changed

tests/robustness/Makefile

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,11 @@ test-robustness-issue19179: /tmp/etcd-v3.5.17-failpoints/bin
7575
GO_TEST_FLAGS='-v -run=TestRobustnessRegression/Issue19179 -count 200 -failfast --bin-dir=/tmp/etcd-v3.5.17-failpoints/bin' $(TOPLEVEL_MAKE) test-robustness && \
7676
echo "Failed to reproduce" || echo "Successful reproduction"
7777

78+
# Attempts to reproduce issue #20693: runs TestRobustnessRegression/issue20693
# up to 2500 times (-failfast stops at the first failing run) against the
# binaries in /tmp/etcd-v3.6.4-failpoints/bin, passing TIMEOUT=24h to the
# top-level make. A failing test run means the bug was hit, which is why the
# failure branch prints "Successful reproduction".
.PHONY: test-robustness-issue20693
79+
test-robustness-issue20693: /tmp/etcd-v3.6.4-failpoints/bin
80+
GO_TEST_FLAGS='-v -run=TestRobustnessRegression/issue20693 -count 2500 -failfast --bin-dir=/tmp/etcd-v3.6.4-failpoints/bin' $(TOPLEVEL_MAKE) TIMEOUT=24h test-robustness && \
81+
echo "Failed to reproduce" || echo "Successful reproduction"
82+
7883
# Etcd API usage by Kubernetes
7984

8085
.PHONY: k8s-coverage
@@ -120,21 +125,19 @@ $(GOPATH)/bin/gofail: $(REPOSITORY_ROOT)/tools/mod/go.mod $(REPOSITORY_ROOT)/too
120125
$(MAKE) gofail-enable; \
121126
$(MAKE) build;
122127

123-
/tmp/etcd-v3.6.0-failpoints/bin: $(GOPATH)/bin/gofail
124-
rm -rf /tmp/etcd-v3.6.0-failpoints/
125-
mkdir -p /tmp/etcd-v3.6.0-failpoints/
126-
cd /tmp/etcd-v3.6.0-failpoints/; \
127-
git clone --depth 1 --branch main https://github.com/etcd-io/etcd.git .; \
128-
$(MAKE) gofail-enable; \
129-
$(MAKE) build;
130-
131128
/tmp/etcd-release-3.6-failpoints/bin: $(GOPATH)/bin/gofail
132129
rm -rf /tmp/etcd-release-3.6-failpoints/
133130
mkdir -p /tmp/etcd-release-3.6-failpoints/
134131
cd /tmp/etcd-release-3.6-failpoints/; \
135132
git clone --depth 1 --branch release-3.6 https://github.com/etcd-io/etcd.git .; \
136-
$(MAKE) gofail-enable; \
137-
$(MAKE) build;
133+
FAILPOINTS=true ./build;
134+
135+
# Pattern rule covering any v3.6.x release tag: clones that tag into
# /tmp/etcd-v3.6.<x>-failpoints/ and runs `FAILPOINTS=true ./build` there.
# Make substitutes `%` only in the target/prerequisite list; inside the recipe
# the matched patch version must be referenced through the stem variable `$*`.
/tmp/etcd-v3.6.%-failpoints/bin: $(GOPATH)/bin/gofail
136+
rm -rf /tmp/etcd-v3.6.$*-failpoints/
137+
mkdir -p /tmp/etcd-v3.6.$*-failpoints/
138+
cd /tmp/etcd-v3.6.$*-failpoints/; \
139+
git clone --depth 1 --branch v3.6.$* https://github.com/etcd-io/etcd.git .; \
140+
FAILPOINTS=true ./build;
138141

139142
/tmp/etcd-v3.5.2-failpoints/bin:
140143
/tmp/etcd-v3.5.4-failpoints/bin:

tests/robustness/README.md

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,21 +8,22 @@ The purpose of these tests is to rigorously validate that etcd maintains its [KV
88

99
## Robustness track record
1010

11-
| Correctness / Consistency issue | Report | Introduced in | Discovered by | Reproducible by robustness test | Command |
12-
| ----------------------------------------------------------------- | ---------- | ----------------- | --------------- | ------------------------------------------------- | ----------------------------------- |
13-
| Inconsistent revision caused by crash during high load [#13766] | Mar 2022 | v3.5 | User | Yes, report preceded robustness tests | `make test-robustness-issue13766` |
14-
| Single node cluster can lose a write on crash [#14370] | Aug 2022 | v3.4 or earlier | User | Yes, report preceded robustness tests | `make test-robustness-issue14370` |
15-
| Enabling auth can lead to inconsistency [#14571] | Oct 2022 | v3.4 or earlier | User | No, authorization is not covered. | |
16-
| Inconsistent revision caused by crash during defrag [#14685] | Nov 2022 | v3.5 | Robustness | Yes, after covering defragmentation. | `make test-robustness-issue14685` |
17-
| Watch progress notification not synced with stream [#15220] | Jan 2023 | v3.4 or earlier | User | Yes, after covering watch progress notification | `make test-robustness-issue15220` |
18-
| Watch traveling back in time after network partition [#15271] | Feb 2023 | v3.4 or earlier | Robustness | Yes, after covering network partitions | `make test-robustness-issue15271` |
19-
| Duplicated watch event due to bug in TXN caching [#17247] | Jan 2024 | main branch | Robustness | Yes, prevented regression in v3.6 | |
20-
| Watch events lost during stream starvation [#17529] | Mar 2024 | v3.4 or earlier | User | Yes, after covering of slow watch | `make test-robustness-issue17529` |
21-
| Revision decreasing caused by crash during compaction [#17780] | Apr 2024 | v3.4 or earlier | Robustness | Yes, after covering compaction | |
22-
| Watch dropping an event when compacting on delete [#18089] | May 2024 | v3.4 or earlier | Robustness | Yes, after covering of compaction | `make test-robustness-issue18089` |
23-
| Inconsistency when reading compacted revision in TXN [#18667] | Oct 2024 | v3.4 or earlier | User | | |
24-
| Missing delete event on watch opened on same revision as compaction [#19179] | Jan 2025 | v3.4 or earlier | Robustness | Yes, after covering of compaction | `make test-robustness-issue19179` |
25-
| Watch on future revision returns old events or notifications [#20221] | Jun 2025 | v3.4 or earlier | Robustness | Yes, after covering connection to multiple members| |
11+
| Correctness / Consistency issue | Report | Introduced in | Discovered by | Reproducible by robustness test | Command |
12+
| ------------------------------------------------------------------------------------------ | ---------- | ----------------- | --------------- | -------------------------------------------------- | ----------------------------------- |
13+
| Inconsistent revision caused by crash during high load [#13766] | Mar 2022 | v3.5 | User | Yes, report preceded robustness tests | `make test-robustness-issue13766` |
14+
| Single node cluster can lose a write on crash [#14370] | Aug 2022 | v3.4 or earlier | User | Yes, report preceded robustness tests | `make test-robustness-issue14370` |
15+
| Enabling auth can lead to inconsistency [#14571] | Oct 2022 | v3.4 or earlier | User | No, authorization is not covered. | |
16+
| Inconsistent revision caused by crash during defrag [#14685] | Nov 2022 | v3.5 | Robustness | Yes, after covering defragmentation. | `make test-robustness-issue14685` |
17+
| Watch progress notification not synced with stream [#15220] | Jan 2023 | v3.4 or earlier | User | Yes, after covering watch progress notification | `make test-robustness-issue15220` |
18+
| Watch traveling back in time after network partition [#15271] | Feb 2023 | v3.4 or earlier | Robustness | Yes, after covering network partitions | `make test-robustness-issue15271` |
19+
| Duplicated watch event due to bug in TXN caching [#17247] | Jan 2024 | main branch | Robustness | Yes, prevented regression in v3.6 | |
20+
| Watch events lost during stream starvation [#17529] | Mar 2024 | v3.4 or earlier | User | Yes, after covering of slow watch | `make test-robustness-issue17529` |
21+
| Revision decreasing caused by crash during compaction [#17780] | Apr 2024 | v3.4 or earlier | Robustness | Yes, after covering compaction | |
22+
| Watch dropping an event when compacting on delete [#18089] | May 2024 | v3.4 or earlier | Robustness | Yes, after covering of compaction | `make test-robustness-issue18089` |
23+
| Inconsistency when reading compacted revision in TXN [#18667] | Oct 2024 | v3.4 or earlier | User | | |
24+
| Missing delete event on watch opened on same revision as compaction [#19179] | Jan 2025 | v3.4 or earlier | Robustness | Yes, after covering of compaction | `make test-robustness-issue19179` |
25+
| Watch on future revision returns old events or notifications [#20221] | Jun 2025 | v3.4 or earlier | Robustness | Yes, after covering connection to multiple members | |
26+
| Watch request with -1 revision causes invalid resync behavior on uncompacted etcd [#20693] | Sep 2025 | v3.4 or earlier | Robustness | Yes, after covering continuously opened watches | `make test-robustness-issue20693` |
2627

2728
[#13766]: https://github.com/etcd-io/etcd/issues/13766
2829
[#14370]: https://github.com/etcd-io/etcd/issues/14370
@@ -37,6 +38,7 @@ The purpose of these tests is to rigorously validate that etcd maintains its [KV
3738
[#18667]: https://github.com/etcd-io/etcd/issues/18667
3839
[#19179]: https://github.com/etcd-io/etcd/issues/19179
3940
[#20221]: https://github.com/etcd-io/etcd/issues/20221
41+
[#20693]: https://github.com/etcd-io/etcd/issues/20693
4042

4143
## How Robustness Tests Work
4244

tests/robustness/scenarios/scenarios.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ func Regression(t *testing.T) []TestScenario {
261261
e2e.WithGoFailEnabled(true),
262262
),
263263
})
264+
264265
scenarios = append(scenarios, TestScenario{
265266
Name: "Issue18089",
266267
Profile: traffic.LowTraffic.WithCompactionPeriod(100 * time.Millisecond), // Use frequent compaction for high reproduce rate
@@ -271,6 +272,7 @@ func Regression(t *testing.T) []TestScenario {
271272
e2e.WithGoFailEnabled(true),
272273
),
273274
})
275+
274276
if v.Compare(version.V3_5) >= 0 {
275277
opts := []e2e.EPClusterOption{
276278
e2e.WithSnapshotCount(100),
@@ -280,6 +282,7 @@ func Regression(t *testing.T) []TestScenario {
280282
if e2e.CouldSetSnapshotCatchupEntries(e2e.BinPath.Etcd) {
281283
opts = append(opts, e2e.WithSnapshotCatchUpEntries(100))
282284
}
285+
283286
scenarios = append(scenarios, TestScenario{
284287
Name: "Issue15271",
285288
Failpoint: failpoint.BlackholeUntilSnapshot,
@@ -288,5 +291,22 @@ func Regression(t *testing.T) []TestScenario {
288291
Cluster: *e2e.NewConfig(opts...),
289292
})
290293
}
294+
295+
scenarios = append(scenarios, TestScenario{
296+
Name: "issue20693",
297+
Profile: traffic.HighTrafficProfile.WithoutCompaction().WithBackgroundWatchConfigInterval(10 * time.Millisecond).WithBackgroundWatchConfigRevisionOffset(-10),
298+
Failpoint: failpoint.RaftAfterSaveSnapPanic,
299+
Traffic: traffic.Kubernetes,
300+
Cluster: *e2e.NewConfig(
301+
e2e.WithClusterSize(3),
302+
e2e.WithCompactionBatchLimit(10),
303+
e2e.WithSnapshotCount(50),
304+
e2e.WithSnapshotCatchUpEntries(100),
305+
e2e.WithGoFailEnabled(true),
306+
e2e.WithPeerProxy(true),
307+
e2e.WithIsPeerTLS(true),
308+
),
309+
})
310+
291311
return scenarios
292312
}

0 commit comments

Comments
 (0)