Skip to content

Commit 90993c8

Browse files
emmaling27 authored and Convex, Inc. committed
Ratelimit retention checkpointing every 5 min (#43375)
Rate-limit retention checkpointing to once every 5 minutes instead of once per minute. The knob is now expressed as the duration between checkpoints rather than the number of checkpoints per minute, so the checkpoint rate can be lowered further (the old knob was already at its minimum of once per minute). GitOrigin-RevId: be350f72962ab7982ac5aafb7b968fd11c5816bd
1 parent 38390e7 commit 90993c8

File tree

2 files changed

+14
-17
lines changed

2 files changed

+14
-17
lines changed

crates/common/src/knobs.rs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -511,14 +511,10 @@ pub static DOCUMENT_RETENTION_RATE_LIMIT: LazyLock<NonZeroU32> = LazyLock::new(|
511511
)
512512
});
513513

514-
/// Maximum number of times both the document and index retention workers should
515-
/// write a checkpoint to the persistence globals table per minute
516-
pub static RETENTION_CHECKPOINT_LIMIT_PER_MINUTE: LazyLock<NonZeroU32> = LazyLock::new(|| {
517-
env_config(
518-
"RETENTION_CHECKPOINT_LIMIT_PER_MINUTE",
519-
NonZeroU32::new(1).unwrap(),
520-
)
521-
});
514+
/// How frequently document and index retention workers should write
515+
/// checkpoints to the persistence globals table in seconds
516+
pub static RETENTION_CHECKPOINT_PERIOD_SECS: LazyLock<Duration> =
517+
LazyLock::new(|| Duration::from_secs(env_config("RETENTION_CHECKPOINT_PERIOD_SECS", 60 * 5)));
522518

523519
/// Maximum scanned documents within a single run for document retention unless
524520
/// there are a bunch of writes at single timestamp. Then, we go until there are

crates/database/src/retention.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ use common::{
5757
INDEX_RETENTION_DELETE_CHUNK,
5858
INDEX_RETENTION_DELETE_PARALLEL,
5959
MAX_RETENTION_DELAY_SECONDS,
60-
RETENTION_CHECKPOINT_LIMIT_PER_MINUTE,
60+
RETENTION_CHECKPOINT_PERIOD_SECS,
6161
RETENTION_DELETES_ENABLED,
6262
RETENTION_DELETE_BATCH,
6363
RETENTION_DOCUMENT_DELETES_ENABLED,
@@ -337,6 +337,9 @@ impl<RT: Runtime> LeaderRetentionManager<RT> {
337337
RetentionType::Index,
338338
)
339339
.await?;
340+
let checkpoint_quota = Quota::with_period(*RETENTION_CHECKPOINT_PERIOD_SECS)
341+
.context("Checkpoint period cannot be zero")?;
342+
340343
let deletion_handle = rt.spawn(
341344
"retention_delete",
342345
Self::go_delete_indexes(
@@ -351,6 +354,7 @@ impl<RT: Runtime> LeaderRetentionManager<RT> {
351354
checkpoint_writer,
352355
snapshot_reader.clone(),
353356
index_deletion_cursor,
357+
checkpoint_quota,
354358
),
355359
);
356360
let document_deletion_cursor = Self::get_checkpoint(
@@ -371,6 +375,7 @@ impl<RT: Runtime> LeaderRetentionManager<RT> {
371375
snapshot_reader.clone(),
372376
retention_rate_limiter.clone(),
373377
document_deletion_cursor,
378+
checkpoint_quota,
374379
),
375380
);
376381
Ok(Self {
@@ -1080,11 +1085,9 @@ impl<RT: Runtime> LeaderRetentionManager<RT> {
10801085
mut checkpoint_writer: Writer<Checkpoint>,
10811086
snapshot_reader: Reader<SnapshotManager>,
10821087
mut cursor: RepeatableTimestamp,
1088+
quota: Quota,
10831089
) {
1084-
let checkpoint_rate_limiter = new_rate_limiter(
1085-
rt.clone(),
1086-
Quota::per_minute(*RETENTION_CHECKPOINT_LIMIT_PER_MINUTE),
1087-
);
1090+
let checkpoint_rate_limiter = new_rate_limiter(rt.clone(), quota);
10881091
let mut error_backoff = Backoff::new(INITIAL_BACKOFF, *MAX_RETENTION_DELAY_SECONDS);
10891092
let mut min_snapshot_ts = RepeatableTimestamp::MIN;
10901093
let mut is_working = false;
@@ -1207,11 +1210,9 @@ impl<RT: Runtime> LeaderRetentionManager<RT> {
12071210
snapshot_reader: Reader<SnapshotManager>,
12081211
document_deletion_rate_limiter: Arc<RateLimiter<RT>>,
12091212
mut cursor: RepeatableTimestamp,
1213+
quota: Quota,
12101214
) {
1211-
let checkpoint_rate_limiter = new_rate_limiter(
1212-
rt.clone(),
1213-
Quota::per_minute(*RETENTION_CHECKPOINT_LIMIT_PER_MINUTE),
1214-
);
1215+
let checkpoint_rate_limiter = new_rate_limiter(rt.clone(), quota);
12151216
// Wait with jitter on startup to avoid thundering herd
12161217
Self::wait_with_jitter(&rt, *DOCUMENT_RETENTION_BATCH_INTERVAL_SECONDS).await;
12171218

0 commit comments

Comments
 (0)