Skip to content

Commit 5a96471

Browse files
akichidismwtian
authored andcommitted
[MFP] add the ping label on the client monitor metrics (#23852)
## Description Adding the ping label on the operational feedback metrics. ## Test plan CI/PT --- ## Release notes Check each box that your changes affect. If none of the boxes relate to your changes, release notes aren't required. For each box you select, include information after the relevant heading that describes the impact of your changes that a user might notice and any actions they must take to implement updates. - [ ] Protocol: - [ ] Nodes (Validators and Full nodes): - [ ] gRPC: - [ ] JSON-RPC: - [ ] GraphQL: - [ ] CLI: - [ ] Rust SDK:
1 parent 2df98b6 commit 5a96471

File tree

8 files changed

+66
-8
lines changed

8 files changed

+66
-8
lines changed

crates/sui-core/src/transaction_driver/effects_certifier.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ impl EffectsCertifier {
7777
// When consensus position is provided, wait for finalized and fastpath outputs at the validators' side.
7878
// Otherwise, only wait for finalized effects.
7979
// Skip the first attempt to get full effects if it is already provided.
80-
8180
let (consensus_position, full_effects) = match submit_txn_result {
8281
SubmitTxResult::Submitted { consensus_position } => (Some(consensus_position), None),
8382
SubmitTxResult::Executed {
@@ -102,6 +101,7 @@ impl EffectsCertifier {
102101

103102
let mut retrier =
104103
RequestRetrier::new(authority_aggregator, client_monitor, tx_type, vec![]);
104+
let ping_type = get_ping_type(&tx_digest, tx_type);
105105

106106
// Setting this to None at first because if the full effects are already provided,
107107
// we do not need to record the latency. We track the time in this function instead of inside
@@ -157,6 +157,7 @@ impl EffectsCertifier {
157157
authority_name: current_target,
158158
display_name,
159159
operation: OperationType::Effects,
160+
ping_type,
160161
result: Err(()),
161162
});
162163
} else {
@@ -166,6 +167,7 @@ impl EffectsCertifier {
166167
authority_name: current_target,
167168
display_name,
168169
operation: OperationType::Effects,
170+
ping_type,
169171
result: Ok(latency),
170172
});
171173
}
@@ -180,6 +182,7 @@ impl EffectsCertifier {
180182
authority_name: current_target,
181183
display_name,
182184
operation: OperationType::Effects,
185+
ping_type,
183186
result: Err(()),
184187
});
185188
// This emits an error when retrier gathers enough (f+1) non-retriable effects errors,
@@ -323,6 +326,7 @@ impl EffectsCertifier {
323326
authority_name: name,
324327
display_name,
325328
operation: OperationType::Effects,
329+
ping_type,
326330
result: Err(()),
327331
});
328332
(name, Err(SuiError::TimeoutError))
@@ -577,6 +581,7 @@ impl EffectsCertifier {
577581
let backoff = ExponentialBackoff::from_millis(100)
578582
.max_delay(Duration::from_secs(2))
579583
.map(jitter);
584+
let ping_type = raw_request.get_ping_type();
580585
// This loop should only retry errors that are retriable without new submission.
581586
for (attempt, delay) in backoff.enumerate() {
582587
let request: WaitForEffectsRequest = raw_request.clone().try_into().unwrap();
@@ -590,6 +595,7 @@ impl EffectsCertifier {
590595
authority_name: name,
591596
display_name: display_name.clone(),
592597
operation: OperationType::Effects,
598+
ping_type,
593599
result: Ok(latency),
594600
});
595601
return Ok(response);
@@ -599,6 +605,7 @@ impl EffectsCertifier {
599605
authority_name: name,
600606
display_name: display_name.clone(),
601607
operation: OperationType::Effects,
608+
ping_type,
602609
result: Err(()),
603610
});
604611
if !matches!(e, SuiError::RpcError(_, _)) {

crates/sui-core/src/transaction_driver/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ where
360360
let start_time = Instant::now();
361361
let tx_type = request.tx_type();
362362
let tx_digest = request.tx_digest();
363+
let ping_type = request.ping_type;
363364

364365
let (name, submit_txn_result) = self
365366
.submitter
@@ -402,6 +403,7 @@ where
402403
} else {
403404
OperationType::Consensus
404405
},
406+
ping_type,
405407
result: Ok(start_time.elapsed()),
406408
});
407409
}

crates/sui-core/src/transaction_driver/transaction_submitter.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,7 @@ impl TransactionSubmitter {
218218
authority_name: validator,
219219
display_name: display_name.clone(),
220220
operation: OperationType::Submit,
221+
ping_type: request.ping_type,
221222
result: Err(()),
222223
});
223224
TransactionRequestError::TimedOutSubmittingTransaction
@@ -228,6 +229,7 @@ impl TransactionSubmitter {
228229
authority_name: validator,
229230
display_name: display_name.clone(),
230231
operation: OperationType::Submit,
232+
ping_type: request.ping_type,
231233
result: Err(()),
232234
});
233235
}
@@ -249,6 +251,7 @@ impl TransactionSubmitter {
249251
authority_name: validator,
250252
display_name,
251253
operation: OperationType::Submit,
254+
ping_type: request.ping_type,
252255
result: Err(()),
253256
});
254257
}
@@ -260,6 +263,7 @@ impl TransactionSubmitter {
260263
authority_name: validator,
261264
display_name,
262265
operation: OperationType::Submit,
266+
ping_type: request.ping_type,
263267
result: Ok(latency),
264268
});
265269
Ok(result)

crates/sui-core/src/validator_client_monitor/metrics.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ impl ValidatorClientMetrics {
3232
observed_latency: register_histogram_vec_with_registry!(
3333
"validator_client_observed_latency",
3434
"Client-observed latency of operations per validator",
35-
&["validator", "operation_type"],
35+
&["validator", "operation_type", "ping"],
3636
LATENCY_SEC_BUCKETS.to_vec(),
3737
registry,
3838
)
@@ -41,15 +41,15 @@ impl ValidatorClientMetrics {
4141
operation_success: register_int_counter_vec_with_registry!(
4242
"validator_client_operation_success_total",
4343
"Total successful operations observed by client per validator",
44-
&["validator", "operation_type"],
44+
&["validator", "operation_type", "ping"],
4545
registry,
4646
)
4747
.unwrap(),
4848

4949
operation_failure: register_int_counter_vec_with_registry!(
5050
"validator_client_operation_failure_total",
5151
"Total failed operations observed by client per validator",
52-
&["validator", "operation_type"],
52+
&["validator", "operation_type", "ping"],
5353
registry,
5454
)
5555
.unwrap(),

crates/sui-core/src/validator_client_monitor/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ pub use metrics::ValidatorClientMetrics;
1212
pub use monitor::ValidatorClientMonitor;
1313
use std::time::Duration;
1414
use strum::EnumIter;
15-
use sui_types::base_types::AuthorityName;
15+
use sui_types::{base_types::AuthorityName, messages_grpc::PingType};
1616

1717
/// Operation types for validator performance tracking
1818
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, EnumIter)]
@@ -45,6 +45,8 @@ pub struct OperationFeedback {
4545
pub display_name: String,
4646
/// The operation type
4747
pub operation: OperationType,
48+
/// The ping type. If it's not a ping request, then this is is None.
49+
pub ping_type: Option<PingType>,
4850
/// Result of the operation: Ok(latency) if successful, Err(()) if failed.
4951
/// Only errors specific to the target validator should be recorded,
5052
/// for example, timeout, unavailability or misbehavior from validators can be recorded.

crates/sui-core/src/validator_client_monitor/monitor.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ where
128128
authority_name: name,
129129
display_name: display_name.clone(),
130130
operation: OperationType::HealthCheck,
131+
ping_type: None,
131132
result: Ok(latency),
132133
});
133134
}
@@ -137,6 +138,7 @@ where
137138
authority_name: name,
138139
display_name: display_name.clone(),
139140
operation: OperationType::HealthCheck,
141+
ping_type: None,
140142
result: Err(()),
141143
});
142144
}
@@ -145,6 +147,7 @@ where
145147
authority_name: name,
146148
display_name,
147149
operation: OperationType::HealthCheck,
150+
ping_type: None,
148151
result: Err(()),
149152
});
150153
}
@@ -212,22 +215,27 @@ impl<A: Clone> ValidatorClientMonitor<A> {
212215
OperationType::FastPath => "fast_path",
213216
OperationType::Consensus => "consensus",
214217
};
218+
let ping_label = if feedback.ping_type.is_some() {
219+
"true"
220+
} else {
221+
"false"
222+
};
215223

216224
match feedback.result {
217225
Ok(latency) => {
218226
self.metrics
219227
.observed_latency
220-
.with_label_values(&[&feedback.display_name, operation_str])
228+
.with_label_values(&[&feedback.display_name, operation_str, ping_label])
221229
.observe(latency.as_secs_f64());
222230
self.metrics
223231
.operation_success
224-
.with_label_values(&[&feedback.display_name, operation_str])
232+
.with_label_values(&[&feedback.display_name, operation_str, ping_label])
225233
.inc();
226234
}
227235
Err(()) => {
228236
self.metrics
229237
.operation_failure
230-
.with_label_values(&[&feedback.display_name, operation_str])
238+
.with_label_values(&[&feedback.display_name, operation_str, ping_label])
231239
.inc();
232240
}
233241
}

0 commit comments

Comments
 (0)