[MFP] Refactor the use of scoring - use latencies instead (#23741)

akichidis · mwtian · mwtian · commit 6146294e82ef · 2025-09-30T10:25:12.000-07:00
## Description 

The current scoring mechanism works well, but it is not very intuitive
and it contains unnecessary calculations. An alternative is to use the
calculated average (end-to-end) latencies instead and expose those as
the "scores". The reliability metric is now used to penalise the
latency, with the max latency chosen to be `10s` - anything above that
doesn't have much meaning anyway.

I've also steered away from re-using the "score" term so it becomes
clear end to end what we are measuring here.

## Test plan 

CI/PT

---

## Release notes

Check each box that your changes affect. If none of the boxes relate to
your changes, release notes aren't required.

For each box you select, include information after the relevant heading
that describes the impact of your changes that a user might notice and
any actions they must take to implement updates.

- [ ] Protocol: 
- [ ] Nodes (Validators and Full nodes): 
- [ ] gRPC:
- [ ] JSON-RPC: 
- [ ] GraphQL: 
- [ ] CLI: 
- [ ] Rust SDK:

---------

Co-authored-by: Mingwei Tian <mingwei@mystenlabs.com>
diff --git a/crates/sui-config/src/validator_client_monitor_config.rs b/crates/sui-config/src/validator_client_monitor_config.rs
@@ -113,12 +113,6 @@ pub struct ValidatorClientMonitorConfig {
     #[serde(default = "default_health_check_timeout")]
     pub health_check_timeout: Duration,
 
-    /// Weight configuration for score calculation.
-    ///
-    /// Determines how different factors contribute to validator selection.
-    #[serde(default)]
-    pub score_weights: ScoreWeights,
-
     /// Cooldown period after failures before considering a validator again.
     ///
     /// Should be long enough to allow transient issues to resolve,
@@ -132,85 +126,29 @@ pub struct ValidatorClientMonitorConfig {
     /// Higher values are more tolerant of intermittent issues.
     #[serde(default = "default_max_consecutive_failures")]
     pub max_consecutive_failures: u32,
-}
 
-/// Weights for different factors in score calculation
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(rename_all = "kebab-case")]
-pub struct ScoreWeights {
-    /// Weight for latency (lower is better).
+    /// Weight for reliability.
     ///
-    /// This is the overall weight for all latency scores combined.
-    /// Individual operation latencies are weighted separately below.
-    #[serde(default = "default_latency_weight")]
-    pub latency: f64,
-
-    /// Weight for success rate.
-    ///
-    /// Higher values prioritize reliability over performance.
+    /// Controls importance of reliability when adjusting the validator's latency for transaction submission
+    /// selection. The higher the weight, the more penalty is given to unreliable validators.
+    /// Default to 2.0. Value should be positive.
     #[serde(default = "default_reliability_weight")]
-    pub reliability: f64,
-
-    /// Weight for submit transaction latency.
-    ///
-    /// Controls importance of transaction submission speed.
-    #[serde(default = "default_submit_latency_weight")]
-    pub submit_latency_weight: f64,
-
-    /// Weight for effects retrieval latency.
-    ///
-    /// Controls importance of effects query speed.
-    /// Often the most critical operation for application responsiveness.
-    #[serde(default = "default_effects_latency_weight")]
-    pub effects_latency_weight: f64,
-
-    /// Weight for health check latency.
-    ///
-    /// Usually less critical than actual operations.
-    #[serde(default = "default_health_check_latency_weight")]
-    pub health_check_latency_weight: f64,
-
-    /// Weight for fast path latency.
-    ///
-    /// Controls importance of finalization speed.
-    #[serde(default = "default_fast_path_latency_weight")]
-    pub fast_path_latency_weight: f64,
-
-    /// Weight for consensus latency.
-    ///
-    /// Controls importance of consensus speed.
-    #[serde(default = "default_consensus_latency_weight")]
-    pub consensus_latency_weight: f64,
+    pub reliability_weight: f64,
 }
 
 impl Default for ValidatorClientMonitorConfig {
     fn default() -> Self {
         Self {
             health_check_interval: default_health_check_interval(),
             health_check_timeout: default_health_check_timeout(),
-            score_weights: ScoreWeights::default(),
             failure_cooldown: default_failure_cooldown(),
             max_consecutive_failures: default_max_consecutive_failures(),
-        }
-    }
-}
-
-impl Default for ScoreWeights {
-    fn default() -> Self {
-        Self {
-            latency: default_latency_weight(),
-            reliability: default_reliability_weight(),
-            submit_latency_weight: default_submit_latency_weight(),
-            effects_latency_weight: default_effects_latency_weight(),
-            health_check_latency_weight: default_health_check_latency_weight(),
-            fast_path_latency_weight: default_fast_path_latency_weight(),
-            consensus_latency_weight: default_consensus_latency_weight(),
+            reliability_weight: default_reliability_weight(),
         }
     }
 }
 
 // Default value functions
-
 fn default_health_check_interval() -> Duration {
     Duration::from_secs(10)
 }
@@ -227,30 +165,6 @@ fn default_max_consecutive_failures() -> u32 {
     100
 }
 
-fn default_latency_weight() -> f64 {
-    0.9
-}
-
 fn default_reliability_weight() -> f64 {
-    0.1
-}
-
-fn default_submit_latency_weight() -> f64 {
-    0.0
-}
-
-fn default_effects_latency_weight() -> f64 {
-    0.0
-}
-
-fn default_health_check_latency_weight() -> f64 {
-    0.0
-}
-
-fn default_fast_path_latency_weight() -> f64 {
-    1.0
-}
-
-fn default_consensus_latency_weight() -> f64 {
-    1.0
+    2.0
 }
diff --git a/crates/sui-core/src/transaction_driver/metrics.rs b/crates/sui-core/src/transaction_driver/metrics.rs
@@ -18,7 +18,7 @@ pub struct TransactionDriverMetrics {
     pub(crate) settlement_finality_latency: HistogramVec,
     pub(crate) total_transactions_submitted: IntCounterVec,
     pub(crate) submit_transaction_retries: Histogram,
-    pub(crate) submit_transaction_latency: Histogram,
+    pub(crate) submit_transaction_latency: HistogramVec,
     pub(crate) validator_submit_transaction_errors: IntCounterVec,
     pub(crate) validator_submit_transaction_successes: IntCounterVec,
     pub(crate) executed_transactions: IntCounter,
@@ -60,11 +60,12 @@ impl TransactionDriverMetrics {
                 registry,
             )
             .unwrap(),
-            submit_transaction_latency: register_histogram_with_registry!(
+            submit_transaction_latency: register_histogram_vec_with_registry!(
                 "transaction_driver_submit_transaction_latency",
                 "Time in seconds to successfully submit a transaction to a validator.\n\
                 Includes all retries and measures from the start of submission\n\
                 until a validator accepts the transaction.",
+                &["validator", "tx_type", "ping"],
                 mysten_metrics::LATENCY_SEC_BUCKETS.to_vec(),
                 registry,
             )
diff --git a/crates/sui-core/src/transaction_driver/mod.rs b/crates/sui-core/src/transaction_driver/mod.rs
@@ -121,7 +121,7 @@ where
     async fn run_latency_checks(self: Arc<Self>) {
         const INTERVAL_BETWEEN_RUNS: Duration = Duration::from_secs(15);
         const MAX_JITTER: Duration = Duration::from_secs(10);
-        const PING_REQUEST_TIMEOUT: Duration = Duration::from_millis(5_000);
+        const PING_REQUEST_TIMEOUT: Duration = Duration::from_secs(5);
 
         let mut interval = interval(INTERVAL_BETWEEN_RUNS);
         interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay);
diff --git a/crates/sui-core/src/transaction_driver/transaction_submitter.rs b/crates/sui-core/src/transaction_driver/transaction_submitter.rs
@@ -144,7 +144,10 @@ impl TransactionSubmitter {
                         .submit_transaction_retries
                         .observe(retries as f64);
                     let elapsed = start_time.elapsed().as_secs_f64();
-                    self.metrics.submit_transaction_latency.observe(elapsed);
+                    self.metrics
+                        .submit_transaction_latency
+                        .with_label_values(&[&display_name, tx_type.as_str(), ping_label])
+                        .observe(elapsed);
 
                     return Ok((name, result));
                 }
diff --git a/crates/sui-core/src/validator_client_monitor/metrics.rs b/crates/sui-core/src/validator_client_monitor/metrics.rs
@@ -22,8 +22,9 @@ pub struct ValidatorClientMetrics {
     /// Failure count per validator and operation type
     pub operation_failure: IntCounterVec,
 
-    /// Current performance score per validator
-    pub performance_score: GaugeVec,
+    /// Current performance per validator. The performance is the average latency of the validator
+    /// weighted by the reliability of the validator.
+    pub performance: GaugeVec,
 
     /// Consecutive failures per validator
     pub consecutive_failures: IntGaugeVec,
@@ -57,9 +58,10 @@ impl ValidatorClientMetrics {
             )
             .unwrap(),
 
-            performance_score: register_gauge_vec_with_registry!(
-                "validator_client_observed_score",
-                "Current client-observed score per validator",
+            performance: register_gauge_vec_with_registry!(
+                "validator_client_observed_performance",
+                "Current client-observed performance per validator. The performance is the average latency of the validator
+                weighted by the reliability of the validator.",
                 &["validator", "tx_type"],
                 registry,
             )
diff --git a/crates/sui-core/src/validator_client_monitor/monitor.rs b/crates/sui-core/src/validator_client_monitor/monitor.rs
@@ -39,7 +39,7 @@ pub struct ValidatorClientMonitor<A: Clone> {
     metrics: Arc<ValidatorClientMetrics>,
     client_stats: RwLock<ClientObservedStats>,
     authority_aggregator: Arc<ArcSwap<AuthorityAggregator<A>>>,
-    cached_scores: RwLock<HashMap<TxType, HashMap<AuthorityName, f64>>>,
+    cached_latencies: RwLock<HashMap<TxType, HashMap<AuthorityName, f64>>>,
 }
 
 impl<A> ValidatorClientMonitor<A>
@@ -61,7 +61,7 @@ where
             metrics,
             client_stats: RwLock::new(ClientObservedStats::new(config)),
             authority_aggregator,
-            cached_scores: RwLock::new(HashMap::new()),
+            cached_latencies: RwLock::new(HashMap::new()),
         });
 
         let monitor_clone = monitor.clone();
@@ -155,41 +155,42 @@ where
                 }
             }
 
-            self.update_cached_scores(&authority_agg);
+            self.update_cached_latencies(&authority_agg);
         }
     }
 }
 
 impl<A: Clone> ValidatorClientMonitor<A> {
-    /// Calculate and cache scores for all validators.
+    /// Calculate and cache latencies for all validators.
     ///
     /// This method is called periodically after health checks complete to update
-    /// the cached validator scores.
-    fn update_cached_scores(&self, authority_agg: &AuthorityAggregator<A>) {
+    /// the cached validator latencies. Those are the end to end latencies as calculated for each validator
+    /// taking into account the reliability of the validator.
+    fn update_cached_latencies(&self, authority_agg: &AuthorityAggregator<A>) {
         let committee = &authority_agg.committee;
-        let mut cached_scores = self.cached_scores.write();
+        let mut cached_latencies = self.cached_latencies.write();
 
         for tx_type in TxType::iter() {
-            let score_map = self
+            let latencies_map = self
                 .client_stats
                 .read()
                 .get_all_validator_stats(committee, tx_type);
 
-            for (validator, score) in score_map.iter() {
+            for (validator, latency) in latencies_map.iter() {
                 debug!(
-                    "Validator {}, tx type {}: score {}",
+                    "Validator {}, tx type {}: latency {}",
                     validator,
                     tx_type.as_str(),
-                    score
+                    *latency
                 );
                 let display_name = authority_agg.get_display_name(validator);
                 self.metrics
-                    .performance_score
+                    .performance
                     .with_label_values(&[&display_name, tx_type.as_str()])
-                    .set(*score);
+                    .set(*latency);
             }
 
-            cached_scores.insert(tx_type, score_map);
+            cached_latencies.insert(tx_type, latencies_map);
         }
     }
 
@@ -239,15 +240,15 @@ impl<A: Clone> ValidatorClientMonitor<A> {
     /// is called, and we need to maintain an invariant that the selected
     /// validators are always in the committee passed in.
     ///
-    /// Also the tx type is passed in so that we can select validators based on their respective scores
+    /// Also the tx type is passed in so that we can select validators based on their respective latencies
     /// for the transaction type.
     ///
     /// We shuffle the top k validators to avoid the same validator being selected
     /// too many times in a row and getting overloaded.
     ///
     /// Returns a vector containing:
-    /// 1. The top `k` validators by score (shuffled)
-    /// 2. The remaining validators ordered by score (not shuffled)
+    /// 1. The top `k` validators by latency (shuffled)
+    /// 2. The remaining validators ordered by latency (not shuffled)
     pub fn select_shuffled_preferred_validators(
         &self,
         committee: &Committee,
@@ -256,30 +257,34 @@ impl<A: Clone> ValidatorClientMonitor<A> {
     ) -> Vec<AuthorityName> {
         let mut rng = rand::thread_rng();
 
-        let cached_scores = self.cached_scores.read();
-        let Some(cached_scores) = cached_scores.get(&tx_type) else {
+        let cached_latencies = self.cached_latencies.read();
+        let Some(cached_latencies) = cached_latencies.get(&tx_type) else {
             let mut validators: Vec<_> = committee.names().cloned().collect();
             validators.shuffle(&mut rng);
             return validators;
         };
 
-        // Since the cached scores are updated periodically, it is possible that it was ran on
+        // Since the cached latencies are updated periodically, it is possible that it was ran on
         // an out-of-date committee.
-        let mut validator_with_scores: Vec<_> = committee
+        let mut validator_with_latencies: Vec<_> = committee
             .names()
-            .map(|v| (*v, cached_scores.get(v).cloned().unwrap_or(0.0)))
+            .map(|v| (*v, cached_latencies.get(v).cloned().unwrap_or(0.0)))
             .collect();
-        validator_with_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
+        // Sort by latency in ascending order. We want to select the validators with the lowest latencies.
+        validator_with_latencies.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
 
-        let k = k.min(validator_with_scores.len());
-        validator_with_scores[..k].shuffle(&mut rng);
+        let k = k.min(validator_with_latencies.len());
+        validator_with_latencies[..k].shuffle(&mut rng);
 
-        validator_with_scores.into_iter().map(|(v, _)| v).collect()
+        validator_with_latencies
+            .into_iter()
+            .map(|(v, _)| v)
+            .collect()
     }
 
     #[cfg(test)]
-    pub fn force_update_cached_scores(&self, authority_agg: &AuthorityAggregator<A>) {
-        self.update_cached_scores(authority_agg);
+    pub fn force_update_cached_latencies(&self, authority_agg: &AuthorityAggregator<A>) {
+        self.update_cached_latencies(authority_agg);
     }
 
     #[cfg(test)]
diff --git a/crates/sui-core/src/validator_client_monitor/stats.rs b/crates/sui-core/src/validator_client_monitor/stats.rs
diff --git a/crates/sui-core/src/validator_client_monitor/tests.rs b/crates/sui-core/src/validator_client_monitor/tests.rs