Skip to content

Commit e156017

Browse files
authored
[Feature]: Add corrupted request metric to V1 metrics system. (#27306)
Signed-off-by: atalhens <[email protected]>
1 parent 65ac8d8 commit e156017

File tree

6 files changed

+51
-13
lines changed

6 files changed

+51
-13
lines changed

tests/v1/metrics/test_stats.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ def test_iteration_stats_repr():
1818
"time_to_first_tokens_iter=[], "
1919
"inter_token_latencies_iter=[], "
2020
"waiting_lora_adapters={}, "
21-
"running_lora_adapters={})"
21+
"running_lora_adapters={}, "
22+
"num_corrupted_reqs=0)"
2223
)
2324
assert repr(iteration_stats) == expected_repr

vllm/v1/core/sched/scheduler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1023,6 +1023,7 @@ def update_from_output(
10231023
kv_transfer_params=kv_transfer_params,
10241024
trace_headers=request.trace_headers,
10251025
num_cached_tokens=request.num_cached_tokens,
1026+
num_nans_in_logits=request.num_nans_in_logits,
10261027
)
10271028
)
10281029
else:
@@ -1259,7 +1260,6 @@ def make_stats(
12591260
prefix_cache_stats=prefix_cache_stats,
12601261
connector_prefix_cache_stats=connector_prefix_cache_stats,
12611262
spec_decoding_stats=spec_decoding_stats,
1262-
num_corrupted_reqs=sum(req.is_output_corrupted for req in self.running),
12631263
kv_connector_stats=kv_connector_stats.data if kv_connector_stats else None,
12641264
)
12651265

vllm/v1/engine/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,10 @@ class EngineCoreOutput(
122122
# The number of tokens with prefix cache hits.
123123
num_cached_tokens: int = 0
124124

125+
# The number of NaNs in logits.
126+
# A value greater than 0 indicates that the output is corrupted.
127+
num_nans_in_logits: int = 0
128+
125129
@property
126130
def finished(self) -> bool:
127131
return self.finish_reason is not None

vllm/v1/metrics/loggers.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from prometheus_client import Counter, Gauge, Histogram
1111

12+
import vllm.envs as envs
1213
from vllm.config import SupportsMetricsInfo, VllmConfig
1314
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
1415
KVConnectorLogging,
@@ -116,11 +117,13 @@ def _reset(self, now):
116117
# Tracked stats over current local logging interval.
117118
self.num_prompt_tokens: int = 0
118119
self.num_generation_tokens: int = 0
120+
self.num_corrupted_reqs: int = 0
119121

120122
def _track_iteration_stats(self, iteration_stats: IterationStats):
121123
# Save tracked stats for token counters.
122124
self.num_prompt_tokens += iteration_stats.num_prompt_tokens
123125
self.num_generation_tokens += iteration_stats.num_generation_tokens
126+
self.num_corrupted_reqs += iteration_stats.num_corrupted_reqs
124127

125128
def _get_throughput(self, tracked_stats: int, now: float) -> float:
126129
# Compute summary metrics for tracked stats
@@ -204,6 +207,10 @@ def log(self):
204207
self.last_scheduler_stats.kv_cache_usage * 100,
205208
self.prefix_caching_metrics.hit_rate * 100,
206209
]
210+
211+
if envs.VLLM_COMPUTE_NANS_IN_LOGITS:
212+
log_parts.append("Corrupted: %d reqs")
213+
log_args.append(self.num_corrupted_reqs)
207214
if not self.connector_prefix_caching_metrics.empty:
208215
log_parts.append("External prefix cache hit rate: %.1f%%")
209216
log_args.append(self.connector_prefix_caching_metrics.hit_rate * 100)
@@ -275,9 +282,6 @@ def aggregate_scheduler_stats(self):
275282
self.last_scheduler_stats.num_running_reqs += (
276283
last_scheduler_stats.num_running_reqs
277284
)
278-
self.last_scheduler_stats.num_corrupted_reqs += (
279-
last_scheduler_stats.num_corrupted_reqs
280-
)
281285
self.last_scheduler_stats.kv_cache_usage += (
282286
last_scheduler_stats.kv_cache_usage
283287
)
@@ -481,6 +485,19 @@ def __init__(
481485
gauge_kv_cache_usage, engine_indexes, model_name
482486
)
483487

488+
if envs.VLLM_COMPUTE_NANS_IN_LOGITS:
489+
counter_corrupted_requests = self._counter_cls(
490+
name="vllm:corrupted_requests",
491+
documentation=(
492+
"Corrupted requests, in terms of total number of requests "
493+
"with NaNs in logits."
494+
),
495+
labelnames=labelnames,
496+
)
497+
self.counter_corrupted_requests = make_per_engine(
498+
counter_corrupted_requests, engine_indexes, model_name
499+
)
500+
484501
counter_prefix_cache_queries = self._counter_cls(
485502
name="vllm:prefix_cache_queries",
486503
documentation=(
@@ -933,7 +950,6 @@ def record(
933950
self.gauge_scheduler_waiting[engine_idx].set(
934951
scheduler_stats.num_waiting_reqs
935952
)
936-
937953
if self.show_hidden_metrics:
938954
self.gauge_gpu_cache_usage[engine_idx].set(
939955
scheduler_stats.kv_cache_usage
@@ -979,7 +995,10 @@ def record(
979995

980996
if iteration_stats is None:
981997
return
982-
998+
if envs.VLLM_COMPUTE_NANS_IN_LOGITS:
999+
self.counter_corrupted_requests[engine_idx].inc(
1000+
iteration_stats.num_corrupted_reqs
1001+
)
9831002
self.counter_num_preempted_reqs[engine_idx].inc(
9841003
iteration_stats.num_preempted_reqs
9851004
)

vllm/v1/metrics/stats.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from dataclasses import dataclass, field
77
from typing import TYPE_CHECKING, Any
88

9+
import vllm.envs as envs
910
from vllm.v1.spec_decode.metrics import SpecDecodingStats
1011

1112
if TYPE_CHECKING:
@@ -169,8 +170,6 @@ class SchedulerStats:
169170
spec_decoding_stats: SpecDecodingStats | None = None
170171
kv_connector_stats: dict[str, Any] | None = None
171172

172-
num_corrupted_reqs: int = 0
173-
174173

175174
@dataclass
176175
class LoRAStats:
@@ -196,6 +195,9 @@ class RequestStateStats:
196195
# first token latency
197196
first_token_latency: float = 0.0
198197

198+
# Track if this request is corrupted (NaNs in logits)
199+
is_corrupted: bool = False
200+
199201

200202
@dataclass
201203
class FinishedRequestStats:
@@ -211,6 +213,7 @@ class FinishedRequestStats:
211213
inference_time: float = 0.0
212214
decode_time: float = 0.0
213215
mean_time_per_output_token: float = 0.0
216+
is_corrupted: bool = False
214217

215218

216219
class IterationStats:
@@ -228,6 +231,7 @@ def __init__(self):
228231
self.inter_token_latencies_iter: list[float] = []
229232
self.waiting_lora_adapters: dict[str, int] = {}
230233
self.running_lora_adapters: dict[str, int] = {}
234+
self.num_corrupted_reqs: int = 0
231235

232236
def __repr__(self) -> str:
233237
field_to_value_str = ", ".join(f"{k}={v}" for k, v in vars(self).items())
@@ -258,6 +262,15 @@ def update_from_output(
258262

259263
req_stats.num_generation_tokens += num_new_generation_tokens
260264

265+
# Track if this request is corrupted (the flag is set at most once per request)
266+
# Short-circuit the condition once the request is marked as corrupted to avoid redundant checks
267+
if (
268+
envs.VLLM_COMPUTE_NANS_IN_LOGITS
269+
and not req_stats.is_corrupted
270+
and output.num_nans_in_logits > 0
271+
):
272+
req_stats.is_corrupted = True
273+
261274
# Process request-level engine core events
262275
if output.events is not None:
263276
self.update_from_events(
@@ -339,9 +352,14 @@ def update_from_finished_request(
339352
inference_time=inference_time,
340353
decode_time=decode_time,
341354
mean_time_per_output_token=mean_time_per_output_token,
355+
is_corrupted=req_stats.is_corrupted,
342356
)
343357
self.finished_requests.append(finished_req)
344358

359+
# Count corrupted requests when they finish (only once per request)
360+
if req_stats.is_corrupted:
361+
self.num_corrupted_reqs += 1
362+
345363

346364
class LoRARequestStates:
347365
"""Per-LoRA request state stats."""

vllm/v1/request.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,10 +168,6 @@ def append_output_token_ids(
168168
def use_structured_output(self) -> bool:
169169
return self.structured_output_request is not None
170170

171-
@property
172-
def is_output_corrupted(self) -> bool:
173-
return self.num_nans_in_logits > 0
174-
175171
@property
176172
def num_tokens(self) -> int:
177173
return len(self._all_token_ids)

0 commit comments

Comments
 (0)