|
47 | 47 |
|
48 | 48 |
|
49 | 49 | class RecomputeScheduler(Scheduler): |
50 | | - |
| 50 | + running: list[Request] |
| 51 | + |
51 | 52 | def schedule(self) -> SchedulerOutput: |
52 | 53 | # NOTE(woosuk) on the scheduling algorithm: |
53 | 54 | # There's no "decoding phase" nor "prefill phase" in the scheduler. |
@@ -613,14 +614,15 @@ def update_from_output( |
613 | 614 | kv_connector_output.invalid_block_ids) |
614 | 615 |
|
615 | 616 | # return recomputed requests as EngineCoreOutput |
616 | | - for req_info in scheduler_output.recomputed_reqs: |
617 | | - outputs[req_info.client_index].append( |
618 | | - EngineCoreOutput( |
619 | | - request_id=req_info.request_id, |
620 | | - finish_reason=FinishReason.STOP, |
621 | | - new_token_ids=[req_info.output_token_ids[-1]], |
622 | | - stop_reason="recomputed", |
623 | | - )) |
| 617 | + if scheduler_output.recomputed_reqs is not None: |
| 618 | + for req_info in scheduler_output.recomputed_reqs: |
| 619 | + outputs[req_info.client_index].append( |
| 620 | + EngineCoreOutput( |
| 621 | + request_id=req_info.request_id, |
| 622 | + finish_reason=FinishReason.STOP, |
| 623 | + new_token_ids=[req_info.output_token_ids[-1]], |
| 624 | + stop_reason="recomputed", |
| 625 | + )) |
624 | 626 |
|
625 | 627 | # NOTE(woosuk): As len(num_scheduled_tokens) can be up to 1K or more, |
626 | 628 | # the below loop can be a performance bottleneck. We should do our best |
|
0 commit comments