File tree Expand file tree Collapse file tree 1 file changed +17
-0
lines changed
vllm/v1/attention/backends Expand file tree Collapse file tree 1 file changed +17
-0
lines changed Original file line number Diff line number Diff line change @@ -212,6 +212,23 @@ def build(self,
212212 """
213213 raise NotImplementedError
214214
215+ def reorder_batch (self , input_batch : "InputBatch" ,
216+ scheduler_output : "SchedulerOutput" ) -> bool :
217+ """
218+ Update the order of requests in the batch based on the attention
219+ backend's needs. For example, some attention backends (namely MLA) may
220+ want to separate requests based on whether the attention computation
221+ will be compute-bound or memory-bound.
222+
223+ Args:
224+ input_batch: the input batch whose requests may be reordered.
225+ scheduler_output: the scheduler output.
226+
227+ Returns:
228+ True if the batch was modified, False otherwise.
229+ """
230+ raise NotImplementedError
231+
215232 def build_for_cudagraph_capture (
216233 self , common_attn_metadata : CommonAttentionMetadata ) -> M :
217234 """
You can’t perform that action at this time.
0 commit comments