Skip to content

Commit 0f2c765

Browse files
wangxiyuan authored and chencangtao committed
Revert "[Bugfix] Fix Qwen2.5-Omni-7B accuracy test (vllm-project#4556)" (vllm-project#4619)
This reverts commit 71e9b37. It breaks vllm-ascend/Qwen3-30B-A3B-W8A8 test
1 parent 6e1df89 commit 0f2c765

File tree

2 files changed

+5
-9
lines changed

2 files changed

+5
-9
lines changed

vllm_ascend/ops/layernorm.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,13 +108,13 @@ def forward_oot(
108108
residual: Optional[torch.Tensor] = None,
109109
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
110110
import torch_npu
111+
111112
if residual is not None:
112113
residual = torch.ops.vllm.maybe_chunk_residual(x, residual)
113114
assert x.size(0) == residual.size(0)
114-
next_need_quant_fusion_linear = getattr(
115-
self, 'next_need_quant_fusion_linear', None)
116115
x, residual = _addrmsnorm_forward_oot(
117-
self, x, residual, next_need_quant_fusion_linear, self.bias)
116+
self, x, residual, self.next_need_quant_fusion_linear,
117+
self.bias)
118118
return x, residual
119119
x, residual = torch_npu.npu_rms_norm(x, self.weight,
120120
self.variance_epsilon)

vllm_ascend/ops/register_custom_ops.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -173,9 +173,7 @@ def _maybe_prefetch_mlp_down_proj_impl(x_dependency: torch.Tensor) -> None:
173173
except AssertionError:
174174
return
175175

176-
prefetch_mlp_enabled = getattr(forward_context, 'prefetch_mlp_enabled',
177-
False)
178-
if not prefetch_mlp_enabled:
176+
if not forward_context.prefetch_mlp_enabled:
179177
return
180178
forward_context.prefetch_mlp_down_proj = True
181179
model_instance = forward_context.model_instance
@@ -204,9 +202,7 @@ def _maybe_wait_prefetch_done_impl(x: torch.Tensor) -> None:
204202
except AssertionError:
205203
return
206204

207-
prefetch_mlp_enabled = getattr(forward_context, 'prefetch_mlp_enabled',
208-
False)
209-
if not prefetch_mlp_enabled:
205+
if not forward_context.prefetch_mlp_enabled:
210206
return
211207
if forward_context.prefetch_mlp_gate_up_proj or \
212208
forward_context.prefetch_mlp_down_proj:

0 commit comments

Comments (0)