Fix vl on 310p device (#230)

jinminxi104 · web-flow · commit 01e42eaa2895 · 2025-06-08T00:06:39.000+08:00
diff --git a/dlinfer/vendor/ascend/torch_npu_ops.py b/dlinfer/vendor/ascend/torch_npu_ops.py
@@ -527,6 +527,16 @@ def linear(
             bias=bias,
         )
     else:
+        # On 310P, the LLM part runs in graph mode, where the weight is converted
+        # to NZ format, but the VL part runs in eager mode.
+        # If the weight arrives here in NZ layout, reshape it back to ND first.
+        if (
+            len(weight.shape) == 4
+            and weight.shape[0] == 1
+            and weight.shape[1] * weight.shape[3] == x.shape[-1]
+        ):
+            weight = weight.permute(0, 2, 1, 3)
+            weight = weight.reshape(weight.shape[1], -1)
         out = torch.nn.functional.linear(x, weight, bias)
     return out