From e94363c950ad29e71ee64700d591fdaee064ea63 Mon Sep 17 00:00:00 2001
From: sstamenk
Date: Tue, 21 Oct 2025 15:35:21 +0200
Subject: [PATCH 1/8] Enable bitsandbytes quantization on warp size 32 AMD GPUs

Signed-off-by: sstamenk
---
 vllm/platforms/rocm.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 788f9d69c357..66453e0c835b 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -185,6 +185,9 @@ class RocmPlatform(Platform):
         "petit_nvfp4",
         "torchao",
     ]
+    # bitsandbytes is not supported on GPUs with warp size 64 (gfx9)
+    if not on_gfx9():
+        supported_quantization += ["bitsandbytes"]
 
     @classmethod
     def get_vit_attn_backend(

From 494b4d641504c1644f982f72d724607273ee2fe9 Mon Sep 17 00:00:00 2001
From: Strahinja Stamenkovic
Date: Mon, 10 Nov 2025 10:52:22 +0100
Subject: [PATCH 2/8] Enable bitsandbytes unit tests on Radeon

Signed-off-by: sstamenk
---
 tests/models/quantization/test_bitsandbytes.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py
index 24220978534c..808ffd89f47f 100644
--- a/tests/models/quantization/test_bitsandbytes.py
+++ b/tests/models/quantization/test_bitsandbytes.py
@@ -10,13 +10,14 @@
 
 from tests.quantization.utils import is_quant_method_supported
 from vllm.platforms import current_platform
+from vllm.platforms.rocm import on_gfx9
 
 from ...utils import compare_two_settings, multi_gpu_test
 from ..utils import check_embeddings_close, check_logprobs_close
 
 pytestmark = pytest.mark.skipif(
-    current_platform.is_rocm(),
-    reason="bitsandbytes quantization not supported on ROCm (CUDA-only kernels)",
+    current_platform.is_rocm() and on_gfx9(),
+    reason="bitsandbytes quantization not supported on Instinct (warp size 64 limitation)",
 )
 
 models_4bit_to_test = [

From 0f0fa5de2ab4c5d50177bba3c33a983245d896eb Mon Sep 17 00:00:00 2001
From: Strahinja Stamenkovic
Date: Mon, 10 Nov 2025 10:54:14 +0100
Subject: [PATCH 3/8] Update comment

Signed-off-by: sstamenk
---
 vllm/platforms/rocm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 66453e0c835b..9c5ae618b48a 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -185,7 +185,7 @@ class RocmPlatform(Platform):
         "petit_nvfp4",
         "torchao",
     ]
-    # bitsandbytes is not supported on GPUs with warp size 64 (gfx9)
+    # bitsandbytes quantization not supported on Instinct (warp size 64 limitation)
     if not on_gfx9():
         supported_quantization += ["bitsandbytes"]
 

From 26342aa524913507f0faa7f6dfe1b134db5a15e5 Mon Sep 17 00:00:00 2001
From: Strahinja Stamenkovic
Date: Sun, 16 Nov 2025 23:54:12 +0100
Subject: [PATCH 4/8] Update test_bitsandbytes.py

Signed-off-by: sstamenk
---
 tests/models/quantization/test_bitsandbytes.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py
index 808ffd89f47f..79a182a767f4 100644
--- a/tests/models/quantization/test_bitsandbytes.py
+++ b/tests/models/quantization/test_bitsandbytes.py
@@ -17,7 +17,7 @@
 
 pytestmark = pytest.mark.skipif(
     current_platform.is_rocm() and on_gfx9(),
-    reason="bitsandbytes quantization not supported on Instinct (warp size 64 limitation)",
+    reason="bitsandbytes quantization not supported on gfx9 (warp size 64 limitation)",
 )
 
 models_4bit_to_test = [

From a39949aacc22e46d30b1cb8afc1383944a888993 Mon Sep 17 00:00:00 2001
From: Strahinja Stamenkovic
Date: Sun, 16 Nov 2025 23:54:42 +0100
Subject: [PATCH 5/8] Update rocm.py

Signed-off-by: sstamenk
---
 vllm/platforms/rocm.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 9c5ae618b48a..3f62172408b8 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -185,7 +185,7 @@ class RocmPlatform(Platform):
         "petit_nvfp4",
         "torchao",
     ]
-    # bitsandbytes quantization not supported on Instinct (warp size 64 limitation)
+    # bitsandbytes quantization not supported on gfx9 (warp size 64 limitation)
     if not on_gfx9():
         supported_quantization += ["bitsandbytes"]
 

From 9fe767d5312ca733be8711080d0f126aa0a729b3 Mon Sep 17 00:00:00 2001
From: sstamenk
Date: Mon, 17 Nov 2025 14:16:38 +0100
Subject: [PATCH 6/8] Refactor import logic

Signed-off-by: sstamenk
---
 tests/models/quantization/test_bitsandbytes.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py
index 79a182a767f4..c65fd8be053c 100644
--- a/tests/models/quantization/test_bitsandbytes.py
+++ b/tests/models/quantization/test_bitsandbytes.py
@@ -10,14 +10,15 @@
 
 from tests.quantization.utils import is_quant_method_supported
 from vllm.platforms import current_platform
-from vllm.platforms.rocm import on_gfx9
 
 from ...utils import compare_two_settings, multi_gpu_test
 from ..utils import check_embeddings_close, check_logprobs_close
 
-pytestmark = pytest.mark.skipif(
-    current_platform.is_rocm() and on_gfx9(),
-    reason="bitsandbytes quantization not supported on gfx9 (warp size 64 limitation)",
+if current_platform.is_rocm():
+    from vllm.platforms.rocm import on_gfx9
+    pytestmark = pytest.mark.skipif(
+        on_gfx9(),
+        reason="bitsandbytes quantization not supported on gfx9 (warp size 64 limitation)",
 )
 
 models_4bit_to_test = [

From 6230a5dc6b6c008ae210163dfc5addd6d8429003 Mon Sep 17 00:00:00 2001
From: sstamenk
Date: Mon, 17 Nov 2025 14:23:54 +0100
Subject: [PATCH 7/8] Fix indentation

Signed-off-by: sstamenk
---
 tests/models/quantization/test_bitsandbytes.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py
index c65fd8be053c..24ab36312f0a 100644
--- a/tests/models/quantization/test_bitsandbytes.py
+++ b/tests/models/quantization/test_bitsandbytes.py
@@ -16,10 +16,11 @@
 
 if current_platform.is_rocm():
     from vllm.platforms.rocm import on_gfx9
+
     pytestmark = pytest.mark.skipif(
         on_gfx9(),
         reason="bitsandbytes quantization not supported on gfx9 (warp size 64 limitation)",
-)
+    )
 
 models_4bit_to_test = [
     ("facebook/opt-125m", "quantize opt model inflight"),

From d9730fdbc4940d136d82df0bf66ba07236179eb6 Mon Sep 17 00:00:00 2001
From: sstamenk
Date: Tue, 18 Nov 2025 02:38:40 +0100
Subject: [PATCH 8/8] Fix precommit

Signed-off-by: sstamenk
---
 tests/models/quantization/test_bitsandbytes.py | 2 +-
 vllm/platforms/rocm.py                         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py
index 24ab36312f0a..dc4b4546e451 100644
--- a/tests/models/quantization/test_bitsandbytes.py
+++ b/tests/models/quantization/test_bitsandbytes.py
@@ -19,7 +19,7 @@
 
     pytestmark = pytest.mark.skipif(
         on_gfx9(),
-        reason="bitsandbytes quantization not supported on gfx9 (warp size 64 limitation)",
+        reason="bitsandbytes not supported on gfx9 (warp size 64 limitation)",
     )
 
 models_4bit_to_test = [
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 3f62172408b8..bb116792fed5 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -185,7 +185,7 @@ class RocmPlatform(Platform):
         "petit_nvfp4",
         "torchao",
     ]
-    # bitsandbytes quantization not supported on gfx9 (warp size 64 limitation)
+    # bitsandbytes not supported on gfx9 (warp size 64 limitation)
     if not on_gfx9():
         supported_quantization += ["bitsandbytes"]
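
For readers skimming the series: the net effect of the eight patches is a single
predicate, applied once on the platform side and once on the test side. The
standalone Python sketch below illustrates it. The on_gfx9() stub is only a
stand-in for the real detection helper in vllm/platforms/rocm.py, and the
quantization list is trimmed to the two entries visible in the hunks above.

# Illustrative sketch only -- not part of the patches above. Assumes the real
# vllm.platforms.rocm.on_gfx9() returns True on warp size 64 (gfx9 / Instinct)
# GPUs and False on warp size 32 (Radeon) GPUs.

def on_gfx9() -> bool:
    # Stub standing in for the hardware check in vllm/platforms/rocm.py.
    return False  # pretend we are on a warp size 32 (Radeon) GPU

# Platform side (vllm/platforms/rocm.py after PATCH 8/8): advertise
# bitsandbytes only where the warp size is 32. List trimmed for brevity.
supported_quantization = [
    "petit_nvfp4",
    "torchao",
]
# bitsandbytes not supported on gfx9 (warp size 64 limitation)
if not on_gfx9():
    supported_quantization += ["bitsandbytes"]

assert "bitsandbytes" in supported_quantization  # holds on warp size 32 GPUs

The test side applies the same predicate in reverse: on ROCm it imports
on_gfx9() and attaches pytest.mark.skipif(on_gfx9(), ...), so the bitsandbytes
suite now runs on Radeon instead of being skipped on all of ROCm as it was
before PATCH 2/8.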