
Commit 373f75b

fix maximum update in triton flash attn
1 parent: d08ddb4

File tree

2 files changed: +3 -3 lines


ring_attention_pytorch/triton_flash_attn.py

Lines changed: 2 additions & 2 deletions

@@ -231,10 +231,10 @@ def _fwd_kernel(

             bias = bias.to(tl.float32)
             qk = qk * softmax_scale + bias
-            m_ij = tl.maximum(tl.max(qk, 1), lse_i)
+            m_ij = tl.maximum(tl.max(qk, 1), m_i)
             p = tl.exp(qk - m_ij[:, None])
         else:
-            m_ij = tl.maximum(tl.max(qk, 1) * softmax_scale, lse_i)
+            m_ij = tl.maximum(tl.max(qk, 1) * softmax_scale, m_i)
             p = tl.exp(qk * softmax_scale - m_ij[:, None])

         l_ij = tl.sum(p, 1)
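These lines belong to the kernel's online (streaming) softmax: each key/value tile contributes a tile-wise score maximum, and the fix compares that maximum against the running maximum m_i rather than the running log-sum-exp lse_i before the running statistics are rescaled. Below is a minimal PyTorch sketch of that recurrence, written outside Triton for clarity; the tile shapes, the helper name streaming_attention, and the use of plain tensors are assumptions for illustration, and only the variable names m_i, m_ij, p, and l_ij mirror the kernel's.

import torch

def streaming_attention(qk_tiles, v_tiles):
    # qk_tiles: list of (n_q, tile_n) pre-scaled score tiles
    # v_tiles:  list of (tile_n, d) value tiles
    m_i = None   # running row-wise maximum
    l_i = None   # running row-wise sum of exp(score - max)
    acc = None   # running un-normalized output
    for qk, v in zip(qk_tiles, v_tiles):
        tile_max = qk.max(dim = 1).values
        # the new maximum is taken against the running maximum m_i,
        # not against the running log-sum-exp
        m_ij = tile_max if m_i is None else torch.maximum(tile_max, m_i)
        p = torch.exp(qk - m_ij[:, None])
        l_ij = p.sum(dim = 1)
        if m_i is None:
            l_i, acc = l_ij, p @ v
        else:
            rescale = torch.exp(m_i - m_ij)        # shrink old stats to the new maximum
            l_i = l_i * rescale + l_ij
            acc = acc * rescale[:, None] + p @ v
        m_i = m_ij
    return acc / l_i[:, None]                      # normalized attention output

A brute-force torch.softmax over the concatenated score tiles followed by a matmul with the concatenated values should give the same output, which is a convenient way to sanity-check the recurrence.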

setup.py

Lines changed: 1 addition & 1 deletion

@@ -3,7 +3,7 @@
 setup(
   name = 'ring-attention-pytorch',
   packages = find_packages(exclude=[]),
-  version = '0.5.19',
+  version = '0.5.20',
   license='MIT',
   description = 'Ring Attention - Pytorch',
   author = 'Phil Wang',
