AttentionMaskConverter._unmask_unattended
1 parent 0e74a71 commit 8637f6e
src/transformers/modeling_attn_mask_utils.py
@@ -408,7 +408,7 @@ def _prepare_4d_causal_attention_mask_for_sdpa(
         # Attend to all tokens in masked rows from the causal_mask, for example the relevant first rows when
         # using left padding. This is required by F.scaled_dot_product_attention memory-efficient attention path.
         # Details: https://github.com/pytorch/pytorch/issues/110213
-        if not is_tracing_ and expanded_4d_mask.device.type == "cuda":
+        if not is_tracing_ and expanded_4d_mask.device.type in ["cuda", "xpu"]:
             expanded_4d_mask = AttentionMaskConverter._unmask_unattended(
                 expanded_4d_mask, min_dtype=torch.finfo(inputs_embeds.dtype).min
             )
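
Context (not part of the commit): the change extends the unattended-row workaround from CUDA to XPU devices. `_unmask_unattended` exists because a mask row that hides every key position (e.g. a fully left-padded query position) makes the attention softmax ill-defined, and the SDPA memory-efficient kernel then returns NaN (see the PyTorch issue linked in the diff); the workaround is to let such rows attend to all tokens. The minimal sketch below illustrates the fully-masked-row problem using plain softmax and -inf masking; the names `neg_inf`, `fully_masked`, and `fixed_mask` are illustrative only, a simplified stand-in rather than the library implementation, which operates on the finite `min_dtype` mask that transformers builds.

import torch

neg_inf = float("-inf")
# Toy additive mask, query_len=2, key_len=2; row 0 is fully masked
# (e.g. a left-padded query position that should attend to nothing).
mask = torch.tensor([[neg_inf, neg_inf],
                     [neg_inf, 0.0]])
scores = torch.zeros(2, 2)  # stand-in attention scores
print(torch.softmax(scores + mask, dim=-1)[0])  # tensor([nan, nan]) -- fully masked row

# Simplified stand-in for AttentionMaskConverter._unmask_unattended: reset rows that
# are entirely masked to 0.0 ("attend to all tokens") so the softmax stays finite.
fully_masked = torch.isinf(mask).all(dim=-1, keepdim=True)
fixed_mask = mask.masked_fill(fully_masked, 0.0)
print(torch.softmax(scores + fixed_mask, dim=-1)[0])  # tensor([0.5000, 0.5000])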