AttentionMaskConverter._unmask_unattended
1 parent 0e74a71 commit 8637f6e
src/transformers/modeling_attn_mask_utils.py
@@ -408,7 +408,7 @@ def _prepare_4d_causal_attention_mask_for_sdpa(
         # Attend to all tokens in masked rows from the causal_mask, for example the relevant first rows when
         # using left padding. This is required by F.scaled_dot_product_attention memory-efficient attention path.
         # Details: https://github.com/pytorch/pytorch/issues/110213
-        if not is_tracing_ and expanded_4d_mask.device.type == "cuda":
+        if not is_tracing_ and expanded_4d_mask.device.type in ["cuda", "xpu"]:
             expanded_4d_mask = AttentionMaskConverter._unmask_unattended(
                 expanded_4d_mask, min_dtype=torch.finfo(inputs_embeds.dtype).min
             )
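
Context (not part of the commit): the change extends the unattended-row workaround from CUDA to XPU devices. `_unmask_unattended` exists because a mask row that hides every key position (e.g. a fully left-padded query position) makes the attention softmax ill-defined, and the SDPA memory-efficient kernel then returns NaN (see the PyTorch issue linked in the diff); the workaround is to let such rows attend to all tokens. The minimal sketch below illustrates the fully-masked-row problem using plain softmax and -inf masking; the names `neg_inf`, `fully_masked`, and `fixed_mask` are illustrative only, a simplified stand-in rather than the library implementation, which operates on the finite `min_dtype` mask that transformers builds.

import torch

neg_inf = float("-inf")
# Toy additive mask, query_len=2, key_len=2; row 0 is fully masked
# (e.g. a left-padded query position that should attend to nothing).
mask = torch.tensor([[neg_inf, neg_inf],
                     [neg_inf, 0.0]])
scores = torch.zeros(2, 2)  # stand-in attention scores
print(torch.softmax(scores + mask, dim=-1)[0])  # tensor([nan, nan]) -- fully masked row

# Simplified stand-in for AttentionMaskConverter._unmask_unattended: reset rows that
# are entirely masked to 0.0 ("attend to all tokens") so the softmax stays finite.
fully_masked = torch.isinf(mask).all(dim=-1, keepdim=True)
fixed_mask = mask.masked_fill(fully_masked, 0.0)
print(torch.softmax(scores + fixed_mask, dim=-1)[0])  # tensor([0.5000, 0.5000])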