Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions test/xpu/test_nn_xpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from torch.testing._internal.common_device_type import (
dtypes,
instantiate_device_type_tests,
largeTensorTest,
)
from torch.testing._internal.common_dtype import get_all_math_dtypes, integral_types
from torch.testing._internal.common_utils import (
Expand Down Expand Up @@ -3786,6 +3787,39 @@ def test_cross_entropy_loss_2d_out_of_bounds_class_index(self):
)


@dtypes(torch.float, torch.half)
@largeTensorTest("20GB")
@largeTensorTest("64GB", "cpu")
def _test_warp_softmax_64bit_indexing(self, device, dtype):
    """Check log_softmax forward and backward on inputs needing 64-bit indexing.

    A float16 tensor with 2.2e9 elements (more than 2**31) is created on
    ``device``; ``F.log_softmax`` is evaluated with output ``dtype`` and
    back-propagated. Outputs and input gradients are compared against a
    reference computed on a CPU copy in float32 and cast to ``dtype``.

    Args:
        device: Device string supplied by the device-type test instantiation.
        dtype: Output dtype requested from ``log_softmax`` (float or half).
    """

    def run_test(*shape):
        # Allocate on the device under test rather than hard-coding "xpu", so
        # the ``device`` argument passed to the test is actually honoured.
        x = torch.randn(shape, device=device, dtype=torch.float16, requires_grad=True)
        y = F.log_softmax(x, dim=-1, dtype=dtype)
        y.backward(y)

        # Copy under no_grad so the CPU tensor is a fresh leaf whose .grad is
        # populated by the reference backward pass below.
        with torch.no_grad():
            xx = x.cpu().requires_grad_()
        # The reference must be computed outside no_grad: it needs autograd.
        yy = F.log_softmax(xx.float(), dim=-1).to(dtype)
        yy.backward(yy)

        # workaround to reduce memory usage vs. self.assertEqual, see #84944
        rtol, atol = torch.testing._comparison.get_tolerances(
            dtype, rtol=None, atol=None
        )
        self.assertTrue(torch.allclose(y.cpu(), yy, rtol=rtol, atol=atol))

        # x is half, so the gradient check uses half tolerances for rtol
        # together with a fixed absolute tolerance.
        rtol, _ = torch.testing._comparison.get_tolerances(
            torch.half, rtol=None, atol=None
        )
        self.assertTrue(torch.allclose(x.grad.cpu(), xx.grad, rtol=rtol, atol=1e-3))

    run_test(
        1100000000, 2
    )  # Illegal memory access https://github.com/pytorch/pytorch/issues/52715
    run_test(
        2200000000, 1
    )  # invalid configuration argument https://github.com/pytorch/pytorch/issues/52716


# Attach the module-level test functions to TestNNDeviceType as test methods.
# NOTE(review): presumably this must run before instantiate_device_type_tests
# is called on the class - confirm against the rest of the file.
TestNNDeviceType.test_warp_softmax_64bit_indexing = _test_warp_softmax_64bit_indexing

TestNNDeviceType.test_cross_entropy_loss_2d_out_of_bounds_class_index = (
_test_cross_entropy_loss_2d_out_of_bounds_class_index
)
Expand Down
Loading