2 files changed: +5 -1
lines changed Original file line number Diff line number Diff line change @@ -71,8 +71,10 @@ void memcpyAsync(
71
71
Device dst_device = iter.device (0 );
72
72
Device src_device = iter.device (1 );
73
73
if (dst_device == src_device) {
74
+ std::cout << " zl_debug: go to same device and specialized kernel" << std::endl;
74
75
copy_kernel (iter);
75
76
} else {
77
+ std::cout << " zl_debug: go to sycl copy kernel" << std::endl;
76
78
TORCH_INTERNAL_ASSERT (p2p_enabled == true );
77
79
auto dst = (char *)iter.data_ptr (0 );
78
80
auto src = (char *)iter.data_ptr (1 );
Original file line number Diff line number Diff line change @@ -132,7 +132,9 @@ at::Tensor XPUSymmetricMemory::get_buffer(
132
132
" bytes)" );
133
133
auto data_ptr = reinterpret_cast <uint8_t *>(buffers_[rank]) +
134
134
storage_offset * element_size;
135
- auto device = c10::Device (c10::DeviceType::XPU, local_device_idx_);
135
+ // Query the device index from data_ptr rather than using local_device_idx_
136
+ auto ptr_to_device_id = c10::xpu::get_device_idx_from_pointer (data_ptr);
137
+ auto device = c10::Device (c10::DeviceType::XPU, ptr_to_device_id);
136
138
auto options = at::TensorOptions ().dtype (dtype).device (device);
137
139
138
140
return at::for_blob (data_ptr, sizes)
You can’t perform that action at this time.
0 commit comments