diff --git a/cpp/tensorrt_llm/thop/allgatherOp.cpp b/cpp/tensorrt_llm/thop/allgatherOp.cpp index 693e12d0e43..cf0db94e427 100644 --- a/cpp/tensorrt_llm/thop/allgatherOp.cpp +++ b/cpp/tensorrt_llm/thop/allgatherOp.cpp @@ -78,7 +78,7 @@ class AllgatherOp for (int root = 0; root < static_cast(mGroup.size()); ++root) { auto split_size = sizes.value()[root]; - NCCLCHECK_THROW(ncclBroadcast(input.data_ptr(), + NCCLCHECK_THROW(ncclBroadcast((COMM_SESSION.getRank() == root) ? input.data_ptr() : nullptr, output.index({torch::indexing::Slice(split_offset, torch::indexing::None)}).mutable_data_ptr(), numel_base * split_size, (*getDtypeMap())[type], root, *mNcclComm, stream)); split_offset += split_size;