We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 47a61d5 commit 4732b1cCopy full SHA for 4732b1c
cpp/tensorrt_llm/cutlass_extensions/include/cutlass_extensions/arch/copy_traits_sm90_multimem.hpp
@@ -21,6 +21,10 @@
21
#include <cute/numeric/integral_ratio.hpp>
22
#include <cute/tensor.hpp>
23
24
+// Redefine CUTE_HOST_RTC as CUTE_HOST_DEVICE so that functions marked with it can be called on device. Otherwise the
25
+// intended definitions are not propagated properly via define flags in the compiler.
26
+#define CUTE_HOST_RTC CUTE_HOST_DEVICE
27
+
28
namespace cute
29
{
30
// Utility for unpacking tensor into registers for multimem CopyOp
0 commit comments