@@ -22,24 +22,22 @@ BUILD_DIR:= $(ROOT_DIR)/build
22
22
# Source files handed to the compilers.
#   FILES_CUDA - device sources compiled by $(NVCC)
#   FILES_CPP  - host sources compiled and linked by $(GPP)
FILES_CUDA := $(CSRC)/ops.cu $(CSRC)/kernels.cu
FILES_CPP := $(CSRC)/common.cpp $(CSRC)/cpu_ops.cpp $(CSRC)/pythonInterface.c

# Header search paths.
#   INCLUDE     - CUDA toolkit, csrc, $(CONDA_PREFIX)/include and the project include dir.
#   INCLUDE_10x - same, but lists $(ROOT_DIR)/dependencies/cub and omits
#                 $(CONDA_PREFIX)/include; used by the cuda10x_nomatmul device-compile step.
INCLUDE := -I $(CUDA_HOME)/include -I $(ROOT_DIR)/csrc -I $(CONDA_PREFIX)/include -I $(ROOT_DIR)/include
INCLUDE_10x := -I $(CUDA_HOME)/include -I $(ROOT_DIR)/csrc -I $(ROOT_DIR)/dependencies/cub -I $(ROOT_DIR)/include

# Library search paths and the CUDA libraries linked into every CUDA build.
LIB := -L $(CUDA_HOME)/lib64 -lcudart -lcublas -lcublasLt -lcurand -lcusparse -L $(CONDA_PREFIX)/lib
27
28
28
29
# NVIDIA NVCC compilation flags
# Baseline -gencode targets used by the all, cuda92 and *_nomatmul recipes.
# The first assignment is := so the list always starts from a known value;
# the entry that used to open the list (compute_35) now lives in CC_KEPLER.
# Architecture names are kept on their own comment lines so the variable's
# value carries no trailing whitespace.
# Maxwell
COMPUTE_CAPABILITY := -gencode arch=compute_50,code=sm_50
COMPUTE_CAPABILITY += -gencode arch=compute_52,code=sm_52
# Pascal
COMPUTE_CAPABILITY += -gencode arch=compute_60,code=sm_60
COMPUTE_CAPABILITY += -gencode arch=compute_61,code=sm_61
# Volta
COMPUTE_CAPABILITY += -gencode arch=compute_70,code=sm_70
COMPUTE_CAPABILITY += -gencode arch=compute_72,code=sm_72
37
36
38
# Kepler -gencode targets (compute_35 / compute_37).
# Kept separate from COMPUTE_CAPABILITY so that only the recipes which
# explicitly add $(CC_KEPLER) generate code for these architectures.
CC_KEPLER := -gencode arch=compute_35,code=sm_35
CC_KEPLER += -gencode arch=compute_37,code=sm_37
40
39
41
40
# Later versions of CUDA support the new architectures
# CC_CUDA10x is assigned with := because this is its first (and only visible)
# assignment; a bare += here would append to whatever value was inherited.
CC_CUDA10x := -gencode arch=compute_75,code=sm_75

# First entry of the CC_CUDA110 list; any further entries follow below this excerpt.
CC_CUDA110 := -gencode arch=compute_75,code=sm_75
@@ -49,37 +47,46 @@ CC_CUDA11x := -gencode arch=compute_75,code=sm_75
49
47
# Remaining CC_CUDA11x entries (the list is opened with := on the line above).
CC_CUDA11x += -gencode arch=compute_80,code=sm_80
CC_CUDA11x += -gencode arch=compute_86,code=sm_86


# -gencode sets for the builds that do not pass -D NO_CUBLASLT.
# CC_cublasLt110: compute_75 and compute_80
# (presumably consumed by the cuda110 target -- its recipe is not visible here).
CC_cublasLt110 := -gencode arch=compute_75,code=sm_75
CC_cublasLt110 += -gencode arch=compute_80,code=sm_80

# CC_cublasLt111: compute_75, compute_80 and compute_86; used by the
# cuda11x and cuda12x recipes.
CC_cublasLt111 := -gencode arch=compute_75,code=sm_75
CC_cublasLt111 += -gencode arch=compute_80,code=sm_80
CC_cublasLt111 += -gencode arch=compute_86,code=sm_86
58
57
58
# Extra -gencode targets (compute_89 and compute_90) that only the
# cuda12x and cuda12x_nomatmul recipes add to their architecture list.
CC_ADA_HOPPER := -gencode arch=compute_89,code=sm_89
CC_ADA_HOPPER += -gencode arch=compute_90,code=sm_90
60
+
59
61
60
62
# all: generic CUDA build for the baseline architectures plus Kepler.
# Three steps:
#   1. nvcc -dc    : compile $(FILES_CUDA) to relocatable device objects in $(BUILD_DIR)
#   2. nvcc -dlink : device-link ops.o and kernels.o into link.o
#   3. $(GPP)      : build the shared library from the objects and $(FILES_CPP)
# Output: ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so
# NOTE(review): this rule still requires $(ROOT_DIR)/dependencies/cub, but
# $(INCLUDE) does not put that directory on the include path -- confirm the
# prerequisite is still needed.
all: $(ROOT_DIR)/dependencies/cub $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_KEPLER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR)
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_KEPLER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so $(LIB)
64
66
65
67
# cuda92: build without cuBLASLt (-D NO_CUBLASLT) for the baseline
# architectures plus Kepler.
# Output: ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so
#
# The device compile uses $(INCLUDE_10x) rather than $(INCLUDE): this rule
# requires $(ROOT_DIR)/dependencies/cub as a prerequisite, and only
# INCLUDE_10x puts that directory on the include path.
# $(CC_CUDA92) is no longer referenced because its definition was removed
# from this Makefile, so the reference expanded to nothing.
cuda92: $(ROOT_DIR)/dependencies/cub $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_KEPLER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE_10x) $(LIB) --output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_KEPLER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
69
71
70
72
# cuda10x_nomatmul: build without cuBLASLt (-D NO_CUBLASLT) for the baseline
# architectures, $(CC_CUDA10x) and Kepler.
# The device compile uses $(INCLUDE_10x) so the headers under
# $(ROOT_DIR)/dependencies/cub are found; the host link step uses $(INCLUDE).
# Output: ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so
cuda10x_nomatmul: $(ROOT_DIR)/dependencies/cub $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA10x) $(CC_KEPLER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE_10x) $(LIB) --output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA10x) $(CC_KEPLER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
74
76
75
77
# cuda110_nomatmul: build without cuBLASLt (-D NO_CUBLASLT) for the baseline
# architectures, $(CC_CUDA110) and Kepler.
# Steps: nvcc -dc (device compile), nvcc -dlink (device link), $(GPP) (shared library).
# Output: ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so
cuda110_nomatmul: $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA110) $(CC_KEPLER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA110) $(CC_KEPLER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
79
81
80
82
# cuda11x_nomatmul: build without cuBLASLt (-D NO_CUBLASLT) for the baseline
# architectures, $(CC_CUDA11x) and Kepler.
# Steps: nvcc -dc (device compile), nvcc -dlink (device link), $(GPP) (shared library).
# Output: ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so
cuda11x_nomatmul: $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_KEPLER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_KEPLER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
86
+
87
# cuda12x_nomatmul: build without cuBLASLt (-D NO_CUBLASLT) for the baseline
# architectures, $(CC_CUDA11x) and $(CC_ADA_HOPPER).
# Unlike the other *_nomatmul rules, this one does not add $(CC_KEPLER).
# Steps: nvcc -dc (device compile), nvcc -dlink (device link), $(GPP) (shared library).
# Output: ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so
cuda12x_nomatmul: $(BUILD_DIR) env
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_ADA_HOPPER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR) -D NO_CUBLASLT
	$(NVCC) $(COMPUTE_CAPABILITY) $(CC_CUDA11x) $(CC_ADA_HOPPER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION)_nocublaslt.so $(LIB)
84
91
85
92
cuda110 : $(BUILD_DIR ) env
@@ -92,6 +99,11 @@ cuda11x: $(BUILD_DIR) env
92
99
$(NVCC ) $(CC_cublasLt111 ) -Xcompiler ' -fPIC' -dlink $(BUILD_DIR ) /ops.o $(BUILD_DIR ) /kernels.o -o $(BUILD_DIR ) /link.o
93
100
$(GPP ) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE ) $(BUILD_DIR ) /ops.o $(BUILD_DIR ) /kernels.o $(BUILD_DIR ) /link.o $(FILES_CPP ) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION ) .so $(LIB )
94
101
102
# cuda12x: build for $(CC_cublasLt111) plus $(CC_ADA_HOPPER).
# NO_CUBLASLT is not defined here, and neither $(COMPUTE_CAPABILITY) nor
# $(CC_KEPLER) is added to the architecture list.
# Steps: nvcc -dc (device compile), nvcc -dlink (device link), $(GPP) (shared library).
# Output: ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so
cuda12x: $(BUILD_DIR) env
	$(NVCC) $(CC_cublasLt111) $(CC_ADA_HOPPER) -Xcompiler '-fPIC' --use_fast_math -Xptxas=-v -dc $(FILES_CUDA) $(INCLUDE) $(LIB) --output-directory $(BUILD_DIR)
	$(NVCC) $(CC_cublasLt111) $(CC_ADA_HOPPER) -Xcompiler '-fPIC' -dlink $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o -o $(BUILD_DIR)/link.o
	$(GPP) -std=c++14 -DBUILD_CUDA -shared -fPIC $(INCLUDE) $(BUILD_DIR)/ops.o $(BUILD_DIR)/kernels.o $(BUILD_DIR)/link.o $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cuda$(CUDA_VERSION).so $(LIB)
106
+
95
107
# cpuonly: build the library from $(FILES_CPP) with $(GPP) only.
# No nvcc step, no -DBUILD_CUDA and no CUDA libraries on the link line.
# Output: ./bitsandbytes/libbitsandbytes_cpu.so
# NOTE(review): the excerpt ends right after this recipe line; confirm no
# further recipe lines follow in the full file.
cpuonly: $(BUILD_DIR) env
	$(GPP) -std=c++14 -shared -fPIC -I $(ROOT_DIR)/csrc -I $(ROOT_DIR)/include $(FILES_CPP) -o ./bitsandbytes/libbitsandbytes_cpu.so
97
109
0 commit comments