From 569b46d1e7cd66d9f41326618bb31ae5599b8cd0 Mon Sep 17 00:00:00 2001
From: Prateek Chawla
Date: Mon, 23 Mar 2026 09:20:04 +0100
Subject: [PATCH 1/7] Updates to work with CUDA 13.x
Signed-off-by: Prateek Chawla
---
03-H_Multi_GPU_Parallelization/.master/Makefile.in | 4 ++--
03-H_Multi_GPU_Parallelization/.master/jacobi.cu | 2 +-
03-H_Multi_GPU_Parallelization/solutions/Makefile | 4 ++--
03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile | 4 ++--
03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu | 2 +-
03-H_Multi_GPU_Parallelization/solutions/jacobi.cu | 2 +-
03-H_Multi_GPU_Parallelization/tasks/Makefile | 4 ++--
03-H_Multi_GPU_Parallelization/tasks/jacobi.cu | 2 +-
8 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/03-H_Multi_GPU_Parallelization/.master/Makefile.in b/03-H_Multi_GPU_Parallelization/.master/Makefile.in
index e15d85c..1ef54aa 100644
--- a/03-H_Multi_GPU_Parallelization/.master/Makefile.in
+++ b/03-H_Multi_GPU_Parallelization/.master/Makefile.in
@@ -24,8 +24,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/03-H_Multi_GPU_Parallelization/.master/jacobi.cu b/03-H_Multi_GPU_Parallelization/.master/jacobi.cu
index a9700cc..2d913d9 100644
--- a/03-H_Multi_GPU_Parallelization/.master/jacobi.cu
+++ b/03-H_Multi_GPU_Parallelization/.master/jacobi.cu
@@ -59,7 +59,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/03-H_Multi_GPU_Parallelization/solutions/Makefile b/03-H_Multi_GPU_Parallelization/solutions/Makefile
index 92f033c..96c6077 100644
--- a/03-H_Multi_GPU_Parallelization/solutions/Makefile
+++ b/03-H_Multi_GPU_Parallelization/solutions/Makefile
@@ -24,8 +24,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile
index a6399eb..25849b3 100644
--- a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile
+++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile
@@ -24,8 +24,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu b/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu
index ee32ce5..ed6a7ff 100644
--- a/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu
+++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu
@@ -59,7 +59,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu b/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu
index e971307..c4b542a 100644
--- a/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu
+++ b/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu
@@ -59,7 +59,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/03-H_Multi_GPU_Parallelization/tasks/Makefile b/03-H_Multi_GPU_Parallelization/tasks/Makefile
index d293686..67aae3c 100644
--- a/03-H_Multi_GPU_Parallelization/tasks/Makefile
+++ b/03-H_Multi_GPU_Parallelization/tasks/Makefile
@@ -24,8 +24,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu b/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu
index acae736..150886a 100644
--- a/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu
+++ b/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu
@@ -59,7 +59,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
From 85d7e901819b814a447cb020c9f4c699d1798e2f Mon Sep 17 00:00:00 2001
From: Prateek Chawla
Date: Tue, 24 Mar 2026 12:06:49 +0100
Subject: [PATCH 2/7] update to c++17
Signed-off-by: Prateek Chawla
---
03-H_Multi_GPU_Parallelization/.master/Makefile.in | 2 +-
03-H_Multi_GPU_Parallelization/solutions/Makefile | 2 +-
03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile | 2 +-
03-H_Multi_GPU_Parallelization/tasks/Makefile | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/03-H_Multi_GPU_Parallelization/.master/Makefile.in b/03-H_Multi_GPU_Parallelization/.master/Makefile.in
index 1ef54aa..72429af 100644
--- a/03-H_Multi_GPU_Parallelization/.master/Makefile.in
+++ b/03-H_Multi_GPU_Parallelization/.master/Makefile.in
@@ -24,7 +24,7 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
diff --git a/03-H_Multi_GPU_Parallelization/solutions/Makefile b/03-H_Multi_GPU_Parallelization/solutions/Makefile
index 96c6077..6d3af79 100644
--- a/03-H_Multi_GPU_Parallelization/solutions/Makefile
+++ b/03-H_Multi_GPU_Parallelization/solutions/Makefile
@@ -24,7 +24,7 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile
index 25849b3..5f96c74 100644
--- a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile
+++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile
@@ -24,7 +24,7 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
diff --git a/03-H_Multi_GPU_Parallelization/tasks/Makefile b/03-H_Multi_GPU_Parallelization/tasks/Makefile
index 67aae3c..883cd2c 100644
--- a/03-H_Multi_GPU_Parallelization/tasks/Makefile
+++ b/03-H_Multi_GPU_Parallelization/tasks/Makefile
@@ -24,7 +24,7 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
From 50a774eddcee2dfc40308820b788cae5d721e18c Mon Sep 17 00:00:00 2001
From: Prateek Chawla
Date: Thu, 26 Mar 2026 17:25:56 +0100
Subject: [PATCH 3/7] updates to work with cuda 13
Signed-off-by: Prateek Chawla
---
.../.master/Makefile.in | 6 +++---
.../.master/jacobi.cpp | 2 +-
.../solutions/Makefile | 6 +++---
.../solutions/jacobi.cpp | 2 +-
.../tasks/Makefile | 6 +++---
.../tasks/jacobi.cpp | 2 +-
6 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile.in b/06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile.in
index b7ce7a5..271d5f7 100644
--- a/06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile.in
+++ b/06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile.in
@@ -22,9 +22,9 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14
-MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -std=c++14
-LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt
+NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 -I$(CUDA_HOME)/include
+MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -std=c++17
+LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart
jacobi: Makefile jacobi.cpp jacobi_kernels.o
$(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi
diff --git a/06-H_Overlap_Communication_and_Computation_MPI/.master/jacobi.cpp b/06-H_Overlap_Communication_and_Computation_MPI/.master/jacobi.cpp
index f2fc1e6..9200097 100644
--- a/06-H_Overlap_Communication_and_Computation_MPI/.master/jacobi.cpp
+++ b/06-H_Overlap_Communication_and_Computation_MPI/.master/jacobi.cpp
@@ -56,7 +56,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Makefile b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Makefile
index a8ee71a..e2a83e3 100644
--- a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Makefile
+++ b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Makefile
@@ -22,9 +22,9 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14
-MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -std=c++14
-LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt
+NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 -I$(CUDA_HOME)/include
+MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -std=c++17
+LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart
jacobi: Makefile jacobi.cpp jacobi_kernels.o
$(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi
diff --git a/06-H_Overlap_Communication_and_Computation_MPI/solutions/jacobi.cpp b/06-H_Overlap_Communication_and_Computation_MPI/solutions/jacobi.cpp
index 4a20af1..961171e 100644
--- a/06-H_Overlap_Communication_and_Computation_MPI/solutions/jacobi.cpp
+++ b/06-H_Overlap_Communication_and_Computation_MPI/solutions/jacobi.cpp
@@ -56,7 +56,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Makefile b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Makefile
index 36da1bf..cf4eab5 100644
--- a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Makefile
+++ b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Makefile
@@ -22,9 +22,9 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14
-MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -std=c++14
-LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt
+NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 -I$(CUDA_HOME)/include
+MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -std=c++17
+LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart
jacobi: Makefile jacobi.cpp jacobi_kernels.o
$(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi
diff --git a/06-H_Overlap_Communication_and_Computation_MPI/tasks/jacobi.cpp b/06-H_Overlap_Communication_and_Computation_MPI/tasks/jacobi.cpp
index 4f9026b..9298be5 100644
--- a/06-H_Overlap_Communication_and_Computation_MPI/tasks/jacobi.cpp
+++ b/06-H_Overlap_Communication_and_Computation_MPI/tasks/jacobi.cpp
@@ -56,7 +56,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
From cfeb8bc3a7d8e078210d443a461be8b49f970f11 Mon Sep 17 00:00:00 2001
From: Prateek Chawla
Date: Thu, 26 Mar 2026 17:33:55 +0100
Subject: [PATCH 4/7] changes to accommodate CUDA 13
Signed-off-by: Prateek Chawla
---
08-H_NCCL_NVSHMEM/.master/NCCL/Makefile.in | 8 ++++----
08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp | 2 +-
08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile | 8 ++++----
08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp | 2 +-
08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile | 8 ++++----
08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp | 2 +-
6 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/08-H_NCCL_NVSHMEM/.master/NCCL/Makefile.in b/08-H_NCCL_NVSHMEM/.master/NCCL/Makefile.in
index ccbec37..1f7a7ae 100644
--- a/08-H_NCCL_NVSHMEM/.master/NCCL/Makefile.in
+++ b/08-H_NCCL_NVSHMEM/.master/NCCL/Makefile.in
@@ -1,7 +1,7 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
THIS_TASK := 08H-NCCL-@@TASKSOL@@
OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M')
-NP ?= 1
+NP ?= 4
NVCC=nvcc
JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4
MPICXX=mpicxx
@@ -23,9 +23,9 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14
-MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++14
-LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lnccl
+NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 -I$(CUDA_HOME)/include
+MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++17
+LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnccl
jacobi: Makefile jacobi.cpp jacobi_kernels.o
$(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi
diff --git a/08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp b/08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp
index 5945b12..2e8142e 100644
--- a/08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp
+++ b/08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp
@@ -55,7 +55,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile b/08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile
index b7ff2f5..6335378 100644
--- a/08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile
+++ b/08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile
@@ -1,7 +1,7 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
THIS_TASK := 08H-NCCL-sol
OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M')
-NP ?= 1
+NP ?= 4
NVCC=nvcc
JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4
MPICXX=mpicxx
@@ -23,9 +23,9 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14
-MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++14
-LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lnccl
+NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 -I$(CUDA_HOME)/include
+MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++17
+LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnccl
jacobi: Makefile jacobi.cpp jacobi_kernels.o
$(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi
diff --git a/08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp b/08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp
index 0c71eef..926db95 100644
--- a/08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp
+++ b/08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp
@@ -55,7 +55,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile b/08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile
index a6bb0dd..4069d29 100644
--- a/08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile
+++ b/08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile
@@ -1,7 +1,7 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
THIS_TASK := 08H-NCCL-task
OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M')
-NP ?= 1
+NP ?= 4
NVCC=nvcc
JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4
MPICXX=mpicxx
@@ -23,9 +23,9 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14
-MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++14
-LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lnccl
+NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 -I$(CUDA_HOME)/include
+MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++17
+LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnccl
jacobi: Makefile jacobi.cpp jacobi_kernels.o
$(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi
diff --git a/08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp b/08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp
index 5bcf77e..d71f76b 100644
--- a/08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp
+++ b/08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp
@@ -55,7 +55,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
From 90378b51001199273982b01684a5f050b640e374 Mon Sep 17 00:00:00 2001
From: Prateek Chawla
Date: Thu, 9 Apr 2026 09:17:33 +0200
Subject: [PATCH 5/7] use dynamic linking for nvshmem, fixes for cuda 12.9+
Signed-off-by: Prateek Chawla
---
08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile.in | 4 ++--
08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu | 2 +-
08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile | 4 ++--
08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu | 2 +-
08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile | 4 ++--
08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu | 2 +-
6 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile.in b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile.in
index 8aad9e0..a234756 100644
--- a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile.in
+++ b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile.in
@@ -29,8 +29,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt -lnvidia-ml
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem_host -lnvshmem_device -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvidia-ml
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu
index e4f6bcd..10da82d 100644
--- a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu
+++ b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu
@@ -61,7 +61,7 @@
#endif // HAVE_CUB
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile
index 823b736..168eab2 100644
--- a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile
+++ b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile
@@ -29,8 +29,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt -lnvidia-ml
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem_host -lnvshmem_device -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvidia-ml
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu
index dd55b30..6165d01 100644
--- a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu
+++ b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu
@@ -59,7 +59,7 @@
#endif // HAVE_CUB
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile
index 7c57e3e..3376cba 100644
--- a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile
+++ b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile
@@ -29,8 +29,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt -lnvidia-ml
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem_host -lnvshmem_device -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvidia-ml
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu
index b754207..7afbe05 100644
--- a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu
+++ b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu
@@ -57,7 +57,7 @@
#endif // HAVE_CUB
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
From e2ee4c2267d569837f58b9d32178b869c35fbeb5 Mon Sep 17 00:00:00 2001
From: Prateek Chawla
Date: Thu, 9 Apr 2026 09:18:01 +0200
Subject: [PATCH 6/7] fix cuda graphs to work with dynamically linked nvshmem and
cuda 13
Signed-off-by: Prateek Chawla
---
.../Device-initiated_Communication_with_NVSHMEM/Makefile.in | 4 ++--
.../Device-initiated_Communication_with_NVSHMEM/jacobi.cu | 2 +-
.../Device-initiated_Communication_with_NVSHMEM/Makefile | 4 ++--
.../Device-initiated_Communication_with_NVSHMEM/jacobi.cu | 2 +-
.../Device-initiated_Communication_with_NVSHMEM/Makefile | 4 ++--
.../Device-initiated_Communication_with_NVSHMEM/jacobi.cu | 2 +-
6 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile.in b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile.in
index 1917f62..5286a99 100644
--- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile.in
+++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile.in
@@ -28,8 +28,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt -lnvidia-ml
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem_host -lnvshmem_device -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvidia-ml
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/jacobi.cu b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/jacobi.cu
index 9829940..92891e6 100644
--- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/jacobi.cu
+++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/jacobi.cu
@@ -62,7 +62,7 @@
#endif // HAVE_CUB
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Makefile
index 374e98f..c54d834 100644
--- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Makefile
+++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Makefile
@@ -28,8 +28,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt -lnvidia-ml
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem_host -lnvshmem_device -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvidia-ml
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/jacobi.cu b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/jacobi.cu
index 04c3b63..a387aea 100644
--- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/jacobi.cu
+++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/jacobi.cu
@@ -60,7 +60,7 @@
#endif // HAVE_CUB
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Makefile
index 687a121..5670704 100644
--- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Makefile
+++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Makefile
@@ -28,8 +28,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt -lnvidia-ml
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem_host -lnvshmem_device -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvidia-ml
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/jacobi.cu b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/jacobi.cu
index a242329..98065fe 100644
--- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/jacobi.cu
+++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/jacobi.cu
@@ -58,7 +58,7 @@
#endif // HAVE_CUB
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
From 1c49b5b6884fc7b4e2ae26802e26fc302bbbe6a1 Mon Sep 17 00:00:00 2001
From: Prateek Chawla
Date: Thu, 9 Apr 2026 09:21:47 +0200
Subject: [PATCH 7/7] update cuda graphs makefile
Signed-off-by: Prateek Chawla
---
.../.master/Using_CUDA_Graphs/Makefile.in | 6 +++---
.../.master/Using_CUDA_Graphs/jacobi.cpp | 2 +-
.../solutions/Using_CUDA_Graphs/Makefile | 6 +++---
.../solutions/Using_CUDA_Graphs/jacobi.cpp | 2 +-
.../tasks/Using_CUDA_Graphs/Makefile | 6 +++---
.../tasks/Using_CUDA_Graphs/jacobi.cpp | 2 +-
6 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile.in b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile.in
index 7aa2d9c..f75c8f6 100644
--- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile.in
+++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile.in
@@ -23,9 +23,9 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14
-MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++14
-LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lnccl
+NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17
+MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++17
+LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnccl
jacobi: Makefile jacobi.cpp jacobi_kernels.o
$(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi
diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/jacobi.cpp b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/jacobi.cpp
index 360e66a..3242061 100644
--- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/jacobi.cpp
+++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/jacobi.cpp
@@ -51,7 +51,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Makefile
index 06003e2..b78e2cb 100644
--- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Makefile
+++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Makefile
@@ -23,9 +23,9 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14
-MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++14
-LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lnccl
+NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17
+MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++17
+LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnccl
jacobi: Makefile jacobi.cpp jacobi_kernels.o
$(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi
diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/jacobi.cpp b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/jacobi.cpp
index f692d4d..655a5f0 100644
--- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/jacobi.cpp
+++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/jacobi.cpp
@@ -51,7 +51,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Makefile
index ca0ae2f..8c0d823 100644
--- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Makefile
+++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Makefile
@@ -23,9 +23,9 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14
-MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++14
-LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lnccl
+NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17
+MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++17
+LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnccl
jacobi: Makefile jacobi.cpp jacobi_kernels.o
$(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi
diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/jacobi.cpp b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/jacobi.cpp
index b9197e7..b2eb0b9 100644
--- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/jacobi.cpp
+++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/jacobi.cpp
@@ -51,7 +51,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};