From 569b46d1e7cd66d9f41326618bb31ae5599b8cd0 Mon Sep 17 00:00:00 2001
From: Prateek Chawla
Date: Mon, 23 Mar 2026 09:20:04 +0100
Subject: [PATCH 1/9] Updates to work with CUDA 13.x
Signed-off-by: Prateek Chawla
---
03-H_Multi_GPU_Parallelization/.master/Makefile.in | 4 ++--
03-H_Multi_GPU_Parallelization/.master/jacobi.cu | 2 +-
03-H_Multi_GPU_Parallelization/solutions/Makefile | 4 ++--
03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile | 4 ++--
03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu | 2 +-
03-H_Multi_GPU_Parallelization/solutions/jacobi.cu | 2 +-
03-H_Multi_GPU_Parallelization/tasks/Makefile | 4 ++--
03-H_Multi_GPU_Parallelization/tasks/jacobi.cu | 2 +-
8 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/03-H_Multi_GPU_Parallelization/.master/Makefile.in b/03-H_Multi_GPU_Parallelization/.master/Makefile.in
index e15d85c..1ef54aa 100644
--- a/03-H_Multi_GPU_Parallelization/.master/Makefile.in
+++ b/03-H_Multi_GPU_Parallelization/.master/Makefile.in
@@ -24,8 +24,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/03-H_Multi_GPU_Parallelization/.master/jacobi.cu b/03-H_Multi_GPU_Parallelization/.master/jacobi.cu
index a9700cc..2d913d9 100644
--- a/03-H_Multi_GPU_Parallelization/.master/jacobi.cu
+++ b/03-H_Multi_GPU_Parallelization/.master/jacobi.cu
@@ -59,7 +59,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/03-H_Multi_GPU_Parallelization/solutions/Makefile b/03-H_Multi_GPU_Parallelization/solutions/Makefile
index 92f033c..96c6077 100644
--- a/03-H_Multi_GPU_Parallelization/solutions/Makefile
+++ b/03-H_Multi_GPU_Parallelization/solutions/Makefile
@@ -24,8 +24,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile
index a6399eb..25849b3 100644
--- a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile
+++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile
@@ -24,8 +24,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu b/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu
index ee32ce5..ed6a7ff 100644
--- a/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu
+++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu
@@ -59,7 +59,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu b/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu
index e971307..c4b542a 100644
--- a/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu
+++ b/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu
@@ -59,7 +59,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
diff --git a/03-H_Multi_GPU_Parallelization/tasks/Makefile b/03-H_Multi_GPU_Parallelization/tasks/Makefile
index d293686..67aae3c 100644
--- a/03-H_Multi_GPU_Parallelization/tasks/Makefile
+++ b/03-H_Multi_GPU_Parallelization/tasks/Makefile
@@ -24,8 +24,8 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include
-NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
$(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS)
diff --git a/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu b/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu
index acae736..150886a 100644
--- a/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu
+++ b/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu
@@ -59,7 +59,7 @@
#include
#ifdef USE_NVTX
-#include <nvToolsExt.h>
+#include <nvtx3/nvToolsExt.h>
const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff,
0x0000ffff, 0x00ff0000, 0x00ffffff};
From 85d7e901819b814a447cb020c9f4c699d1798e2f Mon Sep 17 00:00:00 2001
From: Prateek Chawla
Date: Tue, 24 Mar 2026 12:06:49 +0100
Subject: [PATCH 2/9] update to c++17
Signed-off-by: Prateek Chawla
---
03-H_Multi_GPU_Parallelization/.master/Makefile.in | 2 +-
03-H_Multi_GPU_Parallelization/solutions/Makefile | 2 +-
03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile | 2 +-
03-H_Multi_GPU_Parallelization/tasks/Makefile | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/03-H_Multi_GPU_Parallelization/.master/Makefile.in b/03-H_Multi_GPU_Parallelization/.master/Makefile.in
index 1ef54aa..72429af 100644
--- a/03-H_Multi_GPU_Parallelization/.master/Makefile.in
+++ b/03-H_Multi_GPU_Parallelization/.master/Makefile.in
@@ -24,7 +24,7 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
diff --git a/03-H_Multi_GPU_Parallelization/solutions/Makefile b/03-H_Multi_GPU_Parallelization/solutions/Makefile
index 96c6077..6d3af79 100644
--- a/03-H_Multi_GPU_Parallelization/solutions/Makefile
+++ b/03-H_Multi_GPU_Parallelization/solutions/Makefile
@@ -24,7 +24,7 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile
index 25849b3..5f96c74 100644
--- a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile
+++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile
@@ -24,7 +24,7 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
diff --git a/03-H_Multi_GPU_Parallelization/tasks/Makefile b/03-H_Multi_GPU_Parallelization/tasks/Makefile
index 67aae3c..883cd2c 100644
--- a/03-H_Multi_GPU_Parallelization/tasks/Makefile
+++ b/03-H_Multi_GPU_Parallelization/tasks/Makefile
@@ -24,7 +24,7 @@ ifdef DISABLE_CUB
else
NVCC_FLAGS = -DHAVE_CUB
endif
-NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
+NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include
NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart
jacobi: Makefile jacobi.cu
$(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o
From 61355845e0546eee58239ffd39ab258ee9b903d0 Mon Sep 17 00:00:00 2001
From: Andreas Herten
Date: Fri, 27 Mar 2026 12:34:13 +0100
Subject: [PATCH 3/9] And Action
---
.github/workflows/ci.yml | 23 +++++++++++++++++++++++
1 file changed, 23 insertions(+)
create mode 100644 .github/workflows/ci.yml
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..87543f3
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,23 @@
+name: CI
+
+on:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - name: Install dependencies
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y build-essential openmpi-bin libopenmpi-dev
+ - name: Build first example
+ working-directory: 03-H_Multi_GPU_Parallelization/solutions
+ env:
+ MPI_HOME: /usr
+ run: |
+ make jacobi
From 47a825ea25884c216c8bd87cc1917b3715e5dee6 Mon Sep 17 00:00:00 2001
From: Andreas Herten
Date: Fri, 27 Mar 2026 13:27:08 +0100
Subject: [PATCH 4/9] Add CUDA container image
---
.github/workflows/ci.yml | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 87543f3..fe6d382 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -1,20 +1,19 @@
name: CI
-
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
-
jobs:
build:
runs-on: ubuntu-latest
+ container:
+ image: nvidia/cuda:13.2.0-devel-ubuntu24.04
steps:
- uses: actions/checkout@v3
- name: Install dependencies
run: |
- sudo apt-get update
- sudo apt-get install -y build-essential openmpi-bin libopenmpi-dev
+ apt-get update && apt-get install -y build-essential openmpi-bin libopenmpi-dev
- name: Build first example
working-directory: 03-H_Multi_GPU_Parallelization/solutions
env:
From 808cde8e0c668b1d0c640dc7c6305b1f61f55559 Mon Sep 17 00:00:00 2001
From: Andreas Herten
Date: Fri, 27 Mar 2026 13:32:52 +0100
Subject: [PATCH 5/9] fix OpenMPI path
---
.github/workflows/ci.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fe6d382..4bc44d1 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,6 +17,6 @@ jobs:
- name: Build first example
working-directory: 03-H_Multi_GPU_Parallelization/solutions
env:
- MPI_HOME: /usr
- run: |
+ MPI_HOME: /usr/lib/x86_64-linux-gnu/openmpi
+ run: |
make jacobi
From 2574a62694ffa6e2a5c392acb33a99a98ece6562 Mon Sep 17 00:00:00 2001
From: Andreas Herten
Date: Fri, 27 Mar 2026 13:43:15 +0100
Subject: [PATCH 6/9] Fix vim auto-linting
---
.github/workflows/ci.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4bc44d1..803e40a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -18,5 +18,5 @@ jobs:
working-directory: 03-H_Multi_GPU_Parallelization/solutions
env:
MPI_HOME: /usr/lib/x86_64-linux-gnu/openmpi
- run: |
+ run: |
make jacobi
From 7475b071952d47982f6b8686ea2bbcf7d6ee7811 Mon Sep 17 00:00:00 2001
From: Andreas Herten
Date: Fri, 27 Mar 2026 14:41:10 +0100
Subject: [PATCH 7/9] Expand coverage
---
.github/workflows/ci.yml | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 803e40a..8499b70 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -6,6 +6,15 @@ on:
branches: [ main ]
jobs:
build:
+ strategy:
+ matrix:
+ exercise:
+ - 03-H_Multi_GPU_Parallelization/solutions
+ - 06-H_Overlap_Communication_and_Computation_MPI/solutions
+ - 08-H_NCCL_NVSHMEM/solutions/NCCL
+ - 08-H_NCCL_NVSHMEM/solutions/NVSHMEM
+ - /10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/
+ - /10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/
runs-on: ubuntu-latest
container:
image: nvidia/cuda:13.2.0-devel-ubuntu24.04
@@ -15,7 +24,7 @@ jobs:
run: |
apt-get update && apt-get install -y build-essential openmpi-bin libopenmpi-dev
- name: Build first example
- working-directory: 03-H_Multi_GPU_Parallelization/solutions
+ working-directory: ${{ matrix.exercise }}
env:
MPI_HOME: /usr/lib/x86_64-linux-gnu/openmpi
run: |
From 1e5143846c9ccf432a8fa1de206fc8a43bb91bbf Mon Sep 17 00:00:00 2001
From: Andreas Herten
Date: Fri, 27 Mar 2026 14:47:43 +0100
Subject: [PATCH 8/9] Small correction
---
.github/workflows/ci.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8499b70..aa6374e 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -13,8 +13,8 @@ jobs:
- 06-H_Overlap_Communication_and_Computation_MPI/solutions
- 08-H_NCCL_NVSHMEM/solutions/NCCL
- 08-H_NCCL_NVSHMEM/solutions/NVSHMEM
- - /10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/
- - /10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/
+ - 10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM
+ - 10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs
runs-on: ubuntu-latest
container:
image: nvidia/cuda:13.2.0-devel-ubuntu24.04
From bc076ffd230f82c40204652caec600012080353c Mon Sep 17 00:00:00 2001
From: Andreas Herten
Date: Fri, 27 Mar 2026 14:55:00 +0100
Subject: [PATCH 9/9] Fail slow
---
.github/workflows/ci.yml | 1 +
1 file changed, 1 insertion(+)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index aa6374e..84b3a00 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -7,6 +7,7 @@ on:
jobs:
build:
strategy:
+ fail-fast: false
matrix:
exercise:
- 03-H_Multi_GPU_Parallelization/solutions