|
/* Prefetch a managed allocation to host (CPU) memory using the CUDA 12.2+
 * cudaMemLocation-based prefetch API, then block until the pages are resident.
 *
 * ptr    - base pointer of a cudaMallocManaged allocation
 * size   - number of bytes to prefetch
 * stream - stream the prefetch is enqueued on (and synchronized afterwards,
 *          so the host may safely touch the memory as soon as the macro
 *          returns)
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement and
 * composes safely with if/else at the call site. */
#define MemPrefetchDeviceToHost_cuda(ptr, size, stream)                      \
  do {                                                                       \
    struct cudaMemLocation location = {};                                    \
    location.type = cudaMemLocationTypeHost;                                 \
    /* For cudaMemLocationTypeHost the id field is ignored by the runtime,   \
     * so no cudaGetDevice() query is needed here. */                        \
    location.id = 0;                                                         \
    /* FIX: enqueue on `stream` (was hard-coded stream 0), so the            \
     * cudaStreamSynchronize below actually waits for this prefetch.         \
     * Signature: cudaMemPrefetchAsync(ptr, count, location, flags, stream). */ \
    CUDA_ERR(cudaMemPrefetchAsync(ptr, size, location, 0, stream));          \
    CUDA_ERR(cudaStreamSynchronize(stream));                                 \
  } while (0)
/* Asynchronously prefetch a managed allocation to the currently active device
 * using the CUDA 12.2+ cudaMemLocation-based prefetch API.
 *
 * ptr    - base pointer of a cudaMallocManaged allocation
 * size   - number of bytes to prefetch
 * stream - stream the prefetch is enqueued on
 *
 * Deliberately does NOT synchronize: the prefetch is ordered within `stream`,
 * so kernels subsequently launched on the same stream see the data resident
 * without a host-side stall.
 *
 * Wrapped in do { } while (0) so the macro expands to a single statement and
 * composes safely with if/else at the call site. */
#define MemPrefetchHostToDevice_cuda(ptr, size, stream)                      \
  do {                                                                       \
    int deviceIndex;                                                         \
    CUDA_ERR(cudaGetDevice(&deviceIndex));                                   \
    struct cudaMemLocation location = {};                                    \
    location.type = cudaMemLocationTypeDevice;                               \
    location.id = deviceIndex;                                               \
    /* FIX: pass `stream` as the final argument (was hard-coded 0, i.e. the  \
     * legacy default stream, silently ignoring the macro's stream param).   \
     * Signature: cudaMemPrefetchAsync(ptr, count, location, flags, stream). */ \
    CUDA_ERR(cudaMemPrefetchAsync(ptr, size, location, 0, stream));          \
  } while (0)
72 | 73 | #else |
73 | | - |
74 | 74 | #define MemPrefetchDeviceToHost_cuda(ptr, size, stream) \ |
75 | 75 | { \ |
76 | 76 | CUDA_ERR(cudaMemPrefetchAsync(ptr, size, cudaCpuDeviceId, stream)); \ |
|
83 | 83 | CUDA_ERR(cudaMemPrefetchAsync(ptr, size, device, stream)); \ |
84 | 84 | } |
85 | 85 | #endif |
86 | | - |
87 | 86 | #endif // PF_CUDAMALLOC_H |
0 commit comments