Encoder-Decoder-Parser/dynet-2.0/cuda.h at master · LeonCrashCode/Encoder-Decoder-Parser · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#ifndef DYNET_CUDA_H
#define DYNET_CUDA_H
#if HAVE_CUDA

#include <iostream>
#include <stdexcept>
#include <utility>
#include <vector>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#if HAVE_CUDNN
#include <cudnn.h>
#endif
#include "dynet/except.h"

// NOTE(review): not referenced anywhere in this header; presumably an upper
// bound on the number of GPU devices handled by the implementation — confirm
// against the code that uses it.
#define MAX_GPUS 256

// Evaluates a CUDA runtime call exactly once. If the result is not
// cudaSuccess, the stringified statement and cudaGetErrorString() text are
// written to std::cerr and a dynet::cuda_exception carrying the statement
// text is thrown.
//
// The status is stored in a deliberately unusual local name so the macro's
// own variable cannot shadow an identifier used inside `stmt` (with a local
// called `err`, CUDA_CHECK(err) would expand to `cudaError_t err = err;`).
// `stmt` is parenthesised so any expression is accepted as the initialiser.
#define CUDA_CHECK(stmt) do {                                              \
    cudaError_t dynet_cuda_check_status_ = (stmt);                         \
    if (dynet_cuda_check_status_ != cudaSuccess) {                         \
      std::cerr << "CUDA failure in " << #stmt << std::endl                \
                << cudaGetErrorString(dynet_cuda_check_status_)            \
                << std::endl;                                              \
      throw dynet::cuda_exception(#stmt);                                  \
    }                                                                      \
  } while(0)

// Evaluates a cuBLAS call exactly once. If the result is not
// CUBLAS_STATUS_SUCCESS, the stringified statement and the numeric status
// code are written to std::cerr and a dynet::cuda_exception carrying the
// statement text is thrown.
//
// The status is stored in a deliberately unusual local name so the macro's
// own variable cannot shadow an identifier used inside `stmt` (with a local
// called `stat`, CUBLAS_CHECK(stat) would expand to
// `cublasStatus_t stat = stat;`). `stmt` is parenthesised so any expression
// is accepted as the initialiser.
#define CUBLAS_CHECK(stmt) do {                                            \
    cublasStatus_t dynet_cublas_check_status_ = (stmt);                    \
    if (dynet_cublas_check_status_ != CUBLAS_STATUS_SUCCESS) {             \
      std::cerr << "CUBLAS failure in " << #stmt                           \
                << std::endl << dynet_cublas_check_status_ << std::endl;   \
      throw dynet::cuda_exception(#stmt);                                  \
    }                                                                      \
  } while(0)

#if HAVE_CUDNN
// Evaluates a cuDNN call exactly once. If the result is not
// CUDNN_STATUS_SUCCESS, the stringified statement and cudnnGetErrorString()
// text are written to std::cerr and a dynet::cuda_exception carrying the
// statement text is thrown. Only available when HAVE_CUDNN is set.
//
// The status is stored in a deliberately unusual local name so the macro's
// own variable cannot shadow an identifier used inside `stmt` (with a local
// called `stat`, CUDNN_CHECK(stat) would expand to
// `cudnnStatus_t stat = (stat);`).
#define CUDNN_CHECK(stmt) do {                                             \
    cudnnStatus_t dynet_cudnn_check_status_ = (stmt);                      \
    if (dynet_cudnn_check_status_ != CUDNN_STATUS_SUCCESS) {               \
      std::cerr << "CUDNN failure in " << #stmt                            \
                << std::endl                                               \
                << cudnnGetErrorString(dynet_cudnn_check_status_)          \
                << std::endl;                                              \
      throw dynet::cuda_exception(#stmt);                                  \
    }                                                                      \
  } while(0)
#endif

namespace dynet {

// Forward declaration: this header only takes DynetParams by reference.
struct DynetParams;


// Forward declaration: this header only refers to Device through pointers.
class Device;

// Picks a 1-D launch shape for `n` work items.
//
// Returns {blocks, threads} where
//   * threads is a power of two in [32, 1024], derived from
//     floor(log2(n - 1)) clamped to [4, 9] and then incremented;
//   * blocks is ceil(n / threads), capped at 65535.
// Because of the cap, blocks * threads can be smaller than n for very large
// n. NOTE(review): callers presumably loop inside the kernel in that case —
// confirm at the launch sites.
//
// Precondition: n > 0 (checked with DYNET_ASSERT).
//
// The logarithm is computed with a plain shift loop on every compiler. The
// earlier x86 `bsr` inline assembly did not build on non-x86 hosts and left
// its result undefined for n == 1 (source operand 0), so the thread count
// for a single element was unspecified. The loop yields 0 there, matching
// what the former MSVC branch produced.
inline std::pair<int, int> SizeToBlockThreadPair(int n) {
  DYNET_ASSERT(n > 0, "Bad thread size in GPU code " << n);
  // floor(log2(n - 1)) for n - 1 >= 1, and 0 otherwise.
  int logn = 0;
  for (int remaining = n - 1; remaining > 1; remaining >>= 1)
    ++logn;
  logn = logn > 9 ? 9 : (logn < 4 ? 4 : logn);
  ++logn;
  int threads = 1 << logn;
  // Same value as (n + threads - 1) >> logn, but without the signed
  // overflow that the addition hits when n is close to INT_MAX.
  int blocks = ((n - 1) >> logn) + 1;
  blocks = blocks > 65535 ? 65535 : blocks;
  return std::make_pair(blocks, threads);
}

// GPU initialisation entry points; both are only declared here and both
// return a vector of Device pointers.
// NOTE(review): ownership/lifetime of the returned Device objects is not
// visible in this header — confirm against the implementation.
//
// Overload driven by a DynetParams object, taken by non-const reference.
std::vector<Device*> initialize_gpu(dynet::DynetParams& params);
// Overload driven by the program's argc/argv, both taken by non-const
// reference. NOTE(review): presumably parses (and may strip) GPU-related
// command-line options — confirm against the implementation.
std::vector<Device*> initialize_gpu(int& argc, char**& argv);

} // namespace dynet

#endif // HAVE_CUDA
#endif // DYNET_CUDA_H