-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathcudnn-ops.h
More file actions
94 lines (80 loc) · 2.63 KB
/
cudnn-ops.h
File metadata and controls
94 lines (80 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#ifndef DYNET_CUDNN_OPS_H
#define DYNET_CUDNN_OPS_H
#if HAVE_CUDNN
#include <cstddef>
#include <vector>

#include "dynet/cuda.h"
#include "dynet/dynet.h"
#include "dynet/op-helper.h"
namespace dynet {
class CudnnConvOp {
public:
explicit CudnnConvOp() {}
explicit CudnnConvOp(const std::vector<unsigned>& s, const bool padding_type);
~CudnnConvOp() noexcept(false);
/* call this function before using the CudnnConvOp */
void set_pool(NodeMemPool* mempool) {
DYNET_ASSERT(mempool->used() == 0, "mempool must have been reset");
mempool_ = mempool;
}
void forward_impl(const Device_GPU & dev,
const std::vector<const Tensor*>& xs, Tensor& fx);
void backward_impl(const Device_GPU & dev,
const std::vector<const Tensor*>& xs,
const Tensor& fx,
const Tensor& dEdf,
unsigned i,
Tensor& dEdxi);
static const size_t workspace_size_limit_bytes = 8 * 1024 * 1024;
protected:
std::vector<int> stride_;
bool is_valid_;
/* cuDNN resource */
cudnnTensorDescriptor_t x_desc_, y_desc_;
cudnnTensorDescriptor_t bias_desc_;
cudnnFilterDescriptor_t filter_desc_;
cudnnConvolutionDescriptor_t conv_desc_;
cudnnConvolutionFwdAlgo_t fwd_algo_;
cudnnConvolutionBwdFilterAlgo_t bwd_f_algo_;
cudnnConvolutionBwdDataAlgo_t bwd_d_algo_;
// cudnn workspace
size_t workspace_fwd_size_;
size_t workspace_bwd_data_size_;
size_t workspace_bwd_filter_size_;
void* fwd_workspace;
void* bwd_filter_workspace;
void* bwd_data_workspace;
private:
NodeMemPool* mempool_;
};
class CudnnMaxPooling2DOp {
public:
explicit CudnnMaxPooling2DOp() {}
explicit CudnnMaxPooling2DOp(const std::vector<unsigned>& ksize,
const std::vector<unsigned>& stride,
const bool padding_type);
~CudnnMaxPooling2DOp() noexcept(false);
/* call this function before using the CudnnMaxPooling2DOp */
void set_pool(NodeMemPool* mempool) {
DYNET_ASSERT(mempool->used() == 0, "mempool must have been reset");
mempool_ = mempool;
}
void forward_impl(const Device_GPU & dev,
const std::vector<const Tensor*>& xs, Tensor& fx);
void backward_impl(const Device_GPU & dev,
const std::vector<const Tensor*>& xs,
const Tensor& fx,
const Tensor& dEdf,
unsigned i,
Tensor& dEdxi);
protected:
std::vector<int> ksize_;
std::vector<int> stride_;
bool is_valid_;
/* cuDNN resource */
cudnnTensorDescriptor_t x_desc_, y_desc_;
cudnnPoolingDescriptor_t pooling_desc_;
private:
NodeMemPool* mempool_;
};
} // namespace dynet
#endif
#endif