-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathcudnn-ops.h
More file actions
94 lines (80 loc) · 2.63 KB
/
cudnn-ops.h
File metadata and controls
94 lines (80 loc) · 2.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#ifndef DYNET_CUDNN_OPS_H
#define DYNET_CUDNN_OPS_H
#if HAVE_CUDNN
#include <cstddef>
#include <vector>

#include "dynet/cuda.h"
#include "dynet/dynet.h"
#include "dynet/op-helper.h"
namespace dynet {
class CudnnConvOp {
public:
explicit CudnnConvOp() {}
explicit CudnnConvOp(const std::vector<unsigned>& s, const bool padding_type);
~CudnnConvOp() noexcept(false);
/* call this function before using the CudnnConvOp */
void set_pool(NodeMemPool* mempool) {
DYNET_ASSERT(mempool->used() == 0, "mempool must have been reset");
mempool_ = mempool;
}
void forward_impl(const Device_GPU & dev,
const std::vector<const Tensor*>& xs, Tensor& fx);
void backward_impl(const Device_GPU & dev,
const std::vector<const Tensor*>& xs,
const Tensor& fx,
const Tensor& dEdf,
unsigned i,
Tensor& dEdxi);
static const size_t workspace_size_limit_bytes = 8 * 1024 * 1024;
protected:
std::vector<int> stride_;
bool is_valid_;
/* cuDNN resource */
cudnnTensorDescriptor_t x_desc_, y_desc_;
cudnnTensorDescriptor_t bias_desc_;
cudnnFilterDescriptor_t filter_desc_;
cudnnConvolutionDescriptor_t conv_desc_;
cudnnConvolutionFwdAlgo_t fwd_algo_;
cudnnConvolutionBwdFilterAlgo_t bwd_f_algo_;
cudnnConvolutionBwdDataAlgo_t bwd_d_algo_;
// cudnn workspace
size_t workspace_fwd_size_;
size_t workspace_bwd_data_size_;
size_t workspace_bwd_filter_size_;
void* fwd_workspace;
void* bwd_filter_workspace;
void* bwd_data_workspace;
private:
NodeMemPool* mempool_;
};
class CudnnMaxPooling2DOp {
public:
explicit CudnnMaxPooling2DOp() {}
explicit CudnnMaxPooling2DOp(const std::vector<unsigned>& ksize,
const std::vector<unsigned>& stride,
const bool padding_type);
~CudnnMaxPooling2DOp() noexcept(false);
/* call this function before using the CudnnMaxPooling2DOp */
void set_pool(NodeMemPool* mempool) {
DYNET_ASSERT(mempool->used() == 0, "mempool must have been reset");
mempool_ = mempool;
}
void forward_impl(const Device_GPU & dev,
const std::vector<const Tensor*>& xs, Tensor& fx);
void backward_impl(const Device_GPU & dev,
const std::vector<const Tensor*>& xs,
const Tensor& fx,
const Tensor& dEdf,
unsigned i,
Tensor& dEdxi);
protected:
std::vector<int> ksize_;
std::vector<int> stride_;
bool is_valid_;
/* cuDNN resource */
cudnnTensorDescriptor_t x_desc_, y_desc_;
cudnnPoolingDescriptor_t pooling_desc_;
private:
NodeMemPool* mempool_;
};
} // namespace dynet
#endif
#endif