Deep Learning Algorithm Implementations 1.0.0
C++ implementations of fundamental deep learning algorithms
autograd.hpp
#pragma once

#include <cmath>          // std::exp, used by SigmoidFunction below
#include <functional>
#include <memory>
#include <vector>
#include <unordered_set>
#include "matrix.hpp"

namespace utils {

    template<typename T>
    class Variable;

    /// @brief Function node in the computational graph.
    template<typename T>
    class Function {
    public:
        virtual ~Function() = default;

        /// @brief Forward pass computation.
        virtual Matrix<T> forward(const std::vector<Variable<T>>& inputs) = 0;

        /// @brief Backward pass computation.
        virtual std::vector<Matrix<T>> backward(const Matrix<T>& grad_output) = 0;

        /// @brief Set saved tensors for the backward pass.
        virtual void save_for_backward(const std::vector<Matrix<T>>& tensors) {
            saved_tensors_ = tensors;
        }

    protected:
        std::vector<Matrix<T>> saved_tensors_;
    };

    /// @brief Variable class that supports automatic differentiation.
    template<typename T>
    class Variable {
    public:
        /// @brief Constructor.
        Variable(const Matrix<T>& data, bool requires_grad = false)
            : data_(data), requires_grad_(requires_grad), grad_fn_(nullptr) {
            if (requires_grad_) {
                grad_ = Matrix<T>::zeros(data.rows(), data.cols());
            }
        }

        /// @brief Constructor with gradient function.
        Variable(const Matrix<T>& data, std::shared_ptr<Function<T>> grad_fn)
            : data_(data), requires_grad_(true), grad_fn_(grad_fn) {
            grad_ = Matrix<T>::zeros(data.rows(), data.cols());
        }

        // Getters
        const Matrix<T>& data() const { return data_; }
        Matrix<T>& data() { return data_; }
        const Matrix<T>& grad() const { return grad_; }
        Matrix<T>& grad() { return grad_; }
        bool requires_grad() const { return requires_grad_; }
        std::shared_ptr<Function<T>> grad_fn() const { return grad_fn_; }

        /// @brief Perform backward pass (defined in autograd.cpp).
        void backward(const Matrix<T>& gradient = Matrix<T>());

        /// @brief Zero the gradients.
        void zero_grad() {
            if (requires_grad_) {
                grad_ = Matrix<T>::zeros(data_.rows(), data_.cols());
            }
        }

        /// @brief Detach from computational graph.
        Variable<T> detach() const {
            return Variable<T>(data_, false);
        }

        // Arithmetic operations
        Variable<T> operator+(const Variable<T>& other) const;
        Variable<T> operator-(const Variable<T>& other) const;
        Variable<T> operator*(const Variable<T>& other) const;

        // Matrix operations
        Variable<T> dot(const Variable<T>& other) const;
        Variable<T> transpose() const;
        Variable<T> sum() const;
        Variable<T> mean() const;

        // Activation functions
        Variable<T> sigmoid() const;
        Variable<T> tanh() const;
        Variable<T> relu() const;
        Variable<T> exp() const;
        Variable<T> log() const;

        // Element access
        T& operator()(size_t row, size_t col) { return data_(row, col); }
        const T& operator()(size_t row, size_t col) const { return data_(row, col); }

        size_t rows() const { return data_.rows(); }
        size_t cols() const { return data_.cols(); }

    private:
        Matrix<T> data_;
        Matrix<T> grad_;
        bool requires_grad_;
        std::shared_ptr<Function<T>> grad_fn_;
        std::vector<Variable<T>> inputs_;  // For backward pass
    };

    // Specific function implementations

    /// @brief Addition function.
    template<typename T>
    class AddFunction : public Function<T> {
    public:
        Matrix<T> forward(const std::vector<Variable<T>>& inputs) override {
            return inputs[0].data() + inputs[1].data();
        }

        std::vector<Matrix<T>> backward(const Matrix<T>& grad_output) override {
            return {grad_output, grad_output};
        }
    };

    /// @brief Subtraction function.
    template<typename T>
    class SubFunction : public Function<T> {
    public:
        Matrix<T> forward(const std::vector<Variable<T>>& inputs) override {
            return inputs[0].data() - inputs[1].data();
        }

        std::vector<Matrix<T>> backward(const Matrix<T>& grad_output) override {
            // Gradient of the subtrahend is negated.
            return {grad_output, grad_output * Matrix<T>(grad_output.rows(), grad_output.cols(), -1.0)};
        }
    };

    /// @brief Element-wise multiplication function.
    template<typename T>
    class MulFunction : public Function<T> {
    public:
        Matrix<T> forward(const std::vector<Variable<T>>& inputs) override {
            this->save_for_backward({inputs[0].data(), inputs[1].data()});
            return inputs[0].data() * inputs[1].data();
        }

        std::vector<Matrix<T>> backward(const Matrix<T>& grad_output) override {
            // d(a*b)/da = b, d(a*b)/db = a (element-wise).
            return {grad_output * this->saved_tensors_[1], grad_output * this->saved_tensors_[0]};
        }
    };

    /// @brief Matrix multiplication function.
    template<typename T>
    class DotFunction : public Function<T> {
    public:
        Matrix<T> forward(const std::vector<Variable<T>>& inputs) override {
            this->save_for_backward({inputs[0].data(), inputs[1].data()});
            return dot(inputs[0].data(), inputs[1].data());
        }

        std::vector<Matrix<T>> backward(const Matrix<T>& grad_output) override {
            // d(AB)/dA = grad * B^T,  d(AB)/dB = A^T * grad.
            return {
                dot(grad_output, this->saved_tensors_[1].transpose()),
                dot(this->saved_tensors_[0].transpose(), grad_output)
            };
        }
    };

    /// @brief Transpose function.
    template<typename T>
    class TransposeFunction : public Function<T> {
    public:
        Matrix<T> forward(const std::vector<Variable<T>>& inputs) override {
            return inputs[0].data().transpose();
        }

        std::vector<Matrix<T>> backward(const Matrix<T>& grad_output) override {
            return {grad_output.transpose()};
        }
    };

    /// @brief Sigmoid function.
    template<typename T>
    class SigmoidFunction : public Function<T> {
    public:
        Matrix<T> forward(const std::vector<Variable<T>>& inputs) override {
            Matrix<T> result(inputs[0].rows(), inputs[0].cols());
            for (size_t i = 0; i < inputs[0].rows(); ++i) {
                for (size_t j = 0; j < inputs[0].cols(); ++j) {
                    result(i, j) = 1.0 / (1.0 + std::exp(-inputs[0](i, j)));
                }
            }
            // Save the output: sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
            this->save_for_backward({result});
            return result;
        }

        std::vector<Matrix<T>> backward(const Matrix<T>& grad_output) override {
            const auto& sigmoid_output = this->saved_tensors_[0];
            Matrix<T> grad_input(sigmoid_output.rows(), sigmoid_output.cols());
            for (size_t i = 0; i < sigmoid_output.rows(); ++i) {
                for (size_t j = 0; j < sigmoid_output.cols(); ++j) {
                    grad_input(i, j) = grad_output(i, j) * sigmoid_output(i, j) * (1.0 - sigmoid_output(i, j));
                }
            }
            return {grad_input};
        }
    };

    /// @brief Sum function.
    template<typename T>
    class SumFunction : public Function<T> {
    public:
        Matrix<T> forward(const std::vector<Variable<T>>& inputs) override {
            this->save_for_backward({inputs[0].data()});
            T sum_val = sum(inputs[0].data());
            return Matrix<T>(1, 1, sum_val);
        }

        std::vector<Matrix<T>> backward(const Matrix<T>& grad_output) override {
            // Broadcast the scalar gradient back to the input's shape.
            const auto& input_shape = this->saved_tensors_[0];
            return {Matrix<T>(input_shape.rows(), input_shape.cols(), grad_output(0, 0))};
        }
    };

    // Type aliases

} // namespace utils
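Usage sketch. The arithmetic operators, dot(), sum() and the activation helpers are only declared in this header; their definitions live in autograd.cpp, where each one presumably constructs the matching Function node and records its inputs. Assuming Matrix<T> is reachable through the utils namespace and that Matrix<T>(rows, cols, value) fills a matrix with a constant (the same constructor SubFunction::backward and SumFunction::backward rely on above), a minimal end-to-end example might look like the following. Because the behaviour of backward() with its default empty gradient is not visible here, the sketch passes an explicit 1x1 seed.

// example_autograd.cpp -- illustrative only, not part of the library.
#include "autograd.hpp"
#include <iostream>

int main() {
    using namespace utils;

    // Two 2x2 leaf variables filled with constants, tracked for gradients.
    Variable<double> x(Matrix<double>(2, 2, 3.0), true);
    Variable<double> y(Matrix<double>(2, 2, 2.0), true);

    // z = sum(x (element-wise *) y), a 1x1 Variable built through MulFunction and SumFunction.
    Variable<double> z = (x * y).sum();

    // Seed the reverse pass explicitly with dz/dz = 1.
    z.backward(Matrix<double>(1, 1, 1.0));

    // Mathematically dz/dx = y (all 2.0) and dz/dy = x (all 3.0).
    std::cout << "dz/dx[0][0] = " << x.grad()(0, 0) << '\n';
    std::cout << "dz/dy[0][0] = " << y.grad()(0, 0) << '\n';
    return 0;
}

One design point worth noting: inputs_ stores Variable copies rather than shared references, so whether gradients written during the backward pass are visible through the original leaf objects x and y depends on how autograd.cpp records and revisits those inputs.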
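Extending the graph with a new operation. Variable<T>::relu(), tanh(), exp() and log() are declared above but their Function nodes are not part of this header (their definitions are in autograd.cpp). To illustrate the pattern the header establishes -- save whatever backward() needs during forward(), then transform the incoming gradient element-wise -- here is a hypothetical ReluFunction written in the same style as SigmoidFunction. It is a sketch only, not the project's actual implementation.

    // Hypothetical example only -- not the ReLU node from autograd.cpp.
    // Forward: max(x, 0). Backward: pass the gradient through where x > 0.
    template<typename T>
    class ReluFunction : public Function<T> {
    public:
        Matrix<T> forward(const std::vector<Variable<T>>& inputs) override {
            const auto& x = inputs[0].data();
            Matrix<T> result(x.rows(), x.cols());
            for (size_t i = 0; i < x.rows(); ++i) {
                for (size_t j = 0; j < x.cols(); ++j) {
                    result(i, j) = x(i, j) > T(0) ? x(i, j) : T(0);
                }
            }
            // Save the input so backward() knows where the ReLU was active.
            this->save_for_backward({x});
            return result;
        }

        std::vector<Matrix<T>> backward(const Matrix<T>& grad_output) override {
            const auto& x = this->saved_tensors_[0];
            Matrix<T> grad_input(x.rows(), x.cols());
            for (size_t i = 0; i < x.rows(); ++i) {
                for (size_t j = 0; j < x.cols(); ++j) {
                    grad_input(i, j) = x(i, j) > T(0) ? grad_output(i, j) : T(0);
                }
            }
            return {grad_input};
        }
    };

A corresponding Variable<T>::relu() member would presumably run the node's forward() and wrap the result with the Variable(data, grad_fn) constructor shown above, which is how results stay connected to the computational graph.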