Deep Learning Algorithm Implementations 1.0.0
C++ implementations of fundamental deep learning algorithms
optimizers.hpp
#pragma once

#include <memory>
#include <vector>
#include <unordered_map>
#include "utils/autograd.hpp"  // PyTorch-like automatic differentiation engine
#include "utils/matrix.hpp"    // Matrix utility class for deep learning operations

namespace dl::optimization {

using utils::Variable;
using utils::VariableD;
using utils::VariableF;
using utils::Matrix;
using utils::MatrixD;
using utils::MatrixF;

/// Base class for autograd-compatible optimizers.
template<typename T>
class AutogradOptimizer {
public:
    /// Construct an optimizer over the given set of trainable parameters.
    explicit AutogradOptimizer(std::vector<Variable<T>*> parameters)
        : parameters_(parameters) {}

    virtual ~AutogradOptimizer() = default;

    /// Perform one optimization step.
    virtual void step() = 0;

    /// Zero the gradients of all parameters.
    virtual void zero_grad() {
        for (auto* param : parameters_) {
            param->zero_grad();
        }
    }

    /// Get the learning rate.
    virtual T get_lr() const = 0;

    /// Set the learning rate.
    virtual void set_lr(T lr) = 0;

protected:
    std::vector<Variable<T>*> parameters_;
};

/// Stochastic Gradient Descent optimizer with autograd support.
template<typename T>
class SGD : public AutogradOptimizer<T> {
public:
    SGD(std::vector<Variable<T>*> parameters,
        T lr,
        T momentum = 0.0,
        T weight_decay = 0.0,
        bool nesterov = false);

    /// Perform one SGD step.
    void step() override;

    /// Get the learning rate.
    T get_lr() const override { return lr_; }

    /// Set the learning rate.
    void set_lr(T lr) override { lr_ = lr; }

private:
    T lr_;
    T momentum_;
    T weight_decay_;
    bool nesterov_;

    // Momentum buffers for each parameter
    std::vector<Matrix<T>> momentum_buffers_;

    void initialize_momentum_buffers();
};
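
The header only declares SGD<T>::step(); as a rough guide to the update it is expected to perform, here is a minimal, self-contained sketch of a PyTorch-style SGD step with momentum, weight decay, and optional Nesterov correction. It operates on plain std::vector<double> buffers rather than this library's Variable/Matrix types, whose internals are not visible in this file, so treat it as an illustration of the math, not as the project's implementation.

#include <cstddef>
#include <vector>

// One SGD update over a flat parameter buffer (illustrative sketch).
void sgd_step_sketch(std::vector<double>& param,
                     const std::vector<double>& grad,
                     std::vector<double>& momentum_buf,  // same size as param, zero-initialized
                     double lr, double momentum,
                     double weight_decay, bool nesterov) {
    for (std::size_t i = 0; i < param.size(); ++i) {
        double g = grad[i] + weight_decay * param[i];      // L2 penalty folded into the gradient
        if (momentum != 0.0) {
            momentum_buf[i] = momentum * momentum_buf[i] + g;
            g = nesterov ? g + momentum * momentum_buf[i]  // Nesterov look-ahead
                         : momentum_buf[i];
        }
        param[i] -= lr * g;                                // descend along the (modified) gradient
    }
}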

/// Adam optimizer with autograd support.
template<typename T>
class Adam : public AutogradOptimizer<T> {
public:
    Adam(std::vector<Variable<T>*> parameters,
         T lr = 1e-3,
         T beta1 = 0.9,
         T beta2 = 0.999,
         T eps = 1e-8,
         T weight_decay = 0.0);

    /// Perform one Adam step.
    void step() override;

    /// Get the learning rate.
    T get_lr() const override { return lr_; }

    /// Set the learning rate.
    void set_lr(T lr) override { lr_ = lr; }

private:
    T lr_;
    T beta1_;
    T beta2_;
    T eps_;
    T weight_decay_;

    // State for each parameter
    std::vector<Matrix<T>> exp_avg_;     // First moment estimate
    std::vector<Matrix<T>> exp_avg_sq_;  // Second moment estimate
    size_t step_count_;

    void initialize_state();
};
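
Again, only the declaration lives in this header. For reference, here is a self-contained sketch of the bias-corrected Adam update that a step() like this conventionally applies, using plain vectors and coupled L2 weight decay as in the original Adam formulation; it is not this library's code.

#include <cmath>
#include <cstddef>
#include <vector>

// One Adam update with bias correction (illustrative sketch).
void adam_step_sketch(std::vector<double>& param,
                      const std::vector<double>& grad,
                      std::vector<double>& exp_avg,     // first moment estimate m, zero-initialized
                      std::vector<double>& exp_avg_sq,  // second moment estimate v, zero-initialized
                      std::size_t step_count,           // 1-based step counter t
                      double lr, double beta1, double beta2,
                      double eps, double weight_decay) {
    const double bc1 = 1.0 - std::pow(beta1, static_cast<double>(step_count));
    const double bc2 = 1.0 - std::pow(beta2, static_cast<double>(step_count));
    for (std::size_t i = 0; i < param.size(); ++i) {
        const double g = grad[i] + weight_decay * param[i];             // coupled L2 regularization
        exp_avg[i]    = beta1 * exp_avg[i]    + (1.0 - beta1) * g;      // m_t
        exp_avg_sq[i] = beta2 * exp_avg_sq[i] + (1.0 - beta2) * g * g;  // v_t
        const double m_hat = exp_avg[i] / bc1;                          // bias-corrected m_t
        const double v_hat = exp_avg_sq[i] / bc2;                       // bias-corrected v_t
        param[i] -= lr * m_hat / (std::sqrt(v_hat) + eps);
    }
}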

/// AdamW optimizer with autograd support.
template<typename T>
class AdamW : public AutogradOptimizer<T> {
public:
    AdamW(std::vector<Variable<T>*> parameters,
          T lr = 1e-3,
          T beta1 = 0.9,
          T beta2 = 0.999,
          T eps = 1e-8,
          T weight_decay = 1e-2);

    /// Perform one AdamW step.
    void step() override;

    /// Get the learning rate.
    T get_lr() const override { return lr_; }

    /// Set the learning rate.
    void set_lr(T lr) override { lr_ = lr; }

private:
    T lr_;
    T beta1_;
    T beta2_;
    T eps_;
    T weight_decay_;

    // State for each parameter
    std::vector<Matrix<T>> exp_avg_;     // First moment estimate
    std::vector<Matrix<T>> exp_avg_sq_;  // Second moment estimate
    size_t step_count_;

    void initialize_state();
};
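
AdamW differs from Adam only in how weight decay is handled: the decay is decoupled from the gradient-based update and applied directly to the parameters (Loshchilov & Hutter, "Decoupled Weight Decay Regularization"); note its default weight_decay here is 1e-2 versus 0.0 for Adam. A hedged sketch of that variant, mirroring the Adam sketch above and again not taken from this library's implementation:

#include <cmath>
#include <cstddef>
#include <vector>

// One AdamW update with decoupled weight decay (illustrative sketch).
void adamw_step_sketch(std::vector<double>& param,
                       const std::vector<double>& grad,
                       std::vector<double>& exp_avg,
                       std::vector<double>& exp_avg_sq,
                       std::size_t step_count,
                       double lr, double beta1, double beta2,
                       double eps, double weight_decay) {
    const double bc1 = 1.0 - std::pow(beta1, static_cast<double>(step_count));
    const double bc2 = 1.0 - std::pow(beta2, static_cast<double>(step_count));
    for (std::size_t i = 0; i < param.size(); ++i) {
        param[i] -= lr * weight_decay * param[i];                       // decoupled weight decay
        const double g = grad[i];                                       // gradient left untouched
        exp_avg[i]    = beta1 * exp_avg[i]    + (1.0 - beta1) * g;
        exp_avg_sq[i] = beta2 * exp_avg_sq[i] + (1.0 - beta2) * g * g;
        param[i] -= lr * (exp_avg[i] / bc1)
                    / (std::sqrt(exp_avg_sq[i] / bc2) + eps);
    }
}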

/// RMSprop optimizer with autograd support.
template<typename T>
class RMSprop : public AutogradOptimizer<T> {
public:
    RMSprop(std::vector<Variable<T>*> parameters,
            T lr = 1e-2,
            T alpha = 0.99,
            T eps = 1e-8,
            T weight_decay = 0.0,
            T momentum = 0.0);

    /// Perform one RMSprop step.
    void step() override;

    /// Get the learning rate.
    T get_lr() const override { return lr_; }

    /// Set the learning rate.
    void set_lr(T lr) override { lr_ = lr; }

private:
    T lr_;
    T alpha_;
    T eps_;
    T weight_decay_;
    T momentum_;

    // State for each parameter
    std::vector<Matrix<T>> square_avg_;       // Moving average of squared gradients
    std::vector<Matrix<T>> momentum_buffer_;  // Momentum buffer (if momentum > 0)

    void initialize_state();
};
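
For completeness, a self-contained sketch of the uncentered RMSprop update, including the optional momentum buffer that the declaration above exposes; this is an illustration of the algorithm on plain vectors, not the library's implementation.

#include <cmath>
#include <cstddef>
#include <vector>

// One RMSprop update (illustrative sketch).
void rmsprop_step_sketch(std::vector<double>& param,
                         const std::vector<double>& grad,
                         std::vector<double>& square_avg,    // moving average of squared gradients
                         std::vector<double>& momentum_buf,  // used only when momentum > 0
                         double lr, double alpha, double eps,
                         double weight_decay, double momentum) {
    for (std::size_t i = 0; i < param.size(); ++i) {
        const double g = grad[i] + weight_decay * param[i];
        square_avg[i] = alpha * square_avg[i] + (1.0 - alpha) * g * g;  // running E[g^2]
        const double update = g / (std::sqrt(square_avg[i]) + eps);     // scale by RMS of gradients
        if (momentum > 0.0) {
            momentum_buf[i] = momentum * momentum_buf[i] + update;
            param[i] -= lr * momentum_buf[i];
        } else {
            param[i] -= lr * update;
        }
    }
}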

/// Learning rate scheduler base class.
template<typename T>
class LRScheduler {
public:
    explicit LRScheduler(AutogradOptimizer<T>* optimizer) : optimizer_(optimizer) {}
    virtual ~LRScheduler() = default;

    /// Update the learning rate.
    virtual void step() = 0;

    /// Get the current learning rate.
    T get_lr() const { return optimizer_->get_lr(); }

protected:
    AutogradOptimizer<T>* optimizer_;
};

/// Step learning rate scheduler. Decays the learning rate by gamma every
/// step_size epochs.
template<typename T>
class StepLR : public LRScheduler<T> {
public:
    StepLR(AutogradOptimizer<T>* optimizer, size_t step_size, T gamma = 0.1)
        : LRScheduler<T>(optimizer), step_size_(step_size), gamma_(gamma),
          last_epoch_(0), base_lr_(optimizer->get_lr()) {}

    /// Update the learning rate.
    void step() override;

private:
    size_t step_size_;
    T gamma_;
    size_t last_epoch_;
    T base_lr_;
};
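
The decay rule StepLR describes can be stated compactly: after epoch e the learning rate is base_lr * gamma^(e / step_size), with integer division. Below is a tiny standalone sketch of that computation; the member step() presumably advances last_epoch_ and calls set_lr() on the wrapped optimizer, but its body is not shown in this header, so this is only an illustration of the schedule.

#include <cmath>
#include <cstddef>

// Learning rate after `epoch` epochs under a StepLR-style schedule (illustrative sketch).
double step_lr_value(double base_lr, double gamma,
                     std::size_t step_size, std::size_t epoch) {
    const auto num_decays = static_cast<double>(epoch / step_size);  // integer division
    return base_lr * std::pow(gamma, num_decays);
}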

// Type aliases for convenience

} // namespace dl::optimization