// SGD constructor (tail of the initializer list; signature elided)
    weight_decay_(weight_decay), nesterov_(nesterov) {
  initialize_momentum_buffers();
}

// SGD::initialize_momentum_buffers() (excerpt)
momentum_buffers_.clear();
momentum_buffers_.reserve(this->parameters_.size());
for (const auto* param : this->parameters_) {
  // ...
}

// SGD::step() (excerpt)
for (size_t i = 0; i < this->parameters_.size(); ++i) {
  auto* param = this->parameters_[i];
  // ...
}
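Since the body of the update loop is elided in the listing above, here is a minimal, self-contained sketch of what one SGD step does, assuming PyTorch-style semantics (L2 weight decay added to the gradient, a classical momentum buffer, and an optional Nesterov correction). The free function sgd_step and the plain std::vector buffers are illustrative stand-ins, not the class's actual internals.

#include <cstddef>
#include <vector>

// Illustrative sketch of one SGD step over flat parameter/gradient arrays.
// Assumed semantics: weight decay is added to the gradient, the momentum
// buffer is v = momentum*v + g, and Nesterov uses g + momentum*v.
void sgd_step(std::vector<double>& param, const std::vector<double>& grad,
              std::vector<double>& momentum_buffer,
              double lr, double momentum, double weight_decay, bool nesterov) {
    for (std::size_t i = 0; i < param.size(); ++i) {
        double g = grad[i] + weight_decay * param[i];  // L2 weight decay
        if (momentum != 0.0) {
            momentum_buffer[i] = momentum * momentum_buffer[i] + g;
            g = nesterov ? g + momentum * momentum_buffer[i] : momentum_buffer[i];
        }
        param[i] -= lr * g;  // gradient descent update
    }
}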
// Adam constructor (tail of the initializer list; signature elided)
    eps_(eps), weight_decay_(weight_decay), step_count_(0) {
  // ...

// Moment-buffer initialization (excerpt)
exp_avg_.reserve(this->parameters_.size());
exp_avg_sq_.reserve(this->parameters_.size());
for (const auto* param : this->parameters_) {
  // ...
}

// Adam::step() (excerpt)
for (size_t i = 0; i < this->parameters_.size(); ++i) {
  auto* param = this->parameters_[i];
  // ...
}
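For reference, a minimal sketch of the Adam update that step() is documented to perform: exponential moving averages of the gradient (exp_avg) and squared gradient (exp_avg_sq) with bias correction. The free function adam_step and the raw vectors are illustrative only; folding weight decay into the gradient is an assumption consistent with Adam's weight_decay=0.0 default, and step_count is the 1-based counter after incrementing.

#include <cmath>
#include <cstddef>
#include <vector>

// Illustrative sketch of one Adam step with bias-corrected moment estimates.
void adam_step(std::vector<double>& param, const std::vector<double>& grad,
               std::vector<double>& exp_avg, std::vector<double>& exp_avg_sq,
               long step_count, double lr, double beta1, double beta2,
               double eps, double weight_decay) {
    const double bias1 = 1.0 - std::pow(beta1, step_count);  // first-moment correction
    const double bias2 = 1.0 - std::pow(beta2, step_count);  // second-moment correction
    for (std::size_t i = 0; i < param.size(); ++i) {
        double g = grad[i] + weight_decay * param[i];  // assumed L2-style decay
        exp_avg[i]    = beta1 * exp_avg[i]    + (1.0 - beta1) * g;
        exp_avg_sq[i] = beta2 * exp_avg_sq[i] + (1.0 - beta2) * g * g;
        const double m_hat = exp_avg[i] / bias1;
        const double v_hat = exp_avg_sq[i] / bias2;
        param[i] -= lr * m_hat / (std::sqrt(v_hat) + eps);
    }
}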
// AdamW constructor (tail of the initializer list; signature elided)
    eps_(eps), weight_decay_(weight_decay), step_count_(0) {
  // ...

// Moment-buffer initialization (excerpt)
exp_avg_.reserve(this->parameters_.size());
exp_avg_sq_.reserve(this->parameters_.size());
for (const auto* param : this->parameters_) {
  // ...
}

// AdamW::step() (excerpt)
for (size_t i = 0; i < this->parameters_.size(); ++i) {
  auto* param = this->parameters_[i];
  // ...
}
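AdamW's distinguishing feature is decoupled weight decay: the decay is applied directly to the parameters rather than added to the gradient, which is why its default weight_decay is 1e-2 rather than Adam's 0.0. A sketch of this, reusing the illustrative adam_step defined above:

// Illustrative sketch: decoupled weight decay, then the same bias-corrected
// moment update as Adam (with weight_decay = 0 inside adam_step).
void adamw_step(std::vector<double>& param, const std::vector<double>& grad,
                std::vector<double>& exp_avg, std::vector<double>& exp_avg_sq,
                long step_count, double lr, double beta1, double beta2,
                double eps, double weight_decay) {
    for (std::size_t i = 0; i < param.size(); ++i)
        param[i] -= lr * weight_decay * param[i];  // decoupled decay (AdamW)
    adam_step(param, grad, exp_avg, exp_avg_sq, step_count,
              lr, beta1, beta2, eps, /*weight_decay=*/0.0);
}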
// RMSprop constructor (tail of the initializer list; signature elided)
    weight_decay_(weight_decay), momentum_(momentum) {
  // ...

// State-buffer initialization (excerpt)
momentum_buffer_.clear();
square_avg_.reserve(this->parameters_.size());
momentum_buffer_.reserve(this->parameters_.size());
for (const auto* param : this->parameters_) {
  // ...
}

// RMSprop::step() (excerpt)
for (size_t i = 0; i < this->parameters_.size(); ++i) {
  auto* param = this->parameters_[i];
  // ...
}
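A minimal sketch of the RMSprop update that step() is documented to perform, assuming PyTorch-style semantics: an exponential moving average of the squared gradients controlled by alpha (square_avg), an optional momentum buffer applied to the rescaled gradient, and L2 weight decay added to the gradient. The free function rmsprop_step and the raw vectors are illustrative only.

#include <cmath>
#include <cstddef>
#include <vector>

// Illustrative sketch of one RMSprop step (assumed PyTorch-style semantics).
void rmsprop_step(std::vector<double>& param, const std::vector<double>& grad,
                  std::vector<double>& square_avg, std::vector<double>& momentum_buffer,
                  double lr, double alpha, double eps,
                  double weight_decay, double momentum) {
    for (std::size_t i = 0; i < param.size(); ++i) {
        double g = grad[i] + weight_decay * param[i];  // assumed L2-style decay
        square_avg[i] = alpha * square_avg[i] + (1.0 - alpha) * g * g;
        const double denom = std::sqrt(square_avg[i]) + eps;
        if (momentum > 0.0) {
            momentum_buffer[i] = momentum * momentum_buffer[i] + g / denom;
            param[i] -= lr * momentum_buffer[i];
        } else {
            param[i] -= lr * g / denom;
        }
    }
}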
// Scheduler step() (excerpt): decay the learning rate every step_size_ epochs
if (last_epoch_ % step_size_ == 0) {
  T new_lr = base_lr_ * std::pow(gamma_, last_epoch_ / step_size_);
  this->optimizer_->set_lr(new_lr);
}
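The decay rule in the fragment above can be checked in isolation. Below is a small standalone sketch; step_lr is a hypothetical helper, and the integer division of epoch by step_size mirrors last_epoch_ / step_size_ in the listing.

#include <cmath>
#include <cstdio>

// Learning rate after a given epoch: base_lr * gamma^(epoch / step_size),
// with integer division, so it drops by a factor of gamma every step_size epochs.
double step_lr(double base_lr, double gamma, int step_size, int epoch) {
    return base_lr * std::pow(gamma, epoch / step_size);
}

int main() {
    // base_lr = 0.1, gamma = 0.5, step_size = 10:
    // epochs 0-9 -> 0.1, epochs 10-19 -> 0.05, epochs 20-29 -> 0.025
    for (int epoch = 0; epoch < 30; epoch += 10)
        std::printf("epoch %2d: lr = %g\n", epoch, step_lr(0.1, 0.5, 10, epoch));
    return 0;
}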
Member summaries:

SGD: Stochastic Gradient Descent optimizer with autograd support.
    SGD(std::vector< Variable< T > * > parameters, T lr, T momentum=0.0, T weight_decay=0.0, bool nesterov=false)
        Constructor.
    void step() override
        Perform one SGD step.

Adam: Adam optimizer with autograd support.
    Adam(std::vector< Variable< T > * > parameters, T lr=1e-3, T beta1=0.9, T beta2=0.999, T eps=1e-8, T weight_decay=0.0)
        Constructor.
    void step() override
        Perform one Adam step.

AdamW: AdamW optimizer with autograd support.
    AdamW(std::vector< Variable< T > * > parameters, T lr=1e-3, T beta1=0.9, T beta2=0.999, T eps=1e-8, T weight_decay=1e-2)
        Constructor.
    void step() override
        Perform one AdamW step.

RMSprop: RMSprop optimizer with autograd support.
    RMSprop(std::vector< Variable< T > * > parameters, T lr=1e-2, T alpha=0.99, T eps=1e-8, T weight_decay=0.0, T momentum=0.0)
        Constructor.
    void step() override
        Perform one RMSprop step.

Step learning rate scheduler: decays the learning rate by gamma every step_size epochs.
    void step() override
        Update the learning rate.

The optimizers share a common base class for autograd-compatible optimizers and operate on the Variable class that supports automatic differentiation; taken together, the file provides PyTorch-like optimizers with automatic differentiation support.