6 #include <unordered_set>
76 : data_(data), requires_grad_(true), grad_fn_(grad_fn) {
86 std::shared_ptr<Function<T>> grad_fn() const { return grad_fn_; }
129 T& operator()(size_t row, size_t col) { return data_(row, col); }
130 const T& operator()(size_t row, size_t col) const { return data_(row, col); }
132 size_t rows() const { return data_.rows(); }
133 size_t cols() const { return data_.cols(); }
139 std::shared_ptr<Function<T>> grad_fn_;
140 std::vector<Variable<T>> inputs_;
152 return inputs[0].data() + inputs[1].data();
156 return {grad_output, grad_output};
167 return inputs[0].data() - inputs[1].data();
171 return {grad_output, grad_output * Matrix<T>(grad_output.rows(), grad_output.cols(), -1.0)};
183 return inputs[0].data() * inputs[1].data();
187 return {grad_output * this->saved_tensors_[1], grad_output * this->saved_tensors_[0]};
199 return dot(inputs[0].data(), inputs[1].data());
217 return inputs[0].data().transpose();
232 Matrix<T> result(inputs[0].rows(), inputs[0].cols());
233 for (size_t i = 0; i < inputs[0].rows(); ++i) {
234 for (size_t j = 0; j < inputs[0].cols(); ++j) {
235 result(i, j) = 1.0 / (1.0 + std::exp(-inputs[0](i, j)));
244 Matrix<T> grad_input(sigmoid_output.rows(), sigmoid_output.cols());
245 for (size_t i = 0; i < sigmoid_output.rows(); ++i) {
246 for (size_t j = 0; j < sigmoid_output.cols(); ++j) {
247 grad_input(i, j) = grad_output(i, j) * sigmoid_output(i, j) * (1.0 - sigmoid_output(i, j));
262 T sum_val = sum(inputs[0].data());
268 return {Matrix<T>(input_shape.rows(), input_shape.cols(), grad_output(0, 0))};
std::vector< Matrix< T > > backward(const Matrix< T > &grad_output) override
Backward pass computation.
Matrix< T > forward(const std::vector< Variable< T > > &inputs) override
Forward pass computation.
Matrix multiplication function.
std::vector< Matrix< T > > backward(const Matrix< T > &grad_output) override
Backward pass computation.
Matrix< T > forward(const std::vector< Variable< T > > &inputs) override
Forward pass computation.
Function node in the computational graph.
virtual ~Function()=default
virtual std::vector< Matrix< T > > backward(const Matrix< T > &grad_output)=0
Backward pass computation.
virtual void save_for_backward(const std::vector< Matrix< T > > &tensors)
Set saved tensors for backward pass.
std::vector< Matrix< T > > saved_tensors_
virtual Matrix< T > forward(const std::vector< Variable< T > > &inputs)=0
Forward pass computation.
static Matrix zeros(size_t rows, size_t cols)
Create a matrix filled with zeros.
size_t cols() const
Get the number of columns.
Matrix transpose() const
Compute the transpose of the matrix.
size_t rows() const
Get the number of rows.
Element-wise multiplication function.
Matrix< T > forward(const std::vector< Variable< T > > &inputs) override
Forward pass computation.
std::vector< Matrix< T > > backward(const Matrix< T > &grad_output) override
Backward pass computation.
Matrix< T > forward(const std::vector< Variable< T > > &inputs) override
Forward pass computation.
std::vector< Matrix< T > > backward(const Matrix< T > &grad_output) override
Backward pass computation.
std::vector< Matrix< T > > backward(const Matrix< T > &grad_output) override
Backward pass computation.
Matrix< T > forward(const std::vector< Variable< T > > &inputs) override
Forward pass computation.
Matrix< T > forward(const std::vector< Variable< T > > &inputs) override
Forward pass computation.
std::vector< Matrix< T > > backward(const Matrix< T > &grad_output) override
Backward pass computation.
std::vector< Matrix< T > > backward(const Matrix< T > &grad_output) override
Backward pass computation.
Matrix< T > forward(const std::vector< Variable< T > > &inputs) override
Forward pass computation.
Variable class that supports automatic differentiation.
Variable< T > mean() const
Variable< T > operator-(const Variable< T > &other) const
Variable< T > operator+(const Variable< T > &other) const
const T & operator()(size_t row, size_t col) const
Variable< T > detach() const
Detach from computational graph.
Variable< T > sigmoid() const
bool requires_grad() const
Variable< T > log() const
Variable(const Matrix< T > &data, std::shared_ptr< Function< T > > grad_fn)
Constructor with gradient function.
Variable< T > dot(const Variable< T > &other) const
std::shared_ptr< Function< T > > grad_fn() const
Variable< T > exp() const
Variable< T > tanh() const
Variable< T > transpose() const
void backward(const Matrix< T > &gradient=Matrix< T >())
Perform backward pass.
Variable< T > sum() const
Variable< T > operator*(const Variable< T > &other) const
const Matrix< T > & data() const
const Matrix< T > & grad() const
void zero_grad()
Zero the gradients.
Variable< T > relu() const
Variable(const Matrix< T > &data, bool requires_grad=false)
Constructor.
T & operator()(size_t row, size_t col)
Matrix utility class for deep learning operations.
T sum(const Matrix< T > &matrix)
Calculate sum of all matrix elements.
Matrix< T > dot(const Matrix< T > &a, const Matrix< T > &b)
Compute dot product of two matrices.