Deep Learning Algorithm Implementations 1.0.0
C++ implementations of fundamental deep learning algorithms
Loading...
Searching...
No Matches
data_loader.hpp
Go to the documentation of this file.
1#pragma once
2
3#include <memory>
4#include <string>
5#include <vector>
6#include "matrix.hpp"
7
21namespace dl {
22 namespace utils {
23 // Forward declarations
25
51 template<typename T>
52 class Dataset {
53 public:
57 Dataset();
58
66 Dataset(const MatrixD &features, const MatrixD &labels);
67
75 void add_sample(const std::vector<T> &feature, const std::vector<T> &label);
76
81 size_t size() const;
82
92 std::pair<MatrixD, MatrixD> get_batch(size_t start_idx, size_t batch_size) const;
93
99 void shuffle();
100
101 private:
102 MatrixD features_;
103 MatrixD labels_;
104 };
105
128 template<typename T>
129 class DataLoader {
130 public:
137 DataLoader(const Dataset<T> &dataset, size_t batch_size, bool shuffle = false);
138
143 bool has_next() const;
144
151 std::pair<MatrixD, MatrixD> next_batch();
152
158 void reset();
159
160 private:
161 const Dataset<T> &dataset_;
162 size_t batch_size_;
163 bool shuffle_;
164 size_t current_idx_;
165 };
166
185 class CSVLoader {
186 public:
196 static MatrixD load_csv(const std::string &filename, bool has_header = true, char delimiter = ',');
197
207 static std::pair<MatrixD, MatrixD> load_features_labels(const std::string &filename,
208 const std::vector<size_t> &feature_cols,
209 const std::vector<size_t> &label_cols,
210 bool has_header = true, char delimiter = ',');
211 };
212
232 public:
242 static MatrixD load_image(const std::string &filename, size_t target_width = 0, size_t target_height = 0);
243
253 static std::vector<MatrixD> load_images_from_directory(const std::string &directory_path,
254 size_t target_width = 0, size_t target_height = 0);
255 };
256
281 public:
291 static MatrixD normalize(const MatrixD &data, double min_val = 0.0, double max_val = 1.0);
292
300 static MatrixD standardize(const MatrixD &data);
301
310 static MatrixD one_hot_encode(const std::vector<int> &labels, size_t num_classes);
311
321 static std::tuple<Dataset<double>, Dataset<double>, Dataset<double>>
322 train_val_test_split(const Dataset<double> &data, double train_ratio = 0.7, double val_ratio = 0.15);
323 };
324 } // namespace utils
325} // namespace dl
static MatrixD load_csv(const std::string &filename, bool has_header=true, char delimiter=',')
Load CSV file into a matrix.
static std::pair< MatrixD, MatrixD > load_features_labels(const std::string &filename, const std::vector< size_t > &feature_cols, const std::vector< size_t > &label_cols, bool has_header=true, char delimiter=',')
Load specific columns as features and labels.
void reset()
Reset iterator to start of dataset.
std::pair< MatrixD, MatrixD > next_batch()
Get the next batch of data.
bool has_next() const
Check if more batches are available in current epoch.
std::pair< MatrixD, MatrixD > get_batch(size_t start_idx, size_t batch_size) const
Extract a batch of samples from the dataset.
Dataset()
Default constructor for empty dataset.
void shuffle()
Randomly shuffle the dataset samples.
void add_sample(const std::vector< T > &feature, const std::vector< T > &label)
Add a single sample to the dataset.
size_t size() const
Get the number of samples in the dataset.
static MatrixD load_image(const std::string &filename, size_t target_width=0, size_t target_height=0)
Load a single image file.
static std::vector< MatrixD > load_images_from_directory(const std::string &directory_path, size_t target_width=0, size_t target_height=0)
Load all images from a directory.
static std::tuple< Dataset< double >, Dataset< double >, Dataset< double > > train_val_test_split(const Dataset< double > &data, double train_ratio=0.7, double val_ratio=0.15)
Split dataset into training, validation, and test sets.
static MatrixD one_hot_encode(const std::vector< int > &labels, size_t num_classes)
Convert categorical labels to one-hot encoding.
static MatrixD standardize(const MatrixD &data)
Standardize data to zero mean and unit variance.
static MatrixD normalize(const MatrixD &data, double min_val=0.0, double max_val=1.0)
Normalize data to specified range.
Matrix utility class for deep learning operations.