Deep Learning Algorithm Implementations 1.0.0
C++ implementations of fundamental deep learning algorithms
Loading...
Searching...
No Matches
/home/runner/work/deep-learning-algo-impls/deep-learning-algo-impls/include/utils/data_loader.hpp

Dataset container for machine learning data.

Dataset container for machine learning data. A flexible container class that holds features and labels for training, validation, or testing. Supports batch processing, data shuffling, and efficient memory management.

Template Parameters
T: Data type for the dataset elements (typically double or float)
// Create dataset from matrices
MatrixD features(100, 784); // 100 samples, 784 features
MatrixD labels(100, 10); // 100 samples, 10 classes
Dataset<double> dataset(features, labels);
// Add individual samples
std::vector<double> sample_features = {1.0, 2.0, 3.0};
std::vector<double> sample_label = {0.0, 1.0, 0.0};
dataset.add_sample(sample_features, sample_label);
// Get batches for training
auto batch = dataset.get_batch(0, 32); // First 32 samples
#pragma once
#include <cstddef>
#include <memory>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include "matrix.hpp"
namespace dl {
namespace utils {
// Dataset container, batch loader, file loaders, and preprocessing utilities
/**
 * @brief Dataset container for machine learning data.
 *
 * Holds one feature matrix and one label matrix and exposes batch extraction,
 * sample insertion, and shuffling.
 *
 * NOTE(review): storage and returned batches are MatrixD regardless of T; in
 * this declaration T only determines the element type accepted by
 * add_sample(). Confirm whether Matrix<T> was intended.
 * NOTE(review): the generated member index for this header also lists a
 * default constructor `Dataset()` that does not appear in this listing;
 * confirm against the real header.
 *
 * @tparam T Data type for the dataset elements (typically double or float).
 */
template <typename T>
class Dataset {
public:
    /**
     * @brief Create a dataset from a feature matrix and a label matrix.
     * @param features Feature matrix (the usage example builds it as
     *                 samples x features).
     * @param labels   Label matrix (the usage example builds it as
     *                 samples x classes).
     */
    Dataset(const MatrixD& features, const MatrixD& labels);

    /**
     * @brief Add a single sample to the dataset.
     * @param feature Feature values of the new sample.
     * @param label   Label values of the new sample.
     */
    void add_sample(const std::vector<T>& feature, const std::vector<T>& label);

    /**
     * @brief Get the number of samples in the dataset.
     */
    size_t size() const;

    /**
     * @brief Extract a batch of samples from the dataset.
     * @param start_idx  Index of the first sample in the batch.
     * @param batch_size Number of samples requested.
     * @return Pair of matrices; presumably (features, labels) in that order.
     *         TODO confirm, and confirm behaviour when the range runs past
     *         the end of the dataset.
     */
    std::pair<MatrixD, MatrixD> get_batch(size_t start_idx, size_t batch_size) const;

    /**
     * @brief Randomly shuffle the dataset samples.
     */
    void shuffle();

private:
    MatrixD features_;  ///< Feature storage.
    MatrixD labels_;    ///< Label storage.
};
/**
 * @brief Batch iterator over a Dataset.
 *
 * The loader stores a reference to the dataset rather than a copy, so the
 * dataset passed to the constructor must outlive the loader.
 *
 * @tparam T Element type parameter of the Dataset being iterated.
 */
template <typename T>
class DataLoader {
public:
    /**
     * @brief Bind the loader to a dataset.
     * @param dataset    Dataset to iterate over; held by const reference.
     * @param batch_size Number of samples per batch.
     * @param shuffle    Shuffle flag (defaults to false).
     *                   NOTE(review): Dataset::shuffle() is non-const and the
     *                   dataset is held by const reference, so how this flag
     *                   takes effect is not visible here. Confirm in the
     *                   implementation.
     */
    DataLoader(const Dataset<T>& dataset, size_t batch_size, bool shuffle = false);

    /**
     * @brief Check if more batches are available in the current epoch.
     */
    bool has_next() const;

    /**
     * @brief Get the next batch of data.
     * @return Pair of matrices; presumably (features, labels). TODO confirm.
     */
    std::pair<MatrixD, MatrixD> next_batch();

    /**
     * @brief Reset the iterator to the start of the dataset.
     */
    void reset();

private:
    const Dataset<T>& dataset_;  ///< Non-owning reference to the iterated dataset.
    size_t batch_size_;          ///< Batch size given at construction.
    bool shuffle_;               ///< Shuffle flag given at construction.
    size_t current_idx_;         ///< Iteration cursor; presumably the next sample index.
};
/**
 * @brief Static helpers for loading CSV files into matrices.
 */
class CSVLoader {
public:
    /**
     * @brief Load a CSV file into a matrix.
     * @param filename   Path of the CSV file.
     * @param has_header Whether the file has a header line (defaults to true);
     *                   presumably that line is skipped. TODO confirm.
     * @param delimiter  Field separator character (defaults to ',').
     */
    static MatrixD load_csv(const std::string& filename,
                            bool has_header = true,
                            char delimiter = ',');

    /**
     * @brief Load specific columns as features and labels.
     * @param filename     Path of the CSV file.
     * @param feature_cols Column indices to use as features; presumably
     *                     zero-based. TODO confirm.
     * @param label_cols   Column indices to use as labels.
     * @param has_header   Whether the file has a header line (defaults to true).
     * @param delimiter    Field separator character (defaults to ',').
     * @return Pair of matrices; presumably (features, labels). TODO confirm.
     */
    static std::pair<MatrixD, MatrixD> load_features_labels(
            const std::string& filename,
            const std::vector<size_t>& feature_cols,
            const std::vector<size_t>& label_cols,
            bool has_header = true,
            char delimiter = ',');
};
/**
 * @brief Static helpers for loading image files into matrices.
 */
class ImageLoader {
public:
    /**
     * @brief Load a single image file.
     * @param filename      Path of the image file.
     * @param target_width  Requested width (defaults to 0); presumably 0 keeps
     *                      the original width. TODO confirm.
     * @param target_height Requested height (defaults to 0); presumably 0
     *                      keeps the original height. TODO confirm.
     */
    static MatrixD load_image(const std::string& filename,
                              size_t target_width = 0,
                              size_t target_height = 0);

    /**
     * @brief Load all images from a directory.
     * @param directory_path Directory to read images from.
     * @param target_width   Requested width for every image (defaults to 0).
     * @param target_height  Requested height for every image (defaults to 0).
     * @return One matrix per loaded image.
     */
    static std::vector<MatrixD> load_images_from_directory(
            const std::string& directory_path,
            size_t target_width = 0,
            size_t target_height = 0);
};
/**
 * @brief Static data preprocessing helpers.
 */
class Preprocessor {
public:
    /**
     * @brief Normalize data to the specified range.
     * @param data    Input matrix.
     * @param min_val Lower bound of the target range (defaults to 0.0).
     * @param max_val Upper bound of the target range (defaults to 1.0).
     * @return Normalized matrix. NOTE(review): whether scaling is per column
     *         or over the whole matrix is not visible here. Confirm.
     */
    static MatrixD normalize(const MatrixD& data,
                             double min_val = 0.0,
                             double max_val = 1.0);

    /**
     * @brief Standardize data to zero mean and unit variance.
     * @param data Input matrix.
     */
    static MatrixD standardize(const MatrixD& data);

    /**
     * @brief Convert categorical labels to one-hot encoding.
     * @param labels      Integer class labels.
     * @param num_classes Number of classes; presumably the width of the
     *                    encoded matrix. TODO confirm.
     */
    static MatrixD one_hot_encode(const std::vector<int>& labels, size_t num_classes);

    /**
     * @brief Split a dataset into training, validation, and test sets.
     * @param data        Dataset to split.
     * @param train_ratio Fraction for the training set (defaults to 0.7).
     * @param val_ratio   Fraction for the validation set (defaults to 0.15).
     * @return Tuple of three datasets; presumably (train, validation, test)
     *         with the test set taking the remaining fraction. TODO confirm.
     */
    static std::tuple<Dataset<double>, Dataset<double>, Dataset<double>>
    train_val_test_split(const Dataset<double>& data,
                         double train_ratio = 0.7,
                         double val_ratio = 0.15);
};
} // namespace utils
} // namespace dl
static MatrixD load_csv(const std::string &filename, bool has_header=true, char delimiter=',')
Load CSV file into a matrix.
static std::pair< MatrixD, MatrixD > load_features_labels(const std::string &filename, const std::vector< size_t > &feature_cols, const std::vector< size_t > &label_cols, bool has_header=true, char delimiter=',')
Load specific columns as features and labels.
void reset()
Reset iterator to start of dataset.
std::pair< MatrixD, MatrixD > next_batch()
Get the next batch of data.
bool has_next() const
Check if more batches are available in current epoch.
std::pair< MatrixD, MatrixD > get_batch(size_t start_idx, size_t batch_size) const
Extract a batch of samples from the dataset.
Dataset()
Default constructor for empty dataset.
void shuffle()
Randomly shuffle the dataset samples.
void add_sample(const std::vector< T > &feature, const std::vector< T > &label)
Add a single sample to the dataset.
size_t size() const
Get the number of samples in the dataset.
static MatrixD load_image(const std::string &filename, size_t target_width=0, size_t target_height=0)
Load a single image file.
static std::vector< MatrixD > load_images_from_directory(const std::string &directory_path, size_t target_width=0, size_t target_height=0)
Load all images from a directory.
static std::tuple< Dataset< double >, Dataset< double >, Dataset< double > > train_val_test_split(const Dataset< double > &data, double train_ratio=0.7, double val_ratio=0.15)
Split dataset into training, validation, and test sets.
static MatrixD one_hot_encode(const std::vector< int > &labels, size_t num_classes)
Convert categorical labels to one-hot encoding.
static MatrixD standardize(const MatrixD &data)
Standardize data to zero mean and unit variance.
static MatrixD normalize(const MatrixD &data, double min_val=0.0, double max_val=1.0)
Normalize data to specified range.
Matrix utility class for deep learning operations.
::utils::Matrix< double > MatrixD