diff --git a/DESCRIPTION b/DESCRIPTION
index f8fdbae..9e2498f 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: tglkmeans
 Title: Efficient Implementation of K-Means++ Algorithm
-Version: 0.4.0
+Version: 0.5.0
 Authors@R: c(
     person("Aviezer", "Lifshitz", , "aviezer.lifshitz@weizmann.ac.il", role = c("aut", "cre")),
     person("Amos", "Tanay", role = "aut"),
@@ -30,6 +30,8 @@ Imports:
     future,
     ggplot2 (>= 2.2.0),
     magrittr,
+    Matrix,
+    methods,
     parallel (>= 3.3.2),
     plyr (>= 1.8.4),
     purrr (>= 0.2.0),
diff --git a/NAMESPACE b/NAMESPACE
index afb2449..4e4625d 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -3,6 +3,7 @@
 export("%>%")
 export(TGL_kmeans)
 export(TGL_kmeans_tidy)
+export(downsample_matrix)
 export(simulate_data)
 export(tglkmeans.set_parallel)
 import(dplyr)
diff --git a/NEWS.md b/NEWS.md
index 7844b04..938561e 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,7 @@
+# tglkmeans 0.5.0
+
+* Added `dowsample_matrix` function to downsample the columns of a count matrix to a target number. 
+
 # tglkmeans 0.4.0
 
 * Default of `id_column` parameter was changed to `FALSE`. Note that this is a breaking change, and if you want to use an id column, you need to set it explicitly to `TRUE`.
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 8e3121b..ff2cd0c 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -13,3 +13,11 @@ TGL_kmeans_cpp <- function(ids, mat, k, metric, max_iter = 40, min_delta = 0.000
     .Call('_tglkmeans_TGL_kmeans_cpp', PACKAGE = 'tglkmeans', ids, mat, k, metric, max_iter, min_delta, use_cpp_random, seed)
 }
 
+downsample_matrix_cpp <- function(input, samples, random_seed) {
+    .Call('_tglkmeans_downsample_matrix_cpp', PACKAGE = 'tglkmeans', input, samples, random_seed)
+}
+
+rcpp_downsample_sparse <- function(matrix, samples, random_seed) {
+    .Call('_tglkmeans_rcpp_downsample_sparse', PACKAGE = 'tglkmeans', matrix, samples, random_seed)
+}
+
diff --git a/R/downsample.R b/R/downsample.R
new file mode 100644
index 0000000..d6d3f4b
--- /dev/null
+++ b/R/downsample.R
@@ -0,0 +1,109 @@
+#' Downsample the columns of a matrix to a target number
+#'
+#' @description This function takes a matrix and downsamples it to a target number of samples.
+#' It uses a random seed for reproducibility and allows for removing columns with
+#' small sums.
+#'
+#' @param mat An integer matrix to be downsampled. Can be a matrix or sparse matrix (dgCMatrix).
+#' If the matrix contains NAs, the function will run significantly slower. Values that are
+#' not integers will be coerced to integers using {.code floor()}.
+#' @param target_n The target number of samples to downsample to.
+#' @param target_q A target quantile of sums to downsample to. Only one of {.field target_n} or {.field target_q} can be provided.
+#' @param seed The random seed for reproducibility (default is NULL)
+#' @param remove_columns Logical indicating whether to remove columns with small sums (default is FALSE)
+#'
+#' @return The downsampled matrix
+#'
+#' @examples
+#' \dontshow{
+#' tglkmeans.set_parallel(1)
+#' }
+#'
+#' mat <- matrix(1:12, nrow = 4)
+#' downsample_matrix(mat, 2)
+#'
+#' # Remove columns with small sums
+#' downsample_matrix(mat, 12, remove_columns = TRUE)
+#'
+#' # sparse matrix
+#' mat_sparse <- Matrix::Matrix(mat, sparse = TRUE)
+#' downsample_matrix(mat_sparse, 2)
+#'
+#' # with a quantile
+#' downsample_matrix(mat, target_q = 0.5)
+#'
+#' @export
+downsample_matrix <- function(mat, target_n = NULL, target_q = NULL, seed = NULL, remove_columns = FALSE) {
+    if (is.null(target_n) && is.null(target_q)) {
+        cli_abort("Either {.field target_n} or {.field target_q} must be provided.")
+    } else if (!is.null(target_n) && !is.null(target_q)) {
+        cli_abort("Only one of {.field target_n} or {.field target_q} can be provided.")
+    }
+
+    sums <- colsums_matrix(mat)
+    if (!is.null(target_q)) {
+        target_n <- round(stats::quantile(sums, target_q))
+        cli::cli_alert_info("Using {.val {target_n}} as the target number.")
+    }
+
+    if (is.null(seed)) {
+        seed <- sample(1:10000, 1)
+        cli::cli_alert_warning("No seed provided. Using {.val {seed}}.")
+    } else if (!is.numeric(seed) || seed <= 0 || seed != as.integer(seed)) {
+        cli_abort("{.field seed} must be a positive integer.")
+    }
+
+    if (!is.logical(remove_columns)) {
+        cli_abort("{.field remove_columns} must be a logical value.")
+    }
+
+    if (target_n <= 0 || target_n != as.integer(target_n)) {
+        cli_abort("{.field target_n} must be a positive integer.")
+    }
+
+    # replace NAs with 0s for the cpp code
+    has_nas <- FALSE
+    if (any(is.na(mat))) {
+        has_nas <- TRUE
+        cli_warn("Input matrix contains NAs. Processing would be significantly slower.")
+        orig_mat <- mat
+        mat[is.na(mat)] <- 0
+    }
+
+    if (methods::is(mat, "dgCMatrix")) {
+        ds_mat <- rcpp_downsample_sparse(mat, target_n, seed)
+    } else if (is.matrix(mat)) {
+        ds_mat <- downsample_matrix_cpp(mat, target_n, seed)
+    }
+
+    small_cols <- sums < target_n
+    if (any(small_cols)) {
+        if (remove_columns) {
+            ds_mat <- ds_mat[, !small_cols, drop = FALSE]
+            if (has_nas) {
+                orig_mat <- orig_mat[, !small_cols, drop = FALSE]
+            }
+            cli_alert_info("Removed {.val {sum(small_cols)}} columns with a sum smaller than {.val {target_n}}.")
+        } else {
+            cli_warn("{.val {sum(small_cols)}} columns have a sum smaller than {.val {target_n}}. These columns were not changed. To remove them, set {.field remove_columns=TRUE}.")
+        }
+    }
+
+    if (has_nas) {
+        # put back the NAs
+        ds_mat[is.na(orig_mat)] <- NA
+    }
+
+
+    return(ds_mat)
+}
+
+colsums_matrix <- function(mat) {
+    if (methods::is(mat, "dgCMatrix")) {
+        return(Matrix::colSums(mat, na.rm = TRUE))
+    } else if (is.matrix(mat)) {
+        return(colSums(mat, na.rm = TRUE))
+    } else {
+        cli_abort("Input must be a matrix or a sparse matrix (dgCMatrix). class of {.field mat} is {.val {class(mat)}}.")
+    }
+}
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 30c405b..f55ec4c 100755
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -27,6 +27,10 @@ reference:
 - contents:
   - TGL_kmeans
   - TGL_kmeans_tidy
+- title: Matrix
+  desc: matrix utility functions
+- contents: 
+  - downsample_matrix
 - title: misc
   desc: utility functions
 - contents: 
diff --git a/man/TGL_kmeans.Rd b/man/TGL_kmeans.Rd
index a3ef07a..f3679b2 100644
--- a/man/TGL_kmeans.Rd
+++ b/man/TGL_kmeans.Rd
@@ -9,7 +9,7 @@ TGL_kmeans(
   k,
   metric = "euclid",
   max_iter = 40,
-  min_delta = 1e-04,
+  min_delta = 0.0001,
   verbose = FALSE,
   keep_log = FALSE,
   id_column = FALSE,
diff --git a/man/TGL_kmeans_tidy.Rd b/man/TGL_kmeans_tidy.Rd
index f304ba8..567060e 100644
--- a/man/TGL_kmeans_tidy.Rd
+++ b/man/TGL_kmeans_tidy.Rd
@@ -9,7 +9,7 @@ TGL_kmeans_tidy(
   k,
   metric = "euclid",
   max_iter = 40,
-  min_delta = 1e-04,
+  min_delta = 0.0001,
   verbose = FALSE,
   keep_log = FALSE,
   id_column = FALSE,
diff --git a/man/downsample_matrix.Rd b/man/downsample_matrix.Rd
new file mode 100644
index 0000000..db41148
--- /dev/null
+++ b/man/downsample_matrix.Rd
@@ -0,0 +1,54 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/downsample.R
+\name{downsample_matrix}
+\alias{downsample_matrix}
+\title{Downsample the columns of a matrix to a target number}
+\usage{
+downsample_matrix(
+  mat,
+  target_n = NULL,
+  target_q = NULL,
+  seed = NULL,
+  remove_columns = FALSE
+)
+}
+\arguments{
+\item{mat}{An integer matrix to be downsampled. Can be a matrix or sparse matrix (dgCMatrix).
+If the matrix contains NAs, the function will run significantly slower. Values that are
+not integers will be coerced to integers using {.code floor()}.}
+
+\item{target_n}{The target number of samples to downsample to.}
+
+\item{target_q}{A target quantile of sums to downsample to. Only one of {.field target_n} or {.field target_q} can be provided.}
+
+\item{seed}{The random seed for reproducibility (default is NULL)}
+
+\item{remove_columns}{Logical indicating whether to remove columns with small sums (default is FALSE)}
+}
+\value{
+The downsampled matrix
+}
+\description{
+This function takes a matrix and downsamples it to a target number of samples.
+It uses a random seed for reproducibility and allows for removing columns with
+small sums.
+}
+\examples{
+\dontshow{
+tglkmeans.set_parallel(1)
+}
+
+mat <- matrix(1:12, nrow = 4)
+downsample_matrix(mat, 2)
+
+# Remove columns with small sums
+downsample_matrix(mat, 12, remove_columns = TRUE)
+
+# sparse matrix
+mat_sparse <- Matrix::Matrix(mat, sparse = TRUE)
+downsample_matrix(mat_sparse, 2)
+
+# with a quantile
+downsample_matrix(mat, target_q = 0.5)
+
+}
diff --git a/src/DownsampleWorker.cpp b/src/DownsampleWorker.cpp
new file mode 100644
index 0000000..b4e6507
--- /dev/null
+++ b/src/DownsampleWorker.cpp
@@ -0,0 +1,152 @@
+#include <vector>
+#include <algorithm>
+#include <cassert>
+#include <random>
+#include <Rcpp.h>
+#include <RcppParallel.h>
+#include "DownsampleWorker.h"
+
+typedef float float32_t;
+typedef double float64_t;
+typedef unsigned char uint8_t;
+typedef unsigned int uint_t;
+
+static size_t
+ceil_power_of_two(const size_t size) {
+    return size_t(1) << size_t(ceil(log2(float64_t(size))));
+}
+
+template<typename D>
+static void initialize_tree(const std::vector<D>& input, std::vector<size_t>& tree) {
+    assert(input.size() >= 2); 
+
+    size_t input_size = ceil_power_of_two(input.size());
+    tree.resize(2 * input_size - 1); // Resize the tree to the necessary size
+
+    // Copy input to the beginning of the tree and fill the rest with zeros
+    std::copy(input.begin(), input.end(), tree.begin());
+    std::fill(tree.begin() + input.size(), tree.begin() + input_size, 0);
+
+    // Iteratively build the tree
+    size_t tree_index = 0;
+    while (input_size > 1) {
+        size_t half_size = input_size / 2;
+        for (size_t index = 0; index < half_size; ++index) {
+            const auto left = tree[tree_index + index * 2];
+            const auto right = tree[tree_index + index * 2 + 1];
+            tree[tree_index + input_size + index] = left + right;
+
+            assert(left >= 0);
+            assert(right >= 0);
+            assert(tree[tree_index + input_size + index] == size_t(left) + size_t(right));
+        }
+        tree_index += input_size;
+        input_size = half_size;
+    }
+    assert(tree.size() == 2 * input.size() - 1);
+}
+
+static size_t random_sample(std::vector<size_t>& tree, ssize_t random) {
+    size_t size_of_level = 1;
+    ssize_t base_of_level = tree.size() - 1;
+    size_t index_in_level = 0;
+    size_t index_in_tree = base_of_level + index_in_level;
+
+    while (true) {
+        assert(index_in_tree == base_of_level + index_in_level);
+        assert(tree[index_in_tree] > random);
+
+        --tree[index_in_tree];
+        size_of_level *= 2;
+        base_of_level -= size_of_level;
+
+        if (base_of_level < 0) {
+            return index_in_level;
+        }
+
+        index_in_level *= 2;
+        index_in_tree = base_of_level + index_in_level;
+        ssize_t right_random = random - ssize_t(tree[index_in_tree]);
+
+        assert(tree[base_of_level + index_in_level] + tree[base_of_level + index_in_level + 1] ==
+               tree[base_of_level + size_of_level + index_in_level / 2] + 1);
+
+        if (right_random >= 0) {
+            ++index_in_level;
+            ++index_in_tree;
+            assert(index_in_level < size_of_level);
+            random = right_random;
+        }
+    }
+}
+
+template<typename D, typename O>
+static void downsample_slice(const std::vector<D>& input, std::vector<O>& output, const int32_t samples, const size_t random_seed) {
+    assert(output.size() == input.size()); 
+
+    if (samples < 0 || input.size() == 0) {
+        return;
+    }
+
+    if (input.size() == 1) {
+        output[0] = O(static_cast<double>(samples) < static_cast<double>(input[0]) ? samples : input[0]);
+        return;
+    }
+
+    size_t input_size = ceil_power_of_two(input.size());
+    std::vector<size_t> tree(2 * input_size - 1);
+    initialize_tree(input, tree);
+    size_t& total = tree[tree.size() - 1];
+
+    if (total <= static_cast<size_t>(samples)) {
+        std::copy(input.begin(), input.end(), output.begin());
+        return;
+    }
+
+    std::fill(output.begin(), output.end(), O(0));
+
+    std::minstd_rand random(random_seed);
+    for (size_t index = 0; index < static_cast<size_t>(samples); ++index) {
+        size_t sampled_index = random_sample(tree, random() % total);
+        if (sampled_index < output.size()) {
+            ++output[sampled_index];
+        }
+    }
+}
+
+DownsampleWorker::DownsampleWorker(const Rcpp::IntegerMatrix& input, Rcpp::IntegerMatrix& output, int samples, unsigned int random_seed)
+    : input_matrix(input), output_matrix(output), samples(samples), random_seed(random_seed) {}
+
+void DownsampleWorker::operator()(std::size_t begin, std::size_t end) {
+    for (std::size_t col = begin; col < end; ++col) {        
+        std::vector<int> input_vec(input_matrix.column(col).begin(), input_matrix.column(col).end());
+        std::vector<int> output_vec(input_vec.size(), 0);
+
+        downsample_slice(input_vec, output_vec, samples, random_seed);
+
+        std::copy(output_vec.begin(), output_vec.end(), output_matrix.column(col).begin());
+    }
+}
+
+DownsampleWorkerSparse::DownsampleWorkerSparse(const Rcpp::IntegerVector& i, const Rcpp::IntegerVector& p, const Rcpp::IntegerVector& x, 
+                                               Rcpp::IntegerVector& out_x, int samples, unsigned int random_seed)
+    : input_i(i), input_p(p), input_x(x), output_x(out_x), samples(samples), random_seed(random_seed) {}
+
+void DownsampleWorkerSparse::operator()(std::size_t begin, std::size_t end) {
+    for (std::size_t col = begin; col < end; ++col) {
+        // Extract the current column from the sparse matrix
+        std::vector<int> input_vec;
+        for (int idx = input_p[col]; idx < input_p[col + 1]; ++idx) {
+            input_vec.push_back(input_x[idx]);
+        }
+
+        std::vector<int> output_vec(input_vec.size(), 0);
+
+        downsample_slice(input_vec, output_vec, samples, random_seed);
+
+        // Store results in the output sparse matrix
+        for (int idx = input_p[col], out_idx = 0; idx < input_p[col + 1]; ++idx, ++out_idx) {
+            output_x[idx] = output_vec[out_idx];
+        }
+    }
+}
diff --git a/src/DownsampleWorker.h b/src/DownsampleWorker.h
new file mode 100644
index 0000000..53c944c
--- /dev/null
+++ b/src/DownsampleWorker.h
@@ -0,0 +1,40 @@
+#ifndef DOWNSAMPLEWORKER_H
+#define DOWNSAMPLEWORKER_H
+
+#include <Rcpp.h>
+#include <RcppParallel.h>
+#include <vector>
+
+class DownsampleWorker : public RcppParallel::Worker {
+private:
+    RcppParallel::RMatrix<int> input_matrix;
+    RcppParallel::RMatrix<int> output_matrix;
+    int samples;
+    unsigned int random_seed;
+
+public:    
+    DownsampleWorker(const Rcpp::IntegerMatrix& input, Rcpp::IntegerMatrix& output, int samples, unsigned int random_seed);
+
+    // Parallel operator
+    void operator()(std::size_t begin, std::size_t end) override;
+};
+
+class DownsampleWorkerSparse : public RcppParallel::Worker {
+private:
+    Rcpp::IntegerVector input_i;
+    Rcpp::IntegerVector input_p;
+    Rcpp::IntegerVector input_x;
+    Rcpp::IntegerVector output_x;
+    int samples;
+    unsigned int random_seed;
+
+public:
+    DownsampleWorkerSparse(const Rcpp::IntegerVector& i, const Rcpp::IntegerVector& p, const Rcpp::IntegerVector& x, 
+                           Rcpp::IntegerVector& out_x, int samples, unsigned int random_seed);
+
+    // Parallel operator
+    void operator()(std::size_t begin, std::size_t end) override;
+};
+
+
+#endif // DOWNSAMPLEWORKER_H
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index d189d69..d5474ab 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -51,11 +51,39 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// downsample_matrix_cpp
+Rcpp::IntegerMatrix downsample_matrix_cpp(Rcpp::IntegerMatrix input, int samples, unsigned int random_seed);
+RcppExport SEXP _tglkmeans_downsample_matrix_cpp(SEXP inputSEXP, SEXP samplesSEXP, SEXP random_seedSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< Rcpp::IntegerMatrix >::type input(inputSEXP);
+    Rcpp::traits::input_parameter< int >::type samples(samplesSEXP);
+    Rcpp::traits::input_parameter< unsigned int >::type random_seed(random_seedSEXP);
+    rcpp_result_gen = Rcpp::wrap(downsample_matrix_cpp(input, samples, random_seed));
+    return rcpp_result_gen;
+END_RCPP
+}
+// rcpp_downsample_sparse
+Rcpp::S4 rcpp_downsample_sparse(Rcpp::S4 matrix, int samples, unsigned int random_seed);
+RcppExport SEXP _tglkmeans_rcpp_downsample_sparse(SEXP matrixSEXP, SEXP samplesSEXP, SEXP random_seedSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< Rcpp::S4 >::type matrix(matrixSEXP);
+    Rcpp::traits::input_parameter< int >::type samples(samplesSEXP);
+    Rcpp::traits::input_parameter< unsigned int >::type random_seed(random_seedSEXP);
+    rcpp_result_gen = Rcpp::wrap(rcpp_downsample_sparse(matrix, samples, random_seed));
+    return rcpp_result_gen;
+END_RCPP
+}
 
 static const R_CallMethodDef CallEntries[] = {
     {"_tglkmeans_reduce_coclust", (DL_FUNC) &_tglkmeans_reduce_coclust, 3},
     {"_tglkmeans_reduce_num_trials", (DL_FUNC) &_tglkmeans_reduce_num_trials, 2},
     {"_tglkmeans_TGL_kmeans_cpp", (DL_FUNC) &_tglkmeans_TGL_kmeans_cpp, 8},
+    {"_tglkmeans_downsample_matrix_cpp", (DL_FUNC) &_tglkmeans_downsample_matrix_cpp, 3},
+    {"_tglkmeans_rcpp_downsample_sparse", (DL_FUNC) &_tglkmeans_rcpp_downsample_sparse, 3},
     {NULL, NULL, 0}
 };
 
diff --git a/src/downsample.cpp b/src/downsample.cpp
new file mode 100644
index 0000000..b2da02f
--- /dev/null
+++ b/src/downsample.cpp
@@ -0,0 +1,47 @@
+#include <vector>
+#include <Rcpp.h>
+#include <RcppParallel.h>
+#include "DownsampleWorker.h"
+
+typedef float float32_t;
+typedef double float64_t;
+typedef unsigned char uint8_t;
+typedef unsigned int uint_t;
+
+// [[Rcpp::export]]
+Rcpp::IntegerMatrix downsample_matrix_cpp(Rcpp::IntegerMatrix input, int samples, unsigned int random_seed) {
+    Rcpp::IntegerMatrix output(input.nrow(), input.ncol());
+
+    DownsampleWorker worker(input, output, samples, random_seed);
+    RcppParallel::parallelFor(0, input.ncol(), worker);
+
+    return output;
+}
+
+// [[Rcpp::export]]
+Rcpp::S4 rcpp_downsample_sparse(Rcpp::S4 matrix, int samples, unsigned int random_seed) {
+    // Extract components of the dgCMatrix
+    Rcpp::IntegerVector i = matrix.slot("i");
+    Rcpp::IntegerVector p = matrix.slot("p");
+    Rcpp::IntegerVector x = matrix.slot("x");
+
+    int nrows = Rcpp::as<Rcpp::IntegerVector>(matrix.slot("Dim"))[0];
+    int ncols = Rcpp::as<Rcpp::IntegerVector>(matrix.slot("Dim"))[1];
+
+    // Prepare output vector
+    Rcpp::IntegerVector out_x(x.size());
+
+    // Create and run the DownsampleWorkerSparse
+    DownsampleWorkerSparse worker(i, p, x, out_x, samples, random_seed);
+    RcppParallel::parallelFor(0, ncols, worker);
+
+    // Create a new dgCMatrix object for the output
+    Rcpp::S4 out_matrix("dgCMatrix");
+    out_matrix.slot("i") = i;
+    out_matrix.slot("p") = p;    
+    Rcpp::NumericVector out_x_double = Rcpp::as<Rcpp::NumericVector>(out_x);
+    out_matrix.slot("x") = out_x_double;
+    out_matrix.slot("Dim") = Rcpp::IntegerVector::create(nrows, ncols);
+
+    return out_matrix;
+}
\ No newline at end of file
diff --git a/tests/testthat/test-downsample.R b/tests/testthat/test-downsample.R
new file mode 100644
index 0000000..8a04d8b
--- /dev/null
+++ b/tests/testthat/test-downsample.R
@@ -0,0 +1,204 @@
+test_that("downsample_matrix returns the correct number of samples", {
+    mat <- matrix(1:12, nrow = 4)
+    target_n <- 2
+    ds_mat <- downsample_matrix(mat, target_n)
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) == target_n))
+    expect_true(all(mat >= ds_mat))
+})
+
+test_that("downsample_matrix removes columns with small sums when remove_columns is TRUE", {
+    mat <- matrix(1:12, nrow = 4)
+    target_n <- 12
+    ds_mat <- downsample_matrix(mat, target_n, remove_columns = TRUE)
+    expect_equal(ncol(ds_mat), 2)
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) == target_n))
+})
+
+test_that("downsample_matrix does not remove columns with small sums when remove_columns is FALSE", {
+    mat <- matrix(1:12, nrow = 4)
+    target_n <- 12
+    expect_warning(ds_mat <- downsample_matrix(mat, target_n, remove_columns = FALSE))
+    expect_equal(ncol(ds_mat), ncol(mat))
+    expect_true(all(colSums(ds_mat[, -1], na.rm = TRUE) == target_n))
+})
+
+test_that("downsample_matrix returns the correct number of samples when there are NAs", {
+    mat <- matrix(1:12, nrow = 4)
+    mat[1, 1] <- NA
+    target_n <- 2
+    expect_warning(ds_mat <- downsample_matrix(mat, target_n))
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) == target_n))
+
+    # make sure the NAs are still there
+    expect_true(all(is.na(ds_mat[1, 1])))
+})
+
+test_that("downsample_matrix returns the correct number of samples when there are NAs and remove_columns is TRUE", {
+    mat <- matrix(1:12, nrow = 4)
+    mat[1, 1] <- NA
+    target_n <- 2
+    expect_warning(ds_mat <- downsample_matrix(mat, target_n, remove_columns = TRUE))
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) == target_n))
+
+    # make sure the NAs are still there
+    expect_true(all(is.na(ds_mat[1, 1])))
+})
+
+test_that("downsample_matrix returns the correct number of samples when there are NAs and remove_columns is FALSE", {
+    mat <- matrix(1:12, nrow = 4)
+    mat[1, 1] <- NA
+    target_n <- 2
+    expect_warning(ds_mat <- downsample_matrix(mat, target_n, remove_columns = FALSE))
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) == target_n))
+    expect_true(all(mat[!is.na(mat)] >= ds_mat[!is.na(ds_mat)]))
+
+    # make sure the NAs are still there
+    expect_true(all(is.na(ds_mat[1, 1])))
+})
+
+test_that("downsample_matrix returns the correct number of samples when there are NAs and remove_columns is TRUE and the matrix is sparse", {
+    mat <- Matrix::Matrix(matrix(1:12, nrow = 4), sparse = TRUE)
+    mat[1, 1] <- NA
+    target_n <- 2
+    expect_warning(ds_mat <- downsample_matrix(mat, target_n, remove_columns = TRUE))
+    expect_true(all(Matrix::colSums(ds_mat, na.rm = TRUE) == target_n))
+
+    # make sure the NAs are still there
+    expect_true(all(is.na(ds_mat[1, 1])))
+})
+
+test_that("downsample_matrix works with all zeros matrix", {
+    mat <- matrix(0, nrow = 4)
+    target_n <- 2
+    expect_warning(ds_mat <- downsample_matrix(mat, target_n))
+    expect_true(all(mat == ds_mat))
+
+    ds_mat <- downsample_matrix(mat, target_n, remove_columns = TRUE)
+    expect_equal(ncol(ds_mat), 0)
+})
+
+# Test with Different Matrix Sizes
+test_that("downsample_matrix works with larger matrix", {
+    mat <- matrix(1:1e3, nrow = 10)
+    target_n <- 5
+    ds_mat <- downsample_matrix(mat, target_n)
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) == target_n))
+})
+
+test_that("downsample_matrix works with single-column matrix", {
+    mat <- matrix(1:10, nrow = 10)
+    target_n <- 5
+    ds_mat <- downsample_matrix(mat, target_n)
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) == target_n))
+})
+
+test_that("downsample_matrix works with single-row matrix", {
+    mat <- matrix(1:10, nrow = 1)
+    target_n <- 5
+    ds_mat <- downsample_matrix(mat, target_n, remove_columns = TRUE)
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) == target_n))
+    expect_equal(ncol(ds_mat), 6)
+})
+
+# Test with Different target_n Values
+test_that("downsample_matrix with target_n equal to number of rows", {
+    mat <- matrix(1:12, nrow = 4)
+    target_n <- 4
+    ds_mat <- downsample_matrix(mat, target_n)
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) == target_n))
+})
+
+test_that("downsample_matrix with target_n greater than number of rows", {
+    mat <- matrix(1:12, nrow = 4)
+    target_n <- 6
+    ds_mat <- downsample_matrix(mat, target_n)
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) <= target_n))
+})
+
+test_that("downsample_matrix with target_n equal to 1", {
+    mat <- matrix(1:12, nrow = 4)
+    target_n <- 1
+    ds_mat <- downsample_matrix(mat, target_n)
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) == target_n))
+})
+
+test_that("downsample_matrix with target_n equal to 0", {
+    mat <- matrix(1:12, nrow = 4)
+    target_n <- 0
+    expect_error(downsample_matrix(mat, target_n))
+})
+
+# Test with Different Matrix Types
+test_that("downsample_matrix with sparse matrix", {
+    mat <- Matrix::Matrix(matrix(1:12, nrow = 4), sparse = TRUE)
+    target_n <- 2
+    ds_mat <- downsample_matrix(mat, target_n)
+    expect_true(all(Matrix::colSums(ds_mat, na.rm = TRUE) == target_n))
+    expect_true(all(mat >= ds_mat))
+})
+
+test_that("downsample_matrix with invalid matrix type", {
+    mat <- 1:12
+    target_n <- 2
+    expect_error(downsample_matrix(mat, target_n))
+
+    mat <- Matrix::Matrix(matrix(1:12, nrow = 4), sparse = FALSE)
+    target_n <- 2
+    expect_error(downsample_matrix(mat, target_n))
+})
+
+# Test with non-integer values
+test_that("downsample_matrix with non-integer values", {
+    mat <- matrix(1:12, nrow = 4)
+    mat[1, 1] <- 1.5
+    target_n <- 2
+    ds_mat <- downsample_matrix(mat, target_n)
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) == target_n))
+    expect_true(all(mat >= ds_mat))
+})
+
+# Test with Various Seed Values
+test_that("downsample_matrix with fixed seed", {
+    mat <- matrix(1:12, nrow = 4)
+    target_n <- 2
+    seed <- 123
+    ds_mat1 <- downsample_matrix(mat, target_n, seed = seed)
+    ds_mat2 <- downsample_matrix(mat, target_n, seed = seed)
+    expect_equal(ds_mat1, ds_mat2)
+})
+
+test_that("downsample_matrix without specifying seed", {
+    mat <- matrix(1:12, nrow = 4)
+    target_n <- 2
+    ds_mat <- downsample_matrix(mat, target_n)
+    expect_true(is.matrix(ds_mat))
+})
+
+test_that("downsample_matrix with invalid seed", {
+    mat <- matrix(1:12, nrow = 4)
+    target_n <- 2
+    expect_error(downsample_matrix(mat, target_n, seed = -1))
+    expect_error(downsample_matrix(mat, target_n, seed = "not a number"))
+})
+
+test_that("downsample_matrix correctly downsamples matrix using target_q", {
+    mat <- matrix(1:12, nrow = 4)
+    target_q <- 0.5
+    ds_mat <- downsample_matrix(mat, target_q = target_q, remove_columns = TRUE)
+    expect_true(all(colSums(ds_mat, na.rm = TRUE) == round(stats::quantile(colSums(mat), target_q))))
+
+    expect_warning(ds_mat <- downsample_matrix(mat, target_q = target_q, remove_columns = FALSE))
+    expect_true(all(mat >= ds_mat))
+})
+
+test_that("Cannot provide both target_n and target_q", {
+    mat <- matrix(1:12, nrow = 4)
+    target_n <- 2
+    target_q <- 0.5
+    expect_error(downsample_matrix(mat, target_n = target_n, target_q = target_q))
+})
+
+test_that("Fail when no target_n or target_q is provided", {
+    mat <- matrix(1:12, nrow = 4)
+    expect_error(downsample_matrix(mat))
+})