PixelgenTechnologies · maxkarlsson · Jun 18, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,13 +5,19 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [Unreleased]
+
+### Fixed
+
+ - `downsample_data` will now default to the number of markers or cells in the data if `n_markers` or `n_cells` are higher than available in the data.
+
 ## [0.11.2] 2026-06-15
 
-### Changes 
+### Changed
 
  - `component_hashing` now returns a sample confidence plot with either hash purity or hash enrichment factor, depending on which metric is present in the data.
 
- ### Removed
+### Removed
 
  - `harmony` has been removed, and `do_harmonize` is no longer an option. 
 

diff --git a/R/read_data.R b/R/read_data.R
@@ -277,14 +277,19 @@ merge_data <-
 
 #' Downsample data to a specified number of cells and markers
 #'
-#' Downsamples the data to a specified number of cells and markers, ensuring that control markers are always included.
+#' Downsamples the data to a specified number of cells and markers,
+#' ensuring that control markers are always included.
+#' If a sample has fewer than `n_cells`, all available cells are kept for that sample.
+#' If fewer non-control markers are available than requested, all available non-control markers are kept.
+#' `control_markers` are always kept.
+#' In these cases, the function warns and proceeds instead of failing.
 #'
 #' @param pg_data A Seurat object containing the data to be downsampled.
 #' @param control_markers A character vector of control markers to always include in the downsampled data.
 #' @param n_cells An integer specifying the number of cells to keep in each sample.
 #' @param n_markers An integer specifying the total number of markers to keep in the downsampled data.
 #'
-#' @return A downsampled Seurat object with the specified number of cells and markers.
+#' @return A downsampled Seurat object with selected cells and markers.
 #'
 #' @export
 #'
@@ -295,24 +300,78 @@ downsample_data <-
            n_markers = 20) {
     set.seed(37)
 
-    keep_cells <-
+    pixelatorR:::assert_class(pg_data, "Seurat")
+    pixelatorR:::assert_vector(control_markers, "character", allow_null = TRUE)
+    pixelatorR:::assert_single_value(n_cells, "integer")
+    pixelatorR:::assert_single_value(n_markers, "integer")
+
+    if (length(control_markers) > 0) {
+      pixelatorR:::assert_x_in_y(control_markers, rownames(pg_data))
+    }
+
+    control_markers <- unique(control_markers)
+
+    # Downsample cells
+    cell_data <-
       FetchData(pg_data, "sample_alias") %>%
-      as_tibble(rownames = "cell_id") %>%
+      as_tibble(rownames = "cell_id")
+
+    available_cells <-
+      cell_data %>%
+      count(sample_alias, name = "n_available")
+
+    low_cell_samples <-
+      available_cells %>%
+      filter(n_available < n_cells)
+
+    if (nrow(low_cell_samples) > 0) {
+      cli::cli_warn(
+        c(
+          "Requested {.val {n_cells}} cells per sample, but some samples have fewer cells.",
+          "i" = "Using all available cells for those samples."
+        )
+      )
+    }
+
+    keep_cells <-
+      cell_data %>%
       group_by(sample_alias) %>%
-      slice_sample(n = n_cells) %>%
+      dplyr::group_modify(~ slice_sample(.x, n = min(nrow(.x), n_cells))) %>%
+      ungroup() %>%
       pull(cell_id)
 
-    pixelatorR:::assert_x_in_y(control_markers, rownames(pg_data))
+
+    # Downsample markers
+    markers <- rownames(pg_data)
+    non_control_markers <-
+      setdiff(markers, control_markers)
+
+    target_non_control <- max(n_markers - length(control_markers), 0)
+    available_non_control <- length(non_control_markers)
+
+
+    if (target_non_control > available_non_control) {
+      cli::cli_warn(
+        c(
+          "Requested {.val {target_non_control}} non-control markers,",
+          "but only {.val {available_non_control}} are available.",
+          "i" = "Using all available non-control markers."
+        )
+      )
+    }
+
+    n_non_control <- min(target_non_control, available_non_control)
+
+    sampled_non_control <-
+      if (n_non_control > 0) {
+        sample(non_control_markers, size = n_non_control, replace = FALSE)
+      } else {
+        character(0)
+      }
 
     keep_markers <-
-      rownames(pg_data) %>%
-      {
-        .[!. %in% control_markers]
-      } %>%
-      {
-        .[sample(seq_along(.), size = n_markers - length(control_markers), replace = FALSE)]
-      } %>%
-      union(control_markers)
+      c(control_markers, sampled_non_control) %>%
+      unique()
 
     pg_data <-
       pg_data[keep_markers, keep_cells]

diff --git a/man/downsample_data.Rd b/man/downsample_data.Rd
diff --git a/tests/testthat/test_read_data.R b/tests/testthat/test_read_data.R
@@ -209,6 +209,33 @@ test_that("File reading works as expected", {
   expect_s4_class(seur_down, "Seurat")
   expect_equal(dim(seur_down), c(5, 6))
 
+  # Edge case 1: fewer cells than requested in at least one sample
+  expect_warning(
+    seur_down_low_cells <- downsample_data(
+      seur_comb,
+      control_markers = c("mIgG1", "mIgG2a", "mIgG2b"),
+      n_cells = 1000,
+      n_markers = 5
+    ),
+    "fewer cells"
+  )
+  expect_equal(ncol(seur_down_low_cells), ncol(seur_comb))
+
+  # Edge case 2: fewer non-control markers available than requested
+  all_markers <- rownames(seur_comb)
+  expect_gt(length(all_markers), 1)
+  control_set <- all_markers[-length(all_markers)]
+  expect_warning(
+    seur_down_low_markers <- downsample_data(
+      seur_comb,
+      control_markers = control_set,
+      n_cells = 3,
+      n_markers = nrow(seur_comb) + 5
+    ),
+    "non-control markers"
+  )
+  expect_equal(nrow(seur_down_low_markers), nrow(seur_comb))
+
   # Sample sheet reading
   expect_no_error(sample_sheet <- read_samplesheet(test_samplesheet()))
   expect_equal(