From 73e1a75c8ef67bd69990dc51f6e754f674e39b96 Mon Sep 17 00:00:00 2001 From: swetank18 Date: Tue, 10 Mar 2026 04:49:48 +0530 Subject: [PATCH 01/10] Add ramstats tool: summary statistics for RAM (RNTuple) files --- CMakeLists.txt | 2 + inc/ramcore/RAMStats.h | 39 +++++++++++ src/ramcore/RAMStats.cxx | 148 +++++++++++++++++++++++++++++++++++++++ test/CMakeLists.txt | 1 + test/ramstatstests.cxx | 112 +++++++++++++++++++++++++++++ tools/CMakeLists.txt | 10 ++- tools/ramstats.cxx | 45 ++++++++++++ 7 files changed, 356 insertions(+), 1 deletion(-) create mode 100644 inc/ramcore/RAMStats.h create mode 100644 src/ramcore/RAMStats.cxx create mode 100644 test/ramstatstests.cxx create mode 100644 tools/ramstats.cxx diff --git a/CMakeLists.txt b/CMakeLists.txt index 2bc3b44..7f62292 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -43,6 +43,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ramcore inc/ramcore/SamToTTree.h inc/ramcore/SamToNTuple.h inc/ramcore/RAMNTupleView.h + inc/ramcore/RAMStats.h SOURCES src/ttree/RAMRecord.cxx src/rntuple/RAMNTupleRecord.cxx @@ -50,6 +51,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ramcore src/ramcore/SamToTTree.cxx src/ramcore/SamToNTuple.cxx src/ramcore/RAMNTupleView.cxx + src/ramcore/RAMStats.cxx LINKDEF inc/ttree/LinkDef.h DEPENDENCIES diff --git a/inc/ramcore/RAMStats.h b/inc/ramcore/RAMStats.h new file mode 100644 index 0000000..47c73df --- /dev/null +++ b/inc/ramcore/RAMStats.h @@ -0,0 +1,39 @@ +#ifndef RAMCORE_RAMSTATS_H +#define RAMCORE_RAMSTATS_H + +#include +#include +#include + +namespace ramcore { + +/// Holds summary statistics computed from a RAM (RNTuple) file. +struct RAMStats { + uint64_t total_reads = 0; + uint64_t mapped_reads = 0; + uint64_t unmapped_reads = 0; + uint64_t duplicate_reads = 0; + uint64_t paired_reads = 0; + uint64_t properly_paired_reads = 0; + uint64_t forward_strand = 0; + uint64_t reverse_strand = 0; + + double mean_mapping_quality = 0.0; + double mean_read_length = 0.0; + + uint64_t total_bases = 0; + + std::map reads_per_chromosome; + + void Print() const; +}; + +/// Compute statistics from an RNTuple-based RAM file. +/// \param filename Path to the .root file produced by samtoramntuple. +/// \param verbose If true, print per-chromosome breakdown. +/// \return Populated RAMStats struct. +RAMStats ComputeStats(const char *filename, bool verbose = false); + +} // namespace ramcore + +#endif // RAMCORE_RAMSTATS_H diff --git a/src/ramcore/RAMStats.cxx b/src/ramcore/RAMStats.cxx new file mode 100644 index 0000000..2390670 --- /dev/null +++ b/src/ramcore/RAMStats.cxx @@ -0,0 +1,148 @@ +#include "ramcore/RAMStats.h" +#include "rntuple/RAMNTupleRecord.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace ramcore { + +static constexpr uint16_t FLAG_PAIRED = 0x1; +static constexpr uint16_t FLAG_PROPER_PAIR = 0x2; +static constexpr uint16_t FLAG_UNMAPPED = 0x4; +static constexpr uint16_t FLAG_REVERSE_STRAND = 0x10; +static constexpr uint16_t FLAG_DUPLICATE = 0x400; + +void RAMStats::Print() const +{ + auto pct = [](uint64_t a, uint64_t b) -> double { + return b == 0 ? 0.0 : 100.0 * static_cast(a) / static_cast(b); + }; + + std::cout << "\n=== RAMTools Statistics ===\n"; + std::cout << std::left + << std::setw(30) << "Total reads:" << total_reads << "\n" + << std::setw(30) << "Mapped reads:" << mapped_reads + << std::fixed << std::setprecision(2) + << " (" << pct(mapped_reads, total_reads) << "%)\n" + << std::setw(30) << "Unmapped reads:" << unmapped_reads + << " (" << pct(unmapped_reads, total_reads) << "%)\n" + << std::setw(30) << "Duplicate reads:" << duplicate_reads + << " (" << pct(duplicate_reads, total_reads) << "%)\n" + << std::setw(30) << "Paired reads:" << paired_reads + << " (" << pct(paired_reads, total_reads) << "%)\n" + << std::setw(30) << "Properly paired reads:" << properly_paired_reads + << " (" << pct(properly_paired_reads, total_reads) << "%)\n" + << std::setw(30) << "Forward strand:" << forward_strand + << " (" << pct(forward_strand, total_reads) << "%)\n" + << std::setw(30) << "Reverse strand:" << reverse_strand + << " (" << pct(reverse_strand, total_reads) << "%)\n" + << std::setw(30) << "Total bases:" << total_bases << "\n" + << std::setw(30) << "Mean read length:" + << std::fixed << std::setprecision(2) << mean_read_length << "\n" + << std::setw(30) << "Mean mapping quality:" + << std::fixed << std::setprecision(2) << mean_mapping_quality << "\n"; + + if (!reads_per_chromosome.empty()) { + std::cout << "\n--- Reads per Chromosome ---\n"; + for (const auto &[chrom, count] : reads_per_chromosome) { + std::cout << std::setw(20) << chrom << ": " + << count + << " (" << pct(count, total_reads) << "%)\n"; + } + } + std::cout << "===========================\n\n"; +} + +RAMStats ComputeStats(const char *filename, bool verbose) +{ + RAMStats stats; + + // Load reference name map from the file + RAMNTupleRecord::ReadAllRefs(filename); + RAMNTupleRefs *rnameRefs = RAMNTupleRecord::GetRnameRefs(); + + std::unique_ptr reader; + try { + reader = ROOT::RNTupleReader::Open("RAM", filename); + } catch (const std::exception &e) { + std::cerr << "Error opening file: " << e.what() << "\n"; + return stats; + } + + if (!reader) { + std::cerr << "Error: could not open RNTuple 'RAM' in " << filename << "\n"; + return stats; + } + + // Correct field names: all nested under "record." + auto viewFlag = reader->GetView("record.flag"); + auto viewMapQ = reader->GetView("record.mapq"); + auto viewSeq = reader->GetView("record.seq"); + auto viewRefId = reader->GetView("record.refid"); + + uint64_t mapq_sum = 0; + uint64_t len_sum = 0; + const uint64_t nEntries = reader->GetNEntries(); + + for (uint64_t i = 0; i < nEntries; ++i) { + stats.total_reads++; + + uint16_t flag = viewFlag(i); + uint8_t mapq = viewMapQ(i); + const std::string &seq = viewSeq(i); + int32_t refid = viewRefId(i); + + if (flag & FLAG_UNMAPPED) { + stats.unmapped_reads++; + } else { + stats.mapped_reads++; + mapq_sum += mapq; + } + + if (flag & FLAG_DUPLICATE) stats.duplicate_reads++; + if (flag & FLAG_PAIRED) stats.paired_reads++; + if (flag & FLAG_PROPER_PAIR) stats.properly_paired_reads++; + + if (flag & FLAG_REVERSE_STRAND) + stats.reverse_strand++; + else + stats.forward_strand++; + + // seq is encoded — first 4 bytes store sequence length as uint32_t + uint64_t rlen = 0; + if (seq.size() >= 4) { + rlen = *reinterpret_cast(seq.data()); + } + len_sum += rlen; + stats.total_bases += rlen; + + // Resolve chromosome name from refid + if (refid >= 0 && rnameRefs && + refid < static_cast(rnameRefs->Size())) { + const std::string &chrom = rnameRefs->GetRefName(refid); + if (!chrom.empty() && chrom != "*") { + stats.reads_per_chromosome[chrom]++; + } + } + } + + if (stats.total_reads > 0) + stats.mean_read_length = static_cast(len_sum) / stats.total_reads; + if (stats.mapped_reads > 0) + stats.mean_mapping_quality = static_cast(mapq_sum) / stats.mapped_reads; + + if (verbose) + stats.Print(); + + return stats; +} + +} // namespace ramcore diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 24b54f0..34ec781 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -36,3 +36,4 @@ install(TARGETS ramcoretests chromosome_split_test RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) +add_ramcore_test(ramstatstests ramstatstests.cxx) diff --git a/test/ramstatstests.cxx b/test/ramstatstests.cxx new file mode 100644 index 0000000..5d7a09e --- /dev/null +++ b/test/ramstatstests.cxx @@ -0,0 +1,112 @@ +#include +#include +#include +#include +#include + +#include "../benchmark/generate_sam_benchmark.h" +#include "ramcore/SamToNTuple.h" +#include "ramcore/RAMStats.h" + +namespace { + +class RAMStatsTest : public ::testing::Test { +protected: + static constexpr int kNumReads = 100; + const char *kSamFile = "stats_test.sam"; + const char *kRootFile = "stats_test_rntuple.root"; + + void SetUp() override + { + GenerateSAMFile(kSamFile, kNumReads); + std::remove(kRootFile); + // compression=505 (LZMA), quality_policy=0 — matches existing tests + samtoramntuple(kSamFile, kRootFile, true, true, true, 505, 0); + } + + void TearDown() override + { + std::remove(kSamFile); + std::remove(kRootFile); + } +}; + +/// Total reads reported must equal the number of entries in the RNTuple. +TEST_F(RAMStatsTest, TotalReadsMatchesNTupleEntries) +{ + auto stats = ramcore::ComputeStats(kRootFile); + + auto reader = ROOT::RNTupleReader::Open("RAM", kRootFile); + ASSERT_TRUE(reader != nullptr); + + EXPECT_EQ(stats.total_reads, static_cast(reader->GetNEntries())); + EXPECT_EQ(stats.total_reads, static_cast(kNumReads)); +} + +/// Mapped + unmapped must equal total reads (no reads are lost). +TEST_F(RAMStatsTest, MappedPlusUnmappedEqualsTotal) +{ + auto stats = ramcore::ComputeStats(kRootFile); + EXPECT_EQ(stats.mapped_reads + stats.unmapped_reads, stats.total_reads); +} + +/// Forward + reverse strand counts must equal total reads. +TEST_F(RAMStatsTest, StrandCountsEqualTotal) +{ + auto stats = ramcore::ComputeStats(kRootFile); + EXPECT_EQ(stats.forward_strand + stats.reverse_strand, stats.total_reads); +} + +/// Mean read length must be positive for a non-empty file. +TEST_F(RAMStatsTest, MeanReadLengthIsPositive) +{ + auto stats = ramcore::ComputeStats(kRootFile); + EXPECT_GT(stats.mean_read_length, 0.0); +} + +/// Total bases must be consistent with mean read length * total reads. +TEST_F(RAMStatsTest, TotalBasesConsistentWithMeanLength) +{ + auto stats = ramcore::ComputeStats(kRootFile); + double expected_bases = stats.mean_read_length * stats.total_reads; + // Allow small floating-point rounding tolerance + EXPECT_NEAR(static_cast(stats.total_bases), expected_bases, 1.0); +} + +/// Mean mapping quality must be in valid SAM range [0, 255]. +TEST_F(RAMStatsTest, MeanMappingQualityInValidRange) +{ + auto stats = ramcore::ComputeStats(kRootFile); + EXPECT_GE(stats.mean_mapping_quality, 0.0); + EXPECT_LE(stats.mean_mapping_quality, 255.0); +} + +/// Per-chromosome map must not be empty for a valid SAM file. +TEST_F(RAMStatsTest, ReadsPerChromosomeNotEmpty) +{ + auto stats = ramcore::ComputeStats(kRootFile); + EXPECT_FALSE(stats.reads_per_chromosome.empty()); +} + +/// Chromosome read counts must sum to at most total_reads +/// (unmapped reads with rname="*" are excluded from the map). +TEST_F(RAMStatsTest, ChromosomeCountsSumToAtMostTotal) +{ + auto stats = ramcore::ComputeStats(kRootFile); + uint64_t chrom_sum = 0; + for (const auto &[chrom, count] : stats.reads_per_chromosome) { + chrom_sum += count; + } + EXPECT_LE(chrom_sum, stats.total_reads); +} + +/// ComputeStats on a non-existent file must return zero total reads. +TEST(RAMStatsEdgeCases, NonExistentFileReturnsEmpty) +{ + auto stats = ramcore::ComputeStats("nonexistent_file.root"); + EXPECT_EQ(stats.total_reads, 0u); + +} + +/// ComputeStats on a non-existent file must return zero total reads. +} // namespace diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index b67568f..eb6b95f 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -22,7 +22,14 @@ ROOT_EXECUTABLE(ramntupleview ROOT::ROOTNTuple ) -install(TARGETS samtoram samtoramntuple ramntupleview +ROOT_EXECUTABLE(ramstats + ramstats.cxx + LIBRARIES + ramcore + ROOT::Core + ROOT::ROOTNTuple +) +install(TARGETS samtoram samtoramntuple ramntupleview ramstats RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) @@ -40,3 +47,4 @@ install(FILES ${ROOT_MACROS} DESTINATION share/ramtools/macros ) + diff --git a/tools/ramstats.cxx b/tools/ramstats.cxx new file mode 100644 index 0000000..2d93b88 --- /dev/null +++ b/tools/ramstats.cxx @@ -0,0 +1,45 @@ +/// \file ramstats.cxx +/// \brief Command-line tool to print summary statistics for a RAM (RNTuple) file. +/// +/// Usage: +/// ./tools/ramstats [--verbose] +/// +/// Prints per-file summary metrics equivalent to `samtools flagstat`: +/// total reads, mapped/unmapped, duplicates, paired, strand breakdown, +/// mean mapping quality, mean read length, total bases, reads per chromosome. + +#include "ramcore/RAMStats.h" +#include +#include + +int main(int argc, char **argv) +{ + if (argc < 2) { + std::cerr << "Usage: " << argv[0] << " [--verbose]\n"; + std::cerr << " Compute summary statistics for a RAM (RNTuple) file.\n"; + std::cerr << " --verbose Also print per-chromosome read counts.\n"; + return 1; + } + + const char *inputFile = argv[1]; + bool verbose = false; + for (int i = 2; i < argc; ++i) { + if (std::strcmp(argv[i], "--verbose") == 0) { + verbose = true; + } + } + + auto stats = ramcore::ComputeStats(inputFile, false); + + // Always print summary + stats.Print(); + + // Per-chromosome breakdown only if --verbose + if (verbose && !stats.reads_per_chromosome.empty()) { + // Already printed inside Print() when verbose=true, so re-call with verbose + // Here we just guard the flag correctly — Print() always shows chromosomes + // if they were collected; verbose controls collection in ComputeStats. + } + + return 0; +} From cb189320989ec0a25134bcf1499579d827515518 Mon Sep 17 00:00:00 2001 From: swetank18 Date: Tue, 10 Mar 2026 13:08:16 +0530 Subject: [PATCH 02/10] Address review: fix strand counting, UB reinterpret_cast, error handling, verbose flag --- inc/ramcore/RAMStats.h | 35 ++++++++++++----------- src/ramcore/RAMStats.cxx | 60 +++++++++++++++++++++------------------- test/ramstatstests.cxx | 44 ++++++++++++++--------------- tools/ramstats.cxx | 36 +++++++++++------------- 4 files changed, 89 insertions(+), 86 deletions(-) diff --git a/inc/ramcore/RAMStats.h b/inc/ramcore/RAMStats.h index 47c73df..6e0f1b5 100644 --- a/inc/ramcore/RAMStats.h +++ b/inc/ramcore/RAMStats.h @@ -7,32 +7,35 @@ namespace ramcore { -/// Holds summary statistics computed from a RAM (RNTuple) file. struct RAMStats { - uint64_t total_reads = 0; - uint64_t mapped_reads = 0; - uint64_t unmapped_reads = 0; - uint64_t duplicate_reads = 0; - uint64_t paired_reads = 0; + uint64_t total_reads = 0; + uint64_t mapped_reads = 0; + uint64_t unmapped_reads = 0; + uint64_t duplicate_reads = 0; + uint64_t paired_reads = 0; uint64_t properly_paired_reads = 0; - uint64_t forward_strand = 0; - uint64_t reverse_strand = 0; + uint64_t forward_strand = 0; + uint64_t reverse_strand = 0; - double mean_mapping_quality = 0.0; - double mean_read_length = 0.0; - - uint64_t total_bases = 0; + double mean_mapping_quality = 0.0; + double mean_read_length = 0.0; + uint64_t total_bases = 0; std::map reads_per_chromosome; void Print() const; }; +/// Result wrapper so callers can distinguish errors from empty files +struct RAMStatsResult { + RAMStats stats; + bool ok = false; + std::string error_message; +}; + /// Compute statistics from an RNTuple-based RAM file. -/// \param filename Path to the .root file produced by samtoramntuple. -/// \param verbose If true, print per-chromosome breakdown. -/// \return Populated RAMStats struct. -RAMStats ComputeStats(const char *filename, bool verbose = false); +/// Returns RAMStatsResult — check .ok before using .stats. +RAMStatsResult ComputeStats(const char *filename); } // namespace ramcore diff --git a/src/ramcore/RAMStats.cxx b/src/ramcore/RAMStats.cxx index 2390670..fe3908f 100644 --- a/src/ramcore/RAMStats.cxx +++ b/src/ramcore/RAMStats.cxx @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -41,9 +42,9 @@ void RAMStats::Print() const << std::setw(30) << "Properly paired reads:" << properly_paired_reads << " (" << pct(properly_paired_reads, total_reads) << "%)\n" << std::setw(30) << "Forward strand:" << forward_strand - << " (" << pct(forward_strand, total_reads) << "%)\n" + << " (" << pct(forward_strand, mapped_reads) << "% of mapped)\n" << std::setw(30) << "Reverse strand:" << reverse_strand - << " (" << pct(reverse_strand, total_reads) << "%)\n" + << " (" << pct(reverse_strand, mapped_reads) << "% of mapped)\n" << std::setw(30) << "Total bases:" << total_bases << "\n" << std::setw(30) << "Mean read length:" << std::fixed << std::setprecision(2) << mean_read_length << "\n" @@ -61,11 +62,23 @@ void RAMStats::Print() const std::cout << "===========================\n\n"; } -RAMStats ComputeStats(const char *filename, bool verbose) +/// Extract the original sequence length from an encoded seq field. +/// The field format is: [4-byte uint32_t length][packed 2-bit nucleotides] +/// We use memcpy to avoid undefined behaviour from unaligned reinterpret_cast. +static uint32_t DecodedSeqLength(const std::string &encoded_seq) +{ + if (encoded_seq.size() < 4) + return 0; + uint32_t length = 0; + std::memcpy(&length, encoded_seq.data(), sizeof(length)); + return length; +} + +RAMStatsResult ComputeStats(const char *filename) { RAMStats stats; - // Load reference name map from the file + // Load reference name map stored alongside the RNTuple RAMNTupleRecord::ReadAllRefs(filename); RAMNTupleRefs *rnameRefs = RAMNTupleRecord::GetRnameRefs(); @@ -73,16 +86,13 @@ RAMStats ComputeStats(const char *filename, bool verbose) try { reader = ROOT::RNTupleReader::Open("RAM", filename); } catch (const std::exception &e) { - std::cerr << "Error opening file: " << e.what() << "\n"; - return stats; + // Return error result so caller can distinguish bad file from empty file + return RAMStatsResult{stats, false, e.what()}; } - if (!reader) { - std::cerr << "Error: could not open RNTuple 'RAM' in " << filename << "\n"; - return stats; - } + if (!reader) + return RAMStatsResult{stats, false, "RNTupleReader::Open returned nullptr"}; - // Correct field names: all nested under "record." auto viewFlag = reader->GetView("record.flag"); auto viewMapQ = reader->GetView("record.mapq"); auto viewSeq = reader->GetView("record.seq"); @@ -102,35 +112,32 @@ RAMStats ComputeStats(const char *filename, bool verbose) if (flag & FLAG_UNMAPPED) { stats.unmapped_reads++; + // Unmapped reads have no strand — do NOT count them in strand stats } else { stats.mapped_reads++; mapq_sum += mapq; + // Strand is only meaningful for mapped reads + if (flag & FLAG_REVERSE_STRAND) + stats.reverse_strand++; + else + stats.forward_strand++; } if (flag & FLAG_DUPLICATE) stats.duplicate_reads++; if (flag & FLAG_PAIRED) stats.paired_reads++; if (flag & FLAG_PROPER_PAIR) stats.properly_paired_reads++; - if (flag & FLAG_REVERSE_STRAND) - stats.reverse_strand++; - else - stats.forward_strand++; - - // seq is encoded — first 4 bytes store sequence length as uint32_t - uint64_t rlen = 0; - if (seq.size() >= 4) { - rlen = *reinterpret_cast(seq.data()); - } + // Extract sequence length from encoded field (4-byte length prefix + packed nibbles) + uint32_t rlen = DecodedSeqLength(seq); len_sum += rlen; stats.total_bases += rlen; - // Resolve chromosome name from refid + // Resolve chromosome name from integer refid via the ref name table if (refid >= 0 && rnameRefs && refid < static_cast(rnameRefs->Size())) { const std::string &chrom = rnameRefs->GetRefName(refid); - if (!chrom.empty() && chrom != "*") { + if (!chrom.empty() && chrom != "*") stats.reads_per_chromosome[chrom]++; - } } } @@ -139,10 +146,7 @@ RAMStats ComputeStats(const char *filename, bool verbose) if (stats.mapped_reads > 0) stats.mean_mapping_quality = static_cast(mapq_sum) / stats.mapped_reads; - if (verbose) - stats.Print(); - - return stats; + return RAMStatsResult{stats, true, ""}; } } // namespace ramcore diff --git a/test/ramstatstests.cxx b/test/ramstatstests.cxx index 5d7a09e..dcda524 100644 --- a/test/ramstatstests.cxx +++ b/test/ramstatstests.cxx @@ -34,77 +34,77 @@ class RAMStatsTest : public ::testing::Test { /// Total reads reported must equal the number of entries in the RNTuple. TEST_F(RAMStatsTest, TotalReadsMatchesNTupleEntries) { - auto stats = ramcore::ComputeStats(kRootFile); + auto result = ramcore::ComputeStats(kRootFile); auto reader = ROOT::RNTupleReader::Open("RAM", kRootFile); ASSERT_TRUE(reader != nullptr); - EXPECT_EQ(stats.total_reads, static_cast(reader->GetNEntries())); - EXPECT_EQ(stats.total_reads, static_cast(kNumReads)); + EXPECT_EQ(result.stats.total_reads, static_cast(reader->GetNEntries())); + EXPECT_EQ(result.stats.total_reads, static_cast(kNumReads)); } /// Mapped + unmapped must equal total reads (no reads are lost). TEST_F(RAMStatsTest, MappedPlusUnmappedEqualsTotal) { - auto stats = ramcore::ComputeStats(kRootFile); - EXPECT_EQ(stats.mapped_reads + stats.unmapped_reads, stats.total_reads); + auto result = ramcore::ComputeStats(kRootFile); + EXPECT_EQ(result.stats.mapped_reads + result.stats.unmapped_reads, result.stats.total_reads); } /// Forward + reverse strand counts must equal total reads. TEST_F(RAMStatsTest, StrandCountsEqualTotal) { - auto stats = ramcore::ComputeStats(kRootFile); - EXPECT_EQ(stats.forward_strand + stats.reverse_strand, stats.total_reads); + auto result = ramcore::ComputeStats(kRootFile); + EXPECT_EQ(result.stats.forward_strand + result.stats.reverse_strand, result.stats.total_reads); } /// Mean read length must be positive for a non-empty file. TEST_F(RAMStatsTest, MeanReadLengthIsPositive) { - auto stats = ramcore::ComputeStats(kRootFile); - EXPECT_GT(stats.mean_read_length, 0.0); + auto result = ramcore::ComputeStats(kRootFile); + EXPECT_GT(result.stats.mean_read_length, 0.0); } /// Total bases must be consistent with mean read length * total reads. TEST_F(RAMStatsTest, TotalBasesConsistentWithMeanLength) { - auto stats = ramcore::ComputeStats(kRootFile); - double expected_bases = stats.mean_read_length * stats.total_reads; + auto result = ramcore::ComputeStats(kRootFile); + double expected_bases = result.stats.mean_read_length * result.stats.total_reads; // Allow small floating-point rounding tolerance - EXPECT_NEAR(static_cast(stats.total_bases), expected_bases, 1.0); + EXPECT_NEAR(static_cast(result.stats.total_bases), expected_bases, 1.0); } /// Mean mapping quality must be in valid SAM range [0, 255]. TEST_F(RAMStatsTest, MeanMappingQualityInValidRange) { - auto stats = ramcore::ComputeStats(kRootFile); - EXPECT_GE(stats.mean_mapping_quality, 0.0); - EXPECT_LE(stats.mean_mapping_quality, 255.0); + auto result = ramcore::ComputeStats(kRootFile); + EXPECT_GE(result.stats.mean_mapping_quality, 0.0); + EXPECT_LE(result.stats.mean_mapping_quality, 255.0); } /// Per-chromosome map must not be empty for a valid SAM file. TEST_F(RAMStatsTest, ReadsPerChromosomeNotEmpty) { - auto stats = ramcore::ComputeStats(kRootFile); - EXPECT_FALSE(stats.reads_per_chromosome.empty()); + auto result = ramcore::ComputeStats(kRootFile); + EXPECT_FALSE(result.stats.reads_per_chromosome.empty()); } /// Chromosome read counts must sum to at most total_reads /// (unmapped reads with rname="*" are excluded from the map). TEST_F(RAMStatsTest, ChromosomeCountsSumToAtMostTotal) { - auto stats = ramcore::ComputeStats(kRootFile); + auto result = ramcore::ComputeStats(kRootFile); uint64_t chrom_sum = 0; - for (const auto &[chrom, count] : stats.reads_per_chromosome) { + for (const auto &[chrom, count] : result.stats.reads_per_chromosome) { chrom_sum += count; } - EXPECT_LE(chrom_sum, stats.total_reads); + EXPECT_LE(chrom_sum, result.stats.total_reads); } /// ComputeStats on a non-existent file must return zero total reads. TEST(RAMStatsEdgeCases, NonExistentFileReturnsEmpty) { - auto stats = ramcore::ComputeStats("nonexistent_file.root"); - EXPECT_EQ(stats.total_reads, 0u); + auto result = ramcore::ComputeStats("nonexistent_file.root"); + EXPECT_EQ(result.stats.total_reads, 0u); } diff --git a/tools/ramstats.cxx b/tools/ramstats.cxx index 2d93b88..216a11f 100644 --- a/tools/ramstats.cxx +++ b/tools/ramstats.cxx @@ -1,13 +1,3 @@ -/// \file ramstats.cxx -/// \brief Command-line tool to print summary statistics for a RAM (RNTuple) file. -/// -/// Usage: -/// ./tools/ramstats [--verbose] -/// -/// Prints per-file summary metrics equivalent to `samtools flagstat`: -/// total reads, mapped/unmapped, duplicates, paired, strand breakdown, -/// mean mapping quality, mean read length, total bases, reads per chromosome. - #include "ramcore/RAMStats.h" #include #include @@ -24,21 +14,27 @@ int main(int argc, char **argv) const char *inputFile = argv[1]; bool verbose = false; for (int i = 2; i < argc; ++i) { - if (std::strcmp(argv[i], "--verbose") == 0) { + if (std::strcmp(argv[i], "--verbose") == 0) verbose = true; - } } - auto stats = ramcore::ComputeStats(inputFile, false); + auto result = ramcore::ComputeStats(inputFile); + + if (!result.ok) { + std::cerr << "Error: " << result.error_message << "\n"; + return 1; + } - // Always print summary - stats.Print(); + // Computation and I/O are separate — tool controls printing + result.stats.Print(); - // Per-chromosome breakdown only if --verbose - if (verbose && !stats.reads_per_chromosome.empty()) { - // Already printed inside Print() when verbose=true, so re-call with verbose - // Here we just guard the flag correctly — Print() always shows chromosomes - // if they were collected; verbose controls collection in ComputeStats. + if (verbose && !result.stats.reads_per_chromosome.empty()) { + std::cout << "\n--- Reads per Chromosome ---\n"; + for (const auto &[chrom, count] : result.stats.reads_per_chromosome) { + double pct = 100.0 * count / result.stats.total_reads; + std::cout << " " << chrom << ": " << count + << " (" << pct << "%)\n"; + } } return 0; From 376190ab284c16acbc55f23307dfda01c0ee6243 Mon Sep 17 00:00:00 2001 From: swetank18 Date: Tue, 10 Mar 2026 13:24:28 +0530 Subject: [PATCH 03/10] Fix: remove double chromosome print, add InitializeRefs before ReadAllRefs --- src/ramcore/RAMStats.cxx | 9 +-------- tools/ramstats.cxx | 2 ++ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/src/ramcore/RAMStats.cxx b/src/ramcore/RAMStats.cxx index fe3908f..8eb2211 100644 --- a/src/ramcore/RAMStats.cxx +++ b/src/ramcore/RAMStats.cxx @@ -51,14 +51,6 @@ void RAMStats::Print() const << std::setw(30) << "Mean mapping quality:" << std::fixed << std::setprecision(2) << mean_mapping_quality << "\n"; - if (!reads_per_chromosome.empty()) { - std::cout << "\n--- Reads per Chromosome ---\n"; - for (const auto &[chrom, count] : reads_per_chromosome) { - std::cout << std::setw(20) << chrom << ": " - << count - << " (" << pct(count, total_reads) << "%)\n"; - } - } std::cout << "===========================\n\n"; } @@ -79,6 +71,7 @@ RAMStatsResult ComputeStats(const char *filename) RAMStats stats; // Load reference name map stored alongside the RNTuple + RAMNTupleRecord::InitializeRefs(); RAMNTupleRecord::ReadAllRefs(filename); RAMNTupleRefs *rnameRefs = RAMNTupleRecord::GetRnameRefs(); diff --git a/tools/ramstats.cxx b/tools/ramstats.cxx index 216a11f..b9e2fcb 100644 --- a/tools/ramstats.cxx +++ b/tools/ramstats.cxx @@ -28,6 +28,7 @@ int main(int argc, char **argv) // Computation and I/O are separate — tool controls printing result.stats.Print(); + // Per-chromosome breakdown only with --verbose if (verbose && !result.stats.reads_per_chromosome.empty()) { std::cout << "\n--- Reads per Chromosome ---\n"; for (const auto &[chrom, count] : result.stats.reads_per_chromosome) { @@ -35,6 +36,7 @@ int main(int argc, char **argv) std::cout << " " << chrom << ": " << count << " (" << pct << "%)\n"; } + std::cout << "\n"; } return 0; From 7eb1f0eee16ed3918800d300d70339af0a0d2b0d Mon Sep 17 00:00:00 2001 From: swetank18 Date: Tue, 10 Mar 2026 13:37:44 +0530 Subject: [PATCH 04/10] Fix clang-tidy warnings: remove unused headers, fix include order --- src/ramcore/RAMStats.cxx | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/ramcore/RAMStats.cxx b/src/ramcore/RAMStats.cxx index 8eb2211..6fe55e1 100644 --- a/src/ramcore/RAMStats.cxx +++ b/src/ramcore/RAMStats.cxx @@ -3,15 +3,15 @@ #include #include -#include #include #include -#include +#include #include -#include -#include +#include #include +#include +#include namespace ramcore { @@ -50,13 +50,12 @@ void RAMStats::Print() const << std::fixed << std::setprecision(2) << mean_read_length << "\n" << std::setw(30) << "Mean mapping quality:" << std::fixed << std::setprecision(2) << mean_mapping_quality << "\n"; - std::cout << "===========================\n\n"; } /// Extract the original sequence length from an encoded seq field. /// The field format is: [4-byte uint32_t length][packed 2-bit nucleotides] -/// We use memcpy to avoid undefined behaviour from unaligned reinterpret_cast. +/// memcpy is used instead of reinterpret_cast to avoid alignment-dependent UB. static uint32_t DecodedSeqLength(const std::string &encoded_seq) { if (encoded_seq.size() < 4) @@ -70,7 +69,7 @@ RAMStatsResult ComputeStats(const char *filename) { RAMStats stats; - // Load reference name map stored alongside the RNTuple + // Initialize and load the reference name map stored alongside the RNTuple RAMNTupleRecord::InitializeRefs(); RAMNTupleRecord::ReadAllRefs(filename); RAMNTupleRefs *rnameRefs = RAMNTupleRecord::GetRnameRefs(); @@ -79,7 +78,6 @@ RAMStatsResult ComputeStats(const char *filename) try { reader = ROOT::RNTupleReader::Open("RAM", filename); } catch (const std::exception &e) { - // Return error result so caller can distinguish bad file from empty file return RAMStatsResult{stats, false, e.what()}; } @@ -105,7 +103,7 @@ RAMStatsResult ComputeStats(const char *filename) if (flag & FLAG_UNMAPPED) { stats.unmapped_reads++; - // Unmapped reads have no strand — do NOT count them in strand stats + // Unmapped reads have no strand — excluded from strand stats } else { stats.mapped_reads++; mapq_sum += mapq; @@ -135,9 +133,11 @@ RAMStatsResult ComputeStats(const char *filename) } if (stats.total_reads > 0) - stats.mean_read_length = static_cast(len_sum) / stats.total_reads; + stats.mean_read_length = + static_cast(len_sum) / static_cast(stats.total_reads); if (stats.mapped_reads > 0) - stats.mean_mapping_quality = static_cast(mapq_sum) / stats.mapped_reads; + stats.mean_mapping_quality = + static_cast(mapq_sum) / static_cast(stats.mapped_reads); return RAMStatsResult{stats, true, ""}; } From 61415415bcdf3e09b998b8d85fa3c8e6427c467e Mon Sep 17 00:00:00 2001 From: swetank18 Date: Tue, 10 Mar 2026 13:52:31 +0530 Subject: [PATCH 05/10] Add tests for SamParser and RAMNTupleUtils to improve coverage (fixes #26) --- test/CMakeLists.txt | 1 + test/samparsertest.cxx | 246 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 247 insertions(+) create mode 100644 test/samparsertest.cxx diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 34ec781..2eb7fd7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -37,3 +37,4 @@ install(TARGETS ramcoretests chromosome_split_test ) add_ramcore_test(ramstatstests ramstatstests.cxx) +add_ramcore_test(samparsertest samparsertest.cxx) diff --git a/test/samparsertest.cxx b/test/samparsertest.cxx new file mode 100644 index 0000000..20c7f52 --- /dev/null +++ b/test/samparsertest.cxx @@ -0,0 +1,246 @@ +#include +#include +#include +#include +#include + +#include "ramcore/SamParser.h" +#include "rntuple/RAMNTupleRecord.h" + +// ───────────────────────────────────────────────────────────────────────────── +// Helpers +// ───────────────────────────────────────────────────────────────────────────── + +static void WriteSAM(const char *path, const std::string &content) +{ + std::ofstream f(path); + f << content; +} + +// ───────────────────────────────────────────────────────────────────────────── +// SamParser tests +// ───────────────────────────────────────────────────────────────────────────── + +namespace { + +class SamParserTest : public ::testing::Test { +protected: + const char *kSamFile = "parser_test.sam"; + void TearDown() override { std::remove(kSamFile); } +}; + +/// Parser must return false for a non-existent file. +TEST_F(SamParserTest, NonExistentFileReturnsFalse) +{ + ramcore::SamParser parser; + bool ok = parser.ParseFile("no_such_file.sam", nullptr, nullptr); + EXPECT_FALSE(ok); +} + +/// Header lines starting with '@' must trigger the header callback. +TEST_F(SamParserTest, HeaderCallbackFired) +{ + WriteSAM(kSamFile, + "@HD\tVN:1.6\tSO:coordinate\n" + "@SQ\tSN:chr1\tLN:248956422\n"); + + ramcore::SamParser parser; + std::vector tags; + parser.ParseFile(kSamFile, + [&](const std::string &tag, const std::string &) { tags.push_back(tag); }, + nullptr); + + ASSERT_EQ(tags.size(), 2u); + EXPECT_EQ(tags[0], "@HD"); + EXPECT_EQ(tags[1], "@SQ"); +} + +/// Record callback must fire once per alignment line. +TEST_F(SamParserTest, RecordCallbackFiredForEachAlignment) +{ + WriteSAM(kSamFile, + "@HD\tVN:1.6\n" + "read1\t0\tchr1\t100\t60\t10M\t*\t0\t0\tACGTACGTAC\t*\n" + "read2\t16\tchr1\t200\t60\t10M\t*\t0\t0\tACGTACGTAC\t*\n"); + + ramcore::SamParser parser; + size_t count = 0; + parser.ParseFile(kSamFile, nullptr, + [&](const ramcore::SamRecord &, size_t) { ++count; }); + + EXPECT_EQ(count, 2u); + EXPECT_EQ(parser.GetRecordsProcessed(), 2u); +} + +/// Parsed record fields must match the SAM columns exactly. +TEST_F(SamParserTest, RecordFieldsParsedCorrectly) +{ + WriteSAM(kSamFile, + "@HD\tVN:1.6\n" + "myread\t99\tchr2\t500\t30\t5M2I3M\t=\t600\t110\tACGTACGTAC\tIIIIIIIIII\n"); + + ramcore::SamParser parser; + ramcore::SamRecord captured; + parser.ParseFile(kSamFile, nullptr, + [&](const ramcore::SamRecord &rec, size_t) { captured = rec; }); + + EXPECT_EQ(captured.qname, "myread"); + EXPECT_EQ(captured.flag, 99); + EXPECT_EQ(captured.rname, "chr2"); + EXPECT_EQ(captured.pos, 500); + EXPECT_EQ(captured.mapq, 30); + EXPECT_EQ(captured.cigar, "5M2I3M"); + EXPECT_EQ(captured.rnext, "="); + EXPECT_EQ(captured.pnext, 600); + EXPECT_EQ(captured.tlen, 110); + EXPECT_EQ(captured.seq, "ACGTACGTAC"); + EXPECT_EQ(captured.qual, "IIIIIIIIII"); +} + +/// An empty SAM file (no headers, no records) must parse successfully. +TEST_F(SamParserTest, EmptyFileParseSucceeds) +{ + WriteSAM(kSamFile, ""); + ramcore::SamParser parser; + bool ok = parser.ParseFile(kSamFile, nullptr, nullptr); + EXPECT_TRUE(ok); + EXPECT_EQ(parser.GetRecordsProcessed(), 0u); +} + +/// A SAM file with only headers and no records must produce zero record calls. +TEST_F(SamParserTest, HeaderOnlyFileProducesNoRecords) +{ + WriteSAM(kSamFile, + "@HD\tVN:1.6\n" + "@SQ\tSN:chr1\tLN:1000\n"); + + ramcore::SamParser parser; + size_t count = 0; + parser.ParseFile(kSamFile, nullptr, + [&](const ramcore::SamRecord &, size_t) { ++count; }); + + EXPECT_EQ(count, 0u); +} + +/// Optional fields must be captured in the record. +TEST_F(SamParserTest, OptionalFieldsCaptured) +{ + WriteSAM(kSamFile, + "@HD\tVN:1.6\n" + "read1\t0\tchr1\t100\t60\t10M\t*\t0\t0\tACGTACGTAC\t*\tNM:i:0\tRG:Z:sample1\n"); + + ramcore::SamParser parser; + ramcore::SamRecord captured; + parser.ParseFile(kSamFile, nullptr, + [&](const ramcore::SamRecord &rec, size_t) { captured = rec; }); + + EXPECT_GE(captured.optional_fields.size(), 1u); +} + +/// Lines processed counter must include both header and record lines. +TEST_F(SamParserTest, LinesProcessedCountIsCorrect) +{ + WriteSAM(kSamFile, + "@HD\tVN:1.6\n" + "@SQ\tSN:chr1\tLN:1000\n" + "read1\t0\tchr1\t100\t60\t10M\t*\t0\t0\tACGTACGTAC\t*\n"); + + ramcore::SamParser parser; + parser.ParseFile(kSamFile, nullptr, nullptr); + EXPECT_EQ(parser.GetLinesProcessed(), 3u); +} + +/// StripCRLF must remove trailing CR and LF characters. +TEST(StripCRLFTest, RemovesTrailingCRLF) +{ + char buf1[] = "hello\r\n"; + ramcore::StripCRLF(buf1); + EXPECT_STREQ(buf1, "hello"); + + char buf2[] = "world\n"; + ramcore::StripCRLF(buf2); + EXPECT_STREQ(buf2, "world"); + + char buf3[] = "noend"; + ramcore::StripCRLF(buf3); + EXPECT_STREQ(buf3, "noend"); +} + +// ───────────────────────────────────────────────────────────────────────────── +// RAMNTupleUtils encode/decode tests +// ───────────────────────────────────────────────────────────────────────────── + +/// EncodeSequence + DecodeSequence must be inverse operations. +TEST(RAMNTupleUtilsTest, SequenceRoundTrip) +{ + const std::string original = "ACGTACGTACGT"; + std::string encoded = RAMNTupleUtils::EncodeSequence(original); + + // Encoded format: 4-byte length prefix + packed nibbles + ASSERT_GE(encoded.size(), 4u); + uint32_t stored_len = 0; + std::memcpy(&stored_len, encoded.data(), 4); + EXPECT_EQ(stored_len, original.size()); + + std::string decoded = RAMNTupleUtils::DecodeSequence(encoded.substr(4), original.size()); + EXPECT_EQ(decoded, original); +} + +/// Odd-length sequences must round-trip correctly. +TEST(RAMNTupleUtilsTest, OddLengthSequenceRoundTrip) +{ + const std::string original = "ACGTA"; + std::string encoded = RAMNTupleUtils::EncodeSequence(original); + std::string decoded = RAMNTupleUtils::DecodeSequence(encoded.substr(4), original.size()); + EXPECT_EQ(decoded, original); +} + +/// Empty sequence must encode and decode without crashing. +TEST(RAMNTupleUtilsTest, EmptySequenceRoundTrip) +{ + const std::string original = ""; + std::string encoded = RAMNTupleUtils::EncodeSequence(original); + std::string decoded = RAMNTupleUtils::DecodeSequence(encoded.substr(4), 0); + EXPECT_EQ(decoded, original); +} + +/// EncodeQuality + DecodeQuality must be inverse for default Phred33 mode. +TEST(RAMNTupleUtilsTest, QualityRoundTripPhred33) +{ + const std::string original = "IIIIIIIIII"; + uint32_t flags = RAMNTupleRecord::kPhred33; + std::string encoded = RAMNTupleUtils::EncodeQuality(original, flags); + std::string decoded = RAMNTupleUtils::DecodeQuality(encoded, flags); + EXPECT_EQ(decoded, original); +} + +/// Quality scores must round-trip when drop mode is used. +TEST(RAMNTupleUtilsTest, QualityRoundTripDropMode) +{ + const std::string original = "IIIIIIIIII"; + uint32_t flags = RAMNTupleRecord::kDrop; + std::string encoded = RAMNTupleUtils::EncodeQuality(original, flags); + // In drop mode decoded quality should be empty or placeholder + std::string decoded = RAMNTupleUtils::DecodeQuality(encoded, flags); + // Just verify no crash and result is a string + EXPECT_NO_THROW(RAMNTupleUtils::DecodeQuality(encoded, flags)); +} + +/// ParseCIGAR + FormatCIGAR must be inverse operations. +TEST(RAMNTupleUtilsTest, CIGARRoundTrip) +{ + const std::string original = "10M2I5M3D"; + auto ops = RAMNTupleUtils::ParseCIGAR(original); + EXPECT_FALSE(ops.empty()); + std::string formatted = RAMNTupleUtils::FormatCIGAR(ops); + EXPECT_EQ(formatted, original); +} + +/// Wildcard CIGAR '*' must parse without crashing. +TEST(RAMNTupleUtilsTest, WildcardCIGARParsesCleanly) +{ + auto ops = RAMNTupleUtils::ParseCIGAR("*"); + EXPECT_TRUE(ops.empty()); +} + +} // namespace From 384864f75184b9ee9c6ebf6ddae1b71143c77064 Mon Sep 17 00:00:00 2001 From: swetank18 Date: Tue, 10 Mar 2026 14:58:00 +0530 Subject: [PATCH 06/10] Add ramsort tool: coordinate and name sort for RAM (RNTuple) files --- CMakeLists.txt | 2 + inc/ramcore/RAMSort.h | 11 ++++ src/ramcore/RAMSort.cxx | 93 ++++++++++++++++++++++++++++++ test/CMakeLists.txt | 1 + test/ramsorttests.cxx | 121 ++++++++++++++++++++++++++++++++++++++++ tools/CMakeLists.txt | 7 +++ tools/ramsort.cxx | 27 +++++++++ 7 files changed, 262 insertions(+) create mode 100644 inc/ramcore/RAMSort.h create mode 100644 src/ramcore/RAMSort.cxx create mode 100644 test/ramsorttests.cxx create mode 100644 tools/ramsort.cxx diff --git a/CMakeLists.txt b/CMakeLists.txt index 7f62292..29c0faa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -44,6 +44,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ramcore inc/ramcore/SamToNTuple.h inc/ramcore/RAMNTupleView.h inc/ramcore/RAMStats.h + inc/ramcore/RAMSort.h SOURCES src/ttree/RAMRecord.cxx src/rntuple/RAMNTupleRecord.cxx @@ -52,6 +53,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ramcore src/ramcore/SamToNTuple.cxx src/ramcore/RAMNTupleView.cxx src/ramcore/RAMStats.cxx + src/ramcore/RAMSort.cxx LINKDEF inc/ttree/LinkDef.h DEPENDENCIES diff --git a/inc/ramcore/RAMSort.h b/inc/ramcore/RAMSort.h new file mode 100644 index 0000000..3baea47 --- /dev/null +++ b/inc/ramcore/RAMSort.h @@ -0,0 +1,11 @@ +#ifndef RAMCORE_RAMSORT_H +#define RAMCORE_RAMSORT_H + +/// Sort a RAM (RNTuple) file by coordinate (refid, pos) or by QNAME. +/// \param inputFile Path to input .root RAM file +/// \param outputFile Path to output .root RAM file +/// \param byName If true, sort by QNAME; otherwise sort by (refid, pos) +/// \return 0 on success, 1 on error +int ramsortntuple(const char *inputFile, const char *outputFile, bool byName = false); + +#endif // RAMCORE_RAMSORT_H diff --git a/src/ramcore/RAMSort.cxx b/src/ramcore/RAMSort.cxx new file mode 100644 index 0000000..2a4a586 --- /dev/null +++ b/src/ramcore/RAMSort.cxx @@ -0,0 +1,93 @@ +#include "ramcore/RAMSort.h" +#include "rntuple/RAMNTupleRecord.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +int ramsortntuple(const char *inputFile, const char *outputFile, bool byName) +{ + RAMNTupleRecord::ReadAllRefs(inputFile); + + std::unique_ptr reader; + try { + reader = ROOT::RNTupleReader::Open("RAM", inputFile); + } catch (const std::exception &e) { + std::cerr << "Error opening input: " << e.what() << "\n"; + return 1; + } + + const uint64_t nEntries = reader->GetNEntries(); + if (nEntries == 0) { + std::cerr << "Input file has no entries.\n"; + return 1; + } + + auto viewRefId = reader->GetView("record.refid"); + auto viewPos = reader->GetView("record.pos"); + auto viewQname = reader->GetView("record.qname"); + + std::vector order(nEntries); + std::iota(order.begin(), order.end(), 0); + + std::cout << "Sorting " << nEntries << " records"; + if (byName) + std::cout << " by QNAME...\n"; + else + std::cout << " by coordinate (refid, pos)...\n"; + + if (byName) { + std::vector qnames(nEntries); + for (uint64_t i = 0; i < nEntries; ++i) + qnames[i] = viewQname(i); + std::stable_sort(order.begin(), order.end(), + [&](uint64_t a, uint64_t b) { return qnames[a] < qnames[b]; }); + } else { + std::vector refids(nEntries), positions(nEntries); + for (uint64_t i = 0; i < nEntries; ++i) { + refids[i] = viewRefId(i); + positions[i] = viewPos(i); + } + std::stable_sort(order.begin(), order.end(), [&](uint64_t a, uint64_t b) { + if (refids[a] != refids[b]) + return refids[a] < refids[b]; + return positions[a] < positions[b]; + }); + } + + auto viewRecord = reader->GetView("record"); + + auto rootFile = std::unique_ptr(TFile::Open(outputFile, "RECREATE")); + if (!rootFile || !rootFile->IsOpen()) { + std::cerr << "Error: could not create output file " << outputFile << "\n"; + return 1; + } + + RAMNTupleRecord::InitializeRefs(); + auto model = RAMNTupleRecord::MakeModel(); + ROOT::RNTupleWriteOptions writeOptions; + writeOptions.SetCompression(505); + auto writer = ROOT::RNTupleWriter::Append(std::move(model), "RAM", *rootFile, writeOptions); + auto entry = writer->GetModel().CreateEntry(); + auto recordPtr = entry->GetPtr("record"); + + for (uint64_t idx : order) { + *recordPtr = viewRecord(idx); + writer->Fill(*entry); + } + + RAMNTupleRecord::WriteAllRefs(*rootFile); + RAMNTupleRecord::WriteIndex(*rootFile); + + std::cout << "Sorted output written to " << outputFile << "\n"; + return 0; +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2eb7fd7..08f7296 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -37,4 +37,5 @@ install(TARGETS ramcoretests chromosome_split_test ) add_ramcore_test(ramstatstests ramstatstests.cxx) +add_ramcore_test(ramsorttests ramsorttests.cxx) add_ramcore_test(samparsertest samparsertest.cxx) diff --git a/test/ramsorttests.cxx b/test/ramsorttests.cxx new file mode 100644 index 0000000..1ae2212 --- /dev/null +++ b/test/ramsorttests.cxx @@ -0,0 +1,121 @@ +#include +#include +#include +#include + +#include "../benchmark/generate_sam_benchmark.h" +#include "ramcore/RAMSort.h" +#include "ramcore/SamToNTuple.h" + +namespace { + +class RAMSortTest : public ::testing::Test { +protected: + static constexpr int kNumReads = 200; + const char *kSamFile = "sort_test.sam"; + const char *kUnsortedFile = "sort_test_unsorted.root"; + const char *kSortedFile = "sort_test_sorted.root"; + const char *kNameSortFile = "sort_test_namesort.root"; + + void SetUp() override + { + GenerateSAMFile(kSamFile, kNumReads); + std::remove(kUnsortedFile); + std::remove(kSortedFile); + std::remove(kNameSortFile); + samtoramntuple(kSamFile, kUnsortedFile, true, true, true, 505, 0); + } + + void TearDown() override + { + std::remove(kSamFile); + std::remove(kUnsortedFile); + std::remove(kSortedFile); + std::remove(kNameSortFile); + } +}; + +TEST_F(RAMSortTest, EntryCountPreserved) +{ + ASSERT_EQ(ramsortntuple(kUnsortedFile, kSortedFile), 0); + + auto readerIn = ROOT::RNTupleReader::Open("RAM", kUnsortedFile); + auto readerOut = ROOT::RNTupleReader::Open("RAM", kSortedFile); + ASSERT_NE(readerIn, nullptr); + ASSERT_NE(readerOut, nullptr); + EXPECT_EQ(readerIn->GetNEntries(), readerOut->GetNEntries()); +} + +TEST_F(RAMSortTest, CoordinateSortOrder) +{ + ASSERT_EQ(ramsortntuple(kUnsortedFile, kSortedFile), 0); + + auto reader = ROOT::RNTupleReader::Open("RAM", kSortedFile); + ASSERT_NE(reader, nullptr); + + auto viewRefId = reader->GetView("record.refid"); + auto viewPos = reader->GetView("record.pos"); + + int32_t prevRefId = -1, prevPos = -1; + for (uint64_t i = 0; i < reader->GetNEntries(); ++i) { + int32_t refid = viewRefId(i); + int32_t pos = viewPos(i); + if (refid == prevRefId) { + EXPECT_GE(pos, prevPos) << "pos out of order at entry " << i; + } else { + EXPECT_GE(refid, prevRefId) << "refid out of order at entry " << i; + } + prevRefId = refid; + prevPos = pos; + } +} + +TEST_F(RAMSortTest, NameSortOrder) +{ + ASSERT_EQ(ramsortntuple(kUnsortedFile, kNameSortFile, true), 0); + + auto reader = ROOT::RNTupleReader::Open("RAM", kNameSortFile); + ASSERT_NE(reader, nullptr); + + auto viewQname = reader->GetView("record.qname"); + + std::string prev = ""; + for (uint64_t i = 0; i < reader->GetNEntries(); ++i) { + std::string qname = viewQname(i); + EXPECT_GE(qname, prev) << "qname out of order at entry " << i; + prev = qname; + } +} + +TEST_F(RAMSortTest, IdempotentSort) +{ + ASSERT_EQ(ramsortntuple(kUnsortedFile, kSortedFile), 0); + + const char *doubleSorted = "sort_test_double.root"; + ASSERT_EQ(ramsortntuple(kSortedFile, doubleSorted), 0); + + auto r1 = ROOT::RNTupleReader::Open("RAM", kSortedFile); + auto r2 = ROOT::RNTupleReader::Open("RAM", doubleSorted); + ASSERT_NE(r1, nullptr); + ASSERT_NE(r2, nullptr); + EXPECT_EQ(r1->GetNEntries(), r2->GetNEntries()); + + auto v1refid = r1->GetView("record.refid"); + auto v2refid = r2->GetView("record.refid"); + auto v1pos = r1->GetView("record.pos"); + auto v2pos = r2->GetView("record.pos"); + + for (uint64_t i = 0; i < r1->GetNEntries(); ++i) { + EXPECT_EQ(v1refid(i), v2refid(i)); + EXPECT_EQ(v1pos(i), v2pos(i)); + } + std::remove(doubleSorted); +} + +TEST(RAMSortEdgeCases, MissingInputFileReturnsError) +{ + int ret = ramsortntuple("nonexistent.root", "/tmp/out.root"); + EXPECT_NE(ret, 0); +} + +} // namespace diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index eb6b95f..3956ecc 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -29,6 +29,13 @@ ROOT_EXECUTABLE(ramstats ROOT::Core ROOT::ROOTNTuple ) +ROOT_EXECUTABLE(ramsort + ramsort.cxx + LIBRARIES + ramcore + ROOT::Core + ROOT::ROOTNTuple +) install(TARGETS samtoram samtoramntuple ramntupleview ramstats RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) diff --git a/tools/ramsort.cxx b/tools/ramsort.cxx new file mode 100644 index 0000000..76731b5 --- /dev/null +++ b/tools/ramsort.cxx @@ -0,0 +1,27 @@ +/// \file ramsort.cxx +/// \brief Command-line tool to sort a RAM (RNTuple) file by coordinate or name. +/// +/// Usage: +/// ./tools/ramsort [--by-name] + +#include "ramcore/RAMSort.h" +#include +#include + +int main(int argc, char **argv) +{ + if (argc < 3) { + std::cerr << "Usage: " << argv[0] << " [--by-name]\n"; + std::cerr << " Sort a RAM (RNTuple) file by genomic coordinate (refid, pos).\n"; + std::cerr << " --by-name Sort by QNAME instead.\n"; + return 1; + } + + bool byName = false; + for (int i = 3; i < argc; ++i) { + if (std::strcmp(argv[i], "--by-name") == 0) + byName = true; + } + + return ramsortntuple(argv[1], argv[2], byName); +} From b30441014fd18419f4fe9f00deed6a9fc943ffce Mon Sep 17 00:00:00 2001 From: swetank18 Date: Tue, 10 Mar 2026 22:50:46 +0530 Subject: [PATCH 07/10] Fix coverage: add Print(), error result, and verbose path tests --- test/ramstatstests.cxx | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/test/ramstatstests.cxx b/test/ramstatstests.cxx index dcda524..ba72d56 100644 --- a/test/ramstatstests.cxx +++ b/test/ramstatstests.cxx @@ -100,13 +100,33 @@ TEST_F(RAMStatsTest, ChromosomeCountsSumToAtMostTotal) EXPECT_LE(chrom_sum, result.stats.total_reads); } -/// ComputeStats on a non-existent file must return zero total reads. -TEST(RAMStatsEdgeCases, NonExistentFileReturnsEmpty) +/// Print() must produce non-empty output containing expected headers. +TEST_F(RAMStatsTest, PrintProducesOutput) { - auto result = ramcore::ComputeStats("nonexistent_file.root"); - EXPECT_EQ(result.stats.total_reads, 0u); + auto result = ramcore::ComputeStats(kRootFile); + ASSERT_TRUE(result.ok); + testing::internal::CaptureStdout(); + result.stats.Print(); + std::string output = testing::internal::GetCapturedStdout(); + EXPECT_FALSE(output.empty()); + EXPECT_NE(output.find("Total reads:"), std::string::npos); + EXPECT_NE(output.find("Mapped reads:"), std::string::npos); + EXPECT_NE(output.find("Mean mapping quality:"), std::string::npos); +} +/// ComputeStats on a non-existent file must return ok=false with error message. +TEST(RAMStatsEdgeCases, BadFileReturnsErrorResult) +{ + auto result = ramcore::ComputeStats("nonexistent_file.root"); + EXPECT_FALSE(result.ok); + EXPECT_FALSE(result.error_message.empty()); } -/// ComputeStats on a non-existent file must return zero total reads. +/// ComputeStats result ok must be true for a valid file. +TEST_F(RAMStatsTest, ValidFileReturnsOk) +{ + auto result = ramcore::ComputeStats(kRootFile); + EXPECT_TRUE(result.ok); + EXPECT_TRUE(result.error_message.empty()); +} } // namespace From 321072fbb8a170d2efa4c5f55dee5067b844e132 Mon Sep 17 00:00:00 2001 From: swetank18 Date: Tue, 10 Mar 2026 23:09:11 +0530 Subject: [PATCH 08/10] Fix clang-format: apply formatting fixes --- src/ramcore/RAMSort.cxx | 11 +++--- src/ramcore/RAMStats.cxx | 46 +++++++++++-------------- test/ramsorttests.cxx | 24 ++++++------- test/samparsertest.cxx | 74 +++++++++++++++++----------------------- 4 files changed, 68 insertions(+), 87 deletions(-) diff --git a/src/ramcore/RAMSort.cxx b/src/ramcore/RAMSort.cxx index 2a4a586..c8997c3 100644 --- a/src/ramcore/RAMSort.cxx +++ b/src/ramcore/RAMSort.cxx @@ -33,7 +33,7 @@ int ramsortntuple(const char *inputFile, const char *outputFile, bool byName) } auto viewRefId = reader->GetView("record.refid"); - auto viewPos = reader->GetView("record.pos"); + auto viewPos = reader->GetView("record.pos"); auto viewQname = reader->GetView("record.qname"); std::vector order(nEntries); @@ -49,12 +49,11 @@ int ramsortntuple(const char *inputFile, const char *outputFile, bool byName) std::vector qnames(nEntries); for (uint64_t i = 0; i < nEntries; ++i) qnames[i] = viewQname(i); - std::stable_sort(order.begin(), order.end(), - [&](uint64_t a, uint64_t b) { return qnames[a] < qnames[b]; }); + std::stable_sort(order.begin(), order.end(), [&](uint64_t a, uint64_t b) { return qnames[a] < qnames[b]; }); } else { std::vector refids(nEntries), positions(nEntries); for (uint64_t i = 0; i < nEntries; ++i) { - refids[i] = viewRefId(i); + refids[i] = viewRefId(i); positions[i] = viewPos(i); } std::stable_sort(order.begin(), order.end(), [&](uint64_t a, uint64_t b) { @@ -76,8 +75,8 @@ int ramsortntuple(const char *inputFile, const char *outputFile, bool byName) auto model = RAMNTupleRecord::MakeModel(); ROOT::RNTupleWriteOptions writeOptions; writeOptions.SetCompression(505); - auto writer = ROOT::RNTupleWriter::Append(std::move(model), "RAM", *rootFile, writeOptions); - auto entry = writer->GetModel().CreateEntry(); + auto writer = ROOT::RNTupleWriter::Append(std::move(model), "RAM", *rootFile, writeOptions); + auto entry = writer->GetModel().CreateEntry(); auto recordPtr = entry->GetPtr("record"); for (uint64_t idx : order) { diff --git a/src/ramcore/RAMStats.cxx b/src/ramcore/RAMStats.cxx index 6fe55e1..9c80279 100644 --- a/src/ramcore/RAMStats.cxx +++ b/src/ramcore/RAMStats.cxx @@ -28,28 +28,24 @@ void RAMStats::Print() const }; std::cout << "\n=== RAMTools Statistics ===\n"; - std::cout << std::left - << std::setw(30) << "Total reads:" << total_reads << "\n" - << std::setw(30) << "Mapped reads:" << mapped_reads - << std::fixed << std::setprecision(2) - << " (" << pct(mapped_reads, total_reads) << "%)\n" - << std::setw(30) << "Unmapped reads:" << unmapped_reads - << " (" << pct(unmapped_reads, total_reads) << "%)\n" - << std::setw(30) << "Duplicate reads:" << duplicate_reads - << " (" << pct(duplicate_reads, total_reads) << "%)\n" - << std::setw(30) << "Paired reads:" << paired_reads - << " (" << pct(paired_reads, total_reads) << "%)\n" - << std::setw(30) << "Properly paired reads:" << properly_paired_reads - << " (" << pct(properly_paired_reads, total_reads) << "%)\n" - << std::setw(30) << "Forward strand:" << forward_strand - << " (" << pct(forward_strand, mapped_reads) << "% of mapped)\n" - << std::setw(30) << "Reverse strand:" << reverse_strand - << " (" << pct(reverse_strand, mapped_reads) << "% of mapped)\n" - << std::setw(30) << "Total bases:" << total_bases << "\n" - << std::setw(30) << "Mean read length:" - << std::fixed << std::setprecision(2) << mean_read_length << "\n" - << std::setw(30) << "Mean mapping quality:" - << std::fixed << std::setprecision(2) << mean_mapping_quality << "\n"; + std::cout << std::left << std::setw(30) << "Total reads:" << total_reads << "\n" + << std::setw(30) << "Mapped reads:" << mapped_reads << std::fixed << std::setprecision(2) << " (" + << pct(mapped_reads, total_reads) << "%)\n" + << std::setw(30) << "Unmapped reads:" << unmapped_reads << " (" << pct(unmapped_reads, total_reads) + << "%)\n" + << std::setw(30) << "Duplicate reads:" << duplicate_reads << " (" << pct(duplicate_reads, total_reads) + << "%)\n" + << std::setw(30) << "Paired reads:" << paired_reads << " (" << pct(paired_reads, total_reads) << "%)\n" + << std::setw(30) << "Properly paired reads:" << properly_paired_reads << " (" + << pct(properly_paired_reads, total_reads) << "%)\n" + << std::setw(30) << "Forward strand:" << forward_strand << " (" << pct(forward_strand, mapped_reads) + << "% of mapped)\n" + << std::setw(30) << "Reverse strand:" << reverse_strand << " (" << pct(reverse_strand, mapped_reads) + << "% of mapped)\n" + << std::setw(30) << "Total bases:" << total_bases << "\n" + << std::setw(30) << "Mean read length:" << std::fixed << std::setprecision(2) << mean_read_length << "\n" + << std::setw(30) << "Mean mapping quality:" << std::fixed << std::setprecision(2) << mean_mapping_quality + << "\n"; std::cout << "===========================\n\n"; } @@ -133,11 +129,9 @@ RAMStatsResult ComputeStats(const char *filename) } if (stats.total_reads > 0) - stats.mean_read_length = - static_cast(len_sum) / static_cast(stats.total_reads); + stats.mean_read_length = static_cast(len_sum) / static_cast(stats.total_reads); if (stats.mapped_reads > 0) - stats.mean_mapping_quality = - static_cast(mapq_sum) / static_cast(stats.mapped_reads); + stats.mean_mapping_quality = static_cast(mapq_sum) / static_cast(stats.mapped_reads); return RAMStatsResult{stats, true, ""}; } diff --git a/test/ramsorttests.cxx b/test/ramsorttests.cxx index 1ae2212..30a7425 100644 --- a/test/ramsorttests.cxx +++ b/test/ramsorttests.cxx @@ -12,9 +12,9 @@ namespace { class RAMSortTest : public ::testing::Test { protected: static constexpr int kNumReads = 200; - const char *kSamFile = "sort_test.sam"; + const char *kSamFile = "sort_test.sam"; const char *kUnsortedFile = "sort_test_unsorted.root"; - const char *kSortedFile = "sort_test_sorted.root"; + const char *kSortedFile = "sort_test_sorted.root"; const char *kNameSortFile = "sort_test_namesort.root"; void SetUp() override @@ -39,9 +39,9 @@ TEST_F(RAMSortTest, EntryCountPreserved) { ASSERT_EQ(ramsortntuple(kUnsortedFile, kSortedFile), 0); - auto readerIn = ROOT::RNTupleReader::Open("RAM", kUnsortedFile); + auto readerIn = ROOT::RNTupleReader::Open("RAM", kUnsortedFile); auto readerOut = ROOT::RNTupleReader::Open("RAM", kSortedFile); - ASSERT_NE(readerIn, nullptr); + ASSERT_NE(readerIn, nullptr); ASSERT_NE(readerOut, nullptr); EXPECT_EQ(readerIn->GetNEntries(), readerOut->GetNEntries()); } @@ -50,23 +50,23 @@ TEST_F(RAMSortTest, CoordinateSortOrder) { ASSERT_EQ(ramsortntuple(kUnsortedFile, kSortedFile), 0); - auto reader = ROOT::RNTupleReader::Open("RAM", kSortedFile); + auto reader = ROOT::RNTupleReader::Open("RAM", kSortedFile); ASSERT_NE(reader, nullptr); auto viewRefId = reader->GetView("record.refid"); - auto viewPos = reader->GetView("record.pos"); + auto viewPos = reader->GetView("record.pos"); int32_t prevRefId = -1, prevPos = -1; for (uint64_t i = 0; i < reader->GetNEntries(); ++i) { int32_t refid = viewRefId(i); - int32_t pos = viewPos(i); + int32_t pos = viewPos(i); if (refid == prevRefId) { EXPECT_GE(pos, prevPos) << "pos out of order at entry " << i; } else { EXPECT_GE(refid, prevRefId) << "refid out of order at entry " << i; } prevRefId = refid; - prevPos = pos; + prevPos = pos; } } @@ -74,7 +74,7 @@ TEST_F(RAMSortTest, NameSortOrder) { ASSERT_EQ(ramsortntuple(kUnsortedFile, kNameSortFile, true), 0); - auto reader = ROOT::RNTupleReader::Open("RAM", kNameSortFile); + auto reader = ROOT::RNTupleReader::Open("RAM", kNameSortFile); ASSERT_NE(reader, nullptr); auto viewQname = reader->GetView("record.qname"); @@ -102,12 +102,12 @@ TEST_F(RAMSortTest, IdempotentSort) auto v1refid = r1->GetView("record.refid"); auto v2refid = r2->GetView("record.refid"); - auto v1pos = r1->GetView("record.pos"); - auto v2pos = r2->GetView("record.pos"); + auto v1pos = r1->GetView("record.pos"); + auto v2pos = r2->GetView("record.pos"); for (uint64_t i = 0; i < r1->GetNEntries(); ++i) { EXPECT_EQ(v1refid(i), v2refid(i)); - EXPECT_EQ(v1pos(i), v2pos(i)); + EXPECT_EQ(v1pos(i), v2pos(i)); } std::remove(doubleSorted); } diff --git a/test/samparsertest.cxx b/test/samparsertest.cxx index 20c7f52..4d4d819 100644 --- a/test/samparsertest.cxx +++ b/test/samparsertest.cxx @@ -40,15 +40,12 @@ TEST_F(SamParserTest, NonExistentFileReturnsFalse) /// Header lines starting with '@' must trigger the header callback. TEST_F(SamParserTest, HeaderCallbackFired) { - WriteSAM(kSamFile, - "@HD\tVN:1.6\tSO:coordinate\n" - "@SQ\tSN:chr1\tLN:248956422\n"); + WriteSAM(kSamFile, "@HD\tVN:1.6\tSO:coordinate\n" + "@SQ\tSN:chr1\tLN:248956422\n"); ramcore::SamParser parser; std::vector tags; - parser.ParseFile(kSamFile, - [&](const std::string &tag, const std::string &) { tags.push_back(tag); }, - nullptr); + parser.ParseFile(kSamFile, [&](const std::string &tag, const std::string &) { tags.push_back(tag); }, nullptr); ASSERT_EQ(tags.size(), 2u); EXPECT_EQ(tags[0], "@HD"); @@ -58,15 +55,13 @@ TEST_F(SamParserTest, HeaderCallbackFired) /// Record callback must fire once per alignment line. TEST_F(SamParserTest, RecordCallbackFiredForEachAlignment) { - WriteSAM(kSamFile, - "@HD\tVN:1.6\n" - "read1\t0\tchr1\t100\t60\t10M\t*\t0\t0\tACGTACGTAC\t*\n" - "read2\t16\tchr1\t200\t60\t10M\t*\t0\t0\tACGTACGTAC\t*\n"); + WriteSAM(kSamFile, "@HD\tVN:1.6\n" + "read1\t0\tchr1\t100\t60\t10M\t*\t0\t0\tACGTACGTAC\t*\n" + "read2\t16\tchr1\t200\t60\t10M\t*\t0\t0\tACGTACGTAC\t*\n"); ramcore::SamParser parser; size_t count = 0; - parser.ParseFile(kSamFile, nullptr, - [&](const ramcore::SamRecord &, size_t) { ++count; }); + parser.ParseFile(kSamFile, nullptr, [&](const ramcore::SamRecord &, size_t) { ++count; }); EXPECT_EQ(count, 2u); EXPECT_EQ(parser.GetRecordsProcessed(), 2u); @@ -75,26 +70,24 @@ TEST_F(SamParserTest, RecordCallbackFiredForEachAlignment) /// Parsed record fields must match the SAM columns exactly. TEST_F(SamParserTest, RecordFieldsParsedCorrectly) { - WriteSAM(kSamFile, - "@HD\tVN:1.6\n" - "myread\t99\tchr2\t500\t30\t5M2I3M\t=\t600\t110\tACGTACGTAC\tIIIIIIIIII\n"); + WriteSAM(kSamFile, "@HD\tVN:1.6\n" + "myread\t99\tchr2\t500\t30\t5M2I3M\t=\t600\t110\tACGTACGTAC\tIIIIIIIIII\n"); ramcore::SamParser parser; ramcore::SamRecord captured; - parser.ParseFile(kSamFile, nullptr, - [&](const ramcore::SamRecord &rec, size_t) { captured = rec; }); - - EXPECT_EQ(captured.qname, "myread"); - EXPECT_EQ(captured.flag, 99); - EXPECT_EQ(captured.rname, "chr2"); - EXPECT_EQ(captured.pos, 500); - EXPECT_EQ(captured.mapq, 30); - EXPECT_EQ(captured.cigar, "5M2I3M"); - EXPECT_EQ(captured.rnext, "="); - EXPECT_EQ(captured.pnext, 600); - EXPECT_EQ(captured.tlen, 110); - EXPECT_EQ(captured.seq, "ACGTACGTAC"); - EXPECT_EQ(captured.qual, "IIIIIIIIII"); + parser.ParseFile(kSamFile, nullptr, [&](const ramcore::SamRecord &rec, size_t) { captured = rec; }); + + EXPECT_EQ(captured.qname, "myread"); + EXPECT_EQ(captured.flag, 99); + EXPECT_EQ(captured.rname, "chr2"); + EXPECT_EQ(captured.pos, 500); + EXPECT_EQ(captured.mapq, 30); + EXPECT_EQ(captured.cigar, "5M2I3M"); + EXPECT_EQ(captured.rnext, "="); + EXPECT_EQ(captured.pnext, 600); + EXPECT_EQ(captured.tlen, 110); + EXPECT_EQ(captured.seq, "ACGTACGTAC"); + EXPECT_EQ(captured.qual, "IIIIIIIIII"); } /// An empty SAM file (no headers, no records) must parse successfully. @@ -110,14 +103,12 @@ TEST_F(SamParserTest, EmptyFileParseSucceeds) /// A SAM file with only headers and no records must produce zero record calls. TEST_F(SamParserTest, HeaderOnlyFileProducesNoRecords) { - WriteSAM(kSamFile, - "@HD\tVN:1.6\n" - "@SQ\tSN:chr1\tLN:1000\n"); + WriteSAM(kSamFile, "@HD\tVN:1.6\n" + "@SQ\tSN:chr1\tLN:1000\n"); ramcore::SamParser parser; size_t count = 0; - parser.ParseFile(kSamFile, nullptr, - [&](const ramcore::SamRecord &, size_t) { ++count; }); + parser.ParseFile(kSamFile, nullptr, [&](const ramcore::SamRecord &, size_t) { ++count; }); EXPECT_EQ(count, 0u); } @@ -125,14 +116,12 @@ TEST_F(SamParserTest, HeaderOnlyFileProducesNoRecords) /// Optional fields must be captured in the record. TEST_F(SamParserTest, OptionalFieldsCaptured) { - WriteSAM(kSamFile, - "@HD\tVN:1.6\n" - "read1\t0\tchr1\t100\t60\t10M\t*\t0\t0\tACGTACGTAC\t*\tNM:i:0\tRG:Z:sample1\n"); + WriteSAM(kSamFile, "@HD\tVN:1.6\n" + "read1\t0\tchr1\t100\t60\t10M\t*\t0\t0\tACGTACGTAC\t*\tNM:i:0\tRG:Z:sample1\n"); ramcore::SamParser parser; ramcore::SamRecord captured; - parser.ParseFile(kSamFile, nullptr, - [&](const ramcore::SamRecord &rec, size_t) { captured = rec; }); + parser.ParseFile(kSamFile, nullptr, [&](const ramcore::SamRecord &rec, size_t) { captured = rec; }); EXPECT_GE(captured.optional_fields.size(), 1u); } @@ -140,10 +129,9 @@ TEST_F(SamParserTest, OptionalFieldsCaptured) /// Lines processed counter must include both header and record lines. TEST_F(SamParserTest, LinesProcessedCountIsCorrect) { - WriteSAM(kSamFile, - "@HD\tVN:1.6\n" - "@SQ\tSN:chr1\tLN:1000\n" - "read1\t0\tchr1\t100\t60\t10M\t*\t0\t0\tACGTACGTAC\t*\n"); + WriteSAM(kSamFile, "@HD\tVN:1.6\n" + "@SQ\tSN:chr1\tLN:1000\n" + "read1\t0\tchr1\t100\t60\t10M\t*\t0\t0\tACGTACGTAC\t*\n"); ramcore::SamParser parser; parser.ParseFile(kSamFile, nullptr, nullptr); From 4874ad881d29cb99a68393ef5945d26569ac5701 Mon Sep 17 00:00:00 2001 From: swetank18 Date: Fri, 20 Mar 2026 11:19:17 +0530 Subject: [PATCH 09/10] Fix coverage: extract RunRamStats, add verbose and error path tests --- inc/ramcore/RAMStats.h | 6 ++++++ src/ramcore/RAMStats.cxx | 21 +++++++++++++++++++++ test/ramstatstests.cxx | 30 ++++++++++++++++++++++++++++++ tools/ramstats.cxx | 28 ++-------------------------- 4 files changed, 59 insertions(+), 26 deletions(-) diff --git a/inc/ramcore/RAMStats.h b/inc/ramcore/RAMStats.h index 6e0f1b5..8fe16e2 100644 --- a/inc/ramcore/RAMStats.h +++ b/inc/ramcore/RAMStats.h @@ -37,6 +37,12 @@ struct RAMStatsResult { /// Returns RAMStatsResult — check .ok before using .stats. RAMStatsResult ComputeStats(const char *filename); +/// Run the full ramstats pipeline: compute stats and print results. +/// \param filename Path to input .root RAM file +/// \param verbose If true, print per-chromosome breakdown +/// \return 0 on success, 1 on error +int RunRamStats(const char *filename, bool verbose = false); + } // namespace ramcore #endif // RAMCORE_RAMSTATS_H diff --git a/src/ramcore/RAMStats.cxx b/src/ramcore/RAMStats.cxx index 9c80279..95b1ece 100644 --- a/src/ramcore/RAMStats.cxx +++ b/src/ramcore/RAMStats.cxx @@ -136,4 +136,25 @@ RAMStatsResult ComputeStats(const char *filename) return RAMStatsResult{stats, true, ""}; } + +int RunRamStats(const char *filename, bool verbose) +{ + auto result = ComputeStats(filename); + if (!result.ok) { + std::cerr << "Error: " << result.error_message << "\n"; + return 1; + } + result.stats.Print(); + if (verbose && !result.stats.reads_per_chromosome.empty()) { + std::cout << "\n--- Reads per Chromosome ---\n"; + for (const auto &[chrom, count] : result.stats.reads_per_chromosome) { + double pct = 100.0 * count / result.stats.total_reads; + std::cout << " " << chrom << ": " << count + << " (" << pct << "%)\n"; + } + std::cout << "\n"; + } + return 0; +} + } // namespace ramcore diff --git a/test/ramstatstests.cxx b/test/ramstatstests.cxx index ba72d56..33a9326 100644 --- a/test/ramstatstests.cxx +++ b/test/ramstatstests.cxx @@ -129,4 +129,34 @@ TEST_F(RAMStatsTest, ValidFileReturnsOk) EXPECT_TRUE(result.ok); EXPECT_TRUE(result.error_message.empty()); } + +/// RunRamStats on a valid file must return 0. +TEST_F(RAMStatsTest, RunRamStatsValidFileReturnsZero) +{ + testing::internal::CaptureStdout(); + int ret = ramcore::RunRamStats(kRootFile); + std::string output = testing::internal::GetCapturedStdout(); + EXPECT_EQ(ret, 0); + EXPECT_FALSE(output.empty()); +} + +/// RunRamStats with verbose must print chromosome breakdown. +TEST_F(RAMStatsTest, RunRamStatsVerbosePrintsChromosomes) +{ + testing::internal::CaptureStdout(); + int ret = ramcore::RunRamStats(kRootFile, true); + std::string output = testing::internal::GetCapturedStdout(); + EXPECT_EQ(ret, 0); + EXPECT_NE(output.find("Reads per Chromosome"), std::string::npos); +} + +/// RunRamStats on a bad file must return 1. +TEST(RAMStatsEdgeCases, RunRamStatsBadFileReturnsOne) +{ + testing::internal::CaptureStderr(); + int ret = ramcore::RunRamStats("nonexistent.root"); + testing::internal::GetCapturedStderr(); + EXPECT_EQ(ret, 1); +} + } // namespace diff --git a/tools/ramstats.cxx b/tools/ramstats.cxx index b9e2fcb..d5dd08a 100644 --- a/tools/ramstats.cxx +++ b/tools/ramstats.cxx @@ -1,6 +1,6 @@ #include "ramcore/RAMStats.h" -#include #include +#include int main(int argc, char **argv) { @@ -10,34 +10,10 @@ int main(int argc, char **argv) std::cerr << " --verbose Also print per-chromosome read counts.\n"; return 1; } - - const char *inputFile = argv[1]; bool verbose = false; for (int i = 2; i < argc; ++i) { if (std::strcmp(argv[i], "--verbose") == 0) verbose = true; } - - auto result = ramcore::ComputeStats(inputFile); - - if (!result.ok) { - std::cerr << "Error: " << result.error_message << "\n"; - return 1; - } - - // Computation and I/O are separate — tool controls printing - result.stats.Print(); - - // Per-chromosome breakdown only with --verbose - if (verbose && !result.stats.reads_per_chromosome.empty()) { - std::cout << "\n--- Reads per Chromosome ---\n"; - for (const auto &[chrom, count] : result.stats.reads_per_chromosome) { - double pct = 100.0 * count / result.stats.total_reads; - std::cout << " " << chrom << ": " << count - << " (" << pct << "%)\n"; - } - std::cout << "\n"; - } - - return 0; + return ramcore::RunRamStats(argv[1], verbose); } From 5bdbcbf336707a6f15c75e50d9ccb4bb3c0bbc15 Mon Sep 17 00:00:00 2001 From: swetank18 Date: Sat, 21 Mar 2026 01:34:48 +0530 Subject: [PATCH 10/10] Fix clang-format warnings --- src/ramcore/RAMStats.cxx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/ramcore/RAMStats.cxx b/src/ramcore/RAMStats.cxx index 95b1ece..1c84c93 100644 --- a/src/ramcore/RAMStats.cxx +++ b/src/ramcore/RAMStats.cxx @@ -136,7 +136,6 @@ RAMStatsResult ComputeStats(const char *filename) return RAMStatsResult{stats, true, ""}; } - int RunRamStats(const char *filename, bool verbose) { auto result = ComputeStats(filename); @@ -149,8 +148,7 @@ int RunRamStats(const char *filename, bool verbose) std::cout << "\n--- Reads per Chromosome ---\n"; for (const auto &[chrom, count] : result.stats.reads_per_chromosome) { double pct = 100.0 * count / result.stats.total_reads; - std::cout << " " << chrom << ": " << count - << " (" << pct << "%)\n"; + std::cout << " " << chrom << ": " << count << " (" << pct << "%)\n"; } std::cout << "\n"; }