Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,17 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ramcore
inc/ramcore/SamToTTree.h
inc/ramcore/SamToNTuple.h
inc/ramcore/RAMNTupleView.h
inc/ramcore/RAMStats.h
inc/ramcore/RAMSort.h
SOURCES
src/ttree/RAMRecord.cxx
src/rntuple/RAMNTupleRecord.cxx
src/ramcore/SamParser.cxx
src/ramcore/SamToTTree.cxx
src/ramcore/SamToNTuple.cxx
src/ramcore/RAMNTupleView.cxx
src/ramcore/RAMStats.cxx
src/ramcore/RAMSort.cxx
LINKDEF
inc/ttree/LinkDef.h
DEPENDENCIES
Expand Down
11 changes: 11 additions & 0 deletions inc/ramcore/RAMSort.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#ifndef RAMCORE_RAMSORT_H
#define RAMCORE_RAMSORT_H
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: header guard does not follow preferred style [llvm-header-guard]

Suggested change
#define RAMCORE_RAMSORT_H
#ifndef GITHUB_WORKSPACE_INC_RAMCORE_RAMSORT_H
#define GITHUB_WORKSPACE_INC_RAMCORE_RAMSORT_H

inc/ramcore/RAMSort.h:10:

- #endif // RAMCORE_RAMSORT_H
+ #endif // GITHUB_WORKSPACE_INC_RAMCORE_RAMSORT_H


/// Sort a RAM (RNTuple) file by coordinate (refid, pos) or by QNAME.
/// \param inputFile Path to input .root RAM file
/// \param outputFile Path to output .root RAM file
/// \param byName If true, sort by QNAME; otherwise sort by (refid, pos)
/// \return 0 on success, 1 on error
int ramsortntuple(const char *inputFile, const char *outputFile, bool byName = false);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: unknown type name 'bool' [clang-diagnostic-error]

int ramsortntuple(const char *inputFile, const char *outputFile, bool byName = false);
                                                                 ^

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: use of undeclared identifier 'false' [clang-diagnostic-error]

int ramsortntuple(const char *inputFile, const char *outputFile, bool byName = false);
                                                                               ^


#endif // RAMCORE_RAMSORT_H
48 changes: 48 additions & 0 deletions inc/ramcore/RAMStats.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#ifndef RAMCORE_RAMSTATS_H
#define RAMCORE_RAMSTATS_H
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: header guard does not follow preferred style [llvm-header-guard]

Suggested change
#define RAMCORE_RAMSTATS_H
#ifndef GITHUB_WORKSPACE_INC_RAMCORE_RAMSTATS_H
#define GITHUB_WORKSPACE_INC_RAMCORE_RAMSTATS_H

inc/ramcore/RAMStats.h:41:

+ endif // GITHUB_WORKSPACE_INC_RAMCORE_RAMSTATS_H

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: header guard does not follow preferred style [llvm-header-guard]

Suggested change
#define RAMCORE_RAMSTATS_H
#ifndef GITHUB_WORKSPACE_INC_RAMCORE_RAMSTATS_H
#define GITHUB_WORKSPACE_INC_RAMCORE_RAMSTATS_H

inc/ramcore/RAMStats.h:47:

+ endif // GITHUB_WORKSPACE_INC_RAMCORE_RAMSTATS_H


#include <cstdint>
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: 'cstdint' file not found [clang-diagnostic-error]

#include <cstdint>
         ^

#include <string>
#include <map>
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: #includes are not sorted properly [llvm-include-order]

Suggested change
#include <map>
#include <map>
#include <string>


namespace ramcore {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: variable 'ramcore' is non-const and globally accessible, consider making it const [cppcoreguidelines-avoid-non-const-global-variables]

namespace ramcore {
          ^


struct RAMStats {
uint64_t total_reads = 0;
uint64_t mapped_reads = 0;
uint64_t unmapped_reads = 0;
uint64_t duplicate_reads = 0;
uint64_t paired_reads = 0;
uint64_t properly_paired_reads = 0;
uint64_t forward_strand = 0;
uint64_t reverse_strand = 0;

double mean_mapping_quality = 0.0;
double mean_read_length = 0.0;
uint64_t total_bases = 0;

std::map<std::string, uint64_t> reads_per_chromosome;

void Print() const;
};

/// Result wrapper so callers can distinguish errors from empty files
struct RAMStatsResult {
RAMStats stats;
bool ok = false;
std::string error_message;
};

/// Compute statistics from an RNTuple-based RAM file.
/// Returns RAMStatsResult — check .ok before using .stats.
RAMStatsResult ComputeStats(const char *filename);

/// Run the full ramstats pipeline: compute stats and print results.
/// \param filename Path to input .root RAM file
/// \param verbose If true, print per-chromosome breakdown
/// \return 0 on success, 1 on error
int RunRamStats(const char *filename, bool verbose = false);

} // namespace ramcore

#endif // RAMCORE_RAMSTATS_H
92 changes: 92 additions & 0 deletions src/ramcore/RAMSort.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#include "ramcore/RAMSort.h"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: 'ramcore/RAMSort.h' file not found [clang-diagnostic-error]

#include "ramcore/RAMSort.h"
         ^

#include "rntuple/RAMNTupleRecord.h"

#include <ROOT/RNTupleModel.hxx>
#include <ROOT/RNTupleReader.hxx>
#include <ROOT/RNTupleWriter.hxx>
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: #includes are not sorted properly [llvm-include-order]

Suggested change
#include <ROOT/RNTupleWriter.hxx>
#include <ROOT/RNTupleView.hxx>

src/ramcore/RAMSort.cxx:7:

- #include <ROOT/RNTupleView.hxx>
+ #include <ROOT/RNTupleWriter.hxx>

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: #includes are not sorted properly [llvm-include-order]

#include <ROOT/RNTupleWriter.hxx>
^

this fix will not be applied because it overlaps with another fix

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: included header RNTupleReader.hxx is not used directly [misc-include-cleaner]

Suggested change
#include <ROOT/RNTupleWriter.hxx>
#include <ROOT/RNTupleWriter.hxx>

#include <ROOT/RNTupleWriteOptions.hxx>
#include <ROOT/RNTupleView.hxx>
#include <TFile.h>
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: included header RNTupleView.hxx is not used directly [misc-include-cleaner]

Suggested change
#include <TFile.h>
#include <TFile.h>


#include <algorithm>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: included header string is not used directly [misc-include-cleaner]

Suggested change
#include <vector>
#include <vector>


Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: included header vector is not used directly [misc-include-cleaner]

Suggested change

int ramsortntuple(const char *inputFile, const char *outputFile, bool byName)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: function 'ramsortntuple' can be made static or moved into an anonymous namespace to enforce internal linkage [misc-use-internal-linkage]

Suggested change
int ramsortntuple(const char *inputFile, const char *outputFile, bool byName)
static int ramsortntuple(const char *inputFile, const char *outputFile, bool byName)

{
RAMNTupleRecord::ReadAllRefs(inputFile);

std::unique_ptr<ROOT::RNTupleReader> reader;
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: no header providing "std::unique_ptr" is directly included [misc-include-cleaner]

src/ramcore/RAMSort.cxx:12:

- #include <numeric>
+ #include <memory>
+ #include <numeric>

try {
reader = ROOT::RNTupleReader::Open("RAM", inputFile);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: incomplete type 'ROOT::RNTupleReader' named in nested name specifier [clang-diagnostic-error]

      reader = ROOT::RNTupleReader::Open("RAM", inputFile);
                     ^
Additional context

inc/rntuple/RAMNTupleRecord.h:23: forward declaration of 'ROOT::RNTupleReader'

class RNTupleReader;
      ^

} catch (const std::exception &e) {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: no header providing "std::exception" is directly included [misc-include-cleaner]

src/ramcore/RAMSort.cxx:11:

- #include <iostream>
+ #include <exception>
+ #include <iostream>

std::cerr << "Error opening input: " << e.what() << "\n";
return 1;
}

const uint64_t nEntries = reader->GetNEntries();
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: member access into incomplete type 'ROOT::RNTupleReader' [clang-diagnostic-error]

   const uint64_t nEntries = reader->GetNEntries();
                                   ^
Additional context

inc/rntuple/RAMNTupleRecord.h:23: forward declaration of 'ROOT::RNTupleReader'

class RNTupleReader;
      ^

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: no header providing "uint64_t" is directly included [misc-include-cleaner]

src/ramcore/RAMSort.cxx:11:

- #include <iostream>
+ #include <cstdint>
+ #include <iostream>

if (nEntries == 0) {
std::cerr << "Input file has no entries.\n";
return 1;
}

auto viewRefId = reader->GetView<int32_t>("record.refid");
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: member access into incomplete type 'ROOT::RNTupleReader' [clang-diagnostic-error]

   auto viewRefId = reader->GetView<int32_t>("record.refid");
                          ^
Additional context

inc/rntuple/RAMNTupleRecord.h:23: forward declaration of 'ROOT::RNTupleReader'

class RNTupleReader;
      ^

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: no header providing "int32_t" is directly included [misc-include-cleaner]

   auto viewRefId = reader->GetView<int32_t>("record.refid");
                                    ^

auto viewPos = reader->GetView<int32_t>("record.pos");
auto viewQname = reader->GetView<std::string>("record.qname");

std::vector<uint64_t> order(nEntries);
std::iota(order.begin(), order.end(), 0);

std::cout << "Sorting " << nEntries << " records";
if (byName)
std::cout << " by QNAME...\n";
else
std::cout << " by coordinate (refid, pos)...\n";

if (byName) {
std::vector<std::string> qnames(nEntries);
for (uint64_t i = 0; i < nEntries; ++i)
qnames[i] = viewQname(i);
std::stable_sort(order.begin(), order.end(), [&](uint64_t a, uint64_t b) { return qnames[a] < qnames[b]; });
} else {
std::vector<int32_t> refids(nEntries), positions(nEntries);
for (uint64_t i = 0; i < nEntries; ++i) {
refids[i] = viewRefId(i);
positions[i] = viewPos(i);
}
std::stable_sort(order.begin(), order.end(), [&](uint64_t a, uint64_t b) {
if (refids[a] != refids[b])
return refids[a] < refids[b];
return positions[a] < positions[b];
});
}

auto viewRecord = reader->GetView<RAMNTupleRecord>("record");

auto rootFile = std::unique_ptr<TFile>(TFile::Open(outputFile, "RECREATE"));
if (!rootFile || !rootFile->IsOpen()) {
std::cerr << "Error: could not create output file " << outputFile << "\n";
return 1;
}

RAMNTupleRecord::InitializeRefs();
auto model = RAMNTupleRecord::MakeModel();
ROOT::RNTupleWriteOptions writeOptions;
writeOptions.SetCompression(505);
auto writer = ROOT::RNTupleWriter::Append(std::move(model), "RAM", *rootFile, writeOptions);
auto entry = writer->GetModel().CreateEntry();
auto recordPtr = entry->GetPtr<RAMNTupleRecord>("record");

for (uint64_t idx : order) {
*recordPtr = viewRecord(idx);
writer->Fill(*entry);
}

RAMNTupleRecord::WriteAllRefs(*rootFile);
RAMNTupleRecord::WriteIndex(*rootFile);

std::cout << "Sorted output written to " << outputFile << "\n";
return 0;
}
158 changes: 158 additions & 0 deletions src/ramcore/RAMStats.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
#include "ramcore/RAMStats.h"
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: 'ramcore/RAMStats.h' file not found [clang-diagnostic-error]

#include "ramcore/RAMStats.h"
         ^

#include "rntuple/RAMNTupleRecord.h"

#include <ROOT/RNTupleReader.hxx>
#include <ROOT/RNTupleView.hxx>

#include <cstdint>
#include <cstring>
#include <exception>
#include <iomanip>
#include <iostream>
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: included header iomanip is not used directly [misc-include-cleaner]

Suggested change
#include <iostream>
#include <iostream>

#include <map>
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: included header iostream is not used directly [misc-include-cleaner]

Suggested change
#include <map>
#include <map>

#include <memory>
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: included header map is not used directly [misc-include-cleaner]

Suggested change
#include <memory>
#include <memory>

#include <string>
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: included header memory is not used directly [misc-include-cleaner]

Suggested change
#include <string>
#include <string>


namespace ramcore {

static constexpr uint16_t FLAG_PAIRED = 0x1;
static constexpr uint16_t FLAG_PROPER_PAIR = 0x2;
static constexpr uint16_t FLAG_UNMAPPED = 0x4;
static constexpr uint16_t FLAG_REVERSE_STRAND = 0x10;
static constexpr uint16_t FLAG_DUPLICATE = 0x400;

void RAMStats::Print() const
{
auto pct = [](uint64_t a, uint64_t b) -> double {
return b == 0 ? 0.0 : 100.0 * static_cast<double>(a) / static_cast<double>(b);
};

std::cout << "\n=== RAMTools Statistics ===\n";
std::cout << std::left << std::setw(30) << "Total reads:" << total_reads << "\n"
<< std::setw(30) << "Mapped reads:" << mapped_reads << std::fixed << std::setprecision(2) << " ("
<< pct(mapped_reads, total_reads) << "%)\n"
<< std::setw(30) << "Unmapped reads:" << unmapped_reads << " (" << pct(unmapped_reads, total_reads)
<< "%)\n"
<< std::setw(30) << "Duplicate reads:" << duplicate_reads << " (" << pct(duplicate_reads, total_reads)
<< "%)\n"
<< std::setw(30) << "Paired reads:" << paired_reads << " (" << pct(paired_reads, total_reads) << "%)\n"
<< std::setw(30) << "Properly paired reads:" << properly_paired_reads << " ("
<< pct(properly_paired_reads, total_reads) << "%)\n"
<< std::setw(30) << "Forward strand:" << forward_strand << " (" << pct(forward_strand, mapped_reads)
<< "% of mapped)\n"
<< std::setw(30) << "Reverse strand:" << reverse_strand << " (" << pct(reverse_strand, mapped_reads)
<< "% of mapped)\n"
<< std::setw(30) << "Total bases:" << total_bases << "\n"
<< std::setw(30) << "Mean read length:" << std::fixed << std::setprecision(2) << mean_read_length << "\n"
<< std::setw(30) << "Mean mapping quality:" << std::fixed << std::setprecision(2) << mean_mapping_quality
<< "\n";
std::cout << "===========================\n\n";
}

/// Extract the original sequence length from an encoded seq field.
/// The field format is: [4-byte uint32_t length][packed 2-bit nucleotides]
/// memcpy is used instead of reinterpret_cast to avoid alignment-dependent UB.
static uint32_t DecodedSeqLength(const std::string &encoded_seq)
{
if (encoded_seq.size() < 4)
return 0;
uint32_t length = 0;
std::memcpy(&length, encoded_seq.data(), sizeof(length));
return length;
}

RAMStatsResult ComputeStats(const char *filename)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: function 'ComputeStats' can be made static or moved into an anonymous namespace to enforce internal linkage [misc-use-internal-linkage]

Suggested change
RAMStatsResult ComputeStats(const char *filename)
RAMStatsResult ComputeStatsstatic (const char *filename)

{
RAMStats stats;

// Initialize and load the reference name map stored alongside the RNTuple
RAMNTupleRecord::InitializeRefs();
RAMNTupleRecord::ReadAllRefs(filename);
RAMNTupleRefs *rnameRefs = RAMNTupleRecord::GetRnameRefs();

std::unique_ptr<ROOT::RNTupleReader> reader;
try {
reader = ROOT::RNTupleReader::Open("RAM", filename);
} catch (const std::exception &e) {
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: empty catch statements hide issues; to handle exceptions appropriately, consider re-throwing, handling, or avoiding catch altogether [bugprone-empty-catch]

   } catch (const std::exception &e) {
     ^

return RAMStatsResult{stats, false, e.what()};
}

if (!reader)
return RAMStatsResult{stats, false, "RNTupleReader::Open returned nullptr"};

auto viewFlag = reader->GetView<uint16_t>("record.flag");
auto viewMapQ = reader->GetView<uint8_t>("record.mapq");
auto viewSeq = reader->GetView<std::string>("record.seq");
auto viewRefId = reader->GetView<int32_t>("record.refid");

uint64_t mapq_sum = 0;
uint64_t len_sum = 0;
const uint64_t nEntries = reader->GetNEntries();

for (uint64_t i = 0; i < nEntries; ++i) {
stats.total_reads++;

uint16_t flag = viewFlag(i);
uint8_t mapq = viewMapQ(i);
const std::string &seq = viewSeq(i);
int32_t refid = viewRefId(i);

if (flag & FLAG_UNMAPPED) {
stats.unmapped_reads++;
// Unmapped reads have no strand — excluded from strand stats
} else {
stats.mapped_reads++;
mapq_sum += mapq;
// Strand is only meaningful for mapped reads
if (flag & FLAG_REVERSE_STRAND)
stats.reverse_strand++;
else
stats.forward_strand++;
}

if (flag & FLAG_DUPLICATE) stats.duplicate_reads++;
if (flag & FLAG_PAIRED) stats.paired_reads++;
if (flag & FLAG_PROPER_PAIR) stats.properly_paired_reads++;

// Extract sequence length from encoded field (4-byte length prefix + packed nibbles)
uint32_t rlen = DecodedSeqLength(seq);
len_sum += rlen;
stats.total_bases += rlen;

// Resolve chromosome name from integer refid via the ref name table
if (refid >= 0 && rnameRefs &&
refid < static_cast<int32_t>(rnameRefs->Size())) {
const std::string &chrom = rnameRefs->GetRefName(refid);
if (!chrom.empty() && chrom != "*")
stats.reads_per_chromosome[chrom]++;
}
}

if (stats.total_reads > 0)
stats.mean_read_length = static_cast<double>(len_sum) / static_cast<double>(stats.total_reads);
if (stats.mapped_reads > 0)
stats.mean_mapping_quality = static_cast<double>(mapq_sum) / static_cast<double>(stats.mapped_reads);

return RAMStatsResult{stats, true, ""};
}

int RunRamStats(const char *filename, bool verbose)
{
auto result = ComputeStats(filename);
if (!result.ok) {
std::cerr << "Error: " << result.error_message << "\n";
return 1;
}
result.stats.Print();
if (verbose && !result.stats.reads_per_chromosome.empty()) {
std::cout << "\n--- Reads per Chromosome ---\n";
for (const auto &[chrom, count] : result.stats.reads_per_chromosome) {
double pct = 100.0 * count / result.stats.total_reads;
std::cout << " " << chrom << ": " << count << " (" << pct << "%)\n";
}
std::cout << "\n";
}
return 0;
}

} // namespace ramcore
3 changes: 3 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,6 @@ install(TARGETS ramcoretests chromosome_split_test
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)

add_ramcore_test(ramstatstests ramstatstests.cxx)
add_ramcore_test(ramsorttests ramsorttests.cxx)
add_ramcore_test(samparsertest samparsertest.cxx)
Loading