From ca1483f6100610984e521a979283d8297e36f710 Mon Sep 17 00:00:00 2001 From: AdityaPandeyCN Date: Thu, 19 Mar 2026 19:29:11 +0530 Subject: [PATCH 1/2] Add BAM-to-RNTuple conversion support --- CMakeLists.txt | 14 +- inc/ramcore/BamtoNTuple.h | 9 ++ src/ramcore/BamtoNTuple.cxx | 294 ++++++++++++++++++++++++++++++++++ test/CMakeLists.txt | 7 +- test/bam_conversion.cxx | 306 ++++++++++++++++++++++++++++++++++++ tools/CMakeLists.txt | 14 +- tools/bamtoramntuple.cxx | 56 +++++++ 7 files changed, 688 insertions(+), 12 deletions(-) create mode 100644 inc/ramcore/BamtoNTuple.h create mode 100644 src/ramcore/BamtoNTuple.cxx create mode 100644 test/bam_conversion.cxx create mode 100644 tools/bamtoramntuple.cxx diff --git a/CMakeLists.txt b/CMakeLists.txt index 2bc3b44..f1a8aeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,6 +29,10 @@ find_package(ROOT ${ROOT_MIN_VERSION} REQUIRED COMPONENTS Core RIO Tree ROOTNTup include(${ROOT_USE_FILE}) +# Find htslib +find_package(PkgConfig REQUIRED) +pkg_check_modules(htslib REQUIRED IMPORTED_TARGET htslib) + include(GNUInstallDirs) set(CMAKE_INSTALL_LIBDIR lib) set(CMAKE_INSTALL_INCLUDEDIR include) @@ -50,6 +54,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ramcore src/ramcore/SamToTTree.cxx src/ramcore/SamToNTuple.cxx src/ramcore/RAMNTupleView.cxx + src/ramcore/BamtoNTuple.cxx LINKDEF inc/ttree/LinkDef.h DEPENDENCIES @@ -68,14 +73,14 @@ target_include_directories(ramcore $ ) +target_link_libraries(ramcore PUBLIC PkgConfig::htslib) + install(DIRECTORY inc/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) if(RAMTOOLS_BUILD_TOOLS) add_subdirectory(tools) endif() -# Include benchmark directory if benchmarks OR tests are enabled -# (sam_generator is needed by tests) if(RAMTOOLS_BUILD_BENCHMARKS OR RAMTOOLS_BUILD_TESTS) add_subdirectory(benchmark) endif() @@ -83,7 +88,7 @@ endif() if(RAMTOOLS_BUILD_TESTS) enable_testing() add_subdirectory(test) - + if(ENABLE_COVERAGE) add_custom_target(coverage COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure @@ -93,5 +98,4 @@ if(RAMTOOLS_BUILD_TESTS) COMMENT "Running tests and generating code coverage report..." ) endif() -endif() - +endif() \ No newline at end of file diff --git a/inc/ramcore/BamtoNTuple.h b/inc/ramcore/BamtoNTuple.h new file mode 100644 index 0000000..31a298c --- /dev/null +++ b/inc/ramcore/BamtoNTuple.h @@ -0,0 +1,9 @@ +#ifndef RAMCORE_BAMTONTUPLE_H +#define RAMCORE_BAMTONTUPLE_H + +#include + +void bamtoramntuple(const char *bamfile, const char *treefile, bool index, bool split, bool cache, + int compression_algorithm, uint32_t quality_policy); + +#endif // RAMCORE_BAMTONTUPLE_H \ No newline at end of file diff --git a/src/ramcore/BamtoNTuple.cxx b/src/ramcore/BamtoNTuple.cxx new file mode 100644 index 0000000..16f9e72 --- /dev/null +++ b/src/ramcore/BamtoNTuple.cxx @@ -0,0 +1,294 @@ +#include "ramcore/BamtoNTuple.h" + +#include "rntuple/RAMNTupleRecord.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +constexpr uint16_t kUnmapped = 0x4; +constexpr int32_t kPositionInterval = 10000; +constexpr int64_t kMappedInterval = 100; + +std::string GetSeq(const bam1_t *b) +{ + const int len = b->core.l_qseq; + const uint8_t *enc = bam_get_seq(b); + std::string s(static_cast(len), 'N'); + for (int i = 0; i < len; ++i) + s[static_cast(i)] = seq_nt16_str[bam_seqi(enc, i)]; + return s; +} + +std::string GetQual(const bam1_t *b) +{ + const int len = b->core.l_qseq; + const uint8_t *q = bam_get_qual(b); + if (q == nullptr || q[0] == 0xff) + return "*"; + std::string s(static_cast(len), '!'); + for (int i = 0; i < len; ++i) + s[static_cast(i)] = static_cast(q[i] + 33); + return s; +} + +std::string GetCigar(const bam1_t *b) +{ + const uint32_t n = b->core.n_cigar; + if (n == 0) + return "*"; + const uint32_t *cigar = bam_get_cigar(b); + static constexpr std::array ops = {'M', 'I', 'D', 'N', 'S', 'H', 'P', '=', 'X'}; + std::string s; + s.reserve(n * 4U); + for (uint32_t i = 0; i < n; ++i) { + s += std::to_string(bam_cigar_oplen(cigar[i])); + s += ops.at(static_cast(bam_cigar_op(cigar[i]))); + } + return s; +} + +std::string FormatTag(const char *name, char type_char, const std::string &value) +{ + std::string result; + result.reserve(strlen(name) + 3 + value.size()); + result += name; + result += ':'; + result += type_char; + result += ':'; + result += value; + return result; +} + +std::vector GetTags(const bam1_t *b) +{ + std::vector tags; + const uint8_t *aux = bam_get_aux(b); + const uint8_t *end = b->data + b->l_data; + + while (aux < end) { + const std::array tag = {static_cast(aux[0]), static_cast(aux[1]), '\0'}; + aux += 2; + const char type = static_cast(*aux++); + + switch (type) { + case 'A': + tags.emplace_back(FormatTag(tag.data(), 'A', std::string(1, static_cast(*aux)))); + aux += 1; + break; + case 'c': { + tags.emplace_back(FormatTag(tag.data(), 'i', std::to_string(static_cast(*aux)))); + aux += 1; + break; + } + case 'C': { + tags.emplace_back(FormatTag(tag.data(), 'i', std::to_string(*aux))); + aux += 1; + break; + } + case 's': { + int16_t v{}; + memcpy(&v, aux, sizeof(v)); + tags.emplace_back(FormatTag(tag.data(), 'i', std::to_string(v))); + aux += sizeof(v); + break; + } + case 'S': { + uint16_t v{}; + memcpy(&v, aux, sizeof(v)); + tags.emplace_back(FormatTag(tag.data(), 'i', std::to_string(v))); + aux += sizeof(v); + break; + } + case 'i': { + int32_t v{}; + memcpy(&v, aux, sizeof(v)); + tags.emplace_back(FormatTag(tag.data(), 'i', std::to_string(v))); + aux += sizeof(v); + break; + } + case 'I': { + uint32_t v{}; + memcpy(&v, aux, sizeof(v)); + tags.emplace_back(FormatTag(tag.data(), 'i', std::to_string(v))); + aux += sizeof(v); + break; + } + case 'f': { + float v{}; + memcpy(&v, aux, sizeof(v)); + tags.emplace_back(FormatTag(tag.data(), 'f', std::to_string(v))); + aux += sizeof(v); + break; + } + case 'Z': + case 'H': { + const char *str = static_cast(static_cast(aux)); + tags.emplace_back(FormatTag(tag.data(), type, str)); + aux += strlen(str) + 1; + break; + } + default: return tags; + } + } + return tags; +} + +void FillRecord(RAMNTupleRecord *rec, const bam1_t *b, const sam_hdr_t *hdr, uint32_t quality_policy) +{ + const char *rname = (b->core.tid < 0) ? "*" : sam_hdr_tid2name(hdr, b->core.tid); + + const char *rnext = "*"; + if (b->core.mtid >= 0) + rnext = (b->core.mtid == b->core.tid) ? "=" : sam_hdr_tid2name(hdr, b->core.mtid); + + rec->SetBit(quality_policy); + rec->SetQNAME(bam_get_qname(b)); + rec->SetFLAG(b->core.flag); + rec->SetREFID(rname); + rec->SetPOS(static_cast(b->core.pos + 1)); + rec->SetMAPQ(b->core.qual); + rec->SetCIGAR(GetCigar(b)); + rec->SetREFNEXT(rnext); + rec->SetPNEXT(static_cast(b->core.mpos + 1)); + rec->SetTLEN(static_cast(b->core.isize)); + rec->SetSEQ(GetSeq(b)); + rec->SetQUAL(GetQual(b)); + + rec->ResetNOPT(); + for (const auto &tag : GetTags(b)) { + rec->SetOPT(tag); + } +} + +} // namespace + +void bamtoramntuple(const char *bamfile, const char *treefile, bool index, bool /*split*/, bool /*cache*/, + int compression_algorithm, uint32_t quality_policy) +{ + TStopwatch stopwatch; + stopwatch.Start(); + + samFile *bamIn = sam_open(bamfile, "r"); + if (bamIn == nullptr) { + std::cerr << "Cannot open BAM: " << bamfile << "\n"; + return; + } + + hts_set_threads(bamIn, static_cast(std::thread::hardware_concurrency())); + + sam_hdr_t *hdr = sam_hdr_read(bamIn); + if (hdr == nullptr) { + std::cerr << "Cannot read BAM header\n"; + sam_close(bamIn); + return; + } + + auto rootFile = std::unique_ptr(TFile::Open(treefile, "RECREATE")); + if (rootFile == nullptr || !rootFile->IsOpen()) { + std::cerr << "Failed to create: " << treefile << "\n"; + sam_hdr_destroy(hdr); + sam_close(bamIn); + return; + } + + RAMNTupleRecord::InitializeRefs(); + + const int nRef = sam_hdr_nref(hdr); + for (int i = 0; i < nRef; ++i) + RAMNTupleRecord::GetRnameRefs()->GetRefId(sam_hdr_tid2name(hdr, i)); + + auto model = RAMNTupleRecord::MakeModel(); + ROOT::RNTupleWriteOptions opts; + opts.SetCompression(compression_algorithm); + opts.SetMaxUnzippedPageSize(64000); + + auto writer = ROOT::RNTupleWriter::Append(std::move(model), "RAM", *rootFile, opts); + auto entry = writer->GetModel().CreateEntry(); + auto recordPtr = entry->GetPtr("record"); + + bam1_t *rec = bam_init1(); + std::size_t count = 0; + int64_t mapped_count = 0; + int32_t last_refid = -1; + int32_t last_indexed_pos = -kPositionInterval; + + while (sam_read1(bamIn, hdr, rec) >= 0) { + FillRecord(recordPtr.get(), rec, hdr, quality_policy); + writer->Fill(*entry); + + if (index && !(rec->core.flag & kUnmapped) && recordPtr->GetREFID() >= 0) { + int32_t current_refid = recordPtr->GetREFID(); + int32_t current_pos = recordPtr->GetPOS() - 1; + + bool new_chrom = (current_refid != last_refid); + bool far_enough = (current_pos - last_indexed_pos >= kPositionInterval); + bool periodic = (mapped_count % kMappedInterval == 0); + bool duplicate = (!new_chrom && current_pos == last_indexed_pos); + + if ((new_chrom || far_enough || periodic) && !duplicate) { + RAMNTupleRecord::GetIndex()->AddItem(current_refid, current_pos, static_cast(count)); + last_refid = current_refid; + last_indexed_pos = current_pos; + } + mapped_count++; + } + + ++count; + if (count % 1000000 == 0) + std::cerr << "Converted " << count << " records...\n"; + } + + bam_destroy1(rec); + writer.reset(); + + if (index) + RAMNTupleRecord::WriteIndex(*rootFile); + RAMNTupleRecord::WriteAllRefs(*rootFile); + + TList headers; + headers.SetName("headers"); + const char *text = sam_hdr_str(hdr); + if (text != nullptr) { + std::istringstream ss(text); + std::string line; + while (std::getline(ss, line)) { + if (line.size() >= 3) { + auto named_obj = std::make_unique(line.substr(0, 3).c_str(), line.c_str()); + headers.Add(named_obj.release()); + } + } + } + headers.Write(); + + rootFile->Close(); + sam_hdr_destroy(hdr); + sam_close(bamIn); + + std::cout << "\nRAM file created: " << treefile << "\n" + << "Number of entries: " << count << "\n"; + RAMNTupleRecord::GetRnameRefs()->Print(); + RAMNTupleRecord::GetRnextRefs()->Print(); + if (index) + std::cout << "Index entries: " << RAMNTupleRecord::GetIndex()->Size() << "\n"; + + stopwatch.Print(); +} \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 24b54f0..5f0e413 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -17,7 +17,6 @@ function(add_ramcore_test test_name) ) if(RAMTOOLS_BUILD_BENCHMARKS) - target_link_libraries(${test_name} benchmark::benchmark) endif() @@ -31,8 +30,8 @@ endfunction() add_ramcore_test(ramcoretests ramcoretests.cxx) add_ramcore_test(chromosome_split_test chromosome_split_test.cxx) +add_ramcore_test(bam_conversion bam_conversion.cxx) -install(TARGETS ramcoretests chromosome_split_test +install(TARGETS ramcoretests chromosome_split_test bam_conversion RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} -) - +) \ No newline at end of file diff --git a/test/bam_conversion.cxx b/test/bam_conversion.cxx new file mode 100644 index 0000000..a872eeb --- /dev/null +++ b/test/bam_conversion.cxx @@ -0,0 +1,306 @@ +#include "ramcore/BamtoNTuple.h" + +#include "generate_sam_benchmark.h" +#include "ramcore/RAMNTupleView.h" +#include "ramcore/SamToNTuple.h" + +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +void GenerateBAMFile(const char *bam_path, int num_reads) +{ + const char *sam_path = "temp_for_bam.sam"; + GenerateSAMFile(sam_path, num_reads); + + samFile *in = sam_open(sam_path, "r"); + samFile *out = sam_open(bam_path, "wb"); + sam_hdr_t *hdr = sam_hdr_read(in); + + if (sam_hdr_write(out, hdr) < 0) { + throw std::runtime_error("Failed to write BAM header"); + } + + bam1_t *rec = bam_init1(); + while (sam_read1(in, hdr, rec) >= 0) { + if (sam_write1(out, hdr, rec) < 0) { + throw std::runtime_error("Failed to write BAM record"); + } + } + + bam_destroy1(rec); + sam_hdr_destroy(hdr); + sam_close(in); + sam_close(out); + std::remove(sam_path); +} + +void GenerateRichBAMFile(const char *bam_path) +{ + samFile *out = sam_open(bam_path, "wb"); + + sam_hdr_t *hdr = sam_hdr_init(); + + // Bypassed varargs entirely by passing the structured header string + const std::string header_text = "@HD\tVN:1.6\tSO:coordinate\n" + "@SQ\tSN:chr1\tLN:100000\n" + "@SQ\tSN:chr2\tLN:80000\n"; + + if (sam_hdr_add_lines(hdr, header_text.c_str(), header_text.length()) < 0) { + throw std::runtime_error("Failed to add rich BAM header lines"); + } + + if (sam_hdr_write(out, hdr) < 0) { + throw std::runtime_error("Failed to write rich BAM header"); + } + + bam1_t *rec = bam_init1(); + + // Mapped read with all aux tag types: A, c, C, s, S, i, I, f, Z, H + { + const char *qname = "read_tags"; + const char *seq = "ACGTACGTACGT"; + const int len = static_cast(strlen(seq)); + std::string qual(len, 'I'); + + std::array cigar = {static_cast(len) << 4 | 0}; + + bam_set1(rec, strlen(qname), qname, 0, 0, 1000, 60, 1, cigar.data(), -1, -1, 0, len, seq, qual.c_str(), 128); + + int8_t val_c = -5; + bam_aux_append(rec, "Xc", 'c', sizeof(val_c), static_cast(static_cast(&val_c))); + + uint8_t val_C = 200; + bam_aux_append(rec, "XC", 'C', sizeof(val_C), &val_C); + + int16_t val_s = -1000; + bam_aux_append(rec, "Xs", 's', sizeof(val_s), static_cast(static_cast(&val_s))); + + uint16_t val_S = 50000; + bam_aux_append(rec, "XS", 'S', sizeof(val_S), static_cast(static_cast(&val_S))); + + int32_t val_i = -100000; + bam_aux_append(rec, "Xi", 'i', sizeof(val_i), static_cast(static_cast(&val_i))); + + uint32_t val_I = 3000000; + bam_aux_append(rec, "XI", 'I', sizeof(val_I), static_cast(static_cast(&val_I))); + + float val_f = 3.14F; + bam_aux_append(rec, "Xf", 'f', sizeof(val_f), static_cast(static_cast(&val_f))); + + uint8_t val_A = 'Q'; + bam_aux_append(rec, "XA", 'A', 1, &val_A); + + const char *val_Z = "hello"; + bam_aux_append(rec, "XZ", 'Z', static_cast(strlen(val_Z) + 1), + static_cast(static_cast(val_Z))); + + const char *val_H = "1AE301"; + bam_aux_append(rec, "XH", 'H', static_cast(strlen(val_H) + 1), + static_cast(static_cast(val_H))); + + if (sam_write1(out, hdr, rec) < 0) + throw std::runtime_error("Failed to write record 1"); + } + + // Unmapped read, no CIGAR, no quality + { + const char *qname = "read_unmapped"; + const char *seq = "NNNNNN"; + const int len = static_cast(strlen(seq)); + + bam_set1(rec, strlen(qname), qname, 4, -1, -1, 0, 0, nullptr, -1, -1, 0, len, seq, nullptr, 0); + + if (sam_write1(out, hdr, rec) < 0) + throw std::runtime_error("Failed to write record 2"); + } + + // Mate on different chromosome + { + const char *qname = "read_diffchrom"; + const char *seq = "ACGTACGT"; + const int len = static_cast(strlen(seq)); + std::string qual(len, 'F'); + + std::array cigar = {static_cast(len) << 4 | 0}; + + bam_set1(rec, strlen(qname), qname, 1, 0, 500, 30, 1, cigar.data(), 1, 2000, 0, len, seq, qual.c_str(), 32); + + if (sam_write1(out, hdr, rec) < 0) + throw std::runtime_error("Failed to write record 3"); + } + + // Mate on same chromosome, multi-op CIGAR (4M4S) + { + const char *qname = "read_samechrom"; + const char *seq = "TTTTGGGG"; + const int len = static_cast(strlen(seq)); + std::string qual(len, 'A'); + + std::array cigar = {4U << 4 | 0, 4U << 4 | 4}; + + bam_set1(rec, strlen(qname), qname, 1, 0, 600, 40, 2, cigar.data(), 0, 800, 200, len, seq, qual.c_str(), 32); + + if (sam_write1(out, hdr, rec) < 0) + throw std::runtime_error("Failed to write record 4"); + } + + // Bulk reads for index exercising + for (int r = 0; r < 1001; ++r) { + std::string qname = "bulk_" + std::to_string(r); + const char *seq = "ACGT"; + const int len = 4; + const char *qual = "IIII"; + + std::array cigar = {4U << 4 | 0}; + + bam_set1(rec, qname.size(), qname.c_str(), 0, 0, 100 + r, 50, 1, cigar.data(), -1, -1, 0, len, seq, qual, 0); + + if (sam_write1(out, hdr, rec) < 0) + throw std::runtime_error("Failed to write bulk record"); + } + + bam_destroy1(rec); + sam_hdr_destroy(hdr); + sam_close(out); +} + +class BamToNTupleTest : public ::testing::Test { +protected: + void SetUp() override + { + GenerateBAMFile(/*bam_path=*/"test.bam", /*num_reads=*/100); + GenerateSAMFile(/*sam_path=*/"test_compare.sam", /*num_reads=*/100); + std::remove("test_bam.ram"); + std::remove("test_sam.ram"); + } + + void TearDown() override + { + std::remove("test_bam.ram"); + std::remove("test_sam.ram"); + std::remove("test.bam"); + std::remove("test_compare.sam"); + std::remove("test_rich.bam"); + std::remove("test_rich.ram"); + } +}; + +TEST_F(BamToNTupleTest, ConversionProducesEntries) +{ + bamtoramntuple("test.bam", "test_bam.ram", true, false, true, 505, 0U); + + ASSERT_TRUE(std::filesystem::exists("test_bam.ram")); + + auto reader = ROOT::RNTupleReader::Open("RAM", "test_bam.ram"); + ASSERT_NE(reader, nullptr); + EXPECT_GT(reader->GetNEntries(), 0); +} + +TEST_F(BamToNTupleTest, SameEntryCountAsSAMPath) +{ + bamtoramntuple("test.bam", "test_bam.ram", true, false, true, 505, 0U); + + samtoramntuple("test_compare.sam", "test_sam.ram", true, true, true, 505, 0U); + + auto bamReader = ROOT::RNTupleReader::Open("RAM", "test_bam.ram"); + auto samReader = ROOT::RNTupleReader::Open("RAM", "test_sam.ram"); + + ASSERT_NE(bamReader, nullptr); + ASSERT_NE(samReader, nullptr); + EXPECT_EQ(bamReader->GetNEntries(), samReader->GetNEntries()); +} + +TEST_F(BamToNTupleTest, RegionQueryWorks) +{ + bamtoramntuple("test.bam", "test_bam.ram", true, false, true, 505, 0U); + + const std::array regions = {"chr1:100-200", "chr2:500-1000", "chr5:1000-5000"}; + + for (const char *region : regions) { + testing::internal::CaptureStdout(); + const Long64_t count = ramntupleview("test_bam.ram", region, true, false, nullptr); + testing::internal::GetCapturedStdout(); + EXPECT_GE(count, 0); + } +} + +TEST_F(BamToNTupleTest, MandatoryFieldsPresent) +{ + bamtoramntuple("test.bam", "test_bam.ram", true, false, true, 505, 0U); + + auto reader = ROOT::RNTupleReader::Open("RAM", "test_bam.ram"); + ASSERT_NE(reader, nullptr); + + const auto &desc = reader->GetDescriptor(); + EXPECT_NE(desc.FindFieldId("record"), ROOT::kInvalidDescriptorId); + EXPECT_GT(reader->GetNEntries(), 0); +} + +TEST_F(BamToNTupleTest, RichBAMCoversAllTagTypes) +{ + GenerateRichBAMFile(/*bam_path=*/"test_rich.bam"); + + bamtoramntuple("test_rich.bam", "test_rich.ram", true, false, true, 505, 0U); + + auto reader = ROOT::RNTupleReader::Open("RAM", "test_rich.ram"); + ASSERT_NE(reader, nullptr); + EXPECT_EQ(reader->GetNEntries(), 1005); +} + +TEST_F(BamToNTupleTest, InvalidFileReturnsGracefully) +{ + testing::internal::CaptureStderr(); + bamtoramntuple("nonexistent.bam", "out.ram", true, false, true, 505, 0U); + std::string err = testing::internal::GetCapturedStderr(); + EXPECT_NE(err.find("Cannot open BAM"), std::string::npos); + EXPECT_FALSE(std::filesystem::exists("out.ram")); +} + +TEST_F(BamToNTupleTest, ConversionWithoutIndex) +{ + bamtoramntuple("test.bam", "test_bam.ram", false, false, true, 505, 0U); + + auto reader = ROOT::RNTupleReader::Open("RAM", "test_bam.ram"); + ASSERT_NE(reader, nullptr); + EXPECT_GT(reader->GetNEntries(), 0); +} + +TEST_F(BamToNTupleTest, UnmappedAndMissingQualityHandled) +{ + GenerateRichBAMFile(/*bam_path=*/"test_rich.bam"); + + bamtoramntuple("test_rich.bam", "test_rich.ram", true, false, true, 505, 0U); + + testing::internal::CaptureStdout(); + const Long64_t count = ramntupleview("test_rich.ram", "chr1:100-2000", true, false, nullptr); + testing::internal::GetCapturedStdout(); + EXPECT_GE(count, 0); +} + +TEST_F(BamToNTupleTest, DifferentChromMateHandled) +{ + GenerateRichBAMFile(/*bam_path=*/"test_rich.bam"); + + bamtoramntuple("test_rich.bam", "test_rich.ram", true, false, true, 505, 0U); + + testing::internal::CaptureStdout(); + const Long64_t count = ramntupleview("test_rich.ram", "chr2:400-600", true, false, nullptr); + testing::internal::GetCapturedStdout(); + EXPECT_GE(count, 0); +} + +} // namespace \ No newline at end of file diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index b67568f..88b4779 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -22,7 +22,16 @@ ROOT_EXECUTABLE(ramntupleview ROOT::ROOTNTuple ) -install(TARGETS samtoram samtoramntuple ramntupleview +ROOT_EXECUTABLE(bamtoramntuple + bamtoramntuple.cxx + LIBRARIES + ramcore + hts + ROOT::Core + ROOT::ROOTNTuple +) + +install(TARGETS samtoram samtoramntuple ramntupleview bamtoramntuple RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} ) @@ -38,5 +47,4 @@ set(ROOT_MACROS install(FILES ${ROOT_MACROS} DESTINATION share/ramtools/macros -) - +) \ No newline at end of file diff --git a/tools/bamtoramntuple.cxx b/tools/bamtoramntuple.cxx new file mode 100644 index 0000000..2627f29 --- /dev/null +++ b/tools/bamtoramntuple.cxx @@ -0,0 +1,56 @@ +#include "ramcore/BamtoNTuple.h" + +#include "rntuple/RAMNTupleRecord.h" + +#include +#include +#include + +int main(int argc, char *argv[]) +{ + if (argc < 2) { + std::cout << "Usage: " << argv[0] << " [output]\n" + << "Options:\n" + << " -noindex Disable indexing\n" + << " -illumina Use Illumina quality binning\n" + << " -dropqual Drop quality scores\n"; + return 1; + } + + const char *input = argv[1]; + const char *output = nullptr; + + bool do_index = true; + uint32_t quality_mode = RAMNTupleRecord::kPhred33; + + for (int i = 2; i < argc; ++i) { + const std::string arg = argv[i]; + if (arg == "-noindex") + do_index = false; + else if (arg == "-illumina") + quality_mode = RAMNTupleRecord::kIlluminaBinning; + else if (arg == "-dropqual") + quality_mode = RAMNTupleRecord::kDrop; + else if (arg[0] != '-') + output = argv[i]; + } + + std::string outfile; + if (output == nullptr) { + outfile = input; + const auto pos = outfile.rfind(".bam"); + if (pos != std::string::npos) + outfile.erase(pos); + output = outfile.c_str(); + } + + std::string ramfile = output; + if (ramfile.find(".root") == std::string::npos && ramfile.find(".ram") == std::string::npos) + ramfile += ".ram"; + + bamtoramntuple(input, ramfile.c_str(), + /*index=*/do_index, /*split=*/false, /*cache=*/true, + /*compression_algorithm=*/505, /*quality_policy=*/quality_mode); + + return 0; +} \ No newline at end of file From f15bbe054df8e1355dd46b23a66392b94c4d25c0 Mon Sep 17 00:00:00 2001 From: AdityaPandeyCN Date: Mon, 20 Apr 2026 20:34:13 +0530 Subject: [PATCH 2/2] clang suggestion Signed-off-by: AdityaPandeyCN --- test/bam_conversion.cxx | 18 ++++++++---------- tools/bamtoramntuple.cxx | 6 ++---- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/test/bam_conversion.cxx b/test/bam_conversion.cxx index a872eeb..3de6bc8 100644 --- a/test/bam_conversion.cxx +++ b/test/bam_conversion.cxx @@ -81,36 +81,34 @@ void GenerateRichBAMFile(const char *bam_path) bam_set1(rec, strlen(qname), qname, 0, 0, 1000, 60, 1, cigar.data(), -1, -1, 0, len, seq, qual.c_str(), 128); int8_t val_c = -5; - bam_aux_append(rec, "Xc", 'c', sizeof(val_c), static_cast(static_cast(&val_c))); + bam_aux_append(rec, "Xc", 'c', sizeof(val_c), reinterpret_cast(&val_c)); uint8_t val_C = 200; bam_aux_append(rec, "XC", 'C', sizeof(val_C), &val_C); int16_t val_s = -1000; - bam_aux_append(rec, "Xs", 's', sizeof(val_s), static_cast(static_cast(&val_s))); + bam_aux_append(rec, "Xs", 's', sizeof(val_s), reinterpret_cast(&val_s)); uint16_t val_S = 50000; - bam_aux_append(rec, "XS", 'S', sizeof(val_S), static_cast(static_cast(&val_S))); + bam_aux_append(rec, "XS", 'S', sizeof(val_S), reinterpret_cast(&val_S)); int32_t val_i = -100000; - bam_aux_append(rec, "Xi", 'i', sizeof(val_i), static_cast(static_cast(&val_i))); + bam_aux_append(rec, "Xi", 'i', sizeof(val_i), reinterpret_cast(&val_i)); uint32_t val_I = 3000000; - bam_aux_append(rec, "XI", 'I', sizeof(val_I), static_cast(static_cast(&val_I))); + bam_aux_append(rec, "XI", 'I', sizeof(val_I), reinterpret_cast(&val_I)); float val_f = 3.14F; - bam_aux_append(rec, "Xf", 'f', sizeof(val_f), static_cast(static_cast(&val_f))); + bam_aux_append(rec, "Xf", 'f', sizeof(val_f), reinterpret_cast(&val_f)); uint8_t val_A = 'Q'; bam_aux_append(rec, "XA", 'A', 1, &val_A); const char *val_Z = "hello"; - bam_aux_append(rec, "XZ", 'Z', static_cast(strlen(val_Z) + 1), - static_cast(static_cast(val_Z))); + bam_aux_append(rec, "XZ", 'Z', static_cast(strlen(val_Z) + 1), reinterpret_cast(val_Z)); const char *val_H = "1AE301"; - bam_aux_append(rec, "XH", 'H', static_cast(strlen(val_H) + 1), - static_cast(static_cast(val_H))); + bam_aux_append(rec, "XH", 'H', static_cast(strlen(val_H) + 1), reinterpret_cast(val_H)); if (sam_write1(out, hdr, rec) < 0) throw std::runtime_error("Failed to write record 1"); diff --git a/tools/bamtoramntuple.cxx b/tools/bamtoramntuple.cxx index 2627f29..97b85cd 100644 --- a/tools/bamtoramntuple.cxx +++ b/tools/bamtoramntuple.cxx @@ -27,10 +27,8 @@ int main(int argc, char *argv[]) const std::string arg = argv[i]; if (arg == "-noindex") do_index = false; - else if (arg == "-illumina") - quality_mode = RAMNTupleRecord::kIlluminaBinning; - else if (arg == "-dropqual") - quality_mode = RAMNTupleRecord::kDrop; + else if (arg == "-illumina" || arg == "-dropqual") + quality_mode = (arg == "-illumina") ? RAMNTupleRecord::kIlluminaBinning : RAMNTupleRecord::kDrop; else if (arg[0] != '-') output = argv[i]; }