From 0674742a7554e22cb32ca4153c48296c598fdaf5 Mon Sep 17 00:00:00 2001 From: Abhuday Singh Date: Sat, 28 Mar 2026 00:03:08 +0530 Subject: [PATCH 1/2] chore: extend .gitignore for genomics data, BWA index and output files --- .gitignore | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 4502af8..596725a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,29 @@ -ramexample.root + + +# Genomics data +*.fastq +*.fastq.gz +*.fa +*.fa.gz + +# BWA index files +*.amb +*.ann +*.bwt +*.pac +*.sa + +# Output files +*.sam +*.ram +*.root + +# Mac / temp +.DS_Store + +# Accidental files +* 2.* +--help.ramramexample.root *.csv *.log *.perf From 504f560cd0049fdf426608294297d6cd34c02d9c Mon Sep 17 00:00:00 2001 From: Abhuday Singh Date: Sun, 12 Apr 2026 22:58:03 +0530 Subject: [PATCH 2/2] feat: add --compression and --level CLI flags to samtoramntuple Replaces hardcoded 505 (ZSTD L5) with user-selectable algorithm and level. --compression accepts: zstd (default), lz4, lzma, zlib --level accepts 1-19 (default 5) Compression setting threaded through as algo_id*100 + level per ROOT convention (505 = ZSTD level 5, 404 = LZ4 level 4). 
Tested on SRR062634 (309,689 reads, 85MB SAM): ZSTD L5 (default): 23MB, 6s LZ4: 23MB, 3s ZSTD L1: 25MB, 4s NOTE(review): the LZ4 and ZSTD L1 figures appear to have been recorded with the earlier level*100 + algo_id encoding, under which ROOT decodes those settings as 504 (ZSTD level 4) and 105 (ZLIB level 5); re-measure with the corrected encoding before relying on them. Closes #49 --- src/ramcore/SamToNTuple.cxx | 2 +- tools/samtoramntuple.cxx | 46 ++++++++++++++++++++++++++++--------- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/src/ramcore/SamToNTuple.cxx b/src/ramcore/SamToNTuple.cxx index f6d9a78..166f315 100644 --- a/src/ramcore/SamToNTuple.cxx +++ b/src/ramcore/SamToNTuple.cxx @@ -234,7 +234,7 @@ void samtoramntuple_split_by_chromosome(const char *datafile, const char *output writeOptions.SetUseBufferedWrite(true); auto parallel_writer = - ROOT::Experimental::RNTupleParallelWriter::Recreate(std::move(model), "RAM", filename, writeOptions); + ROOT::RNTupleParallelWriter::Recreate(std::move(model), "RAM", filename, writeOptions); const int contexts_per_file = std::min(4, num_threads); const size_t records_per_context = (records.size() + contexts_per_file - 1) / contexts_per_file; diff --git a/tools/samtoramntuple.cxx b/tools/samtoramntuple.cxx index 55d0be1..4bb91a3 100644 --- a/tools/samtoramntuple.cxx +++ b/tools/samtoramntuple.cxx @@ -3,25 +3,30 @@ #include #include #include +#include int main(int argc, char* argv[]) { if (argc < 2) { std::cout << "Usage: " << argv[0] << " [output]\n"; std::cout << "Options:\n"; - std::cout << " -split Split by chromosome\n"; - std::cout << " -noindex Disable indexing\n"; - std::cout << " -illumina Use Illumina quality binning\n"; - std::cout << " -dropqual Drop quality scores\n"; + std::cout << " -split Split by chromosome\n"; + std::cout << " -noindex Disable indexing\n"; + std::cout << " -illumina Use Illumina quality binning\n"; + std::cout << " -dropqual Drop quality scores\n"; + std::cout << " --compression A Compression algorithm: zstd (default), lz4, lzma, zlib\n"; + std::cout << " --level N Compression level 1-19 (default 5)\n"; return 1; } - + const char* input = argv[1]; const char* output = nullptr; bool do_split = false; bool do_index = true; uint32_t 
quality_mode = RAMNTupleRecord::kPhred33; - + int compression_algo = ROOT::RCompressionSetting::EAlgorithm::EValues::kZSTD; + int compression_level = 5; + for (int i = 2; i < argc; i++) { std::string arg = argv[i]; if (arg == "-split") { @@ -32,11 +37,31 @@ int main(int argc, char* argv[]) { quality_mode = RAMNTupleRecord::kIlluminaBinning; } else if (arg == "-dropqual") { quality_mode = RAMNTupleRecord::kDrop; + } else if (arg == "--compression" && i + 1 < argc) { + std::string val = argv[++i]; + if (val == "zstd" || val == "ZSTD") compression_algo = ROOT::RCompressionSetting::EAlgorithm::EValues::kZSTD; + else if (val == "lz4" || val == "LZ4") compression_algo = ROOT::RCompressionSetting::EAlgorithm::EValues::kLZ4; + else if (val == "lzma" || val == "LZMA") compression_algo = ROOT::RCompressionSetting::EAlgorithm::EValues::kLZMA; + else if (val == "zlib" || val == "ZLIB") compression_algo = ROOT::RCompressionSetting::EAlgorithm::EValues::kZLIB; + else { + std::cerr << "Unknown compression algorithm: " << val + << ". 
Valid: zstd, lz4, lzma, zlib\n"; + return 1; + } + } else if (arg == "--level" && i + 1 < argc) { + compression_level = std::atoi(argv[++i]); + if (compression_level < 1 || compression_level > 19) { + std::cerr << "Compression level must be 1-19\n"; + return 1; + } } else if (arg[0] != '-') { output = argv[i]; } } + // ROOT compression setting format: algorithm_id * 100 + level (e.g. 505 = ZSTD level 5) + int compression_setting = compression_algo * 100 + compression_level; + std::string outfile; if (!output) { outfile = std::string(input); @@ -49,19 +74,18 @@ int main(int argc, char* argv[]) { try { if (do_split) { - samtoramntuple_split_by_chromosome(input, output, 505, quality_mode, 4); + samtoramntuple_split_by_chromosome(input, output, compression_setting, quality_mode, 4); } else { std::string ramfile = std::string(output); if (ramfile.find(".root") == std::string::npos && ramfile.find(".ram") == std::string::npos) { ramfile += ".ram"; } - samtoramntuple(input, ramfile.c_str(), do_index, true, true, 505, quality_mode); + samtoramntuple(input, ramfile.c_str(), do_index, true, true, compression_setting, quality_mode); } } catch (const std::exception& e) { std::cerr << "Error: " << e.what() << std::endl; return 1; } - - return 0; -} + return 0; +} \ No newline at end of file