From cde9a7b22f983a83a6bc0988ea8ca2c74ed55fd9 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 21 Dec 2025 19:43:03 -0800
Subject: [PATCH 1/6] multiple sources: replacing GenomicRegion and
 SimpleGenomicRegion with Interval6 and Interval

---
 src/amrfinder/allelicmeth.cpp  |  178 +++--
 src/amrfinder/amrtester.cpp    |  236 ++++---
 src/analysis/cpgbins.cpp       |  104 ++-
 src/analysis/hmr-rep.cpp       |   61 +-
 src/analysis/hmr.cpp           |  457 ++++++-------
 src/analysis/hypermr.cpp       |  305 +++++----
 src/analysis/metagene.cpp      |  177 +++--
 src/analysis/multimethstat.cpp |  521 +++++++--------
 src/analysis/pmd.cpp           | 1109 ++++++++++++++++----------------
 src/analysis/roimethstat.cpp   |  187 +++---
 src/common/Interval.hpp        |   17 +-
 src/common/Interval6.hpp       |   20 +-
 src/common/LevelsCounter.cpp   |   41 +-
 src/common/LevelsCounter.hpp   |   62 +-
 src/common/MSite.cpp           |  210 +++---
 src/common/bsutils.cpp         |   23 +-
 src/common/bsutils.hpp         |    8 +-
 src/radmeth/dmr.cpp            |  289 +++++----
 src/radmeth/radmeth-merge.cpp  |  139 ++--
 src/utils/selectsites.cpp      |  280 ++++----
 20 files changed, 2143 insertions(+), 2281 deletions(-)
diff --git a/src/amrfinder/allelicmeth.cpp b/src/amrfinder/allelicmeth.cpp
index e1705d3b..3b99ff58 100644
--- a/src/amrfinder/allelicmeth.cpp
+++ b/src/amrfinder/allelicmeth.cpp
@@ -1,6 +1,4 @@
-/* allelicmeth:
- *
- * Copyright (C) 2014-2022 University of Southern California,
+/* Copyright (C) 2014-2022 University of Southern California,
  *                         Andrew D. Smith, and Benjamin E. Decato
  *
  * Authors: Andrew D. Smith and Benjamin E. Decato
@@ -19,7 +17,6 @@
 #include "Epiread.hpp"
 #include "MSite.hpp"
 
-#include "GenomicRegion.hpp"
 #include "OptionParser.hpp"
 #include "smithlab_os.hpp"
 #include "smithlab_utils.hpp"
@@ -42,27 +39,16 @@
 #include <utility>
 #include <vector>
 
-using std::cerr;
-using std::cout;
-using std::max;
-using std::min;
-using std::runtime_error;
-using std::string;
-using std::unordered_map;
-using std::unordered_set;
-using std::vector;
-
-// NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions,*-prefer-member-initializer)
+// NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions)
 
 static inline double
 log_sum_log(const double p, const double q) {
-  if (p == 0) {
+  if (p == 0)
     return q;
-  }
-  else if (q == 0) {
+  if (q == 0)
     return p;
-  }
-  return p > q ? p + log1p(exp(q - p)) : q + log1p(exp(p - q));
+  return p > q ? p + std::log1p(std::exp(q - p))
+               : q + std::log1p(std::exp(p - q));
 }
 
 static inline double
@@ -77,28 +63,29 @@ lnchoose(const unsigned int n, unsigned int m) {
 
 // p(k) =  C(n1, k) C(n2, t - k) / C(n1 + n2, t)
 static double
-log_hyper_g(const size_t k, const size_t n1, const size_t n2, const size_t t) {
+log_hyper_g(const std::size_t k, const std::size_t n1, const std::size_t n2,
+            const std::size_t t) {
   return lnchoose(n1, k) + lnchoose(n2, t - k) - lnchoose(n1 + n2, t);
 }
 
 static double
-fishers_exact(size_t a, size_t b, size_t c, size_t d) {
-  const size_t m = a + c;  // sum of first column
-  const size_t n = b + d;  // sum of second column
-  const size_t k = a + b;  // sum of first row
+fishers_exact(std::size_t a, std::size_t b, std::size_t c, std::size_t d) {
+  const std::size_t m = a + c;  // sum of first column
+  const std::size_t n = b + d;  // sum of second column
+  const std::size_t k = a + b;  // sum of first row
   // ADS: want more extreme than "observed"
   const double observed = log_hyper_g(a, m, n, k);
   double p = 0.0;
-  for (size_t i = (n > k ? 0ul : k - n); i <= std::min(k, m); ++i) {
+  for (std::size_t i = (n > k ? 0ul : k - n); i <= std::min(k, m); ++i) {
     const double curr = log_hyper_g(i, m, n, k);
     if (curr <= observed)
       p = log_sum_log(p, curr);
   }
-  return exp(p);
+  return std::exp(p);
 }
 
-static size_t
-state_pair_to_index(const string &s, const size_t idx) {
+static std::size_t
+state_pair_to_index(const std::string &s, const std::size_t idx) {
   assert(idx < s.length() - 1);
   const char a = s[idx];
   if (a == 'C') {
@@ -136,13 +123,13 @@ template <class T> struct PairStateCounter {
     return CC + CT + TC + TT;
   }
 
-  string
+  std::string
   tostring() const {
     return toa(CC) + '\t' + toa(CT) + '\t' + toa(TC) + '\t' + toa(TT);
   }
 
   void
-  increment(const size_t state) {
+  increment(const std::size_t state) {
     if (state == 0)
       ++CC;
     else if (state == 1)
@@ -156,19 +143,20 @@ template <class T> struct PairStateCounter {
 
 template <typename T>
 void
-fit_states(const epiread &er, vector<PairStateCounter<T>> &counts) {
-  for (size_t i = 0; i < er.length() - 1; ++i) {
-    const size_t pos = er.pos + i;
+fit_states(const epiread &er, std::vector<PairStateCounter<T>> &counts) {
+  for (std::size_t i = 0; i < er.length() - 1; ++i) {
+    const std::size_t pos = er.pos + i;
     assert(pos < counts.size());
-    const size_t curr_state = state_pair_to_index(er.seq, i);
+    const std::size_t curr_state = state_pair_to_index(er.seq, i);
     counts[pos].increment(curr_state);
   }
 }
 
 static void
-collect_cpgs(const string &s, unordered_map<size_t, size_t> &cpgs) {
-  size_t cpg_idx = 0;
-  size_t nuc_idx = 0;
+collect_cpgs(const std::string &s,
+             std::unordered_map<std::size_t, std::size_t> &cpgs) {
+  std::size_t cpg_idx = 0;
+  std::size_t nuc_idx = 0;
   const auto lim = end(s) - (s.size() == 0 ? 0 : 1);
   for (auto itr = begin(s); itr != lim; ++itr, ++nuc_idx)
     if (*itr == 'C' && *(itr + 1) == 'G')
@@ -179,53 +167,58 @@ collect_cpgs(const string &s, unordered_map<size_t, size_t> &cpgs) {
    consecutive CpG sites. So there would be one fewer of them than the
    total CpGs in a chromosome. */
 static void
-convert_coordinates(const string &chrom, vector<MSite> &sites) {
-  unordered_map<size_t, size_t> cpgs;
+convert_coordinates(const std::string &chrom, std::vector<MSite> &sites) {
+  std::unordered_map<std::size_t, std::size_t> cpgs;
   collect_cpgs(chrom, cpgs);
-  for (size_t i = 0; i < sites.size(); ++i) {
+  for (std::size_t i = 0; i < sites.size(); ++i) {
     const auto pos_itr = cpgs.find(sites[i].pos);
     if (pos_itr == end(cpgs))
-      throw runtime_error("failed converting site:\n" + sites[i].tostring());
+      throw std::runtime_error("failed converting site:\n" +
+                               sites[i].tostring());
     sites[i].pos = pos_itr->second;
   }
 }
 
 template <typename T>
 void
-add_cytosine(const string &chrom_name, const size_t start_cpg,
-             vector<PairStateCounter<T>> &counts, vector<MSite> &cytosines) {
+add_cytosine(const std::string &chrom_name, const std::size_t start_cpg,
+             std::vector<PairStateCounter<T>> &counts,
+             std::vector<MSite> &cytosines) {
   std::ostringstream s;
   s << counts[start_cpg].score() << "\t" << counts[start_cpg].total() << "\t"
     << counts[start_cpg].tostring();
-  const string name(s.str());
+  const std::string name(s.str());
   cytosines.push_back(MSite(chrom_name, start_cpg, '+', name, 0, 0));
 }
 
 template <typename T>
 void
-process_chrom(const string &chrom_name, const vector<epiread> &epireads,
-              vector<MSite> &cytosines, vector<PairStateCounter<T>> &counts) {
-  for (size_t i = 0; i < epireads.size(); ++i)
+process_chrom(const std::string &chrom_name,
+              const std::vector<epiread> &epireads,
+              std::vector<MSite> &cytosines,
+              std::vector<PairStateCounter<T>> &counts) {
+  for (std::size_t i = 0; i < epireads.size(); ++i)
     fit_states(epireads[i], counts);
-  for (size_t i = 0; i < counts.size(); ++i)
+  for (std::size_t i = 0; i < counts.size(); ++i)
     add_cytosine(chrom_name, i, counts, cytosines);
 }
 
 static void
-update_chroms_seen(const string &chrom_name,
-                   unordered_set<string> &chroms_seen) {
+update_chroms_seen(const std::string &chrom_name,
+                   std::unordered_set<std::string> &chroms_seen) {
   const auto chr_itr = chroms_seen.find(chrom_name);
   if (chr_itr != end(chroms_seen))
-    throw runtime_error("chroms out of order: " + chrom_name);
+    throw std::runtime_error("chroms out of order: " + chrom_name);
   chroms_seen.insert(chrom_name);
 }
 
 static void
-verify_chroms_available(const string &chrom_name,
-                        unordered_map<string, size_t> &chrom_lookup) {
+verify_chroms_available(
+  const std::string &chrom_name,
+  std::unordered_map<std::string, std::size_t> &chrom_lookup) {
   const auto chr_itr = chrom_lookup.find(chrom_name);
   if (chr_itr == end(chrom_lookup))
-    throw runtime_error("chrom not found: " + chrom_name);
+    throw std::runtime_error("chrom not found: " + chrom_name);
 }
 
 int
@@ -236,8 +229,8 @@ computes probability of allele-specific methylation at each tuple of CpGs
 )";
     bool VERBOSE = false;
 
-    string outfile;
-    string chroms_dir;
+    std::string outfile;
+    std::string chroms_dir;
     /****************** COMMAND LINE OPTIONS ********************/
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
                            description, "<epireads>");
@@ -246,79 +239,79 @@ computes probability of allele-specific methylation at each tuple of CpGs
     opt_parse.add_opt("chrom", 'c', "genome sequence file/directory", true,
                       chroms_dir);
     opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE);
-    vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << '\n'
-           << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n'
+                << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << '\n';
+      std::cerr << opt_parse.option_missing_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (leftover_args.size() != 1) {
-      cerr << opt_parse.help_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n';
       return EXIT_SUCCESS;
     }
-    const string epi_file(leftover_args.front());
+    const std::string epi_file(leftover_args.front());
     /****************** END COMMAND LINE OPTIONS *****************/
 
-    vector<string> chrom_names;
-    vector<string> chroms;
+    std::vector<std::string> chrom_names;
+    std::vector<std::string> chroms;
     read_fasta_file_short_names(chroms_dir, chrom_names, chroms);
     for (auto &&i : chroms)
       transform(begin(i), end(i), begin(i),
                 [](const char c) { return std::toupper(c); });
 
     // lookup to map chrom names to chrom sequences
-    unordered_map<string, size_t> chrom_lookup;
-    for (size_t i = 0; i < chrom_names.size(); ++i)
+    std::unordered_map<std::string, std::size_t> chrom_lookup;
+    for (std::size_t i = 0; i < chrom_names.size(); ++i)
       chrom_lookup.insert(make_pair(chrom_names[i], i));
 
-    unordered_map<string, size_t> chrom_sizes;
-    for (size_t i = 0; i < chrom_names.size(); ++i) {
-      size_t cpg_count = 0;
-      for (size_t j = 0; j < chroms[i].size() - 1; ++j)
+    std::unordered_map<std::string, std::size_t> chrom_sizes;
+    for (std::size_t i = 0; i < chrom_names.size(); ++i) {
+      std::size_t cpg_count = 0;
+      for (std::size_t j = 0; j < chroms[i].size() - 1; ++j)
         cpg_count += (chroms[i][j] == 'C' && chroms[i][j + 1] == 'G');
       chrom_sizes.insert(make_pair(chrom_names[i], cpg_count));
     }
 
     if (VERBOSE)
-      cerr << "number of chromosomes: " << chrom_sizes.size() << '\n';
+      std::cerr << "number of chromosomes: " << chrom_sizes.size() << '\n';
 
     std::ifstream in(epi_file);
     if (!in)
-      throw runtime_error("cannot open input file: " + epi_file);
+      throw std::runtime_error("cannot open input file: " + epi_file);
 
-    std::ofstream of;
-    if (!outfile.empty())
-      of.open(outfile);
-    std::ostream out(outfile.empty() ? cout.rdbuf() : of.rdbuf());
+    std::ofstream out(outfile);
+    if (!out)
+      throw std::runtime_error("failed to open output file: " + outfile);
 
-    unordered_set<string> chroms_seen;
-    string chrom;
+    std::unordered_set<std::string> chroms_seen;
+    std::string chrom;
     epiread er;
-    vector<epiread> epireads;
+    std::vector<epiread> epireads;
     while (in >> er) {
       if (er.chr != chrom) {
         update_chroms_seen(er.chr, chroms_seen);
         verify_chroms_available(er.chr, chrom_lookup);
 
         if (VERBOSE)
-          cerr << "[processing " << er.chr << "]" << '\n';
+          std::cerr << "[processing " << er.chr << "]" << '\n';
 
         if (!chrom.empty()) {
-          vector<PairStateCounter<uint32_t>> counts(chrom_sizes[chrom] - 1);
-          vector<MSite> cytosines;
+          std::vector<PairStateCounter<std::uint32_t>> counts(
+            chrom_sizes[chrom] - 1);
+          std::vector<MSite> cytosines;
           process_chrom(chrom, epireads, cytosines, counts);
-          const size_t chrom_idx = chrom_lookup[chrom];
+          const std::size_t chrom_idx = chrom_lookup[chrom];
           convert_coordinates(chroms[chrom_idx], cytosines);
-          for (size_t i = 0; i < cytosines.size() - 1; ++i) {
+          for (std::size_t i = 0; i < cytosines.size() - 1; ++i) {
             out << cytosines[i].chrom << "\t" << cytosines[i].pos
                 << "\t+\tCpG\t" << cytosines[i].context << '\n';
           }
@@ -329,22 +322,23 @@ computes probability of allele-specific methylation at each tuple of CpGs
       chrom.swap(er.chr);
     }
     if (!chrom.empty()) {
-      vector<PairStateCounter<uint32_t>> counts(chrom_sizes[chrom] - 1);
-      vector<MSite> cytosines;
+      std::vector<PairStateCounter<std::uint32_t>> counts(chrom_sizes[chrom] -
+                                                          1);
+      std::vector<MSite> cytosines;
       process_chrom(chrom, epireads, cytosines, counts);
-      const size_t chrom_idx = chrom_lookup[chrom];
+      const std::size_t chrom_idx = chrom_lookup[chrom];
       convert_coordinates(chroms[chrom_idx], cytosines);
-      for (size_t i = 0; i < cytosines.size() - 1; ++i) {
+      for (std::size_t i = 0; i < cytosines.size() - 1; ++i) {
         out << cytosines[i].chrom << "\t" << cytosines[i].pos << "\t+\tCpG\t"
             << cytosines[i].context << '\n';
       }
     }
   }
   catch (const std::exception &e) {
-    cerr << e.what() << '\n';
+    std::cerr << e.what() << '\n';
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
 }
 
-// NOLINTEND(*-avoid-magic-numbers,*-narrowing-conversions,*-prefer-member-initializer)
+// NOLINTEND(*-avoid-magic-numbers,*-narrowing-conversions)
diff --git a/src/amrfinder/amrtester.cpp b/src/amrfinder/amrtester.cpp
index 78da4fed..80d776a2 100644
--- a/src/amrfinder/amrtester.cpp
+++ b/src/amrfinder/amrtester.cpp
@@ -1,29 +1,30 @@
-/* amrtester: A program for testing whether a genomic region has
- * allele-specific methylation
- *
- * Copyright (C) 2014-2023 University of Southern California and
- *                         Benjamin E Decato and Andrew D. Smith and Fang Fang
+/* Copyright (C) 2014-2023 Andrew D. Smith, Benjamin E Decato and Fang Fang
  *
  * Authors: Andrew D. Smith and Benjamin E Decato and Fang Fang
  *
- * This program is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, either version 3 of the
- * License, or (at your option) any later version.
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
  *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+[[maybe_unused]] static constexpr auto about = R"(
+amrtester: A program for testing whether a genomic region has allele-specific
+methylation
+)";
+
 #include "Epiread.hpp"
 #include "EpireadStats.hpp"
 
-#include <GenomicRegion.hpp>
+#include <Interval6.hpp>
 #include <OptionParser.hpp>
 #include <smithlab_os.hpp>
 #include <smithlab_utils.hpp>
@@ -42,44 +43,32 @@
 #include <utility>
 #include <vector>
 
-using std::begin;
-using std::cerr;
-using std::cout;
-using std::end;
-using std::runtime_error;
-using std::streampos;
-using std::string;
-using std::unordered_map;
-using std::vector;
-
-using epi_r = small_epiread;
-
-// NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions)
+// NOLINTBEGIN(*-narrowing-conversions)
 
 static void
 backup_to_start_of_current_record(std::ifstream &in) {
-  static const size_t assumed_max_valid_line_width = 10000;
-  size_t count = 0;
+  static constexpr std::size_t assumed_max_valid_line_width = 10000;
+  std::size_t count = 0;
   while (in.tellg() > 0 && in.peek() != '\n' && in.peek() != '\r' &&
          count++ < assumed_max_valid_line_width)
     in.seekg(-1, std::ios_base::cur);
   if (count > assumed_max_valid_line_width)
-    throw runtime_error("file contains a line longer than " +
-                        std::to_string(assumed_max_valid_line_width));
+    throw std::runtime_error("file contains a line longer than " +
+                             std::to_string(assumed_max_valid_line_width));
 }
 
-static streampos
-find_first_epiread_ending_after_position(const string &query_chrom,
-                                         const size_t query_pos,
+static std::streampos
+find_first_epiread_ending_after_position(const std::string &query_chrom,
+                                         const std::size_t query_pos,
                                          std::ifstream &in) {
   in.seekg(0, std::ios_base::end);
   auto high_pos = in.tellg();
-  size_t eof = in.tellg();
+  std::size_t eof = in.tellg();
   in.seekg(0, std::ios_base::beg);
   std::streamoff low_pos = 0;
 
-  string chrom, seq;
-  size_t start = 0ul;
+  std::string chrom, seq;
+  std::size_t start = 0ul;
 
   // This is just binary search on disk
   while (high_pos > low_pos + 1) {
@@ -92,11 +81,11 @@ find_first_epiread_ending_after_position(const string &query_chrom,
     if (low_pos + 2 == static_cast<std::streamoff>(eof))
       return -1;
 
-    if (!(in >> chrom >> start >> seq)) {
-      throw runtime_error("problem loading reads");
-    }
+    if (!(in >> chrom >> start >> seq))
+      throw std::runtime_error("problem loading reads");
+
     if (chrom < query_chrom ||
-        (chrom == query_chrom && start + seq.length() <= query_pos))
+        (chrom == query_chrom && start + std::size(seq) <= query_pos))
       low_pos = mid_pos;
     else
       high_pos = mid_pos;
@@ -105,62 +94,58 @@ find_first_epiread_ending_after_position(const string &query_chrom,
 }
 
 static void
-load_reads(const string &reads_file_name, const GenomicRegion &region,
-           vector<epi_r> &the_reads) {
+load_reads(const std::string &reads_file_name, const Interval6 &region,
+           std::vector<small_epiread> &the_reads) {
   // open and check the file
   std::ifstream in(reads_file_name.c_str());
   if (!in)
-    throw runtime_error("cannot open input file " + reads_file_name);
+    throw std::runtime_error("cannot open input file " + reads_file_name);
 
-  const string query_chrom(region.get_chrom());
-  const size_t query_start = region.get_start();
-  const size_t query_end = region.get_end();
-  const streampos low_offset =
+  const std::string query_chrom(region.chrom);
+  const std::size_t query_start = region.start;
+  const std::size_t query_end = region.stop;
+  const std::streampos low_offset =
     find_first_epiread_ending_after_position(query_chrom, query_start, in);
 
   in.seekg(low_offset, std::ios_base::beg);
   backup_to_start_of_current_record(in);
 
-  string chrom, seq;
-  size_t start = 0ul;
+  std::string chrom, seq;
+  std::size_t start = 0ul;
   while ((in >> chrom >> start >> seq) && chrom == query_chrom &&
          start < query_end)
     the_reads.emplace_back(start, seq);
 }
 
 static void
-convert_coordinates(const vector<size_t> &cpg_positions,
-                    GenomicRegion &region) {
-  const size_t start_pos =
-    lower_bound(cbegin(cpg_positions), cend(cpg_positions),
-                region.get_start()) -
-    cbegin(cpg_positions);
-
-  const size_t end_pos =
-    lower_bound(cbegin(cpg_positions), cend(cpg_positions), region.get_end()) -
-    cbegin(cpg_positions);
-
-  region.set_start(start_pos);
-  region.set_end(end_pos);
+convert_coordinates(const std::vector<std::size_t> &cpg_positions,
+                    Interval6 &region) {
+  const auto lb_pos = [](const auto &v, const auto x) {
+    return std::distance(std::cbegin(v),
+                         std::lower_bound(std::cbegin(v), std::cend(v), x));
+  };
+  region.start = lb_pos(cpg_positions, region.start);
+  region.stop = lb_pos(cpg_positions, region.stop);
 }
 
 inline static bool
-is_cpg(const string &s, const size_t idx) {
-  return toupper(s[idx]) == 'C' && toupper(s[idx + 1]) == 'G';
+is_cpg(const std::string &s, const std::size_t idx) {
+  return std::toupper(s[idx]) == 'C' && std::toupper(s[idx + 1]) == 'G';
 }
 
 static void
-collect_cpgs(const string &s, vector<size_t> &cpgs) {
-  const size_t lim = s.length() - 1;
-  for (size_t i = 0; i < lim; ++i)
+collect_cpgs(const std::string &s, std::vector<std::size_t> &cpgs) {
+  const std::size_t lim = s.length() - 1;
+  for (std::size_t i = 0; i < lim; ++i)
     if (is_cpg(s, i))
       cpgs.push_back(i);
 }
 
 static void
-clip_reads(const size_t start_pos, const size_t end_pos, vector<epi_r> &r) {
-  size_t j = 0;
-  for (size_t i = 0; i < r.size(); ++i) {
+clip_reads(const std::size_t start_pos, const std::size_t end_pos,
+           std::vector<small_epiread> &r) {
+  std::size_t j = 0;
+  for (std::size_t i = 0; i < std::size(r); ++i) {
     if (start_pos < r[i].pos + r[i].seq.length() && r[i].pos < end_pos) {
       if (r[i].pos < start_pos) {
         assert(start_pos - r[i].pos < r[i].seq.length());
@@ -173,18 +158,16 @@ clip_reads(const size_t start_pos, const size_t end_pos, vector<epi_r> &r) {
       ++j;
     }
   }
-  r.erase(begin(r) + j, end(r));
+  r.erase(std::begin(r) + j, std::end(r));
 }
 
 // give names to regions if they do not exist
 static void
-ensure_regions_are_named(
-  vector<GenomicRegion> &regions  // cppcheck-suppress constParameterReference
-) {
+ensure_regions_are_named(std::vector<Interval6> &regions) {
   auto region_name_idx = 0u;
-  for (auto region : regions)
-    if (region.get_name().empty())
-      region.set_name("region" + std::to_string(++region_name_idx));
+  for (auto &region : regions)
+    if (region.name.empty())
+      region.name = "region" + std::to_string(++region_name_idx);
 }
 
 int
@@ -197,11 +180,14 @@ main_amrtester(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     bool use_bic = false;
     bool correct_for_read_count = true;
 
-    string outfile;
-    string chrom_file;
+    std::string outfile;
+    std::string chrom_file;
 
-    size_t max_itr = 10;
-    double high_prob = 0.75, low_prob = 0.25;
+    // NOLINTBEGIN(*-avoid-magic-numbers)
+    std::size_t max_itr{10};
+    double high_prob{0.75};
+    double low_prob{0.25};
+    // NOLINTEND(*-avoid-magic-numbers)
 
     /****************** COMMAND LINE OPTIONS ********************/
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
@@ -217,111 +203,109 @@ main_amrtester(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
     opt_parse.add_opt("progress", 'P', "show progress", false, show_progress);
 
-    vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << '\n'
-           << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n'
+                << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << '\n';
+      std::cerr << opt_parse.option_missing_message() << '\n';
       return EXIT_SUCCESS;
     }
-    if (leftover_args.size() != 2) {
-      cerr << opt_parse.help_message() << '\n'
-           << opt_parse.about_message() << '\n';
+    if (std::size(leftover_args) != 2) {
+      std::cerr << opt_parse.help_message() << '\n'
+                << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
-    const string regions_file(leftover_args.front());
-    const string reads_file_name(leftover_args.back());
+    const std::string regions_file(leftover_args.front());
+    const std::string reads_file_name(leftover_args.back());
     /****************** END COMMAND LINE OPTIONS *****************/
 
     const EpireadStats epistat{low_prob, high_prob, critical_value,
                                max_itr,  use_bic,   correct_for_read_count};
 
     if (!validate_epiread_file(reads_file_name))
-      throw runtime_error("invalid states file: " + reads_file_name);
+      throw std::runtime_error("invalid states file: " + reads_file_name);
 
     /* first load in all the chromosome sequences and names, and make
        a map from chromosome name to the location of the chromosome
        itself */
-    vector<string> chroms;
-    vector<string> chrom_names;
-    unordered_map<string, size_t> chrom_lookup;
+    std::vector<std::string> chroms;
+    std::vector<std::string> chrom_names;
+    std::unordered_map<std::string, std::size_t> chrom_lookup;
     read_fasta_file_short_names(chrom_file, chrom_names, chroms);
-    for (auto i = 0u; i < size(chroms); ++i) {
-      if (chrom_lookup.find(chrom_names[i]) != cend(chrom_lookup))
-        throw runtime_error("repeated chromosome name: " + chrom_names[i]);
+    for (auto i = 0u; i < std::size(chroms); ++i) {
+      if (chrom_lookup.find(chrom_names[i]) != std::cend(chrom_lookup))
+        throw std::runtime_error("repeated chromosome name: " + chrom_names[i]);
       chrom_lookup[chrom_names[i]] = i;
     }
 
-    vector<GenomicRegion> regions;
-    ReadBEDFile(regions_file, regions);
-    if (!check_sorted(regions))
-      throw runtime_error("regions not sorted in: " + regions_file);
+    auto regions = read_intervals6(regions_file);
+    if (!std::is_sorted(std::cbegin(regions), std::cend(regions)))
+      throw std::runtime_error("regions not sorted in: " + regions_file);
 
     ensure_regions_are_named(regions);
 
-    auto n_regions = size(regions);
+    auto n_regions = std::size(regions);
     if (verbose)
-      cerr << "number of regions: " << n_regions << '\n';
+      std::cerr << "number of regions: " << n_regions << '\n';
 
-    string chrom_name;
-    vector<size_t> cpg_positions;
+    std::string chrom_name;
+    std::vector<std::size_t> cpg_positions;
 
-    std::ofstream of;
-    if (!outfile.empty())
-      of.open(outfile);
-    std::ostream out(outfile.empty() ? cout.rdbuf() : of.rdbuf());
+    std::ofstream out(outfile);
+    if (!out)
+      throw std::runtime_error("failed to open file: " + outfile);
 
     bool is_significant = false;
 
-    ProgressBar progress(size(regions));
+    ProgressBar progress(std::size(regions));
     auto progress_idx = 0u;
 
     for (auto &region : regions) {
       if (show_progress && progress.time_to_report(progress_idx))
-        progress.report(cerr, progress_idx);
+        progress.report(std::cerr, progress_idx);
       ++progress_idx;
 
       // get the correct chrom if it has changed
-      if (region.get_chrom() != chrom_name) {
-        chrom_name = region.get_chrom();
+      if (region.chrom != chrom_name) {
+        chrom_name = region.chrom;
         auto the_chrom = chrom_lookup.find(chrom_name);
         if (the_chrom == end(chrom_lookup))
-          throw runtime_error("could not find chrom: " + chrom_name);
+          throw std::runtime_error("could not find chrom: " + chrom_name);
 
         cpg_positions.clear();
         collect_cpgs(chroms[the_chrom->second], cpg_positions);
       }
 
-      GenomicRegion conv_region(region);
+      Interval6 conv_region(region);
       convert_coordinates(cpg_positions, conv_region);
 
-      vector<epi_r> reads;
+      std::vector<small_epiread> reads;
       load_reads(reads_file_name, conv_region, reads);
 
-      clip_reads(conv_region.get_start(), conv_region.get_end(), reads);
+      clip_reads(conv_region.start, conv_region.stop, reads);
 
       const auto score =
         reads.empty() ? 1.0 : epistat.test_asm(reads, is_significant);
-      region.set_score(static_cast<float>(score));
-      region.set_name(region.get_name() + ":" + toa(reads.size()));
+      region.score = score;
+      region.name += ":" + std::to_string(std::size(reads));
       out << region << '\n';
     }
     if (show_progress)
-      cerr << "\r100%\n";
+      std::cerr << "\r100%\n";
   }
   catch (const std::exception &e) {
-    cerr << e.what() << '\n';
+    std::cerr << e.what() << '\n';
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
 }
 
-// NOLINTEND(*-avoid-magic-numbers,*-narrowing-conversions)
+// NOLINTEND(*-narrowing-conversions)
diff --git a/src/analysis/cpgbins.cpp b/src/analysis/cpgbins.cpp
index 6ab28693..a76d74f4 100644
--- a/src/analysis/cpgbins.cpp
+++ b/src/analysis/cpgbins.cpp
@@ -15,13 +15,14 @@
  * General Public License for more details.
  */
 
-#include "GenomicRegion.hpp"
+#include "Interval6.hpp"
 #include "LevelsCounter.hpp"
 #include "OptionParser.hpp"
 #include "bsutils.hpp"
 #include "xcounts_utils.hpp"
 
 #include <algorithm>
+#include <cstddef>
 #include <cstdint>
 #include <exception>
 #include <filesystem>
@@ -36,67 +37,35 @@
 #include <utility>
 #include <vector>
 
-// NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions)
-
-static std::string
-format_levels_counter(const LevelsCounter &lc) {
-  // ...
-  // (7) weighted mean methylation
-  // (8) unweighted mean methylation
-  // (9) fractional methylation
-  // (10) number of sites in the region
-  // (11) number of sites covered at least once
-  // (12) number of observations in reads indicating methylation
-  // (13) total number of observations from reads in the region
-  std::ostringstream oss;
-  // clang-format off
-  oss << lc.mean_meth_weighted() << '\t'
-      << lc.mean_meth() << '\t'
-      << lc.fractional_meth() << '\t'
-      << lc.total_sites << '\t'
-      << lc.sites_covered << '\t'
-      << lc.total_c << '\t'
-      << (lc.total_c + lc.total_t) << '\t'
-      << lc.total_meth << '\t'
-      << lc.sites_covered;
-  // clang-format on
-  return oss.str();
-}
-
 static std::unordered_map<std::string, std::uint64_t>
 get_chrom_sizes(const std::string &chrom_sizes_file) {
-  // NOLINTBEGIN(performance-inefficient-string-concatenation)
-  std::unordered_map<std::string, std::uint64_t> chrom_sizes;
-
   std::ifstream in(chrom_sizes_file);
   if (!in)
     throw std::runtime_error("failed to open file: " + chrom_sizes_file);
 
+  std::unordered_map<std::string, std::uint64_t> chrom_sizes;
+
   std::string line;
   while (getline(in, line)) {
     std::istringstream iss(line);
     std::string chrom_name{};
     std::uint64_t chrom_size{};
     if (!(iss >> chrom_name >> chrom_size))
-      throw std::runtime_error("bad line in " + chrom_sizes_file + ":\n" +
-                               line);
+      throw std::runtime_error("bad chromosome sizes line:\n" + line);
     std::string dummy;
     if (iss >> dummy)
       throw std::runtime_error("too many columns: " + line);
 
     if (chrom_sizes.find(chrom_name) != std::cend(chrom_sizes))
-      throw std::runtime_error("repeated entry " + chrom_name + " in " +
-                               chrom_sizes_file);
+      throw std::runtime_error("repeated entry in chromosome sizes: " + line);
 
     chrom_sizes[chrom_name] = chrom_size;
   }
   return chrom_sizes;
-  // NOLINTEND(performance-inefficient-string-concatenation)
 }
 
 static std::vector<std::string>
 get_chrom_names(const std::string &chrom_sizes_file) {
-  // NOLINTBEGIN(performance-inefficient-string-concatenation)
   std::ifstream in(chrom_sizes_file);
   if (!in)
     throw std::runtime_error("failed to open file: " + chrom_sizes_file);
@@ -108,17 +77,15 @@ get_chrom_names(const std::string &chrom_sizes_file) {
     std::istringstream iss(line);
     std::string chrom_name{};
     if (!(iss >> chrom_name))
-      throw std::runtime_error("bad line in " + chrom_sizes_file + ":\n" +
-                               line);
-
+      throw std::runtime_error("bad line in chromosome sizes file: " + line);
     chrom_names.push_back(chrom_name);
   }
   return chrom_names;
-  // NOLINTEND(performance-inefficient-string-concatenation)
 }
 
 static void
 update(LevelsCounter &lc, const xcounts_entry &xce) {
+  static constexpr auto one_half = 0.5;
   const std::uint64_t n_reads = xce.n_reads();
   if (n_reads > 0) {
     ++lc.sites_covered;
@@ -127,10 +94,11 @@ update(LevelsCounter &lc, const xcounts_entry &xce) {
     lc.total_t += xce.n_unmeth;
     const auto meth = xce.frac();
     lc.total_meth += meth;
-    double lower = 0.0, upper = 0.0;
-    wilson_ci_for_binomial(lc.alpha, n_reads, meth, lower, upper);
-    lc.called_meth += (lower > 0.5);
-    lc.called_unmeth += (upper < 0.5);
+    double lower{}, upper{};
+    wilson_ci_for_binomial(lc.alpha, static_cast<double>(n_reads), meth, lower,
+                           upper);
+    lc.called_meth += (lower > one_half);
+    lc.called_unmeth += (upper < one_half);
   }
   ++lc.total_sites;
 }
@@ -141,11 +109,11 @@ get_name(const bool report_more_info, const char level_code,
   static const std::string tag = "CpG";
   return tag + (report_more_info
                   ? ""
-                  : "_" + std::to_string(
-                            (level_code == 'w'
-                               ? lc.coverage()
-                               : (level_code == 'u' ? lc.sites_covered
-                                                    : lc.total_called()))));
+                  : "_" + std::to_string(level_code == 'w'
+                                           ? lc.coverage()
+                                           : (level_code == 'u'
+                                                ? lc.sites_covered
+                                                : lc.total_called())));
 }
 
 static void
@@ -153,9 +121,8 @@ process_chrom(const bool report_more_info, const char level_code,
               const std::string &chrom_name, const std::uint64_t chrom_size,
               const std::uint64_t bin_size,
               const std::vector<xcounts_entry> &sites, std::ostream &out) {
-  GenomicRegion r(chrom_name, 0, 0, "CpG", 0.0, '+');
-
-  std::uint64_t j = 0;
+  Interval6 r(chrom_name, 0, 0, "CpG", 0.0, '+');
+  std::uint64_t j{};
   for (auto i = 0ul; i < chrom_size; i += bin_size) {
     while (j < std::size(sites) && sites[j].pos < i)
       ++j;
@@ -164,13 +131,13 @@ process_chrom(const bool report_more_info, const char level_code,
     while (j < std::size(sites) && sites[j].pos < i + bin_size)
       update(lc, sites[j++]);
 
-    r.set_start(i);
-    r.set_end(std::min(i + bin_size, chrom_size));
-    r.set_score(level_code == 'w' ? lc.mean_meth_weighted()
-                                  : (level_code == 'u' ? lc.mean_meth()
-                                                       : lc.fractional_meth()));
+    r.start = i;
+    r.stop = std::min(i + bin_size, chrom_size);
+    r.score = level_code == 'w'
+                ? lc.mean_meth_weighted()
+                : (level_code == 'u' ? lc.mean_meth() : lc.fractional_meth());
 
-    r.set_name(get_name(report_more_info, level_code, lc));
+    r.name = get_name(report_more_info, level_code, lc);
 
     out << r;
     if (report_more_info)
@@ -183,12 +150,12 @@ static void
 process_chrom(const bool report_more_info, const std::string &chrom_name,
               const std::uint64_t chrom_size, const std::uint64_t bin_size,
               std::ostream &out) {
-  GenomicRegion r(chrom_name, 0, 0, "CpG_0", 0.0, '+');
+  Interval6 r(chrom_name, 0, 0, "CpG_0", 0.0, '+');
   LevelsCounter lc;
   const std::string lc_formatted = format_levels_counter(lc);
   for (auto i = 0ul; i < chrom_size; i += bin_size) {
-    r.set_start(i);
-    r.set_end(std::min(i + bin_size, chrom_size));
+    r.start = i;
+    r.stop = std::min(i + bin_size, chrom_size);
     out << r;
     if (report_more_info)
       out << '\t' << lc_formatted;
@@ -216,9 +183,11 @@ Columns (beyond the first 6) in the BED format output:
 
     bool verbose = false;
     bool report_more_info = false;
-    std::uint32_t n_threads = 1;
+    std::int32_t n_threads = 1;
     // std::uint64_t bin_size = 1000;
-    size_t bin_size = 1000;  // ADS: for macOS gcc-14.2.0
+
+    // ADS: for macOS gcc-14.2.0
+    std::size_t bin_size = 1000;  // NOLINT(*-avoid-magic-numbers)
     std::string level_code = "w";
     std::string outfile;
 
@@ -261,6 +230,11 @@ Columns (beyond the first 6) in the BED format output:
     const std::string xcounts_file = leftover_args.back();
     /****************** END COMMAND LINE OPTIONS *****************/
 
+    if (n_threads <= 0) {
+      std::cerr << "number of threads must be at least 1\n";
+      return EXIT_FAILURE;
+    }
+
     if (!std::filesystem::is_regular_file(chrom_sizes_file))
       throw std::runtime_error("chromosome sizes file not a regular file: " +
                                chrom_sizes_file);
@@ -295,5 +269,3 @@ Columns (beyond the first 6) in the BED format output:
   }
   return EXIT_SUCCESS;
 }
-
-// NOLINTEND(*-avoid-magic-numbers,*-narrowing-conversions)
diff --git a/src/analysis/hmr-rep.cpp b/src/analysis/hmr-rep.cpp
index ec40f9ca..125af4d3 100644
--- a/src/analysis/hmr-rep.cpp
+++ b/src/analysis/hmr-rep.cpp
@@ -1,23 +1,22 @@
-/* Copyright (C) 2019-2022 University of Southern California
- *                    Andrew D Smith
- * Author: Andrew D. Smith, Song Qiang, Jenny Qu
+/* Copyright (C) 2019-2025 Andrew D. Smith, Song Qiang, Jenny Qu
  *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option) any later
+ * version.
  *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * This is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
  */
 
-#include "GenomicRegion.hpp"
+#include "Interval6.hpp"
 #include "MSite.hpp"
-#include "OptionParser.hpp"
 #include "TwoStateHMM.hpp"
 
+#include "OptionParser.hpp"
+
 #include <bamxx.hpp>
 
 #include <algorithm>
@@ -40,9 +39,9 @@
 
 // NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions)
 
-static GenomicRegion
+static Interval6
 as_gen_rgn(const MSite &s) {
-  return GenomicRegion(s.chrom, s.pos, s.pos + 1);
+  return Interval6(s.chrom, s.pos, s.pos + 1, std::string{}, 0.0, '+');
 }
 
 static double
@@ -102,15 +101,15 @@ static void
 build_domains(const std::vector<MSite> &cpgs,
               const std::vector<size_t> &reset_points,
               const std::vector<bool> &state_ids,
-              std::vector<GenomicRegion> &domains) {
+              std::vector<Interval6> &domains) {
   size_t n_cpgs = 0, n_domains = 0, reset_idx = 1, prev_end = 0;
   bool in_domain = false;
   for (size_t i = 0; i < state_ids.size(); ++i) {
     if (reset_points[reset_idx] == i) {
       if (in_domain) {
         in_domain = false;
-        domains.back().set_end(prev_end);
-        domains.back().set_score(n_cpgs);
+        domains.back().stop = prev_end;
+        domains.back().score = n_cpgs;
         n_cpgs = 0;
       }
       ++reset_idx;
@@ -119,14 +118,14 @@ build_domains(const std::vector<MSite> &cpgs,
       if (!in_domain) {
         in_domain = true;
         domains.push_back(as_gen_rgn(cpgs[i]));
-        domains.back().set_name("HYPO" + std::to_string(n_domains++));
+        domains.back().name = "HYPO" + std::to_string(n_domains++);
       }
       ++n_cpgs;
     }
     else if (in_domain) {
       in_domain = false;
-      domains.back().set_end(prev_end);
-      domains.back().set_score(n_cpgs);
+      domains.back().stop = prev_end;
+      domains.back().score = n_cpgs;
       n_cpgs = 0;
     }
     prev_end = cpgs[i].pos + 1;
@@ -487,7 +486,7 @@ main_hmr_rep(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     if (fdr_cutoff == std::numeric_limits<double>::max())
       fdr_cutoff = get_stepup_cutoff(p_values, 0.01);
 
-    std::vector<GenomicRegion> domains;
+    std::vector<Interval6> domains;
     build_domains(cpgs, reset_points, state_ids, domains);
 
     std::ofstream of;
@@ -498,7 +497,7 @@ main_hmr_rep(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     size_t good_hmr_count = 0;
     for (size_t i = 0; i < domains.size(); ++i)
       if (p_values[i] < fdr_cutoff) {
-        domains[i].set_name("HYPO" + std::to_string(good_hmr_count++));
+        domains[i].name = "HYPO" + std::to_string(good_hmr_count++);
         out << domains[i] << '\n';
       }
 
@@ -517,10 +516,10 @@ main_hmr_rep(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
           m_reads += meth[j][i].first;
           u_reads += meth[j][i].second;
         }
-        GenomicRegion cpg(as_gen_rgn(cpgs[i]));
-        cpg.set_name("CpG:" + std::to_string(m_reads) + ":" +
-                     std::to_string(u_reads));
-        cpg.set_score(posteriors[i]);
+        Interval6 cpg(as_gen_rgn(cpgs[i]));
+        cpg.name =
+          "CpG:" + std::to_string(m_reads) + ":" + std::to_string(u_reads);
+        cpg.score = posteriors[i];
         out_post << cpg << '\n';
       }
     }
@@ -535,10 +534,10 @@ main_hmr_rep(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
           m_reads += meth[j][i].first;
           u_reads += meth[j][i].second;
         }
-        GenomicRegion cpg(as_gen_rgn(cpgs[i]));
-        cpg.set_name("CpG:" + std::to_string(m_reads) + ":" +
-                     std::to_string(u_reads));
-        cpg.set_score(1.0 - posteriors[i]);
+        Interval6 cpg(as_gen_rgn(cpgs[i]));
+        cpg.name =
+          "CpG:" + std::to_string(m_reads) + ":" + std::to_string(u_reads);
+        cpg.score = 1.0 - posteriors[i];
         out_post << cpg << '\n';
       }
     }
diff --git a/src/analysis/hmr.cpp b/src/analysis/hmr.cpp
index 295ebeeb..d5faeeab 100644
--- a/src/analysis/hmr.cpp
+++ b/src/analysis/hmr.cpp
@@ -1,20 +1,19 @@
-/* Copyright (C) 2009-2023 University of Southern California
- *                         Andrew D Smith
+/* Copyright (C) 2009-2025 Andrew D Smith and Song Qiang
  *
- * Author: Andrew D. Smith, Song Qiang
+ * Author: Andrew D. Smith and Song Qiang
  *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option) any later
+ * version.
  *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * This is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
  */
 
-#include "GenomicRegion.hpp"
+#include "Interval6.hpp"
 #include "MSite.hpp"
 #include "OptionParser.hpp"
 #include "TwoStateHMM.hpp"
@@ -22,6 +21,8 @@
 
 #include <bamxx.hpp>
 
+#include "nlohmann/json.hpp"
+
 #include <algorithm>
 #include <cstdint>
 #include <cstdlib>
@@ -41,18 +42,64 @@
 #include <utility>
 #include <vector>
 
-// NOLINTBEGIN(*-narrowing-conversions)
+template <typename InputIterator, typename T = double>
+[[nodiscard]] static auto
+get_mean(InputIterator first, InputIterator last) -> T {
+  return std::accumulate(first, last, static_cast<T>(0)) /
+         std::distance(first, last);
+}
+
+struct hmr_params {
+  static constexpr auto frac_init = 0.33;
+  static constexpr auto trans_init = 0.25;
+  double fg_alpha{};
+  double fg_beta{};
+  double bg_alpha{};
+  double bg_beta{};
+  double p_fb{};
+  double p_bf{};
+  double domain_score_cutoff{};
+
+  [[nodiscard]] auto
+  fg_meth() const {
+    return fg_alpha / (fg_alpha + fg_beta);
+  }
+
+  [[nodiscard]] auto
+  bg_meth() const {
+    return bg_alpha / (bg_alpha + bg_beta);
+  }
+
+  auto
+  init(const double n_reads) {
+    fg_alpha = frac_init * n_reads;
+    fg_beta = (1.0 - frac_init) * n_reads;
+    bg_alpha = (1.0 - frac_init) * n_reads;
+    bg_beta = frac_init * n_reads;
+    p_fb = trans_init;
+    p_bf = trans_init;
+    domain_score_cutoff = std::numeric_limits<double>::max();
+  }
+
+  /// Estimate methylation level using param values and the posterior
+  /// probability on HMR status
+  [[nodiscard]] auto
+  est_meth(const double posterior) -> double {
+    return fg_meth() * posterior + bg_meth() * (1.0 - posterior);
+  }
+
+  NLOHMANN_DEFINE_TYPE_INTRUSIVE(hmr_params, fg_alpha, fg_beta, bg_alpha,
+                                 bg_beta, p_fb, p_bf, domain_score_cutoff);
+};
 
 struct hmr_summary {
-  explicit hmr_summary(const std::vector<GenomicRegion> &hmrs) :
+  explicit hmr_summary(const std::vector<Interval6> &hmrs) :
     hmr_count{std::size(hmrs)},
-    hmr_total_size{
-      std::accumulate(std::cbegin(hmrs), std::cend(hmrs), 0ul,
-                      [](const std::uint64_t t, const GenomicRegion &p) {
-                        return t + p.get_width();
-                      })} {
+    hmr_total_size{std::accumulate(
+      std::cbegin(hmrs), std::cend(hmrs), 0ul,
+      [](const std::uint64_t t, const Interval6 &p) { return t + size(p); })} {
     hmr_mean_size = static_cast<double>(hmr_total_size) /
-                    std::max(hmr_count, static_cast<std::uint64_t>(1));
+                    std::max(static_cast<std::uint32_t>(hmr_count), 1u);
   }
   // hmr_count is the number of identified HMRs.
   std::uint64_t hmr_count{};
@@ -61,39 +108,35 @@ struct hmr_summary {
   // mean_hmr_size is the mean size of the identified HMRs
   double hmr_mean_size{};
 
-  std::string
-  tostring() {
+  auto
+  tostring() -> std::string {
     std::ostringstream oss;
     oss << "hmr_count: " << hmr_count << '\n'
         << "hmr_total_size: " << hmr_total_size << '\n'
         << "hmr_mean_size: " << std::fixed << std::setprecision(2)
         << hmr_mean_size;
-
     return oss.str();
   }
+  NLOHMANN_DEFINE_TYPE_INTRUSIVE(hmr_summary, hmr_count, hmr_total_size,
+                                 hmr_mean_size);
 };
 
-static GenomicRegion
-as_gen_rgn(const MSite &s) {
-  return GenomicRegion(s.chrom, s.pos, s.pos + 1);
-}
-
-static std::string
-format_cpg_meth_tag(const std::pair<double, double> &m) {
-  return "CpG:" + std::to_string(static_cast<std::size_t>(m.first)) + ":" +
-         std::to_string(static_cast<std::size_t>(m.second));
+static auto
+to_interval(const MSite &s) -> Interval6 {
+  const auto p = static_cast<std::uint32_t>(s.pos);
+  return {s.chrom, p, p + 1, {}, {}, '+'};
 }
 
-static double
-get_stepup_cutoff(std::vector<double> scores, const double cutoff) {
+static auto
+get_stepup_cutoff(std::vector<double> scores, const double cutoff) -> double {
   if (cutoff <= 0)
     return std::numeric_limits<double>::max();
-  else if (cutoff > 1)
+  if (cutoff > 1)
     return std::numeric_limits<double>::min();
 
-  const std::size_t n = std::size(scores);
+  const std::uint32_t n = std::size(scores);
   std::sort(begin(scores), std::end(scores));
-  std::size_t i = 1;
+  std::uint32_t i = 1;
   while (i < n && scores[i - 1] < (cutoff * i) / n)
     ++i;
   return scores[i - 1];
@@ -105,8 +148,8 @@ get_domain_scores(const std::vector<bool> &state_ids,
                   const std::vector<std::size_t> &reset_points)
   -> std::vector<double> {
   std::size_t reset_idx = 1;
-  bool in_domain = false;
-  double score = 0;
+  bool in_domain{};
+  double score{};
   std::vector<double> domain_scores;
   for (std::size_t i = 0; i < std::size(state_ids); ++i) {
     if (reset_points[reset_idx] == i) {
@@ -136,51 +179,53 @@ static void
 build_domains(const std::vector<MSite> &cpgs,
               const std::vector<std::size_t> &reset_points,
               const std::vector<bool> &state_ids,
-              std::vector<GenomicRegion> &domains) {
-  std::size_t n_cpgs = 0, reset_idx = 1, prev_end = 0;
+              std::vector<Interval6> &domains) {
+  std::uint32_t n_cpgs{};
+  std::uint32_t prev_stop{};
   bool in_domain = false;
-  for (std::size_t i = 0; i < std::size(state_ids); ++i) {
-    if (reset_points[reset_idx] == i) {
+  // assume reset points include 0 and size(cpgs)
+  auto r_itr = std::cbegin(reset_points) + 1;
+  for (std::uint32_t i = 0; i < std::size(state_ids); ++i) {
+    if (i == *r_itr) {
       if (in_domain) {
         in_domain = false;
-        domains.back().set_end(prev_end);
-        domains.back().set_score(n_cpgs);
+        domains.back().stop = prev_stop;
+        domains.back().score = n_cpgs;
         n_cpgs = 0;
       }
-      ++reset_idx;
+      ++r_itr;
     }
     if (state_ids[i]) {  // currently in an hmr
       if (!in_domain) {
         in_domain = true;
-        domains.push_back(as_gen_rgn(cpgs[i]));
-        domains.back().set_name("HYPO" + std::to_string(std::size(domains)));
+        domains.push_back(to_interval(cpgs[i]));
+        domains.back().name = "HYPO" + std::to_string(std::size(domains));
       }
       ++n_cpgs;
     }
     else if (in_domain) {
       in_domain = false;
-      domains.back().set_end(prev_end);
-      domains.back().set_score(n_cpgs);
+      domains.back().stop = prev_stop;
+      domains.back().score = n_cpgs;
       n_cpgs = 0;
     }
-    prev_end = cpgs[i].pos + 1;
+    prev_stop = cpgs[i].pos + 1;
   }
   if (in_domain) {
-    domains.back().set_end(prev_end);
-    domains.back().set_score(n_cpgs);
+    domains.back().stop = prev_stop;
+    domains.back().score = n_cpgs;
   }
 }
 
 template <class T, class U>
-static void
+static auto
 separate_regions(const bool verbose, const std::size_t desert_size,
                  std::vector<MSite> &cpgs, std::vector<T> &meth,
-                 std::vector<U> &reads,
-                 std::vector<std::size_t> &reset_points) {
+                 std::vector<U> &reads) -> std::vector<std::size_t> {
   if (verbose)
     std::cerr << "[separating by cpg desert]\n";
   // eliminate the zero-read cpgs
-  std::size_t j = 0;
+  std::int64_t j = 0;
   for (std::size_t i = 0; i < std::size(cpgs); ++i)
     if (reads[i] > 0) {
       cpgs[j] = cpgs[i];
@@ -192,8 +237,10 @@ separate_regions(const bool verbose, const std::size_t desert_size,
   meth.erase(std::begin(meth) + j, std::end(meth));
   reads.erase(std::begin(reads) + j, std::end(reads));
 
-  double total_bases = 0;
-  double bases_in_deserts = 0;
+  std::vector<std::size_t> reset_points;
+
+  std::size_t total_bases{};
+  std::size_t bases_in_deserts{};
   // segregate cpgs
   std::size_t prev_pos = 0;
   for (std::size_t i = 0; i < std::size(cpgs); ++i) {
@@ -213,25 +260,28 @@ separate_regions(const bool verbose, const std::size_t desert_size,
   reset_points.push_back(std::size(cpgs));
 
   if (verbose)
-    std::cerr << "[cpgs retained: " << std::size(cpgs) << "]" << '\n'
+    std::cerr << "[cpgs retained: " << std::size(cpgs) << "]\n"
               << "[deserts removed: " << std::size(reset_points) - 2 << "]\n"
               << "[genome fraction covered: "
-              << 1.0 - (bases_in_deserts / total_bases) << "]\n";
+              << 1.0 - (static_cast<double>(bases_in_deserts) /
+                        static_cast<double>(total_bases))
+              << "]\n";
+  return reset_points;
 }
 
-/* function to "fold" the methylation profile so that the middle
- * methylation becomes lower methylation, and both the low and high
- * methylation become high. this method actually seems to work.
+/* function to "fold" the methylation profile so that the middle methylation
+ * becomes lower methylation, and both the low and high methylation become
+ * high. this method actually seems to work.
  */
 static void
 make_partial_meth(const std::vector<std::uint32_t> &reads,
                   std::vector<std::pair<double, double>> &meth) {
-  static constexpr auto half = 0.5;
+  static constexpr auto one_half = 0.5;
   for (std::size_t i = 0; i < std::size(reads); ++i) {
-    double m = meth[i].first / reads[i];
-    m = (m <= half) ? (1.0 - 2 * m) : (1.0 - 2 * (1.0 - m));
-    meth[i].first = reads[i] * m;
-    meth[i].second = (reads[i] - meth[i].first);
+    const auto r = reads[i];
+    double m = meth[i].first / r;
+    m = (m <= one_half) ? (1.0 - 2 * m) : (1.0 - 2 * (1.0 - m));
+    meth[i] = {r * m, r * (1.0 - m)};
   }
 }
 
@@ -256,160 +306,121 @@ shuffle_cpgs(const std::size_t rng_seed, const TwoStateHMM &hmm,
 [[nodiscard]] static auto
 assign_p_values(const std::vector<double> &random,
                 const std::vector<double> &observed) -> std::vector<double> {
-  const double n_randoms = random.empty() ? 1 : std::size(random);
+  const auto n_rand =
+    random.empty() ? 1.0 : static_cast<double>(std::size(random));
   const auto n_scores = std::size(observed);
+  const auto r_end = std::cend(random);
   std::vector<double> p_values(n_scores);
   for (auto i = 0u; i < n_scores; ++i) {
-    const auto ub =
-      std::upper_bound(std::cbegin(random), std::cend(random), observed[i]);
-    p_values[i] = std::distance(ub, std::end(random)) / n_randoms;
+    const auto ub = std::upper_bound(std::cbegin(random), r_end, observed[i]);
+    p_values[i] = static_cast<double>(std::distance(ub, r_end)) / n_rand;
   }
   return p_values;
 }
 
-static void
-read_params_file(const bool verbose, const std::string &params_file,
-                 double &fg_alpha, double &fg_beta, double &bg_alpha,
-                 double &bg_beta, double &p_fb, double &p_bf,
-                 double &domain_score_cutoff) {
-  std::string jnk;
+[[nodiscard]] static auto
+read_params_file(const std::string &params_file) -> hmr_params {
   std::ifstream in(params_file);
   if (!in)
-    throw std::runtime_error("failed to parse params file: " + params_file);
-  in >> jnk >> fg_alpha >> jnk >> fg_beta >> jnk >> bg_alpha >> jnk >>
-    bg_beta >> jnk >> p_fb >> jnk >> p_bf >> jnk >> domain_score_cutoff;
-  if (verbose)
-    std::cerr << "FG_ALPHA\t" << fg_alpha << '\n'
-              << "FG_BETA\t" << fg_beta << '\n'
-              << "BG_ALPHA\t" << bg_alpha << '\n'
-              << "BG_BETA\t" << bg_beta << '\n'
-              << "F_B\t" << p_fb << '\n'
-              << "B_F\t" << p_bf << '\n'
-              << "DOMAIN_SCORE_CUTOFF\t" << domain_score_cutoff << '\n';
+    throw std::runtime_error("failed to open params file: " + params_file);
+  const nlohmann::json data = nlohmann::json::parse(in);
+  try {
+    hmr_params p(data);
+    return p;
+  }
+  catch (const std::exception &e) {
+    const auto msg = "failed to parse parameters file: " + params_file;
+    throw std::runtime_error(std::string(e.what()) + "\n" + msg);
+  }
 }
 
 static void
-write_params_file(const std::string &outfile, const double fg_alpha,
-                  const double fg_beta, const double bg_alpha,
-                  const double bg_beta, const double p_fb, const double p_bf,
-                  const double domain_score_cutoff) {
-  static constexpr auto precision_value = 30;
-  std::ofstream of;
-  if (!outfile.empty())
-    of.open(outfile.c_str());
-  std::ostream out(outfile.empty() ? std::cout.rdbuf() : of.rdbuf());
-
-  out.precision(precision_value);
-  out << "FG_ALPHA\t" << fg_alpha << '\n'
-      << "FG_BETA\t" << fg_beta << '\n'
-      << "BG_ALPHA\t" << bg_alpha << '\n'
-      << "BG_BETA\t" << bg_beta << '\n'
-      << "F_B\t" << p_fb << '\n'
-      << "B_F\t" << p_bf << '\n'
-      << "DOMAIN_SCORE_CUTOFF\t" << domain_score_cutoff << '\n';
+write_params_file(const std::string &outfile, const hmr_params &params) {
+  std::ofstream out(outfile);
+  if (!out)
+    throw std::runtime_error("failed to open params output file: " + outfile);
+  out << nlohmann::json(params);
 }
 
 [[nodiscard]]
-static inline double
-get_n_meth(const MSite &s) {
-  return s.meth * s.n_reads;
+static inline auto
+get_n_meth(const MSite &s) -> double {
+  return s.meth * static_cast<double>(s.n_reads);
 }
 
 [[nodiscard]]
-static inline double
-get_n_unmeth(const MSite &s) {
-  return (1.0 - s.meth) * s.n_reads;
+static inline auto
+get_n_unmeth(const MSite &s) -> double {
+  return (1.0 - s.meth) * static_cast<double>(s.n_reads);
 }
 
-static void
-load_cpgs(const std::string &cpgs_file, std::vector<MSite> &cpgs,
-          std::vector<std::pair<double, double>> &meth,
-          std::vector<std::uint32_t> &reads) {
+static auto
+load_cpgs(const std::string &cpgs_file) -> std::vector<MSite> {
   bamxx::bgzf_file in(cpgs_file, "r");
   if (!in)
     throw std::runtime_error("failed opening file: " + cpgs_file);
-
   if (get_has_counts_header(cpgs_file))
     skip_counts_header(in);
-
+  std::vector<MSite> cpgs;
   MSite prev_site, the_site;
   while (read_site(in, the_site)) {
     if (!the_site.is_cpg() || distance(prev_site, the_site) < 2)
-      throw std::runtime_error("error: input is not symmetric-CpGs: " +
-                               cpgs_file);
+      throw std::runtime_error("input is not symmetric-CpGs: " + cpgs_file);
     cpgs.push_back(the_site);
-    reads.push_back(the_site.n_reads);
-    meth.push_back(
-      std::make_pair(get_n_meth(the_site), get_n_unmeth(the_site)));
     prev_site = the_site;
   }
-}
-
-template <typename InputIterator, typename T = double>
-static auto
-get_mean(InputIterator first, InputIterator last) -> T {
-  return std::accumulate(first, last, static_cast<T>(0)) /
-         std::distance(first, last);
+  return cpgs;
 }
 
 template <class T>
 static void
 check_sorted_within_chroms(T first, const T last) {
   // empty, or a single element
-  if (first == last || first + 1 == last)
+  if (last <= first + 1)
     return;
-
   std::unordered_set<std::string> chroms_seen;
-  std::string cur_chrom = "";
-
-  for (auto fast = first + 1; fast != last; ++fast, ++first) {
-    if (fast->chrom != cur_chrom) {
-      if (chroms_seen.find(fast->chrom) != std::end(chroms_seen)) {
+  std::string cur_chrom;
+  for (auto next = first + 1; next != last; ++next, ++first) {
+    if (next->chrom != cur_chrom) {
+      if (chroms_seen.find(next->chrom) != std::end(chroms_seen))
         throw std::runtime_error("input not grouped by chromosomes. "
                                  "Error in the following line:\n" +
-                                 fast->tostring());
-      }
-      cur_chrom = fast->chrom;
+                                 next->tostring());
+      cur_chrom = next->chrom;
       chroms_seen.insert(cur_chrom);
       // don't check ordering if chroms have changed
     }
-    else {  // first and fast are in the same chrom
-      if (first->pos >= fast->pos) {
+    else {  // first and next are in the same chrom
+      if (first->pos >= next->pos) {
         throw std::runtime_error("input file not sorted properly. "
                                  "Error in the following lines:\n" +
-                                 first->tostring() + "\n" + fast->tostring());
+                                 first->tostring() + "\n" + next->tostring());
       }
     }
   }
 }
 
-int
-main_hmr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
+auto
+main_hmr(int argc, char *argv[]) -> int {  // NOLINT(*-avoid-c-arrays)
   try {
     static constexpr auto min_coverage = 1.0;
     static constexpr auto tolerance = 1e-10;  // corrections for small values
     static constexpr auto p_value_cutoff = 0.01;
-    static constexpr auto alpha_frac = 0.33;
 
     std::string outfile;
-    std::string hypo_post_outfile;
-    std::string meth_post_outfile;
+    std::string post_outfile;
+    std::string meth_outfile;
 
-    // NOLINTBEGIN(*-avoid-magic-numbers)
-    std::size_t desert_size{1000};
-    std::size_t max_iterations{10};
-    std::size_t rng_seed{408};
-    double p_fb{0.25};
-    double p_bf{0.25};
-    // NOLINTEND(*-avoid-magic-numbers)
+    std::size_t desert_size{1000};   // NOLINT(*-avoid-magic-numbers)
+    std::size_t max_iterations{10};  // NOLINT(*-avoid-magic-numbers)
+    std::size_t rng_seed{408};       // NOLINT(*-avoid-magic-numbers)
 
     // run mode flags
-    bool verbose = false;
-    bool PARTIAL_METH = false;
-    bool allow_extra_fields = false;
+    bool verbose{false};
+    bool PARTIAL_METH{false};
+    bool allow_extra_fields{false};
 
     std::string summary_file;
-
     std::string params_in_file;
     std::string params_out_file;
 
@@ -434,11 +445,11 @@ main_hmr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     opt_parse.add_opt("post-hypo", '\0',
                       "output file for single-CpG posterior "
                       "hypomethylation probability (default: none)",
-                      false, hypo_post_outfile);
+                      false, post_outfile);
     opt_parse.add_opt("post-meth", '\0',
                       "output file for single-CpG posteiror "
                       "methylation probability (default: none)",
-                      false, meth_post_outfile);
+                      false, meth_outfile);
     opt_parse.add_opt("params-in", 'P',
                       "HMM parameter file "
                       "(override training)",
@@ -486,16 +497,22 @@ main_hmr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
       throw std::runtime_error("failed to open output file: " + outfile);
 
     // separate the regions by chrom and by desert
-    std::vector<MSite> cpgs;
-    std::vector<std::pair<double, double>> meth;
-    std::vector<std::uint32_t> reads;
     if (verbose)
       std::cerr << "[reading methylation levels]\n";
-    load_cpgs(cpgs_file, cpgs, meth, reads);
-
+    const auto orig_cpgs = load_cpgs(cpgs_file);
     if (verbose)
       std::cerr << "[checking if input is properly formatted]\n";
-    check_sorted_within_chroms(std::begin(cpgs), std::end(cpgs));
+    check_sorted_within_chroms(std::cbegin(orig_cpgs), std::cend(orig_cpgs));
+
+    auto cpgs = orig_cpgs;
+
+    std::vector<std::pair<double, double>> meth;
+    std::vector<std::uint32_t> reads;
+    for (const auto &cpg : cpgs) {
+      reads.push_back(cpg.n_reads);
+      meth.emplace_back(get_n_meth(cpg), get_n_unmeth(cpg));
+    }
+
     if (PARTIAL_METH)
       make_partial_meth(reads, meth);
 
@@ -520,100 +537,88 @@ main_hmr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
       return EXIT_SUCCESS;
     }
 
-    // separate the regions by chrom and by desert, and eliminate
-    // those isolated CpGs
-    std::vector<std::size_t> reset_points;
-    separate_regions(verbose, desert_size, cpgs, meth, reads, reset_points);
+    // separate the regions by chrom and by desert, and eliminate those
+    // isolated CpGs
+    const auto reset_points =
+      separate_regions(verbose, desert_size, cpgs, meth, reads);
 
     const TwoStateHMM hmm(tolerance, max_iterations, verbose);
 
-    double fg_alpha{}, fg_beta{};
-    double bg_alpha{}, bg_beta{};
-    double domain_score_cutoff = std::numeric_limits<double>::max();
+    hmr_params params;
 
     if (!params_in_file.empty()) {  // read parameters file
-      read_params_file(verbose, params_in_file, fg_alpha, fg_beta, bg_alpha,
-                       bg_beta, p_fb, p_bf, domain_score_cutoff);
+      params = read_params_file(params_in_file);
       max_iterations = 0;
     }
-    else {
-      const double n_reads = get_mean(std::cbegin(reads), std::cend(reads));
-      fg_alpha = alpha_frac * n_reads;
-      fg_beta = (1.0 - alpha_frac) * n_reads;
-      bg_alpha = (1.0 - alpha_frac) * n_reads;
-      bg_beta = alpha_frac * n_reads;
-    }
+    else
+      params.init(mean_coverage);
 
     if (max_iterations > 0)
-      hmm.BaumWelchTraining(meth, reset_points, p_fb, p_bf, fg_alpha, fg_beta,
-                            bg_alpha, bg_beta);
-    // DECODE THE DOMAINS
+      hmm.BaumWelchTraining(meth, reset_points, params.p_fb, params.p_bf,
+                            params.fg_alpha, params.fg_beta, params.bg_alpha,
+                            params.bg_beta);
+    // decode the domains
     std::vector<bool> state_ids;
     std::vector<double> posteriors;
-    hmm.PosteriorDecoding(meth, reset_points, p_fb, p_bf, fg_alpha, fg_beta,
-                          bg_alpha, bg_beta, state_ids, posteriors);
+    hmm.PosteriorDecoding(meth, reset_points, params.p_fb, params.p_bf,
+                          params.fg_alpha, params.fg_beta, params.bg_alpha,
+                          params.bg_beta, state_ids, posteriors);
 
     const auto domain_scores = get_domain_scores(state_ids, meth, reset_points);
-    const auto random_scores =
-      shuffle_cpgs(rng_seed, hmm, meth, reset_points, p_fb, p_bf, fg_alpha,
-                   fg_beta, bg_alpha, bg_beta);
+    const auto random_scores = shuffle_cpgs(
+      rng_seed, hmm, meth, reset_points, params.p_fb, params.p_bf,
+      params.fg_alpha, params.fg_beta, params.bg_alpha, params.bg_beta);
     const auto p_values = assign_p_values(random_scores, domain_scores);
 
-    if (domain_score_cutoff == std::numeric_limits<double>::max() &&
+    if (params.domain_score_cutoff == std::numeric_limits<double>::max() &&
         !domain_scores.empty())
-      domain_score_cutoff = get_stepup_cutoff(p_values, p_value_cutoff);
+      params.domain_score_cutoff = get_stepup_cutoff(p_values, p_value_cutoff);
 
     // write parameters if requested
     if (!params_out_file.empty())
-      write_params_file(params_out_file, fg_alpha, fg_beta, bg_alpha, bg_beta,
-                        p_fb, p_bf, domain_score_cutoff);
+      write_params_file(params_out_file, params);
 
-    std::vector<GenomicRegion> domains;
+    std::vector<Interval6> domains;
     if (!domain_scores.empty())
       build_domains(cpgs, reset_points, state_ids, domains);
 
     {
       std::size_t good_hmr_count = 0;
       for (auto i = 0u; i < std::size(domains); ++i)
-        if (p_values[i] < domain_score_cutoff) {
+        if (p_values[i] < params.domain_score_cutoff) {
           domains[good_hmr_count] = domains[i];
-          domains[good_hmr_count].set_name("HYPO" +
-                                           std::to_string(good_hmr_count));
+          domains[good_hmr_count].name =
+            "HYPO" + std::to_string(good_hmr_count);
           ++good_hmr_count;
         }
       domains.resize(good_hmr_count);
-
       for (const auto &d : domains)
-        out << d << '\n';
+        out << to_string(d) << '\n';
     }
 
-    if (!hypo_post_outfile.empty()) {
+    if (!post_outfile.empty()) {
       if (verbose)
-        std::cerr << "[writing=" << hypo_post_outfile << "]\n";
-      std::ofstream out_hypo_post(hypo_post_outfile);
-      if (!out_hypo_post)
-        throw std::runtime_error("failed to open output hypo post file: " +
-                                 hypo_post_outfile);
-      for (std::size_t i = 0; i < std::size(cpgs); ++i) {
-        GenomicRegion cpg(as_gen_rgn(cpgs[i]));
-        cpg.set_name(format_cpg_meth_tag(meth[i]));
-        cpg.set_score(posteriors[i]);
-        out_hypo_post << cpg << '\n';
+        std::cerr << "[writing=" << post_outfile << "]\n";
+      std::ofstream out_post(post_outfile);
+      if (!out_post)
+        throw std::runtime_error("failed to open output file: " + post_outfile);
+      auto j = 0u;
+      for (auto cpg : orig_cpgs) {
+        cpg.meth = (cpg.n_reads > 0) ? 1.0 - posteriors[j++] : 0.0;
+        out_post << cpg << '\n';
       }
     }
 
-    if (!meth_post_outfile.empty()) {
-      std::ofstream out_meth_post(meth_post_outfile);
-      if (!out_meth_post)
-        throw std::runtime_error("failed to open meth post output file: " +
-                                 meth_post_outfile);
+    if (!meth_outfile.empty()) {
+      std::ofstream out_meth(meth_outfile);
+      if (!out_meth)
+        throw std::runtime_error("failed to open output file: " + meth_outfile);
       if (verbose)
-        std::cerr << "[writing=" << meth_post_outfile << "]\n";
-      for (std::size_t i = 0; i < std::size(cpgs); ++i) {
-        GenomicRegion cpg(as_gen_rgn(cpgs[i]));
-        cpg.set_name(format_cpg_meth_tag(meth[i]));
-        cpg.set_score(1.0 - posteriors[i]);
-        out_meth_post << cpg << '\n';
+        std::cerr << "[writing=" << meth_outfile << "]\n";
+      auto j = 0u;
+      for (auto cpg : orig_cpgs) {
+        cpg.meth = cpg.n_reads > 0 ? params.est_meth(posteriors[j++]) : 0.0;
+        out_meth << cpg << '\n';
       }
     }
     if (!summary_file.empty()) {
@@ -629,5 +634,3 @@ main_hmr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
   }
   return EXIT_SUCCESS;
 }
-
-// NOLINTEND(*-narrowing-conversions)
diff --git a/src/analysis/hypermr.cpp b/src/analysis/hypermr.cpp
index 4f9d1698..12c42532 100644
--- a/src/analysis/hypermr.cpp
+++ b/src/analysis/hypermr.cpp
@@ -1,31 +1,34 @@
-/* Copyright (C) 2009-2023 University of Southern California
- *                         Song Qiang and Andrew D Smith
- * Author: Song Qiang, Andrew D. Smith
+/* Copyright (C) 2009-2025 Song Qiang and Andrew D Smith
  *
- * This file is part of dnmtools.
+ * This is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option) any later
+ * version.
  *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
  *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this software; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
+ * You should have received a copy of the GNU General Public License along
+ * with this software; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+[[maybe_unused]] static constexpr auto about = R"(
+Identify regions of elevated methylation. Designed for methylomes of plants,
+especially A. thaliana, and other organisms with a low background methylation
+level, but with increased methylation associated with biological activity at a
+specific regions of the genome.
+)";
+
 #include "BetaBin.hpp"
-#include "GenomicRegion.hpp"
+#include "Interval6.hpp"
 #include "MSite.hpp"
-#include "OptionParser.hpp"
 #include "ThreeStateHMM.hpp"
 
+#include "OptionParser.hpp"
+
 #include <bamxx.hpp>
 
 #include <algorithm>
@@ -41,170 +44,151 @@
 #include <utility>
 #include <vector>
 
-using std::begin;
-using std::cerr;
-using std::cout;
-using std::end;
-using std::endl;
-using std::istream_iterator;
-using std::make_pair;
-using std::max;
-using std::min;
-using std::numeric_limits;
-using std::pair;
-using std::runtime_error;
-using std::string;
-using std::to_string;
-using std::vector;
-
-using std::ofstream;
-using std::ostream_iterator;
-
-// NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions)
-
-static GenomicRegion
+// NOLINTBEGIN(*-narrowing-conversions)
+
+static Interval6
 as_gen_rgn(const MSite &s) {
-  return GenomicRegion(s.chrom, s.pos, s.pos + 1);
+  return Interval6(s.chrom, s.pos, s.pos + 1, std::string{}, 0.0, '+');
 }
 
 static void
-load_cpgs(const string &cpgs_file, vector<MSite> &cpgs,
-          vector<pair<double, double>> &meth) {
+load_cpgs(const std::string &cpgs_file, std::vector<MSite> &cpgs,
+          std::vector<std::pair<double, double>> &meth) {
   bamxx::bgzf_file in(cpgs_file, "r");
   if (!in)
-    throw runtime_error("failed opening file: " + cpgs_file);
+    throw std::runtime_error("failed opening file: " + cpgs_file);
 
   MSite the_site;
-  string line;
+  std::string line;
   while (getline(in, line))
     cpgs.push_back(MSite(line));
 
-  meth.resize(cpgs.size());
-  for (size_t i = 0; i < cpgs.size(); ++i)
-    meth[i] = make_pair(cpgs[i].n_meth(), cpgs[i].n_unmeth());
+  meth.resize(std::size(cpgs));
+  for (std::size_t i = 0; i < std::size(cpgs); ++i)
+    meth[i] = std::make_pair(cpgs[i].n_meth(), cpgs[i].n_unmeth());
 }
 
 template <class T>
 static void
-separate_regions(const size_t desert_size, vector<MSite> &cpgs, vector<T> &meth,
-                 vector<size_t> &reset_points, size_t &total_bases,
-                 size_t &bases_in_deserts) {
+separate_regions(const std::size_t desert_size, std::vector<MSite> &cpgs,
+                 std::vector<T> &meth, std::vector<std::size_t> &reset_points,
+                 std::size_t &total_bases, std::size_t &bases_in_deserts) {
   // eliminate the zero-read cpgs
-  size_t j = 0;
-  for (size_t i = 0; i < cpgs.size(); ++i)
+  std::size_t j = 0;
+  for (std::size_t i = 0; i < std::size(cpgs); ++i)
     if (cpgs[i].n_reads > 0) {
       cpgs[j] = cpgs[i];
       meth[j] = meth[i];
       ++j;
     }
-  cpgs.erase(begin(cpgs) + j, end(cpgs));
-  meth.erase(begin(meth) + j, end(meth));
+  cpgs.erase(std::begin(cpgs) + j, std::end(cpgs));
+  meth.erase(std::begin(meth) + j, std::end(meth));
 
   total_bases = 0;
   bases_in_deserts = 0;
-  size_t prev_pos = 0;
-  for (size_t i = 0; i < cpgs.size(); ++i) {
-    const size_t dist = (i > 0 && cpgs[i].chrom == cpgs[i - 1].chrom)
-                          ? cpgs[i].pos - prev_pos
-                          : numeric_limits<size_t>::max();
+  std::size_t prev_pos = 0;
+  for (std::size_t i = 0; i < std::size(cpgs); ++i) {
+    const std::size_t dist = (i > 0 && cpgs[i].chrom == cpgs[i - 1].chrom)
+                               ? cpgs[i].pos - prev_pos
+                               : std::numeric_limits<std::size_t>::max();
     if (dist > desert_size) {
       reset_points.push_back(i);
-      if (dist < numeric_limits<size_t>::max())
+      if (dist < std::numeric_limits<std::size_t>::max())
         bases_in_deserts += dist;
     }
-    if (dist < numeric_limits<size_t>::max())
+    if (dist < std::numeric_limits<std::size_t>::max())
       total_bases += dist;
 
     prev_pos = cpgs[i].pos;
   }
-  reset_points.push_back(cpgs.size());
+  reset_points.push_back(std::size(cpgs));
 }
 
 // ADS (!!!) this function seems to not be working at all right now
 static void
-read_params_file(const string &params_file,
+read_params_file(const std::string &params_file,
                  // betabin &hypo_emission,
                  // betabin &HYPER_emission,
                  // betabin &HYPO_emission,
-                 vector<vector<double>> &trans) {
+                 std::vector<std::vector<double>> &trans) {
   std::ifstream in(params_file);
   if (!in)
-    throw runtime_error("failed to read param file: " + params_file);
+    throw std::runtime_error("failed to read param file: " + params_file);
 
-  string hypo_emission_str;
+  std::string hypo_emission_str;
   getline(in, hypo_emission_str);
 
-  string HYPER_emission_str;
+  std::string HYPER_emission_str;
   getline(in, HYPER_emission_str);
 
-  string HYPO_emission_str;
+  std::string HYPO_emission_str;
   getline(in, HYPO_emission_str);
 
-  trans.resize(3, vector<double>(3, 0.0));
-  for (size_t i = 0; i < trans.size(); ++i)
-    for (size_t j = 0; j < trans[i].size(); ++j)
+  trans.resize(3, std::vector<double>(3, 0.0));
+  for (std::size_t i = 0; i < std::size(trans); ++i)
+    for (std::size_t j = 0; j < std::size(trans[i]); ++j)
       in >> trans[i][j];
 }
 
 static void
-write_params_file(const string &params_file, const betabin &hypo_emission,
+write_params_file(const std::string &params_file, const betabin &hypo_emission,
                   const betabin &HYPER_emission, const betabin &HYPO_emission,
-                  const vector<vector<double>> &trans) {
-  ofstream out(params_file);
+                  const std::vector<std::vector<double>> &trans) {
+  std::ofstream out(params_file);
   out << hypo_emission.tostring() << '\n'
       << HYPER_emission.tostring() << '\n'
       << HYPO_emission.tostring() << '\n';
   for (const auto &i : trans) {
     std::copy(std::cbegin(i), std::cend(i),
-              ostream_iterator<double>(out, "\t"));
+              std::ostream_iterator<double>(out, "\t"));
     out << '\n';
   }
 }
 
 static void
-build_domains(const vector<MSite> &cpgs,
-              const vector<pair<double, double>> &meth,
-              const vector<size_t> &reset_points,
-              const vector<STATE_LABELS> &classes,
-              vector<GenomicRegion> &domains) {
+build_domains(const std::vector<MSite> &cpgs,
+              const std::vector<std::pair<double, double>> &meth,
+              const std::vector<std::size_t> &reset_points,
+              const std::vector<STATE_LABELS> &classes,
+              std::vector<Interval6> &domains) {
   domains.clear();
 
-  for (size_t i = 0; i < reset_points.size() - 1; ++i) {
-    const size_t start = reset_points[i];
-    const size_t end = reset_points[i + 1];
+  for (std::size_t i = 0; i + 1 < std::size(reset_points); ++i) {
+    const std::size_t start = reset_points[i];
+    const std::size_t end = reset_points[i + 1];
 
-    GenomicRegion domain(as_gen_rgn(cpgs[start]));
+    Interval6 domain(as_gen_rgn(cpgs[start]));
     STATE_LABELS prev_state = classes[start];
-    size_t n = 1;
+    std::size_t n = 1;
     double meth_sum =
       meth[start].first / (meth[start].first + meth[start].second);
 
-    for (size_t j = start + 1; j < end; ++j) {
+    for (std::size_t j = start + 1; j < end; ++j) {
       if ((prev_state == hypo && classes[j] == hypo) ||
           (prev_state != hypo && classes[j] != hypo)) {
         ++n;
         meth_sum += meth[j].first / (meth[j].first + meth[j].second);
       }
       else {
-        domain.set_end(cpgs[j - 1].pos + 1);
-        const string label = (prev_state == hypo ? "hypo" : "hyper");
-        domain.set_name(label + ":" + std::to_string(n));
-        domain.set_score(meth_sum);
-        domain.set_strand('+');
+        domain.stop = cpgs[j - 1].pos + 1;
+        const std::string label = (prev_state == hypo ? "hypo" : "hyper");
+        domain.name = label + ":" + std::to_string(n);
+        domain.score = meth_sum;
+        domain.strand = '+';
         if (prev_state == HYPER || prev_state == HYPO)
           domains.push_back(domain);
 
-        domain = GenomicRegion(as_gen_rgn(cpgs[j]));
+        domain = Interval6(as_gen_rgn(cpgs[j]));
         n = 1;
         prev_state = classes[j];
         meth_sum = meth[j].first / (meth[j].first + meth[j].second);
       }
     }
-    domain.set_end(cpgs[end - 1].pos + 1);
-    const string label = (prev_state == hypo ? "hypo" : "hyper");
-    domain.set_name(label + ":" + std::to_string(n));
-    domain.set_score(meth_sum);
-    domain.set_strand('+');
+    domain.stop = cpgs[end - 1].pos + 1;
+    const std::string label = (prev_state == hypo ? "hypo" : "hyper");
+    domain.name = label + ":" + std::to_string(n);
+    domain.score = meth_sum;
+    domain.strand = '+';
     if (prev_state == HYPER || prev_state == HYPO)
       domains.push_back(domain);
   }
@@ -212,53 +196,53 @@ build_domains(const vector<MSite> &cpgs,
 
 static void
 filter_domains(const double min_cumulative_meth,
-               vector<GenomicRegion> &domains) {
-  size_t j = 0;
-  for (size_t i = 0; i < domains.size(); ++i)
-    if (domains[i].get_score() >= min_cumulative_meth)
+               std::vector<Interval6> &domains) {
+  std::size_t j = 0;
+  for (std::size_t i = 0; i < std::size(domains); ++i)
+    if (domains[i].score >= min_cumulative_meth)
       domains[j++] = domains[i];
-  domains.erase(begin(domains) + j, end(domains));
+  domains.erase(std::begin(domains) + j, std::end(domains));
 }
 
 static void
-initialize_transitions(vector<vector<double>> &trans) {
+initialize_transitions(std::vector<std::vector<double>> &trans) {
   // "hypo" -> "hypo" is high, and goes only to "HYPER";
   // "HYPER" -> "HYPER" or "HYPO" equally;
   // "HYPO" -> "HYPER" usually; never to "hypo"
-  trans = {{0.990, 0.010, 0.000}, {0.050, 0.475, 0.475}, {0.000, 0.666, 0.334}};
+
+  // NOLINTBEGIN(*-avoid-magic-numbers)
+  trans = {
+    {0.990, 0.010, 0.000},
+    {0.050, 0.475, 0.475},
+    {0.000, 0.666, 0.334},
+  };
+  // NOLINTEND(*-avoid-magic-numbers)
 }
 
 int
 main_hypermr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
   try {
-    static const string description =
-      "Identify regions of elevated methylation. Designed for "
-      "methylomes of plants, especially A. thaliana, and other "
-      "organisms with a low background methylation level, but "
-      "with increased methylation associated with biological "
-      "activity at a specific regions of the genome.";
-
-    string outfile;
-    string scores_file;
+    // NOLINTBEGIN(*-avoid-magic-numbers)
+    std::size_t desert_size = 1000;
+    std::size_t max_iterations = 10;
+    // corrections for small values (not parameters):
+    double tolerance = 1e-10;
+    double min_cumulative_meth = 4.0;
 
-    size_t desert_size = 1000;
-    size_t max_iterations = 10;
+    // NOLINTEND(*-avoid-magic-numbers)
 
     // run mode flags
     bool VERBOSE = false;
-
-    // corrections for small values (not parameters):
-    double tolerance = 1e-10;
-
-    double min_cumulative_meth = 4.0;
     bool USE_VITERBI_DECODING = false;
 
-    string params_in_file;
-    string params_out_file;
+    std::string outfile;
+    std::string scores_file;
+    std::string params_in_file;
+    std::string params_out_file;
 
     /****************** COMMAND LINE OPTIONS ********************/
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
-                           description, "<counts-file>");
+                           about, "<counts-file>");
     opt_parse.add_opt("out", 'o', "output file (BED format)", false, outfile);
     opt_parse.add_opt("scores", 's', "output file for posterior scores", false,
                       scores_file);
@@ -277,62 +261,63 @@ main_hypermr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     opt_parse.add_opt("params-out", 'p', "parameters ouptut file", false,
                       params_out_file);
     opt_parse.set_show_defaults();
-    vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << '\n'
-           << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n'
+                << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << '\n';
+      std::cerr << opt_parse.option_missing_message() << '\n';
       return EXIT_SUCCESS;
     }
-    if (leftover_args.size() != 1) {
-      cerr << opt_parse.help_message() << '\n';
+    if (std::size(leftover_args) != 1) {
+      std::cerr << opt_parse.help_message() << '\n';
       return EXIT_SUCCESS;
     }
-    const string cpgs_file = leftover_args.front();
+    const std::string cpgs_file = leftover_args.front();
     /****************** END COMMAND LINE OPTIONS *****************/
 
     if (!is_msite_file(cpgs_file))
-      throw runtime_error("malformed counts file: " + cpgs_file);
+      throw std::runtime_error("malformed counts file: " + cpgs_file);
 
     if (VERBOSE)
-      cerr << "[loading_data]" << '\n';
-    vector<MSite> cpgs;
-    vector<pair<double, double>> meth;
+      std::cerr << "[loading_data]" << '\n';
+    std::vector<MSite> cpgs;
+    std::vector<std::pair<double, double>> meth;
     load_cpgs(cpgs_file, cpgs, meth);
-    const size_t n_sites = cpgs.size();
+    const std::size_t n_sites = std::size(cpgs);
     const double mean_cov =
       std::accumulate(
         std::cbegin(cpgs), std::cend(cpgs), 0.0,
         [](const auto a, const auto &c) { return a + c.n_reads; }) /
       static_cast<double>(n_sites);
-    cerr << "[n_sites=" << n_sites << "]" << '\n'
-         << "[mean_coverage=" << mean_cov << "]" << '\n';
+    std::cerr << "[n_sites=" << n_sites << "]" << '\n'
+              << "[mean_coverage=" << mean_cov << "]" << '\n';
 
     // separate the regions by chrom and by desert, and eliminate
     // those isolated CpGs
-    vector<size_t> reset_points;
-    size_t total_bases = 0, bases_in_deserts = 0;
+    std::vector<std::size_t> reset_points;
+    std::size_t total_bases = 0, bases_in_deserts = 0;
     separate_regions(desert_size, cpgs, meth, reset_points, total_bases,
                      bases_in_deserts);
 
     if (VERBOSE) {
       const auto des_frac = static_cast<double>(bases_in_deserts) / total_bases;
-      cerr << "[n_sites_retained=" << cpgs.size() << "]" << '\n'
-           << "[deserts_removed=" << reset_points.size() - 2 << "]" << '\n'
-           << "[remaining_genome_fraction=" << 1.0 - des_frac << "]" << '\n';
+      std::cerr << "[n_sites_retained=" << std::size(cpgs) << "]" << '\n'
+                << "[deserts_removed=" << std::size(reset_points) - 2 << "]\n"
+                << "[remaining_genome_fraction=" << 1.0 - des_frac << "]"
+                << '\n';
     }
 
     ThreeStateHMM hmm(meth, reset_points, tolerance, max_iterations, VERBOSE);
 
-    vector<vector<double>> trans;
+    std::vector<std::vector<double>> trans;
     initialize_transitions(trans);
 
     betabin hypo_emission, HYPER_emission, HYPO_emission;
@@ -344,12 +329,13 @@ main_hypermr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
                        // HYPO_emission,
                        trans);
     else {
+      // NOLINTBEGIN(*-avoid-magic-numbers)
       const double n_reads = mean_cov;
       const double fg_alpha = 0.33 * n_reads;
       const double fg_beta = 0.67 * n_reads;
       const double bg_alpha = 0.67 * n_reads;
       const double bg_beta = 0.33 * n_reads;
-
+      // NOLINTEND(*-avoid-magic-numbers)
       hypo_emission = betabin(fg_alpha, fg_beta);
       HYPER_emission = betabin(bg_alpha, bg_beta);
       HYPO_emission = hypo_emission;
@@ -365,7 +351,7 @@ main_hypermr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
                         HYPO_emission, trans);
 
     // decode the states
-    vector<STATE_LABELS> classes;
+    std::vector<STATE_LABELS> classes;
     if (USE_VITERBI_DECODING)
       hmm.ViterbiDecoding();
     else
@@ -373,26 +359,25 @@ main_hypermr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     hmm.get_classes(classes);
 
     // identify the domains of hypermethylation
-    vector<GenomicRegion> domains;
+    std::vector<Interval6> domains;
     build_domains(cpgs, hmm.observations, reset_points, classes, domains);
     filter_domains(min_cumulative_meth, domains);
 
     // write the results
-    ofstream of;
-    if (!outfile.empty())
-      of.open(outfile);
-    std::ostream out(outfile.empty() ? std::cout.rdbuf() : of.rdbuf());
-    copy(begin(domains), end(domains),
-         ostream_iterator<GenomicRegion>(out, "\n"));
+    std::ofstream out(outfile);
+    if (!out)
+      throw std::runtime_error("failed to open outfile: " + outfile);
+    std::copy(std::cbegin(domains), std::cend(domains),
+              std::ostream_iterator<Interval6>(out, "\n"));
 
     // if requested, write the posterior scores
     if (!scores_file.empty()) {
       if (USE_VITERBI_DECODING)
         hmm.PosteriorDecoding();
-      vector<Triplet> scores;
+      std::vector<Triplet> scores;
       hmm.get_state_posteriors(scores);
-      ofstream score_out(scores_file);
-      for (size_t i = 0; i < cpgs.size(); ++i) {
+      std::ofstream score_out(scores_file);
+      for (std::size_t i = 0; i < std::size(cpgs); ++i) {
         score_out << cpgs[i] << "\t";
         if (classes[i] == hypo)
           score_out << "hypo\n" << scores[i].hypo << '\n';
@@ -404,10 +389,10 @@ main_hypermr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     }
   }
   catch (const std::exception &e) {
-    cerr << "ERROR:\t" << e.what() << '\n';
+    std::cerr << e.what() << '\n';
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
 }
 
-// NOLINTEND(*-avoid-magic-numbers,*-narrowing-conversions)
+// NOLINTEND(*-narrowing-conversions)
diff --git a/src/analysis/metagene.cpp b/src/analysis/metagene.cpp
index e20d2f6d..4c8f574d 100644
--- a/src/analysis/metagene.cpp
+++ b/src/analysis/metagene.cpp
@@ -1,30 +1,29 @@
-/* metagene (tsscpgplot): get data to plot methylation level around a TSS
+/* Copyright (C) 2010-2025 Andrew D. Smith
  *
- * Copyright (C) 2010-2023 Andrew D. Smith
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
  *
- * Authors: Andrew D. Smith
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
  *
- * This program is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, either version 3 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see
- * <http://www.gnu.org/licenses/>.
+ * You should have received a copy of the GNU General Public License along
+ * with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include "GenomicRegion.hpp"
+[[maybe_unused]] static constexpr auto about = R"(
+metagene (tsscpgplot): get data to plot methylation level around a TSS
+)";
+
+#include "Interval6.hpp"
 #include "LevelsCounter.hpp"
 #include "MSite.hpp"
 #include "OptionParser.hpp"
 
-#include "bamxx.hpp"
+#include <bamxx.hpp>
 
 #include <algorithm>
 #include <cmath>
@@ -40,33 +39,23 @@
 #include <utility>
 #include <vector>
 
-using std::cerr;
-using std::cout;
-using std::ostream;
-using std::pair;
-using std::runtime_error;
-using std::sort;
-using std::string;
-using std::to_string;
-using std::unordered_map;
-using std::vector;
-
 static MSite
-tss_from_gene(const GenomicRegion &r) {
+tss_from_gene(const Interval6 &r) {
   MSite s;
-  s.chrom = r.get_chrom();
-  s.pos = (r.pos_strand() ? r.get_start() : r.get_end());
-  s.strand = r.get_strand();
+  s.chrom = r.chrom;
+  s.pos = r.strand == '+' ? r.start : r.stop;
+  s.strand = r.strand;
   return s;
 }
 
 static void
-process_chrom(const uint32_t region_size, const vector<GenomicRegion> &genes,
-              const pair<uint32_t, uint32_t> &bounds,
-              const vector<MSite> &sites, vector<LevelsCounter> &levels) {
-  const uint32_t twice_rs = 2 * region_size;
-
-  auto comp = [](const MSite &a, const MSite &b) { return a.pos < b.pos; };
+process_chrom(const std::uint32_t region_size,
+              const std::vector<Interval6> &genes,
+              const std::pair<std::uint32_t, std::uint32_t> &bounds,
+              const std::vector<MSite> &sites,
+              std::vector<LevelsCounter> &levels) {
+  const auto cmp = [](const MSite &a, const MSite &b) { return a.pos < b.pos; };
+  const std::uint32_t twice_rs = 2 * region_size;
 
   for (auto i = bounds.first; i < bounds.second; ++i) {
     const MSite tss = tss_from_gene(genes[i]);
@@ -76,8 +65,10 @@ process_chrom(const uint32_t region_size, const vector<GenomicRegion> &genes,
     MSite right = tss;
     right.pos += region_size;
 
-    const auto lim = upper_bound(cbegin(sites), cend(sites), right, comp);
-    auto itr = lower_bound(cbegin(sites), cend(sites), left, comp);
+    const auto lim =
+      std::upper_bound(std::cbegin(sites), std::cend(sites), right, cmp);
+    auto itr =
+      std::lower_bound(std::cbegin(sites), std::cend(sites), left, cmp);
     for (; itr != lim; ++itr) {
       const auto dist = itr->pos - left.pos;
       const auto base_idx = tss.strand == '+' ? dist : twice_rs - dist;
@@ -88,10 +79,11 @@ process_chrom(const uint32_t region_size, const vector<GenomicRegion> &genes,
 
 template <typename T>
 static void
-collapse_bins(const uint32_t bin_size, vector<T> &v) {
-  const uint32_t n_bins = std::ceil(static_cast<double>(v.size()) / bin_size);
-  vector<T> vv(n_bins);
-  for (auto i = 0u; i < v.size(); ++i)
+collapse_bins(const std::uint32_t bin_size, std::vector<T> &v) {
+  const std::uint32_t n_bins =
+    std::ceil(static_cast<double>(std::size(v)) / bin_size);
+  std::vector<T> vv(n_bins);
+  for (auto i = 0u; i < std::size(v); ++i)
     vv[i / bin_size] += v[i];
   v.swap(vv);
 }
@@ -111,11 +103,11 @@ genes, and the promoters are of interest, the strand will be used
 correctly.
 )";
   try {
-    string outfile;
-    uint32_t region_size = 5000;  // NOLINT(*-avoid-magic-numbers)
+    std::string outfile;
+    std::uint32_t region_size = 5000;  // NOLINT(*-avoid-magic-numbers)
     bool verbose = false;
     bool show_progress = false;
-    uint32_t bin_size = 50;  // NOLINT(*-avoid-magic-numbers)
+    std::uint32_t bin_size = 50;  // NOLINT(*-avoid-magic-numbers)
 
     /****************** GET COMMAND LINE ARGUMENTS ***************************/
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
@@ -127,111 +119,110 @@ correctly.
     opt_parse.add_opt("bin", 'b', "bin size", false, bin_size);
     opt_parse.add_opt("progress", '\0', "show progress", false, show_progress);
     opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
-    vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << '\n';
+      std::cerr << opt_parse.option_missing_message() << '\n';
       return EXIT_SUCCESS;
     }
-    if (leftover_args.size() != 2) {
-      cerr << opt_parse.help_message() << '\n';
+    if (std::size(leftover_args) != 2) {
+      std::cerr << opt_parse.help_message() << '\n';
       return EXIT_SUCCESS;
     }
-    const string features_file_name = leftover_args.front();
-    const string cpg_file_name = leftover_args.back();
+    const std::string features_file_name = leftover_args.front();
+    const std::string cpg_file_name = leftover_args.back();
     /**********************************************************************/
 
     if (verbose)
-      cerr << "[loading feature annotations data]" << '\n';
-    vector<GenomicRegion> features;
-    ReadBEDFile(features_file_name, features);
-    sort(begin(features), end(features));
+      std::cerr << "[loading feature annotations data]\n";
+    auto features = read_intervals6(features_file_name);
+    std::sort(std::begin(features), std::end(features));
     if (verbose)
-      cerr << "[number of features: " << features.size() << "]" << '\n';
+      std::cerr << "[number of features: " << std::size(features) << "]\n";
 
     // identify the start and end of ranges for each chromosome
-    unordered_map<string, pair<uint32_t, uint32_t>> lookup;
-    typedef decltype(lookup)::iterator lookup_itr;
+    using pos_pair = std::pair<std::uint32_t, std::uint32_t>;
+    std::unordered_map<std::string, pos_pair> lookup;
 
-    string chrom_name;
+    std::string chrom_name;
     auto prev_idx = 0u;
-    for (auto i = 0u; i < features.size(); ++i)
-      if (features[i].get_chrom() != chrom_name) {
+    for (auto i = 0u; i < std::size(features); ++i)
+      if (features[i].chrom != chrom_name) {
         if (!chrom_name.empty())
           lookup.insert({chrom_name, {prev_idx, i}});
         prev_idx = i;
-        chrom_name = features[i].get_chrom();
+        chrom_name = features[i].chrom;
       }
-    lookup.insert({chrom_name, {prev_idx, features.size()}});
+    lookup.insert({chrom_name, {prev_idx, std::size(features)}});
     if (verbose)
-      cerr << "[number of chroms with features: " << lookup.size() << "]\n";
+      std::cerr << "[number of chroms with features: " << std::size(lookup)
+                << "]\n";
 
-    auto pair_diff = [&lookup](const lookup_itr x) {
-      return (x != end(lookup)) ? x->second.second - x->second.first : 0u;
+    const auto pair_diff = [&lookup](const auto x) {
+      return (x != std::cend(lookup)) ? x->second.second - x->second.first : 0u;
     };
 
-    vector<LevelsCounter> levels(2 * region_size);
+    std::vector<LevelsCounter> levels(2 * region_size);
 
     bamxx::bgzf_file cpgin(cpg_file_name, "r");
     if (!cpgin)
-      throw runtime_error("failed to open file: " + cpg_file_name);
+      throw std::runtime_error("failed to open file: " + cpg_file_name);
 
-    vector<MSite> sites;
-    string line;
+    std::vector<MSite> sites;
+    std::string line;
     chrom_name.clear();
     while (getline(cpgin, line)) {
-      auto the_site = MSite(line);
+      const auto the_site = MSite(line);
       if (the_site.chrom != chrom_name) {
         if (!sites.empty()) {
           const auto bounds = lookup.find(chrom_name);
-          if (bounds != end(lookup))
+          if (bounds != std::cend(lookup))
             process_chrom(region_size, features, bounds->second, sites, levels);
           const auto n_features = pair_diff(bounds);
           if (show_progress)
-            cerr << "[sites=" << sites.size() << " features=" << n_features
-                 << "]" << '\n';
+            std::cerr << "[sites=" << std::size(sites)
+                      << " features=" << n_features << "]" << '\n';
           sites.clear();
         }
         if (show_progress)
-          cerr << "[processing: " << the_site.chrom << "]";
+          std::cerr << "[processing: " << the_site.chrom << "]";
         chrom_name = the_site.chrom;
       }
-      sites.push_back(std::move(the_site));
+      sites.push_back(the_site);
     }
 
     if (!sites.empty()) {
       const auto bounds = lookup.find(chrom_name);
-      if (bounds != end(lookup))
+      if (bounds != std::cend(lookup))
         process_chrom(region_size, features, bounds->second, sites, levels);
       const auto n_features = pair_diff(bounds);
       if (show_progress)
-        cerr << "[sites=" << sites.size() << " features=" << n_features << "]"
-             << '\n';
+        std::cerr << "[sites=" << std::size(sites) << " features=" << n_features
+                  << "]\n";
     }
 
     collapse_bins(bin_size, levels);
 
     if (verbose)
-      cerr << "output columns:\n" << LevelsCounter::format_header() << '\n';
-
-    std::ofstream of;
-    if (!outfile.empty())
-      of.open(outfile);
-    std::ostream out(of.is_open() ? of.rdbuf() : std::cout.rdbuf());
+      std::cerr << "output columns:\n"
+                << LevelsCounter::format_header() << '\n';
 
-    for (auto i = 0u; i < levels.size(); ++i)
+    std::ofstream out(outfile);
+    if (!out)
+      throw std::runtime_error("failed to open file: " + outfile);
+    for (auto i = 0u; i < std::size(levels); ++i)
       out << i * bin_size << '\t' << levels[i].format_row() << '\n';
   }
-  catch (std::exception &e) {
-    cerr << e.what() << '\n';
+  catch (const std::exception &e) {
+    std::cerr << e.what() << '\n';
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
diff --git a/src/analysis/multimethstat.cpp b/src/analysis/multimethstat.cpp
index c745bd82..67e2ce75 100644
--- a/src/analysis/multimethstat.cpp
+++ b/src/analysis/multimethstat.cpp
@@ -1,25 +1,26 @@
-/* roimethstat: average methylation in each of a set of regions
+/* Copyright (C) 2014-2022 Andrew D. Smith
  *
- * Copyright (C) 2014-2022 Andrew D. Smith
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
  *
- * Authors: Andrew D. Smith
- *
- * This program is free software: you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation, either version 3 of the
- * License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
  */
 
-#include "GenomicRegion.hpp"
+[[maybe_unused]] static constexpr auto about = R"(
+multistat: average methylation in each of a set of regions for each methylome
+)";
+
+#include "Interval6.hpp"
 #include "MSite.hpp"
-#include "OptionParser.hpp"
 #include "bsutils.hpp"
 
+#include "OptionParser.hpp"
+
 #include <bamxx.hpp>
 
 #include <algorithm>
@@ -35,93 +36,75 @@
 #include <utility>
 #include <vector>
 
-using std::cerr;
-using std::cout;
-using std::endl;
-using std::ifstream;
-using std::ios_base;
-using std::is_sorted;
-using std::isfinite;
-using std::make_pair;
-using std::pair;
-using std::runtime_error;
-using std::string;
-using std::vector;
-
-using bamxx::bgzf_file;
-
-// NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions)
-
-static pair<bool, bool>
-meth_unmeth_calls(const size_t n_meth, const size_t n_unmeth) {
-  static const double alpha = 0.95;
-  // get info for binomial test
-  double lower = 0.0, upper = 0.0;
-  const size_t total = n_meth + n_unmeth;
+// NOLINTBEGIN(*-narrowing-conversions)
+
+static std::pair<bool, bool>
+meth_unmeth_calls(const std::size_t n_meth, const std::size_t n_unmeth) {
+  static constexpr double alpha = 0.95;
+  static constexpr double one_half = 0.5;
+  double lower{}, upper{};
+  const auto total = n_meth + n_unmeth;
   wilson_ci_for_binomial(alpha, total, static_cast<double>(n_meth) / total,
                          lower, upper);
-  return make_pair(lower > 0.5, upper < 0.5);
+  return std::make_pair(lower > one_half, upper < one_half);
 }
 
-static std::pair<size_t, size_t>
-region_bounds(const vector<MSite> &sites, const GenomicRegion &region) {
-  const string chrom(region.get_chrom());
-  const char strand(region.get_strand());
-
-  const MSite a(chrom, region.get_start(), strand, "", 0, 0);
-  vector<MSite>::const_iterator a_ins(lower_bound(begin(sites), end(sites), a));
-
-  const MSite b(chrom, region.get_end(), strand, "", 0, 0);
-  vector<MSite>::const_iterator b_ins(lower_bound(begin(sites), end(sites), b));
-
-  return make_pair(a_ins - begin(sites), b_ins - begin(sites));
+static std::pair<std::size_t, std::size_t>
+region_bounds(const std::vector<MSite> &sites, const Interval6 &region) {
+  const MSite a(region.chrom, region.start, region.strand, "", 0, 0);
+  const auto a_ins = std::lower_bound(std::cbegin(sites), std::cend(sites), a);
+  const MSite b(region.chrom, region.stop, region.strand, "", 0, 0);
+  const auto b_ins = std::lower_bound(std::cbegin(sites), std::cend(sites), b);
+  return std::make_pair(std::distance(std::cbegin(sites), a_ins),
+                        std::distance(std::cbegin(sites), b_ins));
 }
 
 /*
-   This function is used to make sure the output is done the same way
-   in the different places it might be generated. One issue is that
-   there is a lot of string and stream manipulation here, which might
-   not be desirable if there are lots of regions, as when the genome
-   is binned.
+   This function is used to make sure the output is done the same way in the
+   different places it might be generated. One issue is that there is a lot of
+   string and stream manipulation here, which might not be desirable if there
+   are lots of regions, as when the genome is binned.
 */
-static string
-format_output_line(
-  const bool report_more_information, const GenomicRegion &r,
-  const vector<double> &score, const vector<double> &weighted_mean_meth,
-  const vector<double> &mean_meth, const vector<double> &fractional_meth,
-  const size_t total_cpgs, const vector<size_t> &cpgs_with_reads,
-  const vector<size_t> &total_meth, const vector<size_t> &total_reads) {
+static std::string
+format_output_line(const bool report_more_information, const Interval6 &r,
+                   const std::vector<double> &score,
+                   const std::vector<double> &weighted_mean_meth,
+                   const std::vector<double> &mean_meth,
+                   const std::vector<double> &fractional_meth,
+                   const std::size_t total_cpgs,
+                   const std::vector<std::size_t> &cpgs_with_reads,
+                   const std::vector<std::size_t> &total_meth,
+                   const std::vector<std::size_t> &total_reads) {
   std::ostringstream oss;
-  oss << r.get_chrom() << '\t' << r.get_start() << '\t' << r.get_end() << '\t'
-      << r.get_name() << '\t';
+  oss << r.chrom << '\t' << r.start << '\t' << r.stop << '\t' << r.name << '\t';
 
-  const size_t n_samples = score.size();
+  const std::size_t n_samples = std::size(score);
 
-  for (size_t i = 0; i < n_samples; ++i) {
-    if (isfinite(score[i]))
+  for (std::size_t i = 0; i < n_samples; ++i) {
+    if (std::isfinite(score[i]))
       oss << score[i];
     else
       oss << "NA";
 
     // GS: commenting this out because we shouldn't be reporting
     // strand several times
-    // oss << '\t' << r.get_strand();
+    // oss << '\t' << r.strand;
 
     if (report_more_information) {
       oss << '\t';
-      if (isfinite(weighted_mean_meth[i]))
+      if (std::isfinite(weighted_mean_meth[i]))
         oss << weighted_mean_meth[i];
       else
         oss << "NA";
 
       oss << '\t';
-      if (isfinite(mean_meth[i]))
+      if (std::isfinite(mean_meth[i]))
         oss << mean_meth[i];
       else
         oss << "NA";
 
       oss << '\t';
-      if (isfinite(fractional_meth[i]))
+      if (std::isfinite(fractional_meth[i]))
         oss << fractional_meth[i];
       else
         oss << "NA";
@@ -136,12 +119,12 @@ format_output_line(
 }
 
 static void
-get_site_and_values(string &line, MSite &site, vector<double> &values) {
+get_site_and_values(std::string &line, MSite &site,
+                    std::vector<double> &values) {
   // in tabular, chrom/pos/strand is separated by colons
-  replace(begin(line), end(line), ':', '\t');
+  std::replace(std::begin(line), std::end(line), ':', '\t');
   std::istringstream iss(line);
   iss >> site.chrom >> site.pos >> site.strand >> site.context;
-
   values.clear();
   double v = 0.0;
   while (iss >> v)
@@ -149,102 +132,101 @@ get_site_and_values(string &line, MSite &site, vector<double> &values) {
 }
 
 static bool
-all_is_finite(const vector<double> &scores) {
-  for (auto it(begin(scores)); it != end(scores); ++it)
-    if (!isfinite(*it))
-      return false;
-
-  return true;
+all_finite(const std::vector<double> &scores) {
+  return std::all_of(std::cbegin(scores), std::cend(scores),
+                     [](const auto x) { return std::isfinite(x); });
 }
 
 static void
-process_with_cpgs_loaded(const bool VERBOSE,
+process_with_cpgs_loaded(const bool verbose,
                          // const bool sort_data_if_needed,
-                         const bool PRINT_NUMERIC_ONLY,
+                         const bool print_numeric_only,
                          const bool report_more_information,
-                         const char level_code, const string &cpgs_file,
-                         vector<GenomicRegion> &regions, std::ostream &out) {
-  bgzf_file in(cpgs_file, "r");
+                         const char level_code, const std::string &cpgs_file,
+                         const std::vector<Interval6> &regions,
+                         std::ostream &out) {
+  bamxx::bgzf_file in(cpgs_file, "r");
   if (!in)
-    throw runtime_error("cannot open file: " + cpgs_file);
+    throw std::runtime_error("cannot open file: " + cpgs_file);
 
-  string header;
+  std::string header;
   getline(in, header);
 
   std::istringstream iss(header);
-  string col_name;
-  vector<string> col_names;
+  std::string col_name;
+  std::vector<std::string> col_names;
   while (iss >> col_name)
     col_names.push_back(col_name);
 
-  auto end_uniq = std::unique(begin(col_names), end(col_names));
+  const auto end_uniq = std::unique(std::begin(col_names), std::end(col_names));
+  const auto n_uniq = std::distance(std::begin(col_names), end_uniq);
   // make sure all columns come in pairs of counts
-  if (2 * static_cast<size_t>(distance(begin(col_names), end_uniq)) !=
-      col_names.size())
-    throw runtime_error("wrong header format:\n" + header);
+  if (2 * static_cast<std::size_t>(n_uniq) != std::size(col_names))
+    throw std::runtime_error("wrong header format:\n" + header);
 
-  col_names.resize(distance(begin(col_names), end_uniq));
-  const size_t n_samples = col_names.size();
-  if (VERBOSE)
-    cerr << "[n_samples=" << n_samples << "]\n";
+  col_names.resize(std::distance(std::begin(col_names), end_uniq));
+  const std::size_t n_samples = std::size(col_names);
+  if (verbose)
+    std::cerr << "[n_samples=" << n_samples << "]\n";
 
-  vector<MSite> cpgs;
-  vector<vector<double>> values;
+  std::vector<MSite> cpgs;
+  std::vector<std::vector<double>> values;
   MSite the_cpg;
-  string line;
+  std::string line;
   while (getline(in, line)) {
-    values.push_back(vector<double>());
+    values.push_back(std::vector<double>());
     get_site_and_values(line, the_cpg, values.back());
     cpgs.push_back(the_cpg);
 
-    if (values.back().size() != 2 * n_samples)
-      throw runtime_error("wrong row length. Expected " +
-                          std::to_string(2 * n_samples) + ", got " +
-                          std::to_string(values.back().size()) + "\n" + line);
+    if (std::size(values.back()) != 2 * n_samples)
+      throw std::runtime_error(
+        "wrong row length. Expected " + std::to_string(2 * n_samples) +
+        ", got " + std::to_string(std::size(values.back())) + "\n" + line);
   }
 
-  if (VERBOSE)
-    cerr << "[n_cpgs=" << cpgs.size() << "]\n";
+  if (verbose)
+    std::cerr << "[n_cpgs=" << std::size(cpgs) << "]\n";
 
-  if (!is_sorted(begin(cpgs), end(cpgs))) {
+  if (!std::is_sorted(std::cbegin(cpgs), std::cend(cpgs))) {
     /* GS: need a way to sort cpgs and values to preserve the order.
      * Commenting this for now until I know the best way to do this
     if (sort_data_if_needed)
-      sort(begin(cpgs), end(cpgs));
+      std::sort(begin(cpgs), end(cpgs));
     else*/
-    throw runtime_error("data not sorted: " + cpgs_file);
+    throw std::runtime_error("data not sorted: " + cpgs_file);
   }
 
-  if (VERBOSE)
-    cerr << "[n_cpgs=" << cpgs.size() << "]\n";
+  if (verbose)
+    std::cerr << "[n_cpgs=" << std::size(cpgs) << "]\n";
 
   // write header as first column
-  for (size_t i = 0; i < n_samples; ++i)
+  for (std::size_t i = 0; i < n_samples; ++i)
     out << '\t' << col_names[i];
   out << '\n';
 
   // preallocate entire space for a single row
-  vector<size_t> total_meth(n_samples, 0);
-  vector<size_t> total_reads(n_samples, 0);
-  vector<size_t> cpgs_with_reads(n_samples, 0);
-  vector<size_t> called_total(n_samples, 0);
-  vector<size_t> called_meth(n_samples, 0);
-  vector<double> weighted_mean_meth(n_samples, 0);
-  vector<double> fractional_meth(n_samples, 0);
-  vector<double> unweighted_mean_meth(n_samples, 0);
-  vector<double> score(n_samples, 0);
-
-  for (size_t i = 0; i < regions.size(); ++i) {
-    const pair<size_t, size_t> bounds(region_bounds(cpgs, regions[i]));
-
-    for (size_t j = 0; j < n_samples; ++j) {
+  std::vector<std::size_t> total_meth(n_samples, 0);
+  std::vector<std::size_t> total_reads(n_samples, 0);
+  std::vector<std::size_t> cpgs_with_reads(n_samples, 0);
+  std::vector<std::size_t> called_total(n_samples, 0);
+  std::vector<std::size_t> called_meth(n_samples, 0);
+  std::vector<double> weighted_mean_meth(n_samples, 0);
+  std::vector<double> fractional_meth(n_samples, 0);
+  std::vector<double> unweighted_mean_meth(n_samples, 0);
+  std::vector<double> score(n_samples, 0);
+
+  for (std::size_t i = 0; i < std::size(regions); ++i) {
+    const std::pair<std::size_t, std::size_t> bounds(
+      region_bounds(cpgs, regions[i]));
+
+    for (std::size_t j = 0; j < n_samples; ++j) {
       total_meth[j] = total_reads[j] = cpgs_with_reads[j] = called_total[j] =
         called_meth[j] = 0;
       double mean_meth = 0.0;
 
-      for (size_t k = bounds.first; k < bounds.second; ++k) {
-        const size_t meth = values[k][2 * j + 1];
-        const size_t tot = values[k][2 * j];
+      for (std::size_t k = bounds.first; k < bounds.second; ++k) {
+        const std::size_t meth = values[k][2 * j + 1];
+        const std::size_t tot = values[k][2 * j];
         if (tot > 0) {
           total_reads[j] += tot;
           total_meth[j] += meth;
@@ -270,8 +252,8 @@ process_with_cpgs_loaded(const bool VERBOSE,
                                                 : fractional_meth[j]));
     }
 
-    const size_t total_cpgs = bounds.second - bounds.first;
-    if (!PRINT_NUMERIC_ONLY || all_is_finite(score))
+    const std::size_t total_cpgs = bounds.second - bounds.first;
+    if (!print_numeric_only || all_finite(score))
       out << format_output_line(report_more_information, regions[i], score,
                                 weighted_mean_meth, unweighted_mean_meth,
                                 fractional_meth, total_cpgs, cpgs_with_reads,
@@ -286,7 +268,7 @@ process_with_cpgs_loaded(const bool VERBOSE,
 ///
 
 static void
-move_to_start_of_line(ifstream &in) {
+move_to_start_of_line(std::ifstream &in) {
   char next{};
   while (in.good() && in.get(next) && next != '\n') {
     in.unget();
@@ -298,45 +280,46 @@ move_to_start_of_line(ifstream &in) {
 }
 
 static void
-get_chr_and_idx(ifstream &in, string &chrom, size_t &pos) {
-  string tmp;
+get_chr_and_idx(std::ifstream &in, std::string &chrom, std::size_t &pos) {
+  std::string tmp;
   in >> tmp;
-  replace(begin(tmp), end(tmp), ':', ' ');
+  std::replace(std::begin(tmp), std::end(tmp), ':', ' ');
   std::istringstream iss(tmp);
   iss >> chrom >> pos;
 }
 
 static void
-find_start_line(const string &chr, const size_t idx, ifstream &cpg_in) {
-  cpg_in.seekg(0, ios_base::beg);
-  const size_t begin_pos = cpg_in.tellg();
-  cpg_in.seekg(0, ios_base::end);
-  const size_t end_pos = cpg_in.tellg();
+find_start_line(const std::string &chr, const std::size_t idx,
+                std::ifstream &cpg_in) {
+  cpg_in.seekg(0, std::ios_base::beg);
+  const std::size_t begin_pos = cpg_in.tellg();
+  cpg_in.seekg(0, std::ios_base::end);
+  const std::size_t end_pos = cpg_in.tellg();
 
   if (end_pos - begin_pos < 2)
-    throw runtime_error("empty meth file");
+    throw std::runtime_error("empty meth file");
 
-  size_t step_size = (end_pos - begin_pos) / 2;
+  std::size_t step_size = (end_pos - begin_pos) / 2;
 
-  cpg_in.seekg(0, ios_base::beg);
-  string low_chr;
-  size_t low_idx = 0;
+  cpg_in.seekg(0, std::ios_base::beg);
+  std::string low_chr;
+  std::size_t low_idx = 0;
   get_chr_and_idx(cpg_in, low_chr, low_idx);
 
   // MAGIC: need the -2 here to get past the EOF and possibly a '\n'
-  cpg_in.seekg(-2, ios_base::end);
+  cpg_in.seekg(-2, std::ios_base::end);
   move_to_start_of_line(cpg_in);
-  string high_chr;
-  size_t high_idx{};
+  std::string high_chr;
+  std::size_t high_idx{};
   get_chr_and_idx(cpg_in, high_chr, high_idx);
 
-  size_t pos = step_size;
-  cpg_in.seekg(pos, ios_base::beg);
+  std::size_t pos = step_size;
+  cpg_in.seekg(pos, std::ios_base::beg);
   move_to_start_of_line(cpg_in);
 
   while (step_size > 0) {
-    string mid_chr;
-    size_t mid_idx = 0;
+    std::string mid_chr;
+    std::size_t mid_idx = 0;
     // ADS: possible here to check that the chroms are in the right
     // order, at least the ones we have seen.
     get_chr_and_idx(cpg_in, mid_chr, mid_idx);
@@ -351,49 +334,51 @@ find_start_line(const string &chr, const size_t idx, ifstream &cpg_in) {
       std::swap(mid_idx, low_idx);
       pos += step_size;
     }
-    cpg_in.seekg(pos, ios_base::beg);
+    cpg_in.seekg(pos, std::ios_base::beg);
     move_to_start_of_line(cpg_in);
   }
 }
 
 static bool
-cpg_not_past_region(const GenomicRegion &region, const size_t end_pos,
+cpg_not_past_region(const Interval6 &region, const std::size_t end_pos,
                     const MSite &cpg) {
-  return (cpg.chrom == region.get_chrom() && cpg.pos < end_pos) ||
-         cpg.chrom < region.get_chrom();
+  return (cpg.chrom == region.chrom && cpg.pos < end_pos) ||
+         cpg.chrom < region.chrom;
 }
 
 static void
-get_cpg_stats(ifstream &cpg_in, const GenomicRegion &region,
-              vector<size_t> &total_meth, vector<size_t> &total_reads,
-              size_t &total_cpgs, vector<size_t> &cpgs_with_reads,
-              vector<size_t> &called_total, vector<size_t> &called_meth,
-              vector<double> &mean_meth) {
-  const string chrom(region.get_chrom());
-  const size_t start_pos = region.get_start();
-  const size_t end_pos = region.get_end();
+get_cpg_stats(std::ifstream &cpg_in, const Interval6 &region,
+              std::vector<std::size_t> &total_meth,
+              std::vector<std::size_t> &total_reads, std::size_t &total_cpgs,
+              std::vector<std::size_t> &cpgs_with_reads,
+              std::vector<std::size_t> &called_total,
+              std::vector<std::size_t> &called_meth,
+              std::vector<double> &mean_meth) {
+  const std::string chrom(region.chrom);
+  const std::size_t start_pos = region.start;
+  const std::size_t end_pos = region.stop;
   find_start_line(chrom, start_pos, cpg_in);
 
-  const size_t n_samples = total_meth.size();
+  const std::size_t n_samples = std::size(total_meth);
 
   MSite cpg;
-  vector<double> values;
+  std::vector<double> values;
   values.reserve(2 * n_samples);
-  string line;
+  std::string line;
   while (getline(cpg_in, line)) {
     values.clear();
     get_site_and_values(line, cpg, values);
-    if (values.size() != 2 * n_samples)
-      throw runtime_error("wrong row length. Expected " +
-                          std::to_string(2 * n_samples) + ", got " +
-                          std::to_string(values.size()) + "\n" + line);
+    if (std::size(values) != 2 * n_samples)
+      throw std::runtime_error("wrong row length. Expected " +
+                               std::to_string(2 * n_samples) + ", got " +
+                               std::to_string(std::size(values)) + "\n" + line);
 
     if (cpg_not_past_region(region, end_pos, cpg)) {
       if (start_pos <= cpg.pos && cpg.chrom == chrom) {
         ++total_cpgs;
-        for (size_t j = 0; j < n_samples; ++j) {
-          const size_t meth = values[2 * j + 1];
-          const size_t tot = values[2 * j];
+        for (std::size_t j = 0; j < n_samples; ++j) {
+          const std::size_t meth = values[2 * j + 1];
+          const std::size_t tot = values[2 * j];
           if (values[2 * j] > 0) {
             total_meth[j] += meth;
             total_reads[j] += tot;
@@ -413,51 +398,51 @@ get_cpg_stats(ifstream &cpg_in, const GenomicRegion &region,
 }
 
 static void
-process_with_cpgs_on_disk(const bool PRINT_NUMERIC_ONLY,
+process_with_cpgs_on_disk(const bool print_numeric_only,
                           const bool report_more_information,
-                          const char level_code, const string &cpgs_file,
-                          vector<GenomicRegion> &regions, std::ostream &out) {
-  ifstream in(cpgs_file);
-  string header;
+                          const char level_code, const std::string &cpgs_file,
+                          const std::vector<Interval6> &regions,
+                          std::ostream &out) {
+  std::ifstream in(cpgs_file);
+  std::string header;
   getline(in, header);
 
   std::istringstream iss(header);
-  string col_name;
-  vector<string> col_names;
+  std::string col_name;
+  std::vector<std::string> col_names;
   while (iss >> col_name)
     col_names.push_back(col_name);
 
-  auto end_uniq = std::unique(begin(col_names), end(col_names));
+  const auto end_uniq = std::unique(std::begin(col_names), std::end(col_names));
+  const auto n_uniq = std::distance(std::begin(col_names), end_uniq);
   // make sure all columns come in pairs of counts
-  if (2 * static_cast<size_t>(distance(begin(col_names), end_uniq)) !=
-      col_names.size())
-    throw runtime_error("wrong header format:\n" + header);
+  if (2 * static_cast<std::size_t>(n_uniq) != std::size(col_names))
+    throw std::runtime_error("wrong header format:\n" + header);
 
-  col_names.resize(distance(begin(col_names), end_uniq));
-  const size_t n_samples = col_names.size();
+  col_names.resize(std::distance(std::begin(col_names), end_uniq));
+  const std::size_t n_samples = std::size(col_names);
 
   // write header as first column
-  for (size_t i = 0; i < n_samples; ++i)
+  for (std::size_t i = 0; i < n_samples; ++i)
     out << '\t' << col_names[i];
   out << '\n';
 
   // preallocate entire space for a single row
-  vector<size_t> total_meth(n_samples, 0);
-  vector<size_t> total_reads(n_samples, 0);
-  vector<size_t> cpgs_with_reads(n_samples, 0);
-  vector<size_t> called_total(n_samples, 0);
-  vector<size_t> called_meth(n_samples, 0);
-  vector<double> weighted_mean_meth(n_samples, 0);
-  vector<double> fractional_meth(n_samples, 0);
-  vector<double> unweighted_mean_meth(n_samples, 0);
-  vector<double> score(n_samples, 0.0);
-  vector<double> mean_meth(n_samples, 0.0);
-
-  vector<double> values(2 * n_samples, 0.0);
-  for (size_t i = 0; i < regions.size() && in; ++i) {
-    size_t total_cpgs = 0;
-    mean_meth = vector<double>(n_samples, 0.0);
-    for (size_t j = 0; j < n_samples; ++j) {
+  std::vector<std::size_t> total_meth(n_samples, 0);
+  std::vector<std::size_t> total_reads(n_samples, 0);
+  std::vector<std::size_t> cpgs_with_reads(n_samples, 0);
+  std::vector<std::size_t> called_total(n_samples, 0);
+  std::vector<std::size_t> called_meth(n_samples, 0);
+  std::vector<double> weighted_mean_meth(n_samples, 0);
+  std::vector<double> fractional_meth(n_samples, 0);
+  std::vector<double> unweighted_mean_meth(n_samples, 0);
+  std::vector<double> score(n_samples, 0.0);
+
+  for (std::size_t i = 0; i < std::size(regions) && in; ++i) {
+    std::size_t total_cpgs = 0;
+    auto mean_meth = std::vector<double>(n_samples, 0.0);
+
+    for (std::size_t j = 0; j < n_samples; ++j) {
       total_meth[j] = total_reads[j] = cpgs_with_reads[j] = called_total[j] =
         called_meth[j] = 0;
     }
@@ -465,20 +450,19 @@ process_with_cpgs_on_disk(const bool PRINT_NUMERIC_ONLY,
     get_cpg_stats(in, regions[i], total_meth, total_reads, total_cpgs,
                   cpgs_with_reads, called_total, called_meth, mean_meth);
 
-    for (size_t j = 0; j < n_samples; ++j) {
+    for (std::size_t j = 0; j < n_samples; ++j) {
       fractional_meth[j] =
         static_cast<double>(called_meth[j]) / called_total[j];
       weighted_mean_meth[j] =
         static_cast<double>(total_meth[j]) / total_reads[j];
       unweighted_mean_meth[j] = mean_meth[j] / cpgs_with_reads[j];
-
       score[j] =
         (level_code == 'w' ? weighted_mean_meth[j]
                            : (level_code == 'u' ? unweighted_mean_meth[j]
                                                 : fractional_meth[j]));
     }
 
-    if (!PRINT_NUMERIC_ONLY || all_is_finite(score))
+    if (!print_numeric_only || all_finite(score))
       out << format_output_line(report_more_information, regions[i], score,
                                 weighted_mean_meth, unweighted_mean_meth,
                                 fractional_meth, total_cpgs, cpgs_with_reads,
@@ -489,38 +473,36 @@ process_with_cpgs_on_disk(const bool PRINT_NUMERIC_ONLY,
 
 // end of code for searching on disk
 
-[[nodiscard]] static size_t
-check_bed_format(const string &regions_file) {
-  ifstream in(regions_file);
+[[nodiscard]] static std::size_t
+get_bed_columns(const std::string &regions_file) {
+  std::ifstream in(regions_file);
   if (!in)
-    throw runtime_error("cannot open file: " + regions_file);
-
-  string line;
+    throw std::runtime_error("cannot open file: " + regions_file);
+  std::string line;
   getline(in, line);
-
   std::istringstream iss(line);
-  string token;
-  size_t n_columns = 0;
+  std::string token;
+  std::size_t n_columns = 0;
   while (iss >> token)
     ++n_columns;
-
   return n_columns;
 }
 
 int
 main_multimethstat(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
   try {
-    static const string default_name_prefix = "X";
-
-    bool VERBOSE = false;
-    bool PRINT_NUMERIC_ONLY = false;
-    bool load_entire_file = false;
-    bool report_more_information = false;
-    bool sort_data_if_needed = false;
-
-    string level_code = "w";
-
-    string outfile;
+    static constexpr auto default_name_prefix = "X";
+    constexpr auto valid_n_cols = [](const auto n_cols) {
+      return n_cols != 3 && n_cols < 6;  // NOLINT(*-avoid-magic-numbers)
+    };
+
+    bool verbose{false};
+    bool print_numeric_only{false};
+    bool load_entire_file{false};
+    bool report_more_information{false};
+    bool sort_data_if_needed{false};
+    std::string level_code = "w";
+    std::string outfile;
 
     /****************** COMMAND LINE OPTIONS ********************/
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
@@ -531,7 +513,7 @@ main_multimethstat(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     opt_parse.add_opt("output", 'o', "Name of output file (default: stdout)",
                       false, outfile);
     opt_parse.add_opt("numeric", 'N', "print numeric values only (not NAs)",
-                      false, PRINT_NUMERIC_ONLY);
+                      false, print_numeric_only);
     opt_parse.add_opt("preload", 'L', "Load all CpG sites", false,
                       load_entire_file);
     opt_parse.add_opt("sort", 's', "sort data if needed", false,
@@ -542,84 +524,79 @@ main_multimethstat(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
                       false, level_code);
     opt_parse.add_opt("more-levels", 'M', "report more methylation information",
                       false, report_more_information);
-    opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE);
-    vector<string> leftover_args;
+    opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << '\n'
-           << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n'
+                << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << '\n';
+      std::cerr << opt_parse.option_missing_message() << '\n';
       return EXIT_SUCCESS;
     }
-    if (leftover_args.size() != 2) {
-      cerr << opt_parse.help_message() << '\n';
+    if (std::size(leftover_args) != 2) {
+      std::cerr << opt_parse.help_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (sort_data_if_needed && !load_entire_file) {
-      cerr << "cannot sort data unless all data is loaded\n";
+      std::cerr << "cannot sort data unless all data is loaded\n";
       return EXIT_SUCCESS;
     }
     if (level_code != "w" && level_code != "u" && level_code != "f") {
-      cerr << "selected level must be in {w, u, f}\n";
+      std::cerr << "selected level must be in {w, u, f}\n";
       return EXIT_SUCCESS;
     }
-    const string regions_file = leftover_args.front();
-    const string cpgs_file = leftover_args.back();
+    const std::string regions_file = leftover_args.front();
+    const std::string cpgs_file = leftover_args.back();
     /****************** END COMMAND LINE OPTIONS *****************/
 
-    if (VERBOSE)
-      cerr << "loading regions\n";
+    if (verbose)
+      std::cerr << "loading regions\n";
 
-    const size_t n_columns = check_bed_format(regions_file);
-    // MAGIC: this allows for exactly 3 or at least 6 columns in the
-    // bed format
-    if (n_columns != 3 && n_columns < 6)
-      throw runtime_error("format must be 3 or 6+ column bed: " + regions_file);
-
-    vector<GenomicRegion> regions;
-    ReadBEDFile(regions_file, regions);
-    if (!is_sorted(begin(regions), end(regions))) {
+    const auto n_columns = get_bed_columns(regions_file);
+    if (valid_n_cols(n_columns))
+      throw std::runtime_error("format must be 3 or 6+ column bed: " +
+                               regions_file);
+    auto regions = read_intervals6(regions_file);
+    if (!std::is_sorted(std::cbegin(regions), std::cend(regions))) {
       if (sort_data_if_needed) {
-        if (VERBOSE)
-          cerr << "sorting regions\n";
-        sort(begin(regions), end(regions));
+        if (verbose)
+          std::cerr << "sorting regions\n";
+        std::sort(std::begin(regions), std::end(regions));
       }
       else
-        throw runtime_error("regions not sorted in file: " + regions_file);
+        throw std::runtime_error("regions not sorted in file: " + regions_file);
     }
 
     if (n_columns == 3)  // then we should name the regions
-      for (size_t i = 0; i < regions.size(); ++i)
-        regions[i].set_name(default_name_prefix + std::to_string(i));
-
-    if (VERBOSE)
-      cerr << "[n_regions=" << regions.size() << "]\n";
+      for (std::size_t i = 0; i < std::size(regions); ++i)
+        regions[i].name = default_name_prefix + std::to_string(i);
 
-    std::ofstream of;
-    if (!outfile.empty())
-      of.open(outfile);
-    std::ostream out(outfile.empty() ? cout.rdbuf() : of.rdbuf());
+    if (verbose)
+      std::cerr << "[n_regions=" << std::size(regions) << "]\n";
 
+    std::ofstream out(outfile);
+    if (!out)
+      throw std::runtime_error("failed to open output file: " + outfile);
     if (load_entire_file)
-      process_with_cpgs_loaded(VERBOSE,  // sort_data_if_needed,
-                               PRINT_NUMERIC_ONLY, report_more_information,
+      process_with_cpgs_loaded(verbose,  // sort_data_if_needed,
+                               print_numeric_only, report_more_information,
                                level_code[0], cpgs_file, regions, out);
     else
-      process_with_cpgs_on_disk(PRINT_NUMERIC_ONLY, report_more_information,
+      process_with_cpgs_on_disk(print_numeric_only, report_more_information,
                                 level_code[0], cpgs_file, regions, out);
   }
   catch (const std::exception &e) {
-    cerr << e.what() << '\n';
+    std::cerr << e.what() << '\n';
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
 }
 
-// NOLINTEND(*-avoid-magic-numbers,*-narrowing-conversions)
+// NOLINTEND(*-narrowing-conversions)
diff --git a/src/analysis/pmd.cpp b/src/analysis/pmd.cpp
index 8519c23a..02529e99 100644
--- a/src/analysis/pmd.cpp
+++ b/src/analysis/pmd.cpp
@@ -1,25 +1,32 @@
-/* Copyright (C) 2018 University of Southern California
- *                         Andrew D Smith
- * Authors: Andrew D. Smith, Song Qiang, Jenny Qu,
- *          Benjamin Decato, Guilherme Sena
+/* Copyright (C) 2018-2025 Andrew D Smith and Benjamin Decato
  *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
+ * This is free software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the Free Software
+ * Foundation; either version 2 of the License, or (at your option) any later
+ * version.
  *
- * This is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * This is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ * details.
  */
 
-#include "GenomicRegion.hpp"
+[[maybe_unused]] static constexpr auto description = R"(
+Identify PMDs in methylomes. Methylation must be provided in the methcounts
+file format (chrom, position, strand, context, methylation, reads). See the
+methcounts documentation for details. This program assumes only data at CpG
+sites and that strands are collapsed so only the positive site appears in the
+file, but reads counts are from both strands.
+)";
+
+#include "Interval.hpp"
+#include "Interval6.hpp"
 #include "MSite.hpp"
-#include "OptionParser.hpp"
 #include "TwoStateHMM_PMD.hpp"
 #include "bsutils.hpp"
 #include "counts_header.hpp"
+
+#include "OptionParser.hpp"
 #include "smithlab_utils.hpp"
 
 #include <bamxx.hpp>
@@ -45,38 +52,16 @@
 #include <utility>
 #include <vector>
 
-// NOLINTBEGIN(*-avoid-magic-numbers,*-narrowing-conversions)
-
-using std::cerr;
-using std::cout;
-using std::make_pair;
-using std::max;
-using std::min;
-using std::numeric_limits;
-using std::runtime_error;
-using std::string;
-using std::vector;
-
-using std::ofstream;
-using std::ostream;
-using std::to_string;
-
-using bamxx::bgzf_file;
-
-template <typename T> using num_lim = std::numeric_limits<T>;
+// NOLINTBEGIN(*-narrowing-conversions)
 
 struct pmd_summary {
-  explicit pmd_summary(const vector<GenomicRegion> &pmds) :
-    pmd_count{std::size(pmds)} {
-    // NOLINTBEGIN(*-prefer-member-initializer)
-    pmd_total_size =
-      accumulate(cbegin(pmds), cend(pmds), 0ul,
-                 [](const std::uint64_t t, const GenomicRegion &p) {
-                   return t + p.get_width();
-                 });
+  explicit pmd_summary(const std::vector<Interval6> &pmds) :
+    pmd_count{std::size(pmds)},
+    pmd_total_size{std::accumulate(
+      std::cbegin(pmds), std::cend(pmds), 0ul,
+      [](const std::uint64_t t, const Interval6 &p) { return t + size(p); })} {
     pmd_mean_size = static_cast<double>(pmd_total_size) /
                     std::max(pmd_count, static_cast<std::uint64_t>(1));
-    // NOLINTEND(*-prefer-member-initializer)
   }
   // pmd_count is the number of identified PMDs.
   std::uint64_t pmd_count{};
@@ -84,48 +69,47 @@ struct pmd_summary {
   std::uint64_t pmd_total_size{};
   // mean_pmd_size is the mean size of the identified PMDs
   double pmd_mean_size{};
-
-  string
-  tostring() {
-    std::ostringstream oss;
-    oss << "pmd_count: " << pmd_count << '\n'
-        << "pmd_total_size: " << pmd_total_size << '\n'
-        << "pmd_mean_size: " << std::fixed << std::setprecision(2)
-        << pmd_mean_size;
-
-    return oss.str();
-  }
 };
 
+[[nodiscard]] static inline auto
+to_string(const pmd_summary &s) -> std::string {
+  std::ostringstream oss;
+  oss << "pmd_count: " << s.pmd_count << '\n'
+      << "pmd_total_size: " << s.pmd_total_size << '\n'
+      << "pmd_mean_size: " << std::fixed << s.pmd_mean_size;
+  return oss.str();
+}
+
 static void
-get_adjacent_distances(const vector<GenomicRegion> &pmds,
-                       vector<size_t> &dists) {
-  for (size_t i = 1; i < pmds.size(); ++i)
-    if (pmds[i].same_chrom(pmds[i - 1]))
-      dists.push_back(pmds[i].get_start() - pmds[i - 1].get_end());
+get_adjacent_distances(const std::vector<Interval6> &pmds,
+                       std::vector<std::size_t> &dists) {
+  for (std::size_t i = 1; i < std::size(pmds); ++i)
+    if (pmds[i].chrom == pmds[i - 1].chrom)
+      dists.push_back(pmds[i].start - pmds[i - 1].stop);
 }
 
 static bool
-precedes(const string &chrom, const size_t position, const GenomicRegion &r) {
-  return (chrom < r.get_chrom() ||
-          (chrom == r.get_chrom() && position < r.get_start()));
+precedes(const std::string &chrom, const std::size_t position,
+         const Interval6 &r) {
+  return chrom < r.chrom || (chrom == r.chrom && position < r.start);
 }
 
 static bool
-succeeds(const string &chrom, const size_t position, const GenomicRegion &r) {
-  return (r.get_chrom() < chrom ||
-          (chrom == r.get_chrom() && r.get_end() <= position));
+succeeds(const std::string &chrom, const std::size_t position,
+         const Interval6 &r) {
+  return r.chrom < chrom || (chrom == r.chrom && r.stop <= position);
 }
 
 static void
-merge_nearby_pmd(const size_t max_merge_dist, vector<GenomicRegion> &pmds) {
-  size_t j = 0;
-  for (size_t i = 1; i < pmds.size(); ++i) {
-    if (pmds[j].same_chrom(pmds[i]) &&
-        pmds[i].get_start() - pmds[j].get_end() <= max_merge_dist) {
-      pmds[j].set_end(pmds[i].get_end());
-      const string combined_name(pmds[j].get_name() + pmds[i].get_name());
-      pmds[j].set_name(combined_name);
+merge_nearby_pmd(const std::size_t max_merge_dist,
+                 std::vector<Interval6> &pmds) {
+  std::size_t j = 0;
+  for (std::size_t i = 1; i < std::size(pmds); ++i) {
+    if (pmds[j].chrom == pmds[i].chrom &&
+        pmds[i].start - pmds[j].stop <= max_merge_dist) {
+      pmds[j].stop = pmds[i].stop;
+      const std::string combined_name(pmds[j].name + pmds[i].name);
+      pmds[j].name = combined_name;
     }
     else
       pmds[++j] = pmds[i];
@@ -152,39 +136,42 @@ beta_max_likelihood(const double fg_alpha, const double fg_beta,
          beta_log_likelihood(bg_alpha, bg_beta, p_hi);
 }
 
-static size_t
+static std::size_t
 find_best_bound(const bool IS_RIGHT_BOUNDARY,
-                std::map<size_t, std::pair<size_t, size_t>> &pos_meth_tot,
-                const vector<double> &fg_alpha, const vector<double> &fg_beta,
-                const vector<double> &bg_alpha, const vector<double> &bg_beta) {
-  vector<std::pair<size_t, size_t>> meth_tot;
-  vector<size_t> positions;
-  for (auto &&it : pos_meth_tot) {
+                const std::map<std::size_t, std::pair<std::size_t, std::size_t>>
+                  &pos_meth_tot,
+                const std::vector<double> &fg_alpha,
+                const std::vector<double> &fg_beta,
+                const std::vector<double> &bg_alpha,
+                const std::vector<double> &bg_beta) {
+  std::vector<std::pair<std::size_t, std::size_t>> meth_tot;
+  std::vector<std::size_t> positions;
+  for (const auto &it : pos_meth_tot) {
     positions.push_back(it.first);
     meth_tot.push_back(it.second);
   }
 
-  vector<size_t> cumu_left_meth(meth_tot.size(), 0);
-  vector<size_t> cumu_left_tot(meth_tot.size(), 0);
-  vector<size_t> cumu_right_meth(meth_tot.size(), 0);
-  vector<size_t> cumu_right_tot(meth_tot.size(), 0);
-  if (meth_tot.size() > 0)
-    for (size_t i = 1; i < meth_tot.size() - 1; ++i) {
-      const size_t j = meth_tot.size() - 1 - i;
+  std::vector<std::size_t> cumu_left_meth(std::size(meth_tot), 0);
+  std::vector<std::size_t> cumu_left_tot(std::size(meth_tot), 0);
+  std::vector<std::size_t> cumu_right_meth(std::size(meth_tot), 0);
+  std::vector<std::size_t> cumu_right_tot(std::size(meth_tot), 0);
+  if (std::size(meth_tot) > 0)
+    for (std::size_t i = 1; i + 1 < std::size(meth_tot); ++i) {
+      const std::size_t j = std::size(meth_tot) - 1 - i;
       cumu_left_meth[i] = cumu_left_meth[i - 1] + meth_tot[i - 1].first;
       cumu_left_tot[i] = cumu_left_tot[i - 1] + meth_tot[i - 1].second;
       cumu_right_meth[j] = cumu_right_meth[j + 1] + meth_tot[j + 1].first;
       cumu_right_tot[j] = cumu_right_tot[j + 1] + meth_tot[j + 1].second;
     }
 
-  size_t best_idx = 0;
-  double best_score = -num_lim<double>::max();
-  if (meth_tot.size() > 0)
-    for (size_t i = 1; i < meth_tot.size() - 1; ++i) {
-      size_t N_low{};
-      size_t k_low{};
-      size_t N_hi{};
-      size_t k_hi{};
+  std::size_t best_idx = 0;
+  double best_score = -std::numeric_limits<double>::max();
+  if (std::size(meth_tot) > 0)
+    for (std::size_t i = 1; i + 1 < std::size(meth_tot); ++i) {
+      std::size_t N_low{};
+      std::size_t k_low{};
+      std::size_t N_hi{};
+      std::size_t k_hi{};
       if (!IS_RIGHT_BOUNDARY) {
         N_low = cumu_right_tot[i] + meth_tot[i].second;
         k_low = cumu_right_meth[i] + meth_tot[i].first;
@@ -202,66 +189,63 @@ find_best_bound(const bool IS_RIGHT_BOUNDARY,
         const double p_hi = static_cast<double>(k_hi) / N_hi;
         const double p_low = static_cast<double>(k_low) / N_low;
 
-        for (size_t j = 0; j < fg_alpha.size(); ++j) {
+        for (std::size_t j = 0; j < std::size(fg_alpha); ++j) {
           score += beta_max_likelihood(fg_alpha[j], fg_beta[j], bg_alpha[j],
                                        bg_beta[j], p_low, p_hi);
         }  // beta max likelihood using learned emissions
-        score /= fg_alpha.size();
+        score /= std::size(fg_alpha);
         if (p_hi > p_low && score > best_score) {
           best_idx = i;
           best_score = score;
         }
       }
     }
-  return (best_score > -num_lim<double>::max()) ? positions[best_idx]
-                                                : num_lim<size_t>::max();
+  return (best_score > -std::numeric_limits<double>::max())
+           ? positions[best_idx]
+           : std::numeric_limits<std::size_t>::max();
 }
 
 static void
-get_boundary_positions(vector<GenomicRegion> &bounds,
-                       const vector<GenomicRegion> &pmds,
-                       const size_t &bin_size) {
-  for (size_t i = 0; i < pmds.size(); ++i) {
+get_boundary_positions(std::vector<Interval6> &bounds,
+                       const std::vector<Interval6> &pmds,
+                       const std::size_t &bin_size) {
+  for (std::size_t i = 0; i < std::size(pmds); ++i) {
     bounds.push_back(pmds[i]);
-    pmds[i].get_start() > bin_size
-      ? bounds.back().set_start(pmds[i].get_start() - bin_size)
-      : bounds.back().set_start(0);
-    bounds.back().set_end(pmds[i].get_start() + bin_size);
+    bounds.back().start =
+      pmds[i].start > bin_size ? pmds[i].start - bin_size : 0;
+    bounds.back().stop = pmds[i].start + bin_size;
 
     bounds.push_back(pmds[i]);
-    pmds[i].get_end() > bin_size
-      ? bounds.back().set_start(pmds[i].get_end() - bin_size)
-      : bounds.back().set_start(0);
-    bounds.back().set_end(pmds[i].get_end() + bin_size);
+    bounds.back().start = pmds[i].stop > bin_size ? pmds[i].stop - bin_size : 0;
+    bounds.back().stop = pmds[i].stop + bin_size;
   }
 }
 
 static void
 get_optimized_boundary_likelihoods(
-  const vector<string> &cpgs_file, vector<GenomicRegion> &bounds,
-  const vector<bool> &array_status, const vector<double> &fg_alpha,
-  const vector<double> &fg_beta, const vector<double> &bg_alpha,
-  const vector<double> &bg_beta, vector<double> &boundary_scores,
-  vector<size_t> &boundary_certainties) {
-  // MAGIC NUMBER FOR WEIGHTING ARRAY
-  // CONTRIBUTION TO BOUNDARY OBSERVATIONS
-  static const double array_coverage_constant = 10;
-
-  vector<bgzf_file> in;
-  for (size_t i = 0; i < cpgs_file.size(); ++i) {
+  const std::vector<std::string> &cpgs_file,
+  const std::vector<Interval6> &bounds, const std::vector<bool> &array_status,
+  const std::vector<double> &fg_alpha, const std::vector<double> &fg_beta,
+  const std::vector<double> &bg_alpha, const std::vector<double> &bg_beta,
+  std::vector<double> &boundary_scores,
+  std::vector<std::size_t> &boundary_certainties) {
+  // For weighting array contribution to boundary observations
+  static constexpr double array_coverage_constant = 10;
+
+  std::vector<bamxx::bgzf_file> in;
+  for (std::size_t i = 0; i < std::size(cpgs_file); ++i) {
     in.emplace_back(cpgs_file[i], "r");
     if (get_has_counts_header(cpgs_file[i]))
       skip_counts_header(in.back());
   }
 
-  std::map<size_t, std::pair<size_t, size_t>> pos_meth_tot;
-  size_t n_meth{};
-  size_t n_reads{};
-  size_t bound_idx{};
-  for (; bound_idx < bounds.size(); ++bound_idx) {  // for each boundary
-    for (size_t i = 0; i < in.size(); ++i) {
+  std::map<std::size_t, std::pair<std::size_t, std::size_t>> pos_meth_tot;
+  std::size_t n_meth{};
+  std::size_t n_reads{};
+  std::size_t bound_idx{};
+  for (; bound_idx < std::size(bounds); ++bound_idx) {  // for each boundary
+    for (std::size_t i = 0; i < std::size(in); ++i) {
       // get totals for all CpGs overlapping that boundary
-
       MSite site;
       while (read_site(in[i], site) &&
              !succeeds(site.chrom, site.pos, bounds[bound_idx])) {
@@ -284,10 +268,8 @@ get_optimized_boundary_likelihoods(
             n_meth = site.n_meth();
             n_reads = site.n_reads;
           }
-          auto it(pos_meth_tot.find(site.pos));
-          if (it == end(pos_meth_tot))
+          if (pos_meth_tot.find(site.pos) == std::cend(pos_meth_tot))
             pos_meth_tot[site.pos] = std::make_pair(n_meth, n_reads);
-
           else {  // add this file's contribution to the site's methylation
             pos_meth_tot[site.pos].first += n_meth;
             pos_meth_tot[site.pos].second += n_reads;
@@ -297,18 +279,18 @@ get_optimized_boundary_likelihoods(
     }
 
     // Get the boundary position
-    size_t boundary_position =
-      (bounds[bound_idx].get_start() + bounds[bound_idx].get_end()) / 2;
-
-    size_t N_low = 0, k_low = 0, N_hi = 0, k_hi = 0;
-    for (auto it = begin(pos_meth_tot); it != end(pos_meth_tot); ++it) {
-      if (it->first < boundary_position) {
-        N_low += it->second.second;
-        k_low += it->second.first;
+    std::size_t boundary_position =
+      (bounds[bound_idx].start + bounds[bound_idx].stop) / 2;
+
+    std::size_t N_low = 0, k_low = 0, N_hi = 0, k_hi = 0;
+    for (const auto &p : pos_meth_tot) {
+      if (p.first < boundary_position) {
+        N_low += p.second.second;
+        k_low += p.second.first;
       }
       else {
-        N_hi += it->second.second;
-        k_hi += it->second.first;
+        N_hi += p.second.second;
+        k_hi += p.second.first;
       }
     }
 
@@ -317,47 +299,48 @@ get_optimized_boundary_likelihoods(
     const double p_low = static_cast<double>(k_low) / N_low;
 
     if (bound_idx % 2) {  // its a right boundary, p_low should go with fg
-      for (size_t j = 0; j < fg_alpha.size(); ++j)
+      for (std::size_t j = 0; j < std::size(fg_alpha); ++j)
         score += beta_max_likelihood(fg_alpha[j], fg_beta[j], bg_alpha[j],
                                      bg_beta[j], p_low, p_hi);
     }
     else {  // its a left boundary, p_low should go with bg
-      for (size_t j = 0; j < fg_alpha.size(); ++j)
+      for (std::size_t j = 0; j < std::size(fg_alpha); ++j)
         score += beta_max_likelihood(bg_alpha[j], bg_beta[j], fg_alpha[j],
                                      fg_beta[j], p_low, p_hi);
     }
     boundary_certainties.push_back(std::min(N_low, N_hi));
-    score /= fg_alpha.size();
+    score /= std::size(fg_alpha);
     boundary_scores.push_back(exp(score));
     pos_meth_tot.clear();
   }
 }
 
 static void
-find_exact_boundaries(
-  const vector<string> &cpgs_file, vector<GenomicRegion> &bounds,
-  const vector<bool> &array_status, const vector<double> &fg_alpha,
-  const vector<double> &fg_beta, const vector<double> &bg_alpha,
-  const vector<double> &bg_beta, vector<size_t> &bound_site) {
-  // MAGIC NUMBER FOR WEIGHTING ARRAY
-  // CONTRIBUTION TO BOUNDARY OBSERVATIONS
-  static const double array_coverage_constant = 10;
-
-  vector<bgzf_file> in;
-  for (size_t i = 0; i < std::size(cpgs_file); ++i) {
+find_exact_boundaries(const std::vector<std::string> &cpgs_file,
+                      const std::vector<Interval6> &bounds,
+                      const std::vector<bool> &array_status,
+                      const std::vector<double> &fg_alpha,
+                      const std::vector<double> &fg_beta,
+                      const std::vector<double> &bg_alpha,
+                      const std::vector<double> &bg_beta,
+                      std::vector<std::size_t> &bound_site) {
+  // For weighting array contribution to boundary observations
+  static constexpr double array_coverage_constant = 10;
+
+  std::vector<bamxx::bgzf_file> in;
+  for (std::size_t i = 0; i < std::size(cpgs_file); ++i) {
     in.emplace_back(cpgs_file[i], "r");
     if (get_has_counts_header(cpgs_file[i]))
       skip_counts_header(in[i]);
   }
 
-  std::map<size_t, std::pair<size_t, size_t>> pos_meth_tot;
-  size_t n_meth{};
-  size_t n_reads{};
-  size_t bound_idx{};
-  for (; bound_idx < bounds.size(); ++bound_idx) {  // for each boundary
-    for (size_t i = 0; i < in.size(); ++i) {
+  std::map<std::size_t, std::pair<std::size_t, std::size_t>> pos_meth_tot;
+  std::size_t n_meth{};
+  std::size_t n_reads{};
+  std::size_t bound_idx{};
+  for (; bound_idx < std::size(bounds); ++bound_idx) {  // for each boundary
+    for (std::size_t i = 0; i < std::size(in); ++i) {
       // get totals for all CpGs overlapping that boundary
-
       MSite site;
       while (read_site(in[i], site) &&
              !succeeds(site.chrom, site.pos, bounds[bound_idx])) {
@@ -398,108 +381,97 @@ find_exact_boundaries(
 }
 
 static void
-optimize_boundaries(const size_t bin_size, const vector<string> &cpgs_file,
-                    vector<GenomicRegion> &pmds,
-                    const vector<bool> &array_status,
-                    const vector<double> &fg_alpha,
-                    const vector<double> &fg_beta,
-                    const vector<double> &bg_alpha,
-                    const vector<double> &bg_beta) {
-  vector<GenomicRegion> bounds;
+optimize_boundaries(
+  const std::size_t bin_size, const std::vector<std::string> &cpgs_file,
+  std::vector<Interval6> &pmds, const std::vector<bool> &array_status,
+  const std::vector<double> &fg_alpha, const std::vector<double> &fg_beta,
+  const std::vector<double> &bg_alpha, const std::vector<double> &bg_beta) {
+  std::vector<Interval6> bounds;
   get_boundary_positions(bounds, pmds, bin_size);
-  vector<size_t> bound_site;
+  std::vector<std::size_t> bound_site;
   find_exact_boundaries(cpgs_file, bounds, array_status, fg_alpha, fg_beta,
                         bg_alpha, bg_beta, bound_site);
 
-  ////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////
-  ///// NOW RESET THE STARTS AND ENDS OF PMDs
-  /////
-  for (size_t i = 0; i < pmds.size(); ++i) {
-    const size_t start_site = bound_site[2 * i];
-    if (start_site != num_lim<size_t>::max())
-      pmds[i].set_start(start_site);
-    const size_t end_site = bound_site[2 * i + 1];
-    if (end_site != num_lim<size_t>::max())
-      pmds[i].set_end(end_site + 1);
+  // Now reset the starts and ends of PMDs
+  for (std::size_t i = 0; i < std::size(pmds); ++i) {
+    const std::size_t start_site = bound_site[2 * i];
+    if (start_site != std::numeric_limits<std::size_t>::max())
+      pmds[i].start = start_site;
+    const std::size_t end_site = bound_site[2 * i + 1];
+    if (end_site != std::numeric_limits<std::size_t>::max())
+      pmds[i].stop = end_site + 1;
   }
 
-  ////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////
-  ///// NOW MERGE THE PMDS THAT ARE TOO CLOSE
-  /////
-  vector<size_t> dists;
+  // Now merge the pmds that are too close
+  std::vector<std::size_t> dists;
   get_adjacent_distances(pmds, dists);
-  sort(begin(dists), end(dists));
-
-  ////////////////////////////////////////////////////////////////////////
-  ////////////////////////////////////////////////////////////////////////
-  // NEED TO USE SOME RANDOMIZATION METHOD HERE TO FIGURE OUT THE
-  // MERGING DISTANCE
-  vector<std::pair<size_t, size_t>> dist_hist;
-  size_t first = 0;
-  for (size_t i = 1; i < dists.size(); ++i)
+  std::sort(std::begin(dists), std::end(dists));
+
+  // Need to use some randomization method here to figure out the
+  // merging distance
+  std::vector<std::pair<std::size_t, std::size_t>> dist_hist;
+  std::size_t first = 0;
+  for (std::size_t i = 1; i < std::size(dists); ++i)
     if (dists[i] != dists[i - 1]) {
       dist_hist.push_back(std::make_pair(dists[i - 1], i - first));
       first = i;
     }
-
   merge_nearby_pmd(2 * bin_size, pmds);
 
-  ///////////////////////////////////////////////////////////////////////
-  ///////////////////////////////////////////////////////////////////////
-  // LAST, GET THE CPG SITES WITHIN 1 BIN OF EACH BOUNDARY AND COMPUTE
-  // THE LIKELIHOOD TO GET A "SCORE" ON THE BOUNDARY
-  ///////
+  // Last, get the cpg sites within 1 bin of each boundary and compute
+  // the likelihood to get a "score" on the boundary
   bounds.clear();  // need updated boundaries after merging nearby PMDs
   get_boundary_positions(bounds, pmds, bin_size);
 
-  vector<double> boundary_scores;
-  vector<size_t> boundary_certainties;
+  std::vector<double> boundary_scores;
+  std::vector<std::size_t> boundary_certainties;
   get_optimized_boundary_likelihoods(cpgs_file, bounds, array_status, fg_alpha,
                                      fg_beta, bg_alpha, bg_beta,
                                      boundary_scores, boundary_certainties);
 
   // Add the boundary scores to the PMD names
-  for (size_t i = 0; i < pmds.size(); ++i)
-    pmds[i].set_name(pmds[i].get_name() + ":" +
-                     to_string(boundary_scores[2 * i]) + ":" +
-                     to_string(boundary_certainties[2 * i]) + ":" +
-                     to_string(boundary_scores[2 * i + 1]) + ":" +
-                     to_string(boundary_certainties[2 * i + 1]));
+  // clang-format off
+  for (std::size_t i = 0; i < std::size(pmds); ++i)
+    pmds[i].name = pmds[i].name + ":" +
+      std::to_string(boundary_scores[2 * i]) + ":" +
+      std::to_string(boundary_certainties[2 * i]) + ":" +
+      std::to_string(boundary_scores[2 * i + 1]) + ":" +
+      std::to_string(boundary_certainties[2 * i + 1]);
+  // clang-format on
 }
 
 double
-get_score_cutoff_for_fdr(const vector<double> &scores, const double fdr) {
+get_score_cutoff_for_fdr(const std::vector<double> &scores, const double fdr) {
   if (fdr <= 0)
-    return num_lim<double>::max();
-  else if (fdr > 1)
-    return num_lim<double>::min();
-  vector<double> local(scores);
-  std::sort(begin(local), end(local));
-  size_t i = 0;
-  for (; i < local.size() - 1 &&
-         local[i + 1] < fdr * static_cast<double>(i + 1) / local.size();
+    return std::numeric_limits<double>::max();
+  if (fdr > 1)
+    return std::numeric_limits<double>::min();
+  std::vector<double> local(scores);
+  std::sort(std::begin(local), std::end(local));
+  std::size_t i = 0;
+  for (; i + 1 < std::size(local) &&
+         local[i + 1] < fdr * static_cast<double>(i + 1) / std::size(local);
        ++i)
     ;
-  return local[i] + 1.0 / scores.size();
+  return local[i] + 1.0 / std::size(scores);
 }
 
 static inline double
 score_contribution(const std::pair<double, double> &m) {
   const double denom = m.first + m.second;
-  return (denom > 0) ? 1.0 - m.first / denom : 0.0;
+  return denom > 0 ? 1.0 - m.first / denom : 0.0;
 }
 
 static void
-get_domain_scores(const vector<bool> &classes,
-                  const vector<vector<std::pair<double, double>>> &meth,
-                  const vector<size_t> &reset_points, vector<double> &scores) {
-  const size_t n_replicates = meth.size();
-  size_t reset_idx = 1;
+get_domain_scores(
+  const std::vector<bool> &classes,
+  const std::vector<std::vector<std::pair<double, double>>> &meth,
+  const std::vector<std::size_t> &reset_points, std::vector<double> &scores) {
+  const std::size_t n_replicates = std::size(meth);
+  std::size_t reset_idx = 1;
   bool in_domain = false;
   double score = 0;
-  for (size_t i = 0; i < classes.size(); ++i) {
+  for (std::size_t i = 0; i < std::size(classes); ++i) {
     if (reset_points[reset_idx] == i) {
       if (in_domain) {
         scores.push_back(score);
@@ -509,7 +481,7 @@ get_domain_scores(const vector<bool> &classes,
       ++reset_idx;
     }
     if (classes[i]) {
-      for (size_t r = 0; r < n_replicates; ++r)
+      for (std::size_t r = 0; r < n_replicates; ++r)
         score += score_contribution(meth[r][i]);
       in_domain = true;
     }
@@ -524,16 +496,17 @@ get_domain_scores(const vector<bool> &classes,
 }
 
 static void
-build_domains(const vector<SimpleGenomicRegion> &bins,
-              const vector<size_t> &reset_points, const vector<bool> &classes,
-              vector<GenomicRegion> &domains) {
-  size_t n_bins = 0, reset_idx = 1, prev_end = 0;
+build_domains(const std::vector<Interval> &bins,
+              const std::vector<std::size_t> &reset_points,
+              const std::vector<bool> &classes,
+              std::vector<Interval6> &domains) {
+  std::size_t n_bins = 0, reset_idx = 1, prev_end = 0;
   bool in_domain = false;
-  for (size_t i = 0; i < classes.size(); ++i) {
+  for (std::size_t i = 0; i < std::size(classes); ++i) {
     if (reset_points[reset_idx] == i) {
       if (in_domain) {
-        domains.back().set_end(prev_end);
-        domains.back().set_score(n_bins);
+        domains.back().stop = prev_end;
+        domains.back().score = n_bins;
         n_bins = 0;
         in_domain = false;
       }
@@ -541,37 +514,39 @@ build_domains(const vector<SimpleGenomicRegion> &bins,
     }
     if (classes[i]) {
       if (!in_domain) {
-        domains.push_back(GenomicRegion(bins[i]));
+        domains.emplace_back(bins[i].chrom, bins[i].start, bins[i].stop,
+                             std::string{}, 0, '+');
         in_domain = true;
       }
       ++n_bins;
     }
     else if (in_domain) {
-      domains.back().set_end(prev_end);
-      domains.back().set_score(n_bins);
+      domains.back().stop = prev_end;
+      domains.back().score = n_bins;
       n_bins = 0;
       in_domain = false;
     }
-    prev_end = bins[i].get_end();
+    prev_end = bins[i].stop;
   }
 }
 
 // Modified to take multiple replicates
 template <class T, class U>
 static void
-separate_regions(const size_t desert_size,
-                 vector<vector<SimpleGenomicRegion>> &bins,
-                 vector<vector<T>> &meth, vector<vector<U>> &reads,
-                 vector<size_t> &reset_points,
-                 vector<size_t> &dists_btwn_bins) {
-  const size_t n_replicates = bins.size();
+separate_regions(const std::size_t desert_size,
+                 std::vector<std::vector<Interval>> &bins,
+                 std::vector<std::vector<T>> &meth,
+                 std::vector<std::vector<U>> &reads,
+                 std::vector<std::size_t> &reset_points,
+                 std::vector<std::size_t> &dists_btwn_bins) {
+  const std::size_t n_replicates = std::size(bins);
 
   // eliminate the zero-read cpg sites if no coverage in any rep
-  size_t end_coord_of_prev = 0;
-  size_t j = 0;
-  for (size_t i = 0; i < bins[0].size(); ++i) {
+  std::size_t end_coord_of_prev = 0;
+  std::size_t j = 0;
+  for (std::size_t i = 0; i < std::size(bins[0]); ++i) {
     bool all_empty = true;
-    size_t rep_idx = 0;
+    std::size_t rep_idx = 0;
     while (all_empty && rep_idx < n_replicates) {
       if (reads[rep_idx][i] == 0)
         ++rep_idx;
@@ -579,9 +554,9 @@ separate_regions(const size_t desert_size,
         all_empty = false;
     }
     if (!all_empty) {
-      dists_btwn_bins.push_back(bins[0][i].get_start() - end_coord_of_prev);
-      end_coord_of_prev = bins[0][i].get_end();
-      for (size_t r = 0; r < n_replicates; ++r) {
+      dists_btwn_bins.push_back(bins[0][i].start - end_coord_of_prev);
+      end_coord_of_prev = bins[0][i].stop;
+      for (std::size_t r = 0; r < n_replicates; ++r) {
         bins[r][j] = bins[r][i];
         meth[r][j] = meth[r][i];
         reads[r][j] = reads[r][i];
@@ -590,70 +565,74 @@ separate_regions(const size_t desert_size,
     }
   }
 
-  for (size_t r = 0; r < n_replicates; ++r) {
+  for (std::size_t r = 0; r < n_replicates; ++r) {
     bins[r].resize(j);
     meth[r].resize(j);
     reads[r].resize(j);
   }
 
   // segregate bins
-  size_t prev_cpg = 0;
-  for (size_t i = 0; i < bins[0].size(); ++i) {
-    const size_t dist = (i > 0 && bins[0][i].same_chrom(bins[0][i - 1]))
-                          ? bins[0][i].get_start() - prev_cpg
-                          : num_lim<size_t>::max();
+  std::size_t prev_cpg = 0;
+  for (std::size_t i = 0; i < std::size(bins[0]); ++i) {
+    const std::size_t dist = (i > 0 && bins[0][i].chrom == bins[0][i - 1].chrom)
+                               ? bins[0][i].start - prev_cpg
+                               : std::numeric_limits<std::size_t>::max();
     if (dist > desert_size)
       reset_points.push_back(i);
-    prev_cpg = bins[0][i].get_start();
+    prev_cpg = bins[0][i].start;
   }
   assert(std::size(reset_points) > 0);
-  reset_points.push_back(bins[0].size());
+  reset_points.push_back(std::size(bins[0]));
 }
 
 static void
-shuffle_bins(const size_t rng_seed, const TwoStateHMM &hmm,
-             vector<vector<std::pair<double, double>>> meth,
-             const vector<size_t> &reset_points,
-             const vector<double> &start_trans,
-             const vector<vector<double>> &trans,
-             const vector<double> &end_trans, const vector<double> &fg_alpha,
-             const vector<double> &fg_beta, const vector<double> &bg_alpha,
-             const vector<double> &bg_beta, vector<double> &domain_scores,
-             const vector<bool> &array_status) {
-  size_t n_replicates = meth.size();
-
+shuffle_bins(const std::size_t rng_seed, const TwoStateHMM &hmm,
+             std::vector<std::vector<std::pair<double, double>>>
+               meth,  // cppcheck-suppress[passedByValue]
+             const std::vector<std::size_t> &reset_points,
+             const std::vector<double> &start_trans,
+             const std::vector<std::vector<double>> &trans,
+             const std::vector<double> &end_trans,
+             const std::vector<double> &fg_alpha,
+             const std::vector<double> &fg_beta,
+             const std::vector<double> &bg_alpha,
+             const std::vector<double> &bg_beta,
+             std::vector<double> &domain_scores,
+             const std::vector<bool> &array_status) {
+  const auto n_replicates = std::size(meth);
   auto eng = std::default_random_engine(rng_seed);
-  for (size_t r = 0; r < n_replicates; ++r)
-    std::shuffle(begin(meth[r]), end(meth[r]), eng);
+  for (std::size_t r = 0; r < n_replicates; ++r)
+    std::shuffle(std::begin(meth[r]), std::end(meth[r]), eng);
 
-  vector<bool> classes;
-  vector<double> scores;
+  std::vector<bool> classes;
+  std::vector<double> scores;
   hmm.PosteriorDecoding_rep(meth, reset_points, start_trans, trans, end_trans,
                             fg_alpha, fg_beta, bg_alpha, bg_beta, classes,
                             scores, array_status);
   get_domain_scores(classes, meth, reset_points, domain_scores);
-  sort(begin(domain_scores), end(domain_scores));
+  std::sort(std::begin(domain_scores), std::end(domain_scores));
 }
 
 static void
-assign_p_values(const vector<double> &random_scores,
-                const vector<double> &observed_scores,
-                vector<double> &p_values) {
-  const double n_randoms = random_scores.size() == 0 ? 1 : random_scores.size();
+assign_p_values(const std::vector<double> &random_scores,
+                const std::vector<double> &observed_scores,
+                std::vector<double> &p_values) {
+  const double n_randoms = std::max(std::size(random_scores), 1ul);
   for (auto scr : observed_scores) {
     const auto scr_itr =
-      upper_bound(cbegin(random_scores), cend(random_scores), scr);
-    p_values.push_back(std::distance(scr_itr, cend(random_scores)) / n_randoms);
+      upper_bound(std::cbegin(random_scores), std::cend(random_scores), scr);
+    p_values.push_back(std::distance(scr_itr, std::cend(random_scores)) /
+                       n_randoms);
   }
 }
 
 static void
-read_params_file(const bool verbose, const string &params_file,
+read_params_file(const bool verbose, const std::string &params_file,
                  double &fg_alpha, double &fg_beta, double &bg_alpha,
-                 double &bg_beta, vector<double> &start_trans,
-                 vector<vector<double>> &trans, vector<double> &end_trans,
-                 double &fdr_cutoff) {
-  string jnk;
+                 double &bg_beta, std::vector<double> &start_trans,
+                 std::vector<std::vector<double>> &trans,
+                 std::vector<double> &end_trans, double &fdr_cutoff) {
+  std::string jnk;
   std::ifstream in(params_file.c_str());
 
   in >> jnk >> fg_alpha >> jnk >> fg_beta >> jnk >> bg_alpha >> jnk >>
@@ -663,50 +642,56 @@ read_params_file(const bool verbose, const string &params_file,
     fdr_cutoff;
 
   if (verbose)
-    cerr << "Read in params from " << params_file << '\n'
-         << "FG_ALPHA\t" << fg_alpha << '\n'
-         << "FG_BETA\t" << fg_beta << '\n'
-         << "BG_ALPHA\t" << bg_alpha << '\n'
-         << "BG_BETA\t" << bg_beta << '\n'
-         << "S_F\t" << start_trans[0] << '\n'
-         << "S_B\t" << start_trans[1] << '\n'
-         << "F_F\t" << trans[0][0] << '\n'
-         << "F_B\t" << trans[0][1] << '\n'
-         << "B_F\t" << trans[1][0] << '\n'
-         << "B_B\t" << trans[1][1] << '\n'
-         << "F_E\t" << end_trans[0] << '\n'
-         << "B_E\t" << end_trans[1] << '\n'
-         << "FDR_CUTOFF\t" << fdr_cutoff << '\n';
+    std::cerr << "Read in params from " << params_file << '\n'
+              << "FG_ALPHA\t" << fg_alpha << '\n'
+              << "FG_BETA\t" << fg_beta << '\n'
+              << "BG_ALPHA\t" << bg_alpha << '\n'
+              << "BG_BETA\t" << bg_beta << '\n'
+              << "S_F\t" << start_trans[0] << '\n'
+              << "S_B\t" << start_trans[1] << '\n'
+              << "F_F\t" << trans[0][0] << '\n'
+              << "F_B\t" << trans[0][1] << '\n'
+              << "B_F\t" << trans[1][0] << '\n'
+              << "B_B\t" << trans[1][1] << '\n'
+              << "F_E\t" << end_trans[0] << '\n'
+              << "B_E\t" << end_trans[1] << '\n'
+              << "FDR_CUTOFF\t" << fdr_cutoff << '\n';
 }
 
 static void
-write_posteriors_file(const string &posteriors_file,
-                      const vector<vector<SimpleGenomicRegion>> &bins,
-                      const vector<double> &scores) {
-  static const size_t decimal_precision = 10;
-
-  ofstream out(posteriors_file);
+write_posteriors_file(const std::string &posteriors_file,
+                      const std::vector<std::vector<Interval>> &bins,
+                      const std::vector<double> &scores) {
+  static constexpr auto decimal_precision = 10;
+  std::ofstream out(posteriors_file);
+  if (!out)
+    throw std::runtime_error("failed to open: " + posteriors_file);
   out.precision(decimal_precision);
-  for (size_t r = 0; r < scores.size(); ++r)
+  for (std::size_t r = 0; r < std::size(scores); ++r)
     out << bins[0][r] << '\t' << scores[r] << '\n';
 }
 
 static void
-write_params_file(const string &outfile, const vector<double> &fg_alpha,
-                  const vector<double> &fg_beta, const vector<double> &bg_alpha,
-                  const vector<double> &bg_beta,
-                  const vector<double> &start_trans,
-                  const vector<vector<double>> &trans,
-                  const vector<double> &end_trans) {
-  static const size_t decimal_precision = 30;
-  ofstream out(outfile);
+write_params_file(const std::string &outfile,
+                  const std::vector<double> &fg_alpha,
+                  const std::vector<double> &fg_beta,
+                  const std::vector<double> &bg_alpha,
+                  const std::vector<double> &bg_beta,
+                  const std::vector<double> &start_trans,
+                  const std::vector<std::vector<double>> &trans,
+                  const std::vector<double> &end_trans) {
+  static constexpr auto decimal_precision = 30;
+  std::ofstream out(outfile);
+  if (!out)
+    throw std::runtime_error("failed to open: " + outfile);
   out.precision(decimal_precision);
-  for (size_t r = 0; r < fg_alpha.size(); ++r)
+  // NOLINTBEGIN(*-avoid-magic-numbers)
+  for (std::size_t r = 0; r < std::size(fg_alpha); ++r)
     out << "FG_ALPHA_" << r + 1 << "\t" << std::setw(14) << fg_alpha[r] << "\t"
         << "FG_BETA_" << r + 1 << "\t" << std::setw(14) << fg_beta[r] << "\t"
         << "BG_ALPHA_" << r + 1 << "\t" << std::setw(14) << bg_alpha[r] << "\t"
         << "BG_BETA_" << r + 1 << "\t" << std::setw(14) << bg_beta[r] << '\n';
-
+  // NOLINTEND(*-avoid-magic-numbers)
   out << "S_F\t" << start_trans[0] << '\n'
       << "S_B\t" << start_trans[1] << '\n'
       << "F_F\t" << trans[0][0] << '\n'
@@ -718,8 +703,8 @@ write_params_file(const string &outfile, const vector<double> &fg_alpha,
 }
 
 static bool
-check_if_array_data(const string &infile) {
-  bgzf_file in(infile, "r");
+check_if_array_data(const std::string &infile) {
+  bamxx::bgzf_file in(infile, "r");
   if (!in)
     throw std::runtime_error("bad file: " + infile);
 
@@ -735,22 +720,22 @@ check_if_array_data(const string &infile) {
 }
 
 static void
-load_array_data(const size_t bin_size, const string &cpgs_file,
-                vector<SimpleGenomicRegion> &bins,
-                vector<std::pair<double, double>> &meth,
-                vector<size_t> &reads) {
+load_array_data(const std::size_t bin_size, const std::string &cpgs_file,
+                std::vector<Interval> &bins,
+                std::vector<std::pair<double, double>> &meth,
+                std::vector<std::size_t> &reads) {
   // MAGIC. GS: minimum value for array?
-  static const double meth_min = 1.0e-2;
+  static constexpr double meth_min = 1.0e-2;
 
-  bgzf_file in(cpgs_file, "r");
+  bamxx::bgzf_file in(cpgs_file, "r");
   if (!in)
     throw std::runtime_error("bad sites file: " + cpgs_file);
 
   if (get_has_counts_header(cpgs_file))
     skip_counts_header(in);
 
-  string curr_chrom;
-  size_t prev_pos = 0ul, curr_pos = 0ul;
+  std::string curr_chrom;
+  std::size_t prev_pos = 0ul, curr_pos = 0ul;
   double array_meth_bin = 0.0;
   double num_probes_in_bin = 0.0;
 
@@ -772,8 +757,9 @@ load_array_data(const size_t bin_size, const string &cpgs_file,
     if (curr_chrom != site.chrom) {
       if (!curr_chrom.empty()) {
         if (site.chrom < curr_chrom)
-          throw runtime_error("CpGs not sorted in file \"" + cpgs_file + "\"");
-        bins.push_back(SimpleGenomicRegion(curr_chrom, curr_pos, prev_pos + 1));
+          throw std::runtime_error("CpGs not sorted in file \"" + cpgs_file +
+                                   "\"");
+        bins.push_back(Interval(curr_chrom, curr_pos, prev_pos + 1));
         meth.push_back(std::make_pair(array_meth_bin, num_probes_in_bin));
         if (num_probes_in_bin > 0)
           reads.push_back(1);
@@ -789,8 +775,7 @@ load_array_data(const size_t bin_size, const string &cpgs_file,
       throw std::runtime_error("CpGs not sorted in file \"" + cpgs_file + "\"");
     }
     else if (site.pos > curr_pos + bin_size) {
-      bins.push_back(
-        SimpleGenomicRegion(curr_chrom, curr_pos, curr_pos + bin_size));
+      bins.push_back(Interval(curr_chrom, curr_pos, curr_pos + bin_size));
       meth.push_back(std::make_pair(array_meth_bin, num_probes_in_bin));
       (num_probes_in_bin > 0) ? reads.push_back(1) : reads.push_back(0);
 
@@ -798,8 +783,7 @@ load_array_data(const size_t bin_size, const string &cpgs_file,
       num_probes_in_bin = 0.0;
       curr_pos += bin_size;
       while (curr_pos + bin_size < site.pos) {
-        bins.push_back(
-          SimpleGenomicRegion(curr_chrom, curr_pos, curr_pos + bin_size));
+        bins.push_back(Interval(curr_chrom, curr_pos, curr_pos + bin_size));
         reads.push_back(0);
         meth.push_back(std::make_pair(0.0, 0.0));
         curr_pos += bin_size;
@@ -818,7 +802,7 @@ load_array_data(const size_t bin_size, const string &cpgs_file,
   }
   prev_pos = site.pos;
   if (!curr_chrom.empty()) {
-    bins.push_back(SimpleGenomicRegion(curr_chrom, curr_pos, prev_pos + 1));
+    bins.push_back(Interval(curr_chrom, curr_pos, prev_pos + 1));
     meth.push_back(std::make_pair(array_meth_bin, num_probes_in_bin));
     if (num_probes_in_bin > 0)
       reads.push_back(1);
@@ -828,51 +812,52 @@ load_array_data(const size_t bin_size, const string &cpgs_file,
 }
 
 static void
-load_wgbs_data(const size_t bin_size, const string &cpgs_file,
-               vector<SimpleGenomicRegion> &bins,
-               vector<std::pair<double, double>> &meth, vector<size_t> &reads) {
+load_wgbs_data(const std::size_t bin_size, const std::string &cpgs_file,
+               std::vector<Interval> &bins,
+               std::vector<std::pair<double, double>> &meth,
+               std::vector<std::size_t> &reads) {
   reads.clear();  // for safety
   meth.clear();
   bins.clear();
 
   // ADS: loading data each iteration should be put outside loop
-  bgzf_file in(cpgs_file, "r");
+  bamxx::bgzf_file in(cpgs_file, "r");
   if (!in)
-    throw runtime_error("bad sites file: " + cpgs_file);
+    throw std::runtime_error("bad sites file: " + cpgs_file);
 
   if (get_has_counts_header(cpgs_file))
     skip_counts_header(in);
 
   // keep track of the chroms we've seen
-  string curr_chrom;
-  std::unordered_set<string> chroms_seen;
+  std::string curr_chrom;
+  std::unordered_set<std::string> chroms_seen;
 
   MSite site;
-  size_t prev_pos = 0ul;
-  size_t sites_in_bin = 0ul;
+  std::size_t prev_pos = 0ul;
+  std::size_t sites_in_bin = 0ul;
 
   while (read_site(in, site)) {
     if (curr_chrom != site.chrom) {  // handle change of chrom
       if (sites_in_bin > 0)
-        bins.back().set_end(prev_pos);
-      if (chroms_seen.find(site.chrom) != end(chroms_seen))
-        throw runtime_error("sites not sorted");
+        bins.back().stop = prev_pos;
+      if (chroms_seen.find(site.chrom) != std::cend(chroms_seen))
+        throw std::runtime_error("sites not sorted");
       chroms_seen.insert(site.chrom);
       curr_chrom = site.chrom;
       reads.push_back(0);
       meth.push_back(std::make_pair(0.0, 0.0));
-      bins.push_back(SimpleGenomicRegion(site.chrom, 0, bin_size));
+      bins.push_back(Interval(site.chrom, 0, bin_size));
       sites_in_bin = 0;
     }
     prev_pos = site.pos;
-    if (site.pos < bins.back().get_start())
-      throw runtime_error("sites not sorted");
-    while (bins.back().get_end() < site.pos) {
+    if (site.pos < bins.back().start)
+      throw std::runtime_error("sites not sorted");
+    while (bins.back().stop < site.pos) {
       sites_in_bin = 0;
       reads.push_back(0);
       meth.push_back(std::make_pair(0.0, 0.0));
-      bins.push_back(SimpleGenomicRegion(site.chrom, bins.back().get_end(),
-                                         bins.back().get_end() + bin_size));
+      bins.push_back(
+        Interval(site.chrom, bins.back().stop, bins.back().stop + bin_size));
     }
     reads.back() += site.n_reads;
     meth.back().first += site.n_meth();
@@ -880,20 +865,20 @@ load_wgbs_data(const size_t bin_size, const string &cpgs_file,
     sites_in_bin++;
   }
   if (sites_in_bin > 0)
-    bins.back().set_end(prev_pos);
+    bins.back().stop = prev_pos;
 }
 
 static void
-remove_empty_bins_at_chrom_start(vector<SimpleGenomicRegion> &bins,
-                                 vector<std::pair<double, double>> &meth,
-                                 vector<size_t> &reads) {
+remove_empty_bins_at_chrom_start(std::vector<Interval> &bins,
+                                 std::vector<std::pair<double, double>> &meth,
+                                 std::vector<std::size_t> &reads) {
   bool chrom_start = true;
-  size_t j = 0;
-  string prev_chrom = "";
-  for (size_t i = 0; i < bins.size(); ++i) {
-    if (bins[i].get_chrom() != prev_chrom) {
+  std::size_t j = 0;
+  std::string prev_chrom = "";
+  for (std::size_t i = 0; i < std::size(bins); ++i) {
+    if (bins[i].chrom != prev_chrom) {
       chrom_start = true;
-      prev_chrom = bins[i].get_chrom();
+      prev_chrom = bins[i].chrom;
     }
     if (reads[i] > 0)
       chrom_start = false;
@@ -904,41 +889,41 @@ remove_empty_bins_at_chrom_start(vector<SimpleGenomicRegion> &bins,
       ++j;
     }
   }
-  bins.erase(begin(bins) + j, end(bins));
-  meth.erase(begin(meth) + j, end(meth));
-  reads.erase(begin(reads) + j, end(reads));
+  bins.erase(std::begin(bins) + j, std::end(bins));
+  meth.erase(std::begin(meth) + j, std::end(meth));
+  reads.erase(std::begin(reads) + j, std::end(reads));
 }
 
 static void
-load_read_counts(const string &cpgs_file, const size_t bin_size,
-                 vector<size_t> &reads) {
+load_read_counts(const std::string &cpgs_file, const std::size_t bin_size,
+                 std::vector<std::size_t> &reads) {
   reads.clear();  // for safety
 
   // ADS: loading data each iteration should be put outside loop
-  bgzf_file in(cpgs_file, "r");
+  bamxx::bgzf_file in(cpgs_file, "r");
   if (!in)
-    throw runtime_error("bad methcounts file: " + cpgs_file);
+    throw std::runtime_error("bad methcounts file: " + cpgs_file);
 
   if (get_has_counts_header(cpgs_file))
     skip_counts_header(in);
 
   // keep track of where we are and what we've seen
-  size_t bin_start = 0ul;
-  string curr_chrom;
-  std::unordered_set<string> chroms_seen;
+  std::size_t bin_start = 0ul;
+  std::string curr_chrom;
+  std::unordered_set<std::string> chroms_seen;
 
   MSite site;
   while (read_site(in, site)) {
     if (curr_chrom != site.chrom) {  // handle change of chrom
-      if (chroms_seen.find(site.chrom) != end(chroms_seen))
-        throw runtime_error("sites not sorted");
+      if (chroms_seen.find(site.chrom) != std::cend(chroms_seen))
+        throw std::runtime_error("sites not sorted");
       chroms_seen.insert(site.chrom);
       bin_start = 0;
       curr_chrom = site.chrom;
       reads.push_back(0);
     }
     if (site.pos < bin_start)
-      throw runtime_error("sites not sorted");
+      throw std::runtime_error("sites not sorted");
     for (; bin_start + bin_size < site.pos; bin_start += bin_size)
       reads.push_back(0);
     reads.back() += site.n_reads;
@@ -946,30 +931,32 @@ load_read_counts(const string &cpgs_file, const size_t bin_size,
 }
 
 static double
-good_bins_frac(const vector<size_t> &cumulative, const size_t min_bin_size,
-               const size_t bin_size, const size_t min_cov_to_pass) {
+good_bins_frac(const std::vector<std::size_t> &cumulative,
+               const std::size_t min_bin_size, const std::size_t bin_size,
+               const std::size_t min_cov_to_pass) {
   // make sure the target bin size is a multiple of the minimum so we
   // have the resolution to construct the new bins
   assert(bin_size % min_bin_size == 0);
 
   // the step size corresponds to the number of minium sized bins that
   // would make up a new bin of the target size
-  const size_t step_size = bin_size / min_bin_size;
+  const std::size_t step_size = bin_size / min_bin_size;
 
-  size_t passing_bins = 0, covered_bins = 0;
+  std::size_t passing_bins = 0, covered_bins = 0;
 
-  size_t prev_total = 0;
-  for (size_t i = 0; i + step_size < cumulative.size(); i += step_size) {
-    const size_t curr_cumulative = cumulative[i + step_size];
-    const size_t bin_count = curr_cumulative - prev_total;
+  std::size_t prev_total = 0;
+  for (std::size_t i = 0; i + step_size < std::size(cumulative);
+       i += step_size) {
+    const std::size_t curr_cumulative = cumulative[i + step_size];
+    const std::size_t bin_count = curr_cumulative - prev_total;
     covered_bins += (bin_count > 0);
     passing_bins += (bin_count >= min_cov_to_pass);
     prev_total = curr_cumulative;
   }
 
   // check if there is a leftover bin at the end
-  if (cumulative.size() % step_size != 0) {
-    const size_t bin_count = cumulative.back() - prev_total;
+  if (std::size(cumulative) % step_size != 0) {
+    const std::size_t bin_count = cumulative.back() - prev_total;
     covered_bins += (bin_count > 0);
     passing_bins += (bin_count >= min_cov_to_pass);
   }
@@ -977,11 +964,11 @@ good_bins_frac(const vector<size_t> &cumulative, const size_t min_bin_size,
   return static_cast<double>(passing_bins) / std::max(1ul, covered_bins);
 }
 
-static size_t
+static std::size_t
 get_min_reads_for_confidence(const double conf_level) {
   // ADS: value of 0.5 below important; this is where the CI is widest
   static const double fixed_phat = 0.5;
-  size_t n_reads = 0;
+  std::size_t n_reads = 0;
   double lower = 0.0, upper = 1.0;
   // ADS: should be doubling first, followed by bisection
   while (1.0 - conf_level < upper - lower) {
@@ -991,35 +978,36 @@ get_min_reads_for_confidence(const double conf_level) {
   return n_reads;
 }
 
-// ADS: this function will return num_lim<size_t>::max() if the
+// ADS: this function will return std::numeric_limits<std::size_t>::max() if the
 // fraction of "good" bins is zero for all attempted bin sizes.
-static size_t
-binsize_selection(const size_t resolution, const size_t min_bin_sz,
-                  const size_t max_bin_sz, const double conf_level,
-                  const double min_frac_passed, const string &cpgs_file) {
-  const size_t min_cov_to_pass = get_min_reads_for_confidence(conf_level);
+static std::size_t
+binsize_selection(const std::size_t resolution, const std::size_t min_bin_sz,
+                  const std::size_t max_bin_sz, const double conf_level,
+                  const double min_frac_passed, const std::string &cpgs_file) {
+  const std::size_t min_cov_to_pass = get_min_reads_for_confidence(conf_level);
 
-  vector<size_t> reads;
+  std::vector<std::size_t> reads;
   load_read_counts(cpgs_file, resolution, reads);
 
-  std::partial_sum(begin(reads), end(reads), begin(reads));
+  std::partial_sum(std::cbegin(reads), std::cend(reads), std::begin(reads));
 
   double frac_passed = 0.0;
-  size_t bin_size = min_bin_sz;
+  std::size_t bin_size = min_bin_sz;
 
   while (bin_size < max_bin_sz && frac_passed < min_frac_passed) {
     frac_passed = good_bins_frac(reads, resolution, bin_size, min_cov_to_pass);
     if (frac_passed < min_frac_passed)
       bin_size += resolution;
   }
-  return frac_passed < min_frac_passed ? num_lim<size_t>::max() : bin_size;
+  return frac_passed < min_frac_passed ? std::numeric_limits<std::size_t>::max()
+                                       : bin_size;
 }
 
 static void
-load_bins(const size_t bin_size, const string &cpgs_file,
-          vector<SimpleGenomicRegion> &bins,
-          vector<std::pair<double, double>> &meth, vector<size_t> &reads,
-          vector<bool> &array_status) {
+load_bins(const std::size_t bin_size, const std::string &cpgs_file,
+          std::vector<Interval> &bins,
+          std::vector<std::pair<double, double>> &meth,
+          std::vector<std::size_t> &reads, std::vector<bool> &array_status) {
   const bool is_array_data = check_if_array_data(cpgs_file);
 
   array_status.push_back(is_array_data);
@@ -1033,39 +1021,44 @@ load_bins(const size_t bin_size, const string &cpgs_file,
 }
 
 static void
-get_union_of_bins(const vector<vector<SimpleGenomicRegion>> &orig,
-                  vector<SimpleGenomicRegion> &bins) {
+get_union_of_bins(const std::vector<std::vector<Interval>> &orig,
+                  std::vector<Interval> &bins) {
+  const auto overlaps = [](const auto &x, const auto &y) {
+    const auto cmp = x.chrom.compare(y.chrom);
+    return cmp == 0 && std::max(x.start, x.start) < std::min(x.stop, y.stop);
+  };
+
   // flatten the set of sorted bins
   bins.clear();
   for (auto &&i : orig)
-    bins.insert(end(bins), begin(i), end(i));
+    bins.insert(std::end(bins), std::cbegin(i), std::cend(i));
 
   // merge each sorted interval of bins
-  const auto first = begin(bins);
-  auto middle = begin(bins);
-  for (size_t i = 1; i < orig.size(); ++i) {
-    middle += orig[i - 1].size();
-    std::inplace_merge(first, middle, middle + orig[i].size());
+  const auto first = std::begin(bins);
+  auto middle = std::begin(bins);
+  for (std::size_t i = 1; i < std::size(orig); ++i) {
+    middle += std::size(orig[i - 1]);
+    std::inplace_merge(first, middle, middle + std::size(orig[i]));
   }
   // ensure unique bins
-  bins.erase(unique(begin(bins), end(bins)), end(bins));
+  bins.erase(std::unique(std::begin(bins), std::end(bins)), std::end(bins));
   bins.shrink_to_fit();
 
   // make sure all bins are aligned at same boundaries
-  for (size_t i = 1; i < bins.size(); ++i)
-    if (bins[i - 1].overlaps(bins[i]))
+  for (std::size_t i = 1; i < std::size(bins); ++i)
+    if (overlaps(bins[i - 1], bins[i]))
       throw std::runtime_error("bins from reps not aligned");
 }
 
 static void
-add_missing_bins(const vector<SimpleGenomicRegion> &all_bins,
-                 vector<SimpleGenomicRegion> &bins,
-                 vector<std::pair<double, double>> &meth) {
-  const size_t n_bins = all_bins.size();
-  vector<std::pair<double, double>> tmp_meth(n_bins);
-
-  size_t j = 0;  // assume j range no larger than i range
-  for (size_t i = 0; i < n_bins; ++i) {
+add_missing_bins(const std::vector<Interval> &all_bins,
+                 std::vector<Interval> &bins,
+                 std::vector<std::pair<double, double>> &meth) {
+  const std::size_t n_bins = std::size(all_bins);
+  std::vector<std::pair<double, double>> tmp_meth(n_bins);
+
+  std::size_t j = 0;  // assume j range no larger than i range
+  for (std::size_t i = 0; i < n_bins; ++i) {
     if (all_bins[i] == bins[j])
       tmp_meth[i] = meth[j++];
     else
@@ -1076,53 +1069,68 @@ add_missing_bins(const vector<SimpleGenomicRegion> &all_bins,
 }
 
 static void
-write_empty_summary(const string &summary_file) {
-  if (!summary_file.empty()) {
-    ofstream summary_out(summary_file);
-    if (!summary_out)
-      throw runtime_error("failed to open: " + summary_file);
-    summary_out << pmd_summary({}).tostring() << '\n';
-  }
+write_empty_summary(const std::string &summary_file) {
+  if (summary_file.empty())
+    return;
+  std::ofstream summary_out(summary_file);
+  if (!summary_out)
+    throw std::runtime_error("failed to open: " + summary_file);
+  summary_out << to_string(pmd_summary({})) << '\n';
 }
 
 int
 main_pmd(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
   try {
-    static const size_t min_observations_for_inference = 100;
-    static const size_t max_bin_size = 500000;
-    static const size_t min_bin_size = 1000;
-    size_t resolution = 500;
+    static constexpr auto min_observations_for_inference{100ul};
+    static constexpr auto default_self_transition{0.99};
+    static constexpr auto default_start_transition{0.5};
+    static constexpr auto default_end_transition{1e-10};
+    static constexpr auto default_transition{0.01};
 
-    string outfile;
+    // magic numbers from paper: highest jaccard index to wgbs
+    static constexpr auto bin_size_for_array{1000ul};
+    static constexpr auto desert_size_for_array{200000ul};
 
-    size_t rng_seed = 408;
+    // MAGIC: corrections for small values (not parameters):
+    static constexpr auto tolerance{1e-5};
+    static constexpr auto min_prob{1e-10};
+
+    static constexpr auto default_fg_alpha{0.05};
+    static constexpr auto default_fg_beta{0.95};
+    static constexpr auto default_bg_alpha{0.95};
+    static constexpr auto default_bg_beta{0.05};
+    const auto init_trans = [&] {
+      return std::vector{
+        std::vector<double>(default_self_transition, default_transition),
+        std::vector<double>(default_transition, default_self_transition),
+      };
+    };
+    // <std::vector<double>>
+    //   std::vector<std::vector<double>> t(
+    //     2, std::vector<double>(2, default_transition));
+    //   t[0][0] = t[1][1] = default_self_transition;
+    //   return t;
+    // };
+
+    // NOLINTBEGIN(*-avoid-magic-numbers)
+    std::size_t resolution = 500;
+    std::size_t rng_seed = 408;
+    std::size_t desert_size = 5000;
+    std::size_t bin_size = 1000;
+    std::size_t max_iterations = 10;
+    // NOLINTEND(*-avoid-magic-numbers)
 
-    bool DEBUG = false;
-    size_t desert_size = 5000;
-    size_t bin_size = 1000;
-    size_t max_iterations = 10;
     // run mode flags
+    bool DEBUG = false;
     bool verbose = false;
     bool ARRAY_MODE = false;
     bool fixed_bin_size = false;
 
-    // MAGIC: corrections for small values (not parameters):
-    static const double tolerance = 1e-5;
-    static const double min_prob = 1e-10;
-
-    string summary_file;
-
-    string params_in_files;
-    string params_out_file;
-    string posteriors_out_prefix;
-
-    constexpr auto description =
-      R"(Identify PMDs in methylomes. Methylation must be provided in the
-      methcounts file format (chrom, position, strand, context,
-      methylation, reads). See the methcounts documentation for
-      details. This program assumes only data at CpG sites and that
-      strands are collapsed so only the positive site appears in the
-      file, but reads counts are from both strands.)";
+    std::string summary_file;
+    std::string params_in_files;
+    std::string params_out_file;
+    std::string posteriors_out_prefix;
+    std::string outfile;
 
     /****************** COMMAND LINE OPTIONS ********************/
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
@@ -1151,41 +1159,39 @@ main_pmd(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
                       false, params_out_file);
     opt_parse.add_opt("seed", 's', "specify random seed", false, rng_seed);
 
-    vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << '\n'
-           << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n'
+                << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << '\n';
+      std::cerr << opt_parse.option_missing_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (leftover_args.empty()) {
-      cerr << opt_parse.help_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n';
       return EXIT_SUCCESS;
     }
     /****************** END COMMAND LINE OPTIONS *****************/
 
-    resolution = min(bin_size, resolution);
+    resolution = std::min(bin_size, resolution);
 
-    vector<string> cpgs_file = leftover_args;
-    vector<string> params_in_file;
-    if (!params_in_files.empty()) {
+    std::vector<std::string> cpgs_file = leftover_args;
+    std::vector<std::string> params_in_file;
+    if (!params_in_files.empty())
       params_in_file = smithlab::split(params_in_files, ",", false);
-      assert(cpgs_file.size() == params_in_file.size());
-    }
 
-    const size_t n_replicates = cpgs_file.size();
+    const std::size_t n_replicates = std::size(cpgs_file);
     std::for_each(std::cbegin(cpgs_file), std::cend(cpgs_file),
                   [](const auto &x) {
                     if (!is_msite_file(x))
-                      throw runtime_error("malformed counts file: " + x);
+                      throw std::runtime_error("malformed counts file: " + x);
                   });
 
     bool insufficient_data = false;  // ADS: this is used now to detect
@@ -1196,75 +1202,81 @@ main_pmd(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     // Sanity checks input file format and dynamically selects bin
     // size from WGBS samples.
     if (!fixed_bin_size && !ARRAY_MODE) {
+      static constexpr auto max_bin_size{500000ul};
+      static constexpr auto min_bin_size{1000ul};
+      static constexpr auto desert_size_multiplier{5};  // ADS: what is this?
+      static constexpr auto default_confidence_interval{0.80};
+      static constexpr auto default_prop_accept{0.80};
       if (verbose)
-        cerr << "[DYNAMICALLY SELECTING BIN SIZE]" << '\n';
-      double confidence_interval = 0.80;
-      double prop_accept = 0.80;
-      for (size_t i = 0; i < n_replicates && !insufficient_data; ++i) {
+        std::cerr << "[DYNAMICALLY SELECTING BIN SIZE]" << '\n';
+      double confidence_interval = default_confidence_interval;
+      double prop_accept = default_prop_accept;
+      for (std::size_t i = 0; i < n_replicates && !insufficient_data; ++i) {
         const bool arrayData = check_if_array_data(cpgs_file[i]);
         if (!arrayData) {
           bin_size =
             binsize_selection(resolution, min_bin_size, max_bin_size,
                               confidence_interval, prop_accept, cpgs_file[i]);
-          if (bin_size == num_lim<size_t>::max())
+          if (bin_size == std::numeric_limits<std::size_t>::max())
             insufficient_data = true;
-          desert_size = 5 * bin_size;  // TODO(ADS): what should this be?
+          desert_size = desert_size_multiplier * bin_size;
         }
         else {
-          // same as the parameters below
-          bin_size = 1000;
-          desert_size = 200000;
+          bin_size = bin_size_for_array;
+          desert_size = desert_size_for_array;
         }
       }
     }
     else if (ARRAY_MODE) {
-      bin_size = 1000;       // MAGIC NUMBERS FROM PAPER
-      desert_size = 200000;  // PERFORM WITH HIGHEST JACCARD INDEX TO WGBS
+      bin_size = bin_size_for_array;
+      desert_size = desert_size_for_array;
     }
     else {
-      desert_size = max(desert_size, bin_size);
+      desert_size = std::max(desert_size, bin_size);
     }
 
     if (insufficient_data) {
       // ADS: first check for insufficient data; another is needed if
       // fixed bin size is used
       if (verbose)
-        cerr << "EXITING: INSUFFICIENT DATA" << '\n';
+        std::cerr << "EXITING: INSUFFICIENT DATA" << '\n';
       if (!summary_file.empty())
         write_empty_summary(summary_file);
       return EXIT_SUCCESS;
     }
 
     if (verbose)
-      cerr << "[READING IN AT BIN SIZE " << bin_size << "]" << '\n';
+      std::cerr << "[READING IN AT BIN SIZE " << bin_size << "]\n";
 
     // separate the regions by chrom and by desert
-    vector<vector<SimpleGenomicRegion>> bins(n_replicates);
-    vector<vector<std::pair<double, double>>> meth(n_replicates);
-    vector<vector<size_t>> reads(n_replicates);
-    vector<bool> array_status;
+    std::vector<std::vector<Interval>> bins(n_replicates);
+    std::vector<std::vector<std::pair<double, double>>> meth(n_replicates);
+    std::vector<std::vector<std::size_t>> reads(n_replicates);
+    std::vector<bool> array_status;
 
-    for (size_t i = 0; i < n_replicates && !insufficient_data; ++i) {
+    for (std::size_t i = 0; i < n_replicates && !insufficient_data; ++i) {
       if (verbose)
-        cerr << "[READING CPGS AND METH PROPS] from " << cpgs_file[i] << '\n';
+        std::cerr << "[READING CPGS AND METH PROPS] from " << cpgs_file[i]
+                  << '\n';
 
       load_bins(bin_size, cpgs_file[i], bins[i], meth[i], reads[i],
                 array_status);
       const double total_observations =
-        accumulate(begin(reads[i]), end(reads[i]), 0);
-      if (total_observations <= num_lim<double>::min())
+        std::accumulate(std::cbegin(reads[i]), std::cend(reads[i]), 0.0);
+      if (total_observations <= std::numeric_limits<double>::min())
         insufficient_data = true;
       if (verbose)
-        cerr << "TOTAL BINS: " << bins[i].size() << '\n'
-             << "MEAN COVERAGE: "
-             << total_observations / std::max(1ul, reads[i].size()) << '\n';
+        std::cerr << "TOTAL BINS: " << std::size(bins[i]) << '\n'
+                  << "MEAN COVERAGE: "
+                  << total_observations / std::max(1ul, std::size(reads[i]))
+                  << '\n';
     }
 
     if (insufficient_data) {
       // ADS: second check for insufficient data; another is needed if
       // filtered number of bins is too few
       if (verbose)
-        cerr << "EXITING: INSUFFICIENT DATA" << '\n';
+        std::cerr << "EXITING: INSUFFICIENT DATA" << '\n';
       if (!summary_file.empty())
         write_empty_summary(summary_file);
       return EXIT_SUCCESS;
@@ -1272,24 +1284,24 @@ main_pmd(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
 
     if (n_replicates > 1) {
       bool need_to_adjust_bins = false;
-      const size_t n_bins_0 = bins[0].size();
-      for (size_t i = 1; i < n_replicates && !need_to_adjust_bins; ++i)
-        if (bins[i].size() != n_bins_0)
+      const std::size_t n_bins_0 = std::size(bins[0]);
+      for (std::size_t i = 1; i < n_replicates && !need_to_adjust_bins; ++i)
+        if (std::size(bins[i]) != n_bins_0)
           need_to_adjust_bins = true;
 
       if (need_to_adjust_bins) {
-        vector<SimpleGenomicRegion> all_bins;
+        std::vector<Interval> all_bins;
         get_union_of_bins(bins, all_bins);
-        for (size_t i = 0; i < bins.size(); ++i)
+        for (std::size_t i = 0; i < std::size(bins); ++i)
           add_missing_bins(all_bins, bins[i], meth[i]);
       }
     }
 
     // separate regions by chrom and desert; eliminate isolated Bins
-    vector<size_t> reset_points;
-    vector<size_t> dists_btwn_bins;
+    std::vector<std::size_t> reset_points;
+    std::vector<std::size_t> dists_btwn_bins;
     if (verbose)
-      cerr << "[separating by CpG desert]" << '\n';
+      std::cerr << "[separating by CpG desert]" << '\n';
     separate_regions(desert_size, bins, meth, reads, reset_points,
                      dists_btwn_bins);
     if (size(bins[0]) < min_observations_for_inference)
@@ -1299,32 +1311,31 @@ main_pmd(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
       // ADS: final check for sufficient data failed; too few bins
       // after filtering
       if (verbose)
-        cerr << "EXITING: INSUFFICIENT DATA" << '\n';
+        std::cerr << "EXITING: INSUFFICIENT DATA" << '\n';
       if (!summary_file.empty())
         write_empty_summary(summary_file);
       return EXIT_SUCCESS;
     }
 
     if (verbose)
-      cerr << "bins retained: " << std::size(bins[0]) << '\n'
-           << "number of distances between: " << std::size(dists_btwn_bins)
-           << '\n'
-           << "deserts removed: " << size(reset_points) - 2 << '\n';
+      std::cerr << "bins retained: " << std::size(bins[0]) << '\n'
+                << "number of distances between: " << std::size(dists_btwn_bins)
+                << '\n'
+                << "deserts removed: " << size(reset_points) - 2 << '\n';
 
     /****************** Read in params *****************/
-    vector<double> start_trans(2, 0.5), end_trans(2, 1e-10);
-    vector<vector<double>> trans(2, vector<double>(2, 0.01));
-    trans[0][0] = trans[1][1] = 0.99;
+    auto start_trans = std::vector<double>(2, default_start_transition);
+    auto end_trans = std::vector<double>(2, default_end_transition);
+    auto trans = init_trans();
     const TwoStateHMM hmm(min_prob, tolerance, max_iterations, verbose, DEBUG);
-    vector<double> reps_fg_alpha(n_replicates, 0.05);
-    vector<double> reps_fg_beta(n_replicates, 0.95);
-    vector<double> reps_bg_alpha(n_replicates, 0.95);
-    vector<double> reps_bg_beta(n_replicates, 0.05);
-    double score_cutoff_for_fdr = num_lim<double>::max();
-
+    std::vector<double> reps_fg_alpha(n_replicates, default_fg_alpha);
+    std::vector<double> reps_fg_beta(n_replicates, default_fg_beta);
+    std::vector<double> reps_bg_alpha(n_replicates, default_bg_alpha);
+    std::vector<double> reps_bg_beta(n_replicates, default_bg_beta);
+    double score_cutoff_for_fdr = std::numeric_limits<double>::max();
     if (!params_in_file.empty()) {
       // read parameters files
-      for (size_t i = 0; i < n_replicates; ++i)
+      for (auto i = 0u; i < n_replicates; ++i)
         read_params_file(verbose, params_in_file[i], reps_fg_alpha[i],
                          reps_fg_beta[i], reps_bg_alpha[i], reps_bg_beta[i],
                          start_trans, trans, end_trans, score_cutoff_for_fdr);
@@ -1346,14 +1357,14 @@ main_pmd(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     /***********************************/
 
     if (!posteriors_out_prefix.empty()) {
-      vector<double> into_scores;
+      std::vector<double> into_scores;
       hmm.TransitionPosteriors_rep(meth, reset_points, start_trans, trans,
                                    end_trans, reps_fg_alpha, reps_fg_beta,
                                    reps_bg_alpha, reps_bg_beta, array_status, 2,
                                    into_scores);
       write_posteriors_file(posteriors_out_prefix + ".intoTrans", bins,
                             into_scores);
-      vector<double> outof_scores;
+      std::vector<double> outof_scores;
       hmm.TransitionPosteriors_rep(meth, reset_points, start_trans, trans,
                                    end_trans, reps_fg_alpha, reps_fg_beta,
                                    reps_bg_alpha, reps_bg_beta, array_status, 1,
@@ -1362,8 +1373,8 @@ main_pmd(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
                             outof_scores);
     }
 
-    vector<bool> classes;
-    vector<double> scores;
+    std::vector<bool> classes;
+    std::vector<double> scores;
     hmm.PosteriorDecoding_rep(meth, reset_points, start_trans, trans, end_trans,
                               reps_fg_alpha, reps_fg_beta, reps_bg_alpha,
                               reps_bg_beta, classes, scores, array_status);
@@ -1372,38 +1383,42 @@ main_pmd(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
       write_posteriors_file(posteriors_out_prefix + ".posteriors", bins,
                             scores);
 
-    vector<double> domain_scores;
+    std::vector<double> domain_scores;
     get_domain_scores(classes, meth, reset_points, domain_scores);
 
     if (verbose)
-      cerr << "[RANDOMIZING SCORES FOR FDR]" << '\n';
+      std::cerr << "[RANDOMIZING SCORES FOR FDR]" << '\n';
 
-    vector<double> random_scores;
+    std::vector<double> random_scores;
     shuffle_bins(rng_seed, hmm, meth, reset_points, start_trans, trans,
                  end_trans, reps_fg_alpha, reps_fg_beta, reps_bg_alpha,
                  reps_bg_beta, random_scores, array_status);
 
-    vector<double> p_values;
+    std::vector<double> p_values;
     assign_p_values(random_scores, domain_scores, p_values);
 
-    if (score_cutoff_for_fdr == num_lim<double>::max() && !p_values.empty())
-      score_cutoff_for_fdr = get_score_cutoff_for_fdr(p_values, 0.01);
-
+    if (score_cutoff_for_fdr == std::numeric_limits<double>::max() &&
+        !p_values.empty()) {
+      static constexpr auto default_p_value_cutoff{0.01};
+      score_cutoff_for_fdr =
+        get_score_cutoff_for_fdr(p_values, default_p_value_cutoff);
+    }
     if (!params_out_file.empty()) {
-      ofstream out(params_out_file, std::ios::app);
-      out << "FDR_CUTOFF\t" << std::setprecision(30) << score_cutoff_for_fdr
-          << '\n';
+      std::ofstream out(params_out_file, std::ios::app);
+      if (!out)
+        throw std::runtime_error("failed to open: " + params_out_file);
+      out << "FDR_CUTOFF\t" << score_cutoff_for_fdr << '\n';
     }
 
-    vector<GenomicRegion> domains;
+    std::vector<Interval6> domains;
     build_domains(bins[0], reset_points, classes, domains);
 
-    size_t good_pmd_count = 0;
-    vector<GenomicRegion> good_domains;
-    for (size_t i = 0; i < domains.size(); ++i)
+    std::size_t good_pmd_count = 0;
+    std::vector<Interval6> good_domains;
+    for (std::size_t i = 0; i < std::size(domains); ++i)
       if (p_values[i] < score_cutoff_for_fdr) {
         good_domains.push_back(domains[i]);
-        good_domains.back().set_name("PMD" + to_string(good_pmd_count++));
+        good_domains.back().name = "PMD" + std::to_string(good_pmd_count++);
       }
 
     optimize_boundaries(bin_size, cpgs_file, good_domains, array_status,
@@ -1411,25 +1426,23 @@ main_pmd(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
                         reps_bg_beta);
 
     if (!summary_file.empty()) {
-      ofstream summary_out(summary_file);
+      std::ofstream summary_out(summary_file);
       if (!summary_out)
-        throw runtime_error("failed to open: " + summary_file);
-      summary_out << pmd_summary(good_domains).tostring() << '\n';
+        throw std::runtime_error("failed to open: " + summary_file);
+      summary_out << to_string(pmd_summary(good_domains)) << '\n';
     }
 
-    ofstream of;
-    if (!outfile.empty())
-      of.open(outfile);
-    ostream out(outfile.empty() ? std::cout.rdbuf() : of.rdbuf());
-
-    copy(begin(good_domains), end(good_domains),
-         std::ostream_iterator<GenomicRegion>(out, "\n"));
+    std::ofstream out(outfile);
+    if (!out)
+      throw std::runtime_error("failed to open: " + outfile);
+    std::copy(std::cbegin(good_domains), std::cend(good_domains),
+              std::ostream_iterator<Interval6>(out, "\n"));
   }
   catch (const std::exception &e) {
-    cerr << e.what() << '\n';
+    std::cerr << e.what() << '\n';
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
 }
 
-// NOLINTEND(*-avoid-magic-numbers,*-narrowing-conversions)
+// NOLINTEND(*-narrowing-conversions)
diff --git a/src/analysis/roimethstat.cpp b/src/analysis/roimethstat.cpp
index 120030c6..ab393c70 100644
--- a/src/analysis/roimethstat.cpp
+++ b/src/analysis/roimethstat.cpp
@@ -15,7 +15,7 @@
  * more details.
  */
 
-#include "GenomicRegion.hpp"
+#include "Interval6.hpp"
 #include "LevelsCounter.hpp"
 #include "MSite.hpp"
 #include "OptionParser.hpp"
@@ -39,39 +39,11 @@
 #include <utility>
 #include <vector>
 
-// NOLINTBEGIN(*-avoid-c-arrays,*-avoid-magic-numbers,*-narrowing-conversions,*-pointer-arithmetic)
-
-[[nodiscard]] static std::string
-format_levels_counter(const LevelsCounter &lc) {
-  // ...
-  // (7) weighted mean methylation
-  // (8) unweighted mean methylation
-  // (9) fractional methylation
-  // (10) number of sites in the region
-  // (11) number of sites covered at least once
-  // (12) number of observations in reads indicating methylation
-  // (13) total number of observations from reads in the region
-  std::ostringstream oss;
-  // clang-format off
-  oss << lc.mean_meth_weighted() << '\t'
-      << lc.mean_meth() << '\t'
-      << lc.fractional_meth() << '\t'
-      << lc.total_sites << '\t'
-      << lc.sites_covered << '\t'
-      << lc.total_c << '\t'
-      << lc.coverage();
-  // clang-format on
-  return oss.str();
-}
-
-struct genomic_interval {
-  std::string chrom{};
-  std::uint64_t start_pos{};
-  std::uint64_t end_pos{};
-};
+// NOLINTBEGIN(*-narrowing-conversions)
 
 static void
 update(LevelsCounter &lc, const xcounts_entry &xse) {
+  static constexpr auto one_half = 0.5;
   const std::uint64_t n_reads = xse.n_meth + xse.n_unmeth;
   if (n_reads > 0) {
     ++lc.sites_covered;
@@ -80,37 +52,38 @@ update(LevelsCounter &lc, const xcounts_entry &xse) {
     lc.total_t += xse.n_unmeth;
     const auto meth = static_cast<double>(xse.n_unmeth) / n_reads;
     lc.total_meth += meth;
-    double lower = 0.0, upper = 0.0;
+    double lower{};
+    double upper{};
     wilson_ci_for_binomial(lc.alpha, n_reads, meth, lower, upper);
-    lc.called_meth += (lower > 0.5);
-    lc.called_unmeth += (upper < 0.5);
+    lc.called_meth += (lower > one_half);
+    lc.called_unmeth += (upper < one_half);
   }
   ++lc.total_sites;
 }
 
 static void
 process_chrom(const bool report_more_info, const char level_code,
-              const std::vector<GenomicRegion> &intervals,
+              const std::vector<Interval6> &intervals,
               const std::vector<xcounts_entry> &sites, std::ostream &out) {
   std::uint64_t j = 0;
   for (auto i = 0ul; i < std::size(intervals); ++i) {
-    while (j < std::size(sites) && sites[j].pos < intervals[i].get_start())
+    while (j < std::size(sites) && sites[j].pos < intervals[i].start)
       ++j;
-
     LevelsCounter lc;
-    while (j < std::size(sites) && sites[j].pos < intervals[i].get_end())
+    while (j < std::size(sites) && sites[j].pos < intervals[i].stop)
       update(lc, sites[j++]);
 
-    GenomicRegion r(intervals[i]);
-    r.set_score(level_code == 'w' ? lc.mean_meth_weighted()
-                                  : (level_code == 'u' ? lc.mean_meth()
-                                                       : lc.fractional_meth()));
-    r.set_name(r.get_name() + "_" +
-               std::to_string((level_code == 'w'
-                                 ? lc.coverage()
-                                 : (level_code == 'u' ? lc.sites_covered
-                                                      : lc.total_called()))));
-    out << r;
+    Interval6 interval(intervals[i]);
+    interval.score =
+      level_code == 'w'
+        ? lc.mean_meth_weighted()
+        : (level_code == 'u' ? lc.mean_meth() : lc.fractional_meth());
+    interval.name +=
+      "_" + std::to_string(
+              level_code == 'w'
+                ? lc.coverage()
+                : (level_code == 'u' ? lc.sites_covered : lc.total_called()));
+    out << to_string(interval);
     if (report_more_info)
       out << '\t' << format_levels_counter(lc);
     out << '\n';
@@ -119,11 +92,11 @@ process_chrom(const bool report_more_info, const char level_code,
 
 static void
 process_chrom(const bool report_more_info,
-              const std::vector<GenomicRegion> &intervals, std::ostream &out) {
+              const std::vector<Interval6> &intervals, std::ostream &out) {
   LevelsCounter lc;
   const std::string lc_formatted = format_levels_counter(lc);
   for (const auto &r : intervals) {
-    out << r;
+    out << to_string(r);
     if (report_more_info)
       out << '\t' << lc_formatted;
     out << '\n';
@@ -133,23 +106,23 @@ process_chrom(const bool report_more_info,
 static void
 process_from_xcounts(const std::uint32_t n_threads, const bool report_more_info,
                      const char level_code, const std::string &xsym_file,
-                     const std::vector<GenomicRegion> &intervals_in,
+                     const std::vector<Interval6> &intervals_in,
                      std::ostream &out) {
   const auto sites_by_chrom = read_xcounts_by_chrom(n_threads, xsym_file);
-  // const auto intervals = get_GenomicRegions(intervals_file);
+  // const auto intervals = get_Interval6s(intervals_file);
 
-  std::vector<std::vector<GenomicRegion>> intervals_by_chrom;
+  std::vector<std::vector<Interval6>> intervals_by_chrom;
   std::string prev_chrom;
   for (auto i = 0u; i < std::size(intervals_in); ++i) {
-    if (intervals_in[i].get_chrom() != prev_chrom) {
-      intervals_by_chrom.push_back(std::vector<GenomicRegion>());
-      prev_chrom = intervals_in[i].get_chrom();
+    if (intervals_in[i].chrom != prev_chrom) {
+      intervals_by_chrom.push_back(std::vector<Interval6>());
+      prev_chrom = intervals_in[i].chrom;
     }
     intervals_by_chrom.back().push_back(intervals_in[i]);
   }
 
   for (const auto &intervals : intervals_by_chrom) {
-    const auto chrom_name = intervals.front().get_chrom();
+    const auto chrom_name = intervals.front().chrom;
     const auto sites = sites_by_chrom.find(chrom_name);
     if (sites != std::cend(sites_by_chrom))
       process_chrom(report_more_info, level_code, intervals, sites->second,
@@ -160,12 +133,11 @@ process_from_xcounts(const std::uint32_t n_threads, const bool report_more_info,
 }
 
 [[nodiscard]] bool
-cmp_within_chrom(const GenomicRegion &r1, const GenomicRegion &r2) {
-  return r1.get_start() < r2.get_start() ||
-         (r1.get_start() == r2.get_start() &&
-          (r1.get_end() < r2.get_end() ||
-           (r1.get_end() == r2.get_end() &&
-            (r1.get_strand() < r2.get_strand()))));
+cmp_within_chrom(const Interval6 &r1, const Interval6 &r2) {
+  return r1.start < r2.start ||
+         (r1.start == r2.start &&
+          (r1.stop < r2.stop ||
+           (r1.stop == r2.stop && (r1.strand < r2.strand))));
 }
 
 [[nodiscard]] bool
@@ -181,8 +153,8 @@ get_chrom_order(const T &order, const MSite &s) {
 
 template <class T>
 [[nodiscard]] typename T::const_iterator
-get_chrom_order(const T &order, const GenomicRegion &r) {
-  return order.find(r.get_chrom());
+get_chrom_order(const T &order, const Interval6 &r) {
+  return order.find(r.chrom);
 }
 
 struct cmp_chrom_order {
@@ -248,9 +220,9 @@ region_bounds(const std::unordered_map<std::string, std::uint32_t> &chrom_order,
   // that region is on the negative strand const char
   // strand(region.get_strand());
 
-  const std::string chrom(region.get_chrom());
-  const MSite a(chrom, region.get_start(), '+', "", 0, 0);
-  const MSite b(chrom, region.get_end(), '+', "", 0, 0);
+  const std::string chrom(region.chrom);
+  const MSite a(chrom, region.start, '+', "", 0, 0);
+  const MSite b(chrom, region.stop, '+', "", 0, 0);
 
   // ADS: This function seems like std::equal_range but both elements
   // below use "lower_bound" because "b" is strictly following valid
@@ -292,10 +264,10 @@ read_sites(const std::string &filename) {
 
 static void
 process_preloaded(
-  const bool VERBOSE, const bool report_more_info, const char level_code,
+  const bool verbose, const bool report_more_info, const char level_code,
   const std::string &sites_file,
   const std::unordered_map<std::string, std::uint32_t> &chrom_order,
-  const std::vector<GenomicRegion> &regions, std::ostream &out) {
+  const std::vector<Interval6> &regions, std::ostream &out) {
   const auto sites = read_sites(sites_file);
   if (sites.empty())
     throw std::runtime_error("failed to read sites: " + sites_file);
@@ -303,7 +275,7 @@ process_preloaded(
   if (!is_sorted_within_chrom(sites))
     throw std::runtime_error("sites not sorted: " + sites_file);
 
-  if (VERBOSE)
+  if (verbose)
     std::cerr << "[n_sites=" << std::size(sites) << "]\n";
 
   for (const auto &r : regions) {
@@ -317,9 +289,9 @@ process_preloaded(
       level_code == 'w'
         ? lc.mean_meth_weighted()
         : (level_code == 'u' ? lc.mean_meth() : lc.fractional_meth());
-    GenomicRegion r_scored{r};
-    r_scored.set_score(score);
-    out << r_scored;
+    Interval6 r_scored{r};
+    r_scored.score = score;
+    out << to_string(r_scored);
     if (report_more_info)
       out << '\t' << format_levels_counter(lc);
     out << '\n';
@@ -347,15 +319,15 @@ at_or_after(const MSite &site, const std::string &chrom,
 
 [[nodiscard]] static LevelsCounter
 calc_site_stats(
-  std::ifstream &sites_in, const GenomicRegion &region,
+  std::ifstream &sites_in, const Interval6 &region,
   const std::unordered_map<std::string, std::uint32_t> &chrom_order) {
-  const std::string chrom{region.get_chrom()};
+  const std::string chrom{region.chrom};
   const auto chrom_itr = chrom_order.find(chrom);
   if (chrom_itr == std::cend(chrom_order))
     throw std::runtime_error("lookup failure: " + chrom);
   const auto chrom_idx = chrom_itr->second;
-  const auto start_pos = region.get_start();
-  const auto end_pos = region.get_end();
+  const auto start_pos = region.start;
+  const auto end_pos = region.stop;
   find_offset_for_msite(chrom_order, chrom, start_pos, sites_in);
 
   LevelsCounter lc;
@@ -375,7 +347,7 @@ process_on_disk(
   const bool report_more_info, const char level_code,
   const std::string &sites_file,
   const std::unordered_map<std::string, std::uint32_t> &chrom_order,
-  const std::vector<GenomicRegion> &regions, std::ostream &out) {
+  const std::vector<Interval6> &regions, std::ostream &out) {
   std::ifstream in(sites_file);
   if (!in)
     throw std::runtime_error("failed to open file: " + sites_file);
@@ -386,9 +358,9 @@ process_on_disk(
       level_code == 'w'
         ? lc.mean_meth_weighted()
         : (level_code == 'u' ? lc.mean_meth() : lc.fractional_meth());
-    GenomicRegion r{region};
-    r.set_score(score);
-    out << r;
+    Interval6 r{region};
+    r.score = score;
+    out << to_string(r);
     if (report_more_info)
       out << '\t' << format_levels_counter(lc);
     out << '\n';
@@ -399,22 +371,19 @@ process_on_disk(
 get_bed_columns(const std::string &regions_file) {
   std::ifstream in(regions_file);
   if (!in)
-    throw std::runtime_error("failed to open file: " + regions_file);
-
+    throw std::runtime_error("cannot open file: " + regions_file);
   std::string line;
-  std::getline(in, line);
-
+  getline(in, line);
   std::istringstream iss(line);
   std::string token;
   std::size_t n_columns = 0;
   while (iss >> token)
     ++n_columns;
-
   return n_columns;
 }
 
 int
-main_roimethstat(int argc, char *argv[]) {
+main_roimethstat(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
   try {
     static const std::string description = R"""(
 Compute average site methylation levels in each interval from a given
@@ -430,10 +399,13 @@ Columns (beyond the first 6) in the BED format output:
 (12) number of observations in reads indicating methylation
 (13) total number of observations from reads in the region
 )""";
+    constexpr auto valid_n_cols = [](const auto n_cols) {
+      return n_cols != 3 && n_cols < 6;  // NOLINT(*-avoid-magic-numbers)
+    };
 
     static const std::string default_name_prefix = "X";
 
-    bool VERBOSE = false;
+    bool verbose = false;
     bool print_numeric_only = false;
     bool preload = false;
     bool report_more_info = false;
@@ -446,11 +418,10 @@ Columns (beyond the first 6) in the BED format output:
     std::string outfile;
 
     /****************** COMMAND LINE OPTIONS ********************/
-    OptionParser opt_parse(argv[0], description,
-                           "<intervals-bed> <methylation-file>");
+    OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
+                           description, "<intervals-bed> <methylation-file>");
     opt_parse.set_show_defaults();
-    opt_parse.add_opt("output", 'o', "Name of output file (default: stdout)",
-                      false, outfile);
+    opt_parse.add_opt("output", 'o', "Name of output file", true, outfile);
     opt_parse.add_opt("numeric", 'N', "print numeric values only (not NAs)",
                       false, print_numeric_only);
     opt_parse.add_opt("preload", 'L', "load all site sites", false, preload);
@@ -467,7 +438,7 @@ Columns (beyond the first 6) in the BED format output:
     opt_parse.add_opt("relaxed", '\0',
                       "input has extra fields (used for nanopore)", false,
                       allow_extra_fields);
-    opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE);
+    opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
     std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
@@ -509,27 +480,25 @@ Columns (beyond the first 6) in the BED format output:
       for (const auto &i : get_chroms(sites_file))
         chrom_order.emplace(i, std::size(chrom_order));
 
-    if (VERBOSE)
+    if (verbose)
       std::cerr << "loading regions" << '\n';
 
     if (!std::filesystem::is_regular_file(regions_file))
       // otherwise we could not read the file twice
       throw std::runtime_error("regions file must be regular file");
 
-    // MAGIC: below allow for ==3 or >=6 columns in the bed format
     const auto n_columns = get_bed_columns(regions_file);
-    if (n_columns != 3 && n_columns < 6)
+    if (valid_n_cols(n_columns))
       throw std::runtime_error("format must be 3 or 6+ column bed: " +
                                regions_file);
     if (is_msite_file(regions_file))
       throw std::runtime_error("seems to be a counts file: " + regions_file +
                                "\ncheck order of input arguments");
 
-    std::vector<GenomicRegion> regions;
-    ReadBEDFile(regions_file, regions);
+    auto regions = read_intervals6(regions_file);
     if (!std::is_sorted(std::begin(regions), std::end(regions))) {
       if (sort_data_if_needed) {
-        if (VERBOSE)
+        if (verbose)
           std::cerr << "sorting regions\n";
         std::sort(std::begin(regions), std::end(regions),
                   cmp_chrom_order(chrom_order));
@@ -543,24 +512,20 @@ Columns (beyond the first 6) in the BED format output:
     // be empty and the output format will be wrong
     if (n_columns == 3)
       for (auto i = 0u; i < std::size(regions); ++i)
-        regions[i].set_name(default_name_prefix + std::to_string(i));
+        regions[i].name = default_name_prefix + std::to_string(i);
 
-    if (VERBOSE)
+    if (verbose)
       std::cerr << "[n_regions=" << std::size(regions) << "]\n";
 
-    std::ofstream of;
-    if (!outfile.empty()) {
-      of.open(outfile);
-      if (!of)
-        throw std::runtime_error("failed to open outfile: " + outfile);
-    }
-    std::ostream out(outfile.empty() ? std::cout.rdbuf() : of.rdbuf());
+    std::ofstream out(outfile);
+    if (!out)
+      throw std::runtime_error("failed to open outfile: " + outfile);
 
     if (is_xcounts)
       process_from_xcounts(n_threads, report_more_info, level_code[0],
                            sites_file, regions, out);
     else if (preload)
-      process_preloaded(VERBOSE, report_more_info, level_code[0], sites_file,
+      process_preloaded(verbose, report_more_info, level_code[0], sites_file,
                         chrom_order, regions, out);
     else
       process_on_disk(report_more_info, level_code[0], sites_file, chrom_order,
@@ -573,4 +538,4 @@ Columns (beyond the first 6) in the BED format output:
   return EXIT_SUCCESS;
 }
 
-// NOLINTEND(*-avoid-c-arrays,*-avoid-magic-numbers,*-narrowing-conversions,*-pointer-arithmetic)
+// NOLINTEND(*-narrowing-conversions)
diff --git a/src/common/Interval.hpp b/src/common/Interval.hpp
index ee1fa02f..b85fda82 100644
--- a/src/common/Interval.hpp
+++ b/src/common/Interval.hpp
@@ -20,6 +20,7 @@
 
 #include <cstdint>
 //  #include <format> // ADS: needs c++20
+#include <fstream>
 #include <iterator>  // std::size
 #include <stdexcept>
 #include <string>
@@ -32,9 +33,7 @@ struct Interval {
 
   Interval() = default;
   Interval(const std::string &chrom, const std::uint32_t start,
-           const std::uint32_t stop) :
-    chrom{chrom},
-    start{start}, stop{stop} {}
+           const std::uint32_t stop) : chrom{chrom}, start{start}, stop{stop} {}
 
   explicit Interval(const std::string &line) {
     if (!initialize(line.data(), line.data() + std::size(line)))
@@ -43,17 +42,27 @@ struct Interval {
   auto
   initialize(const char *, const char *) -> bool;
 
-  auto
+  [[nodiscard]] auto
   operator<(const Interval &rhs) const {
     return (chrom < rhs.chrom ||
             (chrom == rhs.chrom &&
              (start < rhs.start || (start == rhs.start && stop < rhs.stop))));
   }
 
+  [[nodiscard]] auto
+  operator==(const Interval &rhs) const {
+    return chrom == rhs.chrom && start == rhs.start && stop < rhs.stop;
+  }
+
   // auto
   // operator<=>(const Interval &) const = default;
 };
 
+inline auto
+operator<<(std::ostream &os, const Interval &x) -> std::ostream & {
+  return os << x.chrom << "\t" << x.start << "\t" << x.stop;
+}
+
 [[nodiscard]] inline auto
 to_string(const Interval &x) -> std::string {
   return x.chrom + "\t" + std::to_string(x.start) + "\t" +
diff --git a/src/common/Interval6.hpp b/src/common/Interval6.hpp
index 3b84e213..94eed1f3 100644
--- a/src/common/Interval6.hpp
+++ b/src/common/Interval6.hpp
@@ -21,6 +21,7 @@
 #include <cstdint>
 //  #include <format> // ADS: needs c++20
 #include <iterator>  // std::size
+#include <sstream>
 #include <stdexcept>
 #include <string>
 #include <vector>
@@ -37,8 +38,8 @@ struct Interval6 {
   Interval6(const std::string &chrom, const std::uint32_t start,
             const std::uint32_t stop, const std::string &name,
             const double score, const char strand) :
-    chrom{chrom},
-    start{start}, stop{stop}, name{name}, score{score}, strand{strand} {}
+    chrom{chrom}, start{start}, stop{stop}, name{name}, score{score},
+    strand{strand} {}
 
   explicit Interval6(const std::string &line) {
     if (!initialize(line.data(), line.data() + std::size(line)))
@@ -58,11 +59,20 @@ struct Interval6 {
   // operator<=>(const Interval6 &) const = default;
 };
 
+inline auto
+operator<<(std::ostream &os, const Interval6 &x) -> std::ostream & {
+  return os << x.chrom << "\t" << x.start << "\t" << x.stop << "\t" << x.name
+            << "\t" << x.score << "\t" << x.strand;
+}
+
 [[nodiscard]] inline auto
 to_string(const Interval6 &x) -> std::string {
-  return x.chrom + "\t" + std::to_string(x.start) + "\t" +
-         std::to_string(x.stop) + "\t" + x.name + "\t" +
-         std::to_string(x.score) + "\t" + std::string(1, x.strand);
+  std::ostringstream oss;
+  oss << x;
+  return oss.str();
+  // return x.chrom + "\t" + std::to_string(x.start) + "\t" +
+  //        std::to_string(x.stop) + "\t" + x.name + "\t" +
+  //        std::to_string(x.score) + "\t" + std::string(1, x.strand);
 }
 
 // ADS: need to bump to c++20 for this
diff --git a/src/common/LevelsCounter.cpp b/src/common/LevelsCounter.cpp
index 92004c45..a9f6103e 100644
--- a/src/common/LevelsCounter.cpp
+++ b/src/common/LevelsCounter.cpp
@@ -21,6 +21,23 @@
 #include <stdexcept>
 #include <string>
 
+auto
+format_levels_counter(const LevelsCounter &lc) -> std::string {
+  std::ostringstream oss;
+  // clang-format off
+  oss << lc.mean_meth_weighted() << '\t'
+      << lc.mean_meth() << '\t'
+      << lc.fractional_meth() << '\t'
+      << lc.total_sites << '\t'
+      << lc.sites_covered << '\t'
+      << lc.total_c << '\t'
+      << (lc.total_c + lc.total_t) << '\t'
+      << lc.total_meth << '\t'
+      << lc.sites_covered;
+  // clang-format on
+  return oss.str();
+}
+
 void
 LevelsCounter::update(const MSite &s) {
   static constexpr auto half = 0.5;
@@ -43,8 +60,8 @@ LevelsCounter::update(const MSite &s) {
   ++total_sites;
 }
 
-std::string
-LevelsCounter::tostring() const {
+auto
+LevelsCounter::tostring() const -> std::string {
   static const std::string indent = std::string(2, ' ');
   const bool good = (sites_covered != 0);
   std::ostringstream oss;
@@ -74,8 +91,8 @@ LevelsCounter::tostring() const {
   return oss.str();
 }
 
-std::string
-LevelsCounter::format_row() const {
+auto
+LevelsCounter::format_row() const -> std::string {
   const bool good = (sites_covered > 0);
   std::ostringstream oss;
   // directly counted values
@@ -101,8 +118,8 @@ LevelsCounter::format_row() const {
   return oss.str();
 }
 
-std::string
-LevelsCounter::format_header() {
+auto
+LevelsCounter::format_header() -> std::string {
   std::ostringstream oss;
   // directly counted values
   oss << "01. total_sites" << '\n'
@@ -127,8 +144,8 @@ LevelsCounter::format_header() {
 
 double LevelsCounter::alpha = 0.05;  // NOLINT(*-avoid-magic-numbers)
 
-std::ostream &
-operator<<(std::ostream &out, const LevelsCounter &cs) {
+auto
+operator<<(std::ostream &out, const LevelsCounter &cs) -> std::ostream & {
   return out << cs.tostring();
 }
 
@@ -139,8 +156,8 @@ check_label(const std::string &observed, const std::string &expected) {
                              "]");
 }
 
-std::istream &
-operator>>(std::istream &in, LevelsCounter &cs) {
+auto
+operator>>(std::istream &in, LevelsCounter &cs) -> std::istream & {
   in >> cs.context;  // get the context
   cs.context = cs.context.substr(0, cs.context.find_first_of(':'));
 
@@ -175,8 +192,8 @@ operator>>(std::istream &in, LevelsCounter &cs) {
   return in;
 }
 
-LevelsCounter &
-LevelsCounter::operator+=(const LevelsCounter &rhs) {
+auto
+LevelsCounter::operator+=(const LevelsCounter &rhs) -> LevelsCounter & {
   total_sites += rhs.total_sites;
   sites_covered += rhs.sites_covered;
   max_depth += rhs.max_depth;
diff --git a/src/common/LevelsCounter.hpp b/src/common/LevelsCounter.hpp
index f6c24e75..cb55c7e0 100644
--- a/src/common/LevelsCounter.hpp
+++ b/src/common/LevelsCounter.hpp
@@ -20,6 +20,7 @@
 #include <cstdint>
 #include <iostream>
 #include <string>
+#include <utility>
 struct MSite;
 
 struct LevelsCounter {
@@ -72,51 +73,51 @@ struct LevelsCounter {
 
   // coverage is equal to total_c plus total_t. These are the counts that
   // contribute towards estimates of the various methylation levels.
-  [[nodiscard]] std::uint64_t
-  coverage() const {
+  [[nodiscard]] auto
+  coverage() const -> std::uint64_t {
     return total_c + total_t;
   }
 
   // mean_depth is the average number of reads contributing to methylation
   // calls over all sites.
-  [[nodiscard]] double
-  mean_depth() const {
+  [[nodiscard]] auto
+  mean_depth() const -> double {
     return static_cast<double>(coverage()) / total_sites;
   }
 
   // mean_depth_covered is the average number of reads contributing to
   // methylation calls over all sites that are covered at least once.
-  [[nodiscard]] double
-  mean_depth_covered() const {
+  [[nodiscard]] auto
+  mean_depth_covered() const -> double {
     return static_cast<double>(coverage()) / sites_covered;
   }
 
   // sites_covered_frac is the fraction of all sites that are covered at least
   // once.
-  [[nodiscard]] double
-  sites_covered_frac() const {
+  [[nodiscard]] auto
+  sites_covered_frac() const -> double {
     return static_cast<double>(sites_covered) / total_sites;
   }
 
   // total_called is equal to called_meth plus called_unmeth
-  [[nodiscard]] std::uint64_t
-  total_called() const {
+  [[nodiscard]] auto
+  total_called() const -> std::uint64_t {
     return called_meth + called_unmeth;
   }
 
   // mean_meth_weighted is the unweighted mean methylation level. This
   // is the ratio of total_c divided by coverage. This value is always
   // between 0 and 1.
-  [[nodiscard]] double
-  mean_meth_weighted() const {
+  [[nodiscard]] auto
+  mean_meth_weighted() const -> double {
     return static_cast<double>(total_c) /
            std::max(coverage(), static_cast<std::uint64_t>(1));
   }
 
   // mean_meth is the unweighted mean methylation level. This is the ratio of
   // total_meth divided by sites_covered. Value is always between 0 and 1.
-  [[nodiscard]] double
-  mean_meth() const {
+  [[nodiscard]] auto
+  mean_meth() const -> double {
     return static_cast<double>(total_meth) /
            std::max(sites_covered, static_cast<std::uint64_t>(1));
   }
@@ -124,36 +125,39 @@ struct LevelsCounter {
   // fractional_meth is the fraction of "called" sites that are called
   // methylated. It is the ratio of called_meth divided by total_called. This
   // value is always between 0 and 1.
-  [[nodiscard]] double
-  fractional_meth() const {
+  [[nodiscard]] auto
+  fractional_meth() const -> double {
     return static_cast<double>(called_meth) /
            std::max(total_called(), static_cast<std::uint64_t>(1));
   }
 
-  explicit LevelsCounter(const std::string &c) : context{c} {}
+  explicit LevelsCounter(std::string c) : context{std::move(c)} {}
 
   LevelsCounter() = default;
 
-  LevelsCounter &
-  operator+=(const LevelsCounter &rhs);
+  auto
+  operator+=(const LevelsCounter &rhs) -> LevelsCounter &;
 
   void
   update(const MSite &s);
 
-  [[nodiscard]] std::string
-  tostring() const;
-  [[nodiscard]] std::string
-  format_row() const;
-  [[nodiscard]] static std::string
-  format_header();
+  [[nodiscard]] auto
+  tostring() const -> std::string;
+  [[nodiscard]] auto
+  format_row() const -> std::string;
+  [[nodiscard]] static auto
+  format_header() -> std::string;
 
   static double alpha;
 };
 
-std::ostream &
-operator<<(std::ostream &out, const LevelsCounter &cs);
+auto
+format_levels_counter(const LevelsCounter &lc) -> std::string;
 
-std::istream &
-operator>>(std::istream &in, LevelsCounter &cs);
+auto
+operator<<(std::ostream &out, const LevelsCounter &cs) -> std::ostream &;
+
+auto
+operator>>(std::istream &in, LevelsCounter &cs) -> std::istream &;
 
 #endif
diff --git a/src/common/MSite.cpp b/src/common/MSite.cpp
index 053addc0..741ee59b 100644
--- a/src/common/MSite.cpp
+++ b/src/common/MSite.cpp
@@ -1,45 +1,31 @@
-/*
-  Copyright (C) 2015-2022 University of Southern California
-  Authors: Andrew D. Smith and Masaru Nakajima
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of the GNU General Public License as published by
-  the Free Software Foundation; either version 2 of the License, or
-  (at your option) any later version.
-
-  This program is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-  GNU General Public License for more details.
-*/
+/* Copyright (C) 2015-2025 Andrew D. Smith
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
 
 #include "MSite.hpp"
-
-#include "bamxx.hpp"
 #include "counts_header.hpp"
 
+#include <bamxx.hpp>
+
 #include <charconv>
 #include <fstream>
 #include <iterator>
 #include <limits>
-#include <regex>
 #include <sstream>
 #include <stdexcept>
 #include <string>
 #include <unordered_map>
 #include <utility>
 
-using std::cbegin;
-using std::cend;
-using std::end;
-using std::find_if;
-using std::from_chars;
-using std::ifstream;
-using std::ios_base;
-using std::regex_match;
-using std::runtime_error;
-using std::string;
-
 bool MSite::no_extra_fields = true;
 
 bool
@@ -51,42 +37,42 @@ MSite::initialize(const char *c, const char *c_end) {
 
   // NOLINTBEGIN(*-pointer-arithmetic)
   auto field_s = c;
-  auto field_e = find_if(field_s + 1, c_end, is_sep);
+  auto field_e = std::find_if(field_s + 1, c_end, is_sep);
   if (field_e == c_end)
     failed = true;
 
   {
     const std::uint32_t d = std::distance(field_s, field_e);
-    chrom = string{field_s, d};
+    chrom = std::string{field_s, d};
   }
 
-  field_s = find_if(field_e + 1, c_end, not_sep);
-  field_e = find_if(field_s + 1, c_end, is_sep);
+  field_s = std::find_if(field_e + 1, c_end, not_sep);
+  field_e = std::find_if(field_s + 1, c_end, is_sep);
   failed = failed || (field_e == c_end);
 
   {
-    const auto [ptr, ec] = from_chars(field_s, field_e, pos);
+    const auto [ptr, ec] = std::from_chars(field_s, field_e, pos);
     failed = failed || (ptr == field_s);
   }
 
-  field_s = find_if(field_e + 1, c_end, not_sep);
-  field_e = find_if(field_s + 1, c_end, is_sep);
+  field_s = std::find_if(field_e + 1, c_end, not_sep);
+  field_e = std::find_if(field_s + 1, c_end, is_sep);
   failed = failed || (field_e != field_s + 1 || field_e == c_end);
 
   strand = *field_s;
   failed = failed || (strand != '-' && strand != '+');
 
-  field_s = find_if(field_e + 1, c_end, not_sep);
-  field_e = find_if(field_s + 1, c_end, is_sep);
+  field_s = std::find_if(field_e + 1, c_end, not_sep);
+  field_e = std::find_if(field_s + 1, c_end, is_sep);
   failed = failed || (field_e == c_end);
 
   {
     const std::uint32_t d = std::distance(field_s, field_e);
-    context = string{field_s, d};
+    context = std::string{field_s, d};
   }
 
-  field_s = find_if(field_e + 1, c_end, not_sep);
-  field_e = find_if(field_s + 1, c_end, is_sep);
+  field_s = std::find_if(field_e + 1, c_end, not_sep);
+  field_e = std::find_if(field_s + 1, c_end, is_sep);
   failed = failed || (field_e == c_end);
 
   {
@@ -94,15 +80,15 @@ MSite::initialize(const char *c, const char *c_end) {
     const int ret = std::sscanf(field_s, "%lf", &meth);
     failed = failed || (ret < 1);
 #else
-    const auto [ptr, ec] = from_chars(field_s, field_e, meth);
+    const auto [ptr, ec] = std::from_chars(field_s, field_e, meth);
     failed = failed || (ptr == field_s);
 #endif
   }
 
-  field_s = find_if(field_e + 1, c_end, not_sep);
+  field_s = std::find_if(field_e + 1, c_end, not_sep);
 
   {
-    const auto [ptr, ec] = from_chars(field_s, c_end, n_reads);
+    const auto [ptr, ec] = std::from_chars(field_s, c_end, n_reads);
     failed = failed || (no_extra_fields && ptr != c_end);
   }
   // ADS: the value below would work for a flag
@@ -113,19 +99,19 @@ MSite::initialize(const char *c, const char *c_end) {
   return !failed;
 }
 
-MSite::MSite(const string &line) {
+MSite::MSite(const std::string &line) {
   // NOLINTNEXTLINE(*-pointer-arithmetic)
   if (!initialize(line.data(), line.data() + std::size(line)))
-    throw runtime_error("bad count line: " + line);
+    throw std::runtime_error("bad count line: " + line);
 }
 
 MSite::MSite(const char *line, const int n) {
   // NOLINTNEXTLINE(*-pointer-arithmetic)
   if (!initialize(line, line + n))
-    throw runtime_error("bad count line: " + string(line));
+    throw std::runtime_error("bad count line: " + std::string(line));
 }
 
-string
+std::string
 MSite::tostring() const {
   std::ostringstream oss;
   oss << chrom << '\t' << pos << '\t' << strand << '\t' << context << '\t'
@@ -140,7 +126,7 @@ distance(const MSite &a, const MSite &b) {
 }
 
 static void
-move_to_start_of_line(ifstream &in) {
+move_to_start_of_line(std::ifstream &in) {
   char next{};
   // move backwards by: one step forward, two steps back
   while (in.good() && in.get(next) && next != '\n') {
@@ -156,34 +142,34 @@ move_to_start_of_line(ifstream &in) {
 void
 find_offset_for_msite(const std::string &chr, const size_t idx,
                       std::ifstream &site_in) {
-  site_in.seekg(0, ios_base::beg);
+  site_in.seekg(0, std::ios_base::beg);
   const auto begin_pos = site_in.tellg();
-  site_in.seekg(0, ios_base::end);
+  site_in.seekg(0, std::ios_base::end);
   const auto end_pos = site_in.tellg();
 
   if (end_pos - begin_pos < 2)
-    throw runtime_error("empty counts file");
+    throw std::runtime_error("empty counts file");
 
   std::streamoff step_size = (end_pos - begin_pos) / 2;
 
-  site_in.seekg(0, ios_base::beg);
-  string low_chr;
-  size_t low_idx = 0;
+  site_in.seekg(0, std::ios_base::beg);
+  std::string low_chr;
+  size_t low_idx{};
   if (!(site_in >> low_chr >> low_idx))
-    throw runtime_error("failed search in counts file");
+    throw std::runtime_error("failed search in counts file");
 
   // MAGIC: need the -2 here to get past the EOF and possibly a '\n'
-  site_in.seekg(-2, ios_base::end);
+  site_in.seekg(-2, std::ios_base::end);
   move_to_start_of_line(site_in);
-  string high_chr;
+  std::string high_chr;
   size_t high_idx{};
   if (!(site_in >> high_chr >> high_idx))
-    throw runtime_error("failed search in counts file");
+    throw std::runtime_error("failed search in counts file");
 
   std::streamoff pos = step_size;
-  site_in.seekg(pos, ios_base::beg);
+  site_in.seekg(pos, std::ios_base::beg);
   move_to_start_of_line(site_in);
-  std::streamoff prev_pos = 0;  // keep track of previous position in file
+  std::streamoff prev_pos{};  // keep track of previous position in file
 
   // binary search inside sorted file on disk
   // iterate until step size is 0 or positions are identical
@@ -191,88 +177,89 @@ find_offset_for_msite(const std::string &chr, const size_t idx,
     // track (mid) position in file to make sure it keeps moving
     prev_pos = pos;
 
-    string mid_chr;      // chromosome name at mid point
-    size_t mid_idx = 0;  // position at mid point
+    std::string mid_chr;  // chromosome name at mid point
+    size_t mid_idx{};     // position at mid point
     if (!(site_in >> mid_chr >> mid_idx))
-      throw runtime_error("failed navigating inside file");
+      throw std::runtime_error("failed navigating inside file");
 
     // this check will never give a false indication of unsorted, but
     // might catch an unsorted file
     if (mid_chr < low_chr || mid_chr > high_chr) {
       // NOLINTBEGIN(performance-inefficient-string-concatenation)
-      throw runtime_error("chromosomes unsorted inside file: "
-                          "low=" +
-                          low_chr + ",mid=" + mid_chr + ",high=" + high_chr);
+      throw std::runtime_error("chromosomes unsorted inside file: "
+                               "low=" +
+                               low_chr + ",mid=" + mid_chr +
+                               ",high=" + high_chr);
       // NOLINTEND(performance-inefficient-string-concatenation)
     }
     step_size /= 2;  // cut the range in half
 
     if (chr < mid_chr || (chr == mid_chr && idx <= mid_idx)) {
       // move to the left
-      std::swap(mid_chr, high_chr);
-      std::swap(mid_idx, high_idx);
+      std::swap(high_chr, mid_chr);
+      std::swap(high_idx, mid_idx);
       pos -= step_size;
     }
     else {
       // move to the left
-      std::swap(mid_chr, low_chr);
-      std::swap(mid_idx, low_idx);
+      std::swap(low_chr, mid_chr);
+      std::swap(low_idx, mid_idx);
       pos += step_size;
     }
-    site_in.seekg(pos, ios_base::beg);
+    site_in.seekg(pos, std::ios_base::beg);
     move_to_start_of_line(site_in);
   }
 }
 
 void
 find_offset_for_msite(
-  const std::unordered_map<string, std::uint32_t> &chrom_order,
+  const std::unordered_map<std::string, std::uint32_t> &chrom_order,
   const std::string &chr, const size_t idx, std::ifstream &site_in) {
-  site_in.seekg(0, ios_base::beg);
+  site_in.seekg(0, std::ios_base::beg);
   const auto begin_pos = site_in.tellg();
-  site_in.seekg(0, ios_base::end);
+  site_in.seekg(0, std::ios_base::end);
   const auto end_pos = site_in.tellg();
 
   if (end_pos - begin_pos < 2)
-    throw runtime_error("empty counts file");
+    throw std::runtime_error("empty counts file");
 
   std::streamoff step_size = (end_pos - begin_pos) / 2;
 
   const auto chr_idx_itr = chrom_order.find(chr);
-  if (chr_idx_itr == end(chrom_order)) {
-    site_in.seekg(0, ios_base::end);
+  if (chr_idx_itr == std::cend(chrom_order)) {
+    site_in.seekg(0, std::ios_base::end);
     return;
   }
   const auto chr_idx = chr_idx_itr->second;
 
-  site_in.seekg(0, ios_base::beg);
-  string low_chr;
+  site_in.seekg(0, std::ios_base::beg);
+  std::string low_chr;
   size_t low_idx{};
   if (!(site_in >> low_chr >> low_idx))
-    throw runtime_error("failed search in counts file");
+    throw std::runtime_error("failed search in counts file");
 
   const auto low_chr_idx_itr = chrom_order.find(low_chr);
-  if (low_chr_idx_itr == end(chrom_order))
-    throw runtime_error("inconsistent chromosome order info");
+  if (low_chr_idx_itr == std::cend(chrom_order))
+    throw std::runtime_error("inconsistent chromosome order info");
   auto low_chr_idx = low_chr_idx_itr->second;
 
   // MAGIC: need the -2 here to get past the EOF and hopefully a '\n'
-  site_in.seekg(-2, ios_base::end);
+  site_in.seekg(-2, std::ios_base::end);
   move_to_start_of_line(site_in);
-  string high_chr;
+  std::string high_chr;
   size_t high_idx{};
   if (!(site_in >> high_chr >> high_idx))
-    throw runtime_error("failed search in counts file");
+    throw std::runtime_error("failed search in counts file");
 
   const auto high_chr_idx_itr = chrom_order.find(high_chr);
-  if (high_chr_idx_itr == end(chrom_order))
-    throw runtime_error("inconsistent chromosome order info");
+  if (high_chr_idx_itr == std::cend(chrom_order))
+    throw std::runtime_error("inconsistent chromosome order info");
   auto high_chr_idx = high_chr_idx_itr->second;
 
   std::streamoff pos = step_size;
-  site_in.seekg(pos, ios_base::beg);
+  site_in.seekg(pos, std::ios_base::beg);
   move_to_start_of_line(site_in);
-  std::streamoff prev_pos = 0;  // keep track of previous position in file
+  std::streamoff prev_pos{};  // keep track of previous position in file
 
   // binary search inside sorted file on disk
   // iterate until step size is 0 or positions are identical
@@ -280,26 +267,25 @@ find_offset_for_msite(
     // track (mid) position in file to make sure it keeps moving
     prev_pos = pos;
 
-    string mid_chr;    // chromosome name at mid point
-    size_t mid_idx{};  // position at mid point
+    std::string mid_chr;  // chromosome name at mid point
+    size_t mid_idx{};     // position at mid point
     if (!(site_in >> mid_chr >> mid_idx))
-      throw runtime_error("failed navigating inside file");
+      throw std::runtime_error("failed navigating inside file");
 
     const auto mid_chr_idx_itr = chrom_order.find(mid_chr);
-    if (mid_chr_idx_itr == end(chrom_order))
-      throw runtime_error("inconsistent chromosome order info");
+    if (mid_chr_idx_itr == std::cend(chrom_order))
+      throw std::runtime_error("inconsistent chromosome order info");
     const auto mid_chr_idx = mid_chr_idx_itr->second;
 
     // this check will never give a false indication of unsorted, but
     // might catch an unsorted file
-    using std::to_string;
     if (mid_chr_idx < low_chr_idx || mid_chr_idx > high_chr_idx) {
       // NOLINTBEGIN(performance-inefficient-string-concatenation)
-      throw runtime_error("chromosomes unsorted inside file: "
-                          "low=" +
-                          to_string(low_chr_idx) +
-                          ",mid=" + to_string(mid_chr_idx) +
-                          ",high=" + to_string(high_chr_idx));
+      throw std::runtime_error("chromosomes unsorted inside file: "
+                               "low=" +
+                               std::to_string(low_chr_idx) +
+                               ",mid=" + std::to_string(mid_chr_idx) +
+                               ",high=" + std::to_string(high_chr_idx));
       // NOLINTEND(performance-inefficient-string-concatenation)
     }
 
@@ -317,43 +303,43 @@ find_offset_for_msite(
       // low_idx = mid_idx;
       pos += step_size;
     }
-    site_in.seekg(pos, ios_base::beg);
+    site_in.seekg(pos, std::ios_base::beg);
     move_to_start_of_line(site_in);
   }
 }
 
 bool
-is_msite_line(const string &line) {
+is_msite_line(const std::string &line) {
   std::istringstream iss(line);
 
-  string chrom;
+  std::string chrom;
   if (!(iss >> chrom))
     return false;
 
-  int64_t pos = 0;
+  std::int64_t pos{};
   if (!(iss >> pos || pos < 0))
     return false;
 
-  string strand;
+  std::string strand;
   if (!(iss >> strand) || (strand.size() != 1) ||
       ((strand != "+") && (strand != "-")))
     return false;
 
-  string context;
+  std::string context;
   // ADS: below, allowing any context
   // std::regex pattern("^C[pHWX][GH]$");
   if (!(iss >> context))
     return false;
 
-  double level = 0.0;
+  double level{};
   if (!(iss >> level) || level < 0.0 || level > 1.0)
     return false;
 
-  int64_t n_reads = 0;
+  std::int64_t n_reads{};
   if (!(iss >> n_reads || n_reads < 0))
     return false;
 
-  string temp;
+  std::string temp;
   if (MSite::no_extra_fields && iss >> temp)
     return false;
   else
@@ -361,12 +347,12 @@ is_msite_line(const string &line) {
 }
 
 bool
-is_msite_file(const string &file) {
+is_msite_file(const std::string &file) {
   bamxx::bgzf_file in(file, "r");
   if (!in)
-    throw runtime_error("cannot open file: " + file);
+    throw std::runtime_error("cannot open file: " + file);
 
-  string line;
+  std::string line;
   while (getline(in, line) && is_counts_header_line(line))
     ;
 
diff --git a/src/common/bsutils.cpp b/src/common/bsutils.cpp
index d1e857da..b6dd0aa6 100644
--- a/src/common/bsutils.cpp
+++ b/src/common/bsutils.cpp
@@ -16,7 +16,7 @@
 #include "bsutils.hpp"
 #include "dnmtools_gaussinv.hpp"
 
-#include <GenomicRegion.hpp>
+#include "Interval6.hpp"
 
 #include <algorithm>
 #include <cassert>
@@ -47,31 +47,30 @@ wilson_ci_for_binomial(const double alpha, const double n, const double p_hat,
 }
 
 void
-adjust_region_ends(const std::vector<std::vector<GenomicRegion>> &clusters,
-                   std::vector<GenomicRegion> &regions) {
+adjust_region_ends(const std::vector<std::vector<Interval6>> &clusters,
+                   std::vector<Interval6> &regions) {
   assert(std::size(clusters) == std::size(regions));
   for (std::size_t i = 0; i < std::size(regions); ++i) {
-    std::size_t max_pos = regions[i].get_end();
-    std::size_t min_pos = regions[i].get_start();
+    auto max_pos = regions[i].stop;
+    auto min_pos = regions[i].start;
     for (std::size_t j = 0; j < std::size(clusters[i]); ++j) {
-      max_pos = std::max(clusters[i][j].get_end(), max_pos);
-      min_pos = std::min(clusters[i][j].get_start(), min_pos);
+      max_pos = std::max(clusters[i][j].stop, max_pos);
+      min_pos = std::min(clusters[i][j].start, min_pos);
     }
-    regions[i].set_end(max_pos);
-    regions[i].set_start(min_pos);
+    regions[i].stop = max_pos;
+    regions[i].start = min_pos;
   }
 }
 
 void
-relative_sort(const std::vector<GenomicRegion> &mapped_locations,
+relative_sort(const std::vector<Interval6> &mapped_locations,
               const std::vector<std::string> &names,
               std::vector<std::size_t> &lookup) {
   std::unordered_map<std::string, std::size_t> names_map;
   for (std::size_t i = 0; i < std::size(names); ++i)
     names_map[names[i]] = i;
-
   for (std::size_t i = 0; i < std::size(mapped_locations); ++i) {
-    const auto j = names_map.find(mapped_locations[i].get_name());
+    const auto j = names_map.find(mapped_locations[i].name);
     if (j == std::cend(names_map))
       throw std::runtime_error("read sequence not found for: " + names[i]);
     lookup.push_back(j->second);
diff --git a/src/common/bsutils.hpp b/src/common/bsutils.hpp
index 30aa50d6..7beb5aa6 100644
--- a/src/common/bsutils.hpp
+++ b/src/common/bsutils.hpp
@@ -21,7 +21,7 @@
 #include <cstddef>
 #include <string>
 #include <vector>
-class GenomicRegion;
+class Interval6;
 
 inline bool
 is_cytosine(char c) {
@@ -54,11 +54,11 @@ is_cpg(const std::string &s, size_t i) {
 }
 
 void
-adjust_region_ends(const std::vector<std::vector<GenomicRegion>> &clusters,
-                   std::vector<GenomicRegion> &regions);
+adjust_region_ends(const std::vector<std::vector<Interval6>> &clusters,
+                   std::vector<Interval6> &regions);
 
 void
-relative_sort(const std::vector<GenomicRegion> &mapped_locations,
+relative_sort(const std::vector<Interval6> &mapped_locations,
               const std::vector<std::string> &names,
               std::vector<size_t> &lookup);
 
diff --git a/src/radmeth/dmr.cpp b/src/radmeth/dmr.cpp
index 779a0bb1..b688bd4d 100644
--- a/src/radmeth/dmr.cpp
+++ b/src/radmeth/dmr.cpp
@@ -1,10 +1,4 @@
-/* dmr: computes DMRs based on HMRs and probability of differences at
- * individual CpGs
- *
- * Copyright (C) 2012-2023 University of Southern California and
- *                         Andrew D. Smith
- *
- * Authors: Andrew D. Smith
+/* Copyright (C) 2012-2025 Andrew D. Smith
  *
  * This program is free software: you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
@@ -17,15 +11,17 @@
  * General Public License for more details.
  */
 
-#include "GenomicRegion.hpp"
+[[maybe_unused]] static constexpr auto about = R"(
+dmr: compute DMRs based on HMRs and probability of differences at each site
+)";
+
+#include "Interval6.hpp"
 #include "MSite.hpp"
 #include "OptionParser.hpp"
-#include "smithlab_utils.hpp"
 
 #include <bamxx.hpp>
 
 #include <algorithm>
-#include <cassert>
 #include <charconv>
 #include <cstdint>
 #include <cstdlib>
@@ -33,41 +29,32 @@
 #include <iostream>
 #include <iterator>
 #include <new>
+#include <numeric>
 #include <stdexcept>
 #include <string>
 #include <system_error>
 #include <utility>
 #include <vector>
 
-using std::cerr;
-using std::cout;
-using std::endl;
-using std::find_if;
-using std::from_chars;
-using std::ifstream;
-using std::max;
-using std::pair;
-using std::runtime_error;
-using std::string;
-using std::vector;
-
 static bool
-parse_methdiff_line(const char *c, const char *c_end, string &chrom,
-                    uint32_t &pos, char &strand, string &context,
-                    double &diffscore, uint32_t &n_meth_a, uint32_t &n_unmeth_a,
-                    uint32_t &n_meth_b, uint32_t &n_unmeth_b) {
+parse_methdiff_line(const char *c, const std::size_t c_sz, std::string &chrom,
+                    std::uint32_t &pos, char &strand, std::string &context,
+                    double &diffscore, std::uint32_t &n_meth_a,
+                    std::uint32_t &n_unmeth_a, std::uint32_t &n_meth_b,
+                    std::uint32_t &n_unmeth_b) {
   constexpr auto is_sep = [](const char x) { return x == ' ' || x == '\t'; };
   constexpr auto not_sep = [](const char x) { return x != ' ' && x != '\t'; };
 
   // NOLINTBEGIN(*-pointer-arithmetic)
+  const auto c_end = c + c_sz;
   auto field_s = c;
   auto field_e = std::find_if(field_s + 1, c_end, is_sep);
   bool failed = field_e == c_end;
 
   // chromosome name
   {
-    const uint32_t d = std::distance(field_s, field_e);
-    chrom = string{field_s, d};
+    const std::uint32_t d = std::distance(field_s, field_e);
+    chrom = std::string{field_s, d};
   }
 
   field_s = std::find_if(field_e + 1, c_end, not_sep);
@@ -95,8 +82,8 @@ parse_methdiff_line(const char *c, const char *c_end, string &chrom,
 
   // context
   {
-    const uint32_t d = std::distance(field_s, field_e);
-    context = string{field_s, d};
+    const std::uint32_t d = std::distance(field_s, field_e);
+    context = std::string{field_s, d};
   }
 
   field_s = std::find_if(field_e + 1, c_end, not_sep);
@@ -158,25 +145,25 @@ parse_methdiff_line(const char *c, const char *c_end, string &chrom,
   return !failed;
 }
 
-static vector<MSite>
-read_diffs_file(const string &diffs_file) {
+static std::vector<MSite>
+read_diffs_file(const std::string &diffs_file) {
   bamxx::bgzf_file in(diffs_file, "r");
   if (!in)
-    throw runtime_error("could not open file: " + diffs_file);
+    throw std::runtime_error("could not open file: " + diffs_file);
 
-  string chrom, name;
+  std::string chrom, name;
   char strand{};
   double diffscore{};
-  uint32_t pos{}, meth_a{}, unmeth_a{}, meth_b{}, unmeth_b{};
+  std::uint32_t pos{}, meth_a{}, unmeth_a{}, meth_b{}, unmeth_b{};
 
-  vector<MSite> cpgs;
-  string line;
+  std::vector<MSite> cpgs;
+  std::string line;
   while (getline(in, line)) {
     // NOLINTBEGIN(*-pointer-arithmetic)
-    if (!parse_methdiff_line(line.data(), line.data() + size(line), chrom, pos,
-                             strand, name, diffscore, meth_a, unmeth_a, meth_b,
+    if (!parse_methdiff_line(line.data(), std::size(line), chrom, pos, strand,
+                             name, diffscore, meth_a, unmeth_a, meth_b,
                              unmeth_b))
-      throw runtime_error("bad methdiff line: " + line);
+      throw std::runtime_error("bad methdiff line: " + line);
     // NOLINTEND(*-pointer-arithmetic)
 
     cpgs.emplace_back(chrom, pos, strand, name, diffscore, 1);
@@ -185,63 +172,64 @@ read_diffs_file(const string &diffs_file) {
 }
 
 static void
-complement_regions(const size_t max_end, const vector<GenomicRegion> &a,
-                   const size_t start, const size_t end,
-                   vector<GenomicRegion> &cmpl) {
-  cmpl.push_back(GenomicRegion(a[start]));
-  cmpl.back().set_start(0);
-  for (size_t i = start; i < end; ++i) {
-    cmpl.back().set_end(a[i].get_start());
-    cmpl.push_back(GenomicRegion(a[i]));
-    cmpl.back().set_start(a[i].get_end());
+complement(const std::size_t max_end, const std::vector<Interval6> &a,
+           const std::size_t start, const std::size_t end,
+           std::vector<Interval6> &cmpl) {
+  cmpl.push_back(Interval6(a[start]));
+  cmpl.back().start = 0;
+  for (std::size_t i = start; i < end; ++i) {
+    cmpl.back().stop = a[i].start;
+    cmpl.push_back(Interval6(a[i]));
+    cmpl.back().start = a[i].stop;
   }
-  cmpl.back().set_end(max_end);
+  cmpl.back().stop = max_end;
 }
 
-static vector<size_t>
-get_chrom_ends(const vector<GenomicRegion> &r) {
-  vector<size_t> ends;
-  for (size_t i = 0; i < r.size() - 1; ++i)
-    if (!r[i].same_chrom(r[i + 1]))
+static std::vector<std::size_t>
+get_chrom_ends(const std::vector<Interval6> &r) {
+  std::vector<std::size_t> ends;
+  for (std::size_t i = 0; i + 1 < std::size(r); ++i)
+    if (r[i].chrom != r[i + 1].chrom)
       ends.push_back(i + 1);
-  ends.push_back(r.size());
+  ends.push_back(std::size(r));
   return ends;
 }
 
-static vector<GenomicRegion>
-complement_regions(const size_t max_end, const vector<GenomicRegion> &r) {
-  vector<size_t> r_chroms = get_chrom_ends(r);
-  vector<GenomicRegion> r_cmpl;
-  size_t t = 0;
-  for (size_t i = 0; i < size(r_chroms); ++i) {
-    complement_regions(max_end, r, t, r_chroms[i], r_cmpl);
+static std::vector<Interval6>
+complement(const std::size_t max_end, const std::vector<Interval6> &r) {
+  std::vector<std::size_t> r_chroms = get_chrom_ends(r);
+  std::vector<Interval6> r_cmpl;
+  std::size_t t{};
+  for (std::size_t i = 0; i < std::size(r_chroms); ++i) {
+    complement(max_end, r, t, r_chroms[i], r_cmpl);
     t = r_chroms[i];
   }
   return r_cmpl;
 }
 
 static bool
-check_no_overlap(const vector<GenomicRegion> &regions) {
-  for (size_t i = 1; i < regions.size(); ++i)
-    if (regions[i].same_chrom(regions[i - 1]) &&
-        regions[i].get_start() < regions[i - 1].get_end())
+check_no_overlap(const std::vector<Interval6> &regions) {
+  for (std::size_t i = 1; i < std::size(regions); ++i)
+    if (regions[i].chrom == regions[i - 1].chrom &&
+        regions[i].start < regions[i - 1].stop)
       return false;
   return true;
 }
 
 static inline MSite
-get_left_msite(const GenomicRegion &r) {
-  return {r.get_chrom(), r.get_start(), r.get_strand(), r.get_name(), 0.0, 1u};
+get_left_msite(const Interval6 &r) {
+  return {r.chrom, r.start, r.strand, r.name, 0.0, 1u};
 }
 
 static inline MSite
-get_right_msite(const GenomicRegion &r) {
-  return {r.get_chrom(), r.get_end(), r.get_strand(), r.get_name(), 0.0, 1u};
+get_right_msite(const Interval6 &r) {
+  return {r.chrom, r.stop, r.strand, r.name, 0.0, 1u};
 }
 
-static vector<pair<size_t, size_t>>
-separate_sites(const vector<GenomicRegion> &dmrs, const vector<MSite> &sites) {
-  vector<pair<size_t, size_t>> sep_sites;
+static std::vector<std::pair<std::size_t, std::size_t>>
+separate_sites(const std::vector<Interval6> &dmrs,
+               const std::vector<MSite> &sites) {
+  std::vector<std::pair<std::size_t, std::size_t>> sep_sites;
   for (const auto &dmr : dmrs) {
     const auto a = get_left_msite(dmr);
     const auto b = get_right_msite(dmr);
@@ -260,10 +248,11 @@ pval_from_msite(const MSite &s) {
 
 static void
 get_cpg_stats(const bool LOW_CUTOFF, const double sig_cutoff,
-              const vector<MSite> &cpgs, const size_t start_idx,
-              const size_t end_idx, size_t &total_cpgs, size_t &total_sig) {
+              const std::vector<MSite> &cpgs, const std::size_t start_idx,
+              const std::size_t end_idx, std::size_t &total_cpgs,
+              std::size_t &total_sig) {
   total_cpgs = end_idx - start_idx;
-  for (size_t i = start_idx; i < end_idx; ++i) {
+  for (std::size_t i = start_idx; i < end_idx; ++i) {
     const auto pval = pval_from_msite(cpgs[i]);
     if ((LOW_CUTOFF && (pval < sig_cutoff)) ||
         (!LOW_CUTOFF && (pval > 1.0 - sig_cutoff)))
@@ -271,10 +260,42 @@ get_cpg_stats(const bool LOW_CUTOFF, const double sig_cutoff,
   }
 }
 
+template <class T>
+[[nodiscard]] auto
+intersection_by_base(const std::vector<T> &a,
+                     const std::vector<T> &b) -> std::vector<T> {
+  const auto end_precedes = [](const T &x, const T &y) {
+    const auto cmp = x.chrom.compare(y.chrom);
+    return cmp < 0 || (cmp == 0 && x.stop < y.stop);
+  };
+  const auto overlaps = [](const T &x, const T &y) {
+    const auto cmp = x.chrom.compare(y.chrom);
+    return cmp == 0 && std::max(x.start, x.start) < std::min(x.stop, y.stop);
+  };
+
+  auto a_itr = std::cbegin(a);
+  auto a_end = std::cend(a);
+  auto b_itr = std::cbegin(b);
+  auto b_end = std::cend(b);
+
+  std::vector<T> c;
+  while (a_itr != a_end && b_itr != b_end) {
+    if (overlaps(*a_itr, *b_itr))
+      c.emplace_back(a_itr->chrom, std::max(a_itr->start, b_itr->start),
+                     std::min(a_itr->stop, b_itr->stop), std::string{}, 0.0,
+                     '+');
+    if (end_precedes(*a_itr, *b_itr))
+      ++a_itr;
+    else
+      ++b_itr;
+  }
+  return c;
+}
+
 int
 main_dmr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
   try {
-    static const string description =
+    static const std::string description =
       R"(
 computes DMRs based on HMRs and probability of differences at
 individual CpGs";
@@ -291,116 +312,118 @@ individual CpGs";
     opt_parse.set_show_defaults();
     opt_parse.add_opt("cutoff", 'c', "Significance cutoff", false, sig_cutoff);
     opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE);
-    vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << '\n'
-           << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n'
+                << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << '\n';
+      std::cerr << opt_parse.option_missing_message() << '\n';
       return EXIT_SUCCESS;
     }
-    if (leftover_args.size() != 5) {  // NOLINT(*-avoid-magic-numbers)
-      cerr << opt_parse.help_message() << '\n';
+    if (std::size(leftover_args) != 5) {  // NOLINT(*-avoid-magic-numbers)
+      std::cerr << opt_parse.help_message() << '\n';
       return EXIT_SUCCESS;
     }
-    const string diffs_file = leftover_args[0];
-    const string hmr1_file = leftover_args[1];
-    const string hmr2_file = leftover_args[2];
-    const string outfile_a = leftover_args[3];
-    const string outfile_b = leftover_args[4];
+    const std::string diffs_file = leftover_args[0];
+    const std::string hmr1_file = leftover_args[1];
+    const std::string hmr2_file = leftover_args[2];
+    const std::string outfile_a = leftover_args[3];
+    const std::string outfile_b = leftover_args[4];
     /****************** END COMMAND LINE OPTIONS *****************/
 
     if (VERBOSE)
-      cerr << "[LOADING HMRS] " << hmr1_file << '\n';
+      std::cerr << "[LOADING HMRS] " << hmr1_file << '\n';
 
-    vector<GenomicRegion> regions_a;
-    ReadBEDFile(hmr1_file, regions_a);
-    assert(check_sorted(regions_a));
-    if (!check_sorted(regions_a))
-      throw runtime_error("regions not sorted in file: " + hmr1_file);
+    const auto regions_a = read_intervals6(hmr1_file);
+    if (!std::is_sorted(std::cbegin(regions_a), std::cend(regions_a)))
+      throw std::runtime_error("regions not sorted in file: " + hmr1_file);
     if (!check_no_overlap(regions_a))
-      throw runtime_error("regions overlap in file: " + hmr1_file);
+      throw std::runtime_error("regions overlap in file: " + hmr1_file);
 
     if (VERBOSE)
-      cerr << "[LOADING HMRS] " << hmr2_file << '\n';
+      std::cerr << "[LOADING HMRS] " << hmr2_file << '\n';
 
-    vector<GenomicRegion> regions_b;
-    ReadBEDFile(hmr2_file, regions_b);
-    assert(check_sorted(regions_b));
-    if (!check_sorted(regions_b))
-      throw runtime_error("regions not sorted in file: " + hmr2_file);
+    const auto regions_b = read_intervals6(hmr2_file);
+    if (!std::is_sorted(std::cbegin(regions_b), std::cend(regions_b)))
+      throw std::runtime_error("regions not sorted in file: " + hmr2_file);
     if (!check_no_overlap(regions_b))
-      throw runtime_error("regions overlap in file: " + hmr2_file);
+      throw std::runtime_error("regions overlap in file: " + hmr2_file);
 
     if (VERBOSE)
-      cerr << "[COMPUTING SYMMETRIC DIFFERENCE]" << '\n';
+      std::cerr << "[COMPUTING SYMMETRIC DIFFERENCE]" << '\n';
 
-    size_t max_end = 0;
-    for (const auto &r : regions_a)
-      max_end = max(max_end, r.get_end());  // cppcheck-suppress useStlAlgorithm
-    for (const auto &r : regions_b)
-      max_end = max(max_end, r.get_end());  // cppcheck-suppress useStlAlgorithm
+    const auto get_max_stop = [](const auto &x) {
+      const auto acc = [](const auto s, const auto &r) {
+        return std::max(s, r.stop);
+      };
+      return std::accumulate(std::cbegin(x), std::cend(x), 0u, acc);
+    };
+    const auto max_stop =
+      std::max(get_max_stop(regions_a), get_max_stop(regions_b));
 
-    const auto a_cmpl = complement_regions(max_end, regions_a);
-    const auto b_cmpl = complement_regions(max_end, regions_b);
+    const auto a_cmpl = complement(max_stop, regions_a);
+    const auto b_cmpl = complement(max_stop, regions_b);
 
-    vector<GenomicRegion> dmrs_a, dmrs_b;
-    genomic_region_intersection_by_base(regions_a, b_cmpl, dmrs_a);
-    genomic_region_intersection_by_base(regions_b, a_cmpl, dmrs_b);
+    auto dmrs_a = intersection_by_base(regions_a, b_cmpl);
+    auto dmrs_b = intersection_by_base(regions_b, a_cmpl);
 
     // separate the regions by chrom and by desert
     if (VERBOSE)
-      cerr << "[READING CPG METH DIFFS]" << '\n';
+      std::cerr << "[READING CPG METH DIFFS]" << '\n';
     const auto cpgs = read_diffs_file(diffs_file);
     if (VERBOSE)
-      cerr << "[read " << size(cpgs) << " sites from " + diffs_file << "]"
-           << '\n';
+      std::cerr << "[read " << std::size(cpgs) << " sites from " + diffs_file
+                << "]\n";
 
-    if (!check_sorted(cpgs))
-      throw runtime_error("CpGs not sorted in: " + diffs_file);
+    if (!std::is_sorted(std::cbegin(cpgs), std::cend(cpgs)))
+      throw std::runtime_error("CpGs not sorted in: " + diffs_file);
     if (VERBOSE)
-      cerr << "[TOTAL CPGS]: " << cpgs.size() << '\n';
+      std::cerr << "[TOTAL CPGS]: " << std::size(cpgs) << '\n';
 
     auto sep_sites = separate_sites(dmrs_a, cpgs);
 
-    for (size_t i = 0; i < dmrs_a.size(); ++i) {
-      size_t total_cpgs = 0, total_sig = 0;
+    for (std::size_t i = 0; i < std::size(dmrs_a); ++i) {
+      std::size_t total_cpgs{}, total_sig{};
       get_cpg_stats(true, sig_cutoff, cpgs, sep_sites[i].first,
                     sep_sites[i].second, total_cpgs, total_sig);
-      dmrs_a[i].set_name(dmrs_a[i].get_name() + ":" + toa(total_cpgs));
-      dmrs_a[i].set_score(static_cast<float>(total_sig));
+      dmrs_a[i].name += ":" + std::to_string(total_cpgs);
+      dmrs_a[i].score = static_cast<double>(total_sig);
     }
 
     sep_sites = separate_sites(dmrs_b, cpgs);
 
-    for (size_t i = 0; i < dmrs_b.size(); ++i) {
-      size_t total_cpgs = 0, total_sig = 0;
+    for (std::size_t i = 0; i < std::size(dmrs_b); ++i) {
+      std::size_t total_cpgs{}, total_sig{};
       get_cpg_stats(false, sig_cutoff, cpgs, sep_sites[i].first,
                     sep_sites[i].second, total_cpgs, total_sig);
-      dmrs_b[i].set_name(dmrs_b[i].get_name() + ":" + toa(total_cpgs));
-      dmrs_b[i].set_score(static_cast<float>(total_sig));
+      dmrs_b[i].name += ":" + std::to_string(total_cpgs);
+      dmrs_b[i].score = static_cast<double>(total_sig);
     }
 
     std::ofstream out_a(outfile_a);
+    if (!out_a)
+      throw std::runtime_error("failed to open output file: " + outfile_a);
     std::copy(std::cbegin(dmrs_a), std::cend(dmrs_a),
-              std::ostream_iterator<GenomicRegion>(out_a, "\n"));
+              std::ostream_iterator<Interval6>(out_a, "\n"));
 
     std::ofstream out_b(outfile_b);
+    if (!out_b)
+      throw std::runtime_error("failed to open output file: " + outfile_b);
     std::copy(std::cbegin(dmrs_b), std::cend(dmrs_b),
-              std::ostream_iterator<GenomicRegion>(out_b, "\n"));
+              std::ostream_iterator<Interval6>(out_b, "\n"));
 
     if (VERBOSE)
-      cerr << "[OUTPUT FORMAT] COL4=NAME:N_COVERED_CPGS COL5=N_SIG_CPGS\n";
+      std::cerr << "[OUTPUT FORMAT] COL4=NAME:N_COVERED_CPGS COL5=N_SIG_CPGS\n";
   }
   catch (const std::exception &e) {
-    cerr << e.what() << '\n';
+    std::cerr << e.what() << '\n';
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
diff --git a/src/radmeth/radmeth-merge.cpp b/src/radmeth/radmeth-merge.cpp
index cc519072..0f525857 100644
--- a/src/radmeth/radmeth-merge.cpp
+++ b/src/radmeth/radmeth-merge.cpp
@@ -15,7 +15,7 @@
  * General Public License for more details.
  */
 
-#include "GenomicRegion.hpp"
+#include "Interval6.hpp"
 #include "OptionParser.hpp"
 
 #include <cstdlib>
@@ -27,49 +27,36 @@
 #include <string>
 #include <vector>
 
-using std::cerr;
-using std::cout;
-using std::endl;
-using std::ifstream;
-using std::istream;
-using std::istringstream;
-using std::ofstream;
-using std::ostream;
-using std::runtime_error;
-using std::string;
-using std::vector;
-
 // NOLINTBEGIN(*-narrowing-conversions)
 
 // Attemps to find the next significant CpG site. Returns true if one was found
 // and flase otherwise.
 static bool
-read_next_significant_cpg(istream &cpg_stream, GenomicRegion &cpg,
-                          double cutoff, bool &skipped_any, bool &n_sig_sites,
-                          size_t &test_cov, size_t &test_meth, size_t &rest_cov,
-                          size_t &rest_meth) {
-  GenomicRegion region;
+read_next_significant_cpg(std::istream &cpg_stream, Interval6 &cpg,
+                          double cutoff, bool &skipped_any, bool &has_sig_sites,
+                          std::size_t &test_cov, std::size_t &test_meth,
+                          std::size_t &rest_cov, std::size_t &rest_meth) {
+  Interval6 region;
   skipped_any = false;
-  n_sig_sites = false;
-  string cpg_encoding;
+  has_sig_sites = false;
+  std::string line;
 
-  while (getline(cpg_stream, cpg_encoding)) {
-    string chrom, name, sign;
-    size_t position{};
+  while (std::getline(cpg_stream, line)) {
+    std::string chrom, name, sign;
+    std::size_t position{};
     double raw_pval{};
     double adjusted_pval{};
     double corrected_pval{};
-
-    istringstream iss(cpg_encoding);
-    iss.exceptions(std::ios::failbit);
-    iss >> chrom >> position >> sign >> name >> raw_pval >> adjusted_pval >>
-      corrected_pval >> test_cov >> test_meth >> rest_cov >> rest_meth;
-
+    std::istringstream iss(line);
+    if (!(iss >> chrom >> position >> sign >> name >> raw_pval >>
+          adjusted_pval >> corrected_pval >> test_cov >> test_meth >>
+          rest_cov >> rest_meth))
+      throw std::runtime_error("failed to parse line:\n" + line);
     if (corrected_pval >= 0.0 && corrected_pval < cutoff) {
-      cpg.set_chrom(chrom);
-      cpg.set_start(position);
-      cpg.set_end(position + 1);
-      n_sig_sites = (0 <= raw_pval && raw_pval < cutoff);
+      cpg.chrom = chrom;
+      cpg.start = position;
+      cpg.stop = position + 1;
+      has_sig_sites = (0.0 <= raw_pval && raw_pval < cutoff);
       return true;
     }
     skipped_any = true;
@@ -79,77 +66,75 @@ read_next_significant_cpg(istream &cpg_stream, GenomicRegion &cpg,
 }
 
 static void
-merge(istream &cpg_stream, ostream &dmr_stream, double cutoff) {
-  GenomicRegion dmr;
-  dmr.set_name("dmr");
+merge(std::istream &cpg_stream, std::ostream &dmr_stream, double cutoff) {
+  Interval6 dmr;
+  dmr.name = "dmr";
 
-  size_t dmr_test_cov = 0;
-  size_t dmr_test_meth = 0;
-  size_t dmr_rest_cov = 0;
-  size_t dmr_rest_meth = 0;
+  std::size_t dmr_test_cov{};
+  std::size_t dmr_test_meth{};
+  std::size_t dmr_rest_cov{};
+  std::size_t dmr_rest_meth{};
 
-  size_t test_cov = 0;
-  size_t test_meth = 0;
-  size_t rest_cov = 0;
-  size_t rest_meth = 0;
+  std::size_t test_cov{};
+  std::size_t test_meth{};
+  std::size_t rest_cov{};
+  std::size_t rest_meth{};
 
   // Find the first significant CpG, or terminate the function if none exist.
   bool skipped_last_cpg{};
-  bool n_sig_sites{};
+  bool has_sig_sites{};
   if (!read_next_significant_cpg(cpg_stream, dmr, cutoff, skipped_last_cpg,
-                                 n_sig_sites, test_cov, test_meth, rest_cov,
+                                 has_sig_sites, test_cov, test_meth, rest_cov,
                                  rest_meth))
     return;
 
-  dmr.set_score(n_sig_sites);
+  dmr.score = has_sig_sites ? 1.0 : 0.0;
   dmr_test_cov += test_cov;
   dmr_test_meth += test_meth;
   dmr_rest_cov += rest_cov;
   dmr_rest_meth += rest_meth;
 
-  GenomicRegion cpg;
-  cpg.set_name("dmr");
+  Interval6 cpg;
+  cpg.name = "dmr";
 
   while (read_next_significant_cpg(cpg_stream, cpg, cutoff, skipped_last_cpg,
-                                   n_sig_sites, test_cov, test_meth, rest_cov,
+                                   has_sig_sites, test_cov, test_meth, rest_cov,
                                    rest_meth)) {
-    if (skipped_last_cpg || cpg.get_chrom() != dmr.get_chrom()) {
-      if (dmr.get_score() != 0)
-        dmr_stream << dmr.get_chrom() << '\t' << dmr.get_start() << '\t'
-                   << dmr.get_end() << '\t' << dmr.get_name() << '\t'
-                   << dmr.get_score() << '\t'
+    if (skipped_last_cpg || cpg.chrom != dmr.chrom) {
+      if (dmr.score != 0)
+        dmr_stream << dmr.chrom << '\t' << dmr.start << '\t' << dmr.stop << '\t'
+                   << dmr.name << '\t' << dmr.score << '\t'
                    << static_cast<double>(dmr_test_meth) / dmr_test_cov -
                         static_cast<double>(dmr_rest_meth) / dmr_rest_cov
                    << '\n';
       dmr = cpg;
-      dmr.set_score(n_sig_sites);
+      dmr.score += has_sig_sites ? 1.0 : 0.0;
       dmr_test_cov = test_cov;
       dmr_test_meth = test_meth;
       dmr_rest_cov = rest_cov;
       dmr_rest_meth = rest_meth;
     }
     else {
-      dmr.set_end(cpg.get_end());
-      dmr.set_score(dmr.get_score() + n_sig_sites);
+      dmr.stop = cpg.stop;
+      dmr.score += has_sig_sites ? 1.0 : 0.0;
       dmr_test_cov += test_cov;
       dmr_test_meth += test_meth;
       dmr_rest_cov += rest_cov;
       dmr_rest_meth += rest_meth;
     }
   }
-  if (dmr.get_score() != 0) {
+  if (dmr.score != 0) {
     const double diff = static_cast<double>(dmr_test_meth) / dmr_test_cov -
                         static_cast<double>(dmr_rest_meth) / dmr_rest_cov;
-    dmr_stream << dmr.get_chrom() << '\t' << dmr.get_start() << '\t'
-               << dmr.get_end() << '\t' << dmr.get_name() << '\t'
-               << dmr.get_score() << '\t' << diff << '\n';
+    dmr_stream << dmr.chrom << '\t' << dmr.start << '\t' << dmr.stop << '\t'
+               << dmr.name << '\t' << dmr.score << '\t' << diff << '\n';
   }
 }
 
 int
 main_radmeth_merge(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
   try {
-    string outfile;
+    std::string outfile;
     double cutoff = 0.01;  // NOLINT(*-avoid-magic-numbers)
 
     /**************** GET COMMAND LINE ARGUMENTS *************************/
@@ -161,41 +146,39 @@ main_radmeth_merge(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     opt_parse.add_opt("output", 'o', "output file (default: stdout)", false,
                       outfile);
     opt_parse.add_opt("cutoff", 'p', "p-value cutoff", false, cutoff);
-    vector<string> leftover_args;
+    std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << '\n';
+      std::cerr << opt_parse.option_missing_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (leftover_args.size() != 1) {
-      cerr << opt_parse.help_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n';
       return EXIT_SUCCESS;
     }
-    const string bed_filename = leftover_args.front();
+    const std::string bed_filename = leftover_args.front();
     /************************************************************************/
 
-    ofstream of;
-    if (!outfile.empty())
-      of.open(outfile);
-    ostream out(outfile.empty() ? cout.rdbuf() : of.rdbuf());
-
-    ifstream in(bed_filename);
+    std::ofstream out(outfile);
+    if (!out)
+      throw std::runtime_error("failed to open: " + outfile);
+    std::ifstream in(bed_filename);
     if (!in)
-      throw runtime_error("could not open file: " + bed_filename);
+      throw std::runtime_error("failed to open: " + bed_filename);
 
     merge(in, out, cutoff);
   }
   catch (const std::exception &e) {
-    cerr << "ERROR: " << e.what() << '\n';
-    exit(EXIT_FAILURE);
+    std::cerr << e.what() << '\n';
+    return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;
 }
diff --git a/src/utils/selectsites.cpp b/src/utils/selectsites.cpp
index 92bde671..59a4a2f1 100644
--- a/src/utils/selectsites.cpp
+++ b/src/utils/selectsites.cpp
@@ -1,22 +1,22 @@
-/* selectsites: program to select sites, specified in a methcounts
- * format file, that are contained in given (bed format) intervals
+/* Copyright (C) 2019-2025 Andrew D. Smith
  *
- * Copyright (C) 2019-2023 Andrew D. Smith
+ * This program is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation, either version 3 of the License, or (at your option)
+ * any later version.
  *
- * Authors: Andrew D. Smith
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
  */
 
-#include "GenomicRegion.hpp"
+[[maybe_unused]] static constexpr auto about = R"(
+selectsites: Select sites, specified in a methcounts format file, that are
+contained in given (bed format) intervals.
+)";
+
+#include "Interval6.hpp"
 #include "MSite.hpp"
 #include "OptionParser.hpp"
 
@@ -24,8 +24,6 @@
 
 #include <htslib/sam.h>
 
-#include <unistd.h>
-
 #include <algorithm>
 #include <cstddef>
 #include <cstdint>
@@ -44,42 +42,9 @@
 #include <utility>
 #include <vector>
 
-using std::cerr;
-using std::cout;
-using std::ios_base;
-using std::runtime_error;
-using std::string;
-using std::tie;
-using std::tuple;
-using std::unordered_map;
-
-using bamxx::bgzf_file;
-
-namespace fs = std::filesystem;
-
-struct quick_buf : public std::ostringstream,
-                   public std::basic_stringbuf<char> {
-  // ADS: By user ecatmur on SO; very fast. Seems to work...
-  quick_buf() {
-    // ...but this seems to depend on data layout
-    static_cast<std::basic_ios<char> &>(*this).rdbuf(this);
-  }
-  void
-  clear() {
-    // reset buffer pointers (member functions)
-    setp(pbase(), pbase());
-  }
-  char const *
-  data() {
-    /* between data() and insertion make sure to clear() */
-    *pptr() = '\0';
-    return pbase();
-  }
-};
-
 struct selectsites_summary {
-  // command_line is the command used to produce this summary file and
-  // the corresponding results
+  // command_line is the command used to produce this summary file and the
+  // corresponding results
   std::string command_line{};
 
   // n_target_regions is the number of target regions provided as
@@ -89,108 +54,104 @@ struct selectsites_summary {
   // target_region_size is the sum of the sizes of each target region
   std::uint64_t target_region_size{};
 
-  // n_target_regions_collapsed is the number of target regions after
-  // having collapsed the input target regions merging those that
-  // overlap
+  // n_target_regions_collapsed is the number of target regions after having
+  // collapsed the input target regions merging those that overlap
   std::uint64_t n_target_regions_collapsed{};
 
-  // target_region_collapsed_size is the sum of the sizes of target
-  // regions after collapsing
+  // target_region_collapsed_size is the sum of the sizes of target regions
+  // after collapsing
   std::uint64_t target_region_collapsed_size{};
 
-  // n_sites_total is the total number of sites available in the input
-  // counts file. This value is displayed as zero if the command
-  // specified to process the sites on disk.
+  // n_sites_total is the total number of sites available in the input counts
+  // file. This value is displayed as zero if the command specified to process
+  // the sites on disk.
   std::uint64_t n_sites_total{};
 
   // n_sites_selected is the total number of sites in the output file
   std::uint64_t n_sites_selected{};
 
   template <typename T>
-  static auto
-  measure_target_regions(const T &t) -> tuple<std::uint64_t, std::uint64_t> {
+  [[nodiscard]] static auto
+  measure_target_regions(const T &t)
+    -> std::tuple<std::uint64_t, std::uint64_t> {
     return {
       std::size(t),
-      accumulate(cbegin(t), cend(t), 0ul,
-                 [](const std::uint64_t a, const typename T::value_type &v) {
-                   return a + v.get_width();
-                 }),
+      std::accumulate(
+        std::cbegin(t), std::cend(t), 0ul,
+        [](const std::uint64_t a, const typename T::value_type &v) {
+          return a + size(v);
+        }),
     };
   }
-
-  auto
-  tostring() const -> std::string {
-    std::ostringstream oss;
-    oss << "command_line: " << command_line << '\n'
-        << "n_target_regions: " << n_target_regions << '\n'
-        << "target_region_size: " << target_region_size << '\n'
-        << "n_target_regions_collapsed: " << n_target_regions_collapsed << '\n'
-        << "target_region_collapsed_size: " << target_region_collapsed_size
-        << '\n'
-        << "n_sites_total: " << n_sites_total << '\n'
-        << "n_sites_selected: " << n_sites_selected;
-    return oss.str();
-  }
 };
 
+[[nodiscard]] auto
+to_string(const selectsites_summary &s) -> std::string {
+  std::ostringstream oss;
+  oss << "command_line: " << s.command_line << '\n'
+      << "n_target_regions: " << s.n_target_regions << '\n'
+      << "target_region_size: " << s.target_region_size << '\n'
+      << "n_target_regions_collapsed: " << s.n_target_regions_collapsed << '\n'
+      << "target_region_collapsed_size: " << s.target_region_collapsed_size
+      << '\n'
+      << "n_sites_total: " << s.n_sites_total << '\n'
+      << "n_sites_selected: " << s.n_sites_selected;
+  return oss.str();
+}
+
 static auto
 write_stats_output(const selectsites_summary &summary,
                    const std::string &summary_file) -> void {
   if (!summary_file.empty()) {
     std::ofstream out_summary(summary_file);
     if (!out_summary)
-      throw runtime_error("bad summary output file");
-    out_summary << summary.tostring() << '\n';
+      throw std::runtime_error("bad summary output file");
+    out_summary << to_string(summary) << '\n';
   }
 }
 
 static void
-collapsebed(std::vector<GenomicRegion> &regions) {
+collapsebed(std::vector<Interval6> &regions) {
   std::ptrdiff_t j = 0;
-  for (std::size_t i = 1; i < std::size(regions); ++i) {
-    if (regions[j].same_chrom(regions[i]) &&
-        regions[i].get_start() <= regions[j].get_end()) {
-      regions[j].set_end(std::max(regions[j].get_end(), regions[i].get_end()));
-    }
-    else {
-      regions[++j] = regions[i];
-    }
+  for (const auto &r : regions) {
+    if (regions[j].chrom == r.chrom && r.start <= regions[j].stop)
+      regions[j].stop = std::max(regions[j].stop, r.stop);
+    else
+      regions[++j] = r;
   }
   regions.erase(std::begin(regions) + j + 1, std::end(regions));
 }
 
 static inline bool
-precedes(const GenomicRegion &r, const MSite &s) {
-  return (r.get_chrom() < s.chrom ||
-          (r.get_chrom() == s.chrom && r.get_end() <= s.pos));
+precedes(const Interval6 &r, const MSite &s) {
+  return (r.chrom < s.chrom || (r.chrom == s.chrom && r.stop <= s.pos));
 }
 
 static inline bool
-contains(const GenomicRegion &r, const MSite &s) {
-  return (r.get_chrom() == s.chrom &&
-          (r.get_start() <= s.pos && s.pos < r.get_end()));
+contains(const Interval6 &r, const MSite &s) {
+  return (r.chrom == s.chrom && (r.start <= s.pos && s.pos < r.stop));
 }
 
 static auto
 process_all_sites(
-  const bool VERBOSE, const std::string &sites_file,
-  const unordered_map<std::string, std::vector<GenomicRegion>> &regions,
-  bgzf_file &out) -> tuple<std::uint64_t, std::uint64_t> {
-  bgzf_file in(sites_file, "r");
+  const bool verbose, const std::string &sites_file,
+  const std::unordered_map<std::string, std::vector<Interval6>> &regions,
+  bamxx::bgzf_file &out) -> std::tuple<std::uint64_t, std::uint64_t> {
+  bamxx::bgzf_file in(sites_file, "r");
   if (!in)
-    throw runtime_error("cannot open file: " + sites_file);
+    throw std::runtime_error("cannot open file: " + sites_file);
 
   std::uint64_t n_sites_total = 0u;
   std::uint64_t n_sites_selected = 0u;
 
   MSite the_site, prev_site;
-  std::vector<GenomicRegion>::const_iterator i, i_lim;
+  std::vector<Interval6>::const_iterator i, i_lim;
   bool chrom_is_relevant = false;
   while (read_site(in, the_site)) {
     ++n_sites_total;
     if (the_site.chrom != prev_site.chrom) {
-      if (VERBOSE)
-        cerr << "processing " << the_site.chrom << '\n';
+      if (verbose)
+        std::cerr << "processing " << the_site.chrom << '\n';
       const auto r = regions.find(the_site.chrom);
       chrom_is_relevant = (r != cend(regions));
       if (chrom_is_relevant) {
@@ -211,43 +172,39 @@ process_all_sites(
   return {n_sites_selected, n_sites_total};
 }
 
-static auto
-get_sites_in_region(std::ifstream &site_in, const GenomicRegion &region,
-                    bgzf_file &out) -> std::uint64_t {
-  quick_buf buf;
-
-  const std::string chrom{region.get_chrom()};
-  const std::size_t start_pos = region.get_start();
-  const std::size_t end_pos = region.get_end();
+[[nodiscard]] static auto
+get_sites_in_region(std::ifstream &site_in, const Interval6 &region,
+                    bamxx::bgzf_file &out) -> std::uint64_t {
+  const std::string chrom{region.chrom};
+  const std::size_t start_pos = region.start;
+  const std::size_t end_pos = region.stop;
   find_offset_for_msite(chrom, start_pos, site_in);
 
-  MSite the_site;
   std::uint64_t n_sites_selected = 0u;
   // ADS: should only happen once that "the_site.chrom < chrom" and
   // this is only needed because of end state of binary search on disk
-  while (site_in >> the_site &&
-         (the_site.chrom < chrom ||
-          (the_site.chrom == chrom && the_site.pos < end_pos)))
+  std::string line;
+  while (getline(site_in, line)) {
+    const auto the_site = MSite(line);
+    if (chrom <= the_site.chrom &&
+        (the_site.chrom != chrom || end_pos <= the_site.pos))
+      break;
     if (start_pos <= the_site.pos) {
-      // ADS: don't like this -- always needing to "clear()" this
-      // struct is bad...
-      buf.clear();
-      buf << the_site << '\n';
-      if (!out.write(buf.data(), buf.tellp()))
-        throw runtime_error("error writing output");
+      if (!out.write(line.data(), std::size(line)))
+        throw std::runtime_error("error writing output");
       ++n_sites_selected;
     }
+  }
   return n_sites_selected;
 }
 
 static auto
 process_with_sites_on_disk(const std::string &sites_file,
-                           const std::vector<GenomicRegion> &regions,
-                           bgzf_file &out) -> std::uint64_t {
+                           const std::vector<Interval6> &regions,
+                           bamxx::bgzf_file &out) -> std::uint64_t {
   std::ifstream in(sites_file);
   if (!in)
-    throw runtime_error("cannot open file: " + sites_file);
-
+    throw std::runtime_error("cannot open file: " + sites_file);
   std::uint64_t n_sites_selected = 0ul;
   for (auto i = 0u; i < std::size(regions) && in; ++i)
     n_sites_selected += get_sites_in_region(in, regions[i], out);
@@ -256,30 +213,21 @@ process_with_sites_on_disk(const std::string &sites_file,
 
 static void
 regions_by_chrom(
-  std::vector<GenomicRegion> &regions,
-  unordered_map<std::string, std::vector<GenomicRegion>> &lookup) {
-  for (auto &&r : regions) {
-    const std::string chrom_name(r.get_chrom());
-    if (lookup.find(chrom_name) == end(lookup))
-      lookup[chrom_name] = std::vector<GenomicRegion>();
-    lookup[chrom_name].push_back(r);
-  }
+  std::vector<Interval6> &regions,
+  std::unordered_map<std::string, std::vector<Interval6>> &lookup) {
+  for (auto &&r : regions)
+    lookup[r.chrom].push_back(r);
   regions.clear();
   regions.shrink_to_fit();
 }
 
-inline bool
-file_exists(const std::string &filename) {
-  return (access(filename.data(), F_OK) == 0);
-}
-
 static bool
 is_compressed_file(const std::string &filename) {
-  const bgzf_file f(filename.data(), "r");
+  const bamxx::bgzf_file f(filename.data(), "r");
   htsFormat fmt;
   const int ret = hts_detect_format(f.f->fp, &fmt);
   if (ret != 0)
-    throw runtime_error("failed to detect format: " + filename);
+    throw std::runtime_error("failed to detect format: " + filename);
   return fmt.compression != no_compression;
 }
 
@@ -288,10 +236,11 @@ get_command_line(const int argc,
                  const char *const argv[]  // NOLINT(*-avoid-c-arrays)
                  ) -> std::string {
   if (argc == 0)
-    return std::string();
+    return std::string{};
   std::ostringstream cmd;
   cmd << '"';
-  copy(argv, argv + (argc - 1), std::ostream_iterator<const char *>(cmd, " "));
+  std::copy(argv, argv + (argc - 1),
+            std::ostream_iterator<const char *>(cmd, " "));
   cmd << argv[argc - 1] << '"';  // NOLINT(*-pointer-arithmetic)
   return cmd.str();
 }
@@ -299,7 +248,7 @@ get_command_line(const int argc,
 int
 main_selectsites(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
   try {
-    bool VERBOSE = false;
+    bool verbose = false;
     bool keep_file_on_disk = false;
     bool compress_output = false;
     bool allow_extra_fields = false;
@@ -326,25 +275,25 @@ main_selectsites(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     opt_parse.add_opt("zip", 'z', "output gzip format", false, compress_output);
     opt_parse.add_opt("relaxed", '\0', "input has extra fields", false,
                       allow_extra_fields);
-    opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE);
+    opt_parse.add_opt("verbose", 'v', "print more run info", false, verbose);
     opt_parse.set_show_defaults();
     std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
     if (argc == 1 || opt_parse.help_requested()) {
-      cerr << opt_parse.help_message() << '\n'
-           << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n'
+                << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.about_requested()) {
-      cerr << opt_parse.about_message() << '\n';
+      std::cerr << opt_parse.about_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (opt_parse.option_missing()) {
-      cerr << opt_parse.option_missing_message() << '\n';
+      std::cerr << opt_parse.option_missing_message() << '\n';
       return EXIT_SUCCESS;
     }
     if (std::size(leftover_args) != 2) {
-      cerr << opt_parse.help_message() << '\n';
+      std::cerr << opt_parse.help_message() << '\n';
       return EXIT_SUCCESS;
     }
     const std::string regions_file = leftover_args.front();
@@ -356,33 +305,32 @@ main_selectsites(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     selectsites_summary summary;
     summary.command_line = get_command_line(argc, argv);
 
-    if (!fs::is_regular_file(sites_file))
-      throw runtime_error("bad input sites file: " + sites_file);
+    if (!std::filesystem::is_regular_file(sites_file))
+      throw std::runtime_error("bad input sites file: " + sites_file);
 
     if (is_compressed_file(sites_file)) {
       keep_file_on_disk = false;
-      if (VERBOSE)
-        cerr << "input file is so must be loaded\n";
+      if (verbose)
+        std::cerr << "input file is so must be loaded\n";
     }
 
-    std::vector<GenomicRegion> regions;
-    ReadBEDFile(regions_file, regions);
-    if (!check_sorted(regions))
-      throw runtime_error("regions not sorted in file: " + regions_file);
+    auto regions = read_intervals6(regions_file);
+    if (!std::is_sorted(std::cbegin(regions), std::cend(regions)))
+      throw std::runtime_error("regions not sorted in file: " + regions_file);
     std::tie(summary.n_target_regions, summary.target_region_size) =
       selectsites_summary::measure_target_regions(regions);
 
     const std::size_t n_orig_regions = std::size(regions);
     collapsebed(regions);
-    if (VERBOSE && n_orig_regions != std::size(regions))
-      cerr << "[number of regions merged due to overlap: "
-           << n_orig_regions - std::size(regions) << "]\n";
+    if (verbose && n_orig_regions != std::size(regions))
+      std::cerr << "[number of regions merged due to overlap: "
+                << n_orig_regions - std::size(regions) << "]\n";
 
     std::tie(summary.n_target_regions_collapsed,
              summary.target_region_collapsed_size) =
       selectsites_summary::measure_target_regions(regions);
 
-    unordered_map<std::string, std::vector<GenomicRegion>> regions_lookup;
+    std::unordered_map<std::string, std::vector<Interval6>> regions_lookup;
     if (!keep_file_on_disk)
       regions_by_chrom(regions, regions_lookup);
 
@@ -390,19 +338,19 @@ main_selectsites(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     const std::string output_mode = compress_output ? "w" : "wu";
     bamxx::bgzf_file out(outfile, output_mode);
     if (!out)
-      throw runtime_error("error opening output file: " + outfile);
+      throw std::runtime_error("error opening output file: " + outfile);
 
     if (keep_file_on_disk)
       summary.n_sites_selected =
         process_with_sites_on_disk(sites_file, regions, out);
     else
       std::tie(summary.n_sites_selected, summary.n_sites_total) =
-        process_all_sites(VERBOSE, sites_file, regions_lookup, out);
+        process_all_sites(verbose, sites_file, regions_lookup, out);
 
     write_stats_output(summary, summary_file);
   }
   catch (const std::exception &e) {
-    cerr << e.what() << '\n';
+    std::cerr << e.what() << '\n';
     return EXIT_FAILURE;
   }
   return EXIT_SUCCESS;

From bc7af19548f5646deac324c181aa891a30288801 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 21 Dec 2025 19:47:31 -0800
Subject: [PATCH 2/6] multiple sources: making output file a required argument

---
 src/amrfinder/allelicmeth.cpp  | 3 +--
 src/amrfinder/amrtester.cpp    | 2 +-
 src/analysis/cpgbins.cpp       | 3 +--
 src/analysis/hmr-rep.cpp       | 3 +--
 src/analysis/hypermr.cpp       | 2 +-
 src/analysis/metagene.cpp      | 3 +--
 src/analysis/multimethstat.cpp | 3 +--
 src/analysis/pmd.cpp           | 3 +--
 src/analysis/roimethstat.cpp   | 2 +-
 src/radmeth/radmeth-merge.cpp  | 3 +--
 src/utils/selectsites.cpp      | 3 +--
 11 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/src/amrfinder/allelicmeth.cpp b/src/amrfinder/allelicmeth.cpp
index 3b99ff58..8ac56a53 100644
--- a/src/amrfinder/allelicmeth.cpp
+++ b/src/amrfinder/allelicmeth.cpp
@@ -234,8 +234,7 @@ computes probability of allele-specific methylation at each tuple of CpGs
     /****************** COMMAND LINE OPTIONS ********************/
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
                            description, "<epireads>");
-    opt_parse.add_opt("output", 'o', "output file name (default: stdout)",
-                      false, outfile);
+    opt_parse.add_opt("output", 'o', "output file name", true, outfile);
     opt_parse.add_opt("chrom", 'c', "genome sequence file/directory", true,
                       chroms_dir);
     opt_parse.add_opt("verbose", 'v', "print more run info", false, VERBOSE);
diff --git a/src/amrfinder/amrtester.cpp b/src/amrfinder/amrtester.cpp
index 80d776a2..d76ecfac 100644
--- a/src/amrfinder/amrtester.cpp
+++ b/src/amrfinder/amrtester.cpp
@@ -193,7 +193,7 @@ main_amrtester(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
                            "resolve epi-alleles",
                            "<bed-regions> <mapped-reads>");
-    opt_parse.add_opt("output", 'o', "output file", false, outfile);
+    opt_parse.add_opt("output", 'o', "output file", true, outfile);
     opt_parse.add_opt("chrom", 'c', "reference genome fasta file", true,
                       chrom_file);
     opt_parse.add_opt("itr", 'i', "max iterations", false, max_itr);
diff --git a/src/analysis/cpgbins.cpp b/src/analysis/cpgbins.cpp
index a76d74f4..b17d102a 100644
--- a/src/analysis/cpgbins.cpp
+++ b/src/analysis/cpgbins.cpp
@@ -195,8 +195,7 @@ Columns (beyond the first 6) in the BED format output:
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
                            description, "<chrom-sizes> <xsym-file>");
     opt_parse.set_show_defaults();
-    opt_parse.add_opt("output", 'o', "name of output file (default: stdout)",
-                      true, outfile);
+    opt_parse.add_opt("output", 'o', "name of output file", true, outfile);
     opt_parse.add_opt("bin", 'b', "bin size in base pairs", false, bin_size);
     opt_parse.add_opt("threads", 't', "number of threads", false, n_threads);
     opt_parse.add_opt("level", 'l',
diff --git a/src/analysis/hmr-rep.cpp b/src/analysis/hmr-rep.cpp
index 125af4d3..d8ce5624 100644
--- a/src/analysis/hmr-rep.cpp
+++ b/src/analysis/hmr-rep.cpp
@@ -348,8 +348,7 @@ main_hmr_rep(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
                            description,
                            "<methcount-file-1> <methcount-file-2> ...");
-    opt_parse.add_opt("out", 'o', "output file (default: stdout)", false,
-                      outfile);
+    opt_parse.add_opt("out", 'o', "output file", true, outfile);
     opt_parse.add_opt("desert", 'd', "max dist btwn cpgs with reads in HMR",
                       false, desert_size);
     opt_parse.add_opt("itr", 'i', "max iterations", false, max_iterations);
diff --git a/src/analysis/hypermr.cpp b/src/analysis/hypermr.cpp
index 12c42532..723209ff 100644
--- a/src/analysis/hypermr.cpp
+++ b/src/analysis/hypermr.cpp
@@ -243,7 +243,7 @@ main_hypermr(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     /****************** COMMAND LINE OPTIONS ********************/
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
                            about, "<counts-file>");
-    opt_parse.add_opt("out", 'o', "output file (BED format)", false, outfile);
+    opt_parse.add_opt("out", 'o', "output file", true, outfile);
     opt_parse.add_opt("scores", 's', "output file for posterior scores", false,
                       scores_file);
     opt_parse.add_opt("tolerance", 't', "numerical tolerance", false,
diff --git a/src/analysis/metagene.cpp b/src/analysis/metagene.cpp
index 4c8f574d..f4e928f2 100644
--- a/src/analysis/metagene.cpp
+++ b/src/analysis/metagene.cpp
@@ -112,8 +112,7 @@ correctly.
     /****************** GET COMMAND LINE ARGUMENTS ***************************/
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
                            description, "<features-bed> <counts>");
-    opt_parse.add_opt("output", 'o', "output file (default: terminal)", false,
-                      outfile);
+    opt_parse.add_opt("output", 'o', "output file", true, outfile);
     opt_parse.add_opt("size", 's', "analyze this size in both directions",
                       false, region_size);
     opt_parse.add_opt("bin", 'b', "bin size", false, bin_size);
diff --git a/src/analysis/multimethstat.cpp b/src/analysis/multimethstat.cpp
index 67e2ce75..3b4d1051 100644
--- a/src/analysis/multimethstat.cpp
+++ b/src/analysis/multimethstat.cpp
@@ -510,8 +510,7 @@ main_multimethstat(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
                            "methylation in each of a set of genomic intervals",
                            "<intervals-bed> <methylation-file>");
     opt_parse.set_show_defaults();
-    opt_parse.add_opt("output", 'o', "Name of output file (default: stdout)",
-                      false, outfile);
+    opt_parse.add_opt("output", 'o', "output file", true, outfile);
     opt_parse.add_opt("numeric", 'N', "print numeric values only (not NAs)",
                       false, print_numeric_only);
     opt_parse.add_opt("preload", 'L', "Load all CpG sites", false,
diff --git a/src/analysis/pmd.cpp b/src/analysis/pmd.cpp
index 02529e99..82580040 100644
--- a/src/analysis/pmd.cpp
+++ b/src/analysis/pmd.cpp
@@ -1135,8 +1135,7 @@ main_pmd(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     /****************** COMMAND LINE OPTIONS ********************/
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
                            description, "<methcount-files>");
-    opt_parse.add_opt("out", 'o', "output file (default: stdout)", false,
-                      outfile);
+    opt_parse.add_opt("out", 'o', "output file", true, outfile);
     opt_parse.add_opt("desert", 'd', "max dist between bins with data in PMD",
                       false, desert_size);
     opt_parse.add_opt("fixedbin", 'f', "Fixed bin size", false, fixed_bin_size);
diff --git a/src/analysis/roimethstat.cpp b/src/analysis/roimethstat.cpp
index ab393c70..bb2b6efd 100644
--- a/src/analysis/roimethstat.cpp
+++ b/src/analysis/roimethstat.cpp
@@ -421,7 +421,7 @@ Columns (beyond the first 6) in the BED format output:
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
                            description, "<intervals-bed> <methylation-file>");
     opt_parse.set_show_defaults();
-    opt_parse.add_opt("output", 'o', "Name of output file", true, outfile);
+    opt_parse.add_opt("output", 'o', "output file", true, outfile);
     opt_parse.add_opt("numeric", 'N', "print numeric values only (not NAs)",
                       false, print_numeric_only);
     opt_parse.add_opt("preload", 'L', "load all site sites", false, preload);
diff --git a/src/radmeth/radmeth-merge.cpp b/src/radmeth/radmeth-merge.cpp
index 0f525857..15905956 100644
--- a/src/radmeth/radmeth-merge.cpp
+++ b/src/radmeth/radmeth-merge.cpp
@@ -143,8 +143,7 @@ main_radmeth_merge(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
                            " methylated CpGs into DMRs",
                            "<bed-file-in-radmeth-format>");
     opt_parse.set_show_defaults();
-    opt_parse.add_opt("output", 'o', "output file (default: stdout)", false,
-                      outfile);
+    opt_parse.add_opt("output", 'o', "output file", true, outfile);
     opt_parse.add_opt("cutoff", 'p', "p-value cutoff", false, cutoff);
     std::vector<std::string> leftover_args;
     opt_parse.parse(argc, argv, leftover_args);
diff --git a/src/utils/selectsites.cpp b/src/utils/selectsites.cpp
index 59a4a2f1..49f21112 100644
--- a/src/utils/selectsites.cpp
+++ b/src/utils/selectsites.cpp
@@ -264,8 +264,7 @@ main_selectsites(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     /****************** COMMAND LINE OPTIONS ********************/
     OptionParser opt_parse(argv[0],  // NOLINT(*-pointer-arithmetic)
                            description, "<intervals-bed> <sites-file>", 2);
-    opt_parse.add_opt("output", 'o', "output file (default: stdout)", false,
-                      outfile);
+    opt_parse.add_opt("output", 'o', "output file", true, outfile);
     opt_parse.add_opt("disk", 'd',
                       "process sites on disk "
                       "(fast if target intervals are few)",

From c348cbce9f697a30f1913d225a0b043a9d51ec31 Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 21 Dec 2025 20:20:40 -0800
Subject: [PATCH 3/6] src/utils/selectsites.cpp: using Interval instead of
 Interval6

---
 src/utils/selectsites.cpp | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/utils/selectsites.cpp b/src/utils/selectsites.cpp
index 49f21112..3b3254e9 100644
--- a/src/utils/selectsites.cpp
+++ b/src/utils/selectsites.cpp
@@ -16,7 +16,7 @@ selectsites: Select sites, specified in a methcounts format file, that are
 contained in given (bed format) intervals.
 )";
 
-#include "Interval6.hpp"
+#include "Interval.hpp"
 #include "MSite.hpp"
 #include "OptionParser.hpp"
 
@@ -111,7 +111,7 @@ write_stats_output(const selectsites_summary &summary,
 }
 
 static void
-collapsebed(std::vector<Interval6> &regions) {
+collapsebed(std::vector<Interval> &regions) {
   std::ptrdiff_t j = 0;
   for (const auto &r : regions) {
     if (regions[j].chrom == r.chrom && r.start <= regions[j].stop)
@@ -123,19 +123,19 @@ collapsebed(std::vector<Interval6> &regions) {
 }
 
 static inline bool
-precedes(const Interval6 &r, const MSite &s) {
+precedes(const Interval &r, const MSite &s) {
   return (r.chrom < s.chrom || (r.chrom == s.chrom && r.stop <= s.pos));
 }
 
 static inline bool
-contains(const Interval6 &r, const MSite &s) {
+contains(const Interval &r, const MSite &s) {
   return (r.chrom == s.chrom && (r.start <= s.pos && s.pos < r.stop));
 }
 
 static auto
 process_all_sites(
   const bool verbose, const std::string &sites_file,
-  const std::unordered_map<std::string, std::vector<Interval6>> &regions,
+  const std::unordered_map<std::string, std::vector<Interval>> &regions,
   bamxx::bgzf_file &out) -> std::tuple<std::uint64_t, std::uint64_t> {
   bamxx::bgzf_file in(sites_file, "r");
   if (!in)
@@ -145,7 +145,7 @@ process_all_sites(
   std::uint64_t n_sites_selected = 0u;
 
   MSite the_site, prev_site;
-  std::vector<Interval6>::const_iterator i, i_lim;
+  std::vector<Interval>::const_iterator i, i_lim;
   bool chrom_is_relevant = false;
   while (read_site(in, the_site)) {
     ++n_sites_total;
@@ -173,7 +173,7 @@ process_all_sites(
 }
 
 [[nodiscard]] static auto
-get_sites_in_region(std::ifstream &site_in, const Interval6 &region,
+get_sites_in_region(std::ifstream &site_in, const Interval &region,
                     bamxx::bgzf_file &out) -> std::uint64_t {
   const std::string chrom{region.chrom};
   const std::size_t start_pos = region.start;
@@ -200,7 +200,7 @@ get_sites_in_region(std::ifstream &site_in, const Interval6 &region,
 
 static auto
 process_with_sites_on_disk(const std::string &sites_file,
-                           const std::vector<Interval6> &regions,
+                           const std::vector<Interval> &regions,
                            bamxx::bgzf_file &out) -> std::uint64_t {
   std::ifstream in(sites_file);
   if (!in)
@@ -213,8 +213,8 @@ process_with_sites_on_disk(const std::string &sites_file,
 
 static void
 regions_by_chrom(
-  std::vector<Interval6> &regions,
-  std::unordered_map<std::string, std::vector<Interval6>> &lookup) {
+  std::vector<Interval> &regions,
+  std::unordered_map<std::string, std::vector<Interval>> &lookup) {
   for (auto &&r : regions)
     lookup[r.chrom].push_back(r);
   regions.clear();
@@ -313,7 +313,7 @@ main_selectsites(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
         std::cerr << "input file is so must be loaded\n";
     }
 
-    auto regions = read_intervals6(regions_file);
+    auto regions = read_intervals(regions_file);
     if (!std::is_sorted(std::cbegin(regions), std::cend(regions)))
       throw std::runtime_error("regions not sorted in file: " + regions_file);
     std::tie(summary.n_target_regions, summary.target_region_size) =
@@ -329,7 +329,7 @@ main_selectsites(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
              summary.target_region_collapsed_size) =
       selectsites_summary::measure_target_regions(regions);
 
-    std::unordered_map<std::string, std::vector<Interval6>> regions_lookup;
+    std::unordered_map<std::string, std::vector<Interval>> regions_lookup;
     if (!keep_file_on_disk)
       regions_by_chrom(regions, regions_lookup);
 

From dc94e7361c893b355f21240095c51b9465f1e34a Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 21 Dec 2025 20:21:17 -0800
Subject: [PATCH 4/6] src/common/Interval.cpp: fixing a bug that filed when the
 end of a line was just after the stop site

---
 src/common/Interval.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/common/Interval.cpp b/src/common/Interval.cpp
index 415c2c2a..be073bb0 100644
--- a/src/common/Interval.cpp
+++ b/src/common/Interval.cpp
@@ -56,7 +56,6 @@ Interval::initialize(const char *c, const char *c_end) -> bool {
   // stop
   field_s = std::find_if(field_e + 1, c_end, not_sep);
   field_e = std::find_if(field_s + 1, c_end, is_sep);
-  failed = failed || (field_e == c_end);
   {
     const auto [ptr, ec] = std::from_chars(field_s, field_e, stop);
     failed = failed || ec != std::errc{};

From f2ce2d7015cec0bd867a8add9ca94210a63f39be Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 21 Dec 2025 20:40:42 -0800
Subject: [PATCH 5/6] src/analysis/roimethstat.cpp: reverting to local function
 for formatting levels counter because the format used by roi has been
 slightly different from the general format. Also reading the input intervals
 from either a BED3 or BED6, depending on what the user provides

---
 src/analysis/roimethstat.cpp | 61 +++++++++++++++++++++++++++++++-----
 1 file changed, 54 insertions(+), 7 deletions(-)

diff --git a/src/analysis/roimethstat.cpp b/src/analysis/roimethstat.cpp
index bb2b6efd..560e5b41 100644
--- a/src/analysis/roimethstat.cpp
+++ b/src/analysis/roimethstat.cpp
@@ -15,7 +15,9 @@
  * more details.
  */
 
+#include "Interval.hpp"
 #include "Interval6.hpp"
+
 #include "LevelsCounter.hpp"
 #include "MSite.hpp"
 #include "OptionParser.hpp"
@@ -41,6 +43,29 @@
 
 // NOLINTBEGIN(*-narrowing-conversions)
 
+[[nodiscard]] static std::string
+format_levels_counter_for_roi(const LevelsCounter &lc) {
+  // ...
+  // (7) weighted mean methylation
+  // (8) unweighted mean methylation
+  // (9) fractional methylation
+  // (10) number of sites in the region
+  // (11) number of sites covered at least once
+  // (12) number of observations in reads indicating methylation
+  // (13) total number of observations from reads in the region
+  std::ostringstream oss;
+  // clang-format off
+  oss << lc.mean_meth_weighted() << '\t'
+      << lc.mean_meth() << '\t'
+      << lc.fractional_meth() << '\t'
+      << lc.total_sites << '\t'
+      << lc.sites_covered << '\t'
+      << lc.total_c << '\t'
+      << lc.coverage();
+  // clang-format on
+  return oss.str();
+}
+
 static void
 update(LevelsCounter &lc, const xcounts_entry &xse) {
   static constexpr auto one_half = 0.5;
@@ -85,7 +110,7 @@ process_chrom(const bool report_more_info, const char level_code,
                 : (level_code == 'u' ? lc.sites_covered : lc.total_called()));
     out << to_string(interval);
     if (report_more_info)
-      out << '\t' << format_levels_counter(lc);
+      out << '\t' << format_levels_counter_for_roi(lc);
     out << '\n';
   }
 }
@@ -94,7 +119,7 @@ static void
 process_chrom(const bool report_more_info,
               const std::vector<Interval6> &intervals, std::ostream &out) {
   LevelsCounter lc;
-  const std::string lc_formatted = format_levels_counter(lc);
+  const std::string lc_formatted = format_levels_counter_for_roi(lc);
   for (const auto &r : intervals) {
     out << to_string(r);
     if (report_more_info)
@@ -109,8 +134,6 @@ process_from_xcounts(const std::uint32_t n_threads, const bool report_more_info,
                      const std::vector<Interval6> &intervals_in,
                      std::ostream &out) {
   const auto sites_by_chrom = read_xcounts_by_chrom(n_threads, xsym_file);
-  // const auto intervals = get_Interval6s(intervals_file);
-
   std::vector<std::vector<Interval6>> intervals_by_chrom;
   std::string prev_chrom;
   for (auto i = 0u; i < std::size(intervals_in); ++i) {
@@ -293,7 +316,7 @@ process_preloaded(
     r_scored.score = score;
     out << to_string(r_scored);
     if (report_more_info)
-      out << '\t' << format_levels_counter(lc);
+      out << '\t' << format_levels_counter_for_roi(lc);
     out << '\n';
   }
 }
@@ -362,7 +385,7 @@ process_on_disk(
     r.score = score;
     out << to_string(r);
     if (report_more_info)
-      out << '\t' << format_levels_counter(lc);
+      out << '\t' << format_levels_counter_for_roi(lc);
     out << '\n';
   }
 }
@@ -382,6 +405,30 @@ get_bed_columns(const std::string &regions_file) {
   return n_columns;
 }
 
+static auto
+get_intervals(const std::string &filename) -> std::vector<Interval6> {
+  const auto field_count = [&] {
+    bamxx::bgzf_file in(filename, "r");
+    if (!in)
+      throw std::runtime_error("cannot open file: " + filename);
+    std::string line;
+    while (getline(in, line) && line[0] == '#')
+      ;
+    std::istringstream iss(line);
+    std::vector<std::string> v(std::istream_iterator<std::string>(iss),
+                               (std::istream_iterator<std::string>()));
+    return std::size(v);
+  }();
+  if (field_count >= 6)
+    return read_intervals6(filename);
+
+  auto without_names = read_intervals(filename);
+  std::vector<Interval6> intervals;
+  for (const auto &w : without_names)
+    intervals.emplace_back(w.chrom, w.start, w.stop, std::string{}, 0, '+');
+  return intervals;
+}
+
 int
 main_roimethstat(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
   try {
@@ -495,7 +542,7 @@ Columns (beyond the first 6) in the BED format output:
       throw std::runtime_error("seems to be a counts file: " + regions_file +
                                "\ncheck order of input arguments");
 
-    auto regions = read_intervals6(regions_file);
+    auto regions = get_intervals(regions_file);
     if (!std::is_sorted(std::begin(regions), std::end(regions))) {
       if (sort_data_if_needed) {
         if (verbose)

From 9b45c5ceb8d9183e915d7ff42abc64e42e70cfbc Mon Sep 17 00:00:00 2001
From: Andrew D Smith <andrewds@usc.edu>
Date: Sun, 21 Dec 2025 20:47:27 -0800
Subject: [PATCH 6/6] src/analysis/pmd.cpp: fixing a bug initializing
 transition matrix

---
 src/analysis/pmd.cpp | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/src/analysis/pmd.cpp b/src/analysis/pmd.cpp
index 82580040..02c6e0ba 100644
--- a/src/analysis/pmd.cpp
+++ b/src/analysis/pmd.cpp
@@ -1083,9 +1083,9 @@ main_pmd(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
   try {
     static constexpr auto min_observations_for_inference{100ul};
     static constexpr auto default_self_transition{0.99};
+    static constexpr auto default_transition{0.01};
     static constexpr auto default_start_transition{0.5};
     static constexpr auto default_end_transition{1e-10};
-    static constexpr auto default_transition{0.01};
 
     // magic numbers from paper: highest jaccard index to wgbs
     static constexpr auto bin_size_for_array{1000ul};
@@ -1100,17 +1100,11 @@ main_pmd(int argc, char *argv[]) {  // NOLINT(*-avoid-c-arrays)
     static constexpr auto default_bg_alpha{0.95};
     static constexpr auto default_bg_beta{0.05};
     const auto init_trans = [&] {
-      return std::vector{
-        std::vector<double>(default_self_transition, default_transition),
-        std::vector<double>(default_transition, default_self_transition),
+      return std::vector<std::vector<double>>{
+        {default_self_transition, default_transition},
+        {default_transition, default_self_transition},
       };
     };
-    // <std::vector<double>>
-    //   std::vector<std::vector<double>> t(
-    //     2, std::vector<double>(2, default_transition));
-    //   t[0][0] = t[1][1] = default_self_transition;
-    //   return t;
-    // };
 
     // NOLINTBEGIN(*-avoid-magic-numbers)
     std::size_t resolution = 500;