From 46a450dc318c09ae5dcbb67c88604e611d607684 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Thu, 4 Dec 2025 00:55:14 -0800 Subject: [PATCH 01/38] working mechanic bshash --- include/btllib/bshash.hpp | 26 + include/btllib/bshash_kmer.hpp | 1142 +++++++++++++++++++++ include/btllib/bshash_kmer_conversion.hpp | 360 +++++++ include/btllib/hashing_internals.hpp | 362 ++++++- include/btllib/nthash_kmer.hpp | 163 ++- tests/bshash.cpp | 898 ++++++++++++++++ tests/bshash_conversion.cpp | 200 ++++ 7 files changed, 3102 insertions(+), 49 deletions(-) create mode 100644 include/btllib/bshash.hpp create mode 100644 include/btllib/bshash_kmer.hpp create mode 100644 include/btllib/bshash_kmer_conversion.hpp create mode 100644 tests/bshash.cpp create mode 100644 tests/bshash_conversion.cpp diff --git a/include/btllib/bshash.hpp b/include/btllib/bshash.hpp new file mode 100644 index 00000000..fdcf8aa6 --- /dev/null +++ b/include/btllib/bshash.hpp @@ -0,0 +1,26 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace btllib { + +/** + * String representing the hash function's name. Only change if hash outputs + * are different from the previous version. Useful for tracking differences in + * saved hashes, e.g., in Bloom filters. + */ +static const char* const BSHASH_FN_NAME = "bsHash_v1"; + +} // namespace btllib \ No newline at end of file diff --git a/include/btllib/bshash_kmer.hpp b/include/btllib/bshash_kmer.hpp new file mode 100644 index 00000000..2503a652 --- /dev/null +++ b/include/btllib/bshash_kmer.hpp @@ -0,0 +1,1142 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace btllib { +using hashing_internals::sror; +using hashing_internals::srol; +constexpr uint8_t CONVERT_TAB_SIMPLE[128] = { + // 0–64 + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0, + 1,0,2,0,0, + 0,3,0,0,0,0,0,0,0,0, + 0,0,0,0,4,0,0,0,0,0, + 0,0,0,0,0,0,0, + 1,0,2, + 0,0,0,3,0,0,0,0,0,0, + 0,0,0,0,0,0,4, + 0,0,0, + 0,0,0,0,0,0,0,0 +}; + +constexpr uint8_t CONVERT_TAB_RC[128] = { + + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0, + 4,0,3,0,0, + 0,2,0,0,0,0,0,0,0,0, + 0,0,0,0,1,0,0,0,0,0, + 0,0,0,0,0,0,0, + 4,0,3, + 0,0,0,2,0,0,0,0,0,0, + 0,0,0,0,0,0,1, + 0,0,0, + 0,0,0,0,0,0,0,0 +}; + +constexpr uint64_t CC_DIMER_TAB[45] = { + 0,0,0,0,0,0,0,0,0,0, // 0–9 + 0,0x1bc5d5aba6ea6e02ULL,0x8f15314b06b5ed9cULL,0x1bc5d5aba6ea6e02ULL,0x8f15314b06b5ed9cULL,0,0,0,0,0, + 0,0x44fec7e4a74bce00ULL,0x77c24a36c934fdc1ULL,0x44fec7e4a74bce00ULL,0x77c24a36c934fdc1ULL,0,0,0,0,0, + 0,0x1bc5d5aba6ea6e02ULL,0x8f15314b06b5ed9cULL,0x1bc5d5aba6ea6e02ULL,0x8f15314b06b5ed9cULL,0,0,0,0,0, + 0,0x44fec7e4a74bce00ULL,0x77c24a36c934fdc1ULL,0x44fec7e4a74bce00ULL,0x77c24a36c934fdc1ULL +}; + + +constexpr uint64_t CG_DIMER_TAB[45] = { + 0,0,0,0,0,0,0,0,0,0, + 0,0x54f8849020caa27fULL,0xab9766c9a48a5d70ULL,0x7f01ee303c53b1bcULL,0xab9766c9a48a5d70ULL,0,0,0,0,0, + 0,0xd22f7891d06d2f5aULL,0xf1bd2f3bbfb10000ULL,0xd22f7891d06d2f5aULL,0xf1bd2f3bbfb10000ULL,0,0,0,0,0, + 0,0x54f8849020caa27fULL,0xab9766c9a48a5d70ULL,0x7f01ee303c53b1bcULL,0xab9766c9a48a5d70ULL,0,0,0,0,0, + 0,0x414ce1fc78d4d36fULL,0xff3adb2d91cd4803ULL,0xd22f7891d06d2f5aULL,0xff3adb2d91cd4803ULL +}; + +constexpr uint64_t CA_DIMER_TAB[45] = { + 0,0,0,0,0,0,0,0,0,0, + 0,0x7e4a36919c5032afULL,0x95b4b90696718a9fULL,0xf482881f5f3610bfULL,0x95b4b90696718a9fULL,0,0,0,0,0, + 0,0xe12f0e8e678e5d30ULL,0x9ff825331cf8a361ULL,0xf248b960f9159cecULL,0x9ff825331cf8a361ULL,0,0,0,0,0, + 0,0x7e4a36919c5032afULL,0x95b4b90696718a9fULL,0xf482881f5f3610bfULL,0x95b4b90696718a9fULL,0,0,0,0,0, + 0,0xe12f0e8e678e5d30ULL,0x00e055ee083cfcffULL,0xe12f0e8e678e5d30ULL,0x00e055ee083cfcffULL +}; + +constexpr uint64_t CT_DIMER_TAB[45] = { + 0,0,0,0,0,0,0,0,0,0, + 0,0x9a533a6a5e7b14e2ULL,0x9a533a6a5e7b14e2ULL,0x9a533a6a5e7b14e2ULL,0x897da34f6b464570ULL,0,0,0,0,0, + 0,0xeeaf42d6e10d9ac0ULL,0x049bab70128da11fULL,0x68c7d4adebfb5001ULL,0x049bab70128da11fULL,0,0,0,0,0, + 0,0x9a533a6a5e7b14e2ULL,0x9a533a6a5e7b14e2ULL,0x9a533a6a5e7b14e2ULL,0x897da34f6b464570ULL,0,0,0,0,0, + 0,0x0f1bfc76a0e3e700ULL,0x049bab70128da11fULL,0x282450c57fae339fULL,0x049bab70128da11fULL +}; + + + +// Generic dimer-based rolling hash +inline std::vector +base_forward_bs_hash(const char* seq, bool has_second, unsigned k, const uint64_t* DIMER_TAB) +{ + uint64_t h0 = 0; + uint64_t h1 = 0; + + std::cout << "[BASE_HASH] Computing forward hashes, k=" << k + << ", has_second=" << has_second << "\n"; + + for (unsigned i = 0; i < k; i += 2) { + h0 = srol(h0, 1); + const char c1 = seq[i]; + const char c2 = seq[i + 1]; + const uint8_t a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; + const uint8_t b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; + const uint8_t loc = 10 * a + b; // yields 11, 12, ..., 44 + h0 ^= DIMER_TAB[loc]; + + std::cout << "[BASE_HASH] h0 loop " << i << " | dimer='" + << c1 << c2 << "' loc=" << (unsigned)loc + << " | h0=" << h0 << "\n"; + } + + if (!has_second) + return {h0, 0}; + + for (unsigned i = 1; i < k + 1; i += 2) { + h1 = srol(h1, 1); + const char c1 = seq[i]; + const char c2 = seq[i + 1]; + const uint8_t a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; + const uint8_t b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; + const uint8_t loc = 10 * a + b; + h1 ^= DIMER_TAB[loc]; + + std::cout << "[BASE_HASH] h1 loop " << i << " | dimer='" + << c1 << c2 << "' loc=" << (unsigned)loc + << " | h1=" << h1 << "\n"; + } + + return {h0, h1}; +} + + + +// ===== CC_group1 ===== +static const uint64_t CC_group1_33R[33] = { + 0xc934fdc1, 0x19269fb82, 0x124d3f705, 0x49a7ee0b, 0x934fdc16, 0x1269fb82c, 0x4d3f7059, 0x9a7ee0b2, 0x134fdc164, 0x69fb82c9, + 0xd3f70592, 0x1a7ee0b24, 0x14fdc1649, 0x9fb82c93, 0x13f705926, 0x7ee0b24d, 0xfdc1649a, 0x1fb82c934, 0x1f7059269, 0x1ee0b24d3, + 0x1dc1649a7, 0x1b82c934f, 0x17059269f, 0xe0b24d3f, 0x1c1649a7e, 0x182c934fd, 0x1059269fb, 0xb24d3f7, 0x1649a7ee, 0x2c934fdc, + 0x59269fb8, 0xb24d3f70, 0x1649a7ee0 +}; + +static const uint64_t CC_group1_31L[31] = { + 0x77c24a3600000000, 0xef84946c00000000, 0xdf0928da00000000, 0xbe1251b600000000, 0x7c24a36e00000000, 0xf84946dc00000000, 0xf0928dba00000000, 0xe1251b7600000000, 0xc24a36ee00000000, 0x84946dde00000000, + 0x928dbbe00000000, 0x1251b77c00000000, 0x24a36ef800000000, 0x4946ddf000000000, 0x928dbbe000000000, 0x251b77c200000000, 0x4a36ef8400000000, 0x946ddf0800000000, 0x28dbbe1200000000, 0x51b77c2400000000, + 0xa36ef84800000000, 0x46ddf09200000000, 0x8dbbe12400000000, 0x1b77c24a00000000, 0x36ef849400000000, 0x6ddf092800000000, 0xdbbe125000000000, 0xb77c24a200000000, 0x6ef8494600000000, 0xddf0928c00000000, + 0xbbe1251a00000000 +}; + +// ===== CC_group2 ===== +static const uint64_t CC_group2_33R[33] = { + 0x1a6ea6e02, 0x14dd4dc05, 0x9ba9b80b, 0x137537016, 0x6ea6e02d, 0xdd4dc05a, 0x1ba9b80b4, 0x175370169, 0xea6e02d3, 0x1d4dc05a6, + 0x1a9b80b4d, 0x15370169b, 0xa6e02d37, 0x14dc05a6e, 0x9b80b4dd, 0x1370169ba, 0x6e02d375, 0xdc05a6ea, 0x1b80b4dd4, 0x170169ba9, + 0xe02d3753, 0x1c05a6ea6, 0x180b4dd4d, 0x10169ba9b, 0x2d37537, 0x5a6ea6e, 0xb4dd4dc, 0x169ba9b8, 0x2d375370, 0x5a6ea6e0, + 0xb4dd4dc0, 0x169ba9b80, 0xd3753701 +}; + +static const uint64_t CC_group2_31L[31] = { + 0x1bc5d5aa00000000, 0x378bab5400000000, 0x6f1756a800000000, 0xde2ead5000000000, 0xbc5d5aa200000000, 0x78bab54600000000, 0xf1756a8c00000000, 0xe2ead51a00000000, 0xc5d5aa3600000000, 0x8bab546e00000000, + 0x1756a8de00000000, 0x2ead51bc00000000, 0x5d5aa37800000000, 0xbab546f000000000, 0x756a8de200000000, 0xead51bc400000000, 0xd5aa378a00000000, 0xab546f1600000000, 0x56a8de2e00000000, 0xad51bc5c00000000, + 0x5aa378ba00000000, 0xb546f17400000000, 0x6a8de2ea00000000, 0xd51bc5d400000000, 0xaa378baa00000000, 0x546f175600000000, 0xa8de2eac00000000, 0x51bc5d5a00000000, 0xa378bab400000000, 0x46f1756a00000000, + 0x8de2ead400000000 +}; + +// ===== CC_group3 ===== +static const uint64_t CC_group3_33R[33] = { + 0xa74bce00, 0x14e979c00, 0x9d2f3801, 0x13a5e7002, 0x74bce005, 0xe979c00a, 0x1d2f38014, 0x1a5e70029, 0x14bce0053, 0x979c00a7, + 0x12f38014e, 0x5e70029d, 0xbce0053a, 0x179c00a74, 0xf38014e9, 0x1e70029d2, 0x1ce0053a5, 0x19c00a74b, 0x138014e97, 0x70029d2f, + 0xe0053a5e, 0x1c00a74bc, 0x18014e979, 0x10029d2f3, 0x53a5e7, 0xa74bce, 0x14e979c, 0x29d2f38, 0x53a5e70, 0xa74bce0, + 0x14e979c0, 0x29d2f380, 0x53a5e700 +}; + +static const uint64_t CC_group3_31L[31] = { + 0x44fec7e400000000, 0x89fd8fc800000000, 0x13fb1f9200000000, 0x27f63f2400000000, 0x4fec7e4800000000, 0x9fd8fc9000000000, 0x3fb1f92200000000, 0x7f63f24400000000, 0xfec7e48800000000, 0xfd8fc91200000000, + 0xfb1f922600000000, 0xf63f244e00000000, 0xec7e489e00000000, 0xd8fc913e00000000, 0xb1f9227e00000000, 0x63f244fe00000000, 0xc7e489fc00000000, 0x8fc913fa00000000, 0x1f9227f600000000, 0x3f244fec00000000, + 0x7e489fd800000000, 0xfc913fb000000000, 0xf9227f6200000000, 0xf244fec600000000, 0xe489fd8e00000000, 0xc913fb1e00000000, 0x9227f63e00000000, 0x244fec7e00000000, 0x489fd8fc00000000, 0x913fb1f800000000, + 0x227f63f200000000 +}; + +// ===== CC_group4 ===== +static const uint64_t CC_group4_33R[33] = { + 0xb06b5ed9, 0x160d6bdb2, 0xc1ad7b65, 0x1835af6ca, 0x106b5ed95, 0xd6bdb2b, 0x1ad7b656, 0x35af6cac, 0x6b5ed958, 0xd6bdb2b0, + 0x1ad7b6560, 0x15af6cac1, 0xb5ed9583, 0x16bdb2b06, 0xd7b6560d, 0x1af6cac1a, 0x15ed95835, 0xbdb2b06b, 0x17b6560d6, 0xf6cac1ad, + 0x1ed95835a, 0x1db2b06b5, 0x1b6560d6b, 0x16cac1ad7, 0xd95835af, 0x1b2b06b5e, 0x16560d6bd, 0xcac1ad7b, 0x195835af6, 0x12b06b5ed, + 0x560d6bdb, 0xac1ad7b6, 0x15835af6c +}; + +static const uint64_t CC_group4_31L[31] = { + 0x8f1531400000000, 0x11e2a62800000000, 0x23c54c5000000000, 0x478a98a000000000, 0x8f15314000000000, 0x1e2a628200000000, 0x3c54c50400000000, 0x78a98a0800000000, 0xf153141000000000, 0xe2a6282200000000, + 0xc54c504600000000, 0x8a98a08e00000000, 0x1531411e00000000, 0x2a62823c00000000, 0x54c5047800000000, 0xa98a08f000000000, 0x531411e200000000, 0xa62823c400000000, 0x4c50478a00000000, 0x98a08f1400000000, + 0x31411e2a00000000, 0x62823c5400000000, 0xc50478a800000000, 0x8a08f15200000000, 0x1411e2a600000000, 0x2823c54c00000000, 0x50478a9800000000, 0xa08f153000000000, 0x411e2a6200000000, 0x823c54c400000000, + 0x478a98a00000000 +}; + +// ===== CG_group1 ===== +static const uint64_t CG_group1_33R[33] = { + 0x20caa27f, 0x419544fe, 0x832a89fc, 0x1065513f8, 0xcaa27f1, 0x19544fe2, 0x32a89fc4, 0x65513f88, 0xcaa27f10, 0x19544fe20, + 0x12a89fc41, 0x5513f883, 0xaa27f106, 0x1544fe20c, 0xa89fc419, 0x1513f8832, 0xa27f1065, 0x144fe20ca, 0x89fc4195, 0x113f8832a, + 0x27f10655, 0x4fe20caa, 0x9fc41954, 0x13f8832a8, 0x7f106551, 0xfe20caa2, 0x1fc419544, 0x1f8832a89, 0x1f1065513, 0x1e20caa27, + 0x1c419544f, 0x18832a89f, 0x11065513f +}; + +static const uint64_t CG_group1_31L[31] = { + 0x54f8849000000000, 0xa9f1092000000000, 0x53e2124200000000, 0xa7c4248400000000, 0x4f88490a00000000, 0x9f10921400000000, 0x3e21242a00000000, 0x7c42485400000000, 0xf88490a800000000, 0xf109215200000000, + 0xe21242a600000000, 0xc424854e00000000, 0x88490a9e00000000, 0x1092153e00000000, 0x21242a7c00000000, 0x424854f800000000, 0x8490a9f000000000, 0x92153e200000000, 0x1242a7c400000000, 0x24854f8800000000, + 0x490a9f1000000000, 0x92153e2000000000, 0x242a7c4200000000, 0x4854f88400000000, 0x90a9f10800000000, 0x2153e21200000000, 0x42a7c42400000000, 0x854f884800000000, 0xa9f109200000000, 0x153e212400000000, + 0x2a7c424800000000 +}; + +// ===== CG_group2 ===== +static const uint64_t CG_group2_33R[33] = { + 0x1a48a5d70, 0x14914bae1, 0x922975c3, 0x12452eb86, 0x48a5d70d, 0x914bae1a, 0x122975c34, 0x452eb869, 0x8a5d70d2, 0x114bae1a4, + 0x2975c349, 0x52eb8692, 0xa5d70d24, 0x14bae1a48, 0x975c3491, 0x12eb86922, 0x5d70d245, 0xbae1a48a, 0x175c34914, 0xeb869229, + 0x1d70d2452, 0x1ae1a48a5, 0x15c34914b, 0xb8692297, 0x170d2452e, 0xe1a48a5d, 0x1c34914ba, 0x186922975, 0x10d2452eb, 0x1a48a5d7, + 0x34914bae, 0x6922975c, 0xd2452eb8 +}; + +static const uint64_t CG_group2_31L[31] = { + 0xab9766c800000000, 0x572ecd9200000000, 0xae5d9b2400000000, 0x5cbb364a00000000, 0xb9766c9400000000, 0x72ecd92a00000000, 0xe5d9b25400000000, 0xcbb364aa00000000, 0x9766c95600000000, 0x2ecd92ae00000000, + 0x5d9b255c00000000, 0xbb364ab800000000, 0x766c957200000000, 0xecd92ae400000000, 0xd9b255ca00000000, 0xb364ab9600000000, 0x66c9572e00000000, 0xcd92ae5c00000000, 0x9b255cba00000000, 0x364ab97600000000, + 0x6c9572ec00000000, 0xd92ae5d800000000, 0xb255cbb200000000, 0x64ab976600000000, 0xc9572ecc00000000, 0x92ae5d9a00000000, 0x255cbb3600000000, 0x4ab9766c00000000, 0x9572ecd800000000, 0x2ae5d9b200000000, + 0x55cbb36400000000 +}; + +// ===== CG_group3 ===== +static const uint64_t CG_group3_33R[33] = { + 0x3c53b1bc, 0x78a76378, 0xf14ec6f0, 0x1e29d8de0, 0x1c53b1bc1, 0x18a763783, 0x114ec6f07, 0x29d8de0f, 0x53b1bc1e, 0xa763783c, + 0x14ec6f078, 0x9d8de0f1, 0x13b1bc1e2, 0x763783c5, 0xec6f078a, 0x1d8de0f14, 0x1b1bc1e29, 0x163783c53, 0xc6f078a7, 0x18de0f14e, + 0x11bc1e29d, 0x3783c53b, 0x6f078a76, 0xde0f14ec, 0x1bc1e29d8, 0x1783c53b1, 0xf078a763, 0x1e0f14ec6, 0x1c1e29d8d, 0x183c53b1b, + 0x1078a7637, 0xf14ec6f, 0x1e29d8de +}; + +static const uint64_t CG_group3_31L[31] = { + 0x7f01ee3000000000, 0xfe03dc6000000000, 0xfc07b8c200000000, 0xf80f718600000000, 0xf01ee30e00000000, 0xe03dc61e00000000, 0xc07b8c3e00000000, 0x80f7187e00000000, 0x1ee30fe00000000, 0x3dc61fc00000000, + 0x7b8c3f800000000, 0xf7187f000000000, 0x1ee30fe000000000, 0x3dc61fc000000000, 0x7b8c3f8000000000, 0xf7187f0000000000, 0xee30fe0200000000, 0xdc61fc0600000000, 0xb8c3f80e00000000, 0x7187f01e00000000, + 0xe30fe03c00000000, 0xc61fc07a00000000, 0x8c3f80f600000000, 0x187f01ee00000000, 0x30fe03dc00000000, 0x61fc07b800000000, 0xc3f80f7000000000, 0x87f01ee200000000, 0xfe03dc600000000, 0x1fc07b8c00000000, + 0x3f80f71800000000 +}; + +// ===== CG_group4 ===== +static const uint64_t CG_group4_33R[33] = { + 0x1d06d2f5a, 0x1a0da5eb5, 0x141b4bd6b, 0x83697ad7, 0x106d2f5ae, 0xda5eb5d, 0x1b4bd6ba, 0x3697ad74, 0x6d2f5ae8, 0xda5eb5d0, + 0x1b4bd6ba0, 0x1697ad741, 0xd2f5ae83, 0x1a5eb5d06, 0x14bd6ba0d, 0x97ad741b, 0x12f5ae836, 0x5eb5d06d, 0xbd6ba0da, 0x17ad741b4, + 0xf5ae8369, 0x1eb5d06d2, 0x1d6ba0da5, 0x1ad741b4b, 0x15ae83697, 0xb5d06d2f, 0x16ba0da5e, 0xd741b4bd, 0x1ae83697a, 0x15d06d2f5, + 0xba0da5eb, 0x1741b4bd6, 0xe83697ad +}; + +static const uint64_t CG_group4_31L[31] = { + 0xd22f789000000000, 0xa45ef12200000000, 0x48bde24600000000, 0x917bc48c00000000, 0x22f7891a00000000, 0x45ef123400000000, 0x8bde246800000000, 0x17bc48d200000000, 0x2f7891a400000000, 0x5ef1234800000000, + 0xbde2469000000000, 0x7bc48d2200000000, 0xf7891a4400000000, 0xef12348a00000000, 0xde24691600000000, 0xbc48d22e00000000, 0x7891a45e00000000, 0xf12348bc00000000, 0xe246917a00000000, 0xc48d22f600000000, + 0x891a45ee00000000, 0x12348bde00000000, 0x246917bc00000000, 0x48d22f7800000000, 0x91a45ef000000000, 0x2348bde200000000, 0x46917bc400000000, 0x8d22f78800000000, 0x1a45ef1200000000, 0x348bde2400000000, + 0x6917bc4800000000 +}; + +// ===== CG_group5 ===== +static const uint64_t CG_group5_33R[33] = { + 0x1bfb10000, 0x17f620001, 0xfec40003, 0x1fd880006, 0x1fb10000d, 0x1f620001b, 0x1ec400037, 0x1d880006f, 0x1b10000df, 0x1620001bf, + 0xc400037f, 0x1880006fe, 0x110000dfd, 0x20001bfb, 0x400037f6, 0x80006fec, 0x10000dfd8, 0x1bfb1, 0x37f62, 0x6fec4, + 0xdfd88, 0x1bfb10, 0x37f620, 0x6fec40, 0xdfd880, 0x1bfb100, 0x37f6200, 0x6fec400, 0xdfd8800, 0x1bfb1000, + 0x37f62000, 0x6fec4000, 0xdfd88000 +}; + +static const uint64_t CG_group5_31L[31] = { + 0xf1bd2f3a00000000, 0xe37a5e7600000000, 0xc6f4bcee00000000, 0x8de979de00000000, 0x1bd2f3be00000000, 0x37a5e77c00000000, 0x6f4bcef800000000, 0xde979df000000000, 0xbd2f3be200000000, 0x7a5e77c600000000, + 0xf4bcef8c00000000, 0xe979df1a00000000, 0xd2f3be3600000000, 0xa5e77c6e00000000, 0x4bcef8de00000000, 0x979df1bc00000000, 0x2f3be37a00000000, 0x5e77c6f400000000, 0xbcef8de800000000, 0x79df1bd200000000, + 0xf3be37a400000000, 0xe77c6f4a00000000, 0xcef8de9600000000, 0x9df1bd2e00000000, 0x3be37a5e00000000, 0x77c6f4bc00000000, 0xef8de97800000000, 0xdf1bd2f200000000, 0xbe37a5e600000000, 0x7c6f4bce00000000, + 0xf8de979c00000000 +}; + +// ===== CG_group6 ===== +static const uint64_t CG_group6_33R[33] = { + 0x78d4d36f, 0xf1a9a6de, 0x1e3534dbc, 0x1c6a69b79, 0x18d4d36f3, 0x11a9a6de7, 0x3534dbcf, 0x6a69b79e, 0xd4d36f3c, 0x1a9a6de78, + 0x1534dbcf1, 0xa69b79e3, 0x14d36f3c6, 0x9a6de78d, 0x134dbcf1a, 0x69b79e35, 0xd36f3c6a, 0x1a6de78d4, 0x14dbcf1a9, 0x9b79e353, + 0x136f3c6a6, 0x6de78d4d, 0xdbcf1a9a, 0x1b79e3534, 0x16f3c6a69, 0xde78d4d3, 0x1bcf1a9a6, 0x179e3534d, 0xf3c6a69b, 0x1e78d4d36, + 0x1cf1a9a6d, 0x19e3534db, 0x13c6a69b7 +}; + +static const uint64_t CG_group6_31L[31] = { + 0x414ce1fc00000000, 0x8299c3f800000000, 0x53387f200000000, 0xa670fe400000000, 0x14ce1fc800000000, 0x299c3f9000000000, 0x53387f2000000000, 0xa670fe4000000000, 0x4ce1fc8200000000, 0x99c3f90400000000, + 0x3387f20a00000000, 0x670fe41400000000, 0xce1fc82800000000, 0x9c3f905200000000, 0x387f20a600000000, 0x70fe414c00000000, 0xe1fc829800000000, 0xc3f9053200000000, 0x87f20a6600000000, 0xfe414ce00000000, + 0x1fc8299c00000000, 0x3f90533800000000, 0x7f20a67000000000, 0xfe414ce000000000, 0xfc8299c200000000, 0xf905338600000000, 0xf20a670e00000000, 0xe414ce1e00000000, 0xc8299c3e00000000, 0x9053387e00000000, + 0x20a670fe00000000 +}; + +// ===== CG_group7 ===== +static const uint64_t CG_group7_33R[33] = { + 0x191cd4803, 0x1239a9007, 0x4735200f, 0x8e6a401e, 0x11cd4803c, 0x39a90079, 0x735200f2, 0xe6a401e4, 0x1cd4803c8, 0x19a900791, + 0x135200f23, 0x6a401e47, 0xd4803c8e, 0x1a900791c, 0x15200f239, 0xa401e473, 0x14803c8e6, 0x900791cd, 0x1200f239a, 0x401e4735, + 0x803c8e6a, 0x100791cd4, 0xf239a9, 0x1e47352, 0x3c8e6a4, 0x791cd48, 0xf239a90, 0x1e473520, 0x3c8e6a40, 0x791cd480, + 0xf239a900, 0x1e4735200, 0x1c8e6a401 +}; + +static const uint64_t CG_group7_31L[31] = { + 0xff3adb2c00000000, 0xfe75b65a00000000, 0xfceb6cb600000000, 0xf9d6d96e00000000, 0xf3adb2de00000000, 0xe75b65be00000000, 0xceb6cb7e00000000, 0x9d6d96fe00000000, 0x3adb2dfe00000000, 0x75b65bfc00000000, + 0xeb6cb7f800000000, 0xd6d96ff200000000, 0xadb2dfe600000000, 0x5b65bfce00000000, 0xb6cb7f9c00000000, 0x6d96ff3a00000000, 0xdb2dfe7400000000, 0xb65bfcea00000000, 0x6cb7f9d600000000, 0xd96ff3ac00000000, + 0xb2dfe75a00000000, 0x65bfceb600000000, 0xcb7f9d6c00000000, 0x96ff3ada00000000, 0x2dfe75b600000000, 0x5bfceb6c00000000, 0xb7f9d6d800000000, 0x6ff3adb200000000, 0xdfe75b6400000000, 0xbfceb6ca00000000, + 0x7f9d6d9600000000 +}; + +// ===== CT_group1 ===== +static const uint64_t CT_group1_33R[33] = { + 0x128da11f, 0x251b423e, 0x4a36847c, 0x946d08f8, 0x128da11f0, 0x51b423e1, 0xa36847c2, 0x146d08f84, 0x8da11f09, 0x11b423e12, + 0x36847c25, 0x6d08f84a, 0xda11f094, 0x1b423e128, 0x16847c251, 0xd08f84a3, 0x1a11f0946, 0x1423e128d, 0x847c251b, 0x108f84a36, + 0x11f0946d, 0x23e128da, 0x47c251b4, 0x8f84a368, 0x11f0946d0, 0x3e128da1, 0x7c251b42, 0xf84a3684, 0x1f0946d08, 0x1e128da11, + 0x1c251b423, 0x184a36847, 0x10946d08f +}; + +static const uint64_t CT_group1_31L[31] = { + 0x49bab7000000000, 0x93756e000000000, 0x126eadc000000000, 0x24dd5b8000000000, 0x49bab70000000000, 0x93756e0000000000, 0x26eadc0200000000, 0x4dd5b80400000000, 0x9bab700800000000, 0x3756e01200000000, + 0x6eadc02400000000, 0xdd5b804800000000, 0xbab7009200000000, 0x756e012600000000, 0xeadc024c00000000, 0xd5b8049a00000000, 0xab70093600000000, 0x56e0126e00000000, 0xadc024dc00000000, 0x5b8049ba00000000, + 0xb700937400000000, 0x6e0126ea00000000, 0xdc024dd400000000, 0xb8049baa00000000, 0x7009375600000000, 0xe0126eac00000000, 0xc024dd5a00000000, 0x8049bab600000000, 0x93756e00000000, 0x126eadc00000000, + 0x24dd5b800000000 +}; + +// ===== CT_group2 ===== +static const uint64_t CT_group2_33R[33] = { + 0x5e7b14e2, 0xbcf629c4, 0x179ec5388, 0xf3d8a711, 0x1e7b14e22, 0x1cf629c45, 0x19ec5388b, 0x13d8a7117, 0x7b14e22f, 0xf629c45e, + 0x1ec5388bc, 0x1d8a71179, 0x1b14e22f3, 0x1629c45e7, 0xc5388bcf, 0x18a71179e, 0x114e22f3d, 0x29c45e7b, 0x5388bcf6, 0xa71179ec, + 0x14e22f3d8, 0x9c45e7b1, 0x1388bcf62, 0x71179ec5, 0xe22f3d8a, 0x1c45e7b14, 0x188bcf629, 0x11179ec53, 0x22f3d8a7, 0x45e7b14e, + 0x8bcf629c, 0x1179ec538, 0x2f3d8a71 +}; + +static const uint64_t CT_group2_31L[31] = { + 0x9a533a6a00000000, 0x34a674d600000000, 0x694ce9ac00000000, 0xd299d35800000000, 0xa533a6b200000000, 0x4a674d6600000000, 0x94ce9acc00000000, 0x299d359a00000000, 0x533a6b3400000000, 0xa674d66800000000, + 0x4ce9acd200000000, 0x99d359a400000000, 0x33a6b34a00000000, 0x674d669400000000, 0xce9acd2800000000, 0x9d359a5200000000, 0x3a6b34a600000000, 0x74d6694c00000000, 0xe9acd29800000000, 0xd359a53200000000, + 0xa6b34a6600000000, 0x4d6694ce00000000, 0x9acd299c00000000, 0x359a533a00000000, 0x6b34a67400000000, 0xd6694ce800000000, 0xacd299d200000000, 0x59a533a600000000, 0xb34a674c00000000, 0x6694ce9a00000000, + 0xcd299d3400000000 +}; + +// ===== CT_group3 ===== +static const uint64_t CT_group3_33R[33] = { + 0xe10d9ac0, 0x1c21b3580, 0x184366b01, 0x1086cd603, 0x10d9ac07, 0x21b3580e, 0x4366b01c, 0x86cd6038, 0x10d9ac070, 0x1b3580e1, + 0x366b01c2, 0x6cd60384, 0xd9ac0708, 0x1b3580e10, 0x166b01c21, 0xcd603843, 0x19ac07086, 0x13580e10d, 0x6b01c21b, 0xd6038436, + 0x1ac07086c, 0x1580e10d9, 0xb01c21b3, 0x160384366, 0xc07086cd, 0x180e10d9a, 0x101c21b35, 0x384366b, 0x7086cd6, 0xe10d9ac, + 0x1c21b358, 0x384366b0, 0x7086cd60 +}; + +static const uint64_t CT_group3_31L[31] = { + 0xeeaf42d600000000, 0xdd5e85ae00000000, 0xbabd0b5e00000000, 0x757a16be00000000, 0xeaf42d7c00000000, 0xd5e85afa00000000, 0xabd0b5f600000000, 0x57a16bee00000000, 0xaf42d7dc00000000, 0x5e85afba00000000, + 0xbd0b5f7400000000, 0x7a16beea00000000, 0xf42d7dd400000000, 0xe85afbaa00000000, 0xd0b5f75600000000, 0xa16beeae00000000, 0x42d7dd5e00000000, 0x85afbabc00000000, 0xb5f757a00000000, 0x16beeaf400000000, + 0x2d7dd5e800000000, 0x5afbabd000000000, 0xb5f757a000000000, 0x6beeaf4200000000, 0xd7dd5e8400000000, 0xafbabd0a00000000, 0x5f757a1600000000, 0xbeeaf42c00000000, 0x7dd5e85a00000000, 0xfbabd0b400000000, + 0xf757a16a00000000 +}; + +// ===== CT_group4 ===== +static const uint64_t CT_group4_33R[33] = { + 0x1ebfb5001, 0x1d7f6a003, 0x1afed4007, 0x15fda800f, 0xbfb5001f, 0x17f6a003e, 0xfed4007d, 0x1fda800fa, 0x1fb5001f5, 0x1f6a003eb, + 0x1ed4007d7, 0x1da800faf, 0x1b5001f5f, 0x16a003ebf, 0xd4007d7f, 0x1a800fafe, 0x15001f5fd, 0xa003ebfb, 0x14007d7f6, 0x800fafed, + 0x1001f5fda, 0x3ebfb5, 0x7d7f6a, 0xfafed4, 0x1f5fda8, 0x3ebfb50, 0x7d7f6a0, 0xfafed40, 0x1f5fda80, 0x3ebfb500, + 0x7d7f6a00, 0xfafed400, 0x1f5fda800 +}; + +static const uint64_t CT_group4_31L[31] = { + 0x68c7d4ac00000000, 0xd18fa95800000000, 0xa31f52b200000000, 0x463ea56600000000, 0x8c7d4acc00000000, 0x18fa959a00000000, 0x31f52b3400000000, 0x63ea566800000000, 0xc7d4acd000000000, 0x8fa959a200000000, + 0x1f52b34600000000, 0x3ea5668c00000000, 0x7d4acd1800000000, 0xfa959a3000000000, 0xf52b346200000000, 0xea5668c600000000, 0xd4acd18e00000000, 0xa959a31e00000000, 0x52b3463e00000000, 0xa5668c7c00000000, + 0x4acd18fa00000000, 0x959a31f400000000, 0x2b3463ea00000000, 0x5668c7d400000000, 0xacd18fa800000000, 0x59a31f5200000000, 0xb3463ea400000000, 0x668c7d4a00000000, 0xcd18fa9400000000, 0x9a31f52a00000000, + 0x3463ea5600000000 +}; + +// ===== CT_group5 ===== +static const uint64_t CT_group5_33R[33] = { + 0xa0e3e700, 0x141c7ce00, 0x838f9c01, 0x1071f3802, 0xe3e7005, 0x1c7ce00a, 0x38f9c014, 0x71f38028, 0xe3e70050, 0x1c7ce00a0, + 0x18f9c0141, 0x11f380283, 0x3e700507, 0x7ce00a0e, 0xf9c0141c, 0x1f3802838, 0x1e7005071, 0x1ce00a0e3, 0x19c0141c7, 0x13802838f, + 0x7005071f, 0xe00a0e3e, 0x1c0141c7c, 0x1802838f9, 0x1005071f3, 0xa0e3e7, 0x141c7ce, 0x2838f9c, 0x5071f38, 0xa0e3e70, + 0x141c7ce0, 0x2838f9c0, 0x5071f380 +}; + +static const uint64_t CT_group5_31L[31] = { + 0xf1bfc7600000000, 0x1e37f8ec00000000, 0x3c6ff1d800000000, 0x78dfe3b000000000, 0xf1bfc76000000000, 0xe37f8ec200000000, 0xc6ff1d8600000000, 0x8dfe3b0e00000000, 0x1bfc761e00000000, 0x37f8ec3c00000000, + 0x6ff1d87800000000, 0xdfe3b0f000000000, 0xbfc761e200000000, 0x7f8ec3c600000000, 0xff1d878c00000000, 0xfe3b0f1a00000000, 0xfc761e3600000000, 0xf8ec3c6e00000000, 0xf1d878de00000000, 0xe3b0f1be00000000, + 0xc761e37e00000000, 0x8ec3c6fe00000000, 0x1d878dfe00000000, 0x3b0f1bfc00000000, 0x761e37f800000000, 0xec3c6ff000000000, 0xd878dfe200000000, 0xb0f1bfc600000000, 0x61e37f8e00000000, 0xc3c6ff1c00000000, + 0x878dfe3a00000000 +}; + +// ===== CT_group6 ===== +static const uint64_t CT_group6_33R[33] = { + 0x17fae339f, 0xff5c673f, 0x1feb8ce7e, 0x1fd719cfd, 0x1fae339fb, 0x1f5c673f7, 0x1eb8ce7ef, 0x1d719cfdf, 0x1ae339fbf, 0x15c673f7f, + 0xb8ce7eff, 0x1719cfdfe, 0xe339fbfd, 0x1c673f7fa, 0x18ce7eff5, 0x119cfdfeb, 0x339fbfd7, 0x673f7fae, 0xce7eff5c, 0x19cfdfeb8, + 0x139fbfd71, 0x73f7fae3, 0xe7eff5c6, 0x1cfdfeb8c, 0x19fbfd719, 0x13f7fae33, 0x7eff5c67, 0xfdfeb8ce, 0x1fbfd719c, 0x1f7fae339, + 0x1eff5c673, 0x1dfeb8ce7, 0x1bfd719cf +}; + +static const uint64_t CT_group6_31L[31] = { + 0x282450c400000000, 0x5048a18800000000, 0xa091431000000000, 0x4122862200000000, 0x82450c4400000000, 0x48a188a00000000, 0x914311400000000, 0x1228622800000000, 0x2450c45000000000, 0x48a188a000000000, + 0x9143114000000000, 0x2286228200000000, 0x450c450400000000, 0x8a188a0800000000, 0x1431141200000000, 0x2862282400000000, 0x50c4504800000000, 0xa188a09000000000, 0x4311412200000000, 0x8622824400000000, + 0xc45048a00000000, 0x188a091400000000, 0x3114122800000000, 0x6228245000000000, 0xc45048a000000000, 0x88a0914200000000, 0x1141228600000000, 0x2282450c00000000, 0x45048a1800000000, 0x8a09143000000000, + 0x1412286200000000 +}; + +// ===== CT_group7 ===== +static const uint64_t CT_group7_33R[33] = { + 0x16b464570, 0xd68c8ae1, 0x1ad1915c2, 0x15a322b85, 0xb464570b, 0x168c8ae16, 0xd1915c2d, 0x1a322b85a, 0x1464570b5, 0x8c8ae16b, + 0x11915c2d6, 0x322b85ad, 0x64570b5a, 0xc8ae16b4, 0x1915c2d68, 0x122b85ad1, 0x4570b5a3, 0x8ae16b46, 0x115c2d68c, 0x2b85ad19, + 0x570b5a32, 0xae16b464, 0x15c2d68c8, 0xb85ad191, 0x170b5a322, 0xe16b4645, 0x1c2d68c8a, 0x185ad1915, 0x10b5a322b, 0x16b46457, + 0x2d68c8ae, 0x5ad1915c, 0xb5a322b8 +}; + +static const uint64_t CT_group7_31L[31] = { + 0x897da34e00000000, 0x12fb469e00000000, 0x25f68d3c00000000, 0x4bed1a7800000000, 0x97da34f000000000, 0x2fb469e200000000, 0x5f68d3c400000000, 0xbed1a78800000000, 0x7da34f1200000000, 0xfb469e2400000000, + 0xf68d3c4a00000000, 0xed1a789600000000, 0xda34f12e00000000, 0xb469e25e00000000, 0x68d3c4be00000000, 0xd1a7897c00000000, 0xa34f12fa00000000, 0x469e25f600000000, 0x8d3c4bec00000000, 0x1a7897da00000000, + 0x34f12fb400000000, 0x69e25f6800000000, 0xd3c4bed000000000, 0xa7897da200000000, 0x4f12fb4600000000, 0x9e25f68c00000000, 0x3c4bed1a00000000, 0x7897da3400000000, 0xf12fb46800000000, 0xe25f68d200000000, + 0xc4bed1a600000000 +}; + +// ===== CA_group1 ===== +static const uint64_t CA_group1_33R[33] = { + 0x678e5d30, 0xcf1cba60, 0x19e3974c0, 0x13c72e981, 0x78e5d303, 0xf1cba606, 0x1e3974c0c, 0x1c72e9819, 0x18e5d3033, 0x11cba6067, + 0x3974c0cf, 0x72e9819e, 0xe5d3033c, 0x1cba60678, 0x1974c0cf1, 0x12e9819e3, 0x5d3033c7, 0xba60678e, 0x174c0cf1c, 0xe9819e39, + 0x1d3033c72, 0x1a60678e5, 0x14c0cf1cb, 0x9819e397, 0x13033c72e, 0x60678e5d, 0xc0cf1cba, 0x1819e3974, 0x1033c72e9, 0x678e5d3, + 0xcf1cba6, 0x19e3974c, 0x33c72e98 +}; + +static const uint64_t CA_group1_31L[31] = { + 0xe12f0e8e00000000, 0xc25e1d1e00000000, 0x84bc3a3e00000000, 0x978747e00000000, 0x12f0e8fc00000000, 0x25e1d1f800000000, 0x4bc3a3f000000000, 0x978747e000000000, 0x2f0e8fc200000000, 0x5e1d1f8400000000, + 0xbc3a3f0800000000, 0x78747e1200000000, 0xf0e8fc2400000000, 0xe1d1f84a00000000, 0xc3a3f09600000000, 0x8747e12e00000000, 0xe8fc25e00000000, 0x1d1f84bc00000000, 0x3a3f097800000000, 0x747e12f000000000, + 0xe8fc25e000000000, 0xd1f84bc200000000, 0xa3f0978600000000, 0x47e12f0e00000000, 0x8fc25e1c00000000, 0x1f84bc3a00000000, 0x3f09787400000000, 0x7e12f0e800000000, 0xfc25e1d000000000, 0xf84bc3a200000000, + 0xf097874600000000 +}; + +// ===== CA_group2 ===== +static const uint64_t CA_group2_33R[33] = { + 0x96718a9f, 0x12ce3153e, 0x59c62a7d, 0xb38c54fa, 0x16718a9f4, 0xce3153e9, 0x19c62a7d2, 0x138c54fa5, 0x718a9f4b, 0xe3153e96, + 0x1c62a7d2c, 0x18c54fa59, 0x118a9f4b3, 0x3153e967, 0x62a7d2ce, 0xc54fa59c, 0x18a9f4b38, 0x1153e9671, 0x2a7d2ce3, 0x54fa59c6, + 0xa9f4b38c, 0x153e96718, 0xa7d2ce31, 0x14fa59c62, 0x9f4b38c5, 0x13e96718a, 0x7d2ce315, 0xfa59c62a, 0x1f4b38c54, 0x1e96718a9, + 0x1d2ce3153, 0x1a59c62a7, 0x14b38c54f +}; + +static const uint64_t CA_group2_31L[31] = { + 0x95b4b90600000000, 0x2b69720e00000000, 0x56d2e41c00000000, 0xada5c83800000000, 0x5b4b907200000000, 0xb69720e400000000, 0x6d2e41ca00000000, 0xda5c839400000000, 0xb4b9072a00000000, 0x69720e5600000000, + 0xd2e41cac00000000, 0xa5c8395a00000000, 0x4b9072b600000000, 0x9720e56c00000000, 0x2e41cada00000000, 0x5c8395b400000000, 0xb9072b6800000000, 0x720e56d200000000, 0xe41cada400000000, 0xc8395b4a00000000, + 0x9072b69600000000, 0x20e56d2e00000000, 0x41cada5c00000000, 0x8395b4b800000000, 0x72b697200000000, 0xe56d2e400000000, 0x1cada5c800000000, 0x395b4b9000000000, 0x72b6972000000000, 0xe56d2e4000000000, + 0xcada5c8200000000 +}; + +// ===== CA_group3 ===== +static const uint64_t CA_group3_33R[33] = { + 0x15f3610bf, 0xbe6c217f, 0x17cd842fe, 0xf9b085fd, 0x1f3610bfa, 0x1e6c217f5, 0x1cd842feb, 0x19b085fd7, 0x13610bfaf, 0x6c217f5f, + 0xd842febe, 0x1b085fd7c, 0x1610bfaf9, 0xc217f5f3, 0x1842febe6, 0x1085fd7cd, 0x10bfaf9b, 0x217f5f36, 0x42febe6c, 0x85fd7cd8, + 0x10bfaf9b0, 0x17f5f361, 0x2febe6c2, 0x5fd7cd84, 0xbfaf9b08, 0x17f5f3610, 0xfebe6c21, 0x1fd7cd842, 0x1faf9b085, 0x1f5f3610b, + 0x1ebe6c217, 0x1d7cd842f, 0x1af9b085f +}; + +static const uint64_t CA_group3_31L[31] = { + 0xf482881e00000000, 0xe905103e00000000, 0xd20a207e00000000, 0xa41440fe00000000, 0x482881fe00000000, 0x905103fc00000000, 0x20a207fa00000000, 0x41440ff400000000, 0x82881fe800000000, 0x5103fd200000000, + 0xa207fa400000000, 0x1440ff4800000000, 0x2881fe9000000000, 0x5103fd2000000000, 0xa207fa4000000000, 0x440ff48200000000, 0x881fe90400000000, 0x103fd20a00000000, 0x207fa41400000000, 0x40ff482800000000, + 0x81fe905000000000, 0x3fd20a200000000, 0x7fa414400000000, 0xff4828800000000, 0x1fe9051000000000, 0x3fd20a2000000000, 0x7fa4144000000000, 0xff48288000000000, 0xfe90510200000000, 0xfd20a20600000000, + 0xfa41440e00000000 +}; + +// ===== CA_group4 ===== +static const uint64_t CA_group4_33R[33] = { + 0x19c5032af, 0x138a0655f, 0x7140cabf, 0xe281957e, 0x1c5032afc, 0x18a0655f9, 0x1140cabf3, 0x281957e7, 0x5032afce, 0xa0655f9c, + 0x140cabf38, 0x81957e71, 0x1032afce2, 0x655f9c5, 0xcabf38a, 0x1957e714, 0x32afce28, 0x655f9c50, 0xcabf38a0, 0x1957e7140, + 0x12afce281, 0x55f9c503, 0xabf38a06, 0x157e7140c, 0xafce2819, 0x15f9c5032, 0xbf38a065, 0x17e7140ca, 0xfce28195, 0x1f9c5032a, + 0x1f38a0655, 0x1e7140cab, 0x1ce281957 +}; + +static const uint64_t CA_group4_31L[31] = { + 0x7e4a369000000000, 0xfc946d2000000000, 0xf928da4200000000, 0xf251b48600000000, 0xe4a3690e00000000, 0xc946d21e00000000, 0x928da43e00000000, 0x251b487e00000000, 0x4a3690fc00000000, 0x946d21f800000000, + 0x28da43f200000000, 0x51b487e400000000, 0xa3690fc800000000, 0x46d21f9200000000, 0x8da43f2400000000, 0x1b487e4a00000000, 0x3690fc9400000000, 0x6d21f92800000000, 0xda43f25000000000, 0xb487e4a200000000, + 0x690fc94600000000, 0xd21f928c00000000, 0xa43f251a00000000, 0x487e4a3600000000, 0x90fc946c00000000, 0x21f928da00000000, 0x43f251b400000000, 0x87e4a36800000000, 0xfc946d200000000, 0x1f928da400000000, + 0x3f251b4800000000 +}; + +// ===== CA_group5 ===== +static const uint64_t CA_group5_33R[33] = { + 0xf9159cec, 0x1f22b39d8, 0x1e45673b1, 0x1c8ace763, 0x19159cec7, 0x122b39d8f, 0x45673b1f, 0x8ace763e, 0x1159cec7c, 0x2b39d8f9, + 0x5673b1f2, 0xace763e4, 0x159cec7c8, 0xb39d8f91, 0x1673b1f22, 0xce763e45, 0x19cec7c8a, 0x139d8f915, 0x73b1f22b, 0xe763e456, + 0x1cec7c8ac, 0x19d8f9159, 0x13b1f22b3, 0x763e4567, 0xec7c8ace, 0x1d8f9159c, 0x1b1f22b39, 0x163e45673, 0xc7c8ace7, 0x18f9159ce, + 0x11f22b39d, 0x3e45673b, 0x7c8ace76 +}; + +static const uint64_t CA_group5_31L[31] = { + 0xf248b96000000000, 0xe49172c200000000, 0xc922e58600000000, 0x9245cb0e00000000, 0x248b961e00000000, 0x49172c3c00000000, 0x922e587800000000, 0x245cb0f200000000, 0x48b961e400000000, 0x9172c3c800000000, + 0x22e5879200000000, 0x45cb0f2400000000, 0x8b961e4800000000, 0x172c3c9200000000, 0x2e58792400000000, 0x5cb0f24800000000, 0xb961e49000000000, 0x72c3c92200000000, 0xe587924400000000, 0xcb0f248a00000000, + 0x961e491600000000, 0x2c3c922e00000000, 0x5879245c00000000, 0xb0f248b800000000, 0x61e4917200000000, 0xc3c922e400000000, 0x879245ca00000000, 0xf248b9600000000, 0x1e49172c00000000, 0x3c922e5800000000, + 0x79245cb000000000 +}; + +// ===== CA_group6 ===== +static const uint64_t CA_group6_33R[33] = { + 0x11cf8a361, 0x39f146c3, 0x73e28d86, 0xe7c51b0c, 0x1cf8a3618, 0x19f146c31, 0x13e28d863, 0x7c51b0c7, 0xf8a3618e, 0x1f146c31c, + 0x1e28d8639, 0x1c51b0c73, 0x18a3618e7, 0x1146c31cf, 0x28d8639f, 0x51b0c73e, 0xa3618e7c, 0x146c31cf8, 0x8d8639f1, 0x11b0c73e2, + 0x3618e7c5, 0x6c31cf8a, 0xd8639f14, 0x1b0c73e28, 0x1618e7c51, 0xc31cf8a3, 0x18639f146, 0x10c73e28d, 0x18e7c51b, 0x31cf8a36, + 0x639f146c, 0xc73e28d8, 0x18e7c51b0 +}; + +static const uint64_t CA_group6_31L[31] = { + 0x9ff8253200000000, 0x3ff04a6600000000, 0x7fe094cc00000000, 0xffc1299800000000, 0xff82533200000000, 0xff04a66600000000, 0xfe094cce00000000, 0xfc12999e00000000, 0xf825333e00000000, 0xf04a667e00000000, + 0xe094ccfe00000000, 0xc12999fe00000000, 0x825333fe00000000, 0x4a667fe00000000, 0x94ccffc00000000, 0x12999ff800000000, 0x25333ff000000000, 0x4a667fe000000000, 0x94ccffc000000000, 0x2999ff8200000000, + 0x5333ff0400000000, 0xa667fe0800000000, 0x4ccffc1200000000, 0x999ff82400000000, 0x333ff04a00000000, 0x667fe09400000000, 0xccffc12800000000, 0x99ff825200000000, 0x33ff04a600000000, 0x67fe094c00000000, + 0xcffc129800000000 +}; + +// ===== CA_group7 ===== +static const uint64_t CA_group7_33R[33] = { + 0x83cfcff, 0x1079f9fe, 0x20f3f3fc, 0x41e7e7f8, 0x83cfcff0, 0x1079f9fe0, 0xf3f3fc1, 0x1e7e7f82, 0x3cfcff04, 0x79f9fe08, + 0xf3f3fc10, 0x1e7e7f820, 0x1cfcff041, 0x19f9fe083, 0x13f3fc107, 0x7e7f820f, 0xfcff041e, 0x1f9fe083c, 0x1f3fc1079, 0x1e7f820f3, + 0x1cff041e7, 0x19fe083cf, 0x13fc1079f, 0x7f820f3f, 0xff041e7e, 0x1fe083cfc, 0x1fc1079f9, 0x1f820f3f3, 0x1f041e7e7, 0x1e083cfcf, + 0x1c1079f9f, 0x1820f3f3f, 0x1041e7e7f +}; + +static const uint64_t CA_group7_31L[31] = { + 0xe055ee00000000, 0x1c0abdc00000000, 0x38157b800000000, 0x702af7000000000, 0xe055ee000000000, 0x1c0abdc000000000, 0x38157b8000000000, 0x702af70000000000, 0xe055ee0000000000, 0xc0abdc0200000000, + 0x8157b80600000000, 0x2af700e00000000, 0x55ee01c00000000, 0xabdc03800000000, 0x157b807000000000, 0x2af700e000000000, 0x55ee01c000000000, 0xabdc038000000000, 0x57b8070200000000, 0xaf700e0400000000, + 0x5ee01c0a00000000, 0xbdc0381400000000, 0x7b80702a00000000, 0xf700e05400000000, 0xee01c0aa00000000, 0xdc03815600000000, 0xb80702ae00000000, 0x700e055e00000000, 0xe01c0abc00000000, 0xc038157a00000000, + 0x80702af600000000 +}; + +constexpr const uint64_t* MS_TAB_33R_CC[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CC_group2_33R, CC_group4_33R, CC_group2_33R, CC_group4_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CC_group3_33R, CC_group1_33R, CC_group3_33R, CC_group1_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CC_group2_33R, CC_group4_33R, CC_group2_33R, CC_group4_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CC_group3_33R, CC_group1_33R, CC_group3_33R, CC_group1_33R // 40–44 +}; + +constexpr const uint64_t* MS_TAB_31L_CC[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CC_group2_31L, CC_group4_31L, CC_group2_31L, CC_group4_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CC_group3_31L, CC_group1_31L, CC_group3_31L, CC_group1_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CC_group2_31L, CC_group4_31L, CC_group2_31L, CC_group4_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CC_group3_31L, CC_group1_31L, CC_group3_31L, CC_group1_31L // 40–44 +}; + +// ===== MS_TAB_33R_CG ===== +constexpr const uint64_t* MS_TAB_33R_CG[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CG_group1_33R, CG_group2_33R, CG_group3_33R, CG_group2_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CG_group4_33R, CG_group5_33R, CG_group4_33R, CG_group5_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CG_group1_33R, CG_group2_33R, CG_group3_33R, CG_group2_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CG_group6_33R, CG_group7_33R, CG_group4_33R, CG_group7_33R // 40–44 +}; + +// ===== MS_TAB_31L_CG ===== +constexpr const uint64_t* MS_TAB_31L_CG[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CG_group1_31L, CG_group2_31L, CG_group3_31L, CG_group2_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CG_group4_31L, CG_group5_31L, CG_group4_31L, CG_group5_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CG_group1_31L, CG_group2_31L, CG_group3_31L, CG_group2_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CG_group6_31L, CG_group7_31L, CG_group4_31L, CG_group7_31L // 40–44 +}; + + +// ===== MS_TAB_33R_CA ===== +constexpr const uint64_t* MS_TAB_33R_CA[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CA_group4_33R, CA_group2_33R, CA_group3_33R, CA_group2_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CA_group1_33R, CA_group6_33R, CA_group5_33R, CA_group6_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CA_group4_33R, CA_group2_33R, CA_group3_33R, CA_group2_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CA_group1_33R, CA_group7_33R, CA_group1_33R, CA_group7_33R // 40–44 +}; + +// ===== MS_TAB_31L_CA ===== +constexpr const uint64_t* MS_TAB_31L_CA[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CA_group4_31L, CA_group2_31L, CA_group3_31L, CA_group2_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CA_group1_31L, CA_group6_31L, CA_group5_31L, CA_group6_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CA_group4_31L, CA_group2_31L, CA_group3_31L, CA_group2_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CA_group1_31L, CA_group7_31L, CA_group1_31L, CA_group7_31L // 40–44 +}; + +// ===== MS_TAB_33R_CT ===== +constexpr const uint64_t* MS_TAB_33R_CT[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CT_group2_33R, CT_group2_33R, CT_group2_33R, CT_group7_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CT_group3_33R, CT_group1_33R, CT_group4_33R, CT_group1_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CT_group2_33R, CT_group2_33R, CT_group2_33R, CT_group7_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CT_group5_33R, CT_group1_33R, CT_group6_33R, CT_group1_33R // 40–44 +}; + +// ===== MS_TAB_31L_CT ===== +constexpr const uint64_t* MS_TAB_31L_CT[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CT_group2_31L, CT_group2_31L, CT_group2_31L, CT_group7_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CT_group3_31L, CT_group1_31L, CT_group4_31L, CT_group1_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CT_group2_31L, CT_group2_31L, CT_group2_31L, CT_group7_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CT_group5_31L, CT_group1_31L, CT_group6_31L, CT_group1_31L // 40–44 +}; + + +inline uint64_t +srol_bs_table(uint8_t out_idx, unsigned d, const uint64_t* const* TAB_33R, const uint64_t* const* TAB_31L) +{ + d= d/2; + return (TAB_31L[out_idx][d < 31 ? d : d % 31] | /* NOLINT */ + TAB_33R[out_idx][d < 33 ? d : d % 33]); /* NOLINT */ +} + + + +inline uint64_t +next_forward_bs_hash(uint64_t fh_val, + unsigned k, + unsigned char char_out1, + unsigned char char_out2, + unsigned char char_in1, + unsigned char char_in2, + const uint64_t* DIMER_TAB, + const uint64_t* const* TAB_33R, + const uint64_t* const* TAB_31L) +{ + // Compute indices for outgoing and incoming dimers + // Each dimer is encoded as (10 * first + second) + uint8_t out_idx = 10 * CONVERT_TAB_SIMPLE[char_out1] + CONVERT_TAB_SIMPLE[char_out2]; + uint8_t in_idx = 10 * CONVERT_TAB_SIMPLE[char_in1] + CONVERT_TAB_SIMPLE[char_in2]; + + // Rotate the hash left by 2 since we move by two bases + uint64_t h_val = srol(fh_val); + + // XOR out the old dimer and XOR in the new dimer + h_val ^= srol_bs_table(out_idx, k, TAB_33R, TAB_31L); + h_val ^= DIMER_TAB[in_idx]; + + return h_val; +} + + +inline uint64_t +prev_forward_bs_hash(uint64_t fh_val, + unsigned k, + unsigned char char_out1, + unsigned char char_out2, + unsigned char char_in1, + unsigned char char_in2, + const uint64_t* DIMER_TAB, + const uint64_t* const* TAB_33R, + const uint64_t* const* TAB_31L) +{ + // Compute indices for outgoing and incoming dimers + // Each dimer is encoded as (10 * first + second) + uint8_t out_idx = 10 * CONVERT_TAB_SIMPLE[char_out1] + CONVERT_TAB_SIMPLE[char_out2]; + uint8_t in_idx = 10 * CONVERT_TAB_SIMPLE[char_in1] + CONVERT_TAB_SIMPLE[char_in2]; + + uint64_t h_val = fh_val ^ srol_bs_table(in_idx, k, TAB_33R, TAB_31L); + h_val ^= DIMER_TAB[out_idx]; + h_val = sror(h_val); + return h_val; +} + +// Generic dimer-based reverse hash using pointer table +inline std::vector +base_reverse_bs_hash(const char* seq, bool has_second, unsigned k, const uint64_t* DIMER_TAB) +{ + + uint64_t h0 = 0; + uint64_t h1 = 0; + + // Process pairs from the end for h0 + for (int i = (int)k - 1; i >= 1; i -= 2) { + h0 = srol(h0, 1); + const uint8_t a = CONVERT_TAB_RC[(unsigned char)seq[i]]; + const uint8_t b = CONVERT_TAB_RC[(unsigned char)seq[i - 1]]; + const uint8_t loc = 10 * a + b; // yields 11..44 + h0 ^= DIMER_TAB[loc]; // first element of pointer table + } + + // If length exactly k, return only h0 + if (!has_second) + return {h0, 0}; + + // Offset by 1 for second hash (mirroring h1 in forward) + for (int i = (int)k; i >= 2; i -= 2) { + h1 = srol(h1, 1); + const uint8_t a = CONVERT_TAB_RC[(unsigned char)seq[i]]; + const uint8_t b = CONVERT_TAB_RC[(unsigned char)seq[i - 1]]; + const uint8_t loc = 10 * a + b; + h1 ^= DIMER_TAB[loc]; + } + + return {h0, h1}; +} + +inline uint64_t +next_reverse_bs_hash(uint64_t rh_val, + unsigned k, + unsigned char char_out1, + unsigned char char_out2, + unsigned char char_in1, + unsigned char char_in2, + const uint64_t* DIMER_TAB, + const uint64_t* const* TAB_33R, + const uint64_t* const* TAB_31L) +{ + uint8_t out_idx = 10 * CONVERT_TAB_RC[char_out2] + CONVERT_TAB_RC[char_out1]; + uint8_t in_idx = 10 * CONVERT_TAB_RC[char_in2] + CONVERT_TAB_RC[char_in1]; + uint64_t h_val = rh_val ^ srol_bs_table(in_idx, k, TAB_33R, TAB_31L); + h_val ^= DIMER_TAB[out_idx]; + h_val = sror(h_val); + return h_val; +} + + +inline uint64_t +prev_reverse_bs_hash(uint64_t rh_val, + unsigned k, + unsigned char char_out1, + unsigned char char_out2, + unsigned char char_in1, + unsigned char char_in2, + const uint64_t* DIMER_TAB, + const uint64_t* const* TAB_33R, + const uint64_t* const* TAB_31L) +{ + uint8_t out_idx = 10 * CONVERT_TAB_RC[char_out2] + CONVERT_TAB_RC[char_out1]; + uint8_t in_idx = 10 * CONVERT_TAB_RC[char_in2] + CONVERT_TAB_RC[char_in1]; + uint64_t h_val = srol(rh_val); + h_val ^= DIMER_TAB[in_idx]; + h_val ^= srol_bs_table(out_idx, k, TAB_33R, TAB_31L); + return h_val; +} +}// namespace btllib::hashing_internals + +namespace btllib { + +using hashing_internals::extend_hashes; +using hashing_internals::sror; +using hashing_internals::srol; +class BsHash +{ + +public: + /** + * Construct an BsHash object for k-mers. + * @param seq C-string containing sequence data + * @param seq_len Length of the sequence + * @param num_hashes Number of hashes to generate per k-mer + * @param k K-mer size + * @param pos Position in the sequence to start hashing from + */ + BsHash(const char* seq, + size_t seq_len, + hashing_internals::NUM_HASHES_TYPE num_hashes, + hashing_internals::K_TYPE k, + std::string mode, + size_t pos = 0) + : seq(seq) + , seq_len(seq_len) + , num_hashes(num_hashes) + , k(k) + , pos(pos) + , initialized(false) + , hash_arr(new uint64_t[num_hashes]) + { + check_error(k == 0, "BsHash: k must be greater than 0"); + check_error(k % 2 == 1, "BsHash: k must be even"); + check_error(this->seq_len < k, + "BsHash: sequence length (" + std::to_string(this->seq_len) + + ") is smaller than k (" + std::to_string(k) + ")"); + check_error(pos > this->seq_len - k, + "BsHash: passed position (" + std::to_string(pos) + + ") is larger than sequence length (" + + std::to_string(this->seq_len) + ")"); + static const std::unordered_set valid_modes = {"CG", "CC", "CT", "CA"}; + check_error(valid_modes.find(mode) == valid_modes.end(), + "BsHash: mode must be one of [CG, CC, CT, CA], but got '" + mode + "'"); + + if (mode == "CC") { + DIMER_TAB = CC_DIMER_TAB; + TAB_33R = MS_TAB_33R_CC; + TAB_31L = MS_TAB_31L_CC; + } else if (mode == "CG") { + DIMER_TAB = CG_DIMER_TAB; + TAB_33R = MS_TAB_33R_CG; + TAB_31L = MS_TAB_31L_CG; + } else if (mode == "CT") { + DIMER_TAB = CT_DIMER_TAB; + TAB_33R = MS_TAB_33R_CT; + TAB_31L = MS_TAB_31L_CT; + } else if (mode == "CA") { + DIMER_TAB = CA_DIMER_TAB; + TAB_33R = MS_TAB_33R_CA; + TAB_31L = MS_TAB_31L_CA; + } + } + + /** + * Construct an BsHash object for k-mers. + * @param seq Sequence string + * @param num_hashes Number of hashes to produce per k-mer + * @param k K-mer size + * @param pos Position in sequence to start hashing from + */ + BsHash(const std::string& seq, + hashing_internals::NUM_HASHES_TYPE num_hashes, + hashing_internals::K_TYPE k, + std::string mode = "CG", + size_t pos = 0) + : BsHash(seq.data(), seq.size(), num_hashes, k, mode, pos) + { + } + + BsHash(const BsHash& obj) + : seq(obj.seq) + , seq_len(obj.seq_len) + , num_hashes(obj.num_hashes) + , k(obj.k) + , pos(obj.pos) + , initialized(obj.initialized) + , fwd_hashes(obj.fwd_hashes) + , rev_hashes(obj.rev_hashes) + , hash_arr(new uint64_t[obj.num_hashes] ) + { + DIMER_TAB = obj.DIMER_TAB; + TAB_33R = obj.TAB_33R; + TAB_31L = obj.TAB_31L; + std::memcpy( + hash_arr.get(), obj.hash_arr.get(), num_hashes * sizeof(uint64_t)); + } + + +// TODO + BsHash(BsHash&&) = default; + + /** + * Calculate the hash values of current k-mer and advance to the next k-mer. + * BsHash advances one nucleotide at a time until it finds a k-mer with valid + * characters (ACGTU) and skips over those with invalid characters (non-ACGTU, + * including N). This method must be called before hashes() is accessed, for + * the first and every subsequent hashed kmer. get_pos() may be called at any + * time to obtain the position of last hashed k-mer or the k-mer to be hashed + * if roll() has never been called on this BsHash object. It is important to + * note that the number of roll() calls is NOT necessarily equal to get_pos(), + * if there are N's or invalid characters in the hashed sequence. + * @return \p true on success and \p false otherwise + */ +bool roll() +{ + if (!initialized) { + std::cout << "[ROLL] Not initialized. Calling init()\n"; + return init(); + } + + std::cout << "[ROLL] Rolling at position " << pos << ", even=" << even << "\n"; + + if (even) { + if (pos >= seq_len - k) { + std::cout << "[ROLL] Reached end of sequence, cannot roll\n"; + return false; + } + char c1 = seq[pos + k - 1]; + char c2 = seq[pos + k]; + uint8_t a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; + uint8_t b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; + uint8_t loc = 10 * a + b; // yields 11, 12, ..., 44 + if (DIMER_TAB[loc] == 0) { + std::cout << "[ROLL] Invalid dimer at position " << (pos + k) + << ", skipping " << k << " positions and re-initializing\n"; + pos += k; + return init(); + } + std::cout << "[ROLL] Even roll, extending hashes at position " << pos << "\n"; + even = false; + ++pos; + extend_hashes(fwd_hashes[1], rev_hashes[1], k, num_hashes, hash_arr.get()); + return true; + } + + if (pos >= seq_len - k) { + std::cout << "[ROLL] Reached end of sequence, cannot roll\n"; + return false; + } + + char c1 = seq[pos + k - 1]; + char c2 = seq[pos + k]; + uint8_t a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; + uint8_t b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; + uint8_t loc = 10 * a + b; // yields 11, 12, ..., 44 + if (DIMER_TAB[loc] == 0) { + std::cout << "[ROLL] Invalid dimer at position " << (pos + k) + << ", skipping " << k << " positions and re-initializing\n"; + pos += k; + return init(); + } + + bool can_roll_second = true; + c1 = seq[pos + k]; + c2 = seq[pos + k + 1]; + a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; + b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; + loc = 10 * a + b; + if (pos + 1 >= seq_len - k || DIMER_TAB[loc] == 0) { + can_roll_second = false; + std::cout << "[ROLL] Cannot roll second k-mer at position " << (pos + 1) << "\n"; + } + + for (int i = 0; i < 2; i++) { + if (i == 0 || can_roll_second) { + char out1 = seq[pos + i - 1]; + char out2 = seq[pos + i + 1 - 1]; + char in1 = seq[pos + k + i - 1]; + char in2 = seq[pos + k + i + 1 - 1]; + + std::cout << "[ROLL] Loop " << i << " at pos=" << pos + << " | OUT: " << out1 << out2 + << " | IN: " << in1 << in2 << "\n"; + + fwd_hashes[i] = next_forward_bs_hash(fwd_hashes[i], k, + out1, out2, in1, in2, + DIMER_TAB, TAB_33R, TAB_31L); + rev_hashes[i] = next_reverse_bs_hash(rev_hashes[i], k, + out1, out2, in1, in2, + DIMER_TAB, TAB_33R, TAB_31L); + } + } + + std::cout << "[ROLL] Extending hashes at position " << pos << "\n"; + extend_hashes(fwd_hashes[0], rev_hashes[0], k, num_hashes, hash_arr.get()); + + ++pos; + even = true; + + std::cout << "[ROLL] Roll complete, new position=" << pos << ", even=" << even << "\n"; + + if (!can_roll_second ) { + fwd_hashes[1] = 0; + rev_hashes[1] = 0; + } + return true; +} + + +bool is_methylated() { + if (k < 2) return false; // cannot have a dimer + + // Center dimer index within k-mer + size_t center_idx; + size_t num_dimers = k / 2; + + if (num_dimers % 2 == 1) { + // Odd number of dimers → pick exact center dimer + center_idx = pos + (num_dimers / 2) * 2; + } else { + // Even number of dimers → pick left of two central dimers + center_idx = pos + ((num_dimers / 2) - 1) * 2; + } + + // Safety check + if (center_idx + 1 >= seq_len) return false; + + return seq[center_idx] == 'C' && seq[center_idx + 1] == 'G'; +} + + /** + * Like the roll() function, but advance backwards. + * @return \p true on success and \p false otherwise + */ + bool roll_back() + { + + if (!initialized) { + return init(); + } + + if (pos == 0) { + return false; + } + + if (!even) { + extend_hashes(fwd_hashes[0], rev_hashes[0], k, num_hashes, hash_arr.get()); + --pos; + even = true; // toggle to indicate next roll_back can attempt both + return true; + } + + + char c1 = seq[pos - 1]; + char c2 = seq[pos]; + uint8_t a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; + uint8_t b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; + uint8_t loc = 10 * a + b; // yields 11, 12, ..., 44 + if (DIMER_TAB[loc] == 0) { + std::cout << "[ROLL] Invalid dimer at position " << (pos + k) + << ", skipping " << k << " positions and re-initializing\n"; + if (pos <= k) { + return false; + } + pos -= k; + return init(); + + } + + c1 = seq[pos - 2]; + c2 = seq[pos - 1]; + a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; + b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; + loc = 10 * a + b; + // even step: try to roll back both hashes + bool can_roll_second = true; + if (pos <= 1 || DIMER_TAB[loc] == 0) { + can_roll_second = false; + } + if (fwd_hashes[1] != 0) { + for (int i = 0; i < 2; i++) { + if (i == 1 || can_roll_second) { + fwd_hashes[i] = prev_forward_bs_hash(fwd_hashes[i], k, + seq[pos + k - 2 + i], seq[pos + k - 1 + i], + seq[pos - 2 + i], seq[pos - 1 + i], + DIMER_TAB, TAB_33R, TAB_31L); + rev_hashes[i] = prev_reverse_bs_hash(rev_hashes[i], k, + seq[pos + k - 2 + i], seq[pos + k - 1 + i], + seq[pos - 2 + i], seq[pos - 1 + i], + DIMER_TAB, TAB_33R, TAB_31L); + } + } + } else { + std::cerr << "abcdefgh" << std::endl; + pos -=2; + init(); + pos++; + extend_hashes(fwd_hashes[1], rev_hashes[1], k, num_hashes, hash_arr.get()); + even = false; + return true; + } + + extend_hashes(fwd_hashes[1], rev_hashes[1], k, num_hashes, hash_arr.get()); + --pos; + even = false; + return true; + } + + + /** + * Get the array of current canonical hash values (length = \p get_hash_num()) + * @return Pointer to the hash array + */ + const uint64_t* hashes() const { return hash_arr.get(); } + + /** + * Get the position of last hashed k-mer or the k-mer to be hashed if roll() + * has never been called on this BsHash object. + * @return Position of the most recently hashed k-mer's first base-pair + */ + size_t get_pos() const { return pos; } + + /** + * Get the number of hashes generated per k-mer. + * @return Number of hashes per k-mer + */ + hashing_internals::NUM_HASHES_TYPE get_hash_num() const { return num_hashes; } + + /** + * Get the length of the k-mers. + * @return \p k + */ + hashing_internals::K_TYPE get_k() const { return k; } + + /** + * Get the hash value of the forward strand. + * @return Forward hash value + */ + uint64_t get_forward_hash() const { if (even) {return fwd_hashes[0];} else {return fwd_hashes[1];}} + + /** + * Get the hash value of the reverse strand. + * @return Reverse-complement hash value + */ + uint64_t get_reverse_hash() const { if (even) {return rev_hashes[0];} else {return rev_hashes[1];}} + +private: + const char* seq; + const size_t seq_len; + hashing_internals::NUM_HASHES_TYPE num_hashes; + hashing_internals::K_TYPE k; + size_t pos; + bool initialized; + std::vector fwd_hashes = {0, 0}; + std::vector rev_hashes = {0, 0}; + bool even = true; + std::unique_ptr hash_arr; + const uint64_t* DIMER_TAB = nullptr; + const uint64_t* const* TAB_33R = nullptr; + const uint64_t* const* TAB_31L = nullptr; + + /** + * Initialize the internal state of the iterator + * @return \p true if successful, \p false otherwise + */ + +bool init() +{ + std::cout << "[INIT] Starting initialization at position " << pos << "\n"; + + bool has_n = true; + while (pos <= seq_len - k + 1 && has_n) { + has_n = false; + for (unsigned i = 0; i < k && pos <= seq_len - k + 1; i += 2) { + const char c1 = seq[pos + k - i - 2]; + const char c2 = seq[pos + k - i - 1]; + const uint8_t a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; + const uint8_t b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; + const uint8_t loc = 10 * a + b; // yields 11, 12, ..., 44 + + std::cout << "[INIT] Checking dimer '" << c1 << c2 + << "' -> loc=" << (unsigned)loc << "\n"; + + if (DIMER_TAB[loc] == 0) { + std::cout << "[INIT] Found invalid base at position " << (pos + k - i - 1) + << ", skipping " << (k - i) << " positions\n"; + if (b == 0) { + pos += k - i; + } else { + pos += k - i - 1; + } + + has_n = true; + } + } + } + + if (pos > seq_len - k) { + std::cout << "[INIT] Position exceeded sequence length, returning false\n"; + return false; + } + + bool has_second = true; + if (pos == seq_len - k) { + has_second = false; + std::cout << "[INIT] At last k-mer, no second k-mer available\n"; + } + + std::cout << "[INIT] Computing forward hashes at position " << pos << "\n"; + fwd_hashes = base_forward_bs_hash(seq + pos, has_second, k, DIMER_TAB); + + std::cout << "[INIT] Computing reverse hashes at position " << pos << "\n"; + rev_hashes = base_reverse_bs_hash(seq + pos, has_second, k, DIMER_TAB); + + std::cout << "[INIT] Extending hashes\n"; + extend_hashes(fwd_hashes[0], rev_hashes[0], k, num_hashes, hash_arr.get()); + + initialized = true; + std::cout << "[INIT] Initialization complete at position " << pos << "\n"; + return true; +} + +}; +} \ No newline at end of file diff --git a/include/btllib/bshash_kmer_conversion.hpp b/include/btllib/bshash_kmer_conversion.hpp new file mode 100644 index 00000000..6dcc237d --- /dev/null +++ b/include/btllib/bshash_kmer_conversion.hpp @@ -0,0 +1,360 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + + + +namespace btllib { + +using hashing_internals::base_forward_hash; +using hashing_internals::base_reverse_hash; +using hashing_internals::extend_hashes; +using hashing_internals::next_forward_hash; +using hashing_internals::next_reverse_hash; +using hashing_internals::prev_forward_hash; +using hashing_internals::prev_reverse_hash; +using hashing_internals::SEED_N; +using hashing_internals::SEED_TAB; +using hashing_internals::sub_hash; +// Bring the tables into this file +using hashing_internals::BS_CT_SEED_TAB; +using hashing_internals::BS_GA_SEED_TAB; + +using hashing_internals::BS_CT_MS_TAB_33R; +using hashing_internals::BS_GA_MS_TAB_33R; + +using hashing_internals::BS_CT_MS_TAB_31L; +using hashing_internals::BS_GA_MS_TAB_31L; + +using hashing_internals::BS_CT_CONVERT_TAB; +using hashing_internals::BS_GA_CONVERT_TAB; + +using hashing_internals::BS_CT_RC_CONVERT_TAB; +using hashing_internals::BS_GA_RC_CONVERT_TAB; + +// Optional: if you want the dimer/trimer/tetramer tables too +using hashing_internals::DIMER_TAB; +using hashing_internals::TRIMER_TAB; +using hashing_internals::TETRAMER_TAB; + +/** + * Normal k-mer hashing. + */ +class BSHashConversion : public NtHash +{ + +public: + /** + * Construct an ntHash object for k-mers. + * @param seq C-string containing sequence data + * @param seq_len Length of the sequence + * @param num_hashes Number of hashes to generate per k-mer + * @param k K-mer size + * @param pos Position in the sequence to start hashing from + */ + BSHashConversion(const char* seq, + size_t seq_len, + hashing_internals::NUM_HASHES_TYPE num_hashes, + hashing_internals::K_TYPE k, + std::string conversion_type, + size_t pos = 0) + : NtHash(seq, seq_len, num_hashes, k, pos), + seq(seq) + , seq_len(seq_len) + , num_hashes(num_hashes) + , k(k) + , conversion_type(conversion_type) + , pos(pos) + , initialized(false) + , hash_arr(new uint64_t[num_hashes]) + { + check_error(k == 0, "NtHash: k must be greater than 0"); + check_error(this->seq_len < k, + "NtHash: sequence length (" + std::to_string(this->seq_len) + + ") is smaller than k (" + std::to_string(k) + ")"); + check_error(pos > this->seq_len - k, + "NtHash: passed position (" + std::to_string(pos) + + ") is larger than sequence length (" + + std::to_string(this->seq_len) + ")"); + if (conversion_type == "CT"|| conversion_type == "ct") { + // Seed table + primitive_tab = BS_CT_SEED_TAB; + + // Rolling hash right/left tables + right_table = BS_CT_MS_TAB_33R; + left_table = BS_CT_MS_TAB_31L; + + // Convert tables + convert_tab = BS_CT_CONVERT_TAB; + rc_convert_tab = BS_CT_RC_CONVERT_TAB; + + // dimer_tab, trimer_tab, tetramer_tab stay unchanged + + } + else if (conversion_type == "GA" || conversion_type == "ga") { + primitive_tab = BS_GA_SEED_TAB; + + right_table = BS_GA_MS_TAB_33R; + left_table = BS_GA_MS_TAB_31L; + + convert_tab = BS_GA_CONVERT_TAB; + rc_convert_tab = BS_GA_RC_CONVERT_TAB; + + } + else { + check_error(k == 0, "Unsupported conversion_type. Must be CT or GA."); + exit(1); + } + } + + /** + * Construct an ntHash object for k-mers. + * @param seq Sequence string + * @param num_hashes Number of hashes to produce per k-mer + * @param k K-mer size + * @param pos Position in sequence to start hashing from + */ + BSHashConversion(const std::string& seq, + hashing_internals::NUM_HASHES_TYPE num_hashes, + hashing_internals::K_TYPE k, + std::string conversion_type, + size_t pos = 0) + : BSHashConversion(seq.data(), seq.size(), num_hashes, k, conversion_type, pos) + { + } + + + /** + * Calculate the hash values of current k-mer and advance to the next k-mer. + * NtHash advances one nucleotide at a time until it finds a k-mer with valid + * characters (ACGTU) and skips over those with invalid characters (non-ACGTU, + * including N). This method must be called before hashes() is accessed, for + * the first and every subsequent hashed kmer. get_pos() may be called at any + * time to obtain the position of last hashed k-mer or the k-mer to be hashed + * if roll() has never been called on this NtHash object. It is important to + * note that the number of roll() calls is NOT necessarily equal to get_pos(), + * if there are N's or invalid characters in the hashed sequence. + * @return \p true on success and \p false otherwise + */ + bool roll() + { + if (!initialized) { + return init(); + } + if (pos >= seq_len - k) { + return false; + } + if (hashing_internals::SEED_TAB[(unsigned char)seq[pos + k]] == + hashing_internals::SEED_N) { + pos += k; + return init(); + } + fwd_hash = next_forward_hash(fwd_hash, k, seq[pos], seq[pos + k], primitive_tab, right_table, left_table); + rev_hash = next_reverse_hash(rev_hash, k, seq[pos], seq[pos + k], primitive_tab, right_table, left_table); + extend_hashes(fwd_hash, rev_hash, k, num_hashes, hash_arr.get()); + ++pos; + return true; + } + + /** + * Like the roll() function, but advance backwards. + * @return \p true on success and \p false otherwise + */ + bool roll_back() + { + if (!initialized) { + return init(); + } + if (pos == 0) { + return false; + } + if (SEED_TAB[(unsigned char)seq[pos - 1]] == SEED_N && pos >= k) { + pos -= k; + return init(); + } + if (SEED_TAB[(unsigned char)seq[pos - 1]] == SEED_N) { + return false; + } + fwd_hash = prev_forward_hash(fwd_hash, k, seq[pos + k - 1], seq[pos - 1], primitive_tab, right_table, left_table); + rev_hash = prev_reverse_hash(rev_hash, k, seq[pos + k - 1], seq[pos - 1], primitive_tab, right_table, left_table); + extend_hashes(fwd_hash, rev_hash, k, num_hashes, hash_arr.get()); + --pos; + return true; + } + + /** + * Peeks the hash values as if roll() was called (without advancing the + * NtHash object. The peeked hash values can be obtained through the + * hashes() method. + * @return \p true on success and \p false otherwise + */ + bool peek() + { + if (pos >= seq_len - k) { + return false; + } + return peek(seq[pos + k]); + } + + /** + * Like peek(), but as if roll_back() was called. + * @return \p true on success and \p false otherwise + */ + bool peek_back() + { + if (pos == 0) { + return false; + } + return peek_back(seq[pos - 1]); + } + + /** + * Peeks the hash values as if roll() was called for char_in (without + * advancing the NtHash object. The peeked hash values can be obtained through + * the hashes() method. + * @return \p true on success and \p false otherwise + */ + bool peek(char char_in) + { + if (!initialized) { + return init(); + } + if (SEED_TAB[(unsigned char)char_in] == SEED_N) { + return false; + } + const uint64_t fwd = next_forward_hash(fwd_hash, k, seq[pos], char_in, primitive_tab, right_table, left_table); + const uint64_t rev = next_reverse_hash(rev_hash, k, seq[pos], char_in, primitive_tab, right_table, left_table); + extend_hashes(fwd, rev, k, num_hashes, hash_arr.get()); + return true; + } + + /** + * Like peek(), but as if roll_back on char_in was called. + * @return \p true on success and \p false otherwise + */ + bool peek_back(char char_in) + { + if (!initialized) { + return init(); + } + if (SEED_TAB[(unsigned char)char_in] == SEED_N) { + return false; + } + const unsigned char char_out = seq[pos + k - 1]; + const uint64_t fwd = prev_forward_hash(fwd_hash, k, char_out, char_in, primitive_tab, right_table, left_table); + const uint64_t rev = prev_reverse_hash(rev_hash, k, char_out, char_in, primitive_tab, right_table, left_table); + extend_hashes(fwd, rev, k, num_hashes, hash_arr.get()); + return true; + } + + void sub(const std::vector& positions, + const std::vector& new_bases) + { + sub_hash(fwd_hash, + rev_hash, + seq + pos, + positions, + new_bases, + get_k(), + get_hash_num(), + hash_arr.get()); + } + + /** + * Get the array of current canonical hash values (length = \p get_hash_num()) + * @return Pointer to the hash array + */ + const uint64_t* hashes() const { return hash_arr.get(); } + + /** + * Get the position of last hashed k-mer or the k-mer to be hashed if roll() + * has never been called on this NtHash object. + * @return Position of the most recently hashed k-mer's first base-pair + */ + size_t get_pos() const { return pos; } + + /** + * Get the number of hashes generated per k-mer. + * @return Number of hashes per k-mer + */ + hashing_internals::NUM_HASHES_TYPE get_hash_num() const { return num_hashes; } + + /** + * Get the length of the k-mers. + * @return \p k + */ + hashing_internals::K_TYPE get_k() const { return k; } + + /** + * Get the hash value of the forward strand. + * @return Forward hash value + */ + uint64_t get_forward_hash() const { return fwd_hash; } + + /** + * Get the hash value of the reverse strand. + * @return Reverse-complement hash value + */ + uint64_t get_reverse_hash() const { return rev_hash; } + +private: + const char* seq; + const size_t seq_len; + hashing_internals::NUM_HASHES_TYPE num_hashes; + hashing_internals::K_TYPE k; + std::string conversion_type; + size_t pos; + bool initialized; + uint64_t fwd_hash = 0; + uint64_t rev_hash = 0; + std::unique_ptr hash_arr; + +const uint64_t* primitive_tab; +const uint64_t* dimer_tab = DIMER_TAB; +const uint64_t* trimer_tab = TRIMER_TAB; +const uint64_t* tetramer_tab = TETRAMER_TAB; + +const uint8_t* convert_tab; +const uint8_t* rc_convert_tab; // NEW + +const uint64_t* const* right_table; +const uint64_t* const* left_table; + /** + * Initialize the internal state of the iterator + * @return \p true if successful, \p false otherwise + */ + bool init() + { + bool has_n = true; + while (pos <= seq_len - k + 1 && has_n) { + has_n = false; + for (unsigned i = 0; i < k && pos <= seq_len - k + 1; i++) { + if (SEED_TAB[(unsigned char)seq[pos + k - i - 1]] == SEED_N) { + pos += k - i; + has_n = true; + } + } + } + if (pos > seq_len - k) { + return false; + } + std::cerr << "conversion bshasb" << std::endl; + fwd_hash = base_forward_hash(seq + pos, k, primitive_tab, dimer_tab, trimer_tab, tetramer_tab, convert_tab); + rev_hash = base_reverse_hash(seq + pos, k, primitive_tab, dimer_tab, trimer_tab, tetramer_tab, rc_convert_tab); + extend_hashes(fwd_hash, rev_hash, k, num_hashes, hash_arr.get()); + initialized = true; + return true; + } +}; + +} // namespace btllib \ No newline at end of file diff --git a/include/btllib/hashing_internals.hpp b/include/btllib/hashing_internals.hpp index 798b1b14..76b344de 100644 --- a/include/btllib/hashing_internals.hpp +++ b/include/btllib/hashing_internals.hpp @@ -57,7 +57,9 @@ srol(const uint64_t x, const unsigned d) * @return Split-rotation result */ inline uint64_t -srol_table(unsigned char c, unsigned d); +srol_table(unsigned char c, unsigned d, + const uint64_t* const* right_table, + const uint64_t* const* left_table ); /** * Split a 64-bit word into 33 and 31-bit sub-words and right-rotate them @@ -168,6 +170,77 @@ const uint64_t SEED_TAB[ASCII_SIZE] = { SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N // 248..255 }; +const uint64_t BS_CT_SEED_TAB[ASCII_SIZE] = { + SEED_N, SEED_T, SEED_N, SEED_A, SEED_A, SEED_A, SEED_N, SEED_C, // 0..7 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 8..15 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 16..23 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 24..31 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 32..39 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 40..47 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 48..55 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 56..63 + SEED_N, SEED_A, SEED_N, SEED_T, SEED_N, SEED_N, SEED_N, SEED_G, // 64..71 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 72..79 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_T, SEED_T, SEED_N, SEED_N, // 80..87 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 88..95 + SEED_N, SEED_A, SEED_N, SEED_T, SEED_N, SEED_N, SEED_N, SEED_G, // 96..103 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 104..111 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_T, SEED_T, SEED_N, SEED_N, // 112..119 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 120..127 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 128..135 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 136..143 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 144..151 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 152..159 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 160..167 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 168..175 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 176..183 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 184..191 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 192..199 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 200..207 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 208..215 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 216..223 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 224..231 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 232..239 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 240..247 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N // 248..255 +}; + +const uint64_t BS_GA_SEED_TAB[ASCII_SIZE] = { + SEED_N, SEED_T, SEED_N, SEED_G, SEED_A, SEED_A, SEED_N, SEED_T, // 0..7 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 8..15 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 16..23 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 24..31 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 32..39 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 40..47 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 48..55 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 56..63 + SEED_N, SEED_A, SEED_N, SEED_C, SEED_N, SEED_N, SEED_N, SEED_A, // 64..71 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 72..79 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_T, SEED_T, SEED_N, SEED_N, // 80..87 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 88..95 + SEED_N, SEED_A, SEED_N, SEED_C, SEED_N, SEED_N, SEED_N, SEED_A, // 96..103 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 104..111 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_T, SEED_T, SEED_N, SEED_N, // 112..119 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 120..127 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 128..135 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 136..143 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 144..151 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 152..159 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 160..167 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 168..175 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 176..183 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 184..191 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 192..199 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 200..207 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 208..215 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 216..223 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 224..231 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 232..239 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 240..247 + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N // 248..255 +}; + + const uint64_t A33R[33] = { 0x195c60474, 0x12b8c08e9, 0x571811d3, 0xae3023a6, 0x15c60474c, 0xb8c08e99, 0x171811d32, 0xe3023a65, 0x1c60474ca, 0x18c08e995, 0x11811d32b, 0x3023a657, @@ -344,11 +417,152 @@ const uint64_t* const MS_TAB_31L[ASCII_SIZE] = { N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L // 248..255 }; +const uint64_t* const BS_CT_MS_TAB_33R[ASCII_SIZE] = { + N33R, T33R, N33R, A33R, A33R, A33R, N33R, C33R, // 0..7 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 8..15 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 16..23 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 24..31 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 32..39 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 40..47 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 48..55 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 56..63 + N33R, A33R, N33R, T33R, N33R, N33R, N33R, G33R, // 64..71 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 72..79 + N33R, N33R, N33R, N33R, T33R, T33R, N33R, N33R, // 80..87 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 88..95 + N33R, A33R, N33R, T33R, N33R, N33R, N33R, G33R, // 96..103 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 104..111 + N33R, N33R, N33R, N33R, T33R, T33R, N33R, N33R, // 112..119 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 120..127 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 128..135 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 136..143 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 144..151 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 152..159 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 160..167 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 168..175 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 176..183 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 184..191 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 192..199 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 200..207 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 208..215 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 216..223 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 224..231 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 232..239 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 240..247 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R // 248..255 +}; + +const uint64_t* const BS_CT_MS_TAB_31L[ASCII_SIZE] = { + N31L, T31L, N31L, A31L, A31L, A31L, N31L, C31L, // 0..7 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 8..15 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 16..23 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 24..31 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 32..39 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 40..47 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 48..55 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 56..63 + N31L, A31L, N31L, T31L, N31L, N31L, N31L, G31L, // 64..71 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 72..79 + N31L, N31L, N31L, N31L, T31L, T31L, N31L, N31L, // 80..87 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 88..95 + N31L, A31L, N31L, T31L, N31L, N31L, N31L, G31L, // 96..103 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 104..111 + N31L, N31L, N31L, N31L, T31L, T31L, N31L, N31L, // 112..119 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 120..127 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 128..135 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 136..143 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 144..151 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 152..159 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 160..167 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 168..175 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 176..183 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 184..191 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 192..199 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 200..207 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 208..215 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 216..223 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 224..231 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 232..239 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 240..247 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L // 248..255 +}; + +const uint64_t* const BS_GA_MS_TAB_33R[ASCII_SIZE] = { + N33R, T33R, N33R, G33R, A33R, A33R, N33R, T33R, // 0..7 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 8..15 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 16..23 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 24..31 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 32..39 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 40..47 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 48..55 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 56..63 + N33R, A33R, N33R, C33R, N33R, N33R, N33R, A33R, // 64..71 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 72..79 + N33R, N33R, N33R, N33R, T33R, T33R, N33R, N33R, // 80..87 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 88..95 + N33R, A33R, N33R, C33R, N33R, N33R, N33R, A33R, // 96..103 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 104..111 + N33R, N33R, N33R, N33R, T33R, T33R, N33R, N33R, // 112..119 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 120..127 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 128..135 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 136..143 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 144..151 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 152..159 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 160..167 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 168..175 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 176..183 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 184..191 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 192..199 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 200..207 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 208..215 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 216..223 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 224..231 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 232..239 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 240..247 + N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R // 248..255 +}; + +const uint64_t* const BS_GA_MS_TAB_31L[ASCII_SIZE] = { + N31L, T31L, N31L, G31L, A31L, A31L, N31L, T31L, // 0..7 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 8..15 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 16..23 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 24..31 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 32..39 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 40..47 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 48..55 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 56..63 + N31L, A31L, N31L, C31L, N31L, N31L, N31L, A31L, // 64..71 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 72..79 + N31L, N31L, N31L, N31L, T31L, T31L, N31L, N31L, // 80..87 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 88..95 + N31L, A31L, N31L, C31L, N31L, N31L, N31L, A31L, // 96..103 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 104..111 + N31L, N31L, N31L, N31L, T31L, T31L, N31L, N31L, // 112..119 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 120..127 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 128..135 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 136..143 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 144..151 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 152..159 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 160..167 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 168..175 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 176..183 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 184..191 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 192..199 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 200..207 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 208..215 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 216..223 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 224..231 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 232..239 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 240..247 + N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L // 248..255 +}; + inline uint64_t -srol_table(unsigned char c, unsigned d) +srol_table(unsigned char c, unsigned d, const uint64_t* const* right_table = MS_TAB_33R, + const uint64_t* const* left_table = MS_TAB_31L) { - return (MS_TAB_31L[c][d < 31 ? d : d % 31] | /* NOLINT */ - MS_TAB_33R[c][d < 33 ? d : d % 33]); /* NOLINT */ + return (left_table[c][d < 31 ? d : d % 31] | /* NOLINT */ + right_table[c][d < 33 ? d : d % 33]); /* NOLINT */ } const uint8_t CONVERT_TAB[ASCII_SIZE] = { @@ -421,6 +635,146 @@ const uint8_t RC_CONVERT_TAB[ASCII_SIZE] = { 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 }; +const uint8_t BS_CT_CONVERT_TAB[ASCII_SIZE] = { + 255, 255, 255, 255, 255, 255, 255, 255, // 0..7 + 255, 255, 255, 255, 255, 255, 255, 255, // 8..15 + 255, 255, 255, 255, 255, 255, 255, 255, // 16..23 + 255, 255, 255, 255, 255, 255, 255, 255, // 24..31 + 255, 255, 255, 255, 255, 255, 255, 255, // 32..39 + 255, 255, 255, 255, 255, 255, 255, 255, // 40..47 + 255, 255, 255, 255, 255, 255, 255, 255, // 48..55 + 255, 255, 255, 255, 255, 255, 255, 255, // 56..63 + 255, 0, 255, 3, 255, 255, 255, 2, // 64..71 + 255, 255, 255, 255, 255, 255, 255, 255, // 72..79 + 255, 255, 255, 255, 3, 3, 255, 255, // 80..87 + 255, 255, 255, 255, 255, 255, 255, 255, // 88..95 + 255, 0, 255, 3, 255, 255, 255, 2, // 96..103 + 255, 255, 255, 255, 255, 255, 255, 255, // 104..111 + 255, 255, 255, 255, 3, 3, 255, 255, // 112..119 + 255, 255, 255, 255, 255, 255, 255, 255, // 120..127 + 255, 255, 255, 255, 255, 255, 255, 255, // 128..135 + 255, 255, 255, 255, 255, 255, 255, 255, // 136..143 + 255, 255, 255, 255, 255, 255, 255, 255, // 144..151 + 255, 255, 255, 255, 255, 255, 255, 255, // 152..159 + 255, 255, 255, 255, 255, 255, 255, 255, // 160..167 + 255, 255, 255, 255, 255, 255, 255, 255, // 168..175 + 255, 255, 255, 255, 255, 255, 255, 255, // 176..183 + 255, 255, 255, 255, 255, 255, 255, 255, // 184..191 + 255, 255, 255, 255, 255, 255, 255, 255, // 192..199 + 255, 255, 255, 255, 255, 255, 255, 255, // 200..207 + 255, 255, 255, 255, 255, 255, 255, 255, // 208..215 + 255, 255, 255, 255, 255, 255, 255, 255, // 216..223 + 255, 255, 255, 255, 255, 255, 255, 255, // 224..231 + 255, 255, 255, 255, 255, 255, 255, 255, // 232..239 + 255, 255, 255, 255, 255, 255, 255, 255, // 240..247 + 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 +}; + +const uint8_t BS_CT_RC_CONVERT_TAB[ASCII_SIZE] = { + 255, 255, 255, 255, 255, 255, 255, 255, // 0..7 + 255, 255, 255, 255, 255, 255, 255, 255, // 8..15 + 255, 255, 255, 255, 255, 255, 255, 255, // 16..23 + 255, 255, 255, 255, 255, 255, 255, 255, // 24..31 + 255, 255, 255, 255, 255, 255, 255, 255, // 32..39 + 255, 255, 255, 255, 255, 255, 255, 255, // 40..47 + 255, 255, 255, 255, 255, 255, 255, 255, // 48..55 + 255, 255, 255, 255, 255, 255, 255, 255, // 56..63 + 255, 3, 255, 0, 255, 255, 255, 1, // 64..71 + 255, 255, 255, 255, 255, 255, 255, 255, // 72..79 + 255, 255, 255, 255, 0, 0, 255, 255, // 80..87 + 255, 255, 255, 255, 255, 255, 255, 255, // 88..95 + 255, 3, 255, 0, 255, 255, 255, 1, // 96..103 + 255, 255, 255, 255, 255, 255, 255, 255, // 104..111 + 255, 255, 255, 255, 0, 0, 255, 255, // 112..119 + 255, 255, 255, 255, 255, 255, 255, 255, // 120..127 + 255, 255, 255, 255, 255, 255, 255, 255, // 128..135 + 255, 255, 255, 255, 255, 255, 255, 255, // 136..143 + 255, 255, 255, 255, 255, 255, 255, 255, // 144..151 + 255, 255, 255, 255, 255, 255, 255, 255, // 152..159 + 255, 255, 255, 255, 255, 255, 255, 255, // 160..167 + 255, 255, 255, 255, 255, 255, 255, 255, // 168..175 + 255, 255, 255, 255, 255, 255, 255, 255, // 176..183 + 255, 255, 255, 255, 255, 255, 255, 255, // 184..191 + 255, 255, 255, 255, 255, 255, 255, 255, // 192..199 + 255, 255, 255, 255, 255, 255, 255, 255, // 200..207 + 255, 255, 255, 255, 255, 255, 255, 255, // 208..215 + 255, 255, 255, 255, 255, 255, 255, 255, // 216..223 + 255, 255, 255, 255, 255, 255, 255, 255, // 224..231 + 255, 255, 255, 255, 255, 255, 255, 255, // 232..239 + 255, 255, 255, 255, 255, 255, 255, 255, // 240..247 + 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 +}; + +const uint8_t BS_GA_CONVERT_TAB[ASCII_SIZE] = { + 255, 255, 255, 255, 255, 255, 255, 255, // 0..7 + 255, 255, 255, 255, 255, 255, 255, 255, // 8..15 + 255, 255, 255, 255, 255, 255, 255, 255, // 16..23 + 255, 255, 255, 255, 255, 255, 255, 255, // 24..31 + 255, 255, 255, 255, 255, 255, 255, 255, // 32..39 + 255, 255, 255, 255, 255, 255, 255, 255, // 40..47 + 255, 255, 255, 255, 255, 255, 255, 255, // 48..55 + 255, 255, 255, 255, 255, 255, 255, 255, // 56..63 + 255, 0, 255, 1, 255, 255, 255, 0, // 64..71 + 255, 255, 255, 255, 255, 255, 255, 255, // 72..79 + 255, 255, 255, 255, 3, 3, 255, 255, // 80..87 + 255, 255, 255, 255, 255, 255, 255, 255, // 88..95 + 255, 0, 255, 1, 255, 255, 255, 0, // 96..103 + 255, 255, 255, 255, 255, 255, 255, 255, // 104..111 + 255, 255, 255, 255, 3, 3, 255, 255, // 112..119 + 255, 255, 255, 255, 255, 255, 255, 255, // 120..127 + 255, 255, 255, 255, 255, 255, 255, 255, // 128..135 + 255, 255, 255, 255, 255, 255, 255, 255, // 136..143 + 255, 255, 255, 255, 255, 255, 255, 255, // 144..151 + 255, 255, 255, 255, 255, 255, 255, 255, // 152..159 + 255, 255, 255, 255, 255, 255, 255, 255, // 160..167 + 255, 255, 255, 255, 255, 255, 255, 255, // 168..175 + 255, 255, 255, 255, 255, 255, 255, 255, // 176..183 + 255, 255, 255, 255, 255, 255, 255, 255, // 184..191 + 255, 255, 255, 255, 255, 255, 255, 255, // 192..199 + 255, 255, 255, 255, 255, 255, 255, 255, // 200..207 + 255, 255, 255, 255, 255, 255, 255, 255, // 208..215 + 255, 255, 255, 255, 255, 255, 255, 255, // 216..223 + 255, 255, 255, 255, 255, 255, 255, 255, // 224..231 + 255, 255, 255, 255, 255, 255, 255, 255, // 232..239 + 255, 255, 255, 255, 255, 255, 255, 255, // 240..247 + 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 +}; + +const uint8_t BS_GA_RC_CONVERT_TAB[ASCII_SIZE] = { + 255, 255, 255, 255, 255, 255, 255, 255, // 0..7 + 255, 255, 255, 255, 255, 255, 255, 255, // 8..15 + 255, 255, 255, 255, 255, 255, 255, 255, // 16..23 + 255, 255, 255, 255, 255, 255, 255, 255, // 24..31 + 255, 255, 255, 255, 255, 255, 255, 255, // 32..39 + 255, 255, 255, 255, 255, 255, 255, 255, // 40..47 + 255, 255, 255, 255, 255, 255, 255, 255, // 48..55 + 255, 255, 255, 255, 255, 255, 255, 255, // 56..63 + 255, 3, 255, 2, 255, 255, 255, 3, // 64..71 + 255, 255, 255, 255, 255, 255, 255, 255, // 72..79 + 255, 255, 255, 255, 0, 0, 255, 255, // 80..87 + 255, 255, 255, 255, 255, 255, 255, 255, // 88..95 + 255, 3, 255, 2, 255, 255, 255, 3, // 96..103 + 255, 255, 255, 255, 255, 255, 255, 255, // 104..111 + 255, 255, 255, 255, 0, 0, 255, 255, // 112..119 + 255, 255, 255, 255, 255, 255, 255, 255, // 120..127 + 255, 255, 255, 255, 255, 255, 255, 255, // 128..135 + 255, 255, 255, 255, 255, 255, 255, 255, // 136..143 + 255, 255, 255, 255, 255, 255, 255, 255, // 144..151 + 255, 255, 255, 255, 255, 255, 255, 255, // 152..159 + 255, 255, 255, 255, 255, 255, 255, 255, // 160..167 + 255, 255, 255, 255, 255, 255, 255, 255, // 168..175 + 255, 255, 255, 255, 255, 255, 255, 255, // 176..183 + 255, 255, 255, 255, 255, 255, 255, 255, // 184..191 + 255, 255, 255, 255, 255, 255, 255, 255, // 192..199 + 255, 255, 255, 255, 255, 255, 255, 255, // 200..207 + 255, 255, 255, 255, 255, 255, 255, 255, // 208..215 + 255, 255, 255, 255, 255, 255, 255, 255, // 216..223 + 255, 255, 255, 255, 255, 255, 255, 255, // 224..231 + 255, 255, 255, 255, 255, 255, 255, 255, // 232..239 + 255, 255, 255, 255, 255, 255, 255, 255, // 240..247 + 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 +}; + const uint64_t DIMER_TAB[4 * 4] = { 5015898201438948509U, 5225361804584821669U, 6423762225589857229U, 5783394398799547583U, 6894017875502584557U, 5959461383092338133U, diff --git a/include/btllib/nthash_kmer.hpp b/include/btllib/nthash_kmer.hpp index a12ae59d..64010c72 100644 --- a/include/btllib/nthash_kmer.hpp +++ b/include/btllib/nthash_kmer.hpp @@ -20,36 +20,65 @@ namespace btllib::hashing_internals { * @return Hash value of k-mer_0 */ inline uint64_t -base_forward_hash(const char* seq, unsigned k) +base_forward_hash(const char* seq, unsigned k, const uint64_t* primitive_tab = SEED_TAB, const uint64_t* dimer_tab = DIMER_TAB, const uint64_t* trimer_tab = TRIMER_TAB, const uint64_t* tetramer_tab = TETRAMER_TAB +, const uint8_t* convert_tab = CONVERT_TAB) { uint64_t h_val = 0; + std::cerr << "abc" << std::endl; + for (unsigned i = 0; i < k - 3; i += 4) { h_val = srol(h_val, 4); uint8_t loc = 0; - loc += 64 * CONVERT_TAB[(unsigned char)seq[i]]; // NOLINT - loc += 16 * CONVERT_TAB[(unsigned char)seq[i + 1]]; // NOLINT - loc += 4 * CONVERT_TAB[(unsigned char)seq[i + 2]]; - loc += CONVERT_TAB[(unsigned char)seq[i + 3]]; - h_val ^= TETRAMER_TAB[loc]; + /*loc += 64 * convert_tab[(unsigned char)seq[i]]; // NOLINT + loc += 16 * convert_tab[(unsigned char)seq[i + 1]]; // NOLINT + loc += 4 * convert_tab[(unsigned char)seq[i + 2]]; + loc += convert_tab[(unsigned char)seq[i + 3]]; + h_val ^= tetramer_tab[loc];*/ + uint8_t v0 = convert_tab[(unsigned char)seq[i]]; + uint8_t v1 = convert_tab[(unsigned char)seq[i + 1]]; + uint8_t v2 = convert_tab[(unsigned char)seq[i + 2]]; + uint8_t v3 = convert_tab[(unsigned char)seq[i + 3]]; + + loc += 64 * v0; + loc += 16 * v1; + loc += 4 * v2; + loc += v3; + + h_val ^= tetramer_tab[loc]; + + // Print each char, its convert_tab value, and the resulting loc + std::cerr << "Chars: " + << seq[i] << seq[i+1] << seq[i+2] << seq[i+3] + << " | Convert: " + << static_cast(v0) << " " + << static_cast(v1) << " " + << static_cast(v2) << " " + << static_cast(v3) + << " | loc = " << static_cast(loc) + << " (0x" << std::hex << static_cast(loc) << std::dec << ")" + << std::endl; } + std::cerr << h_val << std::endl; + const unsigned remainder = k % 4; if (remainder > 0) { h_val = srol(h_val, remainder); } if (remainder == 3) { uint8_t trimer_loc = 0; - trimer_loc += 16 * CONVERT_TAB[(unsigned char)seq[k - 3]]; // NOLINT - trimer_loc += 4 * CONVERT_TAB[(unsigned char)seq[k - 2]]; - trimer_loc += CONVERT_TAB[(unsigned char)seq[k - 1]]; - h_val ^= TRIMER_TAB[trimer_loc]; + trimer_loc += 16 * convert_tab[(unsigned char)seq[k - 3]]; // NOLINT + trimer_loc += 4 * convert_tab[(unsigned char)seq[k - 2]]; + trimer_loc += convert_tab[(unsigned char)seq[k - 1]]; + h_val ^= trimer_tab[trimer_loc]; } else if (remainder == 2) { uint8_t dimer_loc = 0; - dimer_loc += 4 * CONVERT_TAB[(unsigned char)seq[k - 2]]; - dimer_loc += CONVERT_TAB[(unsigned char)seq[k - 1]]; - h_val ^= DIMER_TAB[dimer_loc]; + dimer_loc += 4 * convert_tab[(unsigned char)seq[k - 2]]; + dimer_loc += convert_tab[(unsigned char)seq[k - 1]]; + h_val ^= dimer_tab[dimer_loc]; } else if (remainder == 1) { - h_val ^= SEED_TAB[(unsigned char)seq[k - 1]]; + h_val ^= primitive_tab[(unsigned char)seq[k - 1]]; } + std::cerr << h_val << std::endl; return h_val; } @@ -66,11 +95,14 @@ inline uint64_t next_forward_hash(uint64_t fh_val, unsigned k, unsigned char char_out, - unsigned char char_in) + unsigned char char_in, + const uint64_t* primitive_tab = SEED_TAB, + const uint64_t* const* right_table = MS_TAB_33R, + const uint64_t* const* left_table = MS_TAB_31L) { uint64_t h_val = srol(fh_val); - h_val ^= SEED_TAB[char_in]; - h_val ^= srol_table(char_out, k); + h_val ^= primitive_tab[char_in]; + h_val ^= srol_table(char_out, k, right_table, left_table); return h_val; } @@ -86,10 +118,13 @@ inline uint64_t prev_forward_hash(uint64_t fh_val, unsigned k, unsigned char char_out, - unsigned char char_in) + unsigned char char_in, + const uint64_t* primitive_tab = SEED_TAB, + const uint64_t* const* right_table = MS_TAB_33R, + const uint64_t* const* left_table = MS_TAB_31L) { - uint64_t h_val = fh_val ^ srol_table(char_in, k); - h_val ^= SEED_TAB[char_out]; + uint64_t h_val = fh_val ^ srol_table(char_in, k, right_table, left_table); + h_val ^= primitive_tab[char_out]; h_val = sror(h_val); return h_val; } @@ -102,33 +137,62 @@ prev_forward_hash(uint64_t fh_val, * @return Hash value of the reverse-complement of k-mer_0 */ inline uint64_t -base_reverse_hash(const char* seq, unsigned k) +base_reverse_hash(const char* seq, unsigned k, const uint64_t* primitive_tab = SEED_TAB, const uint64_t* dimer_tab = DIMER_TAB, const uint64_t* trimer_tab = TRIMER_TAB, const uint64_t* tetramer_tab = TETRAMER_TAB +, const uint8_t* rc_convert_tab = RC_CONVERT_TAB) { uint64_t h_val = 0; const unsigned remainder = k % 4; + std::cerr << "cde" << std::endl; if (remainder == 3) { uint8_t trimer_loc = 0; - trimer_loc += 16 * RC_CONVERT_TAB[(unsigned char)seq[k - 1]]; // NOLINT - trimer_loc += 4 * RC_CONVERT_TAB[(unsigned char)seq[k - 2]]; - trimer_loc += RC_CONVERT_TAB[(unsigned char)seq[k - 3]]; - h_val ^= TRIMER_TAB[trimer_loc]; + trimer_loc += 16 * rc_convert_tab[(unsigned char)seq[k - 1]]; // NOLINT + trimer_loc += 4 * rc_convert_tab[(unsigned char)seq[k - 2]]; + trimer_loc += rc_convert_tab[(unsigned char)seq[k - 3]]; + h_val ^= trimer_tab[trimer_loc]; } else if (remainder == 2) { uint8_t dimer_loc = 0; - dimer_loc += 4 * RC_CONVERT_TAB[(unsigned char)seq[k - 1]]; - dimer_loc += RC_CONVERT_TAB[(unsigned char)seq[k - 2]]; - h_val ^= DIMER_TAB[dimer_loc]; + dimer_loc += 4 * rc_convert_tab[(unsigned char)seq[k - 1]]; + dimer_loc += rc_convert_tab[(unsigned char)seq[k - 2]]; + h_val ^= dimer_tab[dimer_loc]; } else if (remainder == 1) { - h_val ^= SEED_TAB[(unsigned char)seq[k - 1] & CP_OFF]; + h_val ^= primitive_tab[(unsigned char)seq[k - 1] & CP_OFF]; } + std::cerr << h_val << std::endl; for (int i = (int)(k - remainder) - 1; i >= 3; i -= 4) { h_val = srol(h_val, 4); uint8_t loc = 0; - loc += 64 * RC_CONVERT_TAB[(unsigned char)seq[i]]; // NOLINT - loc += 16 * RC_CONVERT_TAB[(unsigned char)seq[i - 1]]; // NOLINT - loc += 4 * RC_CONVERT_TAB[(unsigned char)seq[i - 2]]; - loc += RC_CONVERT_TAB[(unsigned char)seq[i - 3]]; - h_val ^= TETRAMER_TAB[loc]; + /*loc += 64 * rc_convert_tab[(unsigned char)seq[i]]; // NOLINT + loc += 16 * rc_convert_tab[(unsigned char)seq[i - 1]]; // NOLINT + loc += 4 * rc_convert_tab[(unsigned char)seq[i - 2]]; + loc += rc_convert_tab[(unsigned char)seq[i - 3]]; + h_val ^= tetramer_tab[loc]; + std::cout << "loc (dec): " << static_cast(loc) << std::endl;*/ + uint8_t rv0 = rc_convert_tab[(unsigned char)seq[i]]; +uint8_t rv1 = rc_convert_tab[(unsigned char)seq[i - 1]]; +uint8_t rv2 = rc_convert_tab[(unsigned char)seq[i - 2]]; +uint8_t rv3 = rc_convert_tab[(unsigned char)seq[i - 3]]; + +loc += 64 * rv0; +loc += 16 * rv1; +loc += 4 * rv2; +loc += rv3; + +h_val ^= tetramer_tab[loc]; + +// Print each char, its rc_convert_tab value, and the resulting loc +std::cerr << "Chars (rev): " + << seq[i] << seq[i-1] << seq[i-2] << seq[i-3] + << " | RC Convert: " + << static_cast(rv0) << " " + << static_cast(rv1) << " " + << static_cast(rv2) << " " + << static_cast(rv3) + << " | loc = " << static_cast(loc) + << " (0x" << std::hex << static_cast(loc) << std::dec << ")" + << std::endl; } + + std::cerr << h_val << std::endl; return h_val; } @@ -146,10 +210,13 @@ inline uint64_t next_reverse_hash(uint64_t rh_val, unsigned k, unsigned char char_out, - unsigned char char_in) + unsigned char char_in, + const uint64_t* primitive_tab = SEED_TAB, + const uint64_t* const* right_table = MS_TAB_33R, + const uint64_t* const* left_table = MS_TAB_31L) { - uint64_t h_val = rh_val ^ srol_table(char_in & CP_OFF, k); - h_val ^= SEED_TAB[char_out & CP_OFF]; + uint64_t h_val = rh_val ^ srol_table(char_in & CP_OFF, k, right_table, left_table); + h_val ^= primitive_tab[char_out & CP_OFF]; h_val = sror(h_val); return h_val; } @@ -166,11 +233,14 @@ inline uint64_t prev_reverse_hash(uint64_t rh_val, unsigned k, unsigned char char_out, - unsigned char char_in) + unsigned char char_in, + const uint64_t* primitive_tab = SEED_TAB, + const uint64_t* const* right_table = MS_TAB_33R, + const uint64_t* const* left_table = MS_TAB_31L) { uint64_t h_val = srol(rh_val); - h_val ^= SEED_TAB[char_in & CP_OFF]; - h_val ^= srol_table(char_out & CP_OFF, k); + h_val ^= primitive_tab[char_in & CP_OFF]; + h_val ^= srol_table(char_out & CP_OFF, k, right_table, left_table); return h_val; } @@ -196,7 +266,9 @@ sub_hash(uint64_t fh_val, const std::vector& new_bases, unsigned k, unsigned m, - uint64_t* h_val) + uint64_t* h_val, + const uint64_t* const* right_table = MS_TAB_33R, + const uint64_t* const* left_table = MS_TAB_31L) { uint64_t b_val = 0; @@ -204,11 +276,11 @@ sub_hash(uint64_t fh_val, const auto pos = positions[i]; const auto new_base = new_bases[i]; - fh_val ^= srol_table((unsigned char)kmer_seq[pos], k - 1 - pos); - fh_val ^= srol_table(new_base, k - 1 - pos); + fh_val ^= srol_table((unsigned char)kmer_seq[pos], k - 1 - pos, right_table, left_table); + fh_val ^= srol_table(new_base, k - 1 - pos, right_table, left_table); - rh_val ^= srol_table((unsigned char)kmer_seq[pos] & CP_OFF, pos); - rh_val ^= srol_table(new_base & CP_OFF, pos); + rh_val ^= srol_table((unsigned char)kmer_seq[pos] & CP_OFF, pos, right_table, left_table); + rh_val ^= srol_table(new_base & CP_OFF, pos, right_table, left_table); } b_val = canonical(fh_val, rh_val); @@ -484,6 +556,7 @@ class NtHash uint64_t rev_hash = 0; std::unique_ptr hash_arr; + /** * Initialize the internal state of the iterator * @return \p true if successful, \p false otherwise diff --git a/tests/bshash.cpp b/tests/bshash.cpp new file mode 100644 index 00000000..a6e2fefd --- /dev/null +++ b/tests/bshash.cpp @@ -0,0 +1,898 @@ +#include "btllib/bshash.hpp" +#include "btllib/seq.hpp" +#include "helpers.hpp" + +#include +#include +#include +#include + +int +main() +{ + + + +{ + // ─────────────────────────────── + PRINT_TEST_NAME("CG k-mer rolling (64-mer base)") + // ─────────────────────────────── + + std::string first32 = + "AAACAGAT" + "CACCCGCT" + "GAGCGGGT" + "TACTGTGT"; + + std::string seq = first32 + first32; // 64 bases total + unsigned h = 3; + unsigned k = 32; + + btllib::BsHash nthash(seq, h, k, "CG"); + std::vector hashes; + + while (nthash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push_back(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); + TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); + + // ─────────────────────────────── + // Define degeneracy groups + // ─────────────────────────────── + const std::vector> GROUPS = { + {"AA", "GA"}, + {"AC", "AT", "GC", "GT"}, + {"AG", "GG"}, + {"CA", "CG", "TG"}, + {"CC", "CT"}, + {"TA"}, + {"TC", "TT"} + }; + + auto mutate_by_shift = [&](const std::string& s, int shift) { + std::string out; + for (size_t i = 0; i < s.size(); i += 2) { + std::string dimer = s.substr(i, 2); + bool done = false; + for (const auto& g : GROUPS) { + auto it = std::find(g.begin(), g.end(), dimer); + if (it != g.end()) { + out += g[(std::distance(g.begin(), it) + shift) % g.size()]; + done = true; + break; + } + } + if (!done) out += dimer; + } + return out; + }; + + + auto compute_hashes = [&](const std::string& s) { + btllib::BsHash nh(s, h, k, "CG"); + std::vector out; + while (nh.roll()) { + uint64_t* hv = new uint64_t[h]; + std::copy(nh.hashes(), nh.hashes() + h, hv); + out.push_back(hv); + } + return out; + }; +// ─────────────────────────────── +PRINT_TEST_NAME("CG k-mer rolling (degeneracy shift +1)") +// ─────────────────────────────── +{ + std::string seq_shift = first32 + mutate_by_shift(first32, 1); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i+2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } + } +} + + // ─────────────────────────────── + PRINT_TEST_NAME("CG k-mer rolling (degeneracy shift +2)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_by_shift(first32, 2); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i+2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } + } + } + + // ─────────────────────────────── + PRINT_TEST_NAME("CG k-mer rolling (degeneracy shift +3)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_by_shift(first32, 3); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i+2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } + } + } + } + +{ + // ─────────────────────────────── + PRINT_TEST_NAME("CG k-mer rolling (C→T mutation)") + // ─────────────────────────────── + + std::string first32 = + "ATGCGTAGGTCGATGACGTTGAGCGTACGAGT"; + + std::string seq = first32 + first32; // 64 bases total + unsigned h = 3; + unsigned k = 32; + + btllib::BsHash nthash(seq, h, k, "CG"); + std::vector hashes; + + while (nthash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push_back(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); + TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); + + // ─────────────────────────────── + // Mutation function: C→T + // ─────────────────────────────── + auto mutate_CT = [](const std::string& s) { + std::string out = s; + for (char& base : out) { + if (base == 'C') base = 'T'; + else if (base == 'c') base = 't'; + } + return out; + }; + + auto compute_hashes = [&](const std::string& s) { + btllib::BsHash nh(s, h, k, "CG"); + std::vector out; + while (nh.roll()) { + uint64_t* hv = new uint64_t[h]; + std::copy(nh.hashes(), nh.hashes() + h, hv); + out.push_back(hv); + } + return out; + }; + + // ─────────────────────────────── + PRINT_TEST_NAME("CG k-mer rolling (C→T forward check)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_CT(first32); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + std::cerr << seq << std::endl; + std::cerr << seq_shift << std::endl; + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + + for (unsigned i = 0; i < h1.size(); i += 2) { + std::cerr << i << std::endl; + + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } + } + } +} + +{ + // ─────────────────────────────── + PRINT_TEST_NAME("CG k-mer rolling (G→A mutation)") + // ─────────────────────────────── + + std::string first32 = + "TACCGATACCTACGCCATACGTCATACCGTTA"; + + std::string seq = first32 + first32; // 64 bases total + unsigned h = 3; + unsigned k = 32; + + btllib::BsHash nthash(seq, h, k, "CG"); + std::vector hashes; + + while (nthash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push_back(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); + TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); + + // ─────────────────────────────── + // Mutation function: G→A + // ─────────────────────────────── + auto mutate_GA = [](const std::string& s) { + std::string out = s; + for (char& base : out) { + if (base == 'G') base = 'A'; + else if (base == 'g') base = 'a'; + } + return out; + }; + + auto compute_hashes = [&](const std::string& s) { + btllib::BsHash nh(s, h, k, "CG"); + std::vector out; + while (nh.roll()) { + uint64_t* hv = new uint64_t[h]; + std::copy(nh.hashes(), nh.hashes() + h, hv); + out.push_back(hv); + } + return out; + }; + + // ─────────────────────────────── + PRINT_TEST_NAME("CG k-mer rolling (G→A forward check)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_GA(first32); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + + for (unsigned i = 0; i < h1.size(); i += 2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } + } + } +} + + +/*{ + // ─────────────────────────────── + PRINT_TEST_NAME("CC k-mer rolling (64-mer base)") + // ─────────────────────────────── + + std::string first32 = + "AAACAGAT" + "CACCCGCT" + "GAGCGGGT" + "TACTGTGT"; + + std::string seq = first32 + first32; // 64 bases total + unsigned h = 3; + unsigned k = 32; + + btllib::BsHash nthash(seq, h, k, "CC"); // or "CC" if that parameter matters + std::vector hashes; + + while (nthash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push_back(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); + TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); + + // ─────────────────────────────── + // Define CC degeneracy groups + // ─────────────────────────────── + const std::vector> CC_GROUPS = { + {"AA", "AG", "GA", "GG"}, // 0x1bc5d5aba6ea6e02 + {"AC", "AT", "GC", "GT"}, // 0x8f15314b06b5ed9c + {"CA", "CG", "TA", "TG"}, // 0x44fec7e4a74bce00 + {"CC", "CT", "TC", "TT"} // 0x77c24a36c934fdc1 + }; + + auto mutate_by_shift = [&](const std::string& s, int shift) { + std::string out; + for (size_t i = 0; i < s.size(); i += 2) { + std::string dimer = s.substr(i, 2); + bool done = false; + for (const auto& g : CC_GROUPS) { + auto it = std::find(g.begin(), g.end(), dimer); + if (it != g.end()) { + out += g[(std::distance(g.begin(), it) + shift) % g.size()]; + done = true; + break; + } + } + if (!done) out += dimer; + } + return out; + }; + + auto compute_hashes = [&](const std::string& s) { + btllib::BsHash nh(s, h, k, "CC"); // or "CC" if appropriate + std::vector out; + while (nh.roll()) { + uint64_t* hv = new uint64_t[h]; + std::copy(nh.hashes(), nh.hashes() + h, hv); + out.push_back(hv); + } + return out; + }; + + // ─────────────────────────────── + PRINT_TEST_NAME("CC k-mer rolling (degeneracy shift +1)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_by_shift(first32, 1); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i+2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } + } + } + + // ─────────────────────────────── + PRINT_TEST_NAME("CC k-mer rolling (degeneracy shift +2)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_by_shift(first32, 2); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i+2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } + } + } + + // ─────────────────────────────── + PRINT_TEST_NAME("CC k-mer rolling (degeneracy shift +3)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_by_shift(first32, 3); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i+2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } + } + } +}*/ + +/*{ + // ─────────────────────────────── + PRINT_TEST_NAME("CT k-mer rolling (64-mer base)") + // ─────────────────────────────── + + std::string first32 = + "AAACAGAT" + "CACCCGCT" + "GAGCGGGT" + "TACTGTGT"; + + std::string seq = first32 + first32; // 64 bases total + unsigned h = 3; + unsigned k = 32; + + btllib::BsHash nthash(seq, h, k, "CT"); // or "CT" if that parameter matters + std::vector hashes; + + while (nthash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push_back(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); + TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); + + // ─────────────────────────────── + // Define CT degeneracy groups based on CT_DIMER_TAB + // ─────────────────────────────── + const std::vector> CT_GROUPS = { + {"AA", "AC", "AG", "GG"}, // 0x9a533a6a5e7b14e2 + {"AT", "GT"}, // 0x897da34f6b464570 + {"CA"}, + {"CG"}, // 0xeeaf42d6e10d9ac0 and 0x68c7d4adebfb5001 + {"CC", "CT", "TC", "TT"}, // 0x049bab70128da11f + {"GA", "GC", "GG", "AG"}, // 0x9a533a6a5e7b14e2 + {"TA"}, // 0x0f1bfc76a0e3e700 + {"TG"} // 0x282450c57fae339f + }; + + auto mutate_by_shift = [&](const std::string& s, int shift) { + std::string out; + for (size_t i = 0; i < s.size(); i += 2) { + std::string dimer = s.substr(i, 2); + bool done = false; + for (const auto& g : CT_GROUPS) { + auto it = std::find(g.begin(), g.end(), dimer); + if (it != g.end()) { + out += g[(std::distance(g.begin(), it) + shift) % g.size()]; + done = true; + break; + } + } + if (!done) out += dimer; + } + return out; + }; + + auto compute_hashes = [&](const std::string& s) { + btllib::BsHash nh(s, h, k, "CT"); // or "CT" if needed + std::vector out; + while (nh.roll()) { + uint64_t* hv = new uint64_t[h]; + std::copy(nh.hashes(), nh.hashes() + h, hv); + out.push_back(hv); + } + return out; + }; + + // ─────────────────────────────── + PRINT_TEST_NAME("CT k-mer rolling (degeneracy shift +1)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_by_shift(first32, 1); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + std::cerr << seq << std::endl; + std::cerr << seq_shift << std::endl; + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); +for (unsigned i = 0; i < h1.size(); i = i+2) { + std::cerr << i << std::endl; + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } +} + } + + // ─────────────────────────────── + PRINT_TEST_NAME("CT k-mer rolling (degeneracy shift +2)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_by_shift(first32, 2); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); +for (unsigned i = 0; i < h1.size(); i = i+2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } +} + } + + // ─────────────────────────────── + PRINT_TEST_NAME("CT k-mer rolling (degeneracy shift +3)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_by_shift(first32, 3); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); +for (unsigned i = 0; i < h1.size(); i = i+2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } +} + } +} + +{ + // ─────────────────────────────── + PRINT_TEST_NAME("CA k-mer rolling (64-mer base)") + // ─────────────────────────────── + + std::string first32 = + "AAACAGAT" + "CACCCGCT" + "GAGCGGGT" + "TACTGTGT"; + + std::string seq = first32 + first32; // 64 bases total + unsigned h = 3; + unsigned k = 32; + + btllib::BsHash nthash(seq, h, k, "CA"); // or "CA" if that parameter matters + std::vector hashes; + + while (nthash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push_back(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); + TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); + + // ─────────────────────────────── + // Define CA degeneracy groups based on CA_DIMER_TAB + // ─────────────────────────────── + const std::vector> CA_GROUPS = { + {"AA", "GA"}, // 0x7e4a36919c5032af + {"AC", "AT", "GC", "GT"}, // 0x95b4b90696718a9f + {"AG", "GG"}, // 0xf482881f5f3610bf + {"CA", "TA", "TG"}, // 0xe12f0e8e678e5d30 + {"CC", "CT", "TT", "TC"}, // 0x9ff825331cf8a361 and 0x00e055ee083cfcff + {"CG"} // 0xf248b960f9159cec + }; + + auto mutate_by_shift = [&](const std::string& s, int shift) { + std::string out; + for (size_t i = 0; i < s.size(); i += 2) { + std::string dimer = s.substr(i, 2); + bool done = false; + for (const auto& g : CA_GROUPS) { + auto it = std::find(g.begin(), g.end(), dimer); + if (it != g.end()) { + out += g[(std::distance(g.begin(), it) + shift) % g.size()]; + done = true; + break; + } + } + if (!done) out += dimer; + } + return out; + }; + + auto compute_hashes = [&](const std::string& s) { + btllib::BsHash nh(s, h, k, "CA"); // or "CA" if needed + std::vector out; + while (nh.roll()) { + uint64_t* hv = new uint64_t[h]; + std::copy(nh.hashes(), nh.hashes() + h, hv); + out.push_back(hv); + } + return out; + }; + + // ─────────────────────────────── + PRINT_TEST_NAME("CA k-mer rolling (degeneracy shift +1)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_by_shift(first32, 1); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); +for (unsigned i = 0; i < h1.size(); i = i+2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } +} + } + + // ─────────────────────────────── + PRINT_TEST_NAME("CA k-mer rolling (degeneracy shift +2)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_by_shift(first32, 2); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); +for (unsigned i = 0; i < h1.size(); i = i+2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } +} + } + + // ─────────────────────────────── + PRINT_TEST_NAME("CA k-mer rolling (degeneracy shift +3)") + // ─────────────────────────────── + { + std::string seq_shift = first32 + mutate_by_shift(first32, 3); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); +for (unsigned i = 0; i < h1.size(); i = i+2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } +} + } +} +*/ + + + { + PRINT_TEST_NAME("k-mer rolling vs. base hash values") + + std::string seq = "ACGTACACTGGACTGAGTCT"; + + btllib::BsHash nthash(seq, 3, seq.size() - 2, "CG"); + /* 18-mers of kmer*/ + std::string kmer1 = seq.substr(0, 18); + std::string kmer2 = seq.substr(1, 18); + std::string kmer3 = seq.substr(2, 18); + + btllib::BsHash nthash_vector[] = { + btllib::BsHash(kmer1, nthash.get_hash_num(), kmer1.size(), "CG"), + btllib::BsHash(kmer2, nthash.get_hash_num(), kmer2.size(), "CG"), + btllib::BsHash(kmer3, nthash.get_hash_num(), kmer3.size(), "CG") + }; + + size_t i; + for (i = 0; nthash.roll() && nthash_vector[i].roll(); ++i) { + for (size_t j = 0; j < nthash.get_hash_num(); ++j) { + TEST_ASSERT_EQ(nthash.hashes()[j], nthash_vector[i].hashes()[j]); + } + } + TEST_ASSERT_EQ(i, 3); + } + + { + PRINT_TEST_NAME("CG canonical hashing") + + std::string seq_f = "ACGTACACTGGACTGAGTCT"; + std::string seq_r = "AGACTCAGTCCAGTGTACGT"; + unsigned h = 3; + + btllib::BsHash nthash_f(seq_f, h, seq_f.size(), "CG"); + btllib::BsHash nthash_r(seq_r, h, seq_r.size(), "CG"); + + nthash_f.roll(); + nthash_r.roll(); + TEST_ASSERT_EQ(nthash_f.get_hash_num(), nthash_r.get_hash_num()) + TEST_ASSERT_ARRAY_EQ(nthash_f.hashes(), nthash_r.hashes(), h) + } + + /*{ + PRINT_TEST_NAME("CC canonical hashing") + + std::string seq_f = "ACGTACACTGGACTGAGTCT"; + std::string seq_r = "AGACTCAGTCCAGTGTACGT"; + unsigned h = 3; + + btllib::BsHash nthash_f(seq_f, h, seq_f.size(), "CC"); + btllib::BsHash nthash_r(seq_r, h, seq_r.size(), "CC"); + + nthash_f.roll(); + nthash_r.roll(); + TEST_ASSERT_EQ(nthash_f.get_hash_num(), nthash_r.get_hash_num()) + TEST_ASSERT_ARRAY_EQ(nthash_f.hashes(), nthash_r.hashes(), h) + } + + { + PRINT_TEST_NAME("CT canonical hashing") + + std::string seq_f = "ACGTACACTGGACTGAGTCT"; + std::string seq_r = "AGACTCAGTCCAGTGTACGT"; + unsigned h = 3; + + btllib::BsHash nthash_f(seq_f, h, seq_f.size(), "CT"); + btllib::BsHash nthash_r(seq_r, h, seq_r.size(), "CT"); + + nthash_f.roll(); + nthash_r.roll(); + TEST_ASSERT_EQ(nthash_f.get_hash_num(), nthash_r.get_hash_num()) + TEST_ASSERT_ARRAY_EQ(nthash_f.hashes(), nthash_r.hashes(), h) + } + + { + PRINT_TEST_NAME("CA canonical hashing") + + std::string seq_f = "ACGTACACTGGACTGAGTCT"; + std::string seq_r = "AGACTCAGTCCAGTGTACGT"; + unsigned h = 3; + + btllib::BsHash nthash_f(seq_f, h, seq_f.size(), "CA"); + btllib::BsHash nthash_r(seq_r, h, seq_r.size(), "CA"); + + nthash_f.roll(); + nthash_r.roll(); + TEST_ASSERT_EQ(nthash_f.get_hash_num(), nthash_r.get_hash_num()) + TEST_ASSERT_ARRAY_EQ(nthash_f.hashes(), nthash_r.hashes(), h) + } +*/ + { + PRINT_TEST_NAME("CG k-mer back rolling") + + std::string seq = "ACTGACTA"; + unsigned h = 3; + unsigned k = 4; + + btllib::BsHash nthash(seq, h, k, "CG"); + std::stack hashes; + + while (nthash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1) + + do { + TEST_ASSERT_ARRAY_EQ(nthash.hashes(), hashes.top(), h) + hashes.pop(); + } while (nthash.roll_back()); + } + + /*{ + PRINT_TEST_NAME("CC k-mer back rolling") + + std::string seq = "ACTGACTG"; + unsigned h = 3; + unsigned k = 4; + + btllib::BsHash nthash(seq, h, k, "CC"); + std::stack hashes; + + while (nthash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1) + + do { + TEST_ASSERT_ARRAY_EQ(nthash.hashes(), hashes.top(), h) + hashes.pop(); + } while (nthash.roll_back()); + } + + { + PRINT_TEST_NAME("CT k-mer back rolling") + + std::string seq = "ACTGACTG"; + unsigned h = 3; + unsigned k = 4; + + btllib::BsHash nthash(seq, h, k, "CT"); + std::stack hashes; + + while (nthash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1) + + do { + TEST_ASSERT_ARRAY_EQ(nthash.hashes(), hashes.top(), h) + hashes.pop(); + } while (nthash.roll_back()); + } + + { + PRINT_TEST_NAME("CA k-mer back rolling") + + std::string seq = "ACTGACTG"; + unsigned h = 3; + unsigned k = 4; + + btllib::BsHash nthash(seq, h, k, "CA"); + std::stack hashes; + + while (nthash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1) + + do { + TEST_ASSERT_ARRAY_EQ(nthash.hashes(), hashes.top(), h) + hashes.pop(); + } while (nthash.roll_back()); + } */ +/* + { + PRINT_TEST_NAME("k-mer peeking") + + std::string seq = "ACTGATCAG"; + unsigned h = 3; + unsigned k = 6; + + btllib::BsHash nthash(seq, h, k); + nthash.roll(); + + size_t steps = 3; + while (steps--) { + nthash.peek(); + uint64_t* h_peek = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_peek); + nthash.peek(seq[nthash.get_pos() + k]); + TEST_ASSERT_ARRAY_EQ(nthash.hashes(), h_peek, h); + nthash.roll(); + TEST_ASSERT_ARRAY_EQ(nthash.hashes(), h_peek, h); + } + }*/ + + { + PRINT_TEST_NAME("skipping Ns") + + std::string seq = "ACGTACACTGGACTGAGTCT"; + std::string seq_with_ns = seq; + + TEST_ASSERT_GE(seq_with_ns.size(), 10) + //10, 11 are Ns + seq_with_ns[seq_with_ns.size() / 2] = 'N'; + seq_with_ns[seq_with_ns.size() / 2 + 1] = 'N'; + unsigned k = (seq.size() - 2) / 2 - 1; + //k = 8 + btllib::BsHash nthash(seq_with_ns, 3, k); + + std::vector positions; + for (size_t i = 0; i < seq_with_ns.size() / 2 - k + 1; i++) { + positions.push_back(i); + } + for (size_t i = seq_with_ns.size() / 2 + 2; i < seq_with_ns.size() - k + 1; + i++) { + positions.push_back(i); + } + + size_t i = 0; + while (nthash.roll()) { + TEST_ASSERT_EQ(nthash.get_pos(), positions[i]) + i++; + } + TEST_ASSERT_EQ(positions.size(), i) + } + + + + + { + PRINT_TEST_NAME("check methylation") + + std::string seq_k6 = "ATCGGA"; + std::string seq_k4 = "CGTA"; + btllib::BsHash nthash1(seq_k6, 3, 6); + nthash1.roll(); + TEST_ASSERT_EQ(nthash1.is_methylated(), true) + btllib::BsHash nthash2(seq_k4, 3, 4); + nthash2.roll(); + TEST_ASSERT_EQ(nthash2.is_methylated(), true) + + + } + + + return 0; +} diff --git a/tests/bshash_conversion.cpp b/tests/bshash_conversion.cpp new file mode 100644 index 00000000..b0da50c4 --- /dev/null +++ b/tests/bshash_conversion.cpp @@ -0,0 +1,200 @@ +#include "btllib/bshash.hpp" +#include "btllib/seq.hpp" +#include "helpers.hpp" + +#include +#include +#include +#include + +int +main() +{ + + /*{ + PRINT_TEST_NAME("k-mer hash values") + + std::string seq = "ACATGCATGCA"; + const unsigned k = 5; + const unsigned h = 3; + + const std::vector> hashes = { + { 0x38cc00f940aebdae, 0xab7e1b110e086fc6, 0x11a1818bcfdd553 }, + { 0x603a48c5a11c794a, 0xe66016e61816b9c4, 0xc5b13cb146996ffe } + }; + + btllib::NtHash nthash(seq, h, k); + nthash.roll(); + btllib::BlindNtHash blind(seq.data(), h, k); + + for (const auto& h_vals : hashes) { + nthash.roll(); + TEST_ASSERT_ARRAY_EQ(h_vals, nthash.hashes(), h); + blind.roll(seq[blind.get_pos() + k]); + TEST_ASSERT_ARRAY_EQ(h_vals, blind.hashes(), h); + } + }*/ + + { + PRINT_TEST_NAME("k-mer rolling") + + std::string seq = "AGTCAGTC"; + unsigned h = 1; + unsigned k = 4; + + btllib::BSHashConversion nthash(seq, h, k, "CT"); + std::vector hashes; + + while (nthash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push_back(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); + + // check same hash value for identical k-mers (first and last) + TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); + } + + { + PRINT_TEST_NAME("k-mer rolling vs. base hash values") + + std::string seq = "ACGTACACTGGACTGAGTCT"; + + btllib::BSHashConversion nthash(seq, 3, seq.size() - 2, "CT"); + /* 18-mers of kmer*/ + std::string kmer1 = seq.substr(0, 18); + std::string kmer2 = seq.substr(1, 18); + std::string kmer3 = seq.substr(2, 18); + + btllib::BSHashConversion nthash_vector[] = { + btllib::BSHashConversion(kmer1, nthash.get_hash_num(), kmer1.size(), "CT"), + btllib::BSHashConversion(kmer2, nthash.get_hash_num(), kmer2.size(), "CT"), + btllib::BSHashConversion(kmer3, nthash.get_hash_num(), kmer3.size(), "CT") + }; + + size_t i; + for (i = 0; nthash.roll() && nthash_vector[i].roll(); ++i) { + for (size_t j = 0; j < nthash.get_hash_num(); ++j) { + TEST_ASSERT_EQ(nthash.hashes()[j], nthash_vector[i].hashes()[j]); + } + } + TEST_ASSERT_EQ(i, 3); + } + + { + PRINT_TEST_NAME("canonical hashing") + + std::string seq_f = "ACGTACACTGGACTGAGTCT"; + std::string seq_r = "AGACTCAGTCCAGTGTACGT"; + unsigned h = 3; + + btllib::BSHashConversion nthash_f(seq_f, h, seq_f.size(), "CT"); + btllib::BSHashConversion nthash_r(seq_r, h, seq_r.size(), "GA"); + + nthash_f.roll(); + nthash_r.roll(); + TEST_ASSERT_EQ(nthash_f.get_hash_num(), nthash_r.get_hash_num()) + TEST_ASSERT_ARRAY_EQ(nthash_f.hashes(), nthash_r.hashes(), h) + } + + { + PRINT_TEST_NAME("k-mer back rolling") + + std::string seq = "ACTAGCTG"; + unsigned h = 3; + unsigned k = 5; + + btllib::BSHashConversion nthash(seq, h, k, "CT"); + std::stack hashes; + + while (nthash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1) + + do { + TEST_ASSERT_ARRAY_EQ(nthash.hashes(), hashes.top(), h) + hashes.pop(); + } while (nthash.roll_back()); + } + + { + PRINT_TEST_NAME("k-mer peeking") + + std::string seq = "ACTGATCAG"; + unsigned h = 3; + unsigned k = 6; + + btllib::BSHashConversion nthash(seq, h, k, "CT"); + nthash.roll(); + + size_t steps = 3; + while (steps--) { + nthash.peek(); + uint64_t* h_peek = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_peek); + nthash.peek(seq[nthash.get_pos() + k]); + TEST_ASSERT_ARRAY_EQ(nthash.hashes(), h_peek, h); + nthash.roll(); + TEST_ASSERT_ARRAY_EQ(nthash.hashes(), h_peek, h); + } + } + + { + PRINT_TEST_NAME("skipping Ns") + + std::string seq = "ACGTACACTGGACTGAGTCT"; + std::string seq_with_ns = seq; + + TEST_ASSERT_GE(seq_with_ns.size(), 10) + seq_with_ns[seq_with_ns.size() / 2] = 'N'; + seq_with_ns[seq_with_ns.size() / 2 + 1] = 'N'; + unsigned k = (seq.size() - 2) / 2 - 1; + btllib::BSHashConversion nthash(seq_with_ns, 3, k, "CT"); + + std::vector positions; + for (size_t i = 0; i < seq_with_ns.size() / 2 - k + 1; i++) { + positions.push_back(i); + } + for (size_t i = seq_with_ns.size() / 2 + 2; i < seq_with_ns.size() - k + 1; + i++) { + positions.push_back(i); + } + + size_t i = 0; + while (nthash.roll()) { + TEST_ASSERT_EQ(nthash.get_pos(), positions[i]) + i++; + } + TEST_ASSERT_EQ(positions.size(), i) + } + + { + PRINT_TEST_NAME("base substitution") + + std::string seq = "ACGTACACTGGACTGAGTCT"; + std::string sub = "ACGCGCACTGGACTGAGTCT"; + + btllib::BSHashConversion nthash(seq, 3, seq.size(), "CT"); + btllib::BSHashConversion nthash_subbed(sub, 3, sub.size(), "CT"); + + nthash.roll(); + nthash.sub({ 3, 4 }, { 'C', 'G' }); + nthash_subbed.roll(); + TEST_ASSERT_EQ(nthash.get_hash_num(), nthash_subbed.get_hash_num()); + size_t i; + for (i = 0; i < nthash.get_hash_num(); ++i) { + TEST_ASSERT_EQ(nthash.hashes()[i], nthash_subbed.hashes()[i]); + } + TEST_ASSERT_EQ(i, 3); + } + + + + return 0; +} From fe4e9d03e783760ec690774e40c84d8bd5a1d9a7 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Thu, 4 Dec 2025 01:05:08 -0800 Subject: [PATCH 02/38] seq.hpp: add bisulfite conversion utility declaration --- include/btllib/seq.hpp | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/include/btllib/seq.hpp b/include/btllib/seq.hpp index 5ee2c299..28e14b1d 100644 --- a/include/btllib/seq.hpp +++ b/include/btllib/seq.hpp @@ -13,6 +13,8 @@ namespace btllib { extern const char COMPLEMENTS[256]; extern const char AMINO_ACIDS[256]; extern const char CAPITALS[256]; +extern const char CT_CONVERSION[256]; +extern const char GA_CONVERSION[256]; /** * Reverse complement a sequence in-place. @@ -33,6 +35,24 @@ reverse_complement(std::string& seq); std::string get_reverse_complement(const std::string& seq); -} // namespace btllib +} -#endif \ No newline at end of file +void +bs_conversion(std::string& seq, std::string& type); + +/** + * Obtain a reverse complement of the provided sequence. The argument sequence + * is left untouched. + * + * @param seq Sequence to reverse complement. + * + * @return Reverse complemented sequence. + */ +std::string +get_bs_conversion(const std::string& seq, std::string& type); + +} + +// namespace btllib + +#endif From bd8e422ab881d199f0ea19e16325ca3c79e9718d Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Thu, 4 Dec 2025 01:11:38 -0800 Subject: [PATCH 03/38] seq.cpp: add bisulfite conversion utility definition --- src/btllib/seq.cpp | 84 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/src/btllib/seq.cpp b/src/btllib/seq.cpp index 022fe0cf..9528f11f 100644 --- a/src/btllib/seq.cpp +++ b/src/btllib/seq.cpp @@ -102,6 +102,70 @@ const char CAPITALS[256] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 }; + +const char CT_CONVERSION[256] = { + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + +// ! " # $ % & ' ( ) * + , - . / + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '-' , '.', 0 , + +// 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + +// @ A B C D E F G H I J K L M N O + 0 , 'A', 0, 'T', 0, 0, 0, 'G', 0, 0, 0, 0 , 0 , 0 , 0 , 0 , + +// P Q R S T U V W X Y Z [ \ ] ^ _ + 0 , 0 , 0 , 0 , 'T', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + +// ` a b c d e f g h i j k l m n o + 0 , 'a', 0, 't', 0, 0, 0, 'g', 0, 0, 0, 0 , 0 , 0 , 0 , 0 , + +// p q r s t u v w x y z { | } ~ DEL + 0 , 0 , 0 , 0 , 't', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 +}; + +const char GA_CONVERSION[256] = { + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + +// ! " # $ % & ' ( ) * + , - . / + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , '-' , '.', 0 , + +// 0 1 2 3 4 5 6 7 8 9 : ; < = > ? + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + +// @ A B C D E F G H I J K L M N O + 0 , 'A', 0, 'C', 0, 0, 0, 'A', 0, 0, 0, 0 , 0 , 0 , 0 , 0 , + +// P Q R S T U V W X Y Z [ \ ] ^ _ + 0 , 0 , 0 , 0 , 'T', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + +// ` a b c d e f g h i j k l m n o + 0 , 'a', 0, 'c', 0, 0, 0, 'a', 0, 0, 0, 0 , 0 , 0 , 0 , 0 , + +// p q r s t u v w x y z { | } ~ DEL + 0 , 0 , 0 , 0 , 't', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , + 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 +}; // clang-format on void @@ -121,4 +185,24 @@ get_reverse_complement(const std::string& seq) return rc; } +void +bs_conversion(std::string& seq, std::string& type){ + std::transform(seq.begin(), seq.end(), seq.begin(), [](char c) { + if (type == "CT" || type == "ct") { + return CT_CONVERSION[(unsigned char)(c)]; + } else { + return GA_CONVERSION[(unsigned char)(c)]; + } + }); +} +std::string +get_bs_conversion(const std::string& seq, std::string& type){ + std::string conversion(seq); + bs_conversion(conversion); + return conversion; +} + + + + } // namespace btllib From a5ee9aed534d897490cf92a4b9730cd428f340a6 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Thu, 4 Dec 2025 01:12:17 -0800 Subject: [PATCH 04/38] Update seq.hpp --- include/btllib/seq.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/btllib/seq.hpp b/include/btllib/seq.hpp index 28e14b1d..52149604 100644 --- a/include/btllib/seq.hpp +++ b/include/btllib/seq.hpp @@ -35,7 +35,7 @@ reverse_complement(std::string& seq); std::string get_reverse_complement(const std::string& seq); -} + void bs_conversion(std::string& seq, std::string& type); From e781544647e8829012a5e9e9d9a05f67da3ec876 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Thu, 4 Dec 2025 10:05:29 -0800 Subject: [PATCH 05/38] Update seq.cpp --- src/btllib/seq.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/btllib/seq.cpp b/src/btllib/seq.cpp index 9528f11f..58b9cd4c 100644 --- a/src/btllib/seq.cpp +++ b/src/btllib/seq.cpp @@ -187,7 +187,7 @@ get_reverse_complement(const std::string& seq) void bs_conversion(std::string& seq, std::string& type){ - std::transform(seq.begin(), seq.end(), seq.begin(), [](char c) { + std::transform(seq.begin(), seq.end(), seq.begin(), [&type](char c) { if (type == "CT" || type == "ct") { return CT_CONVERSION[(unsigned char)(c)]; } else { From 41dc966f1848acc59f4802eef5597bb7aec3a017 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Fri, 5 Dec 2025 20:21:17 -0800 Subject: [PATCH 06/38] Update seq.cpp --- src/btllib/seq.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/btllib/seq.cpp b/src/btllib/seq.cpp index 58b9cd4c..2769cd1b 100644 --- a/src/btllib/seq.cpp +++ b/src/btllib/seq.cpp @@ -187,13 +187,11 @@ get_reverse_complement(const std::string& seq) void bs_conversion(std::string& seq, std::string& type){ - std::transform(seq.begin(), seq.end(), seq.begin(), [&type](char c) { - if (type == "CT" || type == "ct") { - return CT_CONVERSION[(unsigned char)(c)]; - } else { - return GA_CONVERSION[(unsigned char)(c)]; - } - }); + const unsigned char* table = + (type == "CT" || type == "ct") ? CT_CONVERSION : GA_CONVERSION; + + std::transform(seq.begin(), seq.end(), seq.begin(), + [table](char c) { return table[(unsigned char)c]; }); } std::string get_bs_conversion(const std::string& seq, std::string& type){ From 2d96301b5e54fc3510007ba3b1d8fb9210a80fa8 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Fri, 5 Dec 2025 21:15:21 -0800 Subject: [PATCH 07/38] update seq --- src/btllib/seq.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/btllib/seq.cpp b/src/btllib/seq.cpp index 2769cd1b..633cd652 100644 --- a/src/btllib/seq.cpp +++ b/src/btllib/seq.cpp @@ -187,7 +187,7 @@ get_reverse_complement(const std::string& seq) void bs_conversion(std::string& seq, std::string& type){ - const unsigned char* table = + const char* table = (type == "CT" || type == "ct") ? CT_CONVERSION : GA_CONVERSION; std::transform(seq.begin(), seq.end(), seq.begin(), @@ -196,7 +196,7 @@ bs_conversion(std::string& seq, std::string& type){ std::string get_bs_conversion(const std::string& seq, std::string& type){ std::string conversion(seq); - bs_conversion(conversion); + bs_conversion(conversion, type); return conversion; } From b56e3fff68a5deb9cd52dd8a655c5b5d05e5de0d Mon Sep 17 00:00:00 2001 From: jowong04 Date: Sat, 6 Dec 2025 21:23:51 -0800 Subject: [PATCH 08/38] update working version --- include/btllib/bshash_kmer.hpp | 46 +++++++++++++++++----- include/btllib/bshash_kmer_conversion.hpp | 47 +++++++++++++++++------ tests/bshash.cpp | 24 ++++++------ tests/bshash_conversion.cpp | 28 +++++++------- 4 files changed, 99 insertions(+), 46 deletions(-) diff --git a/include/btllib/bshash_kmer.hpp b/include/btllib/bshash_kmer.hpp index 2503a652..d5dc45c4 100644 --- a/include/btllib/bshash_kmer.hpp +++ b/include/btllib/bshash_kmer.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -749,6 +750,7 @@ class BsHash { check_error(k == 0, "BsHash: k must be greater than 0"); check_error(k % 2 == 1, "BsHash: k must be even"); + check_error((k/2) % 2 == 0, "BsHash: k must have odd number of dimers"); check_error(this->seq_len < k, "BsHash: sequence length (" + std::to_string(this->seq_len) + ") is smaller than k (" + std::to_string(k) + ")"); @@ -764,18 +766,22 @@ class BsHash DIMER_TAB = CC_DIMER_TAB; TAB_33R = MS_TAB_33R_CC; TAB_31L = MS_TAB_31L_CC; + meth_signal = {"CC", "GG"}; } else if (mode == "CG") { DIMER_TAB = CG_DIMER_TAB; TAB_33R = MS_TAB_33R_CG; TAB_31L = MS_TAB_31L_CG; + meth_signal = {"CG"}; } else if (mode == "CT") { DIMER_TAB = CT_DIMER_TAB; TAB_33R = MS_TAB_33R_CT; TAB_31L = MS_TAB_31L_CT; + meth_signal = {"CT", "AG"}; } else if (mode == "CA") { DIMER_TAB = CA_DIMER_TAB; TAB_33R = MS_TAB_33R_CA; TAB_31L = MS_TAB_31L_CA; + meth_signal = {"CA", "TG"}; } } @@ -924,28 +930,49 @@ bool roll() return true; } +std::string center_dimer() { + if (k < 2) return ""; -bool is_methylated() { - if (k < 2) return false; // cannot have a dimer - - // Center dimer index within k-mer size_t center_idx; size_t num_dimers = k / 2; if (num_dimers % 2 == 1) { - // Odd number of dimers → pick exact center dimer center_idx = pos + (num_dimers / 2) * 2; } else { - // Even number of dimers → pick left of two central dimers center_idx = pos + ((num_dimers / 2) - 1) * 2; } - // Safety check - if (center_idx + 1 >= seq_len) return false; + if (center_idx + 1 >= seq_len) return ""; - return seq[center_idx] == 'C' && seq[center_idx + 1] == 'G'; + // Return actual dimer as a 2-char string + return std::string{seq[center_idx], seq[center_idx + 1]}; } + +bool is_methylated() { + + + size_t num_dimers = k / 2; + size_t center_idx = pos + (num_dimers / 2) * 2; + // Build center dimer string + std::string dimer; + dimer += seq[center_idx]; + dimer += seq[center_idx + 1]; + std::transform(dimer.begin(), dimer.end(), dimer.begin(), ::toupper); + + // Check membership in meth_signal + return std::find(meth_signal.begin(), meth_signal.end(), dimer) != meth_signal.end(); +} + +bool is_forward_higher(const std::vector& fwd_hashes, + const std::vector& rev_hashes, + bool even) +{ + size_t idx = even ? 0 : 1; + return fwd_hashes[idx] > rev_hashes[idx]; +} + + /** * Like the roll() function, but advance backwards. * @return \p true on success and \p false otherwise @@ -1071,6 +1098,7 @@ bool is_methylated() { bool initialized; std::vector fwd_hashes = {0, 0}; std::vector rev_hashes = {0, 0}; + std::vector meth_signal; bool even = true; std::unique_ptr hash_arr; const uint64_t* DIMER_TAB = nullptr; diff --git a/include/btllib/bshash_kmer_conversion.hpp b/include/btllib/bshash_kmer_conversion.hpp index 6dcc237d..99b68680 100644 --- a/include/btllib/bshash_kmer_conversion.hpp +++ b/include/btllib/bshash_kmer_conversion.hpp @@ -78,12 +78,13 @@ class BSHashConversion : public NtHash , initialized(false) , hash_arr(new uint64_t[num_hashes]) { - check_error(k == 0, "NtHash: k must be greater than 0"); + check_error(k == 0, "BsHash: k must be greater than 0"); + check_error(k%2 != 1, "BsHash: k must be odd"); check_error(this->seq_len < k, - "NtHash: sequence length (" + std::to_string(this->seq_len) + + "BsHash: sequence length (" + std::to_string(this->seq_len) + ") is smaller than k (" + std::to_string(k) + ")"); check_error(pos > this->seq_len - k, - "NtHash: passed position (" + std::to_string(pos) + + "BsHash: passed position (" + std::to_string(pos) + ") is larger than sequence length (" + std::to_string(this->seq_len) + ")"); if (conversion_type == "CT"|| conversion_type == "ct") { @@ -112,13 +113,13 @@ class BSHashConversion : public NtHash } else { - check_error(k == 0, "Unsupported conversion_type. Must be CT or GA."); + check_error(true, "Unsupported conversion_type. Must be CT or GA."); exit(1); } } /** - * Construct an ntHash object for k-mers. + * Construct an BsHash object for k-mers. * @param seq Sequence string * @param num_hashes Number of hashes to produce per k-mer * @param k K-mer size @@ -136,12 +137,12 @@ class BSHashConversion : public NtHash /** * Calculate the hash values of current k-mer and advance to the next k-mer. - * NtHash advances one nucleotide at a time until it finds a k-mer with valid + * BsHash advances one nucleotide at a time until it finds a k-mer with valid * characters (ACGTU) and skips over those with invalid characters (non-ACGTU, * including N). This method must be called before hashes() is accessed, for * the first and every subsequent hashed kmer. get_pos() may be called at any * time to obtain the position of last hashed k-mer or the k-mer to be hashed - * if roll() has never been called on this NtHash object. It is important to + * if roll() has never been called on this BsHash object. It is important to * note that the number of roll() calls is NOT necessarily equal to get_pos(), * if there are N's or invalid characters in the hashed sequence. * @return \p true on success and \p false otherwise @@ -194,7 +195,7 @@ class BSHashConversion : public NtHash /** * Peeks the hash values as if roll() was called (without advancing the - * NtHash object. The peeked hash values can be obtained through the + * BsHash object. The peeked hash values can be obtained through the * hashes() method. * @return \p true on success and \p false otherwise */ @@ -220,7 +221,7 @@ class BSHashConversion : public NtHash /** * Peeks the hash values as if roll() was called for char_in (without - * advancing the NtHash object. The peeked hash values can be obtained through + * advancing the BsHash object. The peeked hash values can be obtained through * the hashes() method. * @return \p true on success and \p false otherwise */ @@ -257,6 +258,28 @@ class BSHashConversion : public NtHash return true; } +bool is_methylated() { + size_t num_dimers = k / 2; + size_t center_idx = pos + (num_dimers / 2) * 2; // guaranteed valid + + char base = seq[center_idx]; + + if (conversion_type == "CT") { + return base == 'C' || base == 'c'; + } else { + return base == 'G' || base == 'g'; + } +} + +/** + * Check if the forward hash is higher than the reverse hash. + * @return true if forward hash > reverse hash, false otherwise + */ +bool is_forward_higher() const { + return fwd_hash > rev_hash; +} + + void sub(const std::vector& positions, const std::vector& new_bases) { @@ -267,7 +290,9 @@ class BSHashConversion : public NtHash new_bases, get_k(), get_hash_num(), - hash_arr.get()); + hash_arr.get(), + right_table, + left_table); } /** @@ -278,7 +303,7 @@ class BSHashConversion : public NtHash /** * Get the position of last hashed k-mer or the k-mer to be hashed if roll() - * has never been called on this NtHash object. + * has never been called on this BsHash object. * @return Position of the most recently hashed k-mer's first base-pair */ size_t get_pos() const { return pos; } diff --git a/tests/bshash.cpp b/tests/bshash.cpp index a6e2fefd..3bb1d63f 100644 --- a/tests/bshash.cpp +++ b/tests/bshash.cpp @@ -22,11 +22,11 @@ main() "AAACAGAT" "CACCCGCT" "GAGCGGGT" - "TACTGTGT"; + "TACTGTGTAA"; std::string seq = first32 + first32; // 64 bases total unsigned h = 3; - unsigned k = 32; + unsigned k = 34; btllib::BsHash nthash(seq, h, k, "CG"); std::vector hashes; @@ -140,11 +140,11 @@ PRINT_TEST_NAME("CG k-mer rolling (degeneracy shift +1)") // ─────────────────────────────── std::string first32 = - "ATGCGTAGGTCGATGACGTTGAGCGTACGAGT"; + "ATGCGTAGGTCGATGACGTTGAGCGTACGAGTAA"; std::string seq = first32 + first32; // 64 bases total unsigned h = 3; - unsigned k = 32; + unsigned k = 34; btllib::BsHash nthash(seq, h, k, "CG"); std::vector hashes; @@ -210,11 +210,11 @@ PRINT_TEST_NAME("CG k-mer rolling (degeneracy shift +1)") // ─────────────────────────────── std::string first32 = - "TACCGATACCTACGCCATACGTCATACCGTTA"; + "TACCGATACCTACGCCATACGTCATACCGTTAAA"; std::string seq = first32 + first32; // 64 bases total unsigned h = 3; - unsigned k = 32; + unsigned k = 34; btllib::BsHash nthash(seq, h, k, "CG"); std::vector hashes; @@ -665,8 +665,8 @@ for (unsigned i = 0; i < h1.size(); i = i+2) { { PRINT_TEST_NAME("CG canonical hashing") - std::string seq_f = "ACGTACACTGGACTGAGTCT"; - std::string seq_r = "AGACTCAGTCCAGTGTACGT"; + std::string seq_f = "ACGTACACTGGACTGAGTCTAA"; + std::string seq_r = "TTAGACTCAGTCCAGTGTACGT"; unsigned h = 3; btllib::BsHash nthash_f(seq_f, h, seq_f.size(), "CG"); @@ -731,7 +731,7 @@ for (unsigned i = 0; i < h1.size(); i = i+2) { std::string seq = "ACTGACTA"; unsigned h = 3; - unsigned k = 4; + unsigned k = 6; btllib::BsHash nthash(seq, h, k, "CG"); std::stack hashes; @@ -847,7 +847,7 @@ for (unsigned i = 0; i < h1.size(); i = i+2) { { PRINT_TEST_NAME("skipping Ns") - std::string seq = "ACGTACACTGGACTGAGTCT"; + std::string seq = "ACGTACACTGGACTGAGTCTAAAA"; std::string seq_with_ns = seq; TEST_ASSERT_GE(seq_with_ns.size(), 10) @@ -882,11 +882,11 @@ for (unsigned i = 0; i < h1.size(); i = i+2) { PRINT_TEST_NAME("check methylation") std::string seq_k6 = "ATCGGA"; - std::string seq_k4 = "CGTA"; + std::string seq_k4 = "GGCGTA"; btllib::BsHash nthash1(seq_k6, 3, 6); nthash1.roll(); TEST_ASSERT_EQ(nthash1.is_methylated(), true) - btllib::BsHash nthash2(seq_k4, 3, 4); + btllib::BsHash nthash2(seq_k4, 3, 6); nthash2.roll(); TEST_ASSERT_EQ(nthash2.is_methylated(), true) diff --git a/tests/bshash_conversion.cpp b/tests/bshash_conversion.cpp index b0da50c4..e6d95f1f 100644 --- a/tests/bshash_conversion.cpp +++ b/tests/bshash_conversion.cpp @@ -38,9 +38,9 @@ main() { PRINT_TEST_NAME("k-mer rolling") - std::string seq = "AGTCAGTC"; - unsigned h = 1; - unsigned k = 4; + std::string seq = "AGTCAGTCA"; + unsigned h = 3; + unsigned k = 5; btllib::BSHashConversion nthash(seq, h, k, "CT"); std::vector hashes; @@ -60,13 +60,13 @@ main() { PRINT_TEST_NAME("k-mer rolling vs. base hash values") - std::string seq = "ACGTACACTGGACTGAGTCT"; + std::string seq = "ACGTACACTGGACTGAGTCTA"; btllib::BSHashConversion nthash(seq, 3, seq.size() - 2, "CT"); - /* 18-mers of kmer*/ - std::string kmer1 = seq.substr(0, 18); - std::string kmer2 = seq.substr(1, 18); - std::string kmer3 = seq.substr(2, 18); + /* 19-mers of kmer*/ + std::string kmer1 = seq.substr(0, 19); + std::string kmer2 = seq.substr(1, 19); + std::string kmer3 = seq.substr(2, 19); btllib::BSHashConversion nthash_vector[] = { btllib::BSHashConversion(kmer1, nthash.get_hash_num(), kmer1.size(), "CT"), @@ -86,8 +86,8 @@ main() { PRINT_TEST_NAME("canonical hashing") - std::string seq_f = "ACGTACACTGGACTGAGTCT"; - std::string seq_r = "AGACTCAGTCCAGTGTACGT"; + std::string seq_f = "ACGTACACTGGACTGAGTCTA"; + std::string seq_r = "TAGACTCAGTCCAGTGTACGT"; unsigned h = 3; btllib::BSHashConversion nthash_f(seq_f, h, seq_f.size(), "CT"); @@ -128,7 +128,7 @@ main() std::string seq = "ACTGATCAG"; unsigned h = 3; - unsigned k = 6; + unsigned k = 5; btllib::BSHashConversion nthash(seq, h, k, "CT"); nthash.roll(); @@ -148,7 +148,7 @@ main() { PRINT_TEST_NAME("skipping Ns") - std::string seq = "ACGTACACTGGACTGAGTCT"; + std::string seq = "ACGTACACTGGACTGAGTCTAA"; std::string seq_with_ns = seq; TEST_ASSERT_GE(seq_with_ns.size(), 10) @@ -177,8 +177,8 @@ main() { PRINT_TEST_NAME("base substitution") - std::string seq = "ACGTACACTGGACTGAGTCT"; - std::string sub = "ACGCGCACTGGACTGAGTCT"; + std::string seq = "ACGTACACTGGACTGAGTCTA"; + std::string sub = "ACGCGCACTGGACTGAGTCTA"; btllib::BSHashConversion nthash(seq, 3, seq.size(), "CT"); btllib::BSHashConversion nthash_subbed(sub, 3, sub.size(), "CT"); From a9326eff23ab2d62731b922d0bdc0a32002c536c Mon Sep 17 00:00:00 2001 From: jowong04 Date: Sat, 6 Dec 2025 22:47:22 -0800 Subject: [PATCH 09/38] final bshash directional: pre name change and additional test and refactor --- include/btllib/bshash_kmer_conversion.hpp | 57 +++- include/btllib/hashing_internals.hpp | 349 ---------------------- include/btllib/nthash_kmer.hpp | 1 + tests/bshash_conversion.cpp | 113 ++++++- 4 files changed, 158 insertions(+), 362 deletions(-) diff --git a/include/btllib/bshash_kmer_conversion.hpp b/include/btllib/bshash_kmer_conversion.hpp index 99b68680..99677d2c 100644 --- a/include/btllib/bshash_kmer_conversion.hpp +++ b/include/btllib/bshash_kmer_conversion.hpp @@ -8,6 +8,7 @@ #include #include +#include #include #include #include @@ -43,9 +44,12 @@ using hashing_internals::BS_CT_RC_CONVERT_TAB; using hashing_internals::BS_GA_RC_CONVERT_TAB; // Optional: if you want the dimer/trimer/tetramer tables too -using hashing_internals::DIMER_TAB; -using hashing_internals::TRIMER_TAB; -using hashing_internals::TETRAMER_TAB; +using hashing_internals::BS_CT_DIMER_TAB; +using hashing_internals::BS_CT_TRIMER_TAB; +using hashing_internals::BS_CT_TETRAMER_TAB; +using hashing_internals::BS_GA_DIMER_TAB; +using hashing_internals::BS_GA_TRIMER_TAB; +using hashing_internals::BS_GA_TETRAMER_TAB; /** * Normal k-mer hashing. @@ -91,25 +95,49 @@ class BSHashConversion : public NtHash // Seed table primitive_tab = BS_CT_SEED_TAB; + // Rolling hash right/left tables right_table = BS_CT_MS_TAB_33R; left_table = BS_CT_MS_TAB_31L; + // Convert tables convert_tab = BS_CT_CONVERT_TAB; rc_convert_tab = BS_CT_RC_CONVERT_TAB; + dimer_tab = BS_CT_DIMER_TAB; + rc_dimer_tab = BS_GA_DIMER_TAB; +trimer_tab = BS_CT_TRIMER_TAB; +rc_trimer_tab = BS_GA_TRIMER_TAB; + tetramer_tab = BS_CT_TETRAMER_TAB;; +rc_tetramer_tab= BS_GA_TETRAMER_TAB; + + // dimer_tab, trimer_tab, tetramer_tab stay unchanged } else if (conversion_type == "GA" || conversion_type == "ga") { - primitive_tab = BS_GA_SEED_TAB; + primitive_tab = BS_GA_SEED_TAB; + + + // Rolling hash right/left tables + right_table = BS_GA_MS_TAB_33R; + left_table = BS_GA_MS_TAB_31L; - right_table = BS_GA_MS_TAB_33R; - left_table = BS_GA_MS_TAB_31L; - convert_tab = BS_GA_CONVERT_TAB; - rc_convert_tab = BS_GA_RC_CONVERT_TAB; + // Convert tables + convert_tab = BS_GA_CONVERT_TAB; + rc_convert_tab = BS_GA_RC_CONVERT_TAB; + + // K-mer tables (forward GA, reverse CT) + dimer_tab = BS_GA_DIMER_TAB; + rc_dimer_tab = BS_CT_DIMER_TAB; + + trimer_tab = BS_GA_TRIMER_TAB; + rc_trimer_tab = BS_CT_TRIMER_TAB; + + tetramer_tab = BS_GA_TETRAMER_TAB; + rc_tetramer_tab = BS_CT_TETRAMER_TAB; } else { @@ -345,15 +373,20 @@ bool is_forward_higher() const { std::unique_ptr hash_arr; const uint64_t* primitive_tab; -const uint64_t* dimer_tab = DIMER_TAB; -const uint64_t* trimer_tab = TRIMER_TAB; -const uint64_t* tetramer_tab = TETRAMER_TAB; + +const uint64_t* dimer_tab; +const uint64_t* rc_dimer_tab; +const uint64_t* trimer_tab; +const uint64_t* rc_trimer_tab; +const uint64_t* tetramer_tab; +const uint64_t* rc_tetramer_tab; const uint8_t* convert_tab; const uint8_t* rc_convert_tab; // NEW const uint64_t* const* right_table; const uint64_t* const* left_table; + /** * Initialize the internal state of the iterator * @return \p true if successful, \p false otherwise @@ -375,7 +408,7 @@ const uint64_t* const* left_table; } std::cerr << "conversion bshasb" << std::endl; fwd_hash = base_forward_hash(seq + pos, k, primitive_tab, dimer_tab, trimer_tab, tetramer_tab, convert_tab); - rev_hash = base_reverse_hash(seq + pos, k, primitive_tab, dimer_tab, trimer_tab, tetramer_tab, rc_convert_tab); + rev_hash = base_reverse_hash(seq + pos, k, primitive_tab, rc_dimer_tab, rc_trimer_tab, rc_tetramer_tab, rc_convert_tab); extend_hashes(fwd_hash, rev_hash, k, num_hashes, hash_arr.get()); initialized = true; return true; diff --git a/include/btllib/hashing_internals.hpp b/include/btllib/hashing_internals.hpp index 76b344de..3ea06f6f 100644 --- a/include/btllib/hashing_internals.hpp +++ b/include/btllib/hashing_internals.hpp @@ -170,76 +170,6 @@ const uint64_t SEED_TAB[ASCII_SIZE] = { SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N // 248..255 }; -const uint64_t BS_CT_SEED_TAB[ASCII_SIZE] = { - SEED_N, SEED_T, SEED_N, SEED_A, SEED_A, SEED_A, SEED_N, SEED_C, // 0..7 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 8..15 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 16..23 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 24..31 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 32..39 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 40..47 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 48..55 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 56..63 - SEED_N, SEED_A, SEED_N, SEED_T, SEED_N, SEED_N, SEED_N, SEED_G, // 64..71 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 72..79 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_T, SEED_T, SEED_N, SEED_N, // 80..87 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 88..95 - SEED_N, SEED_A, SEED_N, SEED_T, SEED_N, SEED_N, SEED_N, SEED_G, // 96..103 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 104..111 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_T, SEED_T, SEED_N, SEED_N, // 112..119 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 120..127 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 128..135 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 136..143 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 144..151 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 152..159 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 160..167 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 168..175 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 176..183 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 184..191 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 192..199 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 200..207 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 208..215 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 216..223 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 224..231 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 232..239 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 240..247 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N // 248..255 -}; - -const uint64_t BS_GA_SEED_TAB[ASCII_SIZE] = { - SEED_N, SEED_T, SEED_N, SEED_G, SEED_A, SEED_A, SEED_N, SEED_T, // 0..7 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 8..15 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 16..23 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 24..31 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 32..39 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 40..47 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 48..55 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 56..63 - SEED_N, SEED_A, SEED_N, SEED_C, SEED_N, SEED_N, SEED_N, SEED_A, // 64..71 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 72..79 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_T, SEED_T, SEED_N, SEED_N, // 80..87 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 88..95 - SEED_N, SEED_A, SEED_N, SEED_C, SEED_N, SEED_N, SEED_N, SEED_A, // 96..103 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 104..111 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_T, SEED_T, SEED_N, SEED_N, // 112..119 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 120..127 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 128..135 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 136..143 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 144..151 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 152..159 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 160..167 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 168..175 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 176..183 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 184..191 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 192..199 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 200..207 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 208..215 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 216..223 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 224..231 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 232..239 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, // 240..247 - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N // 248..255 -}; - const uint64_t A33R[33] = { 0x195c60474, 0x12b8c08e9, 0x571811d3, 0xae3023a6, 0x15c60474c, 0xb8c08e99, @@ -417,145 +347,6 @@ const uint64_t* const MS_TAB_31L[ASCII_SIZE] = { N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L // 248..255 }; -const uint64_t* const BS_CT_MS_TAB_33R[ASCII_SIZE] = { - N33R, T33R, N33R, A33R, A33R, A33R, N33R, C33R, // 0..7 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 8..15 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 16..23 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 24..31 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 32..39 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 40..47 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 48..55 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 56..63 - N33R, A33R, N33R, T33R, N33R, N33R, N33R, G33R, // 64..71 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 72..79 - N33R, N33R, N33R, N33R, T33R, T33R, N33R, N33R, // 80..87 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 88..95 - N33R, A33R, N33R, T33R, N33R, N33R, N33R, G33R, // 96..103 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 104..111 - N33R, N33R, N33R, N33R, T33R, T33R, N33R, N33R, // 112..119 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 120..127 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 128..135 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 136..143 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 144..151 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 152..159 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 160..167 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 168..175 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 176..183 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 184..191 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 192..199 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 200..207 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 208..215 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 216..223 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 224..231 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 232..239 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 240..247 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R // 248..255 -}; - -const uint64_t* const BS_CT_MS_TAB_31L[ASCII_SIZE] = { - N31L, T31L, N31L, A31L, A31L, A31L, N31L, C31L, // 0..7 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 8..15 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 16..23 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 24..31 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 32..39 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 40..47 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 48..55 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 56..63 - N31L, A31L, N31L, T31L, N31L, N31L, N31L, G31L, // 64..71 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 72..79 - N31L, N31L, N31L, N31L, T31L, T31L, N31L, N31L, // 80..87 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 88..95 - N31L, A31L, N31L, T31L, N31L, N31L, N31L, G31L, // 96..103 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 104..111 - N31L, N31L, N31L, N31L, T31L, T31L, N31L, N31L, // 112..119 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 120..127 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 128..135 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 136..143 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 144..151 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 152..159 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 160..167 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 168..175 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 176..183 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 184..191 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 192..199 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 200..207 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 208..215 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 216..223 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 224..231 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 232..239 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 240..247 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L // 248..255 -}; - -const uint64_t* const BS_GA_MS_TAB_33R[ASCII_SIZE] = { - N33R, T33R, N33R, G33R, A33R, A33R, N33R, T33R, // 0..7 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 8..15 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 16..23 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 24..31 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 32..39 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 40..47 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 48..55 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 56..63 - N33R, A33R, N33R, C33R, N33R, N33R, N33R, A33R, // 64..71 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 72..79 - N33R, N33R, N33R, N33R, T33R, T33R, N33R, N33R, // 80..87 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 88..95 - N33R, A33R, N33R, C33R, N33R, N33R, N33R, A33R, // 96..103 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 104..111 - N33R, N33R, N33R, N33R, T33R, T33R, N33R, N33R, // 112..119 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 120..127 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 128..135 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 136..143 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 144..151 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 152..159 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 160..167 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 168..175 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 176..183 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 184..191 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 192..199 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 200..207 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 208..215 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 216..223 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 224..231 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 232..239 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R, // 240..247 - N33R, N33R, N33R, N33R, N33R, N33R, N33R, N33R // 248..255 -}; - -const uint64_t* const BS_GA_MS_TAB_31L[ASCII_SIZE] = { - N31L, T31L, N31L, G31L, A31L, A31L, N31L, T31L, // 0..7 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 8..15 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 16..23 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 24..31 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 32..39 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 40..47 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 48..55 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 56..63 - N31L, A31L, N31L, C31L, N31L, N31L, N31L, A31L, // 64..71 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 72..79 - N31L, N31L, N31L, N31L, T31L, T31L, N31L, N31L, // 80..87 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 88..95 - N31L, A31L, N31L, C31L, N31L, N31L, N31L, A31L, // 96..103 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 104..111 - N31L, N31L, N31L, N31L, T31L, T31L, N31L, N31L, // 112..119 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 120..127 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 128..135 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 136..143 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 144..151 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 152..159 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 160..167 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 168..175 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 176..183 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 184..191 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 192..199 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 200..207 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 208..215 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 216..223 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 224..231 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 232..239 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L, // 240..247 - N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L // 248..255 -}; inline uint64_t srol_table(unsigned char c, unsigned d, const uint64_t* const* right_table = MS_TAB_33R, @@ -635,146 +426,6 @@ const uint8_t RC_CONVERT_TAB[ASCII_SIZE] = { 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 }; -const uint8_t BS_CT_CONVERT_TAB[ASCII_SIZE] = { - 255, 255, 255, 255, 255, 255, 255, 255, // 0..7 - 255, 255, 255, 255, 255, 255, 255, 255, // 8..15 - 255, 255, 255, 255, 255, 255, 255, 255, // 16..23 - 255, 255, 255, 255, 255, 255, 255, 255, // 24..31 - 255, 255, 255, 255, 255, 255, 255, 255, // 32..39 - 255, 255, 255, 255, 255, 255, 255, 255, // 40..47 - 255, 255, 255, 255, 255, 255, 255, 255, // 48..55 - 255, 255, 255, 255, 255, 255, 255, 255, // 56..63 - 255, 0, 255, 3, 255, 255, 255, 2, // 64..71 - 255, 255, 255, 255, 255, 255, 255, 255, // 72..79 - 255, 255, 255, 255, 3, 3, 255, 255, // 80..87 - 255, 255, 255, 255, 255, 255, 255, 255, // 88..95 - 255, 0, 255, 3, 255, 255, 255, 2, // 96..103 - 255, 255, 255, 255, 255, 255, 255, 255, // 104..111 - 255, 255, 255, 255, 3, 3, 255, 255, // 112..119 - 255, 255, 255, 255, 255, 255, 255, 255, // 120..127 - 255, 255, 255, 255, 255, 255, 255, 255, // 128..135 - 255, 255, 255, 255, 255, 255, 255, 255, // 136..143 - 255, 255, 255, 255, 255, 255, 255, 255, // 144..151 - 255, 255, 255, 255, 255, 255, 255, 255, // 152..159 - 255, 255, 255, 255, 255, 255, 255, 255, // 160..167 - 255, 255, 255, 255, 255, 255, 255, 255, // 168..175 - 255, 255, 255, 255, 255, 255, 255, 255, // 176..183 - 255, 255, 255, 255, 255, 255, 255, 255, // 184..191 - 255, 255, 255, 255, 255, 255, 255, 255, // 192..199 - 255, 255, 255, 255, 255, 255, 255, 255, // 200..207 - 255, 255, 255, 255, 255, 255, 255, 255, // 208..215 - 255, 255, 255, 255, 255, 255, 255, 255, // 216..223 - 255, 255, 255, 255, 255, 255, 255, 255, // 224..231 - 255, 255, 255, 255, 255, 255, 255, 255, // 232..239 - 255, 255, 255, 255, 255, 255, 255, 255, // 240..247 - 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 -}; - -const uint8_t BS_CT_RC_CONVERT_TAB[ASCII_SIZE] = { - 255, 255, 255, 255, 255, 255, 255, 255, // 0..7 - 255, 255, 255, 255, 255, 255, 255, 255, // 8..15 - 255, 255, 255, 255, 255, 255, 255, 255, // 16..23 - 255, 255, 255, 255, 255, 255, 255, 255, // 24..31 - 255, 255, 255, 255, 255, 255, 255, 255, // 32..39 - 255, 255, 255, 255, 255, 255, 255, 255, // 40..47 - 255, 255, 255, 255, 255, 255, 255, 255, // 48..55 - 255, 255, 255, 255, 255, 255, 255, 255, // 56..63 - 255, 3, 255, 0, 255, 255, 255, 1, // 64..71 - 255, 255, 255, 255, 255, 255, 255, 255, // 72..79 - 255, 255, 255, 255, 0, 0, 255, 255, // 80..87 - 255, 255, 255, 255, 255, 255, 255, 255, // 88..95 - 255, 3, 255, 0, 255, 255, 255, 1, // 96..103 - 255, 255, 255, 255, 255, 255, 255, 255, // 104..111 - 255, 255, 255, 255, 0, 0, 255, 255, // 112..119 - 255, 255, 255, 255, 255, 255, 255, 255, // 120..127 - 255, 255, 255, 255, 255, 255, 255, 255, // 128..135 - 255, 255, 255, 255, 255, 255, 255, 255, // 136..143 - 255, 255, 255, 255, 255, 255, 255, 255, // 144..151 - 255, 255, 255, 255, 255, 255, 255, 255, // 152..159 - 255, 255, 255, 255, 255, 255, 255, 255, // 160..167 - 255, 255, 255, 255, 255, 255, 255, 255, // 168..175 - 255, 255, 255, 255, 255, 255, 255, 255, // 176..183 - 255, 255, 255, 255, 255, 255, 255, 255, // 184..191 - 255, 255, 255, 255, 255, 255, 255, 255, // 192..199 - 255, 255, 255, 255, 255, 255, 255, 255, // 200..207 - 255, 255, 255, 255, 255, 255, 255, 255, // 208..215 - 255, 255, 255, 255, 255, 255, 255, 255, // 216..223 - 255, 255, 255, 255, 255, 255, 255, 255, // 224..231 - 255, 255, 255, 255, 255, 255, 255, 255, // 232..239 - 255, 255, 255, 255, 255, 255, 255, 255, // 240..247 - 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 -}; - -const uint8_t BS_GA_CONVERT_TAB[ASCII_SIZE] = { - 255, 255, 255, 255, 255, 255, 255, 255, // 0..7 - 255, 255, 255, 255, 255, 255, 255, 255, // 8..15 - 255, 255, 255, 255, 255, 255, 255, 255, // 16..23 - 255, 255, 255, 255, 255, 255, 255, 255, // 24..31 - 255, 255, 255, 255, 255, 255, 255, 255, // 32..39 - 255, 255, 255, 255, 255, 255, 255, 255, // 40..47 - 255, 255, 255, 255, 255, 255, 255, 255, // 48..55 - 255, 255, 255, 255, 255, 255, 255, 255, // 56..63 - 255, 0, 255, 1, 255, 255, 255, 0, // 64..71 - 255, 255, 255, 255, 255, 255, 255, 255, // 72..79 - 255, 255, 255, 255, 3, 3, 255, 255, // 80..87 - 255, 255, 255, 255, 255, 255, 255, 255, // 88..95 - 255, 0, 255, 1, 255, 255, 255, 0, // 96..103 - 255, 255, 255, 255, 255, 255, 255, 255, // 104..111 - 255, 255, 255, 255, 3, 3, 255, 255, // 112..119 - 255, 255, 255, 255, 255, 255, 255, 255, // 120..127 - 255, 255, 255, 255, 255, 255, 255, 255, // 128..135 - 255, 255, 255, 255, 255, 255, 255, 255, // 136..143 - 255, 255, 255, 255, 255, 255, 255, 255, // 144..151 - 255, 255, 255, 255, 255, 255, 255, 255, // 152..159 - 255, 255, 255, 255, 255, 255, 255, 255, // 160..167 - 255, 255, 255, 255, 255, 255, 255, 255, // 168..175 - 255, 255, 255, 255, 255, 255, 255, 255, // 176..183 - 255, 255, 255, 255, 255, 255, 255, 255, // 184..191 - 255, 255, 255, 255, 255, 255, 255, 255, // 192..199 - 255, 255, 255, 255, 255, 255, 255, 255, // 200..207 - 255, 255, 255, 255, 255, 255, 255, 255, // 208..215 - 255, 255, 255, 255, 255, 255, 255, 255, // 216..223 - 255, 255, 255, 255, 255, 255, 255, 255, // 224..231 - 255, 255, 255, 255, 255, 255, 255, 255, // 232..239 - 255, 255, 255, 255, 255, 255, 255, 255, // 240..247 - 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 -}; - -const uint8_t BS_GA_RC_CONVERT_TAB[ASCII_SIZE] = { - 255, 255, 255, 255, 255, 255, 255, 255, // 0..7 - 255, 255, 255, 255, 255, 255, 255, 255, // 8..15 - 255, 255, 255, 255, 255, 255, 255, 255, // 16..23 - 255, 255, 255, 255, 255, 255, 255, 255, // 24..31 - 255, 255, 255, 255, 255, 255, 255, 255, // 32..39 - 255, 255, 255, 255, 255, 255, 255, 255, // 40..47 - 255, 255, 255, 255, 255, 255, 255, 255, // 48..55 - 255, 255, 255, 255, 255, 255, 255, 255, // 56..63 - 255, 3, 255, 2, 255, 255, 255, 3, // 64..71 - 255, 255, 255, 255, 255, 255, 255, 255, // 72..79 - 255, 255, 255, 255, 0, 0, 255, 255, // 80..87 - 255, 255, 255, 255, 255, 255, 255, 255, // 88..95 - 255, 3, 255, 2, 255, 255, 255, 3, // 96..103 - 255, 255, 255, 255, 255, 255, 255, 255, // 104..111 - 255, 255, 255, 255, 0, 0, 255, 255, // 112..119 - 255, 255, 255, 255, 255, 255, 255, 255, // 120..127 - 255, 255, 255, 255, 255, 255, 255, 255, // 128..135 - 255, 255, 255, 255, 255, 255, 255, 255, // 136..143 - 255, 255, 255, 255, 255, 255, 255, 255, // 144..151 - 255, 255, 255, 255, 255, 255, 255, 255, // 152..159 - 255, 255, 255, 255, 255, 255, 255, 255, // 160..167 - 255, 255, 255, 255, 255, 255, 255, 255, // 168..175 - 255, 255, 255, 255, 255, 255, 255, 255, // 176..183 - 255, 255, 255, 255, 255, 255, 255, 255, // 184..191 - 255, 255, 255, 255, 255, 255, 255, 255, // 192..199 - 255, 255, 255, 255, 255, 255, 255, 255, // 200..207 - 255, 255, 255, 255, 255, 255, 255, 255, // 208..215 - 255, 255, 255, 255, 255, 255, 255, 255, // 216..223 - 255, 255, 255, 255, 255, 255, 255, 255, // 224..231 - 255, 255, 255, 255, 255, 255, 255, 255, // 232..239 - 255, 255, 255, 255, 255, 255, 255, 255, // 240..247 - 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 -}; - const uint64_t DIMER_TAB[4 * 4] = { 5015898201438948509U, 5225361804584821669U, 6423762225589857229U, 5783394398799547583U, 6894017875502584557U, 5959461383092338133U, diff --git a/include/btllib/nthash_kmer.hpp b/include/btllib/nthash_kmer.hpp index 64010c72..1b5fdc27 100644 --- a/include/btllib/nthash_kmer.hpp +++ b/include/btllib/nthash_kmer.hpp @@ -64,6 +64,7 @@ base_forward_hash(const char* seq, unsigned k, const uint64_t* primitive_tab = S if (remainder > 0) { h_val = srol(h_val, remainder); } + // TODO double check bug use k = 7 to test if (remainder == 3) { uint8_t trimer_loc = 0; trimer_loc += 16 * convert_tab[(unsigned char)seq[k - 3]]; // NOLINT diff --git a/tests/bshash_conversion.cpp b/tests/bshash_conversion.cpp index e6d95f1f..6a08bab1 100644 --- a/tests/bshash_conversion.cpp +++ b/tests/bshash_conversion.cpp @@ -38,14 +38,123 @@ main() { PRINT_TEST_NAME("k-mer rolling") - std::string seq = "AGTCAGTCA"; + std::string seq = "AGTCAGTCAGTCA"; unsigned h = 3; unsigned k = 5; + //AGTCA + //TGACT btllib::BSHashConversion nthash(seq, h, k, "CT"); + /*nthash.roll(); + nthash.roll(); + std::cerr << "CT forrward: " << nthash.get_forward_hash() << std::endl; + std::cerr << "CT reverse: " << nthash.get_reverse_hash() << std::endl; + btllib::BSHashConversion nthash2("GTCAG", h, k, "CT"); + nthash2.roll(); + std::cerr << "CT forrward: " << nthash2.get_forward_hash() << std::endl; + std::cerr << "CT reverse: " << nthash2.get_reverse_hash() << std::endl;*/ + std::vector hashes; + uint64_t fwd_hash = 0; + uint64_t final_fwd_hash = 0; + uint64_t rev_hash = 0; + uint64_t final_rev_hash = 0; + + while (nthash.roll()) { + if (fwd_hash == 0) { + fwd_hash = nthash.get_forward_hash(); + rev_hash = nthash.get_reverse_hash(); + } + final_fwd_hash = nthash.get_forward_hash(); + final_rev_hash = nthash.get_reverse_hash(); + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push_back(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); + + // check same hash value for identical k-mers (first and last) + TEST_ASSERT_EQ(fwd_hash, final_fwd_hash); + TEST_ASSERT_EQ(rev_hash, final_rev_hash); + TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); + } + + { + PRINT_TEST_NAME("k-mer rolling - (CT Conversion)") + + std::string seq = "AGTCAGTCAGTTA"; + unsigned h = 3; + unsigned k = 5; + //AGTCA + //TGACT + + btllib::BSHashConversion nthash(seq, h, k, "CT"); + /*nthash.roll(); + nthash.roll(); + std::cerr << "CT forrward: " << nthash.get_forward_hash() << std::endl; + std::cerr << "CT reverse: " << nthash.get_reverse_hash() << std::endl; + btllib::BSHashConversion nthash2("GTCAG", h, k, "CT"); + nthash2.roll(); + std::cerr << "CT forrward: " << nthash2.get_forward_hash() << std::endl; + std::cerr << "CT reverse: " << nthash2.get_reverse_hash() << std::endl;*/ std::vector hashes; + uint64_t fwd_hash = 0; + uint64_t final_fwd_hash = 0; + uint64_t rev_hash = 0; + uint64_t final_rev_hash = 0; while (nthash.roll()) { + if (fwd_hash == 0) { + fwd_hash = nthash.get_forward_hash(); + rev_hash = nthash.get_reverse_hash(); + } + final_fwd_hash = nthash.get_forward_hash(); + final_rev_hash = nthash.get_reverse_hash(); + uint64_t* h_vals = new uint64_t[h]; + std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + hashes.push_back(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); + + // check same hash value for identical k-mers (first and last) + TEST_ASSERT_EQ(fwd_hash, final_fwd_hash); + TEST_ASSERT_EQ(rev_hash, final_rev_hash); + TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); + } + + + { + PRINT_TEST_NAME("k-mer rolling - (GA Conversion)") + + std::string seq = "AGTCAGTCAATCA"; + unsigned h = 3; + unsigned k = 5; + //AGTCA + //TGACT + + btllib::BSHashConversion nthash(seq, h, k, "GA"); + /*nthash.roll(); + nthash.roll(); + std::cerr << "CT forrward: " << nthash.get_forward_hash() << std::endl; + std::cerr << "CT reverse: " << nthash.get_reverse_hash() << std::endl; + btllib::BSHashConversion nthash2("GTCAG", h, k, "CT"); + nthash2.roll(); + std::cerr << "CT forrward: " << nthash2.get_forward_hash() << std::endl; + std::cerr << "CT reverse: " << nthash2.get_reverse_hash() << std::endl;*/ + std::vector hashes; + uint64_t fwd_hash = 0; + uint64_t final_fwd_hash = 0; + uint64_t rev_hash = 0; + uint64_t final_rev_hash = 0; + + while (nthash.roll()) { + if (fwd_hash == 0) { + fwd_hash = nthash.get_forward_hash(); + rev_hash = nthash.get_reverse_hash(); + } + final_fwd_hash = nthash.get_forward_hash(); + final_rev_hash = nthash.get_reverse_hash(); uint64_t* h_vals = new uint64_t[h]; std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); hashes.push_back(h_vals); @@ -54,6 +163,8 @@ main() TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); // check same hash value for identical k-mers (first and last) + TEST_ASSERT_EQ(fwd_hash, final_fwd_hash); + TEST_ASSERT_EQ(rev_hash, final_rev_hash); TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); } From d07cede3a0388e9ea57862da210304aaccd8e279 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Mon, 8 Dec 2025 15:52:11 -0800 Subject: [PATCH 10/38] bshash_conversion: odd number of dimer version --- include/btllib/bshash_kmer_conversion.hpp | 190 +++++++++------- tests/bshash_conversion.cpp | 265 ++++++++++------------ 2 files changed, 228 insertions(+), 227 deletions(-) diff --git a/include/btllib/bshash_kmer_conversion.hpp b/include/btllib/bshash_kmer_conversion.hpp index 99677d2c..5d3ae1a4 100644 --- a/include/btllib/bshash_kmer_conversion.hpp +++ b/include/btllib/bshash_kmer_conversion.hpp @@ -27,7 +27,7 @@ using hashing_internals::prev_reverse_hash; using hashing_internals::SEED_N; using hashing_internals::SEED_TAB; using hashing_internals::sub_hash; -// Bring the tables into this file + using hashing_internals::BS_CT_SEED_TAB; using hashing_internals::BS_GA_SEED_TAB; @@ -43,7 +43,6 @@ using hashing_internals::BS_GA_CONVERT_TAB; using hashing_internals::BS_CT_RC_CONVERT_TAB; using hashing_internals::BS_GA_RC_CONVERT_TAB; -// Optional: if you want the dimer/trimer/tetramer tables too using hashing_internals::BS_CT_DIMER_TAB; using hashing_internals::BS_CT_TRIMER_TAB; using hashing_internals::BS_CT_TETRAMER_TAB; @@ -52,9 +51,9 @@ using hashing_internals::BS_GA_TRIMER_TAB; using hashing_internals::BS_GA_TETRAMER_TAB; /** - * Normal k-mer hashing. + * Directional BS-seq k-mer hashing. */ -class BSHashConversion : public NtHash +class BSHashConversion { public: @@ -64,6 +63,8 @@ class BSHashConversion : public NtHash * @param seq_len Length of the sequence * @param num_hashes Number of hashes to generate per k-mer * @param k K-mer size + * @param conversion_type Bisulfite conversion mode ("CT" or "GA"), + * used to determine how bases are interpreted during hashing. * @param pos Position in the sequence to start hashing from */ BSHashConversion(const char* seq, @@ -72,8 +73,7 @@ class BSHashConversion : public NtHash hashing_internals::K_TYPE k, std::string conversion_type, size_t pos = 0) - : NtHash(seq, seq_len, num_hashes, k, pos), - seq(seq) + : seq(seq) , seq_len(seq_len) , num_hashes(num_hashes) , k(k) @@ -83,7 +83,8 @@ class BSHashConversion : public NtHash , hash_arr(new uint64_t[num_hashes]) { check_error(k == 0, "BsHash: k must be greater than 0"); - check_error(k%2 != 1, "BsHash: k must be odd"); + check_error(k % 2 == 1, "BsHash: k must be even"); + check_error((k/2) % 2 == 0, "BsHash: k must have odd number of dimers"); check_error(this->seq_len < k, "BsHash: sequence length (" + std::to_string(this->seq_len) + ") is smaller than k (" + std::to_string(k) + ")"); @@ -91,59 +92,46 @@ class BSHashConversion : public NtHash "BsHash: passed position (" + std::to_string(pos) + ") is larger than sequence length (" + std::to_string(this->seq_len) + ")"); - if (conversion_type == "CT"|| conversion_type == "ct") { - // Seed table - primitive_tab = BS_CT_SEED_TAB; + size_t num_dimers = k / 2; + size_t center_dimer_start = (num_dimers / 2) * 2; + if (conversion_type == "CT") { + meth_base_idx = center_dimer_start; + + primitive_tab = BS_CT_SEED_TAB; - // Rolling hash right/left tables right_table = BS_CT_MS_TAB_33R; left_table = BS_CT_MS_TAB_31L; - - // Convert tables convert_tab = BS_CT_CONVERT_TAB; rc_convert_tab = BS_CT_RC_CONVERT_TAB; - dimer_tab = BS_CT_DIMER_TAB; - rc_dimer_tab = BS_GA_DIMER_TAB; -trimer_tab = BS_CT_TRIMER_TAB; -rc_trimer_tab = BS_GA_TRIMER_TAB; - tetramer_tab = BS_CT_TETRAMER_TAB;; -rc_tetramer_tab= BS_GA_TETRAMER_TAB; - - - // dimer_tab, trimer_tab, tetramer_tab stay unchanged - - } - else if (conversion_type == "GA" || conversion_type == "ga") { - primitive_tab = BS_GA_SEED_TAB; - - - // Rolling hash right/left tables - right_table = BS_GA_MS_TAB_33R; - left_table = BS_GA_MS_TAB_31L; - - - // Convert tables - convert_tab = BS_GA_CONVERT_TAB; - rc_convert_tab = BS_GA_RC_CONVERT_TAB; - - // K-mer tables (forward GA, reverse CT) - dimer_tab = BS_GA_DIMER_TAB; - rc_dimer_tab = BS_CT_DIMER_TAB; - - trimer_tab = BS_GA_TRIMER_TAB; - rc_trimer_tab = BS_CT_TRIMER_TAB; - - tetramer_tab = BS_GA_TETRAMER_TAB; - rc_tetramer_tab = BS_CT_TETRAMER_TAB; - - } - else { - check_error(true, "Unsupported conversion_type. Must be CT or GA."); - exit(1); - } + dimer_tab = BS_CT_DIMER_TAB; + rc_dimer_tab = BS_GA_DIMER_TAB; + trimer_tab = BS_CT_TRIMER_TAB; + rc_trimer_tab = BS_GA_TRIMER_TAB; + tetramer_tab = BS_CT_TETRAMER_TAB;; + rc_tetramer_tab= BS_GA_TETRAMER_TAB; + } else if (conversion_type == "GA") { + meth_base_idx = center_dimer_start + 1; + + primitive_tab = BS_GA_SEED_TAB; + + right_table = BS_GA_MS_TAB_33R; + left_table = BS_GA_MS_TAB_31L; + + convert_tab = BS_GA_CONVERT_TAB; + rc_convert_tab = BS_GA_RC_CONVERT_TAB; + + dimer_tab = BS_GA_DIMER_TAB; + rc_dimer_tab = BS_CT_DIMER_TAB; + trimer_tab = BS_GA_TRIMER_TAB; + rc_trimer_tab = BS_CT_TRIMER_TAB; + tetramer_tab = BS_GA_TETRAMER_TAB; + rc_tetramer_tab = BS_CT_TETRAMER_TAB; + } else { + check_error(true, "Unsupported conversion_type. Must be CT or GA, as capitalized."); + } } /** @@ -151,6 +139,8 @@ rc_tetramer_tab= BS_GA_TETRAMER_TAB; * @param seq Sequence string * @param num_hashes Number of hashes to produce per k-mer * @param k K-mer size + * @param conversion_type Bisulfite conversion mode ("CT" or "GA"), + * used to determine how bases are interpreted during hashing. * @param pos Position in sequence to start hashing from */ BSHashConversion(const std::string& seq, @@ -162,7 +152,37 @@ rc_tetramer_tab= BS_GA_TETRAMER_TAB; { } + BSHashConversion(const BSHashConversion& obj) + : seq(obj.seq) + , seq_len(obj.seq_len) + , num_hashes(obj.num_hashes) + , k(obj.k) + , conversion_type(obj.conversion_type) + , pos(obj.pos) + , initialized(obj.initialized) + , fwd_hash(obj.fwd_hash) + , rev_hash(obj.rev_hash) + , hash_arr(new uint64_t[obj.num_hashes]) + , primitive_tab(obj.primitive_tab) + , right_table(obj.right_table) + , left_table(obj.left_table) + , convert_tab(obj.convert_tab) + , rc_convert_tab(obj.rc_convert_tab) + , dimer_tab(obj.dimer_tab) + , rc_dimer_tab(obj.rc_dimer_tab) + , trimer_tab(obj.trimer_tab) + , rc_trimer_tab(obj.rc_trimer_tab) + , tetramer_tab(obj.tetramer_tab) + , rc_tetramer_tab(obj.rc_tetramer_tab) +{ + std::memcpy( + hash_arr.get(), + obj.hash_arr.get(), + num_hashes * sizeof(uint64_t) + ); +} + BSHashConversion(BSHashConversion&&) = default; /** * Calculate the hash values of current k-mer and advance to the next k-mer. * BsHash advances one nucleotide at a time until it finds a k-mer with valid @@ -286,26 +306,29 @@ rc_tetramer_tab= BS_GA_TETRAMER_TAB; return true; } -bool is_methylated() { - size_t num_dimers = k / 2; - size_t center_idx = pos + (num_dimers / 2) * 2; // guaranteed valid + /** + * Check if the k-mer center position is methylated based on conversion type. + * For CT conversion, methylation corresponds to a cytosine ('C'/'c'). + * For GA conversion, methylation corresponds to a guanine ('G'/'g'). + * + * @return true if the base at the methylation index indicates methylation, false otherwise. + */ - char base = seq[center_idx]; + bool is_methylated() { + char base = seq[pos + meth_base_idx]; - if (conversion_type == "CT") { - return base == 'C' || base == 'c'; - } else { - return base == 'G' || base == 'g'; - } -} - -/** - * Check if the forward hash is higher than the reverse hash. - * @return true if forward hash > reverse hash, false otherwise - */ -bool is_forward_higher() const { - return fwd_hash > rev_hash; -} + if (conversion_type == "CT") + return base == 'C' || base == 'c'; + else + return base == 'G' || base == 'g'; + } + /** + * Check if the forward hash is higher than the reverse hash. + * @return true if forward hash > reverse hash, false otherwise + */ + bool is_forward_higher() const { + return fwd_hash > rev_hash; + } void sub(const std::vector& positions, @@ -365,27 +388,28 @@ bool is_forward_higher() const { const size_t seq_len; hashing_internals::NUM_HASHES_TYPE num_hashes; hashing_internals::K_TYPE k; - std::string conversion_type; + std::string conversion_type; size_t pos; + size_t meth_base_idx; bool initialized; uint64_t fwd_hash = 0; uint64_t rev_hash = 0; std::unique_ptr hash_arr; -const uint64_t* primitive_tab; + const uint64_t* primitive_tab; -const uint64_t* dimer_tab; -const uint64_t* rc_dimer_tab; -const uint64_t* trimer_tab; -const uint64_t* rc_trimer_tab; -const uint64_t* tetramer_tab; -const uint64_t* rc_tetramer_tab; + const uint64_t* const* right_table; + const uint64_t* const* left_table; -const uint8_t* convert_tab; -const uint8_t* rc_convert_tab; // NEW + const uint8_t* convert_tab; + const uint8_t* rc_convert_tab; -const uint64_t* const* right_table; -const uint64_t* const* left_table; + const uint64_t* dimer_tab; + const uint64_t* rc_dimer_tab; + const uint64_t* trimer_tab; + const uint64_t* rc_trimer_tab; + const uint64_t* tetramer_tab; + const uint64_t* rc_tetramer_tab; /** * Initialize the internal state of the iterator @@ -406,13 +430,13 @@ const uint64_t* const* left_table; if (pos > seq_len - k) { return false; } - std::cerr << "conversion bshasb" << std::endl; fwd_hash = base_forward_hash(seq + pos, k, primitive_tab, dimer_tab, trimer_tab, tetramer_tab, convert_tab); rev_hash = base_reverse_hash(seq + pos, k, primitive_tab, rc_dimer_tab, rc_trimer_tab, rc_tetramer_tab, rc_convert_tab); extend_hashes(fwd_hash, rev_hash, k, num_hashes, hash_arr.get()); initialized = true; return true; } + }; } // namespace btllib \ No newline at end of file diff --git a/tests/bshash_conversion.cpp b/tests/bshash_conversion.cpp index 6a08bab1..bfa5fe1f 100644 --- a/tests/bshash_conversion.cpp +++ b/tests/bshash_conversion.cpp @@ -10,70 +10,33 @@ int main() { - - /*{ - PRINT_TEST_NAME("k-mer hash values") - - std::string seq = "ACATGCATGCA"; - const unsigned k = 5; - const unsigned h = 3; - - const std::vector> hashes = { - { 0x38cc00f940aebdae, 0xab7e1b110e086fc6, 0x11a1818bcfdd553 }, - { 0x603a48c5a11c794a, 0xe66016e61816b9c4, 0xc5b13cb146996ffe } - }; - - btllib::NtHash nthash(seq, h, k); - nthash.roll(); - btllib::BlindNtHash blind(seq.data(), h, k); - - for (const auto& h_vals : hashes) { - nthash.roll(); - TEST_ASSERT_ARRAY_EQ(h_vals, nthash.hashes(), h); - blind.roll(seq[blind.get_pos() + k]); - TEST_ASSERT_ARRAY_EQ(h_vals, blind.hashes(), h); - } - }*/ - { PRINT_TEST_NAME("k-mer rolling") - std::string seq = "AGTCAGTCAGTCA"; + std::string seq = "AGTCAGTCAGTCAG"; unsigned h = 3; - unsigned k = 5; - //AGTCA - //TGACT - - btllib::BSHashConversion nthash(seq, h, k, "CT"); - /*nthash.roll(); - nthash.roll(); - std::cerr << "CT forrward: " << nthash.get_forward_hash() << std::endl; - std::cerr << "CT reverse: " << nthash.get_reverse_hash() << std::endl; - btllib::BSHashConversion nthash2("GTCAG", h, k, "CT"); - nthash2.roll(); - std::cerr << "CT forrward: " << nthash2.get_forward_hash() << std::endl; - std::cerr << "CT reverse: " << nthash2.get_reverse_hash() << std::endl;*/ + unsigned k = 6; + + btllib::BSHashConversion bshash(seq, h, k, "CT"); std::vector hashes; uint64_t fwd_hash = 0; uint64_t final_fwd_hash = 0; uint64_t rev_hash = 0; uint64_t final_rev_hash = 0; - while (nthash.roll()) { + while (bshash.roll()) { if (fwd_hash == 0) { - fwd_hash = nthash.get_forward_hash(); - rev_hash = nthash.get_reverse_hash(); + fwd_hash = bshash.get_forward_hash(); + rev_hash = bshash.get_reverse_hash(); } - final_fwd_hash = nthash.get_forward_hash(); - final_rev_hash = nthash.get_reverse_hash(); + final_fwd_hash = bshash.get_forward_hash(); + final_rev_hash = bshash.get_reverse_hash(); uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); hashes.push_back(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); - - // check same hash value for identical k-mers (first and last) TEST_ASSERT_EQ(fwd_hash, final_fwd_hash); TEST_ASSERT_EQ(rev_hash, final_rev_hash); TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); @@ -82,87 +45,64 @@ main() { PRINT_TEST_NAME("k-mer rolling - (CT Conversion)") - std::string seq = "AGTCAGTCAGTTA"; + std::string seq = "AGTCAGTCAGTTAG"; unsigned h = 3; - unsigned k = 5; - //AGTCA - //TGACT - - btllib::BSHashConversion nthash(seq, h, k, "CT"); - /*nthash.roll(); - nthash.roll(); - std::cerr << "CT forrward: " << nthash.get_forward_hash() << std::endl; - std::cerr << "CT reverse: " << nthash.get_reverse_hash() << std::endl; - btllib::BSHashConversion nthash2("GTCAG", h, k, "CT"); - nthash2.roll(); - std::cerr << "CT forrward: " << nthash2.get_forward_hash() << std::endl; - std::cerr << "CT reverse: " << nthash2.get_reverse_hash() << std::endl;*/ + unsigned k = 6; + + btllib::BSHashConversion bshash(seq, h, k, "CT"); std::vector hashes; uint64_t fwd_hash = 0; uint64_t final_fwd_hash = 0; uint64_t rev_hash = 0; uint64_t final_rev_hash = 0; - while (nthash.roll()) { + while (bshash.roll()) { if (fwd_hash == 0) { - fwd_hash = nthash.get_forward_hash(); - rev_hash = nthash.get_reverse_hash(); + fwd_hash = bshash.get_forward_hash(); + rev_hash = bshash.get_reverse_hash(); } - final_fwd_hash = nthash.get_forward_hash(); - final_rev_hash = nthash.get_reverse_hash(); + final_fwd_hash = bshash.get_forward_hash(); + final_rev_hash = bshash.get_reverse_hash(); uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); hashes.push_back(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); - - // check same hash value for identical k-mers (first and last) TEST_ASSERT_EQ(fwd_hash, final_fwd_hash); TEST_ASSERT_EQ(rev_hash, final_rev_hash); TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); } - { + { PRINT_TEST_NAME("k-mer rolling - (GA Conversion)") - std::string seq = "AGTCAGTCAATCA"; + std::string seq = "AGTCAGTCAATCAA"; unsigned h = 3; - unsigned k = 5; - //AGTCA - //TGACT - - btllib::BSHashConversion nthash(seq, h, k, "GA"); - /*nthash.roll(); - nthash.roll(); - std::cerr << "CT forrward: " << nthash.get_forward_hash() << std::endl; - std::cerr << "CT reverse: " << nthash.get_reverse_hash() << std::endl; - btllib::BSHashConversion nthash2("GTCAG", h, k, "CT"); - nthash2.roll(); - std::cerr << "CT forrward: " << nthash2.get_forward_hash() << std::endl; - std::cerr << "CT reverse: " << nthash2.get_reverse_hash() << std::endl;*/ + unsigned k = 6; + + + btllib::BSHashConversion bshash(seq, h, k, "GA"); std::vector hashes; uint64_t fwd_hash = 0; uint64_t final_fwd_hash = 0; uint64_t rev_hash = 0; uint64_t final_rev_hash = 0; - while (nthash.roll()) { + while (bshash.roll()) { if (fwd_hash == 0) { - fwd_hash = nthash.get_forward_hash(); - rev_hash = nthash.get_reverse_hash(); + fwd_hash = bshash.get_forward_hash(); + rev_hash = bshash.get_reverse_hash(); } - final_fwd_hash = nthash.get_forward_hash(); - final_rev_hash = nthash.get_reverse_hash(); + final_fwd_hash = bshash.get_forward_hash(); + final_rev_hash = bshash.get_reverse_hash(); uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); hashes.push_back(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); - - // check same hash value for identical k-mers (first and last) TEST_ASSERT_EQ(fwd_hash, final_fwd_hash); TEST_ASSERT_EQ(rev_hash, final_rev_hash); TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); @@ -171,24 +111,24 @@ main() { PRINT_TEST_NAME("k-mer rolling vs. base hash values") - std::string seq = "ACGTACACTGGACTGAGTCTA"; + std::string seq = "ACGTACACTGGACTGAGTCTATCG"; - btllib::BSHashConversion nthash(seq, 3, seq.size() - 2, "CT"); + btllib::BSHashConversion bshash(seq, 3, seq.size() - 2, "CT"); /* 19-mers of kmer*/ - std::string kmer1 = seq.substr(0, 19); - std::string kmer2 = seq.substr(1, 19); - std::string kmer3 = seq.substr(2, 19); - - btllib::BSHashConversion nthash_vector[] = { - btllib::BSHashConversion(kmer1, nthash.get_hash_num(), kmer1.size(), "CT"), - btllib::BSHashConversion(kmer2, nthash.get_hash_num(), kmer2.size(), "CT"), - btllib::BSHashConversion(kmer3, nthash.get_hash_num(), kmer3.size(), "CT") + std::string kmer1 = seq.substr(0, 22); + std::string kmer2 = seq.substr(1, 22); + std::string kmer3 = seq.substr(2, 22); + + btllib::BSHashConversion bshash_vector[] = { + btllib::BSHashConversion(kmer1, bshash.get_hash_num(), kmer1.size(), "CT"), + btllib::BSHashConversion(kmer2, bshash.get_hash_num(), kmer2.size(), "CT"), + btllib::BSHashConversion(kmer3, bshash.get_hash_num(), kmer3.size(), "CT") }; size_t i; - for (i = 0; nthash.roll() && nthash_vector[i].roll(); ++i) { - for (size_t j = 0; j < nthash.get_hash_num(); ++j) { - TEST_ASSERT_EQ(nthash.hashes()[j], nthash_vector[i].hashes()[j]); + for (i = 0; bshash.roll() && bshash_vector[i].roll(); ++i) { + for (size_t j = 0; j < bshash.get_hash_num(); ++j) { + TEST_ASSERT_EQ(bshash.hashes()[j], bshash_vector[i].hashes()[j]); } } TEST_ASSERT_EQ(i, 3); @@ -197,41 +137,41 @@ main() { PRINT_TEST_NAME("canonical hashing") - std::string seq_f = "ACGTACACTGGACTGAGTCTA"; - std::string seq_r = "TAGACTCAGTCCAGTGTACGT"; + std::string seq_f = "ACGTACACTGGACTGAGTCTAA"; + std::string seq_r = "TTAGACTCAGTCCAGTGTACGT"; unsigned h = 3; - btllib::BSHashConversion nthash_f(seq_f, h, seq_f.size(), "CT"); - btllib::BSHashConversion nthash_r(seq_r, h, seq_r.size(), "GA"); + btllib::BSHashConversion bshash_f(seq_f, h, seq_f.size(), "CT"); + btllib::BSHashConversion bshash_r(seq_r, h, seq_r.size(), "GA"); - nthash_f.roll(); - nthash_r.roll(); - TEST_ASSERT_EQ(nthash_f.get_hash_num(), nthash_r.get_hash_num()) - TEST_ASSERT_ARRAY_EQ(nthash_f.hashes(), nthash_r.hashes(), h) + bshash_f.roll(); + bshash_r.roll(); + TEST_ASSERT_EQ(bshash_f.get_hash_num(), bshash_r.get_hash_num()) + TEST_ASSERT_ARRAY_EQ(bshash_f.hashes(), bshash_r.hashes(), h) } { PRINT_TEST_NAME("k-mer back rolling") - std::string seq = "ACTAGCTG"; + std::string seq = "ACTAGACTG"; unsigned h = 3; - unsigned k = 5; + unsigned k = 6; - btllib::BSHashConversion nthash(seq, h, k, "CT"); + btllib::BSHashConversion bshash(seq, h, k, "CT"); std::stack hashes; - while (nthash.roll()) { + while (bshash.roll()) { uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); hashes.push(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1) do { - TEST_ASSERT_ARRAY_EQ(nthash.hashes(), hashes.top(), h) + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), hashes.top(), h) hashes.pop(); - } while (nthash.roll_back()); + } while (bshash.roll_back()); } { @@ -239,34 +179,34 @@ main() std::string seq = "ACTGATCAG"; unsigned h = 3; - unsigned k = 5; + unsigned k = 6; - btllib::BSHashConversion nthash(seq, h, k, "CT"); - nthash.roll(); + btllib::BSHashConversion bshash(seq, h, k, "CT"); + bshash.roll(); size_t steps = 3; while (steps--) { - nthash.peek(); + bshash.peek(); uint64_t* h_peek = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_peek); - nthash.peek(seq[nthash.get_pos() + k]); - TEST_ASSERT_ARRAY_EQ(nthash.hashes(), h_peek, h); - nthash.roll(); - TEST_ASSERT_ARRAY_EQ(nthash.hashes(), h_peek, h); + std::copy(bshash.hashes(), bshash.hashes() + h, h_peek); + bshash.peek(seq[bshash.get_pos() + k]); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), h_peek, h); + bshash.roll(); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), h_peek, h); } } { PRINT_TEST_NAME("skipping Ns") - std::string seq = "ACGTACACTGGACTGAGTCTAA"; + std::string seq = "ACGTACACTGGACTGAGTCTAAAA"; std::string seq_with_ns = seq; TEST_ASSERT_GE(seq_with_ns.size(), 10) seq_with_ns[seq_with_ns.size() / 2] = 'N'; seq_with_ns[seq_with_ns.size() / 2 + 1] = 'N'; unsigned k = (seq.size() - 2) / 2 - 1; - btllib::BSHashConversion nthash(seq_with_ns, 3, k, "CT"); + btllib::BSHashConversion bshash(seq_with_ns, 3, k, "CT"); std::vector positions; for (size_t i = 0; i < seq_with_ns.size() / 2 - k + 1; i++) { @@ -278,8 +218,8 @@ main() } size_t i = 0; - while (nthash.roll()) { - TEST_ASSERT_EQ(nthash.get_pos(), positions[i]) + while (bshash.roll()) { + TEST_ASSERT_EQ(bshash.get_pos(), positions[i]) i++; } TEST_ASSERT_EQ(positions.size(), i) @@ -288,24 +228,61 @@ main() { PRINT_TEST_NAME("base substitution") - std::string seq = "ACGTACACTGGACTGAGTCTA"; - std::string sub = "ACGCGCACTGGACTGAGTCTA"; + std::string seq = "ACGTACACTGGACTGAGTCTAA"; + std::string sub = "ACGCGCACTGGACTGAGTCTAA"; - btllib::BSHashConversion nthash(seq, 3, seq.size(), "CT"); - btllib::BSHashConversion nthash_subbed(sub, 3, sub.size(), "CT"); + btllib::BSHashConversion bshash(seq, 3, seq.size(), "CT"); + btllib::BSHashConversion bshash_subbed(sub, 3, sub.size(), "CT"); - nthash.roll(); - nthash.sub({ 3, 4 }, { 'C', 'G' }); - nthash_subbed.roll(); - TEST_ASSERT_EQ(nthash.get_hash_num(), nthash_subbed.get_hash_num()); + bshash.roll(); + bshash.sub({ 3, 4 }, { 'C', 'G' }); + bshash_subbed.roll(); + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_subbed.get_hash_num()); size_t i; - for (i = 0; i < nthash.get_hash_num(); ++i) { - TEST_ASSERT_EQ(nthash.hashes()[i], nthash_subbed.hashes()[i]); + for (i = 0; i < bshash.get_hash_num(); ++i) { + TEST_ASSERT_EQ(bshash.hashes()[i], bshash_subbed.hashes()[i]); } TEST_ASSERT_EQ(i, 3); } - + { + PRINT_TEST_NAME("Methylation Module Tests") + + std::string seq = "ACCGAA"; + std::string seq_no_meth = "ACTGAA"; + std::string rc_seq = "TTCGGT"; + std::string rc_seq_no_meth = "TTCAAT"; + + + btllib::BSHashConversion bshash(seq, 3, seq.size(), "CT"); + btllib::BSHashConversion bshash_no_meth(seq_no_meth, 3, seq_no_meth.size(),"CT"); + btllib::BSHashConversion bshash_rc(rc_seq, 3, rc_seq.size(), "GA"); + btllib::BSHashConversion bshash_rc_no_meth(rc_seq_no_meth, 3, rc_seq_no_meth.size(), "GA"); + + bshash.roll(); + bshash_no_meth.roll(); + bshash_rc.roll(); + bshash_rc_no_meth.roll(); + + + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_no_meth.get_hash_num()); + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_rc.get_hash_num()); + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_rc_no_meth.get_hash_num()); + + size_t h = bshash.get_hash_num(); + TEST_ASSERT_EQ(h, 3); + + + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_no_meth.hashes(), h); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_rc.hashes(), h); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_rc_no_meth.hashes(), h); + + TEST_ASSERT_EQ(bshash.is_methylated(), bshash_rc.is_methylated()); + TEST_ASSERT_NE(bshash.is_methylated(), bshash_no_meth.is_methylated()); + TEST_ASSERT_NE(bshash_rc.is_methylated(), bshash_rc_no_meth.is_methylated()); + TEST_ASSERT_NE(bshash.is_forward_higher(), bshash_rc.is_forward_higher()); + } + return 0; } From 5f4886db9ab6fc76575f907c8a397b4e45270606 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Mon, 8 Dec 2025 15:57:08 -0800 Subject: [PATCH 11/38] renamed files --- .../{bshash_kmer_conversion.hpp => bshash_kmer_directional.hpp} | 0 tests/{bshash_conversion.cpp => bshash_directional.cpp} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename include/btllib/{bshash_kmer_conversion.hpp => bshash_kmer_directional.hpp} (100%) rename tests/{bshash_conversion.cpp => bshash_directional.cpp} (100%) diff --git a/include/btllib/bshash_kmer_conversion.hpp b/include/btllib/bshash_kmer_directional.hpp similarity index 100% rename from include/btllib/bshash_kmer_conversion.hpp rename to include/btllib/bshash_kmer_directional.hpp diff --git a/tests/bshash_conversion.cpp b/tests/bshash_directional.cpp similarity index 100% rename from tests/bshash_conversion.cpp rename to tests/bshash_directional.cpp From 0d0491b4d14c5eb9df5d852211953c49faeb82c1 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Mon, 8 Dec 2025 16:06:57 -0800 Subject: [PATCH 12/38] bshash_conversion: final version --- include/btllib/bshash.hpp | 2 +- include/btllib/bshash_kmer_directional.hpp | 12 +++---- tests/bshash_directional.cpp | 42 +++++++++++----------- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/include/btllib/bshash.hpp b/include/btllib/bshash.hpp index fdcf8aa6..5a412671 100644 --- a/include/btllib/bshash.hpp +++ b/include/btllib/bshash.hpp @@ -11,7 +11,7 @@ #include #include -#include +#include #include namespace btllib { diff --git a/include/btllib/bshash_kmer_directional.hpp b/include/btllib/bshash_kmer_directional.hpp index 5d3ae1a4..ed991087 100644 --- a/include/btllib/bshash_kmer_directional.hpp +++ b/include/btllib/bshash_kmer_directional.hpp @@ -53,7 +53,7 @@ using hashing_internals::BS_GA_TETRAMER_TAB; /** * Directional BS-seq k-mer hashing. */ -class BSHashConversion +class BSHashDirectional { public: @@ -67,7 +67,7 @@ class BSHashConversion * used to determine how bases are interpreted during hashing. * @param pos Position in the sequence to start hashing from */ - BSHashConversion(const char* seq, + BSHashDirectional(const char* seq, size_t seq_len, hashing_internals::NUM_HASHES_TYPE num_hashes, hashing_internals::K_TYPE k, @@ -143,16 +143,16 @@ class BSHashConversion * used to determine how bases are interpreted during hashing. * @param pos Position in sequence to start hashing from */ - BSHashConversion(const std::string& seq, + BSHashDirectional(const std::string& seq, hashing_internals::NUM_HASHES_TYPE num_hashes, hashing_internals::K_TYPE k, std::string conversion_type, size_t pos = 0) - : BSHashConversion(seq.data(), seq.size(), num_hashes, k, conversion_type, pos) + : BSHashDirectional(seq.data(), seq.size(), num_hashes, k, conversion_type, pos) { } - BSHashConversion(const BSHashConversion& obj) + BSHashDirectional(const BSHashDirectional& obj) : seq(obj.seq) , seq_len(obj.seq_len) , num_hashes(obj.num_hashes) @@ -182,7 +182,7 @@ class BSHashConversion ); } - BSHashConversion(BSHashConversion&&) = default; + BSHashDirectional(BSHashDirectional&&) = default; /** * Calculate the hash values of current k-mer and advance to the next k-mer. * BsHash advances one nucleotide at a time until it finds a k-mer with valid diff --git a/tests/bshash_directional.cpp b/tests/bshash_directional.cpp index bfa5fe1f..58b990b2 100644 --- a/tests/bshash_directional.cpp +++ b/tests/bshash_directional.cpp @@ -17,7 +17,7 @@ main() unsigned h = 3; unsigned k = 6; - btllib::BSHashConversion bshash(seq, h, k, "CT"); + btllib::BSHashDirectional bshash(seq, h, k, "CT"); std::vector hashes; uint64_t fwd_hash = 0; uint64_t final_fwd_hash = 0; @@ -43,13 +43,13 @@ main() } { - PRINT_TEST_NAME("k-mer rolling - (CT Conversion)") + PRINT_TEST_NAME("k-mer rolling - (CT Directional)") std::string seq = "AGTCAGTCAGTTAG"; unsigned h = 3; unsigned k = 6; - btllib::BSHashConversion bshash(seq, h, k, "CT"); + btllib::BSHashDirectional bshash(seq, h, k, "CT"); std::vector hashes; uint64_t fwd_hash = 0; uint64_t final_fwd_hash = 0; @@ -76,14 +76,14 @@ main() { - PRINT_TEST_NAME("k-mer rolling - (GA Conversion)") + PRINT_TEST_NAME("k-mer rolling - (GA Directional)") std::string seq = "AGTCAGTCAATCAA"; unsigned h = 3; unsigned k = 6; - btllib::BSHashConversion bshash(seq, h, k, "GA"); + btllib::BSHashDirectional bshash(seq, h, k, "GA"); std::vector hashes; uint64_t fwd_hash = 0; uint64_t final_fwd_hash = 0; @@ -113,16 +113,16 @@ main() std::string seq = "ACGTACACTGGACTGAGTCTATCG"; - btllib::BSHashConversion bshash(seq, 3, seq.size() - 2, "CT"); + btllib::BSHashDirectional bshash(seq, 3, seq.size() - 2, "CT"); /* 19-mers of kmer*/ std::string kmer1 = seq.substr(0, 22); std::string kmer2 = seq.substr(1, 22); std::string kmer3 = seq.substr(2, 22); - btllib::BSHashConversion bshash_vector[] = { - btllib::BSHashConversion(kmer1, bshash.get_hash_num(), kmer1.size(), "CT"), - btllib::BSHashConversion(kmer2, bshash.get_hash_num(), kmer2.size(), "CT"), - btllib::BSHashConversion(kmer3, bshash.get_hash_num(), kmer3.size(), "CT") + btllib::BSHashDirectional bshash_vector[] = { + btllib::BSHashDirectional(kmer1, bshash.get_hash_num(), kmer1.size(), "CT"), + btllib::BSHashDirectional(kmer2, bshash.get_hash_num(), kmer2.size(), "CT"), + btllib::BSHashDirectional(kmer3, bshash.get_hash_num(), kmer3.size(), "CT") }; size_t i; @@ -141,8 +141,8 @@ main() std::string seq_r = "TTAGACTCAGTCCAGTGTACGT"; unsigned h = 3; - btllib::BSHashConversion bshash_f(seq_f, h, seq_f.size(), "CT"); - btllib::BSHashConversion bshash_r(seq_r, h, seq_r.size(), "GA"); + btllib::BSHashDirectional bshash_f(seq_f, h, seq_f.size(), "CT"); + btllib::BSHashDirectional bshash_r(seq_r, h, seq_r.size(), "GA"); bshash_f.roll(); bshash_r.roll(); @@ -157,7 +157,7 @@ main() unsigned h = 3; unsigned k = 6; - btllib::BSHashConversion bshash(seq, h, k, "CT"); + btllib::BSHashDirectional bshash(seq, h, k, "CT"); std::stack hashes; while (bshash.roll()) { @@ -181,7 +181,7 @@ main() unsigned h = 3; unsigned k = 6; - btllib::BSHashConversion bshash(seq, h, k, "CT"); + btllib::BSHashDirectional bshash(seq, h, k, "CT"); bshash.roll(); size_t steps = 3; @@ -206,7 +206,7 @@ main() seq_with_ns[seq_with_ns.size() / 2] = 'N'; seq_with_ns[seq_with_ns.size() / 2 + 1] = 'N'; unsigned k = (seq.size() - 2) / 2 - 1; - btllib::BSHashConversion bshash(seq_with_ns, 3, k, "CT"); + btllib::BSHashDirectional bshash(seq_with_ns, 3, k, "CT"); std::vector positions; for (size_t i = 0; i < seq_with_ns.size() / 2 - k + 1; i++) { @@ -231,8 +231,8 @@ main() std::string seq = "ACGTACACTGGACTGAGTCTAA"; std::string sub = "ACGCGCACTGGACTGAGTCTAA"; - btllib::BSHashConversion bshash(seq, 3, seq.size(), "CT"); - btllib::BSHashConversion bshash_subbed(sub, 3, sub.size(), "CT"); + btllib::BSHashDirectional bshash(seq, 3, seq.size(), "CT"); + btllib::BSHashDirectional bshash_subbed(sub, 3, sub.size(), "CT"); bshash.roll(); bshash.sub({ 3, 4 }, { 'C', 'G' }); @@ -254,10 +254,10 @@ main() std::string rc_seq_no_meth = "TTCAAT"; - btllib::BSHashConversion bshash(seq, 3, seq.size(), "CT"); - btllib::BSHashConversion bshash_no_meth(seq_no_meth, 3, seq_no_meth.size(),"CT"); - btllib::BSHashConversion bshash_rc(rc_seq, 3, rc_seq.size(), "GA"); - btllib::BSHashConversion bshash_rc_no_meth(rc_seq_no_meth, 3, rc_seq_no_meth.size(), "GA"); + btllib::BSHashDirectional bshash(seq, 3, seq.size(), "CT"); + btllib::BSHashDirectional bshash_no_meth(seq_no_meth, 3, seq_no_meth.size(),"CT"); + btllib::BSHashDirectional bshash_rc(rc_seq, 3, rc_seq.size(), "GA"); + btllib::BSHashDirectional bshash_rc_no_meth(rc_seq_no_meth, 3, rc_seq_no_meth.size(), "GA"); bshash.roll(); From 41454d2e88fec206151717fb2d53741337faab8c Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Mon, 8 Dec 2025 16:23:59 -0800 Subject: [PATCH 13/38] azure-pipelines.yml: pin cmake --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 522fabc6..acc8137d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,7 +20,7 @@ jobs: - script: | source activate btllib_CI conda install --yes -c conda-forge mamba - mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson ninja cmake openmp + mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson ninja cmake=3.29.3 openmp pip install gcovr displayName: Install dependencies @@ -93,7 +93,7 @@ jobs: - script: | source activate btllib_CI - mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson ninja cmake openmp gcovr + mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson ninja cmake=3.29.3 openmp gcovr displayName: 'Install required software' - script: | From 2403e3a3f24710264bae5bbcdfe997f3bac4b480 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Mon, 8 Dec 2025 16:30:20 -0800 Subject: [PATCH 14/38] azure-pipelines.yml: pin meson and ninja --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index acc8137d..ae5d88c6 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,7 +20,7 @@ jobs: - script: | source activate btllib_CI conda install --yes -c conda-forge mamba - mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson ninja cmake=3.29.3 openmp + mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson=1.2.1 ninja=1.11.1 cmake=3.29.3 openmp pip install gcovr displayName: Install dependencies @@ -93,7 +93,7 @@ jobs: - script: | source activate btllib_CI - mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson ninja cmake=3.29.3 openmp gcovr + mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson=1.2.1 ninja=1.11.1 cmake=3.29.3 openmp gcovr displayName: 'Install required software' - script: | From 3300cf2adf2862236a561ca468e49daa9e1c3214 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Mon, 8 Dec 2025 16:53:10 -0800 Subject: [PATCH 15/38] bshash: hashing internals --- include/btllib/bshash_hashing_internals.hpp | 672 ++++++++++++++++++++ 1 file changed, 672 insertions(+) create mode 100644 include/btllib/bshash_hashing_internals.hpp diff --git a/include/btllib/bshash_hashing_internals.hpp b/include/btllib/bshash_hashing_internals.hpp new file mode 100644 index 00000000..8be5f2b7 --- /dev/null +++ b/include/btllib/bshash_hashing_internals.hpp @@ -0,0 +1,672 @@ +#pragma once + +#include +#include +#include + +#include + +namespace btllib::hashing_internals { + +// Group CTConversion XOR pairs (6 seeds → 3 pairs) +static const uint64_t CT_SEED_A_ = 0x38e1680db3188ddfULL; //0 +static const uint64_t CT_SEED_G_ = 0x07d6453133a367b1ULL; //2 +static const uint64_t CT_SEED_CT = 0x2c555d4ed6a221a1ULL; //3 +static const uint64_t XX_SEED_N_ = SEED_N; //3 + +// Group GAConversion XOR pairs (6 seeds → 3 pairs) +static const uint64_t GA_SEED_GA = 0x4d9c9d29392bb7d6ULL; //0 +static const uint64_t GA_SEED_C_ = 0x0e06d727c6e35043ULL; //1 +static const uint64_t GA_SEED_T_ = 0x013a8fe8dea57b07ULL; //3 + +const uint64_t BS_CT_SEED_TAB[ASCII_SIZE] = { + XX_SEED_N_, GA_SEED_T_, XX_SEED_N_, GA_SEED_GA, GA_SEED_GA, GA_SEED_GA, XX_SEED_N_, GA_SEED_C_, // 0..7 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 8..15 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 16..23 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 24..31 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 32..39 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 40..47 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 48..55 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 56..63 + XX_SEED_N_, CT_SEED_A_, XX_SEED_N_, CT_SEED_CT, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, CT_SEED_G_, // 64..71 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 72..79 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, CT_SEED_CT, CT_SEED_CT, XX_SEED_N_, XX_SEED_N_, // 80..87 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 88..95 + XX_SEED_N_, CT_SEED_A_, XX_SEED_N_, CT_SEED_CT, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, CT_SEED_G_, // 96..103 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 104..111 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, CT_SEED_CT, CT_SEED_CT, XX_SEED_N_, XX_SEED_N_, // 112..119 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 120..127 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 128..135 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 136..143 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 144..151 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 152..159 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 160..167 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 168..175 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 176..183 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 184..191 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 192..199 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 200..207 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 208..215 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 216..223 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 224..231 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 232..239 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 240..247 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_ // 248..255 +}; + +const uint64_t BS_GA_SEED_TAB[ASCII_SIZE] = { + XX_SEED_N_, CT_SEED_CT, XX_SEED_N_, CT_SEED_G_, CT_SEED_A_, CT_SEED_A_, XX_SEED_N_, CT_SEED_CT, // 0..7 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 8..15 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 16..23 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 24..31 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 32..39 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 40..47 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 48..55 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 56..63 + XX_SEED_N_, GA_SEED_GA, XX_SEED_N_, GA_SEED_C_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, GA_SEED_GA, // 64..71 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 72..79 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, GA_SEED_T_, GA_SEED_T_, XX_SEED_N_, XX_SEED_N_, // 80..87 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 88..95 + XX_SEED_N_, GA_SEED_GA, XX_SEED_N_, GA_SEED_C_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, GA_SEED_GA, // 96..103 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 104..111 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, GA_SEED_T_, GA_SEED_T_, XX_SEED_N_, XX_SEED_N_, // 112..119 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 120..127 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 128..135 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 136..143 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 144..151 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 152..159 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 160..167 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 168..175 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 176..183 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 184..191 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 192..199 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 200..207 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 208..215 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 216..223 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 224..231 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 232..239 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 240..247 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_ // 248..255 +}; + +const uint64_t BS_CT_DIMER_TAB[16] = { + 5270258388568741472ULL, 0ULL, 8508589596476603406ULL, 6744014374774716958ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 3985090206635016892ULL, 0ULL, 611028647685761234ULL, 2592339656081469122ULL, + 6938871167320903325ULL, 0ULL, 6880655447580681459ULL, 8430711922963669731ULL + +}; + +const uint64_t BS_CT_TRIMER_TAB[64] = { + 12296542375903994143ULL, 0ULL, 10777453763052456817ULL, 13696059339288153441ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 15332577898988860867ULL, 0ULL, 17005433182935818157ULL, 13870091925232998845ULL, + 9497654699218631138ULL, 0ULL, 13617020073335395212ULL, 10915115690247869852ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 6231483061417281702ULL, 0ULL, 7587863783224107720ULL, 4813953927640687832ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 2888204484391197818ULL, 0ULL, 1667417705464477204ULL, 4368707067656630276ULL, + 9156599042800701531ULL, 0ULL, 4622359443003849269ULL, 7757154674165939237ULL, + 17903722964224905445ULL, 0ULL, 14358003450906344075ULL, 17060471493690637467ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 9662639389952885817ULL, 0ULL, 13344089177003470423ULL, 10569000649149409351ULL, + 15068665748674332696ULL, 0ULL, 17161400230922396278ULL, 14243358305793533030ULL + +}; + +const uint64_t BS_CT_TETRAMER_TAB[256] = { + 7903069985244696545ULL, 0ULL, 5952199028685153679ULL, 8726125620783047583ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 1423984374654180157ULL, 0ULL, 3239265717627122003ULL, 537959694350858051ULL, + 4955422779349047068ULL, 0ULL, 8931236194499526002ULL, 5796422442532485986ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 10480418080795158104ULL, 0ULL, 12557937413778541622ULL, 9639350223324760614ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 17230691697404916356ULL, 0ULL, 14999410022686677226ULL, 18134732970037944058ULL, + 13265781732207312549ULL, 0ULL, 9740911128328721611ULL, 12442798639878738651ULL, + 4574968163017784859ULL, 0ULL, 21005562311251061ULL, 3155255381851099749ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 4689328268616313543ULL, 0ULL, 9089664875619762345ULL, 6172150804119029433ULL, + 1591432901806794470ULL, 0ULL, 2964154664228736136ULL, 189664148293585560ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 10670208004106787987ULL, 0ULL, 12331731648208664317ULL, 9268413422807691501ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 16895935546904758351ULL, 0ULL, 15370019031266140705ULL, 18360716771597345841ULL, + 13653888368900424814ULL, 0ULL, 9388727718593310208ULL, 12234149755652892688ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 7550431377666028842ULL, 0ULL, 6340971969386962756ULL, 8970238170159749460ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 1632237297318027766ULL, 0ULL, 2995441377836469144ULL, 149439503918058888ULL, + 4729043290971336151ULL, 0ULL, 9121973896872036281ULL, 6130765314036646313ULL, + 14322824142644718953ULL, 0ULL, 18010960957546809095ULL, 15163850771780370711ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 13306657871389129141ULL, 0ULL, 9772129397874496475ULL, 12402503902173812171ULL, + 17270334977163097492ULL, 0ULL, 15031787489074651130ULL, 18093417574358702570ULL, + 14415162961378847764ULL, 0ULL, 17815078398106617466ULL, 15904680980454047850ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 13142251585342896328ULL, 0ULL, 9895962073632248486ULL, 11733738969707828406ULL, + 16241274185646601449ULL, 0ULL, 16020348337179074183ULL, 17712848172773365911ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 3805619366837683629ULL, 0ULL, 857772563979235267ULL, 2334117937676265939ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 5385773951433757041ULL, 0ULL, 8469355426070739743ULL, 6776274342720863503ULL, + 2141510149062286672ULL, 0ULL, 2490210351169910590ULL, 651923950472458542ULL, + 11158837266677243374ULL, 0ULL, 11955661059224500096ULL, 10261471664026595728ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 16479371589667244338ULL, 0ULL, 15818137341953581916ULL, 17295591019177141580ULL, + 12949060435646432531ULL, 0ULL, 10125042934416846717ULL, 12033750801055738221ULL + +}; + +const uint64_t BS_GA_DIMER_TAB[16] = { + 15466952634673191035ULL, 10754575533572636654ULL, 0ULL, 11097913711551190186ULL, + 5877535496320390993ULL, 1300266414860792004ULL, 0ULL, 2105188344771369856ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 5758277602454356440ULL, 897281711739086413ULL, 0ULL, 238567881438104841ULL + +}; + +const uint64_t BS_GA_TRIMER_TAB[64] = { + 16201651124989724449ULL, 11767230542839996596ULL, 0ULL, 12425925165519260656ULL, + 7485905922695612427ULL, 2628147013137149854ULL, 0ULL, 3117991808660735194ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 8762868368011140738ULL, 4179829000807872791ULL, 0ULL, 3836473814229799507ULL, + 17203464567451130228ULL, 12476292506423230177ULL, 0ULL, 11680341964607149477ULL, + 7605014056128239198ULL, 3030964276180857291ULL, 0ULL, 2678653576253708943ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 8643215145753641175ULL, 3776431493625472834ULL, 0ULL, 4275266935868017670ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 15154499325442733158ULL, 10508537125562078195ULL, 0ULL, 11450835371142150327ULL, + 6159530959071411020ULL, 1648676777120242905ULL, 0ULL, 1863678824033957789ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 5405372172001914309ULL, 619798376795097680ULL, 0ULL, 551039597220094228ULL + +}; + +const uint64_t BS_GA_TETRAMER_TAB[256] = { + 10102482841122093461ULL, 14963615003264376320ULL, 0ULL, 13877042217463412036ULL, + 794795739487041215ULL, 5232589638041243946ULL, 0ULL, 5161613641720576622ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 1548991017688113206ULL, 6261504389880137635ULL, 0ULL, 6474221187766217959ULL, + 9392840475291690944ULL, 13961256338029068373ULL, 0ULL, 14914796497557187345ULL, + 391398344175249642ULL, 5112936561611444095ULL, 0ULL, 5316662609698825275ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 1951808117724528227ULL, 6380612394262551030ULL, 0ULL, 6318591945652269746ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 13739200011520215762ULL, 18244429068126416199ULL, 0ULL, 17441706179187852803ULL, + 4152289719497344504ULL, 8792626876107301485ULL, 0ULL, 8447054438424348969ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 2875359367268995953ULL, 7240976841901733092ULL, 0ULL, 7728536354038472608ULL, + 10439742971334657342ULL, 15238809933785883307ULL, 0ULL, 15872787535609874927ULL, + 1717559167161027092ULL, 6075273402150661505ULL, 0ULL, 6589870402902132421ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 697300835471006877ULL, 5347749012401158920ULL, 0ULL, 4975173205340442700ULL, + 11423453917689355115ULL, 15929804584091122942ULL, 0ULL, 15145130632992380858ULL, + 1854593325084628033ULL, 6496051875600806868ULL, 0ULL, 6132429400583706768ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 559685854125523656ULL, 4926424929898751325ULL, 0ULL, 5432033363231264281ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 11707425986759079545ULL, 16276972118015169004ULL, 0ULL, 17212532555803097768ULL, + 2688004620665649491ULL, 7410673165475159750ULL, 0ULL, 7632377835808404866ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 4266321074409320410ULL, 8696255610672826447ULL, 0ULL, 8616255423678309131ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 16790453619013574426ULL, 12293110188280461455ULL, 0ULL, 11936155684350399435ULL, + 7941878618842992688ULL, 3291412832639317925ULL, 0ULL, 2490818554466065633ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 8092837802684991161ULL, 3735105958421766444ULL, 0ULL, 4389210678668986984ULL, + 16675286395804860751ULL, 11885226664861542106ULL, 0ULL, 12379434820203482526ULL, + 6944006080901647973ULL, 2577284614385265136ULL, 0ULL, 3240342666676093620ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 9090130062791399660ULL, 4448689121532002169ULL, 0ULL, 3639106275939783741ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 15824496359018523741ULL, 10953226629488970696ULL, 0ULL, 10898136436499554444ULL, + 6678683788476883831ULL, 2248766861403210978ULL, 0ULL, 1155561727788445606ULL, + 0ULL, 0ULL, 0ULL, 0ULL, + 4816531739325199870ULL, 93880803705459307ULL, 0ULL, 1040842618995587375ULL + +}; + + +// ===== CTConversion_XOR_1_4 ===== +static const uint64_t CT_A_33R[33] = { + 0x00000001b3188ddf, 0x0000000166311bbf, 0x00000000cc62377f, 0x0000000198c46efe, 0x000000013188ddfd, 0x000000006311bbfb, 0x00000000c62377f6, 0x000000018c46efec, 0x00000001188ddfd9, 0x00000000311bbfb3, + 0x0000000062377f66, 0x00000000c46efecc, 0x0000000188ddfd98, 0x0000000111bbfb31, 0x000000002377f663, 0x0000000046efecc6, 0x000000008ddfd98c, 0x000000011bbfb318, 0x00000000377f6631, 0x000000006efecc62, + 0x00000000ddfd98c4, 0x00000001bbfb3188, 0x0000000177f66311, 0x00000000efecc623, 0x00000001dfd98c46, 0x00000001bfb3188d, 0x000000017f66311b, 0x00000000fecc6237, 0x00000001fd98c46e, 0x00000001fb3188dd, + 0x00000001f66311bb, 0x00000001ecc62377, 0x00000001d98c46ef +}; + +static const uint64_t CT_A_31L[31] = { + 0x38e1680c00000000, 0x71c2d01800000000, 0xe385a03000000000, 0xc70b406200000000, 0x8e1680c600000000, 0x1c2d018e00000000, 0x385a031c00000000, 0x70b4063800000000, 0xe1680c7000000000, 0xc2d018e200000000, + 0x85a031c600000000, 0xb40638e00000000, 0x1680c71c00000000, 0x2d018e3800000000, 0x5a031c7000000000, 0xb40638e000000000, 0x680c71c200000000, 0xd018e38400000000, 0xa031c70a00000000, 0x40638e1600000000, + 0x80c71c2c00000000, 0x18e385a00000000, 0x31c70b400000000, 0x638e16800000000, 0xc71c2d000000000, 0x18e385a000000000, 0x31c70b4000000000, 0x638e168000000000, 0xc71c2d0000000000, 0x8e385a0200000000, + 0x1c70b40600000000 +}; + +// ===== CTConversion_XOR_2_5 ===== +static const uint64_t CT_G_33R[33] = { + 0x0000000133a367b1, 0x000000006746cf63, 0x00000000ce8d9ec6, 0x000000019d1b3d8c, 0x000000013a367b19, 0x00000000746cf633, 0x00000000e8d9ec66, 0x00000001d1b3d8cc, 0x00000001a367b199, 0x0000000146cf6333, + 0x000000008d9ec667, 0x000000011b3d8cce, 0x00000000367b199d, 0x000000006cf6333a, 0x00000000d9ec6674, 0x00000001b3d8cce8, 0x0000000167b199d1, 0x00000000cf6333a3, 0x000000019ec66746, 0x000000013d8cce8d, + 0x000000007b199d1b, 0x00000000f6333a36, 0x00000001ec66746c, 0x00000001d8cce8d9, 0x00000001b199d1b3, 0x000000016333a367, 0x00000000c66746cf, 0x000000018cce8d9e, 0x00000001199d1b3d, 0x00000000333a367b, + 0x0000000066746cf6, 0x00000000cce8d9ec, 0x0000000199d1b3d8 +}; + +static const uint64_t CT_G_31L[31] = { + 0x7d6453000000000, 0xfac8a6000000000, 0x1f5914c000000000, 0x3eb2298000000000, 0x7d64530000000000, 0xfac8a60000000000, 0xf5914c0200000000, 0xeb22980600000000, 0xd645300e00000000, 0xac8a601e00000000, + 0x5914c03e00000000, 0xb229807c00000000, 0x645300fa00000000, 0xc8a601f400000000, 0x914c03ea00000000, 0x229807d600000000, 0x45300fac00000000, 0x8a601f5800000000, 0x14c03eb200000000, 0x29807d6400000000, + 0x5300fac800000000, 0xa601f59000000000, 0x4c03eb2200000000, 0x9807d64400000000, 0x300fac8a00000000, 0x601f591400000000, 0xc03eb22800000000, 0x807d645200000000, 0xfac8a600000000, 0x1f5914c00000000, + 0x3eb229800000000 +}; + +// ===== CTConversion_XOR_3_6 ===== +static const uint64_t CT_CT33R[33] = { + 0x00000000d6a221a1, 0x00000001ad444342, 0x000000015a888685, 0x00000000b5110d0b, 0x000000016a221a16, 0x00000000d444342d, 0x00000001a888685a, 0x000000015110d0b5, 0x00000000a221a16b, 0x00000001444342d6, + 0x00000000888685ad, 0x00000001110d0b5a, 0x00000000221a16b5, 0x0000000044342d6a, 0x0000000088685ad4, 0x0000000110d0b5a8, 0x0000000021a16b51, 0x000000004342d6a2, 0x000000008685ad44, 0x000000010d0b5a88, + 0x000000001a16b511, 0x00000000342d6a22, 0x00000000685ad444, 0x00000000d0b5a888, 0x00000001a16b5110, 0x0000000142d6a221, 0x0000000085ad4443, 0x000000010b5a8886, 0x0000000016b5110d, 0x000000002d6a221a, + 0x000000005ad44434, 0x00000000b5a88868, 0x000000016b5110d0 +}; + +static const uint64_t CT_CT31L[31] = { + 0x2c555d4e00000000, 0x58aaba9c00000000, 0xb155753800000000, 0x62aaea7200000000, 0xc555d4e400000000, 0x8aaba9ca00000000, 0x1557539600000000, 0x2aaea72c00000000, 0x555d4e5800000000, 0xaaba9cb000000000, + 0x5575396200000000, 0xaaea72c400000000, 0x55d4e58a00000000, 0xaba9cb1400000000, 0x5753962a00000000, 0xaea72c5400000000, 0x5d4e58aa00000000, 0xba9cb15400000000, 0x753962aa00000000, 0xea72c55400000000, + 0xd4e58aaa00000000, 0xa9cb155600000000, 0x53962aae00000000, 0xa72c555c00000000, 0x4e58aaba00000000, 0x9cb1557400000000, 0x3962aaea00000000, 0x72c555d400000000, 0xe58aaba800000000, 0xcb15575200000000, + 0x962aaea600000000 +}; + + +// ===== GAConversion_XOR_1_4 ===== +static const uint64_t GA_GA33R[33] = { + 0x00000001392bb7d6, 0x0000000072576fad, 0x00000000e4aedf5a, 0x00000001c95dbeb4, 0x0000000192bb7d69, 0x000000012576fad3, 0x000000004aedf5a7, 0x0000000095dbeb4e, 0x000000012bb7d69c, 0x00000000576fad39, + 0x00000000aedf5a72, 0x000000015dbeb4e4, 0x00000000bb7d69c9, 0x0000000176fad392, 0x00000000edf5a725, 0x00000001dbeb4e4a, 0x00000001b7d69c95, 0x000000016fad392b, 0x00000000df5a7257, 0x00000001beb4e4ae, + 0x000000017d69c95d, 0x00000000fad392bb, 0x00000001f5a72576, 0x00000001eb4e4aed, 0x00000001d69c95db, 0x00000001ad392bb7, 0x000000015a72576f, 0x00000000b4e4aedf, 0x0000000169c95dbe, 0x00000000d392bb7d, + 0x00000001a72576fa, 0x000000014e4aedf5, 0x000000009c95dbeb +}; + +static const uint64_t GA_GA31L[31] = { + 0x4d9c9d2800000000, 0x9b393a5000000000, 0x367274a200000000, 0x6ce4e94400000000, 0xd9c9d28800000000, 0xb393a51200000000, 0x67274a2600000000, 0xce4e944c00000000, 0x9c9d289a00000000, 0x393a513600000000, + 0x7274a26c00000000, 0xe4e944d800000000, 0xc9d289b200000000, 0x93a5136600000000, 0x274a26ce00000000, 0x4e944d9c00000000, 0x9d289b3800000000, 0x3a51367200000000, 0x74a26ce400000000, 0xe944d9c800000000, + 0xd289b39200000000, 0xa513672600000000, 0x4a26ce4e00000000, 0x944d9c9c00000000, 0x289b393a00000000, 0x5136727400000000, 0xa26ce4e800000000, 0x44d9c9d200000000, 0x89b393a400000000, 0x1367274a00000000, + 0x26ce4e9400000000 +}; + +// ===== GAConversion_XOR_2_5 ===== +static const uint64_t GA_C_33R[33] = { + 0x00000001c6e35043, 0x000000018dc6a087, 0x000000011b8d410f, 0x00000000371a821f, 0x000000006e35043e, 0x00000000dc6a087c, 0x00000001b8d410f8, 0x0000000171a821f1, 0x00000000e35043e3, 0x00000001c6a087c6, + 0x000000018d410f8d, 0x000000011a821f1b, 0x0000000035043e37, 0x000000006a087c6e, 0x00000000d410f8dc, 0x00000001a821f1b8, 0x000000015043e371, 0x00000000a087c6e3, 0x00000001410f8dc6, 0x00000000821f1b8d, + 0x00000001043e371a, 0x00000000087c6e35, 0x0000000010f8dc6a, 0x0000000021f1b8d4, 0x0000000043e371a8, 0x0000000087c6e350, 0x000000010f8dc6a0, 0x000000001f1b8d41, 0x000000003e371a82, 0x000000007c6e3504, + 0x00000000f8dc6a08, 0x00000001f1b8d410, 0x00000001e371a821 +}; + +static const uint64_t GA_C_31L[31] = { + 0xe06d72600000000, 0x1c0dae4c00000000, 0x381b5c9800000000, 0x7036b93000000000, 0xe06d726000000000, 0xc0dae4c200000000, 0x81b5c98600000000, 0x36b930e00000000, 0x6d7261c00000000, 0xdae4c3800000000, + 0x1b5c987000000000, 0x36b930e000000000, 0x6d7261c000000000, 0xdae4c38000000000, 0xb5c9870200000000, 0x6b930e0600000000, 0xd7261c0c00000000, 0xae4c381a00000000, 0x5c98703600000000, 0xb930e06c00000000, + 0x7261c0da00000000, 0xe4c381b400000000, 0xc987036a00000000, 0x930e06d600000000, 0x261c0dae00000000, 0x4c381b5c00000000, 0x987036b800000000, 0x30e06d7200000000, 0x61c0dae400000000, 0xc381b5c800000000, + 0x87036b9200000000 +}; + +// ===== GAConversion_XOR_3_6 ===== +static const uint64_t GA_T_33R[33] = { + 0x00000000dea57b07, 0x00000001bd4af60e, 0x000000017a95ec1d, 0x00000000f52bd83b, 0x00000001ea57b076, 0x00000001d4af60ed, 0x00000001a95ec1db, 0x0000000152bd83b7, 0x00000000a57b076f, 0x000000014af60ede, + 0x0000000095ec1dbd, 0x000000012bd83b7a, 0x0000000057b076f5, 0x00000000af60edea, 0x000000015ec1dbd4, 0x00000000bd83b7a9, 0x000000017b076f52, 0x00000000f60edea5, 0x00000001ec1dbd4a, 0x00000001d83b7a95, + 0x00000001b076f52b, 0x0000000160edea57, 0x00000000c1dbd4af, 0x0000000183b7a95e, 0x00000001076f52bd, 0x000000000edea57b, 0x000000001dbd4af6, 0x000000003b7a95ec, 0x0000000076f52bd8, 0x00000000edea57b0, + 0x00000001dbd4af60, 0x00000001b7a95ec1, 0x000000016f52bd83 +}; + +static const uint64_t GA_T_31L[31] = { + 0x13a8fe800000000, 0x2751fd000000000, 0x4ea3fa000000000, 0x9d47f4000000000, 0x13a8fe8000000000, 0x2751fd0000000000, 0x4ea3fa0000000000, 0x9d47f40000000000, 0x3a8fe80200000000, 0x751fd00400000000, + 0xea3fa00800000000, 0xd47f401200000000, 0xa8fe802600000000, 0x51fd004e00000000, 0xa3fa009c00000000, 0x47f4013a00000000, 0x8fe8027400000000, 0x1fd004ea00000000, 0x3fa009d400000000, 0x7f4013a800000000, + 0xfe80275000000000, 0xfd004ea200000000, 0xfa009d4600000000, 0xf4013a8e00000000, 0xe802751e00000000, 0xd004ea3e00000000, 0xa009d47e00000000, 0x4013a8fe00000000, 0x802751fc00000000, 0x4ea3fa00000000, + 0x9d47f400000000 +}; + +//TODO see if alias works +const uint64_t XX_N_33R[33] = { SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N }; + +const uint64_t XX_N_31L[31] = { SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N }; + + +const uint64_t* const BS_CT_MS_TAB_33R[ASCII_SIZE] = { + XX_N_33R, GA_T_33R, XX_N_33R, GA_GA33R, GA_GA33R, GA_GA33R, XX_N_33R, GA_C_33R, // 0..7 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 8..15 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 16..23 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 24..31 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 32..39 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 40..47 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 48..55 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 56..63 + XX_N_33R, CT_A_33R, XX_N_33R, CT_CT33R, XX_N_33R, XX_N_33R, XX_N_33R, CT_G_33R, // 64..71 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 72..79 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, CT_CT33R, CT_CT33R, XX_N_33R, XX_N_33R, // 80..87 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 88..95 + XX_N_33R, CT_A_33R, XX_N_33R, CT_CT33R, XX_N_33R, XX_N_33R, XX_N_33R, CT_G_33R, // 96..103 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 104..111 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, CT_CT33R, CT_CT33R, XX_N_33R, XX_N_33R, // 112..119 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 120..127 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 128..135 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 136..143 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 144..151 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 152..159 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 160..167 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 168..175 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 176..183 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 184..191 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 192..199 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 200..207 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 208..215 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 216..223 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 224..231 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 232..239 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 240..247 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R // 248..255 +}; + +const uint64_t* const BS_CT_MS_TAB_31L[ASCII_SIZE] = { + XX_N_31L, GA_T_31L, XX_N_31L, GA_GA31L, GA_GA31L, GA_GA31L, XX_N_31L, GA_C_31L, // 0..7 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 8..15 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 16..23 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 24..31 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 32..39 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 40..47 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 48..55 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 56..63 + XX_N_31L, CT_A_31L, XX_N_31L, CT_CT31L, XX_N_31L, XX_N_31L, XX_N_31L, CT_G_31L, // 64..71 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 72..79 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, CT_CT31L, CT_CT31L, XX_N_31L, XX_N_31L, // 80..87 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 88..95 + XX_N_31L, CT_A_31L, XX_N_31L, CT_CT31L, XX_N_31L, XX_N_31L, XX_N_31L, CT_G_31L, // 96..103 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 104..111 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, CT_CT31L, CT_CT31L, XX_N_31L, XX_N_31L, // 112..119 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 120..127 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 128..135 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 136..143 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 144..151 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 152..159 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 160..167 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 168..175 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 176..183 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 184..191 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 192..199 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 200..207 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 208..215 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 216..223 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 224..231 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 232..239 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 240..247 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L // 248..255 +}; + +const uint64_t* const BS_GA_MS_TAB_33R[ASCII_SIZE] = { + XX_N_33R, CT_CT33R, XX_N_33R, CT_G_33R, CT_A_33R, CT_A_33R, XX_N_33R, CT_CT33R, // 0..7 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 8..15 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 16..23 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 24..31 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 32..39 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 40..47 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 48..55 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 56..63 + XX_N_33R, GA_GA33R, XX_N_33R, GA_C_33R, XX_N_33R, XX_N_33R, XX_N_33R, GA_GA33R, // 64..71 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 72..79 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, GA_T_33R, GA_T_33R, XX_N_33R, XX_N_33R, // 80..87 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 88..95 + XX_N_33R, GA_GA33R, XX_N_33R, GA_C_33R, XX_N_33R, XX_N_33R, XX_N_33R, GA_GA33R, // 96..103 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 104..111 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, GA_T_33R, GA_T_33R, XX_N_33R, XX_N_33R, // 112..119 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 120..127 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 128..135 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 136..143 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 144..151 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 152..159 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 160..167 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 168..175 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 176..183 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 184..191 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 192..199 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 200..207 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 208..215 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 216..223 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 224..231 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 232..239 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 240..247 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R // 248..255 +}; + +const uint64_t* const BS_GA_MS_TAB_31L[ASCII_SIZE] = { + XX_N_31L, CT_CT31L, XX_N_31L, CT_G_31L, CT_A_31L, CT_A_31L, XX_N_31L, CT_CT31L, // 0..7 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 8..15 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 16..23 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 24..31 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 32..39 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 40..47 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 48..55 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 56..63 + XX_N_31L, GA_GA31L, XX_N_31L, GA_C_31L, XX_N_31L, XX_N_31L, XX_N_31L, GA_GA31L, // 64..71 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 72..79 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, GA_T_31L, GA_T_31L, XX_N_31L, XX_N_31L, // 80..87 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 88..95 + XX_N_31L, GA_GA31L, XX_N_31L, GA_C_31L, XX_N_31L, XX_N_31L, XX_N_31L, GA_GA31L, // 96..103 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 104..111 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, GA_T_31L, GA_T_31L, XX_N_31L, XX_N_31L, // 112..119 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 120..127 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 128..135 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 136..143 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 144..151 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 152..159 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 160..167 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 168..175 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 176..183 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 184..191 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 192..199 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 200..207 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 208..215 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 216..223 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 224..231 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 232..239 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 240..247 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L // 248..255 +}; + +const uint8_t BS_CT_CONVERT_TAB[ASCII_SIZE] = { + 255, 255, 255, 255, 255, 255, 255, 255, // 0..7 + 255, 255, 255, 255, 255, 255, 255, 255, // 8..15 + 255, 255, 255, 255, 255, 255, 255, 255, // 16..23 + 255, 255, 255, 255, 255, 255, 255, 255, // 24..31 + 255, 255, 255, 255, 255, 255, 255, 255, // 32..39 + 255, 255, 255, 255, 255, 255, 255, 255, // 40..47 + 255, 255, 255, 255, 255, 255, 255, 255, // 48..55 + 255, 255, 255, 255, 255, 255, 255, 255, // 56..63 + 255, 0, 255, 3, 255, 255, 255, 2, // 64..71 + 255, 255, 255, 255, 255, 255, 255, 255, // 72..79 + 255, 255, 255, 255, 3, 3, 255, 255, // 80..87 + 255, 255, 255, 255, 255, 255, 255, 255, // 88..95 + 255, 0, 255, 3, 255, 255, 255, 2, // 96..103 + 255, 255, 255, 255, 255, 255, 255, 255, // 104..111 + 255, 255, 255, 255, 3, 3, 255, 255, // 112..119 + 255, 255, 255, 255, 255, 255, 255, 255, // 120..127 + 255, 255, 255, 255, 255, 255, 255, 255, // 128..135 + 255, 255, 255, 255, 255, 255, 255, 255, // 136..143 + 255, 255, 255, 255, 255, 255, 255, 255, // 144..151 + 255, 255, 255, 255, 255, 255, 255, 255, // 152..159 + 255, 255, 255, 255, 255, 255, 255, 255, // 160..167 + 255, 255, 255, 255, 255, 255, 255, 255, // 168..175 + 255, 255, 255, 255, 255, 255, 255, 255, // 176..183 + 255, 255, 255, 255, 255, 255, 255, 255, // 184..191 + 255, 255, 255, 255, 255, 255, 255, 255, // 192..199 + 255, 255, 255, 255, 255, 255, 255, 255, // 200..207 + 255, 255, 255, 255, 255, 255, 255, 255, // 208..215 + 255, 255, 255, 255, 255, 255, 255, 255, // 216..223 + 255, 255, 255, 255, 255, 255, 255, 255, // 224..231 + 255, 255, 255, 255, 255, 255, 255, 255, // 232..239 + 255, 255, 255, 255, 255, 255, 255, 255, // 240..247 + 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 +}; + +const uint8_t BS_CT_RC_CONVERT_TAB[ASCII_SIZE] = { + 255, 255, 255, 255, 255, 255, 255, 255, // 0..7 + 255, 255, 255, 255, 255, 255, 255, 255, // 8..15 + 255, 255, 255, 255, 255, 255, 255, 255, // 16..23 + 255, 255, 255, 255, 255, 255, 255, 255, // 24..31 + 255, 255, 255, 255, 255, 255, 255, 255, // 32..39 + 255, 255, 255, 255, 255, 255, 255, 255, // 40..47 + 255, 255, 255, 255, 255, 255, 255, 255, // 48..55 + 255, 255, 255, 255, 255, 255, 255, 255, // 56..63 + 255, 3, 255, 0, 255, 255, 255, 1, // 64..71 + 255, 255, 255, 255, 255, 255, 255, 255, // 72..79 + 255, 255, 255, 255, 0, 0, 255, 255, // 80..87 + 255, 255, 255, 255, 255, 255, 255, 255, // 88..95 + 255, 3, 255, 0, 255, 255, 255, 1, // 96..103 + 255, 255, 255, 255, 255, 255, 255, 255, // 104..111 + 255, 255, 255, 255, 0, 0, 255, 255, // 112..119 + 255, 255, 255, 255, 255, 255, 255, 255, // 120..127 + 255, 255, 255, 255, 255, 255, 255, 255, // 128..135 + 255, 255, 255, 255, 255, 255, 255, 255, // 136..143 + 255, 255, 255, 255, 255, 255, 255, 255, // 144..151 + 255, 255, 255, 255, 255, 255, 255, 255, // 152..159 + 255, 255, 255, 255, 255, 255, 255, 255, // 160..167 + 255, 255, 255, 255, 255, 255, 255, 255, // 168..175 + 255, 255, 255, 255, 255, 255, 255, 255, // 176..183 + 255, 255, 255, 255, 255, 255, 255, 255, // 184..191 + 255, 255, 255, 255, 255, 255, 255, 255, // 192..199 + 255, 255, 255, 255, 255, 255, 255, 255, // 200..207 + 255, 255, 255, 255, 255, 255, 255, 255, // 208..215 + 255, 255, 255, 255, 255, 255, 255, 255, // 216..223 + 255, 255, 255, 255, 255, 255, 255, 255, // 224..231 + 255, 255, 255, 255, 255, 255, 255, 255, // 232..239 + 255, 255, 255, 255, 255, 255, 255, 255, // 240..247 + 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 +}; + +const uint8_t BS_GA_CONVERT_TAB[ASCII_SIZE] = { + 255, 255, 255, 255, 255, 255, 255, 255, // 0..7 + 255, 255, 255, 255, 255, 255, 255, 255, // 8..15 + 255, 255, 255, 255, 255, 255, 255, 255, // 16..23 + 255, 255, 255, 255, 255, 255, 255, 255, // 24..31 + 255, 255, 255, 255, 255, 255, 255, 255, // 32..39 + 255, 255, 255, 255, 255, 255, 255, 255, // 40..47 + 255, 255, 255, 255, 255, 255, 255, 255, // 48..55 + 255, 255, 255, 255, 255, 255, 255, 255, // 56..63 + 255, 0, 255, 1, 255, 255, 255, 0, // 64..71 + 255, 255, 255, 255, 255, 255, 255, 255, // 72..79 + 255, 255, 255, 255, 3, 3, 255, 255, // 80..87 + 255, 255, 255, 255, 255, 255, 255, 255, // 88..95 + 255, 0, 255, 1, 255, 255, 255, 0, // 96..103 + 255, 255, 255, 255, 255, 255, 255, 255, // 104..111 + 255, 255, 255, 255, 3, 3, 255, 255, // 112..119 + 255, 255, 255, 255, 255, 255, 255, 255, // 120..127 + 255, 255, 255, 255, 255, 255, 255, 255, // 128..135 + 255, 255, 255, 255, 255, 255, 255, 255, // 136..143 + 255, 255, 255, 255, 255, 255, 255, 255, // 144..151 + 255, 255, 255, 255, 255, 255, 255, 255, // 152..159 + 255, 255, 255, 255, 255, 255, 255, 255, // 160..167 + 255, 255, 255, 255, 255, 255, 255, 255, // 168..175 + 255, 255, 255, 255, 255, 255, 255, 255, // 176..183 + 255, 255, 255, 255, 255, 255, 255, 255, // 184..191 + 255, 255, 255, 255, 255, 255, 255, 255, // 192..199 + 255, 255, 255, 255, 255, 255, 255, 255, // 200..207 + 255, 255, 255, 255, 255, 255, 255, 255, // 208..215 + 255, 255, 255, 255, 255, 255, 255, 255, // 216..223 + 255, 255, 255, 255, 255, 255, 255, 255, // 224..231 + 255, 255, 255, 255, 255, 255, 255, 255, // 232..239 + 255, 255, 255, 255, 255, 255, 255, 255, // 240..247 + 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 +}; + +const uint8_t BS_GA_RC_CONVERT_TAB[ASCII_SIZE] = { + 255, 255, 255, 255, 255, 255, 255, 255, // 0..7 + 255, 255, 255, 255, 255, 255, 255, 255, // 8..15 + 255, 255, 255, 255, 255, 255, 255, 255, // 16..23 + 255, 255, 255, 255, 255, 255, 255, 255, // 24..31 + 255, 255, 255, 255, 255, 255, 255, 255, // 32..39 + 255, 255, 255, 255, 255, 255, 255, 255, // 40..47 + 255, 255, 255, 255, 255, 255, 255, 255, // 48..55 + 255, 255, 255, 255, 255, 255, 255, 255, // 56..63 + 255, 3, 255, 2, 255, 255, 255, 3, // 64..71 + 255, 255, 255, 255, 255, 255, 255, 255, // 72..79 + 255, 255, 255, 255, 0, 0, 255, 255, // 80..87 + 255, 255, 255, 255, 255, 255, 255, 255, // 88..95 + 255, 3, 255, 2, 255, 255, 255, 3, // 96..103 + 255, 255, 255, 255, 255, 255, 255, 255, // 104..111 + 255, 255, 255, 255, 0, 0, 255, 255, // 112..119 + 255, 255, 255, 255, 255, 255, 255, 255, // 120..127 + 255, 255, 255, 255, 255, 255, 255, 255, // 128..135 + 255, 255, 255, 255, 255, 255, 255, 255, // 136..143 + 255, 255, 255, 255, 255, 255, 255, 255, // 144..151 + 255, 255, 255, 255, 255, 255, 255, 255, // 152..159 + 255, 255, 255, 255, 255, 255, 255, 255, // 160..167 + 255, 255, 255, 255, 255, 255, 255, 255, // 168..175 + 255, 255, 255, 255, 255, 255, 255, 255, // 176..183 + 255, 255, 255, 255, 255, 255, 255, 255, // 184..191 + 255, 255, 255, 255, 255, 255, 255, 255, // 192..199 + 255, 255, 255, 255, 255, 255, 255, 255, // 200..207 + 255, 255, 255, 255, 255, 255, 255, 255, // 208..215 + 255, 255, 255, 255, 255, 255, 255, 255, // 216..223 + 255, 255, 255, 255, 255, 255, 255, 255, // 224..231 + 255, 255, 255, 255, 255, 255, 255, 255, // 232..239 + 255, 255, 255, 255, 255, 255, 255, 255, // 240..247 + 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 +}; + +} \ No newline at end of file From e7432724364edd4e1583b9343f0fcffda31f8477 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Mon, 8 Dec 2025 21:33:59 -0800 Subject: [PATCH 16/38] bshash test: spacing fixed --- tests/bshash.cpp | 1267 ++++++++++++++++++++-------------- tests/bshash_directional.cpp | 240 ++++++- 2 files changed, 961 insertions(+), 546 deletions(-) diff --git a/tests/bshash.cpp b/tests/bshash.cpp index 3bb1d63f..27147c4e 100644 --- a/tests/bshash.cpp +++ b/tests/bshash.cpp @@ -11,652 +11,694 @@ int main() { - - -{ - // ─────────────────────────────── + { PRINT_TEST_NAME("CG k-mer rolling (64-mer base)") - // ─────────────────────────────── - std::string first32 = - "AAACAGAT" - "CACCCGCT" - "GAGCGGGT" - "TACTGTGTAA"; + std::string first34 = "AAACAGAT" + "CACCCGCT" + "GAGCGGGT" + "TACTGTGTAA"; - std::string seq = first32 + first32; // 64 bases total + std::string seq = first34 + first34; unsigned h = 3; unsigned k = 34; - btllib::BsHash nthash(seq, h, k, "CG"); + btllib::BsHash bshash(seq, h, k, "CG"); std::vector hashes; - while (nthash.roll()) { - uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); - hashes.push_back(h_vals); + while (bshash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); + hashes.push_back(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); - // ─────────────────────────────── - // Define degeneracy groups - // ─────────────────────────────── const std::vector> GROUPS = { - {"AA", "GA"}, - {"AC", "AT", "GC", "GT"}, - {"AG", "GG"}, - {"CA", "CG", "TG"}, - {"CC", "CT"}, - {"TA"}, - {"TC", "TT"} + { "AA", "GA" }, + { "AC", "AT", "GC", "GT" }, + { "AG" }, + { "GG" }, + { "CA", "CG", "TG" }, + { "CC" }, + { "CT" }, + { "TA" }, + { "TC", "TT" } }; auto mutate_by_shift = [&](const std::string& s, int shift) { - std::string out; - for (size_t i = 0; i < s.size(); i += 2) { - std::string dimer = s.substr(i, 2); - bool done = false; - for (const auto& g : GROUPS) { - auto it = std::find(g.begin(), g.end(), dimer); - if (it != g.end()) { - out += g[(std::distance(g.begin(), it) + shift) % g.size()]; - done = true; - break; - } - } - if (!done) out += dimer; + std::string out; + for (size_t i = 0; i < s.size(); i += 2) { + std::string dimer = s.substr(i, 2); + bool done = false; + for (const auto& g : GROUPS) { + auto it = std::find(g.begin(), g.end(), dimer); + if (it != g.end()) { + out += g[(std::distance(g.begin(), it) + shift) % g.size()]; + done = true; + break; + } } - return out; + if (!done) + out += dimer; + } + return out; }; - auto compute_hashes = [&](const std::string& s) { - btllib::BsHash nh(s, h, k, "CG"); - std::vector out; - while (nh.roll()) { - uint64_t* hv = new uint64_t[h]; - std::copy(nh.hashes(), nh.hashes() + h, hv); - out.push_back(hv); - } - return out; + btllib::BsHash bh(s, h, k, "CG"); + std::vector out; + while (bh.roll()) { + uint64_t* hv = new uint64_t[h]; + std::copy(bh.hashes(), bh.hashes() + h, hv); + out.push_back(hv); + } + return out; }; -// ─────────────────────────────── -PRINT_TEST_NAME("CG k-mer rolling (degeneracy shift +1)") -// ─────────────────────────────── -{ - std::string seq_shift = first32 + mutate_by_shift(first32, 1); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); - - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); - for (unsigned i = 0; i < h1.size(); i = i+2) { - for (unsigned j = 0; j < h; ++j) { + + PRINT_TEST_NAME("CG k-mer rolling (degeneracy shift +1)") + + { + std::string seq_shift = first34 + mutate_by_shift(first34, 1); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i + 2) { + for (unsigned j = 0; j < h; ++j) { TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + } } } -} - // ─────────────────────────────── PRINT_TEST_NAME("CG k-mer rolling (degeneracy shift +2)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_by_shift(first32, 2); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); - - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); - for (unsigned i = 0; i < h1.size(); i = i+2) { - for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); - } + std::string seq_shift = first34 + mutate_by_shift(first34, 2); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i + 2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } + } } - // ─────────────────────────────── PRINT_TEST_NAME("CG k-mer rolling (degeneracy shift +3)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_by_shift(first32, 3); + std::string seq_shift = first34 + mutate_by_shift(first34, 3); auto h1 = compute_hashes(seq); auto h2 = compute_hashes(seq_shift); TEST_ASSERT_EQ(h1.size(), h2.size()); TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); - for (unsigned i = 0; i < h1.size(); i = i+2) { + for (unsigned i = 0; i < h1.size(); i = i + 2) { for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } } } } -{ - // ─────────────────────────────── + { + PRINT_TEST_NAME("CG k-mer rolling (C→T mutation)") - // ─────────────────────────────── - std::string first32 = - "ATGCGTAGGTCGATGACGTTGAGCGTACGAGTAA"; + std::string first34 = "ATGCGTAGGTCGATGACGTTGAGCGTACGAGTAA"; - std::string seq = first32 + first32; // 64 bases total + std::string seq = first34 + first34; unsigned h = 3; unsigned k = 34; - btllib::BsHash nthash(seq, h, k, "CG"); + btllib::BsHash bshash(seq, h, k, "CG"); std::vector hashes; - while (nthash.roll()) { - uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); - hashes.push_back(h_vals); + while (bshash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); + hashes.push_back(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); - // ─────────────────────────────── - // Mutation function: C→T - // ─────────────────────────────── auto mutate_CT = [](const std::string& s) { - std::string out = s; - for (char& base : out) { - if (base == 'C') base = 'T'; - else if (base == 'c') base = 't'; - } - return out; + std::string out = s; + for (char& base : out) { + if (base == 'C') + base = 'T'; + else if (base == 'c') + base = 't'; + } + return out; }; auto compute_hashes = [&](const std::string& s) { - btllib::BsHash nh(s, h, k, "CG"); - std::vector out; - while (nh.roll()) { - uint64_t* hv = new uint64_t[h]; - std::copy(nh.hashes(), nh.hashes() + h, hv); - out.push_back(hv); - } - return out; + btllib::BsHash bh(s, h, k, "CG"); + std::vector out; + while (bh.roll()) { + uint64_t* hv = new uint64_t[h]; + std::copy(bh.hashes(), bh.hashes() + h, hv); + out.push_back(hv); + } + return out; }; - // ─────────────────────────────── PRINT_TEST_NAME("CG k-mer rolling (C→T forward check)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_CT(first32); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); - std::cerr << seq << std::endl; - std::cerr << seq_shift << std::endl; + std::string seq_shift = first34 + mutate_CT(first34); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + std::cerr << seq << std::endl; + std::cerr << seq_shift << std::endl; - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); - for (unsigned i = 0; i < h1.size(); i += 2) { - std::cerr << i << std::endl; + for (unsigned i = 0; i < h1.size(); i += 2) { + std::cerr << i << std::endl; - for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); - } + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } + } } -} + } + + { -{ - // ─────────────────────────────── PRINT_TEST_NAME("CG k-mer rolling (G→A mutation)") - // ─────────────────────────────── - std::string first32 = - "TACCGATACCTACGCCATACGTCATACCGTTAAA"; + std::string first34 = "TACCGATACCTACGCCATACGTCATACCGTTAAA"; - std::string seq = first32 + first32; // 64 bases total + std::string seq = first34 + first34; unsigned h = 3; unsigned k = 34; - btllib::BsHash nthash(seq, h, k, "CG"); + btllib::BsHash bshash(seq, h, k, "CG"); std::vector hashes; - while (nthash.roll()) { - uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); - hashes.push_back(h_vals); + while (bshash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); + hashes.push_back(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); - // ─────────────────────────────── - // Mutation function: G→A - // ─────────────────────────────── auto mutate_GA = [](const std::string& s) { - std::string out = s; - for (char& base : out) { - if (base == 'G') base = 'A'; - else if (base == 'g') base = 'a'; - } - return out; + std::string out = s; + for (char& base : out) { + if (base == 'G') + base = 'A'; + else if (base == 'g') + base = 'a'; + } + return out; }; auto compute_hashes = [&](const std::string& s) { - btllib::BsHash nh(s, h, k, "CG"); - std::vector out; - while (nh.roll()) { - uint64_t* hv = new uint64_t[h]; - std::copy(nh.hashes(), nh.hashes() + h, hv); - out.push_back(hv); - } - return out; + btllib::BsHash bh(s, h, k, "CG"); + std::vector out; + while (bh.roll()) { + uint64_t* hv = new uint64_t[h]; + std::copy(bh.hashes(), bh.hashes() + h, hv); + out.push_back(hv); + } + return out; }; - // ─────────────────────────────── PRINT_TEST_NAME("CG k-mer rolling (G→A forward check)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_GA(first32); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); + std::string seq_shift = first34 + mutate_GA(first34); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); - for (unsigned i = 0; i < h1.size(); i += 2) { - for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); - } + for (unsigned i = 0; i < h1.size(); i += 2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } + } } -} + } + { -/*{ - // ─────────────────────────────── PRINT_TEST_NAME("CC k-mer rolling (64-mer base)") - // ─────────────────────────────── - std::string first32 = - "AAACAGAT" - "CACCCGCT" - "GAGCGGGT" - "TACTGTGT"; + std::string first34 = "AAACAGAT" + "CACCCGCT" + "GAGCGGGT" + "TACTGTGTAA"; - std::string seq = first32 + first32; // 64 bases total + std::string seq = first34 + first34; unsigned h = 3; - unsigned k = 32; + unsigned k = 34; - btllib::BsHash nthash(seq, h, k, "CC"); // or "CC" if that parameter matters + btllib::BsHash bshash(seq, h, k, "CC"); std::vector hashes; - while (nthash.roll()) { - uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); - hashes.push_back(h_vals); + while (bshash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); + hashes.push_back(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); - // ─────────────────────────────── - // Define CC degeneracy groups - // ─────────────────────────────── const std::vector> CC_GROUPS = { - {"AA", "AG", "GA", "GG"}, // 0x1bc5d5aba6ea6e02 - {"AC", "AT", "GC", "GT"}, // 0x8f15314b06b5ed9c - {"CA", "CG", "TA", "TG"}, // 0x44fec7e4a74bce00 - {"CC", "CT", "TC", "TT"} // 0x77c24a36c934fdc1 + { "AA", "AG", "GA", "GG" }, + { "AC", "AT", "GC", "GT" }, + { "CA", "TA", "TG" }, + { "CC", "CT", "TC", "TT" }, + { "CG" } }; auto mutate_by_shift = [&](const std::string& s, int shift) { - std::string out; - for (size_t i = 0; i < s.size(); i += 2) { - std::string dimer = s.substr(i, 2); - bool done = false; - for (const auto& g : CC_GROUPS) { - auto it = std::find(g.begin(), g.end(), dimer); - if (it != g.end()) { - out += g[(std::distance(g.begin(), it) + shift) % g.size()]; - done = true; - break; - } - } - if (!done) out += dimer; + std::string out; + for (size_t i = 0; i < s.size(); i += 2) { + std::string dimer = s.substr(i, 2); + bool done = false; + for (const auto& g : CC_GROUPS) { + auto it = std::find(g.begin(), g.end(), dimer); + if (it != g.end()) { + out += g[(std::distance(g.begin(), it) + shift) % g.size()]; + done = true; + break; + } } - return out; + if (!done) + out += dimer; + } + return out; }; auto compute_hashes = [&](const std::string& s) { - btllib::BsHash nh(s, h, k, "CC"); // or "CC" if appropriate - std::vector out; - while (nh.roll()) { - uint64_t* hv = new uint64_t[h]; - std::copy(nh.hashes(), nh.hashes() + h, hv); - out.push_back(hv); - } - return out; + btllib::BsHash bh(s, h, k, "CC"); + std::vector out; + while (bh.roll()) { + uint64_t* hv = new uint64_t[h]; + std::copy(bh.hashes(), bh.hashes() + h, hv); + out.push_back(hv); + } + return out; }; - // ─────────────────────────────── PRINT_TEST_NAME("CC k-mer rolling (degeneracy shift +1)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_by_shift(first32, 1); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); - - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); - for (unsigned i = 0; i < h1.size(); i = i+2) { - for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); - } + std::string seq_shift = first34 + mutate_by_shift(first34, 1); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i + 2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } + } } - // ─────────────────────────────── PRINT_TEST_NAME("CC k-mer rolling (degeneracy shift +2)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_by_shift(first32, 2); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); + std::string seq_shift = first34 + mutate_by_shift(first34, 2); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); - for (unsigned i = 0; i < h1.size(); i = i+2) { + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i + 2) { for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); - } + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } + } } - // ─────────────────────────────── PRINT_TEST_NAME("CC k-mer rolling (degeneracy shift +3)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_by_shift(first32, 3); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); - - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); - for (unsigned i = 0; i < h1.size(); i = i+2) { - for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); - } + std::string seq_shift = first34 + mutate_by_shift(first34, 3); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i + 2) { + for (unsigned j = 0; j < h; ++j) { + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } + } } -}*/ + } + + { -/*{ - // ─────────────────────────────── PRINT_TEST_NAME("CT k-mer rolling (64-mer base)") - // ─────────────────────────────── - std::string first32 = - "AAACAGAT" - "CACCCGCT" - "GAGCGGGT" - "TACTGTGT"; + std::string first34 = "AAACAGAT" + "CACCCGCT" + "GAGCGGGT" + "TACTGTGTAA"; - std::string seq = first32 + first32; // 64 bases total + std::string seq = first34 + first34; unsigned h = 3; - unsigned k = 32; + unsigned k = 34; - btllib::BsHash nthash(seq, h, k, "CT"); // or "CT" if that parameter matters + btllib::BsHash bshash(seq, h, k, "CT"); std::vector hashes; - while (nthash.roll()) { - uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); - hashes.push_back(h_vals); + while (bshash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); + hashes.push_back(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); - // ─────────────────────────────── - // Define CT degeneracy groups based on CT_DIMER_TAB - // ─────────────────────────────── const std::vector> CT_GROUPS = { - {"AA", "AC", "AG", "GG"}, // 0x9a533a6a5e7b14e2 - {"AT", "GT"}, // 0x897da34f6b464570 - {"CA"}, - {"CG"}, // 0xeeaf42d6e10d9ac0 and 0x68c7d4adebfb5001 - {"CC", "CT", "TC", "TT"}, // 0x049bab70128da11f - {"GA", "GC", "GG", "AG"}, // 0x9a533a6a5e7b14e2 - {"TA"}, // 0x0f1bfc76a0e3e700 - {"TG"} // 0x282450c57fae339f + { "AT", "AC", "GC", "GT" }, + { "CA" }, + { "CG" }, + { "CT", "TC", "TT" }, + { "GA", "AA", "AG" }, + { "TA" }, + { "CC" }, + { "GG" }, + { "TG" } }; auto mutate_by_shift = [&](const std::string& s, int shift) { - std::string out; - for (size_t i = 0; i < s.size(); i += 2) { - std::string dimer = s.substr(i, 2); - bool done = false; - for (const auto& g : CT_GROUPS) { - auto it = std::find(g.begin(), g.end(), dimer); - if (it != g.end()) { - out += g[(std::distance(g.begin(), it) + shift) % g.size()]; - done = true; - break; - } - } - if (!done) out += dimer; + std::string out; + for (size_t i = 0; i < s.size(); i += 2) { + std::string dimer = s.substr(i, 2); + bool done = false; + for (const auto& g : CT_GROUPS) { + auto it = std::find(g.begin(), g.end(), dimer); + if (it != g.end()) { + out += g[(std::distance(g.begin(), it) + shift) % g.size()]; + done = true; + break; + } } - return out; + if (!done) + out += dimer; + } + return out; }; auto compute_hashes = [&](const std::string& s) { - btllib::BsHash nh(s, h, k, "CT"); // or "CT" if needed - std::vector out; - while (nh.roll()) { - uint64_t* hv = new uint64_t[h]; - std::copy(nh.hashes(), nh.hashes() + h, hv); - out.push_back(hv); - } - return out; + btllib::BsHash bh(s, h, k, "CT"); + std::vector out; + while (bh.roll()) { + uint64_t* hv = new uint64_t[h]; + std::copy(bh.hashes(), bh.hashes() + h, hv); + out.push_back(hv); + } + return out; }; - // ─────────────────────────────── PRINT_TEST_NAME("CT k-mer rolling (degeneracy shift +1)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_by_shift(first32, 1); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); - std::cerr << seq << std::endl; - std::cerr << seq_shift << std::endl; - - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); -for (unsigned i = 0; i < h1.size(); i = i+2) { - std::cerr << i << std::endl; + std::string seq_shift = first34 + mutate_by_shift(first34, 1); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); + std::cerr << seq << std::endl; + std::cerr << seq_shift << std::endl; + + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i + 2) { + std::cerr << i << std::endl; for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } -} + } } - // ─────────────────────────────── PRINT_TEST_NAME("CT k-mer rolling (degeneracy shift +2)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_by_shift(first32, 2); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); + std::string seq_shift = first34 + mutate_by_shift(first34, 2); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); -for (unsigned i = 0; i < h1.size(); i = i+2) { + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i + 2) { for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } -} + } } - // ─────────────────────────────── PRINT_TEST_NAME("CT k-mer rolling (degeneracy shift +3)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_by_shift(first32, 3); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); + std::string seq_shift = first34 + mutate_by_shift(first34, 3); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); -for (unsigned i = 0; i < h1.size(); i = i+2) { + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i + 2) { for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } -} + } } -} + } + + { -{ - // ─────────────────────────────── PRINT_TEST_NAME("CA k-mer rolling (64-mer base)") - // ─────────────────────────────── - std::string first32 = - "AAACAGAT" - "CACCCGCT" - "GAGCGGGT" - "TACTGTGT"; + std::string first34 = "AAACAGAT" + "CACCCGCT" + "GAGCGGGT" + "TACTGTGTAA"; - std::string seq = first32 + first32; // 64 bases total + std::string seq = first34 + first34; unsigned h = 3; - unsigned k = 32; + unsigned k = 34; - btllib::BsHash nthash(seq, h, k, "CA"); // or "CA" if that parameter matters + btllib::BsHash bshash(seq, h, k, "CA"); std::vector hashes; - while (nthash.roll()) { - uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); - hashes.push_back(h_vals); + while (bshash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); + hashes.push_back(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); - // ─────────────────────────────── - // Define CA degeneracy groups based on CA_DIMER_TAB - // ─────────────────────────────── const std::vector> CA_GROUPS = { - {"AA", "GA"}, // 0x7e4a36919c5032af - {"AC", "AT", "GC", "GT"}, // 0x95b4b90696718a9f - {"AG", "GG"}, // 0xf482881f5f3610bf - {"CA", "TA", "TG"}, // 0xe12f0e8e678e5d30 - {"CC", "CT", "TT", "TC"}, // 0x9ff825331cf8a361 and 0x00e055ee083cfcff - {"CG"} // 0xf248b960f9159cec + { "AA", "GA" }, + { "AC", "AT", "GC", "GT" }, + { "AG" }, + { "GG" }, + { "CA", "TA", "TG" }, + { "CC" }, + { "CT" }, + { "TT", "TC" }, + { "CG" } }; auto mutate_by_shift = [&](const std::string& s, int shift) { - std::string out; - for (size_t i = 0; i < s.size(); i += 2) { - std::string dimer = s.substr(i, 2); - bool done = false; - for (const auto& g : CA_GROUPS) { - auto it = std::find(g.begin(), g.end(), dimer); - if (it != g.end()) { - out += g[(std::distance(g.begin(), it) + shift) % g.size()]; - done = true; - break; - } - } - if (!done) out += dimer; + std::string out; + for (size_t i = 0; i < s.size(); i += 2) { + std::string dimer = s.substr(i, 2); + bool done = false; + for (const auto& g : CA_GROUPS) { + auto it = std::find(g.begin(), g.end(), dimer); + if (it != g.end()) { + out += g[(std::distance(g.begin(), it) + shift) % g.size()]; + done = true; + break; + } } - return out; + if (!done) + out += dimer; + } + return out; }; auto compute_hashes = [&](const std::string& s) { - btllib::BsHash nh(s, h, k, "CA"); // or "CA" if needed - std::vector out; - while (nh.roll()) { - uint64_t* hv = new uint64_t[h]; - std::copy(nh.hashes(), nh.hashes() + h, hv); - out.push_back(hv); - } - return out; + btllib::BsHash bh(s, h, k, "CA"); + std::vector out; + while (bh.roll()) { + uint64_t* hv = new uint64_t[h]; + std::copy(bh.hashes(), bh.hashes() + h, hv); + out.push_back(hv); + } + return out; }; - // ─────────────────────────────── PRINT_TEST_NAME("CA k-mer rolling (degeneracy shift +1)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_by_shift(first32, 1); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); + std::string seq_shift = first34 + mutate_by_shift(first34, 1); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); -for (unsigned i = 0; i < h1.size(); i = i+2) { + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i + 2) { for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } -} + } } - // ─────────────────────────────── PRINT_TEST_NAME("CA k-mer rolling (degeneracy shift +2)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_by_shift(first32, 2); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); + std::string seq_shift = first34 + mutate_by_shift(first34, 2); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); -for (unsigned i = 0; i < h1.size(); i = i+2) { + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i + 2) { for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } -} + } } - // ─────────────────────────────── PRINT_TEST_NAME("CA k-mer rolling (degeneracy shift +3)") - // ─────────────────────────────── + { - std::string seq_shift = first32 + mutate_by_shift(first32, 3); - auto h1 = compute_hashes(seq); - auto h2 = compute_hashes(seq_shift); + std::string seq_shift = first34 + mutate_by_shift(first34, 3); + auto h1 = compute_hashes(seq); + auto h2 = compute_hashes(seq_shift); - TEST_ASSERT_EQ(h1.size(), h2.size()); - TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); -for (unsigned i = 0; i < h1.size(); i = i+2) { + TEST_ASSERT_EQ(h1.size(), h2.size()); + TEST_ASSERT_ARRAY_EQ(h1.front(), h1.back(), h); + for (unsigned i = 0; i < h1.size(); i = i + 2) { for (unsigned j = 0; j < h; ++j) { - TEST_ASSERT_EQ(h1[i][j], h2[i][j]); + TEST_ASSERT_EQ(h1[i][j], h2[i][j]); } -} + } } -} -*/ + } + + { + PRINT_TEST_NAME("k-mer rolling vs. base hash values - CG") + + std::string seq = "ACGTACACTGGACTGAGTCT"; + + btllib::BsHash bshash(seq, 3, seq.size() - 2, "CG"); + + std::string kmer1 = seq.substr(0, 18); + std::string kmer2 = seq.substr(1, 18); + std::string kmer3 = seq.substr(2, 18); + + btllib::BsHash bshash_vector[] = { + btllib::BsHash(kmer1, bshash.get_hash_num(), kmer1.size(), "CG"), + btllib::BsHash(kmer2, bshash.get_hash_num(), kmer2.size(), "CG"), + btllib::BsHash(kmer3, bshash.get_hash_num(), kmer3.size(), "CG") + }; + + size_t i; + for (i = 0; bshash.roll() && bshash_vector[i].roll(); ++i) { + for (size_t j = 0; j < bshash.get_hash_num(); ++j) { + TEST_ASSERT_EQ(bshash.hashes()[j], bshash_vector[i].hashes()[j]); + } + } + TEST_ASSERT_EQ(i, 3); + } + + { + PRINT_TEST_NAME("k-mer rolling vs. base hash values - CC") + + std::string seq = "ACGTACACTGGACTGAGTCT"; + + btllib::BsHash bshash(seq, 3, seq.size() - 2, "CC"); + + std::string kmer1 = seq.substr(0, 18); + std::string kmer2 = seq.substr(1, 18); + std::string kmer3 = seq.substr(2, 18); + + btllib::BsHash bshash_vector[] = { + btllib::BsHash(kmer1, bshash.get_hash_num(), kmer1.size(), "CC"), + btllib::BsHash(kmer2, bshash.get_hash_num(), kmer2.size(), "CC"), + btllib::BsHash(kmer3, bshash.get_hash_num(), kmer3.size(), "CC") + }; + + size_t i; + for (i = 0; bshash.roll() && bshash_vector[i].roll(); ++i) { + for (size_t j = 0; j < bshash.get_hash_num(); ++j) { + TEST_ASSERT_EQ(bshash.hashes()[j], bshash_vector[i].hashes()[j]); + } + } + TEST_ASSERT_EQ(i, 3); + } + { + PRINT_TEST_NAME("k-mer rolling vs. base hash values - CT") + std::string seq = "ACGTACACTGGACTGAGTCT"; + + btllib::BsHash bshash(seq, 3, seq.size() - 2, "CT"); + + std::string kmer1 = seq.substr(0, 18); + std::string kmer2 = seq.substr(1, 18); + std::string kmer3 = seq.substr(2, 18); + + btllib::BsHash bshash_vector[] = { + btllib::BsHash(kmer1, bshash.get_hash_num(), kmer1.size(), "CT"), + btllib::BsHash(kmer2, bshash.get_hash_num(), kmer2.size(), "CT"), + btllib::BsHash(kmer3, bshash.get_hash_num(), kmer3.size(), "CT") + }; + + size_t i; + for (i = 0; bshash.roll() && bshash_vector[i].roll(); ++i) { + for (size_t j = 0; j < bshash.get_hash_num(); ++j) { + TEST_ASSERT_EQ(bshash.hashes()[j], bshash_vector[i].hashes()[j]); + } + } + TEST_ASSERT_EQ(i, 3); + } { - PRINT_TEST_NAME("k-mer rolling vs. base hash values") + PRINT_TEST_NAME("k-mer rolling vs. base hash values - CA") std::string seq = "ACGTACACTGGACTGAGTCT"; - btllib::BsHash nthash(seq, 3, seq.size() - 2, "CG"); - /* 18-mers of kmer*/ + btllib::BsHash bshash(seq, 3, seq.size() - 2, "CA"); + std::string kmer1 = seq.substr(0, 18); std::string kmer2 = seq.substr(1, 18); std::string kmer3 = seq.substr(2, 18); - btllib::BsHash nthash_vector[] = { - btllib::BsHash(kmer1, nthash.get_hash_num(), kmer1.size(), "CG"), - btllib::BsHash(kmer2, nthash.get_hash_num(), kmer2.size(), "CG"), - btllib::BsHash(kmer3, nthash.get_hash_num(), kmer3.size(), "CG") + btllib::BsHash bshash_vector[] = { + btllib::BsHash(kmer1, bshash.get_hash_num(), kmer1.size(), "CA"), + btllib::BsHash(kmer2, bshash.get_hash_num(), kmer2.size(), "CA"), + btllib::BsHash(kmer3, bshash.get_hash_num(), kmer3.size(), "CA") }; size_t i; - for (i = 0; nthash.roll() && nthash_vector[i].roll(); ++i) { - for (size_t j = 0; j < nthash.get_hash_num(); ++j) { - TEST_ASSERT_EQ(nthash.hashes()[j], nthash_vector[i].hashes()[j]); + for (i = 0; bshash.roll() && bshash_vector[i].roll(); ++i) { + for (size_t j = 0; j < bshash.get_hash_num(); ++j) { + TEST_ASSERT_EQ(bshash.hashes()[j], bshash_vector[i].hashes()[j]); } } TEST_ASSERT_EQ(i, 3); @@ -665,67 +707,69 @@ for (unsigned i = 0; i < h1.size(); i = i+2) { { PRINT_TEST_NAME("CG canonical hashing") - std::string seq_f = "ACGTACACTGGACTGAGTCTAA"; - std::string seq_r = "TTAGACTCAGTCCAGTGTACGT"; + std::string seq_f = "ACGTACGCGGGACGGAGTCGAA"; + std::string seq_r = "TTCGACTCCGTCCCGCGTACGT"; unsigned h = 3; - btllib::BsHash nthash_f(seq_f, h, seq_f.size(), "CG"); - btllib::BsHash nthash_r(seq_r, h, seq_r.size(), "CG"); + btllib::BsHash bshash_f(seq_f, h, seq_f.size(), "CG"); + btllib::BsHash bshash_r(seq_r, h, seq_r.size(), "CG"); + + bshash_f.roll(); + bshash_r.roll(); - nthash_f.roll(); - nthash_r.roll(); - TEST_ASSERT_EQ(nthash_f.get_hash_num(), nthash_r.get_hash_num()) - TEST_ASSERT_ARRAY_EQ(nthash_f.hashes(), nthash_r.hashes(), h) + TEST_ASSERT_EQ(bshash_f.get_hash_num(), bshash_r.get_hash_num()); + TEST_ASSERT_ARRAY_EQ(bshash_f.hashes(), bshash_r.hashes(), h); } - - /*{ - PRINT_TEST_NAME("CC canonical hashing") - std::string seq_f = "ACGTACACTGGACTGAGTCT"; - std::string seq_r = "AGACTCAGTCCAGTGTACGT"; + { + PRINT_TEST_NAME("CA canonical hashing") + + std::string seq_f = "ACATACACAGGACAGAGTCAAA"; + std::string seq_r = "TTTGACTCTGTCCTGTGTATGT"; unsigned h = 3; - btllib::BsHash nthash_f(seq_f, h, seq_f.size(), "CC"); - btllib::BsHash nthash_r(seq_r, h, seq_r.size(), "CC"); + btllib::BsHash bshash_f(seq_f, h, seq_f.size(), "CA"); + btllib::BsHash bshash_r(seq_r, h, seq_r.size(), "CA"); - nthash_f.roll(); - nthash_r.roll(); - TEST_ASSERT_EQ(nthash_f.get_hash_num(), nthash_r.get_hash_num()) - TEST_ASSERT_ARRAY_EQ(nthash_f.hashes(), nthash_r.hashes(), h) - } + bshash_f.roll(); + bshash_r.roll(); + TEST_ASSERT_EQ(bshash_f.get_hash_num(), bshash_r.get_hash_num()); + TEST_ASSERT_ARRAY_EQ(bshash_f.hashes(), bshash_r.hashes(), h); + } { - PRINT_TEST_NAME("CT canonical hashing") + PRINT_TEST_NAME("CC canonical hashing") - std::string seq_f = "ACGTACACTGGACTGAGTCT"; - std::string seq_r = "AGACTCAGTCCAGTGTACGT"; + std::string seq_f = "ACCTACCCCGGACCGAGTCCAA"; + std::string seq_r = "TTGGACTCGGTCCGGGGTAGGT"; unsigned h = 3; - btllib::BsHash nthash_f(seq_f, h, seq_f.size(), "CT"); - btllib::BsHash nthash_r(seq_r, h, seq_r.size(), "CT"); + btllib::BsHash bshash_f(seq_f, h, seq_f.size(), "CC"); + btllib::BsHash bshash_r(seq_r, h, seq_r.size(), "CC"); - nthash_f.roll(); - nthash_r.roll(); - TEST_ASSERT_EQ(nthash_f.get_hash_num(), nthash_r.get_hash_num()) - TEST_ASSERT_ARRAY_EQ(nthash_f.hashes(), nthash_r.hashes(), h) - } + bshash_f.roll(); + bshash_r.roll(); + TEST_ASSERT_EQ(bshash_f.get_hash_num(), bshash_r.get_hash_num()); + TEST_ASSERT_ARRAY_EQ(bshash_f.hashes(), bshash_r.hashes(), h); + } { - PRINT_TEST_NAME("CA canonical hashing") + PRINT_TEST_NAME("CT canonical hashing") - std::string seq_f = "ACGTACACTGGACTGAGTCT"; - std::string seq_r = "AGACTCAGTCCAGTGTACGT"; + std::string seq_f = "ACTTACTCTGGACTGAGTCTAA"; + std::string seq_r = "TTAGACTCAGTCCAGAGTAAGT"; unsigned h = 3; - btllib::BsHash nthash_f(seq_f, h, seq_f.size(), "CA"); - btllib::BsHash nthash_r(seq_r, h, seq_r.size(), "CA"); + btllib::BsHash bshash_f(seq_f, h, seq_f.size(), "CT"); + btllib::BsHash bshash_r(seq_r, h, seq_r.size(), "CT"); + + bshash_f.roll(); + bshash_r.roll(); - nthash_f.roll(); - nthash_r.roll(); - TEST_ASSERT_EQ(nthash_f.get_hash_num(), nthash_r.get_hash_num()) - TEST_ASSERT_ARRAY_EQ(nthash_f.hashes(), nthash_r.hashes(), h) + TEST_ASSERT_EQ(bshash_f.get_hash_num(), bshash_r.get_hash_num()); + TEST_ASSERT_ARRAY_EQ(bshash_f.hashes(), bshash_r.hashes(), h); } -*/ + { PRINT_TEST_NAME("CG k-mer back rolling") @@ -733,45 +777,45 @@ for (unsigned i = 0; i < h1.size(); i = i+2) { unsigned h = 3; unsigned k = 6; - btllib::BsHash nthash(seq, h, k, "CG"); + btllib::BsHash bshash(seq, h, k, "CG"); std::stack hashes; - while (nthash.roll()) { + while (bshash.roll()) { uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); hashes.push(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1) do { - TEST_ASSERT_ARRAY_EQ(nthash.hashes(), hashes.top(), h) + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), hashes.top(), h) hashes.pop(); - } while (nthash.roll_back()); + } while (bshash.roll_back()); } - /*{ + { PRINT_TEST_NAME("CC k-mer back rolling") std::string seq = "ACTGACTG"; unsigned h = 3; - unsigned k = 4; + unsigned k = 6; - btllib::BsHash nthash(seq, h, k, "CC"); + btllib::BsHash bshash(seq, h, k, "CC"); std::stack hashes; - while (nthash.roll()) { + while (bshash.roll()) { uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); hashes.push(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1) do { - TEST_ASSERT_ARRAY_EQ(nthash.hashes(), hashes.top(), h) + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), hashes.top(), h) hashes.pop(); - } while (nthash.roll_back()); + } while (bshash.roll_back()); } { @@ -779,23 +823,23 @@ for (unsigned i = 0; i < h1.size(); i = i+2) { std::string seq = "ACTGACTG"; unsigned h = 3; - unsigned k = 4; + unsigned k = 6; - btllib::BsHash nthash(seq, h, k, "CT"); + btllib::BsHash bshash(seq, h, k, "CT"); std::stack hashes; - while (nthash.roll()) { + while (bshash.roll()) { uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); hashes.push(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1) do { - TEST_ASSERT_ARRAY_EQ(nthash.hashes(), hashes.top(), h) + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), hashes.top(), h) hashes.pop(); - } while (nthash.roll_back()); + } while (bshash.roll_back()); } { @@ -803,60 +847,100 @@ for (unsigned i = 0; i < h1.size(); i = i+2) { std::string seq = "ACTGACTG"; unsigned h = 3; - unsigned k = 4; + unsigned k = 6; - btllib::BsHash nthash(seq, h, k, "CA"); + btllib::BsHash bshash(seq, h, k, "CA"); std::stack hashes; - while (nthash.roll()) { + while (bshash.roll()) { uint64_t* h_vals = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_vals); + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); hashes.push(h_vals); } TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1) do { - TEST_ASSERT_ARRAY_EQ(nthash.hashes(), hashes.top(), h) + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), hashes.top(), h) hashes.pop(); - } while (nthash.roll_back()); - } */ -/* + } while (bshash.roll_back()); + } + { - PRINT_TEST_NAME("k-mer peeking") + PRINT_TEST_NAME("CG skipping Ns") - std::string seq = "ACTGATCAG"; - unsigned h = 3; - unsigned k = 6; + std::string seq = "ACGTACACTGGACTGAGTCTAAAA"; + std::string seq_with_ns = seq; - btllib::BsHash nthash(seq, h, k); - nthash.roll(); + TEST_ASSERT_GE(seq_with_ns.size(), 10) + + seq_with_ns[seq_with_ns.size() / 2] = 'N'; + seq_with_ns[seq_with_ns.size() / 2 + 1] = 'N'; + unsigned k = (seq.size() - 2) / 2 - 1; + + btllib::BsHash bshash(seq_with_ns, 3, k, "CG"); + + std::vector positions; + for (size_t i = 0; i < seq_with_ns.size() / 2 - k + 1; i++) { + positions.push_back(i); + } + for (size_t i = seq_with_ns.size() / 2 + 2; i < seq_with_ns.size() - k + 1; + i++) { + positions.push_back(i); + } + + size_t i = 0; + while (bshash.roll()) { + TEST_ASSERT_EQ(bshash.get_pos(), positions[i]) + i++; + } + TEST_ASSERT_EQ(positions.size(), i) + } + + { + PRINT_TEST_NAME("CC skipping Ns") + + std::string seq = "ACGTACACTGGACTGAGTCTAAAA"; + std::string seq_with_ns = seq; + + TEST_ASSERT_GE(seq_with_ns.size(), 10) + + seq_with_ns[seq_with_ns.size() / 2] = 'N'; + seq_with_ns[seq_with_ns.size() / 2 + 1] = 'N'; + unsigned k = (seq.size() - 2) / 2 - 1; + + btllib::BsHash bshash(seq_with_ns, 3, k, "CC"); + + std::vector positions; + for (size_t i = 0; i < seq_with_ns.size() / 2 - k + 1; i++) { + positions.push_back(i); + } + for (size_t i = seq_with_ns.size() / 2 + 2; i < seq_with_ns.size() - k + 1; + i++) { + positions.push_back(i); + } - size_t steps = 3; - while (steps--) { - nthash.peek(); - uint64_t* h_peek = new uint64_t[h]; - std::copy(nthash.hashes(), nthash.hashes() + h, h_peek); - nthash.peek(seq[nthash.get_pos() + k]); - TEST_ASSERT_ARRAY_EQ(nthash.hashes(), h_peek, h); - nthash.roll(); - TEST_ASSERT_ARRAY_EQ(nthash.hashes(), h_peek, h); + size_t i = 0; + while (bshash.roll()) { + TEST_ASSERT_EQ(bshash.get_pos(), positions[i]) + i++; } - }*/ + TEST_ASSERT_EQ(positions.size(), i) + } { - PRINT_TEST_NAME("skipping Ns") + PRINT_TEST_NAME("CA skipping Ns") std::string seq = "ACGTACACTGGACTGAGTCTAAAA"; std::string seq_with_ns = seq; TEST_ASSERT_GE(seq_with_ns.size(), 10) - //10, 11 are Ns + seq_with_ns[seq_with_ns.size() / 2] = 'N'; seq_with_ns[seq_with_ns.size() / 2 + 1] = 'N'; unsigned k = (seq.size() - 2) / 2 - 1; - //k = 8 - btllib::BsHash nthash(seq_with_ns, 3, k); + + btllib::BsHash bshash(seq_with_ns, 3, k, "CA"); std::vector positions; for (size_t i = 0; i < seq_with_ns.size() / 2 - k + 1; i++) { @@ -868,31 +952,190 @@ for (unsigned i = 0; i < h1.size(); i = i+2) { } size_t i = 0; - while (nthash.roll()) { - TEST_ASSERT_EQ(nthash.get_pos(), positions[i]) + while (bshash.roll()) { + TEST_ASSERT_EQ(bshash.get_pos(), positions[i]) i++; } TEST_ASSERT_EQ(positions.size(), i) } + { + PRINT_TEST_NAME("CT skipping Ns") + std::string seq = "ACGTACACTGGACTGAGTCTAAAA"; + std::string seq_with_ns = seq; + + TEST_ASSERT_GE(seq_with_ns.size(), 10) + + seq_with_ns[seq_with_ns.size() / 2] = 'N'; + seq_with_ns[seq_with_ns.size() / 2 + 1] = 'N'; + unsigned k = (seq.size() - 2) / 2 - 1; + btllib::BsHash bshash(seq_with_ns, 3, k, "CT"); + + std::vector positions; + for (size_t i = 0; i < seq_with_ns.size() / 2 - k + 1; i++) { + positions.push_back(i); + } + for (size_t i = seq_with_ns.size() / 2 + 2; i < seq_with_ns.size() - k + 1; + i++) { + positions.push_back(i); + } + + size_t i = 0; + while (bshash.roll()) { + TEST_ASSERT_EQ(bshash.get_pos(), positions[i]) + i++; + } + TEST_ASSERT_EQ(positions.size(), i) + } + + { + PRINT_TEST_NAME("CG Methylation Module Tests") + + std::string seq = "AACGAA"; + std::string seq_no_meth = "AATGAA"; + std::string rc_seq = "TTCGTT"; + std::string rc_seq_no_meth = "TTCATT"; + + btllib::BsHash bshash(seq, 3, seq.size(), "CG"); + btllib::BsHash bshash_no_meth(seq_no_meth, 3, seq_no_meth.size(), "CG"); + btllib::BsHash bshash_rc(rc_seq, 3, rc_seq.size(), "CG"); + btllib::BsHash bshash_rc_no_meth( + rc_seq_no_meth, 3, rc_seq_no_meth.size(), "CG"); + + bshash.roll(); + bshash_no_meth.roll(); + bshash_rc.roll(); + bshash_rc_no_meth.roll(); + + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_no_meth.get_hash_num()); + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_rc.get_hash_num()); + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_rc_no_meth.get_hash_num()); + + size_t h = bshash.get_hash_num(); + TEST_ASSERT_EQ(h, 3); + + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_no_meth.hashes(), h); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_rc.hashes(), h); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_rc_no_meth.hashes(), h); + + TEST_ASSERT_EQ(bshash.is_methylated(), bshash_rc.is_methylated()); + TEST_ASSERT_NE(bshash.is_methylated(), bshash_no_meth.is_methylated()); + TEST_ASSERT_NE(bshash_rc.is_methylated(), + bshash_rc_no_meth.is_methylated()); + TEST_ASSERT_NE(bshash.is_forward_higher(), bshash_rc.is_forward_higher()); + } { - PRINT_TEST_NAME("check methylation") - - std::string seq_k6 = "ATCGGA"; - std::string seq_k4 = "GGCGTA"; - btllib::BsHash nthash1(seq_k6, 3, 6); - nthash1.roll(); - TEST_ASSERT_EQ(nthash1.is_methylated(), true) - btllib::BsHash nthash2(seq_k4, 3, 6); - nthash2.roll(); - TEST_ASSERT_EQ(nthash2.is_methylated(), true) - - + PRINT_TEST_NAME("CC Methylation Module Tests") + + std::string seq = "AACCTA"; + std::string seq_no_meth = "AATTTA"; + std::string rc_seq = "TAGGTT"; + std::string rc_seq_no_meth = "TAAATT"; + + btllib::BsHash bshash(seq, 3, seq.size(), "CC"); + btllib::BsHash bshash_no_meth(seq_no_meth, 3, seq_no_meth.size(), "CC"); + btllib::BsHash bshash_rc(rc_seq, 3, rc_seq.size(), "CC"); + btllib::BsHash bshash_rc_no_meth( + rc_seq_no_meth, 3, rc_seq_no_meth.size(), "CC"); + + bshash.roll(); + bshash_no_meth.roll(); + bshash_rc.roll(); + bshash_rc_no_meth.roll(); + + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_no_meth.get_hash_num()); + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_rc.get_hash_num()); + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_rc_no_meth.get_hash_num()); + + size_t h = bshash.get_hash_num(); + TEST_ASSERT_EQ(h, 3); + + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_no_meth.hashes(), h); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_rc.hashes(), h); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_rc_no_meth.hashes(), h); + + TEST_ASSERT_EQ(bshash.is_methylated(), bshash_rc.is_methylated()); + TEST_ASSERT_NE(bshash.is_methylated(), bshash_no_meth.is_methylated()); + TEST_ASSERT_NE(bshash_rc.is_methylated(), + bshash_rc_no_meth.is_methylated()); + TEST_ASSERT_NE(bshash.is_forward_higher(), bshash_rc.is_forward_higher()); } + { + PRINT_TEST_NAME("CA Methylation Module Tests") + + std::string seq = "AACAAA"; + std::string seq_no_meth = "AATAAA"; + std::string rc_seq = "TTTGTT"; + std::string rc_seq_no_meth = "TTTATT"; + + btllib::BsHash bshash(seq, 3, seq.size(), "CA"); + btllib::BsHash bshash_no_meth(seq_no_meth, 3, seq_no_meth.size(), "CA"); + btllib::BsHash bshash_rc(rc_seq, 3, rc_seq.size(), "CA"); + btllib::BsHash bshash_rc_no_meth( + rc_seq_no_meth, 3, rc_seq_no_meth.size(), "CA"); + + bshash.roll(); + bshash_no_meth.roll(); + bshash_rc.roll(); + bshash_rc_no_meth.roll(); + + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_no_meth.get_hash_num()); + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_rc.get_hash_num()); + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_rc_no_meth.get_hash_num()); + + size_t h = bshash.get_hash_num(); + TEST_ASSERT_EQ(h, 3); + + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_no_meth.hashes(), h); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_rc.hashes(), h); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_rc_no_meth.hashes(), h); + + TEST_ASSERT_EQ(bshash.is_methylated(), bshash_rc.is_methylated()); + TEST_ASSERT_NE(bshash.is_methylated(), bshash_no_meth.is_methylated()); + TEST_ASSERT_NE(bshash_rc.is_methylated(), + bshash_rc_no_meth.is_methylated()); + TEST_ASSERT_NE(bshash.is_forward_higher(), bshash_rc.is_forward_higher()); + } + { + PRINT_TEST_NAME("CT Methylation Module Tests") + + std::string seq = "AACTAA"; + std::string seq_no_meth = "AATTAA"; + std::string rc_seq = "TTAGTT"; + std::string rc_seq_no_meth = "TTAATT"; + + btllib::BsHash bshash(seq, 3, seq.size(), "CT"); + btllib::BsHash bshash_no_meth(seq_no_meth, 3, seq_no_meth.size(), "CT"); + btllib::BsHash bshash_rc(rc_seq, 3, rc_seq.size(), "CT"); + btllib::BsHash bshash_rc_no_meth( + rc_seq_no_meth, 3, rc_seq_no_meth.size(), "CT"); + + bshash.roll(); + bshash_no_meth.roll(); + bshash_rc.roll(); + bshash_rc_no_meth.roll(); + + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_no_meth.get_hash_num()); + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_rc.get_hash_num()); + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_rc_no_meth.get_hash_num()); + + size_t h = bshash.get_hash_num(); + TEST_ASSERT_EQ(h, 3); + + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_no_meth.hashes(), h); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_rc.hashes(), h); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_rc_no_meth.hashes(), h); + + TEST_ASSERT_EQ(bshash.is_methylated(), bshash_rc.is_methylated()); + TEST_ASSERT_NE(bshash.is_methylated(), bshash_no_meth.is_methylated()); + TEST_ASSERT_NE(bshash_rc.is_methylated(), + bshash_rc_no_meth.is_methylated()); + TEST_ASSERT_NE(bshash.is_forward_higher(), bshash_rc.is_forward_higher()); + } return 0; } diff --git a/tests/bshash_directional.cpp b/tests/bshash_directional.cpp index 58b990b2..fda261ec 100644 --- a/tests/bshash_directional.cpp +++ b/tests/bshash_directional.cpp @@ -11,13 +11,45 @@ int main() { { - PRINT_TEST_NAME("k-mer rolling") + PRINT_TEST_NAME("k-mer rolling - CT Mode - No Mutation") std::string seq = "AGTCAGTCAGTCAG"; unsigned h = 3; unsigned k = 6; - btllib::BSHashDirectional bshash(seq, h, k, "CT"); + btllib::BsHashDirectional bshash(seq, h, k, "CT"); + std::vector hashes; + uint64_t fwd_hash = 0; + uint64_t final_fwd_hash = 0; + uint64_t rev_hash = 0; + uint64_t final_rev_hash = 0; + + while (bshash.roll()) { + if (fwd_hash == 0) { + fwd_hash = bshash.get_forward_hash(); + rev_hash = bshash.get_reverse_hash(); + } + final_fwd_hash = bshash.get_forward_hash(); + final_rev_hash = bshash.get_reverse_hash(); + uint64_t* h_vals = new uint64_t[h]; + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); + hashes.push_back(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1); + TEST_ASSERT_EQ(fwd_hash, final_fwd_hash); + TEST_ASSERT_EQ(rev_hash, final_rev_hash); + TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); + } + + { + PRINT_TEST_NAME("k-mer rolling - GA Mode - No Mutation") + + std::string seq = "AGTCAGTCAGTCAG"; + unsigned h = 3; + unsigned k = 6; + + btllib::BsHashDirectional bshash(seq, h, k, "GA"); std::vector hashes; uint64_t fwd_hash = 0; uint64_t final_fwd_hash = 0; @@ -49,7 +81,7 @@ main() unsigned h = 3; unsigned k = 6; - btllib::BSHashDirectional bshash(seq, h, k, "CT"); + btllib::BsHashDirectional bshash(seq, h, k, "CT"); std::vector hashes; uint64_t fwd_hash = 0; uint64_t final_fwd_hash = 0; @@ -74,7 +106,6 @@ main() TEST_ASSERT_ARRAY_EQ(hashes.front(), hashes.back(), h); } - { PRINT_TEST_NAME("k-mer rolling - (GA Directional)") @@ -82,8 +113,7 @@ main() unsigned h = 3; unsigned k = 6; - - btllib::BSHashDirectional bshash(seq, h, k, "GA"); + btllib::BsHashDirectional bshash(seq, h, k, "GA"); std::vector hashes; uint64_t fwd_hash = 0; uint64_t final_fwd_hash = 0; @@ -109,20 +139,52 @@ main() } { - PRINT_TEST_NAME("k-mer rolling vs. base hash values") + PRINT_TEST_NAME("k-mer rolling vs. base hash values - CT Mode") + + std::string seq = "ACGTACACTGGACTGAGTCTATCG"; + + btllib::BsHashDirectional bshash(seq, 3, seq.size() - 2, "CT"); + /* 19-mers of kmer*/ + std::string kmer1 = seq.substr(0, 22); + std::string kmer2 = seq.substr(1, 22); + std::string kmer3 = seq.substr(2, 22); + + btllib::BsHashDirectional bshash_vector[] = { + btllib::BsHashDirectional( + kmer1, bshash.get_hash_num(), kmer1.size(), "CT"), + btllib::BsHashDirectional( + kmer2, bshash.get_hash_num(), kmer2.size(), "CT"), + btllib::BsHashDirectional( + kmer3, bshash.get_hash_num(), kmer3.size(), "CT") + }; + + size_t i; + for (i = 0; bshash.roll() && bshash_vector[i].roll(); ++i) { + for (size_t j = 0; j < bshash.get_hash_num(); ++j) { + TEST_ASSERT_EQ(bshash.hashes()[j], bshash_vector[i].hashes()[j]); + } + } + TEST_ASSERT_EQ(i, 3); + } + + { + PRINT_TEST_NAME("k-mer rolling vs. base hash values - GA Mode") std::string seq = "ACGTACACTGGACTGAGTCTATCG"; - btllib::BSHashDirectional bshash(seq, 3, seq.size() - 2, "CT"); + btllib::BsHashDirectional bshash(seq, 3, seq.size() - 2, "GA"); /* 19-mers of kmer*/ std::string kmer1 = seq.substr(0, 22); std::string kmer2 = seq.substr(1, 22); std::string kmer3 = seq.substr(2, 22); - btllib::BSHashDirectional bshash_vector[] = { - btllib::BSHashDirectional(kmer1, bshash.get_hash_num(), kmer1.size(), "CT"), - btllib::BSHashDirectional(kmer2, bshash.get_hash_num(), kmer2.size(), "CT"), - btllib::BSHashDirectional(kmer3, bshash.get_hash_num(), kmer3.size(), "CT") + btllib::BsHashDirectional bshash_vector[] = { + btllib::BsHashDirectional( + kmer1, bshash.get_hash_num(), kmer1.size(), "GA"), + btllib::BsHashDirectional( + kmer2, bshash.get_hash_num(), kmer2.size(), "GA"), + btllib::BsHashDirectional( + kmer3, bshash.get_hash_num(), kmer3.size(), "GA") }; size_t i; @@ -141,8 +203,8 @@ main() std::string seq_r = "TTAGACTCAGTCCAGTGTACGT"; unsigned h = 3; - btllib::BSHashDirectional bshash_f(seq_f, h, seq_f.size(), "CT"); - btllib::BSHashDirectional bshash_r(seq_r, h, seq_r.size(), "GA"); + btllib::BsHashDirectional bshash_f(seq_f, h, seq_f.size(), "CT"); + btllib::BsHashDirectional bshash_r(seq_r, h, seq_r.size(), "GA"); bshash_f.roll(); bshash_r.roll(); @@ -151,13 +213,53 @@ main() } { - PRINT_TEST_NAME("k-mer back rolling") + PRINT_TEST_NAME("canonical hashing - Reverse") + + std::string seq_f = "ACGTACACTGGACTGAGTCTAA"; + std::string seq_r = "TTAGACTCAGTCCAGTGTACGT"; + unsigned h = 3; + + btllib::BsHashDirectional bshash_f(seq_f, h, seq_f.size(), "GA"); + btllib::BsHashDirectional bshash_r(seq_r, h, seq_r.size(), "CT"); + + bshash_f.roll(); + bshash_r.roll(); + TEST_ASSERT_EQ(bshash_f.get_hash_num(), bshash_r.get_hash_num()) + TEST_ASSERT_ARRAY_EQ(bshash_f.hashes(), bshash_r.hashes(), h) + } + + { + PRINT_TEST_NAME("k-mer back rolling - CT Mode") + + std::string seq = "ACTAGACTG"; + unsigned h = 3; + unsigned k = 6; + + btllib::BsHashDirectional bshash(seq, h, k, "CT"); + std::stack hashes; + + while (bshash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); + hashes.push(h_vals); + } + + TEST_ASSERT_EQ(hashes.size(), seq.length() - k + 1) + + do { + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), hashes.top(), h) + hashes.pop(); + } while (bshash.roll_back()); + } + + { + PRINT_TEST_NAME("k-mer back rolling - GA Mode") std::string seq = "ACTAGACTG"; unsigned h = 3; unsigned k = 6; - btllib::BSHashDirectional bshash(seq, h, k, "CT"); + btllib::BsHashDirectional bshash(seq, h, k, "GA"); std::stack hashes; while (bshash.roll()) { @@ -175,13 +277,13 @@ main() } { - PRINT_TEST_NAME("k-mer peeking") + PRINT_TEST_NAME("k-mer peeking - CT Mode") std::string seq = "ACTGATCAG"; unsigned h = 3; unsigned k = 6; - btllib::BSHashDirectional bshash(seq, h, k, "CT"); + btllib::BsHashDirectional bshash(seq, h, k, "CT"); bshash.roll(); size_t steps = 3; @@ -197,7 +299,58 @@ main() } { - PRINT_TEST_NAME("skipping Ns") + PRINT_TEST_NAME("k-mer peeking - GA Mode") + + std::string seq = "ACTGATCAG"; + unsigned h = 3; + unsigned k = 6; + + btllib::BsHashDirectional bshash(seq, h, k, "GA"); + bshash.roll(); + + size_t steps = 3; + while (steps--) { + bshash.peek(); + uint64_t* h_peek = new uint64_t[h]; + std::copy(bshash.hashes(), bshash.hashes() + h, h_peek); + bshash.peek(seq[bshash.get_pos() + k]); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), h_peek, h); + bshash.roll(); + TEST_ASSERT_ARRAY_EQ(bshash.hashes(), h_peek, h); + } + } + + { + PRINT_TEST_NAME("skipping Ns - CT Mode") + + std::string seq = "ACGTACACTGGACTGAGTCTAAAA"; + std::string seq_with_ns = seq; + + TEST_ASSERT_GE(seq_with_ns.size(), 10) + seq_with_ns[seq_with_ns.size() / 2] = 'N'; + seq_with_ns[seq_with_ns.size() / 2 + 1] = 'N'; + unsigned k = (seq.size() - 2) / 2 - 1; + btllib::BsHashDirectional bshash(seq_with_ns, 3, k, "CT"); + + std::vector positions; + for (size_t i = 0; i < seq_with_ns.size() / 2 - k + 1; i++) { + positions.push_back(i); + } + for (size_t i = seq_with_ns.size() / 2 + 2; i < seq_with_ns.size() - k + 1; + i++) { + positions.push_back(i); + } + + size_t i = 0; + while (bshash.roll()) { + TEST_ASSERT_EQ(bshash.get_pos(), positions[i]) + i++; + } + TEST_ASSERT_EQ(positions.size(), i) + } + + { + PRINT_TEST_NAME("skipping Ns - GA Mode") std::string seq = "ACGTACACTGGACTGAGTCTAAAA"; std::string seq_with_ns = seq; @@ -206,7 +359,7 @@ main() seq_with_ns[seq_with_ns.size() / 2] = 'N'; seq_with_ns[seq_with_ns.size() / 2 + 1] = 'N'; unsigned k = (seq.size() - 2) / 2 - 1; - btllib::BSHashDirectional bshash(seq_with_ns, 3, k, "CT"); + btllib::BsHashDirectional bshash(seq_with_ns, 3, k, "CT"); std::vector positions; for (size_t i = 0; i < seq_with_ns.size() / 2 - k + 1; i++) { @@ -226,13 +379,33 @@ main() } { - PRINT_TEST_NAME("base substitution") + PRINT_TEST_NAME("base substitution - CT Mode") std::string seq = "ACGTACACTGGACTGAGTCTAA"; std::string sub = "ACGCGCACTGGACTGAGTCTAA"; - btllib::BSHashDirectional bshash(seq, 3, seq.size(), "CT"); - btllib::BSHashDirectional bshash_subbed(sub, 3, sub.size(), "CT"); + btllib::BsHashDirectional bshash(seq, 3, seq.size(), "CT"); + btllib::BsHashDirectional bshash_subbed(sub, 3, sub.size(), "CT"); + + bshash.roll(); + bshash.sub({ 3, 4 }, { 'C', 'G' }); + bshash_subbed.roll(); + TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_subbed.get_hash_num()); + size_t i; + for (i = 0; i < bshash.get_hash_num(); ++i) { + TEST_ASSERT_EQ(bshash.hashes()[i], bshash_subbed.hashes()[i]); + } + TEST_ASSERT_EQ(i, 3); + } + + { + PRINT_TEST_NAME("base substitution - GA Mode") + + std::string seq = "ACGTACACTGGACTGAGTCTAA"; + std::string sub = "ACGCGCACTGGACTGAGTCTAA"; + + btllib::BsHashDirectional bshash(seq, 3, seq.size(), "GA"); + btllib::BsHashDirectional bshash_subbed(sub, 3, sub.size(), "GA"); bshash.roll(); bshash.sub({ 3, 4 }, { 'C', 'G' }); @@ -253,19 +426,18 @@ main() std::string rc_seq = "TTCGGT"; std::string rc_seq_no_meth = "TTCAAT"; - - btllib::BSHashDirectional bshash(seq, 3, seq.size(), "CT"); - btllib::BSHashDirectional bshash_no_meth(seq_no_meth, 3, seq_no_meth.size(),"CT"); - btllib::BSHashDirectional bshash_rc(rc_seq, 3, rc_seq.size(), "GA"); - btllib::BSHashDirectional bshash_rc_no_meth(rc_seq_no_meth, 3, rc_seq_no_meth.size(), "GA"); - + btllib::BsHashDirectional bshash(seq, 3, seq.size(), "CT"); + btllib::BsHashDirectional bshash_no_meth( + seq_no_meth, 3, seq_no_meth.size(), "CT"); + btllib::BsHashDirectional bshash_rc(rc_seq, 3, rc_seq.size(), "GA"); + btllib::BsHashDirectional bshash_rc_no_meth( + rc_seq_no_meth, 3, rc_seq_no_meth.size(), "GA"); bshash.roll(); bshash_no_meth.roll(); bshash_rc.roll(); bshash_rc_no_meth.roll(); - TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_no_meth.get_hash_num()); TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_rc.get_hash_num()); TEST_ASSERT_EQ(bshash.get_hash_num(), bshash_rc_no_meth.get_hash_num()); @@ -273,16 +445,16 @@ main() size_t h = bshash.get_hash_num(); TEST_ASSERT_EQ(h, 3); - TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_no_meth.hashes(), h); TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_rc.hashes(), h); TEST_ASSERT_ARRAY_EQ(bshash.hashes(), bshash_rc_no_meth.hashes(), h); TEST_ASSERT_EQ(bshash.is_methylated(), bshash_rc.is_methylated()); TEST_ASSERT_NE(bshash.is_methylated(), bshash_no_meth.is_methylated()); - TEST_ASSERT_NE(bshash_rc.is_methylated(), bshash_rc_no_meth.is_methylated()); - TEST_ASSERT_NE(bshash.is_forward_higher(), bshash_rc.is_forward_higher()); + TEST_ASSERT_NE(bshash_rc.is_methylated(), + bshash_rc_no_meth.is_methylated()); + TEST_ASSERT_NE(bshash.is_forward_higher(), bshash_rc.is_forward_higher()); } - + return 0; } From f0e5d8a64e3c91796ec507890bc3803267c6c7b3 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Mon, 8 Dec 2025 22:23:34 -0800 Subject: [PATCH 17/38] seq: add comment and fixed spacing bs conversion utility --- include/btllib/seq.hpp | 21 +++++++++++++++------ src/btllib/seq.cpp | 27 ++++++++++++++------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/include/btllib/seq.hpp b/include/btllib/seq.hpp index 52149604..144a9c95 100644 --- a/include/btllib/seq.hpp +++ b/include/btllib/seq.hpp @@ -35,18 +35,27 @@ reverse_complement(std::string& seq); std::string get_reverse_complement(const std::string& seq); - - +/** + * Perform bisulfite conversion on a sequence in-place. + * + * Depending on the specified conversion type ("CT" or "GA"), each character in + * the sequence is replaced with its bisulfite-converted form using the + * corresponding conversion table. The argument sequence is modified directly. + * + * @param seq Sequence to convert. + * @param type Conversion type ("CT" or "GA"). + */ void bs_conversion(std::string& seq, std::string& type); /** - * Obtain a reverse complement of the provided sequence. The argument sequence - * is left untouched. + * Obtain a bisulfite-converted copy of the provided sequence. The argument + * sequence is left untouched. * - * @param seq Sequence to reverse complement. + * @param seq Sequence to convert. + * @param type Conversion type ("CT" or "GA"). * - * @return Reverse complemented sequence. + * @return Bisulfite-converted sequence. */ std::string get_bs_conversion(const std::string& seq, std::string& type); diff --git a/src/btllib/seq.cpp b/src/btllib/seq.cpp index 633cd652..134a8306 100644 --- a/src/btllib/seq.cpp +++ b/src/btllib/seq.cpp @@ -114,13 +114,13 @@ const char CT_CONVERSION[256] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // @ A B C D E F G H I J K L M N O - 0 , 'A', 0, 'T', 0, 0, 0, 'G', 0, 0, 0, 0 , 0 , 0 , 0 , 0 , + 0 , 'A', 0 , 'T', 0 , 0 , 0 , 'G', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // P Q R S T U V W X Y Z [ \ ] ^ _ 0 , 0 , 0 , 0 , 'T', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // ` a b c d e f g h i j k l m n o - 0 , 'a', 0, 't', 0, 0, 0, 'g', 0, 0, 0, 0 , 0 , 0 , 0 , 0 , + 0 , 'a', 0 , 't', 0 , 0 , 0 , 'g', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // p q r s t u v w x y z { | } ~ DEL 0 , 0 , 0 , 0 , 't', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , @@ -146,13 +146,13 @@ const char GA_CONVERSION[256] = { 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // @ A B C D E F G H I J K L M N O - 0 , 'A', 0, 'C', 0, 0, 0, 'A', 0, 0, 0, 0 , 0 , 0 , 0 , 0 , + 0 , 'A', 0 , 'C', 0 , 0 , 0 , 'A', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // P Q R S T U V W X Y Z [ \ ] ^ _ 0 , 0 , 0 , 0 , 'T', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // ` a b c d e f g h i j k l m n o - 0 , 'a', 0, 'c', 0, 0, 0, 'a', 0, 0, 0, 0 , 0 , 0 , 0 , 0 , + 0 , 'a', 0 , 'c', 0 , 0 , 0 , 'a', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , // p q r s t u v w x y z { | } ~ DEL 0 , 0 , 0 , 0 , 't', 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , @@ -186,21 +186,22 @@ get_reverse_complement(const std::string& seq) } void -bs_conversion(std::string& seq, std::string& type){ - const char* table = - (type == "CT" || type == "ct") ? CT_CONVERSION : GA_CONVERSION; +bs_conversion(std::string& seq, std::string& type) +{ + const char* table = + (type == "CT" || type == "ct") ? CT_CONVERSION : GA_CONVERSION; - std::transform(seq.begin(), seq.end(), seq.begin(), - [table](char c) { return table[(unsigned char)c]; }); + std::transform(seq.begin(), seq.end(), seq.begin(), [table](char c) { + return table[(unsigned char)c]; + }); } + std::string -get_bs_conversion(const std::string& seq, std::string& type){ +get_bs_conversion(const std::string& seq, std::string& type) +{ std::string conversion(seq); bs_conversion(conversion, type); return conversion; } - - - } // namespace btllib From 770fbb3bde6db87a89d9bccec3acd986240f3042 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Mon, 8 Dec 2025 22:24:46 -0800 Subject: [PATCH 18/38] nthash: tidy up changes and made spacing --- include/btllib/hashing_internals.hpp | 17 ++--- include/btllib/nthash_kmer.hpp | 101 ++++++++------------------- 2 files changed, 37 insertions(+), 81 deletions(-) diff --git a/include/btllib/hashing_internals.hpp b/include/btllib/hashing_internals.hpp index 3ea06f6f..22f54b84 100644 --- a/include/btllib/hashing_internals.hpp +++ b/include/btllib/hashing_internals.hpp @@ -57,9 +57,10 @@ srol(const uint64_t x, const unsigned d) * @return Split-rotation result */ inline uint64_t -srol_table(unsigned char c, unsigned d, - const uint64_t* const* right_table, - const uint64_t* const* left_table ); +srol_table(unsigned char c, + unsigned d, + const uint64_t* const* right_table, + const uint64_t* const* left_table); /** * Split a 64-bit word into 33 and 31-bit sub-words and right-rotate them @@ -170,7 +171,6 @@ const uint64_t SEED_TAB[ASCII_SIZE] = { SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N // 248..255 }; - const uint64_t A33R[33] = { 0x195c60474, 0x12b8c08e9, 0x571811d3, 0xae3023a6, 0x15c60474c, 0xb8c08e99, 0x171811d32, 0xe3023a65, 0x1c60474ca, 0x18c08e995, 0x11811d32b, 0x3023a657, @@ -347,12 +347,13 @@ const uint64_t* const MS_TAB_31L[ASCII_SIZE] = { N31L, N31L, N31L, N31L, N31L, N31L, N31L, N31L // 248..255 }; - inline uint64_t -srol_table(unsigned char c, unsigned d, const uint64_t* const* right_table = MS_TAB_33R, - const uint64_t* const* left_table = MS_TAB_31L) +srol_table(unsigned char c, + unsigned d, + const uint64_t* const* right_table = MS_TAB_33R, + const uint64_t* const* left_table = MS_TAB_31L) { - return (left_table[c][d < 31 ? d : d % 31] | /* NOLINT */ + return (left_table[c][d < 31 ? d : d % 31] | /* NOLINT */ right_table[c][d < 33 ? d : d % 33]); /* NOLINT */ } diff --git a/include/btllib/nthash_kmer.hpp b/include/btllib/nthash_kmer.hpp index 1b5fdc27..ceebf96a 100644 --- a/include/btllib/nthash_kmer.hpp +++ b/include/btllib/nthash_kmer.hpp @@ -20,51 +20,28 @@ namespace btllib::hashing_internals { * @return Hash value of k-mer_0 */ inline uint64_t -base_forward_hash(const char* seq, unsigned k, const uint64_t* primitive_tab = SEED_TAB, const uint64_t* dimer_tab = DIMER_TAB, const uint64_t* trimer_tab = TRIMER_TAB, const uint64_t* tetramer_tab = TETRAMER_TAB -, const uint8_t* convert_tab = CONVERT_TAB) +base_forward_hash(const char* seq, + unsigned k, + const uint64_t* primitive_tab = SEED_TAB, + const uint64_t* dimer_tab = DIMER_TAB, + const uint64_t* trimer_tab = TRIMER_TAB, + const uint64_t* tetramer_tab = TETRAMER_TAB, + const uint8_t* convert_tab = CONVERT_TAB) { uint64_t h_val = 0; - std::cerr << "abc" << std::endl; - for (unsigned i = 0; i < k - 3; i += 4) { h_val = srol(h_val, 4); uint8_t loc = 0; - /*loc += 64 * convert_tab[(unsigned char)seq[i]]; // NOLINT + loc += 64 * convert_tab[(unsigned char)seq[i]]; // NOLINT loc += 16 * convert_tab[(unsigned char)seq[i + 1]]; // NOLINT loc += 4 * convert_tab[(unsigned char)seq[i + 2]]; loc += convert_tab[(unsigned char)seq[i + 3]]; - h_val ^= tetramer_tab[loc];*/ - uint8_t v0 = convert_tab[(unsigned char)seq[i]]; - uint8_t v1 = convert_tab[(unsigned char)seq[i + 1]]; - uint8_t v2 = convert_tab[(unsigned char)seq[i + 2]]; - uint8_t v3 = convert_tab[(unsigned char)seq[i + 3]]; - - loc += 64 * v0; - loc += 16 * v1; - loc += 4 * v2; - loc += v3; - h_val ^= tetramer_tab[loc]; - - // Print each char, its convert_tab value, and the resulting loc - std::cerr << "Chars: " - << seq[i] << seq[i+1] << seq[i+2] << seq[i+3] - << " | Convert: " - << static_cast(v0) << " " - << static_cast(v1) << " " - << static_cast(v2) << " " - << static_cast(v3) - << " | loc = " << static_cast(loc) - << " (0x" << std::hex << static_cast(loc) << std::dec << ")" - << std::endl; } - std::cerr << h_val << std::endl; - const unsigned remainder = k % 4; if (remainder > 0) { h_val = srol(h_val, remainder); } - // TODO double check bug use k = 7 to test if (remainder == 3) { uint8_t trimer_loc = 0; trimer_loc += 16 * convert_tab[(unsigned char)seq[k - 3]]; // NOLINT @@ -79,7 +56,6 @@ base_forward_hash(const char* seq, unsigned k, const uint64_t* primitive_tab = S } else if (remainder == 1) { h_val ^= primitive_tab[(unsigned char)seq[k - 1]]; } - std::cerr << h_val << std::endl; return h_val; } @@ -99,7 +75,7 @@ next_forward_hash(uint64_t fh_val, unsigned char char_in, const uint64_t* primitive_tab = SEED_TAB, const uint64_t* const* right_table = MS_TAB_33R, - const uint64_t* const* left_table = MS_TAB_31L) + const uint64_t* const* left_table = MS_TAB_31L) { uint64_t h_val = srol(fh_val); h_val ^= primitive_tab[char_in]; @@ -122,7 +98,7 @@ prev_forward_hash(uint64_t fh_val, unsigned char char_in, const uint64_t* primitive_tab = SEED_TAB, const uint64_t* const* right_table = MS_TAB_33R, - const uint64_t* const* left_table = MS_TAB_31L) + const uint64_t* const* left_table = MS_TAB_31L) { uint64_t h_val = fh_val ^ srol_table(char_in, k, right_table, left_table); h_val ^= primitive_tab[char_out]; @@ -138,12 +114,16 @@ prev_forward_hash(uint64_t fh_val, * @return Hash value of the reverse-complement of k-mer_0 */ inline uint64_t -base_reverse_hash(const char* seq, unsigned k, const uint64_t* primitive_tab = SEED_TAB, const uint64_t* dimer_tab = DIMER_TAB, const uint64_t* trimer_tab = TRIMER_TAB, const uint64_t* tetramer_tab = TETRAMER_TAB -, const uint8_t* rc_convert_tab = RC_CONVERT_TAB) +base_reverse_hash(const char* seq, + unsigned k, + const uint64_t* primitive_tab = SEED_TAB, + const uint64_t* dimer_tab = DIMER_TAB, + const uint64_t* trimer_tab = TRIMER_TAB, + const uint64_t* tetramer_tab = TETRAMER_TAB, + const uint8_t* rc_convert_tab = RC_CONVERT_TAB) { uint64_t h_val = 0; const unsigned remainder = k % 4; - std::cerr << "cde" << std::endl; if (remainder == 3) { uint8_t trimer_loc = 0; trimer_loc += 16 * rc_convert_tab[(unsigned char)seq[k - 1]]; // NOLINT @@ -158,42 +138,15 @@ base_reverse_hash(const char* seq, unsigned k, const uint64_t* primitive_tab = S } else if (remainder == 1) { h_val ^= primitive_tab[(unsigned char)seq[k - 1] & CP_OFF]; } - std::cerr << h_val << std::endl; for (int i = (int)(k - remainder) - 1; i >= 3; i -= 4) { h_val = srol(h_val, 4); uint8_t loc = 0; - /*loc += 64 * rc_convert_tab[(unsigned char)seq[i]]; // NOLINT + loc += 64 * rc_convert_tab[(unsigned char)seq[i]]; // NOLINT loc += 16 * rc_convert_tab[(unsigned char)seq[i - 1]]; // NOLINT loc += 4 * rc_convert_tab[(unsigned char)seq[i - 2]]; loc += rc_convert_tab[(unsigned char)seq[i - 3]]; h_val ^= tetramer_tab[loc]; - std::cout << "loc (dec): " << static_cast(loc) << std::endl;*/ - uint8_t rv0 = rc_convert_tab[(unsigned char)seq[i]]; -uint8_t rv1 = rc_convert_tab[(unsigned char)seq[i - 1]]; -uint8_t rv2 = rc_convert_tab[(unsigned char)seq[i - 2]]; -uint8_t rv3 = rc_convert_tab[(unsigned char)seq[i - 3]]; - -loc += 64 * rv0; -loc += 16 * rv1; -loc += 4 * rv2; -loc += rv3; - -h_val ^= tetramer_tab[loc]; - -// Print each char, its rc_convert_tab value, and the resulting loc -std::cerr << "Chars (rev): " - << seq[i] << seq[i-1] << seq[i-2] << seq[i-3] - << " | RC Convert: " - << static_cast(rv0) << " " - << static_cast(rv1) << " " - << static_cast(rv2) << " " - << static_cast(rv3) - << " | loc = " << static_cast(loc) - << " (0x" << std::hex << static_cast(loc) << std::dec << ")" - << std::endl; } - - std::cerr << h_val << std::endl; return h_val; } @@ -214,9 +167,10 @@ next_reverse_hash(uint64_t rh_val, unsigned char char_in, const uint64_t* primitive_tab = SEED_TAB, const uint64_t* const* right_table = MS_TAB_33R, - const uint64_t* const* left_table = MS_TAB_31L) + const uint64_t* const* left_table = MS_TAB_31L) { - uint64_t h_val = rh_val ^ srol_table(char_in & CP_OFF, k, right_table, left_table); + uint64_t h_val = + rh_val ^ srol_table(char_in & CP_OFF, k, right_table, left_table); h_val ^= primitive_tab[char_out & CP_OFF]; h_val = sror(h_val); return h_val; @@ -237,7 +191,7 @@ prev_reverse_hash(uint64_t rh_val, unsigned char char_in, const uint64_t* primitive_tab = SEED_TAB, const uint64_t* const* right_table = MS_TAB_33R, - const uint64_t* const* left_table = MS_TAB_31L) + const uint64_t* const* left_table = MS_TAB_31L) { uint64_t h_val = srol(rh_val); h_val ^= primitive_tab[char_in & CP_OFF]; @@ -268,8 +222,8 @@ sub_hash(uint64_t fh_val, unsigned k, unsigned m, uint64_t* h_val, - const uint64_t* const* right_table = MS_TAB_33R, - const uint64_t* const* left_table = MS_TAB_31L) + const uint64_t* const* right_table = MS_TAB_33R, + const uint64_t* const* left_table = MS_TAB_31L) { uint64_t b_val = 0; @@ -277,10 +231,12 @@ sub_hash(uint64_t fh_val, const auto pos = positions[i]; const auto new_base = new_bases[i]; - fh_val ^= srol_table((unsigned char)kmer_seq[pos], k - 1 - pos, right_table, left_table); + fh_val ^= srol_table( + (unsigned char)kmer_seq[pos], k - 1 - pos, right_table, left_table); fh_val ^= srol_table(new_base, k - 1 - pos, right_table, left_table); - rh_val ^= srol_table((unsigned char)kmer_seq[pos] & CP_OFF, pos, right_table, left_table); + rh_val ^= srol_table( + (unsigned char)kmer_seq[pos] & CP_OFF, pos, right_table, left_table); rh_val ^= srol_table(new_base & CP_OFF, pos, right_table, left_table); } @@ -557,7 +513,6 @@ class NtHash uint64_t rev_hash = 0; std::unique_ptr hash_arr; - /** * Initialize the internal state of the iterator * @return \p true if successful, \p false otherwise From 659dae1e1f5f5810225f013051c56527c6db7903 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Mon, 8 Dec 2025 22:25:52 -0800 Subject: [PATCH 19/38] bshash: spacing changes --- include/btllib/bshash.hpp | 2 +- include/btllib/bshash_hashing_internals.hpp | 2979 ++++++++++++++++--- include/btllib/bshash_kmer.hpp | 1164 ++------ include/btllib/bshash_kmer_directional.hpp | 185 +- 4 files changed, 2886 insertions(+), 1444 deletions(-) diff --git a/include/btllib/bshash.hpp b/include/btllib/bshash.hpp index 5a412671..f8c9d868 100644 --- a/include/btllib/bshash.hpp +++ b/include/btllib/bshash.hpp @@ -9,9 +9,9 @@ #include #include -#include #include #include +#include #include namespace btllib { diff --git a/include/btllib/bshash_hashing_internals.hpp b/include/btllib/bshash_hashing_internals.hpp index 8be5f2b7..d34e8022 100644 --- a/include/btllib/bshash_hashing_internals.hpp +++ b/include/btllib/bshash_hashing_internals.hpp @@ -9,524 +9,1295 @@ namespace btllib::hashing_internals { // Group CTConversion XOR pairs (6 seeds → 3 pairs) -static const uint64_t CT_SEED_A_ = 0x38e1680db3188ddfULL; //0 -static const uint64_t CT_SEED_G_ = 0x07d6453133a367b1ULL; //2 -static const uint64_t CT_SEED_CT = 0x2c555d4ed6a221a1ULL; //3 -static const uint64_t XX_SEED_N_ = SEED_N; //3 +static const uint64_t CT_SEED_A_ = 0x38e1680db3188ddfULL; // 0 +static const uint64_t CT_SEED_G_ = 0x07d6453133a367b1ULL; // 2 +static const uint64_t CT_SEED_CT = 0x2c555d4ed6a221a1ULL; // 3 +static const uint64_t XX_SEED_N_ = SEED_N; // 3 // Group GAConversion XOR pairs (6 seeds → 3 pairs) -static const uint64_t GA_SEED_GA = 0x4d9c9d29392bb7d6ULL; //0 -static const uint64_t GA_SEED_C_ = 0x0e06d727c6e35043ULL; //1 -static const uint64_t GA_SEED_T_ = 0x013a8fe8dea57b07ULL; //3 +static const uint64_t GA_SEED_GA = 0x4d9c9d29392bb7d6ULL; // 0 +static const uint64_t GA_SEED_C_ = 0x0e06d727c6e35043ULL; // 1 +static const uint64_t GA_SEED_T_ = 0x013a8fe8dea57b07ULL; // 3 const uint64_t BS_CT_SEED_TAB[ASCII_SIZE] = { - XX_SEED_N_, GA_SEED_T_, XX_SEED_N_, GA_SEED_GA, GA_SEED_GA, GA_SEED_GA, XX_SEED_N_, GA_SEED_C_, // 0..7 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 8..15 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 16..23 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 24..31 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 32..39 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 40..47 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 48..55 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 56..63 - XX_SEED_N_, CT_SEED_A_, XX_SEED_N_, CT_SEED_CT, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, CT_SEED_G_, // 64..71 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 72..79 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, CT_SEED_CT, CT_SEED_CT, XX_SEED_N_, XX_SEED_N_, // 80..87 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 88..95 - XX_SEED_N_, CT_SEED_A_, XX_SEED_N_, CT_SEED_CT, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, CT_SEED_G_, // 96..103 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 104..111 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, CT_SEED_CT, CT_SEED_CT, XX_SEED_N_, XX_SEED_N_, // 112..119 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 120..127 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 128..135 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 136..143 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 144..151 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 152..159 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 160..167 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 168..175 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 176..183 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 184..191 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 192..199 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 200..207 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 208..215 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 216..223 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 224..231 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 232..239 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 240..247 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_ // 248..255 + XX_SEED_N_, GA_SEED_T_, XX_SEED_N_, GA_SEED_GA, + GA_SEED_GA, GA_SEED_GA, XX_SEED_N_, GA_SEED_C_, // 0..7 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 8..15 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 16..23 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 24..31 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 32..39 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 40..47 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 48..55 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 56..63 + XX_SEED_N_, CT_SEED_A_, XX_SEED_N_, CT_SEED_CT, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, CT_SEED_G_, // 64..71 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 72..79 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + CT_SEED_CT, CT_SEED_CT, XX_SEED_N_, XX_SEED_N_, // 80..87 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 88..95 + XX_SEED_N_, CT_SEED_A_, XX_SEED_N_, CT_SEED_CT, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, CT_SEED_G_, // 96..103 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 104..111 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + CT_SEED_CT, CT_SEED_CT, XX_SEED_N_, XX_SEED_N_, // 112..119 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 120..127 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 128..135 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 136..143 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 144..151 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 152..159 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 160..167 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 168..175 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 176..183 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 184..191 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 192..199 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 200..207 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 208..215 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 216..223 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 224..231 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 232..239 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 240..247 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_ // 248..255 }; const uint64_t BS_GA_SEED_TAB[ASCII_SIZE] = { - XX_SEED_N_, CT_SEED_CT, XX_SEED_N_, CT_SEED_G_, CT_SEED_A_, CT_SEED_A_, XX_SEED_N_, CT_SEED_CT, // 0..7 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 8..15 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 16..23 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 24..31 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 32..39 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 40..47 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 48..55 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 56..63 - XX_SEED_N_, GA_SEED_GA, XX_SEED_N_, GA_SEED_C_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, GA_SEED_GA, // 64..71 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 72..79 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, GA_SEED_T_, GA_SEED_T_, XX_SEED_N_, XX_SEED_N_, // 80..87 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 88..95 - XX_SEED_N_, GA_SEED_GA, XX_SEED_N_, GA_SEED_C_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, GA_SEED_GA, // 96..103 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 104..111 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, GA_SEED_T_, GA_SEED_T_, XX_SEED_N_, XX_SEED_N_, // 112..119 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 120..127 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 128..135 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 136..143 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 144..151 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 152..159 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 160..167 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 168..175 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 176..183 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 184..191 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 192..199 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 200..207 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 208..215 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 216..223 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 224..231 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 232..239 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 240..247 - XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_ // 248..255 -}; - -const uint64_t BS_CT_DIMER_TAB[16] = { - 5270258388568741472ULL, 0ULL, 8508589596476603406ULL, 6744014374774716958ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 3985090206635016892ULL, 0ULL, 611028647685761234ULL, 2592339656081469122ULL, - 6938871167320903325ULL, 0ULL, 6880655447580681459ULL, 8430711922963669731ULL - -}; - -const uint64_t BS_CT_TRIMER_TAB[64] = { - 12296542375903994143ULL, 0ULL, 10777453763052456817ULL, 13696059339288153441ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 15332577898988860867ULL, 0ULL, 17005433182935818157ULL, 13870091925232998845ULL, - 9497654699218631138ULL, 0ULL, 13617020073335395212ULL, 10915115690247869852ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 6231483061417281702ULL, 0ULL, 7587863783224107720ULL, 4813953927640687832ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 2888204484391197818ULL, 0ULL, 1667417705464477204ULL, 4368707067656630276ULL, - 9156599042800701531ULL, 0ULL, 4622359443003849269ULL, 7757154674165939237ULL, - 17903722964224905445ULL, 0ULL, 14358003450906344075ULL, 17060471493690637467ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 9662639389952885817ULL, 0ULL, 13344089177003470423ULL, 10569000649149409351ULL, - 15068665748674332696ULL, 0ULL, 17161400230922396278ULL, 14243358305793533030ULL - -}; - -const uint64_t BS_CT_TETRAMER_TAB[256] = { - 7903069985244696545ULL, 0ULL, 5952199028685153679ULL, 8726125620783047583ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 1423984374654180157ULL, 0ULL, 3239265717627122003ULL, 537959694350858051ULL, - 4955422779349047068ULL, 0ULL, 8931236194499526002ULL, 5796422442532485986ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 10480418080795158104ULL, 0ULL, 12557937413778541622ULL, 9639350223324760614ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 17230691697404916356ULL, 0ULL, 14999410022686677226ULL, 18134732970037944058ULL, - 13265781732207312549ULL, 0ULL, 9740911128328721611ULL, 12442798639878738651ULL, - 4574968163017784859ULL, 0ULL, 21005562311251061ULL, 3155255381851099749ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 4689328268616313543ULL, 0ULL, 9089664875619762345ULL, 6172150804119029433ULL, - 1591432901806794470ULL, 0ULL, 2964154664228736136ULL, 189664148293585560ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 10670208004106787987ULL, 0ULL, 12331731648208664317ULL, 9268413422807691501ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 16895935546904758351ULL, 0ULL, 15370019031266140705ULL, 18360716771597345841ULL, - 13653888368900424814ULL, 0ULL, 9388727718593310208ULL, 12234149755652892688ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 7550431377666028842ULL, 0ULL, 6340971969386962756ULL, 8970238170159749460ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 1632237297318027766ULL, 0ULL, 2995441377836469144ULL, 149439503918058888ULL, - 4729043290971336151ULL, 0ULL, 9121973896872036281ULL, 6130765314036646313ULL, - 14322824142644718953ULL, 0ULL, 18010960957546809095ULL, 15163850771780370711ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 13306657871389129141ULL, 0ULL, 9772129397874496475ULL, 12402503902173812171ULL, - 17270334977163097492ULL, 0ULL, 15031787489074651130ULL, 18093417574358702570ULL, - 14415162961378847764ULL, 0ULL, 17815078398106617466ULL, 15904680980454047850ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 13142251585342896328ULL, 0ULL, 9895962073632248486ULL, 11733738969707828406ULL, - 16241274185646601449ULL, 0ULL, 16020348337179074183ULL, 17712848172773365911ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 3805619366837683629ULL, 0ULL, 857772563979235267ULL, 2334117937676265939ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 5385773951433757041ULL, 0ULL, 8469355426070739743ULL, 6776274342720863503ULL, - 2141510149062286672ULL, 0ULL, 2490210351169910590ULL, 651923950472458542ULL, - 11158837266677243374ULL, 0ULL, 11955661059224500096ULL, 10261471664026595728ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 16479371589667244338ULL, 0ULL, 15818137341953581916ULL, 17295591019177141580ULL, - 12949060435646432531ULL, 0ULL, 10125042934416846717ULL, 12033750801055738221ULL - -}; - -const uint64_t BS_GA_DIMER_TAB[16] = { - 15466952634673191035ULL, 10754575533572636654ULL, 0ULL, 11097913711551190186ULL, - 5877535496320390993ULL, 1300266414860792004ULL, 0ULL, 2105188344771369856ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 5758277602454356440ULL, 897281711739086413ULL, 0ULL, 238567881438104841ULL - -}; - -const uint64_t BS_GA_TRIMER_TAB[64] = { - 16201651124989724449ULL, 11767230542839996596ULL, 0ULL, 12425925165519260656ULL, - 7485905922695612427ULL, 2628147013137149854ULL, 0ULL, 3117991808660735194ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 8762868368011140738ULL, 4179829000807872791ULL, 0ULL, 3836473814229799507ULL, - 17203464567451130228ULL, 12476292506423230177ULL, 0ULL, 11680341964607149477ULL, - 7605014056128239198ULL, 3030964276180857291ULL, 0ULL, 2678653576253708943ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 8643215145753641175ULL, 3776431493625472834ULL, 0ULL, 4275266935868017670ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 15154499325442733158ULL, 10508537125562078195ULL, 0ULL, 11450835371142150327ULL, - 6159530959071411020ULL, 1648676777120242905ULL, 0ULL, 1863678824033957789ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 5405372172001914309ULL, 619798376795097680ULL, 0ULL, 551039597220094228ULL - -}; - -const uint64_t BS_GA_TETRAMER_TAB[256] = { - 10102482841122093461ULL, 14963615003264376320ULL, 0ULL, 13877042217463412036ULL, - 794795739487041215ULL, 5232589638041243946ULL, 0ULL, 5161613641720576622ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 1548991017688113206ULL, 6261504389880137635ULL, 0ULL, 6474221187766217959ULL, - 9392840475291690944ULL, 13961256338029068373ULL, 0ULL, 14914796497557187345ULL, - 391398344175249642ULL, 5112936561611444095ULL, 0ULL, 5316662609698825275ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 1951808117724528227ULL, 6380612394262551030ULL, 0ULL, 6318591945652269746ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 13739200011520215762ULL, 18244429068126416199ULL, 0ULL, 17441706179187852803ULL, - 4152289719497344504ULL, 8792626876107301485ULL, 0ULL, 8447054438424348969ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 2875359367268995953ULL, 7240976841901733092ULL, 0ULL, 7728536354038472608ULL, - 10439742971334657342ULL, 15238809933785883307ULL, 0ULL, 15872787535609874927ULL, - 1717559167161027092ULL, 6075273402150661505ULL, 0ULL, 6589870402902132421ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 697300835471006877ULL, 5347749012401158920ULL, 0ULL, 4975173205340442700ULL, - 11423453917689355115ULL, 15929804584091122942ULL, 0ULL, 15145130632992380858ULL, - 1854593325084628033ULL, 6496051875600806868ULL, 0ULL, 6132429400583706768ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 559685854125523656ULL, 4926424929898751325ULL, 0ULL, 5432033363231264281ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 11707425986759079545ULL, 16276972118015169004ULL, 0ULL, 17212532555803097768ULL, - 2688004620665649491ULL, 7410673165475159750ULL, 0ULL, 7632377835808404866ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 4266321074409320410ULL, 8696255610672826447ULL, 0ULL, 8616255423678309131ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 16790453619013574426ULL, 12293110188280461455ULL, 0ULL, 11936155684350399435ULL, - 7941878618842992688ULL, 3291412832639317925ULL, 0ULL, 2490818554466065633ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 8092837802684991161ULL, 3735105958421766444ULL, 0ULL, 4389210678668986984ULL, - 16675286395804860751ULL, 11885226664861542106ULL, 0ULL, 12379434820203482526ULL, - 6944006080901647973ULL, 2577284614385265136ULL, 0ULL, 3240342666676093620ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 9090130062791399660ULL, 4448689121532002169ULL, 0ULL, 3639106275939783741ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 15824496359018523741ULL, 10953226629488970696ULL, 0ULL, 10898136436499554444ULL, - 6678683788476883831ULL, 2248766861403210978ULL, 0ULL, 1155561727788445606ULL, - 0ULL, 0ULL, 0ULL, 0ULL, - 4816531739325199870ULL, 93880803705459307ULL, 0ULL, 1040842618995587375ULL - + XX_SEED_N_, CT_SEED_CT, XX_SEED_N_, CT_SEED_G_, + CT_SEED_A_, CT_SEED_A_, XX_SEED_N_, CT_SEED_CT, // 0..7 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 8..15 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 16..23 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 24..31 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 32..39 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 40..47 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 48..55 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 56..63 + XX_SEED_N_, GA_SEED_GA, XX_SEED_N_, GA_SEED_C_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, GA_SEED_GA, // 64..71 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 72..79 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + GA_SEED_T_, GA_SEED_T_, XX_SEED_N_, XX_SEED_N_, // 80..87 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 88..95 + XX_SEED_N_, GA_SEED_GA, XX_SEED_N_, GA_SEED_C_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, GA_SEED_GA, // 96..103 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 104..111 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + GA_SEED_T_, GA_SEED_T_, XX_SEED_N_, XX_SEED_N_, // 112..119 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 120..127 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 128..135 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 136..143 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 144..151 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 152..159 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 160..167 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 168..175 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 176..183 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 184..191 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 192..199 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 200..207 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 208..215 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 216..223 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 224..231 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 232..239 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, // 240..247 + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, + XX_SEED_N_, XX_SEED_N_, XX_SEED_N_, XX_SEED_N_ // 248..255 }; +const uint64_t BS_CT_DIMER_TAB[16] = { 5270258388568741472ULL, + 0ULL, + 8508589596476603406ULL, + 6744014374774716958ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 3985090206635016892ULL, + 0ULL, + 611028647685761234ULL, + 2592339656081469122ULL, + 6938871167320903325ULL, + 0ULL, + 6880655447580681459ULL, + 8430711922963669731ULL + +}; + +const uint64_t BS_CT_TRIMER_TAB[64] = { 12296542375903994143ULL, + 0ULL, + 10777453763052456817ULL, + 13696059339288153441ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 15332577898988860867ULL, + 0ULL, + 17005433182935818157ULL, + 13870091925232998845ULL, + 9497654699218631138ULL, + 0ULL, + 13617020073335395212ULL, + 10915115690247869852ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 6231483061417281702ULL, + 0ULL, + 7587863783224107720ULL, + 4813953927640687832ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 2888204484391197818ULL, + 0ULL, + 1667417705464477204ULL, + 4368707067656630276ULL, + 9156599042800701531ULL, + 0ULL, + 4622359443003849269ULL, + 7757154674165939237ULL, + 17903722964224905445ULL, + 0ULL, + 14358003450906344075ULL, + 17060471493690637467ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 9662639389952885817ULL, + 0ULL, + 13344089177003470423ULL, + 10569000649149409351ULL, + 15068665748674332696ULL, + 0ULL, + 17161400230922396278ULL, + 14243358305793533030ULL + +}; + +const uint64_t BS_CT_TETRAMER_TAB[256] = { 7903069985244696545ULL, + 0ULL, + 5952199028685153679ULL, + 8726125620783047583ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 1423984374654180157ULL, + 0ULL, + 3239265717627122003ULL, + 537959694350858051ULL, + 4955422779349047068ULL, + 0ULL, + 8931236194499526002ULL, + 5796422442532485986ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 10480418080795158104ULL, + 0ULL, + 12557937413778541622ULL, + 9639350223324760614ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 17230691697404916356ULL, + 0ULL, + 14999410022686677226ULL, + 18134732970037944058ULL, + 13265781732207312549ULL, + 0ULL, + 9740911128328721611ULL, + 12442798639878738651ULL, + 4574968163017784859ULL, + 0ULL, + 21005562311251061ULL, + 3155255381851099749ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 4689328268616313543ULL, + 0ULL, + 9089664875619762345ULL, + 6172150804119029433ULL, + 1591432901806794470ULL, + 0ULL, + 2964154664228736136ULL, + 189664148293585560ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 10670208004106787987ULL, + 0ULL, + 12331731648208664317ULL, + 9268413422807691501ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 16895935546904758351ULL, + 0ULL, + 15370019031266140705ULL, + 18360716771597345841ULL, + 13653888368900424814ULL, + 0ULL, + 9388727718593310208ULL, + 12234149755652892688ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 7550431377666028842ULL, + 0ULL, + 6340971969386962756ULL, + 8970238170159749460ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 1632237297318027766ULL, + 0ULL, + 2995441377836469144ULL, + 149439503918058888ULL, + 4729043290971336151ULL, + 0ULL, + 9121973896872036281ULL, + 6130765314036646313ULL, + 14322824142644718953ULL, + 0ULL, + 18010960957546809095ULL, + 15163850771780370711ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 13306657871389129141ULL, + 0ULL, + 9772129397874496475ULL, + 12402503902173812171ULL, + 17270334977163097492ULL, + 0ULL, + 15031787489074651130ULL, + 18093417574358702570ULL, + 14415162961378847764ULL, + 0ULL, + 17815078398106617466ULL, + 15904680980454047850ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 13142251585342896328ULL, + 0ULL, + 9895962073632248486ULL, + 11733738969707828406ULL, + 16241274185646601449ULL, + 0ULL, + 16020348337179074183ULL, + 17712848172773365911ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 3805619366837683629ULL, + 0ULL, + 857772563979235267ULL, + 2334117937676265939ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 5385773951433757041ULL, + 0ULL, + 8469355426070739743ULL, + 6776274342720863503ULL, + 2141510149062286672ULL, + 0ULL, + 2490210351169910590ULL, + 651923950472458542ULL, + 11158837266677243374ULL, + 0ULL, + 11955661059224500096ULL, + 10261471664026595728ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 16479371589667244338ULL, + 0ULL, + 15818137341953581916ULL, + 17295591019177141580ULL, + 12949060435646432531ULL, + 0ULL, + 10125042934416846717ULL, + 12033750801055738221ULL + +}; + +const uint64_t BS_GA_DIMER_TAB[16] = { 15466952634673191035ULL, + 10754575533572636654ULL, + 0ULL, + 11097913711551190186ULL, + 5877535496320390993ULL, + 1300266414860792004ULL, + 0ULL, + 2105188344771369856ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 5758277602454356440ULL, + 897281711739086413ULL, + 0ULL, + 238567881438104841ULL + +}; + +const uint64_t BS_GA_TRIMER_TAB[64] = { 16201651124989724449ULL, + 11767230542839996596ULL, + 0ULL, + 12425925165519260656ULL, + 7485905922695612427ULL, + 2628147013137149854ULL, + 0ULL, + 3117991808660735194ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 8762868368011140738ULL, + 4179829000807872791ULL, + 0ULL, + 3836473814229799507ULL, + 17203464567451130228ULL, + 12476292506423230177ULL, + 0ULL, + 11680341964607149477ULL, + 7605014056128239198ULL, + 3030964276180857291ULL, + 0ULL, + 2678653576253708943ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 8643215145753641175ULL, + 3776431493625472834ULL, + 0ULL, + 4275266935868017670ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 15154499325442733158ULL, + 10508537125562078195ULL, + 0ULL, + 11450835371142150327ULL, + 6159530959071411020ULL, + 1648676777120242905ULL, + 0ULL, + 1863678824033957789ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 5405372172001914309ULL, + 619798376795097680ULL, + 0ULL, + 551039597220094228ULL + +}; + +const uint64_t BS_GA_TETRAMER_TAB[256] = { 10102482841122093461ULL, + 14963615003264376320ULL, + 0ULL, + 13877042217463412036ULL, + 794795739487041215ULL, + 5232589638041243946ULL, + 0ULL, + 5161613641720576622ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 1548991017688113206ULL, + 6261504389880137635ULL, + 0ULL, + 6474221187766217959ULL, + 9392840475291690944ULL, + 13961256338029068373ULL, + 0ULL, + 14914796497557187345ULL, + 391398344175249642ULL, + 5112936561611444095ULL, + 0ULL, + 5316662609698825275ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 1951808117724528227ULL, + 6380612394262551030ULL, + 0ULL, + 6318591945652269746ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 13739200011520215762ULL, + 18244429068126416199ULL, + 0ULL, + 17441706179187852803ULL, + 4152289719497344504ULL, + 8792626876107301485ULL, + 0ULL, + 8447054438424348969ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 2875359367268995953ULL, + 7240976841901733092ULL, + 0ULL, + 7728536354038472608ULL, + 10439742971334657342ULL, + 15238809933785883307ULL, + 0ULL, + 15872787535609874927ULL, + 1717559167161027092ULL, + 6075273402150661505ULL, + 0ULL, + 6589870402902132421ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 697300835471006877ULL, + 5347749012401158920ULL, + 0ULL, + 4975173205340442700ULL, + 11423453917689355115ULL, + 15929804584091122942ULL, + 0ULL, + 15145130632992380858ULL, + 1854593325084628033ULL, + 6496051875600806868ULL, + 0ULL, + 6132429400583706768ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 559685854125523656ULL, + 4926424929898751325ULL, + 0ULL, + 5432033363231264281ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 11707425986759079545ULL, + 16276972118015169004ULL, + 0ULL, + 17212532555803097768ULL, + 2688004620665649491ULL, + 7410673165475159750ULL, + 0ULL, + 7632377835808404866ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 4266321074409320410ULL, + 8696255610672826447ULL, + 0ULL, + 8616255423678309131ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 16790453619013574426ULL, + 12293110188280461455ULL, + 0ULL, + 11936155684350399435ULL, + 7941878618842992688ULL, + 3291412832639317925ULL, + 0ULL, + 2490818554466065633ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 8092837802684991161ULL, + 3735105958421766444ULL, + 0ULL, + 4389210678668986984ULL, + 16675286395804860751ULL, + 11885226664861542106ULL, + 0ULL, + 12379434820203482526ULL, + 6944006080901647973ULL, + 2577284614385265136ULL, + 0ULL, + 3240342666676093620ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 9090130062791399660ULL, + 4448689121532002169ULL, + 0ULL, + 3639106275939783741ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 15824496359018523741ULL, + 10953226629488970696ULL, + 0ULL, + 10898136436499554444ULL, + 6678683788476883831ULL, + 2248766861403210978ULL, + 0ULL, + 1155561727788445606ULL, + 0ULL, + 0ULL, + 0ULL, + 0ULL, + 4816531739325199870ULL, + 93880803705459307ULL, + 0ULL, + 1040842618995587375ULL + +}; // ===== CTConversion_XOR_1_4 ===== static const uint64_t CT_A_33R[33] = { - 0x00000001b3188ddf, 0x0000000166311bbf, 0x00000000cc62377f, 0x0000000198c46efe, 0x000000013188ddfd, 0x000000006311bbfb, 0x00000000c62377f6, 0x000000018c46efec, 0x00000001188ddfd9, 0x00000000311bbfb3, - 0x0000000062377f66, 0x00000000c46efecc, 0x0000000188ddfd98, 0x0000000111bbfb31, 0x000000002377f663, 0x0000000046efecc6, 0x000000008ddfd98c, 0x000000011bbfb318, 0x00000000377f6631, 0x000000006efecc62, - 0x00000000ddfd98c4, 0x00000001bbfb3188, 0x0000000177f66311, 0x00000000efecc623, 0x00000001dfd98c46, 0x00000001bfb3188d, 0x000000017f66311b, 0x00000000fecc6237, 0x00000001fd98c46e, 0x00000001fb3188dd, - 0x00000001f66311bb, 0x00000001ecc62377, 0x00000001d98c46ef + 0x00000001b3188ddf, 0x0000000166311bbf, 0x00000000cc62377f, + 0x0000000198c46efe, 0x000000013188ddfd, 0x000000006311bbfb, + 0x00000000c62377f6, 0x000000018c46efec, 0x00000001188ddfd9, + 0x00000000311bbfb3, 0x0000000062377f66, 0x00000000c46efecc, + 0x0000000188ddfd98, 0x0000000111bbfb31, 0x000000002377f663, + 0x0000000046efecc6, 0x000000008ddfd98c, 0x000000011bbfb318, + 0x00000000377f6631, 0x000000006efecc62, 0x00000000ddfd98c4, + 0x00000001bbfb3188, 0x0000000177f66311, 0x00000000efecc623, + 0x00000001dfd98c46, 0x00000001bfb3188d, 0x000000017f66311b, + 0x00000000fecc6237, 0x00000001fd98c46e, 0x00000001fb3188dd, + 0x00000001f66311bb, 0x00000001ecc62377, 0x00000001d98c46ef }; static const uint64_t CT_A_31L[31] = { - 0x38e1680c00000000, 0x71c2d01800000000, 0xe385a03000000000, 0xc70b406200000000, 0x8e1680c600000000, 0x1c2d018e00000000, 0x385a031c00000000, 0x70b4063800000000, 0xe1680c7000000000, 0xc2d018e200000000, - 0x85a031c600000000, 0xb40638e00000000, 0x1680c71c00000000, 0x2d018e3800000000, 0x5a031c7000000000, 0xb40638e000000000, 0x680c71c200000000, 0xd018e38400000000, 0xa031c70a00000000, 0x40638e1600000000, - 0x80c71c2c00000000, 0x18e385a00000000, 0x31c70b400000000, 0x638e16800000000, 0xc71c2d000000000, 0x18e385a000000000, 0x31c70b4000000000, 0x638e168000000000, 0xc71c2d0000000000, 0x8e385a0200000000, - 0x1c70b40600000000 + 0x38e1680c00000000, 0x71c2d01800000000, 0xe385a03000000000, + 0xc70b406200000000, 0x8e1680c600000000, 0x1c2d018e00000000, + 0x385a031c00000000, 0x70b4063800000000, 0xe1680c7000000000, + 0xc2d018e200000000, 0x85a031c600000000, 0xb40638e00000000, + 0x1680c71c00000000, 0x2d018e3800000000, 0x5a031c7000000000, + 0xb40638e000000000, 0x680c71c200000000, 0xd018e38400000000, + 0xa031c70a00000000, 0x40638e1600000000, 0x80c71c2c00000000, + 0x18e385a00000000, 0x31c70b400000000, 0x638e16800000000, + 0xc71c2d000000000, 0x18e385a000000000, 0x31c70b4000000000, + 0x638e168000000000, 0xc71c2d0000000000, 0x8e385a0200000000, + 0x1c70b40600000000 }; // ===== CTConversion_XOR_2_5 ===== static const uint64_t CT_G_33R[33] = { - 0x0000000133a367b1, 0x000000006746cf63, 0x00000000ce8d9ec6, 0x000000019d1b3d8c, 0x000000013a367b19, 0x00000000746cf633, 0x00000000e8d9ec66, 0x00000001d1b3d8cc, 0x00000001a367b199, 0x0000000146cf6333, - 0x000000008d9ec667, 0x000000011b3d8cce, 0x00000000367b199d, 0x000000006cf6333a, 0x00000000d9ec6674, 0x00000001b3d8cce8, 0x0000000167b199d1, 0x00000000cf6333a3, 0x000000019ec66746, 0x000000013d8cce8d, - 0x000000007b199d1b, 0x00000000f6333a36, 0x00000001ec66746c, 0x00000001d8cce8d9, 0x00000001b199d1b3, 0x000000016333a367, 0x00000000c66746cf, 0x000000018cce8d9e, 0x00000001199d1b3d, 0x00000000333a367b, - 0x0000000066746cf6, 0x00000000cce8d9ec, 0x0000000199d1b3d8 + 0x0000000133a367b1, 0x000000006746cf63, 0x00000000ce8d9ec6, + 0x000000019d1b3d8c, 0x000000013a367b19, 0x00000000746cf633, + 0x00000000e8d9ec66, 0x00000001d1b3d8cc, 0x00000001a367b199, + 0x0000000146cf6333, 0x000000008d9ec667, 0x000000011b3d8cce, + 0x00000000367b199d, 0x000000006cf6333a, 0x00000000d9ec6674, + 0x00000001b3d8cce8, 0x0000000167b199d1, 0x00000000cf6333a3, + 0x000000019ec66746, 0x000000013d8cce8d, 0x000000007b199d1b, + 0x00000000f6333a36, 0x00000001ec66746c, 0x00000001d8cce8d9, + 0x00000001b199d1b3, 0x000000016333a367, 0x00000000c66746cf, + 0x000000018cce8d9e, 0x00000001199d1b3d, 0x00000000333a367b, + 0x0000000066746cf6, 0x00000000cce8d9ec, 0x0000000199d1b3d8 }; static const uint64_t CT_G_31L[31] = { - 0x7d6453000000000, 0xfac8a6000000000, 0x1f5914c000000000, 0x3eb2298000000000, 0x7d64530000000000, 0xfac8a60000000000, 0xf5914c0200000000, 0xeb22980600000000, 0xd645300e00000000, 0xac8a601e00000000, - 0x5914c03e00000000, 0xb229807c00000000, 0x645300fa00000000, 0xc8a601f400000000, 0x914c03ea00000000, 0x229807d600000000, 0x45300fac00000000, 0x8a601f5800000000, 0x14c03eb200000000, 0x29807d6400000000, - 0x5300fac800000000, 0xa601f59000000000, 0x4c03eb2200000000, 0x9807d64400000000, 0x300fac8a00000000, 0x601f591400000000, 0xc03eb22800000000, 0x807d645200000000, 0xfac8a600000000, 0x1f5914c00000000, - 0x3eb229800000000 + 0x7d6453000000000, 0xfac8a6000000000, 0x1f5914c000000000, + 0x3eb2298000000000, 0x7d64530000000000, 0xfac8a60000000000, + 0xf5914c0200000000, 0xeb22980600000000, 0xd645300e00000000, + 0xac8a601e00000000, 0x5914c03e00000000, 0xb229807c00000000, + 0x645300fa00000000, 0xc8a601f400000000, 0x914c03ea00000000, + 0x229807d600000000, 0x45300fac00000000, 0x8a601f5800000000, + 0x14c03eb200000000, 0x29807d6400000000, 0x5300fac800000000, + 0xa601f59000000000, 0x4c03eb2200000000, 0x9807d64400000000, + 0x300fac8a00000000, 0x601f591400000000, 0xc03eb22800000000, + 0x807d645200000000, 0xfac8a600000000, 0x1f5914c00000000, + 0x3eb229800000000 }; // ===== CTConversion_XOR_3_6 ===== static const uint64_t CT_CT33R[33] = { - 0x00000000d6a221a1, 0x00000001ad444342, 0x000000015a888685, 0x00000000b5110d0b, 0x000000016a221a16, 0x00000000d444342d, 0x00000001a888685a, 0x000000015110d0b5, 0x00000000a221a16b, 0x00000001444342d6, - 0x00000000888685ad, 0x00000001110d0b5a, 0x00000000221a16b5, 0x0000000044342d6a, 0x0000000088685ad4, 0x0000000110d0b5a8, 0x0000000021a16b51, 0x000000004342d6a2, 0x000000008685ad44, 0x000000010d0b5a88, - 0x000000001a16b511, 0x00000000342d6a22, 0x00000000685ad444, 0x00000000d0b5a888, 0x00000001a16b5110, 0x0000000142d6a221, 0x0000000085ad4443, 0x000000010b5a8886, 0x0000000016b5110d, 0x000000002d6a221a, - 0x000000005ad44434, 0x00000000b5a88868, 0x000000016b5110d0 + 0x00000000d6a221a1, 0x00000001ad444342, 0x000000015a888685, + 0x00000000b5110d0b, 0x000000016a221a16, 0x00000000d444342d, + 0x00000001a888685a, 0x000000015110d0b5, 0x00000000a221a16b, + 0x00000001444342d6, 0x00000000888685ad, 0x00000001110d0b5a, + 0x00000000221a16b5, 0x0000000044342d6a, 0x0000000088685ad4, + 0x0000000110d0b5a8, 0x0000000021a16b51, 0x000000004342d6a2, + 0x000000008685ad44, 0x000000010d0b5a88, 0x000000001a16b511, + 0x00000000342d6a22, 0x00000000685ad444, 0x00000000d0b5a888, + 0x00000001a16b5110, 0x0000000142d6a221, 0x0000000085ad4443, + 0x000000010b5a8886, 0x0000000016b5110d, 0x000000002d6a221a, + 0x000000005ad44434, 0x00000000b5a88868, 0x000000016b5110d0 }; static const uint64_t CT_CT31L[31] = { - 0x2c555d4e00000000, 0x58aaba9c00000000, 0xb155753800000000, 0x62aaea7200000000, 0xc555d4e400000000, 0x8aaba9ca00000000, 0x1557539600000000, 0x2aaea72c00000000, 0x555d4e5800000000, 0xaaba9cb000000000, - 0x5575396200000000, 0xaaea72c400000000, 0x55d4e58a00000000, 0xaba9cb1400000000, 0x5753962a00000000, 0xaea72c5400000000, 0x5d4e58aa00000000, 0xba9cb15400000000, 0x753962aa00000000, 0xea72c55400000000, - 0xd4e58aaa00000000, 0xa9cb155600000000, 0x53962aae00000000, 0xa72c555c00000000, 0x4e58aaba00000000, 0x9cb1557400000000, 0x3962aaea00000000, 0x72c555d400000000, 0xe58aaba800000000, 0xcb15575200000000, - 0x962aaea600000000 + 0x2c555d4e00000000, 0x58aaba9c00000000, 0xb155753800000000, + 0x62aaea7200000000, 0xc555d4e400000000, 0x8aaba9ca00000000, + 0x1557539600000000, 0x2aaea72c00000000, 0x555d4e5800000000, + 0xaaba9cb000000000, 0x5575396200000000, 0xaaea72c400000000, + 0x55d4e58a00000000, 0xaba9cb1400000000, 0x5753962a00000000, + 0xaea72c5400000000, 0x5d4e58aa00000000, 0xba9cb15400000000, + 0x753962aa00000000, 0xea72c55400000000, 0xd4e58aaa00000000, + 0xa9cb155600000000, 0x53962aae00000000, 0xa72c555c00000000, + 0x4e58aaba00000000, 0x9cb1557400000000, 0x3962aaea00000000, + 0x72c555d400000000, 0xe58aaba800000000, 0xcb15575200000000, + 0x962aaea600000000 }; - // ===== GAConversion_XOR_1_4 ===== static const uint64_t GA_GA33R[33] = { - 0x00000001392bb7d6, 0x0000000072576fad, 0x00000000e4aedf5a, 0x00000001c95dbeb4, 0x0000000192bb7d69, 0x000000012576fad3, 0x000000004aedf5a7, 0x0000000095dbeb4e, 0x000000012bb7d69c, 0x00000000576fad39, - 0x00000000aedf5a72, 0x000000015dbeb4e4, 0x00000000bb7d69c9, 0x0000000176fad392, 0x00000000edf5a725, 0x00000001dbeb4e4a, 0x00000001b7d69c95, 0x000000016fad392b, 0x00000000df5a7257, 0x00000001beb4e4ae, - 0x000000017d69c95d, 0x00000000fad392bb, 0x00000001f5a72576, 0x00000001eb4e4aed, 0x00000001d69c95db, 0x00000001ad392bb7, 0x000000015a72576f, 0x00000000b4e4aedf, 0x0000000169c95dbe, 0x00000000d392bb7d, - 0x00000001a72576fa, 0x000000014e4aedf5, 0x000000009c95dbeb + 0x00000001392bb7d6, 0x0000000072576fad, 0x00000000e4aedf5a, + 0x00000001c95dbeb4, 0x0000000192bb7d69, 0x000000012576fad3, + 0x000000004aedf5a7, 0x0000000095dbeb4e, 0x000000012bb7d69c, + 0x00000000576fad39, 0x00000000aedf5a72, 0x000000015dbeb4e4, + 0x00000000bb7d69c9, 0x0000000176fad392, 0x00000000edf5a725, + 0x00000001dbeb4e4a, 0x00000001b7d69c95, 0x000000016fad392b, + 0x00000000df5a7257, 0x00000001beb4e4ae, 0x000000017d69c95d, + 0x00000000fad392bb, 0x00000001f5a72576, 0x00000001eb4e4aed, + 0x00000001d69c95db, 0x00000001ad392bb7, 0x000000015a72576f, + 0x00000000b4e4aedf, 0x0000000169c95dbe, 0x00000000d392bb7d, + 0x00000001a72576fa, 0x000000014e4aedf5, 0x000000009c95dbeb }; static const uint64_t GA_GA31L[31] = { - 0x4d9c9d2800000000, 0x9b393a5000000000, 0x367274a200000000, 0x6ce4e94400000000, 0xd9c9d28800000000, 0xb393a51200000000, 0x67274a2600000000, 0xce4e944c00000000, 0x9c9d289a00000000, 0x393a513600000000, - 0x7274a26c00000000, 0xe4e944d800000000, 0xc9d289b200000000, 0x93a5136600000000, 0x274a26ce00000000, 0x4e944d9c00000000, 0x9d289b3800000000, 0x3a51367200000000, 0x74a26ce400000000, 0xe944d9c800000000, - 0xd289b39200000000, 0xa513672600000000, 0x4a26ce4e00000000, 0x944d9c9c00000000, 0x289b393a00000000, 0x5136727400000000, 0xa26ce4e800000000, 0x44d9c9d200000000, 0x89b393a400000000, 0x1367274a00000000, - 0x26ce4e9400000000 + 0x4d9c9d2800000000, 0x9b393a5000000000, 0x367274a200000000, + 0x6ce4e94400000000, 0xd9c9d28800000000, 0xb393a51200000000, + 0x67274a2600000000, 0xce4e944c00000000, 0x9c9d289a00000000, + 0x393a513600000000, 0x7274a26c00000000, 0xe4e944d800000000, + 0xc9d289b200000000, 0x93a5136600000000, 0x274a26ce00000000, + 0x4e944d9c00000000, 0x9d289b3800000000, 0x3a51367200000000, + 0x74a26ce400000000, 0xe944d9c800000000, 0xd289b39200000000, + 0xa513672600000000, 0x4a26ce4e00000000, 0x944d9c9c00000000, + 0x289b393a00000000, 0x5136727400000000, 0xa26ce4e800000000, + 0x44d9c9d200000000, 0x89b393a400000000, 0x1367274a00000000, + 0x26ce4e9400000000 }; // ===== GAConversion_XOR_2_5 ===== static const uint64_t GA_C_33R[33] = { - 0x00000001c6e35043, 0x000000018dc6a087, 0x000000011b8d410f, 0x00000000371a821f, 0x000000006e35043e, 0x00000000dc6a087c, 0x00000001b8d410f8, 0x0000000171a821f1, 0x00000000e35043e3, 0x00000001c6a087c6, - 0x000000018d410f8d, 0x000000011a821f1b, 0x0000000035043e37, 0x000000006a087c6e, 0x00000000d410f8dc, 0x00000001a821f1b8, 0x000000015043e371, 0x00000000a087c6e3, 0x00000001410f8dc6, 0x00000000821f1b8d, - 0x00000001043e371a, 0x00000000087c6e35, 0x0000000010f8dc6a, 0x0000000021f1b8d4, 0x0000000043e371a8, 0x0000000087c6e350, 0x000000010f8dc6a0, 0x000000001f1b8d41, 0x000000003e371a82, 0x000000007c6e3504, - 0x00000000f8dc6a08, 0x00000001f1b8d410, 0x00000001e371a821 + 0x00000001c6e35043, 0x000000018dc6a087, 0x000000011b8d410f, + 0x00000000371a821f, 0x000000006e35043e, 0x00000000dc6a087c, + 0x00000001b8d410f8, 0x0000000171a821f1, 0x00000000e35043e3, + 0x00000001c6a087c6, 0x000000018d410f8d, 0x000000011a821f1b, + 0x0000000035043e37, 0x000000006a087c6e, 0x00000000d410f8dc, + 0x00000001a821f1b8, 0x000000015043e371, 0x00000000a087c6e3, + 0x00000001410f8dc6, 0x00000000821f1b8d, 0x00000001043e371a, + 0x00000000087c6e35, 0x0000000010f8dc6a, 0x0000000021f1b8d4, + 0x0000000043e371a8, 0x0000000087c6e350, 0x000000010f8dc6a0, + 0x000000001f1b8d41, 0x000000003e371a82, 0x000000007c6e3504, + 0x00000000f8dc6a08, 0x00000001f1b8d410, 0x00000001e371a821 }; static const uint64_t GA_C_31L[31] = { - 0xe06d72600000000, 0x1c0dae4c00000000, 0x381b5c9800000000, 0x7036b93000000000, 0xe06d726000000000, 0xc0dae4c200000000, 0x81b5c98600000000, 0x36b930e00000000, 0x6d7261c00000000, 0xdae4c3800000000, - 0x1b5c987000000000, 0x36b930e000000000, 0x6d7261c000000000, 0xdae4c38000000000, 0xb5c9870200000000, 0x6b930e0600000000, 0xd7261c0c00000000, 0xae4c381a00000000, 0x5c98703600000000, 0xb930e06c00000000, - 0x7261c0da00000000, 0xe4c381b400000000, 0xc987036a00000000, 0x930e06d600000000, 0x261c0dae00000000, 0x4c381b5c00000000, 0x987036b800000000, 0x30e06d7200000000, 0x61c0dae400000000, 0xc381b5c800000000, - 0x87036b9200000000 + 0xe06d72600000000, 0x1c0dae4c00000000, 0x381b5c9800000000, + 0x7036b93000000000, 0xe06d726000000000, 0xc0dae4c200000000, + 0x81b5c98600000000, 0x36b930e00000000, 0x6d7261c00000000, + 0xdae4c3800000000, 0x1b5c987000000000, 0x36b930e000000000, + 0x6d7261c000000000, 0xdae4c38000000000, 0xb5c9870200000000, + 0x6b930e0600000000, 0xd7261c0c00000000, 0xae4c381a00000000, + 0x5c98703600000000, 0xb930e06c00000000, 0x7261c0da00000000, + 0xe4c381b400000000, 0xc987036a00000000, 0x930e06d600000000, + 0x261c0dae00000000, 0x4c381b5c00000000, 0x987036b800000000, + 0x30e06d7200000000, 0x61c0dae400000000, 0xc381b5c800000000, + 0x87036b9200000000 }; // ===== GAConversion_XOR_3_6 ===== static const uint64_t GA_T_33R[33] = { - 0x00000000dea57b07, 0x00000001bd4af60e, 0x000000017a95ec1d, 0x00000000f52bd83b, 0x00000001ea57b076, 0x00000001d4af60ed, 0x00000001a95ec1db, 0x0000000152bd83b7, 0x00000000a57b076f, 0x000000014af60ede, - 0x0000000095ec1dbd, 0x000000012bd83b7a, 0x0000000057b076f5, 0x00000000af60edea, 0x000000015ec1dbd4, 0x00000000bd83b7a9, 0x000000017b076f52, 0x00000000f60edea5, 0x00000001ec1dbd4a, 0x00000001d83b7a95, - 0x00000001b076f52b, 0x0000000160edea57, 0x00000000c1dbd4af, 0x0000000183b7a95e, 0x00000001076f52bd, 0x000000000edea57b, 0x000000001dbd4af6, 0x000000003b7a95ec, 0x0000000076f52bd8, 0x00000000edea57b0, - 0x00000001dbd4af60, 0x00000001b7a95ec1, 0x000000016f52bd83 + 0x00000000dea57b07, 0x00000001bd4af60e, 0x000000017a95ec1d, + 0x00000000f52bd83b, 0x00000001ea57b076, 0x00000001d4af60ed, + 0x00000001a95ec1db, 0x0000000152bd83b7, 0x00000000a57b076f, + 0x000000014af60ede, 0x0000000095ec1dbd, 0x000000012bd83b7a, + 0x0000000057b076f5, 0x00000000af60edea, 0x000000015ec1dbd4, + 0x00000000bd83b7a9, 0x000000017b076f52, 0x00000000f60edea5, + 0x00000001ec1dbd4a, 0x00000001d83b7a95, 0x00000001b076f52b, + 0x0000000160edea57, 0x00000000c1dbd4af, 0x0000000183b7a95e, + 0x00000001076f52bd, 0x000000000edea57b, 0x000000001dbd4af6, + 0x000000003b7a95ec, 0x0000000076f52bd8, 0x00000000edea57b0, + 0x00000001dbd4af60, 0x00000001b7a95ec1, 0x000000016f52bd83 }; static const uint64_t GA_T_31L[31] = { - 0x13a8fe800000000, 0x2751fd000000000, 0x4ea3fa000000000, 0x9d47f4000000000, 0x13a8fe8000000000, 0x2751fd0000000000, 0x4ea3fa0000000000, 0x9d47f40000000000, 0x3a8fe80200000000, 0x751fd00400000000, - 0xea3fa00800000000, 0xd47f401200000000, 0xa8fe802600000000, 0x51fd004e00000000, 0xa3fa009c00000000, 0x47f4013a00000000, 0x8fe8027400000000, 0x1fd004ea00000000, 0x3fa009d400000000, 0x7f4013a800000000, - 0xfe80275000000000, 0xfd004ea200000000, 0xfa009d4600000000, 0xf4013a8e00000000, 0xe802751e00000000, 0xd004ea3e00000000, 0xa009d47e00000000, 0x4013a8fe00000000, 0x802751fc00000000, 0x4ea3fa00000000, - 0x9d47f400000000 + 0x13a8fe800000000, 0x2751fd000000000, 0x4ea3fa000000000, + 0x9d47f4000000000, 0x13a8fe8000000000, 0x2751fd0000000000, + 0x4ea3fa0000000000, 0x9d47f40000000000, 0x3a8fe80200000000, + 0x751fd00400000000, 0xea3fa00800000000, 0xd47f401200000000, + 0xa8fe802600000000, 0x51fd004e00000000, 0xa3fa009c00000000, + 0x47f4013a00000000, 0x8fe8027400000000, 0x1fd004ea00000000, + 0x3fa009d400000000, 0x7f4013a800000000, 0xfe80275000000000, + 0xfd004ea200000000, 0xfa009d4600000000, 0xf4013a8e00000000, + 0xe802751e00000000, 0xd004ea3e00000000, 0xa009d47e00000000, + 0x4013a8fe00000000, 0x802751fc00000000, 0x4ea3fa00000000, + 0x9d47f400000000 }; -//TODO see if alias works +// TODO see if alias works const uint64_t XX_N_33R[33] = { SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, - SEED_N, SEED_N, SEED_N }; + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N }; const uint64_t XX_N_31L[31] = { SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, - SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, - SEED_N }; - + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, SEED_N, + SEED_N }; const uint64_t* const BS_CT_MS_TAB_33R[ASCII_SIZE] = { - XX_N_33R, GA_T_33R, XX_N_33R, GA_GA33R, GA_GA33R, GA_GA33R, XX_N_33R, GA_C_33R, // 0..7 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 8..15 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 16..23 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 24..31 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 32..39 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 40..47 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 48..55 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 56..63 - XX_N_33R, CT_A_33R, XX_N_33R, CT_CT33R, XX_N_33R, XX_N_33R, XX_N_33R, CT_G_33R, // 64..71 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 72..79 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, CT_CT33R, CT_CT33R, XX_N_33R, XX_N_33R, // 80..87 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 88..95 - XX_N_33R, CT_A_33R, XX_N_33R, CT_CT33R, XX_N_33R, XX_N_33R, XX_N_33R, CT_G_33R, // 96..103 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 104..111 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, CT_CT33R, CT_CT33R, XX_N_33R, XX_N_33R, // 112..119 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 120..127 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 128..135 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 136..143 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 144..151 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 152..159 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 160..167 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 168..175 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 176..183 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 184..191 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 192..199 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 200..207 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 208..215 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 216..223 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 224..231 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 232..239 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 240..247 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R // 248..255 + XX_N_33R, GA_T_33R, XX_N_33R, GA_GA33R, + GA_GA33R, GA_GA33R, XX_N_33R, GA_C_33R, // 0..7 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 8..15 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 16..23 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 24..31 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 32..39 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 40..47 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 48..55 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 56..63 + XX_N_33R, CT_A_33R, XX_N_33R, CT_CT33R, + XX_N_33R, XX_N_33R, XX_N_33R, CT_G_33R, // 64..71 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 72..79 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + CT_CT33R, CT_CT33R, XX_N_33R, XX_N_33R, // 80..87 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 88..95 + XX_N_33R, CT_A_33R, XX_N_33R, CT_CT33R, + XX_N_33R, XX_N_33R, XX_N_33R, CT_G_33R, // 96..103 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 104..111 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + CT_CT33R, CT_CT33R, XX_N_33R, XX_N_33R, // 112..119 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 120..127 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 128..135 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 136..143 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 144..151 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 152..159 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 160..167 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 168..175 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 176..183 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 184..191 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 192..199 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 200..207 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 208..215 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 216..223 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 224..231 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 232..239 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 240..247 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R // 248..255 }; const uint64_t* const BS_CT_MS_TAB_31L[ASCII_SIZE] = { - XX_N_31L, GA_T_31L, XX_N_31L, GA_GA31L, GA_GA31L, GA_GA31L, XX_N_31L, GA_C_31L, // 0..7 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 8..15 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 16..23 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 24..31 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 32..39 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 40..47 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 48..55 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 56..63 - XX_N_31L, CT_A_31L, XX_N_31L, CT_CT31L, XX_N_31L, XX_N_31L, XX_N_31L, CT_G_31L, // 64..71 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 72..79 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, CT_CT31L, CT_CT31L, XX_N_31L, XX_N_31L, // 80..87 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 88..95 - XX_N_31L, CT_A_31L, XX_N_31L, CT_CT31L, XX_N_31L, XX_N_31L, XX_N_31L, CT_G_31L, // 96..103 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 104..111 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, CT_CT31L, CT_CT31L, XX_N_31L, XX_N_31L, // 112..119 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 120..127 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 128..135 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 136..143 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 144..151 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 152..159 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 160..167 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 168..175 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 176..183 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 184..191 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 192..199 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 200..207 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 208..215 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 216..223 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 224..231 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 232..239 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 240..247 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L // 248..255 + XX_N_31L, GA_T_31L, XX_N_31L, GA_GA31L, + GA_GA31L, GA_GA31L, XX_N_31L, GA_C_31L, // 0..7 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 8..15 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 16..23 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 24..31 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 32..39 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 40..47 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 48..55 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 56..63 + XX_N_31L, CT_A_31L, XX_N_31L, CT_CT31L, + XX_N_31L, XX_N_31L, XX_N_31L, CT_G_31L, // 64..71 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 72..79 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + CT_CT31L, CT_CT31L, XX_N_31L, XX_N_31L, // 80..87 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 88..95 + XX_N_31L, CT_A_31L, XX_N_31L, CT_CT31L, + XX_N_31L, XX_N_31L, XX_N_31L, CT_G_31L, // 96..103 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 104..111 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + CT_CT31L, CT_CT31L, XX_N_31L, XX_N_31L, // 112..119 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 120..127 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 128..135 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 136..143 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 144..151 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 152..159 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 160..167 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 168..175 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 176..183 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 184..191 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 192..199 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 200..207 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 208..215 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 216..223 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 224..231 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 232..239 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 240..247 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L // 248..255 }; const uint64_t* const BS_GA_MS_TAB_33R[ASCII_SIZE] = { - XX_N_33R, CT_CT33R, XX_N_33R, CT_G_33R, CT_A_33R, CT_A_33R, XX_N_33R, CT_CT33R, // 0..7 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 8..15 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 16..23 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 24..31 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 32..39 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 40..47 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 48..55 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 56..63 - XX_N_33R, GA_GA33R, XX_N_33R, GA_C_33R, XX_N_33R, XX_N_33R, XX_N_33R, GA_GA33R, // 64..71 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 72..79 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, GA_T_33R, GA_T_33R, XX_N_33R, XX_N_33R, // 80..87 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 88..95 - XX_N_33R, GA_GA33R, XX_N_33R, GA_C_33R, XX_N_33R, XX_N_33R, XX_N_33R, GA_GA33R, // 96..103 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 104..111 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, GA_T_33R, GA_T_33R, XX_N_33R, XX_N_33R, // 112..119 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 120..127 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 128..135 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 136..143 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 144..151 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 152..159 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 160..167 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 168..175 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 176..183 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 184..191 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 192..199 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 200..207 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 208..215 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 216..223 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 224..231 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 232..239 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 240..247 - XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R // 248..255 + XX_N_33R, CT_CT33R, XX_N_33R, CT_G_33R, + CT_A_33R, CT_A_33R, XX_N_33R, CT_CT33R, // 0..7 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 8..15 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 16..23 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 24..31 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 32..39 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 40..47 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 48..55 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 56..63 + XX_N_33R, GA_GA33R, XX_N_33R, GA_C_33R, + XX_N_33R, XX_N_33R, XX_N_33R, GA_GA33R, // 64..71 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 72..79 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + GA_T_33R, GA_T_33R, XX_N_33R, XX_N_33R, // 80..87 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 88..95 + XX_N_33R, GA_GA33R, XX_N_33R, GA_C_33R, + XX_N_33R, XX_N_33R, XX_N_33R, GA_GA33R, // 96..103 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 104..111 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + GA_T_33R, GA_T_33R, XX_N_33R, XX_N_33R, // 112..119 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 120..127 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 128..135 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 136..143 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 144..151 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 152..159 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 160..167 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 168..175 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 176..183 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 184..191 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 192..199 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 200..207 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 208..215 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 216..223 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 224..231 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 232..239 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, // 240..247 + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R, + XX_N_33R, XX_N_33R, XX_N_33R, XX_N_33R // 248..255 }; const uint64_t* const BS_GA_MS_TAB_31L[ASCII_SIZE] = { - XX_N_31L, CT_CT31L, XX_N_31L, CT_G_31L, CT_A_31L, CT_A_31L, XX_N_31L, CT_CT31L, // 0..7 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 8..15 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 16..23 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 24..31 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 32..39 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 40..47 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 48..55 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 56..63 - XX_N_31L, GA_GA31L, XX_N_31L, GA_C_31L, XX_N_31L, XX_N_31L, XX_N_31L, GA_GA31L, // 64..71 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 72..79 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, GA_T_31L, GA_T_31L, XX_N_31L, XX_N_31L, // 80..87 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 88..95 - XX_N_31L, GA_GA31L, XX_N_31L, GA_C_31L, XX_N_31L, XX_N_31L, XX_N_31L, GA_GA31L, // 96..103 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 104..111 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, GA_T_31L, GA_T_31L, XX_N_31L, XX_N_31L, // 112..119 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 120..127 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 128..135 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 136..143 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 144..151 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 152..159 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 160..167 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 168..175 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 176..183 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 184..191 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 192..199 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 200..207 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 208..215 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 216..223 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 224..231 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 232..239 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 240..247 - XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L // 248..255 + XX_N_31L, CT_CT31L, XX_N_31L, CT_G_31L, + CT_A_31L, CT_A_31L, XX_N_31L, CT_CT31L, // 0..7 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 8..15 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 16..23 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 24..31 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 32..39 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 40..47 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 48..55 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 56..63 + XX_N_31L, GA_GA31L, XX_N_31L, GA_C_31L, + XX_N_31L, XX_N_31L, XX_N_31L, GA_GA31L, // 64..71 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 72..79 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + GA_T_31L, GA_T_31L, XX_N_31L, XX_N_31L, // 80..87 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 88..95 + XX_N_31L, GA_GA31L, XX_N_31L, GA_C_31L, + XX_N_31L, XX_N_31L, XX_N_31L, GA_GA31L, // 96..103 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 104..111 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + GA_T_31L, GA_T_31L, XX_N_31L, XX_N_31L, // 112..119 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 120..127 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 128..135 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 136..143 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 144..151 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 152..159 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 160..167 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 168..175 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 176..183 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 184..191 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 192..199 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 200..207 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 208..215 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 216..223 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 224..231 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 232..239 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, // 240..247 + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L, + XX_N_31L, XX_N_31L, XX_N_31L, XX_N_31L // 248..255 }; const uint8_t BS_CT_CONVERT_TAB[ASCII_SIZE] = { @@ -669,4 +1440,1306 @@ const uint8_t BS_GA_RC_CONVERT_TAB[ASCII_SIZE] = { 255, 255, 255, 255, 255, 255, 255, 255 // 248..255 }; -} \ No newline at end of file +constexpr uint8_t BS_CONVERT_TAB[128] = { + // 0–64 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 3, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +constexpr uint8_t BS_RC_CONVERT_TAB[128] = { + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 3, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 3, 0, 0, 0, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; + +// Group CG XOR pairs (18 seeds → 9 pairs) +static const uint64_t CG_group1 = 0x85447b35f730b24eULL; +static const uint64_t CG_group2 = 0x1e7ec42ae67bf290ULL; +static const uint64_t CG_group3 = 0xe630f41524612363ULL; +static const uint64_t CG_group4 = 0x891759872e4a1a40ULL; +static const uint64_t CG_group5 = 0x10a9d125b4781d1bULL; +static const uint64_t CG_group6 = 0xe345fa6e0b3efe26ULL; +static const uint64_t CG_group7 = 0xed73b07d7795d703ULL; +static const uint64_t CG_group8 = 0xc05a70c787ddc030ULL; +static const uint64_t CG_group9 = 0xb3e7ea1d5f69e07dULL; + +// Group CC XOR pairs (10 seeds → 5 pairs) +static const uint64_t CC_group1 = 0x35186b8fc962b995ULL; +static const uint64_t CC_group2 = 0xeeecbd3086db4364ULL; +static const uint64_t CC_group3 = 0x15dc1dfcf2d821d7ULL; +static const uint64_t CC_group4 = 0x501734bd0947b87dULL; +static const uint64_t CC_group5 = 0xda7b6ec0c3c8457aULL; + +// Group CA XOR pairs (18 seeds → 9 pairs) +static const uint64_t CA_group1 = 0xd2e69a6245391840ULL; +static const uint64_t CA_group2 = 0xfcdf37536278deafULL; +static const uint64_t CA_group3 = 0x72e4656f024fd310ULL; +static const uint64_t CA_group4 = 0xdee651b2976fa1f8ULL; +static const uint64_t CA_group5 = 0xf3288907ec8cd8f0ULL; +static const uint64_t CA_group6 = 0x129d2554868dad85ULL; +static const uint64_t CA_group7 = 0x23f706d477652a44ULL; +static const uint64_t CA_group8 = 0x3fd2c4487b9f6d6cULL; +static const uint64_t CA_group9 = 0xa8a3d2ed1139ceeaULL; + +// Group CT XOR pairs (18 seeds → 9 pairs) +static const uint64_t CT_group1 = 0x385ed2689c215294ULL; +static const uint64_t CT_group2 = 0x5997dfab383a421eULL; +static const uint64_t CT_group3 = 0x2c62c0516c643607ULL; +static const uint64_t CT_group4 = 0x5186a5a92af12d90ULL; +static const uint64_t CT_group5 = 0x34fd2f6305426fbeULL; +static const uint64_t CT_group6 = 0xfd1fccb21e12f0f6ULL; +static const uint64_t CT_group7 = 0xde1be1658d22c3a7ULL; +static const uint64_t CT_group8 = 0x669139b5a878cd6cULL; +static const uint64_t CT_group9 = 0x36f4413f12bacedcULL; + +constexpr uint64_t CG_DIMER_TAB[45] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, CG_group4, CG_group2, CG_group8, CG_group2, 0, 0, 0, 0, 0, // AA,AC,AG,AT + 0, CG_group1, CG_group7, CG_group1, CG_group9, 0, 0, 0, 0, 0, // CA,CC,CG,CT + 0, CG_group4, CG_group2, CG_group6, CG_group2, 0, 0, 0, 0, 0, // GA,GC,GG,GT + 0, CG_group5, CG_group3, CG_group1, CG_group3 // TA,TC,TG,TT +}; + +constexpr uint64_t CC_DIMER_TAB[45] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0–9 + 0, CC_group2, CC_group3, CC_group2, CC_group3, 0, 0, 0, 0, 0, // AA,AC,AG,AT + 0, CC_group4, CC_group1, CC_group5, CC_group1, 0, 0, 0, 0, 0, // CA,CC,CG,CT + 0, CC_group2, CC_group3, CC_group2, CC_group3, 0, 0, 0, 0, 0, // GA,GC,GG,GT + 0, CC_group4, CC_group1, CC_group4, CC_group1 // TA,TC,TG,TT +}; + +constexpr uint64_t CA_DIMER_TAB[45] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, CA_group3, CA_group2, CA_group7, CA_group2, 0, 0, 0, 0, 0, // AA,AC,AG,AT + 0, CA_group1, CA_group6, CA_group9, CA_group8, 0, 0, 0, 0, 0, // CA,CC,CG,CT + 0, CA_group3, CA_group2, CA_group5, CA_group2, 0, 0, 0, 0, 0, // GA,GC,GG,GT + 0, CA_group1, CA_group4, CA_group1, CA_group4 // TA,TC,TG,TT +}; + +constexpr uint64_t CT_DIMER_TAB[45] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, CT_group2, CT_group3, CT_group2, CT_group3, 0, 0, 0, 0, 0, // AA,AC,AG,AT + 0, CT_group8, CT_group5, CT_group9, CT_group1, 0, 0, 0, 0, 0, // CA,CC,CG,CT + 0, CT_group2, CT_group3, CT_group4, CT_group3, 0, 0, 0, 0, 0, // GA,GC,GG,GT + 0, CT_group6, CT_group1, CT_group7, CT_group1 // TA,TC,TG,TT +}; + +// Generic dimer-based rolling hash +inline std::vector +base_forward_bs_hash(const char* seq, + bool has_second, + unsigned k, + const uint64_t* DIMER_TAB) +{ + uint64_t h0 = 0; + uint64_t h1 = 0; + + std::cout << "[BASE_HASH] Computing forward hashes, k=" << k + << ", has_second=" << has_second << "\n"; + + for (unsigned i = 0; i < k; i += 2) { + h0 = srol(h0, 1); + const char c1 = seq[i]; + const char c2 = seq[i + 1]; + const uint8_t a = BS_CONVERT_TAB[(unsigned char)c1]; + const uint8_t b = BS_CONVERT_TAB[(unsigned char)c2]; + const uint8_t loc = 10 * a + b; // yields 11, 12, ..., 44 + h0 ^= DIMER_TAB[loc]; + + std::cout << "[BASE_HASH] h0 loop " << i << " | dimer='" << c1 << c2 + << "' loc=" << (unsigned)loc << " | h0=" << h0 << "\n"; + } + + if (!has_second) + return { h0, 0 }; + + for (unsigned i = 1; i < k + 1; i += 2) { + h1 = srol(h1, 1); + const char c1 = seq[i]; + const char c2 = seq[i + 1]; + const uint8_t a = BS_CONVERT_TAB[(unsigned char)c1]; + const uint8_t b = BS_CONVERT_TAB[(unsigned char)c2]; + const uint8_t loc = 10 * a + b; + h1 ^= DIMER_TAB[loc]; + + std::cout << "[BASE_HASH] h1 loop " << i << " | dimer='" << c1 << c2 + << "' loc=" << (unsigned)loc << " | h1=" << h1 << "\n"; + } + + return { h0, h1 }; +} + +// ===== CC_XOR_1_6 ===== +static const uint64_t CC_group1_33R[33] = { + 0x00000001c962b995, 0x0000000192c5732b, 0x00000001258ae657, + 0x000000004b15ccaf, 0x00000000962b995e, 0x000000012c5732bc, + 0x0000000058ae6579, 0x00000000b15ccaf2, 0x0000000162b995e4, + 0x00000000c5732bc9, 0x000000018ae65792, 0x0000000115ccaf25, + 0x000000002b995e4b, 0x000000005732bc96, 0x00000000ae65792c, + 0x000000015ccaf258, 0x00000000b995e4b1, 0x00000001732bc962, + 0x00000000e65792c5, 0x00000001ccaf258a, 0x00000001995e4b15, + 0x0000000132bc962b, 0x0000000065792c57, 0x00000000caf258ae, + 0x0000000195e4b15c, 0x000000012bc962b9, 0x000000005792c573, + 0x00000000af258ae6, 0x000000015e4b15cc, 0x00000000bc962b99, + 0x00000001792c5732, 0x00000000f258ae65, 0x00000001e4b15cca +}; + +static const uint64_t CC_group1_31L[31] = { + 0x35186b8e00000000, 0x6a30d71c00000000, 0xd461ae3800000000, + 0xa8c35c7200000000, 0x5186b8e600000000, 0xa30d71cc00000000, + 0x461ae39a00000000, 0x8c35c73400000000, 0x186b8e6a00000000, + 0x30d71cd400000000, 0x61ae39a800000000, 0xc35c735000000000, + 0x86b8e6a200000000, 0xd71cd4600000000, 0x1ae39a8c00000000, + 0x35c7351800000000, 0x6b8e6a3000000000, 0xd71cd46000000000, + 0xae39a8c200000000, 0x5c73518600000000, 0xb8e6a30c00000000, + 0x71cd461a00000000, 0xe39a8c3400000000, 0xc735186a00000000, + 0x8e6a30d600000000, 0x1cd461ae00000000, 0x39a8c35c00000000, + 0x735186b800000000, 0xe6a30d7000000000, 0xcd461ae200000000, + 0x9a8c35c600000000 +}; + +// ===== CC_XOR_2_7 ===== +static const uint64_t CC_group2_33R[33] = { + 0x0000000086db4364, 0x000000010db686c8, 0x000000001b6d0d91, + 0x0000000036da1b22, 0x000000006db43644, 0x00000000db686c88, + 0x00000001b6d0d910, 0x000000016da1b221, 0x00000000db436443, + 0x00000001b686c886, 0x000000016d0d910d, 0x00000000da1b221b, + 0x00000001b4364436, 0x00000001686c886d, 0x00000000d0d910db, + 0x00000001a1b221b6, 0x000000014364436d, 0x0000000086c886db, + 0x000000010d910db6, 0x000000001b221b6d, 0x00000000364436da, + 0x000000006c886db4, 0x00000000d910db68, 0x00000001b221b6d0, + 0x0000000164436da1, 0x00000000c886db43, 0x00000001910db686, + 0x00000001221b6d0d, 0x000000004436da1b, 0x00000000886db436, + 0x0000000110db686c, 0x0000000021b6d0d9, 0x00000000436da1b2 +}; + +static const uint64_t CC_group2_31L[31] = { + 0xeeecbd3000000000, 0xddd97a6200000000, 0xbbb2f4c600000000, + 0x7765e98e00000000, 0xeecbd31c00000000, 0xdd97a63a00000000, + 0xbb2f4c7600000000, 0x765e98ee00000000, 0xecbd31dc00000000, + 0xd97a63ba00000000, 0xb2f4c77600000000, 0x65e98eee00000000, + 0xcbd31ddc00000000, 0x97a63bba00000000, 0x2f4c777600000000, + 0x5e98eeec00000000, 0xbd31ddd800000000, 0x7a63bbb200000000, + 0xf4c7776400000000, 0xe98eeeca00000000, 0xd31ddd9600000000, + 0xa63bbb2e00000000, 0x4c77765e00000000, 0x98eeecbc00000000, + 0x31ddd97a00000000, 0x63bbb2f400000000, 0xc77765e800000000, + 0x8eeecbd200000000, 0x1ddd97a600000000, 0x3bbb2f4c00000000, + 0x77765e9800000000 +}; + +// ===== CC_XOR_3_8 ===== +static const uint64_t CC_group3_33R[33] = { + 0x00000000f2d821d7, 0x00000001e5b043ae, 0x00000001cb60875d, + 0x0000000196c10ebb, 0x000000012d821d77, 0x000000005b043aef, + 0x00000000b60875de, 0x000000016c10ebbc, 0x00000000d821d779, + 0x00000001b043aef2, 0x0000000160875de5, 0x00000000c10ebbcb, + 0x00000001821d7796, 0x00000001043aef2d, 0x000000000875de5b, + 0x0000000010ebbcb6, 0x0000000021d7796c, 0x0000000043aef2d8, + 0x00000000875de5b0, 0x000000010ebbcb60, 0x000000001d7796c1, + 0x000000003aef2d82, 0x0000000075de5b04, 0x00000000ebbcb608, + 0x00000001d7796c10, 0x00000001aef2d821, 0x000000015de5b043, + 0x00000000bbcb6087, 0x000000017796c10e, 0x00000000ef2d821d, + 0x00000001de5b043a, 0x00000001bcb60875, 0x00000001796c10eb +}; + +static const uint64_t CC_group3_31L[31] = { + 0x15dc1dfc00000000, 0x2bb83bf800000000, 0x577077f000000000, + 0xaee0efe000000000, 0x5dc1dfc200000000, 0xbb83bf8400000000, + 0x77077f0a00000000, 0xee0efe1400000000, 0xdc1dfc2a00000000, + 0xb83bf85600000000, 0x7077f0ae00000000, 0xe0efe15c00000000, + 0xc1dfc2ba00000000, 0x83bf857600000000, 0x77f0aee00000000, + 0xefe15dc00000000, 0x1dfc2bb800000000, 0x3bf8577000000000, + 0x77f0aee000000000, 0xefe15dc000000000, 0xdfc2bb8200000000, + 0xbf85770600000000, 0x7f0aee0e00000000, 0xfe15dc1c00000000, + 0xfc2bb83a00000000, 0xf857707600000000, 0xf0aee0ee00000000, + 0xe15dc1de00000000, 0xc2bb83be00000000, 0x8577077e00000000, + 0xaee0efe00000000 +}; + +// ===== CC_XOR_4_9 ===== +static const uint64_t CC_group4_33R[33] = { + 0x000000010947b87d, 0x00000000128f70fb, 0x00000000251ee1f6, + 0x000000004a3dc3ec, 0x00000000947b87d8, 0x0000000128f70fb0, + 0x0000000051ee1f61, 0x00000000a3dc3ec2, 0x0000000147b87d84, + 0x000000008f70fb09, 0x000000011ee1f612, 0x000000003dc3ec25, + 0x000000007b87d84a, 0x00000000f70fb094, 0x00000001ee1f6128, + 0x00000001dc3ec251, 0x00000001b87d84a3, 0x0000000170fb0947, + 0x00000000e1f6128f, 0x00000001c3ec251e, 0x0000000187d84a3d, + 0x000000010fb0947b, 0x000000001f6128f7, 0x000000003ec251ee, + 0x000000007d84a3dc, 0x00000000fb0947b8, 0x00000001f6128f70, + 0x00000001ec251ee1, 0x00000001d84a3dc3, 0x00000001b0947b87, + 0x000000016128f70f, 0x00000000c251ee1f, 0x0000000184a3dc3e +}; + +static const uint64_t CC_group4_31L[31] = { + 0x501734bc00000000, 0xa02e697800000000, 0x405cd2f200000000, + 0x80b9a5e400000000, 0x1734bca00000000, 0x2e6979400000000, + 0x5cd2f2800000000, 0xb9a5e5000000000, 0x1734bca000000000, + 0x2e69794000000000, 0x5cd2f28000000000, 0xb9a5e50000000000, + 0x734bca0200000000, 0xe697940400000000, 0xcd2f280a00000000, + 0x9a5e501600000000, 0x34bca02e00000000, 0x6979405c00000000, + 0xd2f280b800000000, 0xa5e5017200000000, 0x4bca02e600000000, + 0x979405cc00000000, 0x2f280b9a00000000, 0x5e50173400000000, + 0xbca02e6800000000, 0x79405cd200000000, 0xf280b9a400000000, + 0xe501734a00000000, 0xca02e69600000000, 0x9405cd2e00000000, + 0x280b9a5e00000000 +}; + +// ===== CC_XOR_5_10 ===== +static const uint64_t CC_group5_33R[33] = { + 0x00000000c3c8457a, 0x0000000187908af4, 0x000000010f2115e9, + 0x000000001e422bd3, 0x000000003c8457a6, 0x000000007908af4c, + 0x00000000f2115e98, 0x00000001e422bd30, 0x00000001c8457a61, + 0x00000001908af4c3, 0x000000012115e987, 0x00000000422bd30f, + 0x000000008457a61e, 0x0000000108af4c3c, 0x00000000115e9879, + 0x0000000022bd30f2, 0x00000000457a61e4, 0x000000008af4c3c8, + 0x0000000115e98790, 0x000000002bd30f21, 0x0000000057a61e42, + 0x00000000af4c3c84, 0x000000015e987908, 0x00000000bd30f211, + 0x000000017a61e422, 0x00000000f4c3c845, 0x00000001e987908a, + 0x00000001d30f2115, 0x00000001a61e422b, 0x000000014c3c8457, + 0x00000000987908af, 0x0000000130f2115e, 0x0000000061e422bd +}; + +static const uint64_t CC_group5_31L[31] = { + 0xda7b6ec000000000, 0xb4f6dd8200000000, 0x69edbb0600000000, + 0xd3db760c00000000, 0xa7b6ec1a00000000, 0x4f6dd83600000000, + 0x9edbb06c00000000, 0x3db760da00000000, 0x7b6ec1b400000000, + 0xf6dd836800000000, 0xedbb06d200000000, 0xdb760da600000000, + 0xb6ec1b4e00000000, 0x6dd8369e00000000, 0xdbb06d3c00000000, + 0xb760da7a00000000, 0x6ec1b4f600000000, 0xdd8369ec00000000, + 0xbb06d3da00000000, 0x760da7b600000000, 0xec1b4f6c00000000, + 0xd8369eda00000000, 0xb06d3db600000000, 0x60da7b6e00000000, + 0xc1b4f6dc00000000, 0x8369edba00000000, 0x6d3db7600000000, + 0xda7b6ec00000000, 0x1b4f6dd800000000, 0x369edbb000000000, + 0x6d3db76000000000 +}; + +// ===== CG_XOR_1_10 ===== +static const uint64_t CG_group1_33R[33] = { + 0x00000001f730b24e, 0x00000001ee61649d, 0x00000001dcc2c93b, + 0x00000001b9859277, 0x00000001730b24ef, 0x00000000e61649df, + 0x00000001cc2c93be, 0x000000019859277d, 0x0000000130b24efb, + 0x0000000061649df7, 0x00000000c2c93bee, 0x00000001859277dc, + 0x000000010b24efb9, 0x000000001649df73, 0x000000002c93bee6, + 0x0000000059277dcc, 0x00000000b24efb98, 0x00000001649df730, + 0x00000000c93bee61, 0x000000019277dcc2, 0x0000000124efb985, + 0x0000000049df730b, 0x0000000093bee616, 0x00000001277dcc2c, + 0x000000004efb9859, 0x000000009df730b2, 0x000000013bee6164, + 0x0000000077dcc2c9, 0x00000000efb98592, 0x00000001df730b24, + 0x00000001bee61649, 0x000000017dcc2c93, 0x00000000fb985927 +}; + +static const uint64_t CG_group1_31L[31] = { + 0x85447b3400000000, 0xa88f66a00000000, 0x1511ecd400000000, + 0x2a23d9a800000000, 0x5447b35000000000, 0xa88f66a000000000, + 0x511ecd4200000000, 0xa23d9a8400000000, 0x447b350a00000000, + 0x88f66a1400000000, 0x11ecd42a00000000, 0x23d9a85400000000, + 0x47b350a800000000, 0x8f66a15000000000, 0x1ecd42a200000000, + 0x3d9a854400000000, 0x7b350a8800000000, 0xf66a151000000000, + 0xecd42a2200000000, 0xd9a8544600000000, 0xb350a88e00000000, + 0x66a1511e00000000, 0xcd42a23c00000000, 0x9a85447a00000000, + 0x350a88f600000000, 0x6a1511ec00000000, 0xd42a23d800000000, + 0xa85447b200000000, 0x50a88f6600000000, 0xa1511ecc00000000, + 0x42a23d9a00000000 +}; + +// ===== CG_XOR_2_11 ===== +static const uint64_t CG_group2_33R[33] = { + 0x00000000e67bf290, 0x00000001ccf7e520, 0x0000000199efca41, + 0x0000000133df9483, 0x0000000067bf2907, 0x00000000cf7e520e, + 0x000000019efca41c, 0x000000013df94839, 0x000000007bf29073, + 0x00000000f7e520e6, 0x00000001efca41cc, 0x00000001df948399, + 0x00000001bf290733, 0x000000017e520e67, 0x00000000fca41ccf, + 0x00000001f948399e, 0x00000001f290733d, 0x00000001e520e67b, + 0x00000001ca41ccf7, 0x00000001948399ef, 0x00000001290733df, + 0x00000000520e67bf, 0x00000000a41ccf7e, 0x0000000148399efc, + 0x0000000090733df9, 0x0000000120e67bf2, 0x0000000041ccf7e5, + 0x000000008399efca, 0x000000010733df94, 0x000000000e67bf29, + 0x000000001ccf7e52, 0x00000000399efca4, 0x00000000733df948 +}; + +static const uint64_t CG_group2_31L[31] = { + 0x1e7ec42a00000000, 0x3cfd885400000000, 0x79fb10a800000000, + 0xf3f6215000000000, 0xe7ec42a200000000, 0xcfd8854600000000, + 0x9fb10a8e00000000, 0x3f62151e00000000, 0x7ec42a3c00000000, + 0xfd88547800000000, 0xfb10a8f200000000, 0xf62151e600000000, + 0xec42a3ce00000000, 0xd885479e00000000, 0xb10a8f3e00000000, + 0x62151e7e00000000, 0xc42a3cfc00000000, 0x885479fa00000000, + 0x10a8f3f600000000, 0x2151e7ec00000000, 0x42a3cfd800000000, + 0x85479fb000000000, 0xa8f3f6200000000, 0x151e7ec400000000, + 0x2a3cfd8800000000, 0x5479fb1000000000, 0xa8f3f62000000000, + 0x51e7ec4200000000, 0xa3cfd88400000000, 0x479fb10a00000000, + 0x8f3f621400000000 +}; + +// ===== CG_XOR_3_12 ===== +static const uint64_t CG_group3_33R[33] = { + 0x0000000124612363, 0x0000000048c246c7, 0x0000000091848d8e, + 0x0000000123091b1c, 0x0000000046123639, 0x000000008c246c72, + 0x000000011848d8e4, 0x000000003091b1c9, 0x0000000061236392, + 0x00000000c246c724, 0x00000001848d8e48, 0x00000001091b1c91, + 0x0000000012363923, 0x00000000246c7246, 0x0000000048d8e48c, + 0x0000000091b1c918, 0x0000000123639230, 0x0000000046c72461, + 0x000000008d8e48c2, 0x000000011b1c9184, 0x0000000036392309, + 0x000000006c724612, 0x00000000d8e48c24, 0x00000001b1c91848, + 0x0000000163923091, 0x00000000c7246123, 0x000000018e48c246, + 0x000000011c91848d, 0x000000003923091b, 0x0000000072461236, + 0x00000000e48c246c, 0x00000001c91848d8, 0x00000001923091b1 +}; + +static const uint64_t CG_group3_31L[31] = { + 0xe630f41400000000, 0xcc61e82a00000000, 0x98c3d05600000000, + 0x3187a0ae00000000, 0x630f415c00000000, 0xc61e82b800000000, + 0x8c3d057200000000, 0x187a0ae600000000, 0x30f415cc00000000, + 0x61e82b9800000000, 0xc3d0573000000000, 0x87a0ae6200000000, + 0xf415cc600000000, 0x1e82b98c00000000, 0x3d05731800000000, + 0x7a0ae63000000000, 0xf415cc6000000000, 0xe82b98c200000000, + 0xd057318600000000, 0xa0ae630e00000000, 0x415cc61e00000000, + 0x82b98c3c00000000, 0x573187a00000000, 0xae630f400000000, + 0x15cc61e800000000, 0x2b98c3d000000000, 0x573187a000000000, + 0xae630f4000000000, 0x5cc61e8200000000, 0xb98c3d0400000000, + 0x73187a0a00000000 +}; + +// ===== CG_XOR_4_13 ===== +static const uint64_t CG_group4_33R[33] = { + 0x000000012e4a1a40, 0x000000005c943481, 0x00000000b9286902, + 0x000000017250d204, 0x00000000e4a1a409, 0x00000001c9434812, + 0x0000000192869025, 0x00000001250d204b, 0x000000004a1a4097, + 0x000000009434812e, 0x000000012869025c, 0x0000000050d204b9, + 0x00000000a1a40972, 0x00000001434812e4, 0x00000000869025c9, + 0x000000010d204b92, 0x000000001a409725, 0x0000000034812e4a, + 0x0000000069025c94, 0x00000000d204b928, 0x00000001a4097250, + 0x000000014812e4a1, 0x000000009025c943, 0x00000001204b9286, + 0x000000004097250d, 0x00000000812e4a1a, 0x00000001025c9434, + 0x0000000004b92869, 0x00000000097250d2, 0x0000000012e4a1a4, + 0x0000000025c94348, 0x000000004b928690, 0x0000000097250d20 +}; + +static const uint64_t CG_group4_31L[31] = { + 0x8917598600000000, 0x122eb30e00000000, 0x245d661c00000000, + 0x48bacc3800000000, 0x9175987000000000, 0x22eb30e200000000, + 0x45d661c400000000, 0x8bacc38800000000, 0x1759871200000000, + 0x2eb30e2400000000, 0x5d661c4800000000, 0xbacc389000000000, + 0x7598712200000000, 0xeb30e24400000000, 0xd661c48a00000000, + 0xacc3891600000000, 0x5987122e00000000, 0xb30e245c00000000, + 0x661c48ba00000000, 0xcc38917400000000, 0x987122ea00000000, + 0x30e245d600000000, 0x61c48bac00000000, 0xc389175800000000, + 0x87122eb200000000, 0xe245d6600000000, 0x1c48bacc00000000, + 0x3891759800000000, 0x7122eb3000000000, 0xe245d66000000000, + 0xc48bacc200000000 +}; + +// ===== CG_XOR_5_14 ===== +static const uint64_t CG_group5_33R[33] = { + 0x00000001b4781d1b, 0x0000000168f03a37, 0x00000000d1e0746f, + 0x00000001a3c0e8de, 0x000000014781d1bd, 0x000000008f03a37b, + 0x000000011e0746f6, 0x000000003c0e8ded, 0x00000000781d1bda, + 0x00000000f03a37b4, 0x00000001e0746f68, 0x00000001c0e8ded1, + 0x0000000181d1bda3, 0x0000000103a37b47, 0x000000000746f68f, + 0x000000000e8ded1e, 0x000000001d1bda3c, 0x000000003a37b478, + 0x00000000746f68f0, 0x00000000e8ded1e0, 0x00000001d1bda3c0, + 0x00000001a37b4781, 0x0000000146f68f03, 0x000000008ded1e07, + 0x000000011bda3c0e, 0x0000000037b4781d, 0x000000006f68f03a, + 0x00000000ded1e074, 0x00000001bda3c0e8, 0x000000017b4781d1, + 0x00000000f68f03a3, 0x00000001ed1e0746, 0x00000001da3c0e8d +}; + +static const uint64_t CG_group5_31L[31] = { + 0x10a9d12400000000, 0x2153a24800000000, 0x42a7449000000000, + 0x854e892000000000, 0xa9d124200000000, 0x153a248400000000, + 0x2a74490800000000, 0x54e8921000000000, 0xa9d1242000000000, + 0x53a2484200000000, 0xa744908400000000, 0x4e89210a00000000, + 0x9d12421400000000, 0x3a24842a00000000, 0x7449085400000000, + 0xe89210a800000000, 0xd124215200000000, 0xa24842a600000000, + 0x4490854e00000000, 0x89210a9c00000000, 0x1242153a00000000, + 0x24842a7400000000, 0x490854e800000000, 0x9210a9d000000000, + 0x242153a200000000, 0x4842a74400000000, 0x90854e8800000000, + 0x210a9d1200000000, 0x42153a2400000000, 0x842a744800000000, + 0x854e89200000000 +}; + +// ===== CG_XOR_6_15 ===== +static const uint64_t CG_group6_33R[33] = { + 0x000000000b3efe26, 0x00000000167dfc4c, 0x000000002cfbf898, + 0x0000000059f7f130, 0x00000000b3efe260, 0x0000000167dfc4c0, + 0x00000000cfbf8981, 0x000000019f7f1302, 0x000000013efe2605, + 0x000000007dfc4c0b, 0x00000000fbf89816, 0x00000001f7f1302c, + 0x00000001efe26059, 0x00000001dfc4c0b3, 0x00000001bf898167, + 0x000000017f1302cf, 0x00000000fe26059f, 0x00000001fc4c0b3e, + 0x00000001f898167d, 0x00000001f1302cfb, 0x00000001e26059f7, + 0x00000001c4c0b3ef, 0x00000001898167df, 0x000000011302cfbf, + 0x0000000026059f7f, 0x000000004c0b3efe, 0x0000000098167dfc, + 0x00000001302cfbf8, 0x000000006059f7f1, 0x00000000c0b3efe2, + 0x000000018167dfc4, 0x0000000102cfbf89, 0x00000000059f7f13 +}; + +static const uint64_t CG_group6_31L[31] = { + 0xe345fa6e00000000, 0xc68bf4de00000000, 0x8d17e9be00000000, + 0x1a2fd37e00000000, 0x345fa6fc00000000, 0x68bf4df800000000, + 0xd17e9bf000000000, 0xa2fd37e200000000, 0x45fa6fc600000000, + 0x8bf4df8c00000000, 0x17e9bf1a00000000, 0x2fd37e3400000000, + 0x5fa6fc6800000000, 0xbf4df8d000000000, 0x7e9bf1a200000000, + 0xfd37e34400000000, 0xfa6fc68a00000000, 0xf4df8d1600000000, + 0xe9bf1a2e00000000, 0xd37e345e00000000, 0xa6fc68be00000000, + 0x4df8d17e00000000, 0x9bf1a2fc00000000, 0x37e345fa00000000, + 0x6fc68bf400000000, 0xdf8d17e800000000, 0xbf1a2fd200000000, + 0x7e345fa600000000, 0xfc68bf4c00000000, 0xf8d17e9a00000000, + 0xf1a2fd3600000000 +}; + +// ===== CG_XOR_7_16 ===== +static const uint64_t CG_group7_33R[33] = { + 0x000000017795d703, 0x00000000ef2bae07, 0x00000001de575c0e, + 0x00000001bcaeb81d, 0x00000001795d703b, 0x00000000f2bae077, + 0x00000001e575c0ee, 0x00000001caeb81dd, 0x0000000195d703bb, + 0x000000012bae0777, 0x00000000575c0eef, 0x00000000aeb81dde, + 0x000000015d703bbc, 0x00000000bae07779, 0x0000000175c0eef2, + 0x00000000eb81dde5, 0x00000001d703bbca, 0x00000001ae077795, + 0x000000015c0eef2b, 0x00000000b81dde57, 0x00000001703bbcae, + 0x00000000e077795d, 0x00000001c0eef2ba, 0x0000000181dde575, + 0x0000000103bbcaeb, 0x00000000077795d7, 0x000000000eef2bae, + 0x000000001dde575c, 0x000000003bbcaeb8, 0x0000000077795d70, + 0x00000000eef2bae0, 0x00000001dde575c0, 0x00000001bbcaeb81 +}; + +static const uint64_t CG_group7_31L[31] = { + 0xed73b07c00000000, 0xdae760fa00000000, 0xb5cec1f600000000, + 0x6b9d83ee00000000, 0xd73b07dc00000000, 0xae760fba00000000, + 0x5cec1f7600000000, 0xb9d83eec00000000, 0x73b07dda00000000, + 0xe760fbb400000000, 0xcec1f76a00000000, 0x9d83eed600000000, + 0x3b07ddae00000000, 0x760fbb5c00000000, 0xec1f76b800000000, + 0xd83eed7200000000, 0xb07ddae600000000, 0x60fbb5ce00000000, + 0xc1f76b9c00000000, 0x83eed73a00000000, 0x7ddae7600000000, + 0xfbb5cec00000000, 0x1f76b9d800000000, 0x3eed73b000000000, + 0x7ddae76000000000, 0xfbb5cec000000000, 0xf76b9d8200000000, + 0xeed73b0600000000, 0xddae760e00000000, 0xbb5cec1e00000000, + 0x76b9d83e00000000 +}; + +// ===== CG_XOR_8_17 ===== +static const uint64_t CG_group8_33R[33] = { + 0x0000000187ddc030, 0x000000010fbb8061, 0x000000001f7700c3, + 0x000000003eee0186, 0x000000007ddc030c, 0x00000000fbb80618, + 0x00000001f7700c30, 0x00000001eee01861, 0x00000001ddc030c3, + 0x00000001bb806187, 0x000000017700c30f, 0x00000000ee01861f, + 0x00000001dc030c3e, 0x00000001b806187d, 0x00000001700c30fb, + 0x00000000e01861f7, 0x00000001c030c3ee, 0x00000001806187dd, + 0x0000000100c30fbb, 0x0000000001861f77, 0x00000000030c3eee, + 0x0000000006187ddc, 0x000000000c30fbb8, 0x000000001861f770, + 0x0000000030c3eee0, 0x000000006187ddc0, 0x00000000c30fbb80, + 0x00000001861f7700, 0x000000010c3eee01, 0x00000000187ddc03, + 0x0000000030fbb806, 0x0000000061f7700c, 0x00000000c3eee018 +}; + +static const uint64_t CG_group8_31L[31] = { + 0xc05a70c600000000, 0x80b4e18e00000000, 0x169c31e00000000, + 0x2d3863c00000000, 0x5a70c7800000000, 0xb4e18f000000000, + 0x169c31e000000000, 0x2d3863c000000000, 0x5a70c78000000000, + 0xb4e18f0000000000, 0x69c31e0200000000, 0xd3863c0400000000, + 0xa70c780a00000000, 0x4e18f01600000000, 0x9c31e02c00000000, + 0x3863c05a00000000, 0x70c780b400000000, 0xe18f016800000000, + 0xc31e02d200000000, 0x863c05a600000000, 0xc780b4e00000000, + 0x18f0169c00000000, 0x31e02d3800000000, 0x63c05a7000000000, + 0xc780b4e000000000, 0x8f0169c200000000, 0x1e02d38600000000, + 0x3c05a70c00000000, 0x780b4e1800000000, 0xf0169c3000000000, + 0xe02d386200000000 +}; + +// ===== CG_XOR_9_18 ===== +static const uint64_t CG_group9_33R[33] = { + 0x000000015f69e07d, 0x00000000bed3c0fb, 0x000000017da781f6, + 0x00000000fb4f03ed, 0x00000001f69e07da, 0x00000001ed3c0fb5, + 0x00000001da781f6b, 0x00000001b4f03ed7, 0x0000000169e07daf, + 0x00000000d3c0fb5f, 0x00000001a781f6be, 0x000000014f03ed7d, + 0x000000009e07dafb, 0x000000013c0fb5f6, 0x00000000781f6bed, + 0x00000000f03ed7da, 0x00000001e07dafb4, 0x00000001c0fb5f69, + 0x0000000181f6bed3, 0x0000000103ed7da7, 0x0000000007dafb4f, + 0x000000000fb5f69e, 0x000000001f6bed3c, 0x000000003ed7da78, + 0x000000007dafb4f0, 0x00000000fb5f69e0, 0x00000001f6bed3c0, + 0x00000001ed7da781, 0x00000001dafb4f03, 0x00000001b5f69e07, + 0x000000016bed3c0f, 0x00000000d7da781f, 0x00000001afb4f03e +}; + +static const uint64_t CG_group9_31L[31] = { + 0xb3e7ea1c00000000, 0x67cfd43a00000000, 0xcf9fa87400000000, + 0x9f3f50ea00000000, 0x3e7ea1d600000000, 0x7cfd43ac00000000, + 0xf9fa875800000000, 0xf3f50eb200000000, 0xe7ea1d6600000000, + 0xcfd43ace00000000, 0x9fa8759e00000000, 0x3f50eb3e00000000, + 0x7ea1d67c00000000, 0xfd43acf800000000, 0xfa8759f200000000, + 0xf50eb3e600000000, 0xea1d67ce00000000, 0xd43acf9e00000000, + 0xa8759f3e00000000, 0x50eb3e7e00000000, 0xa1d67cfc00000000, + 0x43acf9fa00000000, 0x8759f3f400000000, 0xeb3e7ea00000000, + 0x1d67cfd400000000, 0x3acf9fa800000000, 0x759f3f5000000000, + 0xeb3e7ea000000000, 0xd67cfd4200000000, 0xacf9fa8600000000, + 0x59f3f50e00000000 +}; + +// ===== CT_XOR_1_10 ===== +static const uint64_t CT_group1_33R[33] = { + 0x000000009c215294, 0x000000013842a528, 0x0000000070854a51, + 0x00000000e10a94a2, 0x00000001c2152944, 0x00000001842a5289, + 0x000000010854a513, 0x0000000010a94a27, 0x000000002152944e, + 0x0000000042a5289c, 0x00000000854a5138, 0x000000010a94a270, + 0x00000000152944e1, 0x000000002a5289c2, 0x0000000054a51384, + 0x00000000a94a2708, 0x0000000152944e10, 0x00000000a5289c21, + 0x000000014a513842, 0x0000000094a27085, 0x000000012944e10a, + 0x000000005289c215, 0x00000000a513842a, 0x000000014a270854, + 0x00000000944e10a9, 0x00000001289c2152, 0x00000000513842a5, + 0x00000000a270854a, 0x0000000144e10a94, 0x0000000089c21529, + 0x0000000113842a52, 0x00000000270854a5, 0x000000004e10a94a +}; + +static const uint64_t CT_group1_31L[31] = { + 0x385ed26800000000, 0x70bda4d000000000, 0xe17b49a000000000, + 0xc2f6934200000000, 0x85ed268600000000, 0xbda4d0e00000000, + 0x17b49a1c00000000, 0x2f69343800000000, 0x5ed2687000000000, + 0xbda4d0e000000000, 0x7b49a1c200000000, 0xf693438400000000, + 0xed26870a00000000, 0xda4d0e1600000000, 0xb49a1c2e00000000, + 0x6934385e00000000, 0xd26870bc00000000, 0xa4d0e17a00000000, + 0x49a1c2f600000000, 0x934385ec00000000, 0x26870bda00000000, + 0x4d0e17b400000000, 0x9a1c2f6800000000, 0x34385ed200000000, + 0x6870bda400000000, 0xd0e17b4800000000, 0xa1c2f69200000000, + 0x4385ed2600000000, 0x870bda4c00000000, 0xe17b49a00000000, + 0x1c2f693400000000 +}; + +// ===== CT_XOR_2_11 ===== +static const uint64_t CT_group2_33R[33] = { + 0x00000001383a421e, 0x000000007074843d, 0x00000000e0e9087a, + 0x00000001c1d210f4, 0x0000000183a421e9, 0x00000001074843d3, + 0x000000000e9087a7, 0x000000001d210f4e, 0x000000003a421e9c, + 0x0000000074843d38, 0x00000000e9087a70, 0x00000001d210f4e0, + 0x00000001a421e9c1, 0x000000014843d383, 0x000000009087a707, + 0x00000001210f4e0e, 0x00000000421e9c1d, 0x00000000843d383a, + 0x00000001087a7074, 0x0000000010f4e0e9, 0x0000000021e9c1d2, + 0x0000000043d383a4, 0x0000000087a70748, 0x000000010f4e0e90, + 0x000000001e9c1d21, 0x000000003d383a42, 0x000000007a707484, + 0x00000000f4e0e908, 0x00000001e9c1d210, 0x00000001d383a421, + 0x00000001a7074843, 0x000000014e0e9087, 0x000000009c1d210f +}; + +static const uint64_t CT_group2_31L[31] = { + 0x5997dfaa00000000, 0xb32fbf5400000000, 0x665f7eaa00000000, + 0xccbefd5400000000, 0x997dfaaa00000000, 0x32fbf55600000000, + 0x65f7eaac00000000, 0xcbefd55800000000, 0x97dfaab200000000, + 0x2fbf556600000000, 0x5f7eaacc00000000, 0xbefd559800000000, + 0x7dfaab3200000000, 0xfbf5566400000000, 0xf7eaacca00000000, + 0xefd5599600000000, 0xdfaab32e00000000, 0xbf55665e00000000, + 0x7eaaccbe00000000, 0xfd55997c00000000, 0xfaab32fa00000000, + 0xf55665f600000000, 0xeaaccbee00000000, 0xd55997de00000000, + 0xaab32fbe00000000, 0x55665f7e00000000, 0xaaccbefc00000000, + 0x55997dfa00000000, 0xab32fbf400000000, 0x5665f7ea00000000, + 0xaccbefd400000000 +}; + +// ===== CT_XOR_3_12 ===== +static const uint64_t CT_group3_33R[33] = { + 0x000000016c643607, 0x00000000d8c86c0f, 0x00000001b190d81e, + 0x000000016321b03d, 0x00000000c643607b, 0x000000018c86c0f6, + 0x00000001190d81ed, 0x00000000321b03db, 0x00000000643607b6, + 0x00000000c86c0f6c, 0x0000000190d81ed8, 0x0000000121b03db1, + 0x0000000043607b63, 0x0000000086c0f6c6, 0x000000010d81ed8c, + 0x000000001b03db19, 0x000000003607b632, 0x000000006c0f6c64, + 0x00000000d81ed8c8, 0x00000001b03db190, 0x00000001607b6321, + 0x00000000c0f6c643, 0x0000000181ed8c86, 0x0000000103db190d, + 0x0000000007b6321b, 0x000000000f6c6436, 0x000000001ed8c86c, + 0x000000003db190d8, 0x000000007b6321b0, 0x00000000f6c64360, + 0x00000001ed8c86c0, 0x00000001db190d81, 0x00000001b6321b03 +}; + +static const uint64_t CT_group3_31L[31] = { + 0x2c62c05000000000, 0x58c580a000000000, 0xb18b014000000000, + 0x6316028200000000, 0xc62c050400000000, 0x8c580a0a00000000, + 0x18b0141600000000, 0x3160282c00000000, 0x62c0505800000000, + 0xc580a0b000000000, 0x8b01416200000000, 0x160282c600000000, + 0x2c05058c00000000, 0x580a0b1800000000, 0xb014163000000000, + 0x60282c6200000000, 0xc05058c400000000, 0x80a0b18a00000000, + 0x141631600000000, 0x282c62c00000000, 0x5058c5800000000, + 0xa0b18b000000000, 0x1416316000000000, 0x282c62c000000000, + 0x5058c58000000000, 0xa0b18b0000000000, 0x4163160200000000, + 0x82c62c0400000000, 0x58c580a00000000, 0xb18b01400000000, + 0x1631602800000000 +}; + +// ===== CT_XOR_4_13 ===== +static const uint64_t CT_group4_33R[33] = { + 0x000000012af12d90, 0x0000000055e25b21, 0x00000000abc4b642, + 0x0000000157896c84, 0x00000000af12d909, 0x000000015e25b212, + 0x00000000bc4b6425, 0x000000017896c84a, 0x00000000f12d9095, + 0x00000001e25b212a, 0x00000001c4b64255, 0x00000001896c84ab, + 0x0000000112d90957, 0x0000000025b212af, 0x000000004b64255e, + 0x0000000096c84abc, 0x000000012d909578, 0x000000005b212af1, + 0x00000000b64255e2, 0x000000016c84abc4, 0x00000000d9095789, + 0x00000001b212af12, 0x0000000164255e25, 0x00000000c84abc4b, + 0x0000000190957896, 0x00000001212af12d, 0x000000004255e25b, + 0x0000000084abc4b6, 0x000000010957896c, 0x0000000012af12d9, + 0x00000000255e25b2, 0x000000004abc4b64, 0x00000000957896c8 +}; + +static const uint64_t CT_group4_31L[31] = { + 0x5186a5a800000000, 0xa30d4b5000000000, 0x461a96a200000000, + 0x8c352d4400000000, 0x186a5a8a00000000, 0x30d4b51400000000, + 0x61a96a2800000000, 0xc352d45000000000, 0x86a5a8a200000000, + 0xd4b514600000000, 0x1a96a28c00000000, 0x352d451800000000, + 0x6a5a8a3000000000, 0xd4b5146000000000, 0xa96a28c200000000, + 0x52d4518600000000, 0xa5a8a30c00000000, 0x4b51461a00000000, + 0x96a28c3400000000, 0x2d45186a00000000, 0x5a8a30d400000000, + 0xb51461a800000000, 0x6a28c35200000000, 0xd45186a400000000, + 0xa8a30d4a00000000, 0x51461a9600000000, 0xa28c352c00000000, + 0x45186a5a00000000, 0x8a30d4b400000000, 0x1461a96a00000000, + 0x28c352d400000000 +}; + +// ===== CT_XOR_5_14 ===== +static const uint64_t CT_group5_33R[33] = { + 0x0000000105426fbe, 0x000000000a84df7d, 0x000000001509befa, + 0x000000002a137df4, 0x000000005426fbe8, 0x00000000a84df7d0, + 0x00000001509befa0, 0x00000000a137df41, 0x00000001426fbe82, + 0x0000000084df7d05, 0x0000000109befa0a, 0x00000000137df415, + 0x0000000026fbe82a, 0x000000004df7d054, 0x000000009befa0a8, + 0x0000000137df4150, 0x000000006fbe82a1, 0x00000000df7d0542, + 0x00000001befa0a84, 0x000000017df41509, 0x00000000fbe82a13, + 0x00000001f7d05426, 0x00000001efa0a84d, 0x00000001df41509b, + 0x00000001be82a137, 0x000000017d05426f, 0x00000000fa0a84df, + 0x00000001f41509be, 0x00000001e82a137d, 0x00000001d05426fb, + 0x00000001a0a84df7, 0x0000000141509bef, 0x0000000082a137df +}; + +static const uint64_t CT_group5_31L[31] = { + 0x34fd2f6200000000, 0x69fa5ec400000000, 0xd3f4bd8800000000, + 0xa7e97b1200000000, 0x4fd2f62600000000, 0x9fa5ec4c00000000, + 0x3f4bd89a00000000, 0x7e97b13400000000, 0xfd2f626800000000, + 0xfa5ec4d200000000, 0xf4bd89a600000000, 0xe97b134e00000000, + 0xd2f6269e00000000, 0xa5ec4d3e00000000, 0x4bd89a7e00000000, + 0x97b134fc00000000, 0x2f6269fa00000000, 0x5ec4d3f400000000, + 0xbd89a7e800000000, 0x7b134fd200000000, 0xf6269fa400000000, + 0xec4d3f4a00000000, 0xd89a7e9600000000, 0xb134fd2e00000000, + 0x6269fa5e00000000, 0xc4d3f4bc00000000, 0x89a7e97a00000000, + 0x134fd2f600000000, 0x269fa5ec00000000, 0x4d3f4bd800000000, + 0x9a7e97b000000000 +}; + +// ===== CT_XOR_6_15 ===== +static const uint64_t CT_group6_33R[33] = { + 0x000000001e12f0f6, 0x000000003c25e1ec, 0x00000000784bc3d8, + 0x00000000f09787b0, 0x00000001e12f0f60, 0x00000001c25e1ec1, + 0x0000000184bc3d83, 0x0000000109787b07, 0x0000000012f0f60f, + 0x0000000025e1ec1e, 0x000000004bc3d83c, 0x000000009787b078, + 0x000000012f0f60f0, 0x000000005e1ec1e1, 0x00000000bc3d83c2, + 0x00000001787b0784, 0x00000000f0f60f09, 0x00000001e1ec1e12, + 0x00000001c3d83c25, 0x0000000187b0784b, 0x000000010f60f097, + 0x000000001ec1e12f, 0x000000003d83c25e, 0x000000007b0784bc, + 0x00000000f60f0978, 0x00000001ec1e12f0, 0x00000001d83c25e1, + 0x00000001b0784bc3, 0x0000000160f09787, 0x00000000c1e12f0f, + 0x0000000183c25e1e, 0x000000010784bc3d, 0x000000000f09787b +}; + +static const uint64_t CT_group6_31L[31] = { + 0xfd1fccb200000000, 0xfa3f996600000000, 0xf47f32ce00000000, + 0xe8fe659e00000000, 0xd1fccb3e00000000, 0xa3f9967e00000000, + 0x47f32cfe00000000, 0x8fe659fc00000000, 0x1fccb3fa00000000, + 0x3f9967f400000000, 0x7f32cfe800000000, 0xfe659fd000000000, + 0xfccb3fa200000000, 0xf9967f4600000000, 0xf32cfe8e00000000, + 0xe659fd1e00000000, 0xccb3fa3e00000000, 0x9967f47e00000000, + 0x32cfe8fe00000000, 0x659fd1fc00000000, 0xcb3fa3f800000000, + 0x967f47f200000000, 0x2cfe8fe600000000, 0x59fd1fcc00000000, + 0xb3fa3f9800000000, 0x67f47f3200000000, 0xcfe8fe6400000000, + 0x9fd1fcca00000000, 0x3fa3f99600000000, 0x7f47f32c00000000, + 0xfe8fe65800000000 +}; + +// ===== CT_XOR_7_16 ===== +static const uint64_t CT_group7_33R[33] = { + 0x000000018d22c3a7, 0x000000011a45874f, 0x00000000348b0e9f, + 0x0000000069161d3e, 0x00000000d22c3a7c, 0x00000001a45874f8, + 0x0000000148b0e9f1, 0x000000009161d3e3, 0x0000000122c3a7c6, + 0x0000000045874f8d, 0x000000008b0e9f1a, 0x00000001161d3e34, + 0x000000002c3a7c69, 0x000000005874f8d2, 0x00000000b0e9f1a4, + 0x0000000161d3e348, 0x00000000c3a7c691, 0x00000001874f8d22, + 0x000000010e9f1a45, 0x000000001d3e348b, 0x000000003a7c6916, + 0x0000000074f8d22c, 0x00000000e9f1a458, 0x00000001d3e348b0, + 0x00000001a7c69161, 0x000000014f8d22c3, 0x000000009f1a4587, + 0x000000013e348b0e, 0x000000007c69161d, 0x00000000f8d22c3a, + 0x00000001f1a45874, 0x00000001e348b0e9, 0x00000001c69161d3 +}; + +static const uint64_t CT_group7_31L[31] = { + 0xde1be16400000000, 0xbc37c2ca00000000, 0x786f859600000000, + 0xf0df0b2c00000000, 0xe1be165a00000000, 0xc37c2cb600000000, + 0x86f8596e00000000, 0xdf0b2de00000000, 0x1be165bc00000000, + 0x37c2cb7800000000, 0x6f8596f000000000, 0xdf0b2de000000000, + 0xbe165bc200000000, 0x7c2cb78600000000, 0xf8596f0c00000000, + 0xf0b2de1a00000000, 0xe165bc3600000000, 0xc2cb786e00000000, + 0x8596f0de00000000, 0xb2de1be00000000, 0x165bc37c00000000, + 0x2cb786f800000000, 0x596f0df000000000, 0xb2de1be000000000, + 0x65bc37c200000000, 0xcb786f8400000000, 0x96f0df0a00000000, + 0x2de1be1600000000, 0x5bc37c2c00000000, 0xb786f85800000000, + 0x6f0df0b200000000 +}; + +// ===== CT_XOR_8_17 ===== +static const uint64_t CT_group8_33R[33] = { + 0x00000001a878cd6c, 0x0000000150f19ad9, 0x00000000a1e335b3, + 0x0000000143c66b66, 0x00000000878cd6cd, 0x000000010f19ad9a, + 0x000000001e335b35, 0x000000003c66b66a, 0x0000000078cd6cd4, + 0x00000000f19ad9a8, 0x00000001e335b350, 0x00000001c66b66a1, + 0x000000018cd6cd43, 0x0000000119ad9a87, 0x00000000335b350f, + 0x0000000066b66a1e, 0x00000000cd6cd43c, 0x000000019ad9a878, + 0x0000000135b350f1, 0x000000006b66a1e3, 0x00000000d6cd43c6, + 0x00000001ad9a878c, 0x000000015b350f19, 0x00000000b66a1e33, + 0x000000016cd43c66, 0x00000000d9a878cd, 0x00000001b350f19a, + 0x0000000166a1e335, 0x00000000cd43c66b, 0x000000019a878cd6, + 0x00000001350f19ad, 0x000000006a1e335b, 0x00000000d43c66b6 +}; + +static const uint64_t CT_group8_31L[31] = { + 0x669139b400000000, 0xcd22736800000000, 0x9a44e6d200000000, + 0x3489cda600000000, 0x69139b4c00000000, 0xd227369800000000, + 0xa44e6d3200000000, 0x489cda6600000000, 0x9139b4cc00000000, + 0x2273699a00000000, 0x44e6d33400000000, 0x89cda66800000000, + 0x139b4cd200000000, 0x273699a400000000, 0x4e6d334800000000, + 0x9cda669000000000, 0x39b4cd2200000000, 0x73699a4400000000, + 0xe6d3348800000000, 0xcda6691200000000, 0x9b4cd22600000000, + 0x3699a44e00000000, 0x6d33489c00000000, 0xda66913800000000, + 0xb4cd227200000000, 0x699a44e600000000, 0xd33489cc00000000, + 0xa669139a00000000, 0x4cd2273600000000, 0x99a44e6c00000000, + 0x33489cda00000000 +}; + +// ===== CT_XOR_9_18 ===== +static const uint64_t CT_group9_33R[33] = { + 0x0000000112bacedc, 0x0000000025759db9, 0x000000004aeb3b72, + 0x0000000095d676e4, 0x000000012bacedc8, 0x000000005759db91, + 0x00000000aeb3b722, 0x000000015d676e44, 0x00000000bacedc89, + 0x00000001759db912, 0x00000000eb3b7225, 0x00000001d676e44a, + 0x00000001acedc895, 0x0000000159db912b, 0x00000000b3b72257, + 0x00000001676e44ae, 0x00000000cedc895d, 0x000000019db912ba, + 0x000000013b722575, 0x0000000076e44aeb, 0x00000000edc895d6, + 0x00000001db912bac, 0x00000001b7225759, 0x000000016e44aeb3, + 0x00000000dc895d67, 0x00000001b912bace, 0x000000017225759d, + 0x00000000e44aeb3b, 0x00000001c895d676, 0x00000001912baced, + 0x00000001225759db, 0x0000000044aeb3b7, 0x00000000895d676e +}; + +static const uint64_t CT_group9_31L[31] = { + 0x36f4413e00000000, 0x6de8827c00000000, 0xdbd104f800000000, + 0xb7a209f200000000, 0x6f4413e600000000, 0xde8827cc00000000, + 0xbd104f9a00000000, 0x7a209f3600000000, 0xf4413e6c00000000, + 0xe8827cda00000000, 0xd104f9b600000000, 0xa209f36e00000000, + 0x4413e6de00000000, 0x8827cdbc00000000, 0x104f9b7a00000000, + 0x209f36f400000000, 0x413e6de800000000, 0x827cdbd000000000, + 0x4f9b7a200000000, 0x9f36f4400000000, 0x13e6de8800000000, + 0x27cdbd1000000000, 0x4f9b7a2000000000, 0x9f36f44000000000, + 0x3e6de88200000000, 0x7cdbd10400000000, 0xf9b7a20800000000, + 0xf36f441200000000, 0xe6de882600000000, 0xcdbd104e00000000, + 0x9b7a209e00000000 +}; + +// ===== CA_XOR_1_10 ===== +static const uint64_t CA_group1_33R[33] = { + 0x0000000045391840, 0x000000008a723080, 0x0000000114e46100, + 0x0000000029c8c201, 0x0000000053918402, 0x00000000a7230804, + 0x000000014e461008, 0x000000009c8c2011, 0x0000000139184022, + 0x0000000072308045, 0x00000000e461008a, 0x00000001c8c20114, + 0x0000000191840229, 0x0000000123080453, 0x00000000461008a7, + 0x000000008c20114e, 0x000000011840229c, 0x0000000030804539, + 0x0000000061008a72, 0x00000000c20114e4, 0x00000001840229c8, + 0x0000000108045391, 0x000000001008a723, 0x0000000020114e46, + 0x0000000040229c8c, 0x0000000080453918, 0x00000001008a7230, + 0x000000000114e461, 0x000000000229c8c2, 0x0000000004539184, + 0x0000000008a72308, 0x00000000114e4610, 0x00000000229c8c20 +}; + +static const uint64_t CA_group1_31L[31] = { + 0xd2e69a6200000000, 0xa5cd34c600000000, 0x4b9a698e00000000, + 0x9734d31c00000000, 0x2e69a63a00000000, 0x5cd34c7400000000, + 0xb9a698e800000000, 0x734d31d200000000, 0xe69a63a400000000, + 0xcd34c74a00000000, 0x9a698e9600000000, 0x34d31d2e00000000, + 0x69a63a5c00000000, 0xd34c74b800000000, 0xa698e97200000000, + 0x4d31d2e600000000, 0x9a63a5cc00000000, 0x34c74b9a00000000, + 0x698e973400000000, 0xd31d2e6800000000, 0xa63a5cd200000000, + 0x4c74b9a600000000, 0x98e9734c00000000, 0x31d2e69a00000000, + 0x63a5cd3400000000, 0xc74b9a6800000000, 0x8e9734d200000000, + 0x1d2e69a600000000, 0x3a5cd34c00000000, 0x74b9a69800000000, + 0xe9734d3000000000 +}; + +// ===== CA_XOR_2_11 ===== +static const uint64_t CA_group2_33R[33] = { + 0x000000016278deaf, 0x00000000c4f1bd5f, 0x0000000189e37abe, + 0x0000000113c6f57d, 0x00000000278deafb, 0x000000004f1bd5f6, + 0x000000009e37abec, 0x000000013c6f57d8, 0x0000000078deafb1, + 0x00000000f1bd5f62, 0x00000001e37abec4, 0x00000001c6f57d89, + 0x000000018deafb13, 0x000000011bd5f627, 0x0000000037abec4f, + 0x000000006f57d89e, 0x00000000deafb13c, 0x00000001bd5f6278, + 0x000000017abec4f1, 0x00000000f57d89e3, 0x00000001eafb13c6, + 0x00000001d5f6278d, 0x00000001abec4f1b, 0x0000000157d89e37, + 0x00000000afb13c6f, 0x000000015f6278de, 0x00000000bec4f1bd, + 0x000000017d89e37a, 0x00000000fb13c6f5, 0x00000001f6278dea, + 0x00000001ec4f1bd5, 0x00000001d89e37ab, 0x00000001b13c6f57 +}; + +static const uint64_t CA_group2_31L[31] = { + 0xfcdf375200000000, 0xf9be6ea600000000, 0xf37cdd4e00000000, + 0xe6f9ba9e00000000, 0xcdf3753e00000000, 0x9be6ea7e00000000, + 0x37cdd4fe00000000, 0x6f9ba9fc00000000, 0xdf3753f800000000, + 0xbe6ea7f200000000, 0x7cdd4fe600000000, 0xf9ba9fcc00000000, + 0xf3753f9a00000000, 0xe6ea7f3600000000, 0xcdd4fe6e00000000, + 0x9ba9fcde00000000, 0x3753f9be00000000, 0x6ea7f37c00000000, + 0xdd4fe6f800000000, 0xba9fcdf200000000, 0x753f9be600000000, + 0xea7f37cc00000000, 0xd4fe6f9a00000000, 0xa9fcdf3600000000, + 0x53f9be6e00000000, 0xa7f37cdc00000000, 0x4fe6f9ba00000000, + 0x9fcdf37400000000, 0x3f9be6ea00000000, 0x7f37cdd400000000, + 0xfe6f9ba800000000 +}; + +// ===== CA_XOR_3_12 ===== +static const uint64_t CA_group3_33R[33] = { + 0x00000001024fd310, 0x00000000049fa621, 0x00000000093f4c42, + 0x00000000127e9884, 0x0000000024fd3108, 0x0000000049fa6210, + 0x0000000093f4c420, 0x0000000127e98840, 0x000000004fd31081, + 0x000000009fa62102, 0x000000013f4c4204, 0x000000007e988409, + 0x00000000fd310812, 0x00000001fa621024, 0x00000001f4c42049, + 0x00000001e9884093, 0x00000001d3108127, 0x00000001a621024f, + 0x000000014c42049f, 0x000000009884093f, 0x000000013108127e, + 0x00000000621024fd, 0x00000000c42049fa, 0x00000001884093f4, + 0x00000001108127e9, 0x0000000021024fd3, 0x0000000042049fa6, + 0x0000000084093f4c, 0x0000000108127e98, 0x000000001024fd31, + 0x000000002049fa62, 0x000000004093f4c4, 0x000000008127e988 +}; + +static const uint64_t CA_group3_31L[31] = { + 0x72e4656e00000000, 0xe5c8cadc00000000, 0xcb9195ba00000000, + 0x97232b7600000000, 0x2e4656ee00000000, 0x5c8caddc00000000, + 0xb9195bb800000000, 0x7232b77200000000, 0xe4656ee400000000, + 0xc8caddca00000000, 0x9195bb9600000000, 0x232b772e00000000, + 0x4656ee5c00000000, 0x8caddcb800000000, 0x195bb97200000000, + 0x32b772e400000000, 0x656ee5c800000000, 0xcaddcb9000000000, + 0x95bb972200000000, 0x2b772e4600000000, 0x56ee5c8c00000000, + 0xaddcb91800000000, 0x5bb9723200000000, 0xb772e46400000000, + 0x6ee5c8ca00000000, 0xddcb919400000000, 0xbb97232a00000000, + 0x772e465600000000, 0xee5c8cac00000000, 0xdcb9195a00000000, + 0xb97232b600000000 +}; + +// ===== CA_XOR_4_13 ===== +static const uint64_t CA_group4_33R[33] = { + 0x00000000976fa1f8, 0x000000012edf43f0, 0x000000005dbe87e1, + 0x00000000bb7d0fc2, 0x0000000176fa1f84, 0x00000000edf43f09, + 0x00000001dbe87e12, 0x00000001b7d0fc25, 0x000000016fa1f84b, + 0x00000000df43f097, 0x00000001be87e12e, 0x000000017d0fc25d, + 0x00000000fa1f84bb, 0x00000001f43f0976, 0x00000001e87e12ed, + 0x00000001d0fc25db, 0x00000001a1f84bb7, 0x0000000143f0976f, + 0x0000000087e12edf, 0x000000010fc25dbe, 0x000000001f84bb7d, + 0x000000003f0976fa, 0x000000007e12edf4, 0x00000000fc25dbe8, + 0x00000001f84bb7d0, 0x00000001f0976fa1, 0x00000001e12edf43, + 0x00000001c25dbe87, 0x0000000184bb7d0f, 0x000000010976fa1f, + 0x0000000012edf43f, 0x0000000025dbe87e, 0x000000004bb7d0fc +}; + +static const uint64_t CA_group4_31L[31] = { + 0xdee651b200000000, 0xbdcca36600000000, 0x7b9946ce00000000, + 0xf7328d9c00000000, 0xee651b3a00000000, 0xdcca367600000000, + 0xb9946cee00000000, 0x7328d9de00000000, 0xe651b3bc00000000, + 0xcca3677a00000000, 0x9946cef600000000, 0x328d9dee00000000, + 0x651b3bdc00000000, 0xca3677b800000000, 0x946cef7200000000, + 0x28d9dee600000000, 0x51b3bdcc00000000, 0xa3677b9800000000, + 0x46cef73200000000, 0x8d9dee6400000000, 0x1b3bdcca00000000, + 0x3677b99400000000, 0x6cef732800000000, 0xd9dee65000000000, + 0xb3bdcca200000000, 0x677b994600000000, 0xcef7328c00000000, + 0x9dee651a00000000, 0x3bdcca3600000000, 0x77b9946c00000000, + 0xef7328d800000000 +}; + +// ===== CA_XOR_5_14 ===== +static const uint64_t CA_group5_33R[33] = { + 0x00000001ec8cd8f0, 0x00000001d919b1e1, 0x00000001b23363c3, + 0x000000016466c787, 0x00000000c8cd8f0f, 0x00000001919b1e1e, + 0x0000000123363c3d, 0x00000000466c787b, 0x000000008cd8f0f6, + 0x0000000119b1e1ec, 0x000000003363c3d9, 0x0000000066c787b2, + 0x00000000cd8f0f64, 0x000000019b1e1ec8, 0x00000001363c3d91, + 0x000000006c787b23, 0x00000000d8f0f646, 0x00000001b1e1ec8c, + 0x0000000163c3d919, 0x00000000c787b233, 0x000000018f0f6466, + 0x000000011e1ec8cd, 0x000000003c3d919b, 0x00000000787b2336, + 0x00000000f0f6466c, 0x00000001e1ec8cd8, 0x00000001c3d919b1, + 0x0000000187b23363, 0x000000010f6466c7, 0x000000001ec8cd8f, + 0x000000003d919b1e, 0x000000007b23363c, 0x00000000f6466c78 +}; + +static const uint64_t CA_group5_31L[31] = { + 0xf328890600000000, 0xe651120e00000000, 0xcca2241e00000000, + 0x9944483e00000000, 0x3288907e00000000, 0x651120fc00000000, + 0xca2241f800000000, 0x944483f200000000, 0x288907e600000000, + 0x51120fcc00000000, 0xa2241f9800000000, 0x44483f3200000000, + 0x88907e6400000000, 0x1120fcca00000000, 0x2241f99400000000, + 0x4483f32800000000, 0x8907e65000000000, 0x120fcca200000000, + 0x241f994400000000, 0x483f328800000000, 0x907e651000000000, + 0x20fcca2200000000, 0x41f9944400000000, 0x83f3288800000000, + 0x7e6511200000000, 0xfcca22400000000, 0x1f99444800000000, + 0x3f32889000000000, 0x7e65112000000000, 0xfcca224000000000, + 0xf994448200000000 +}; + +// ===== CA_XOR_6_15 ===== +static const uint64_t CA_group6_33R[33] = { + 0x00000000868dad85, 0x000000010d1b5b0a, 0x000000001a36b615, + 0x00000000346d6c2a, 0x0000000068dad854, 0x00000000d1b5b0a8, + 0x00000001a36b6150, 0x0000000146d6c2a1, 0x000000008dad8543, + 0x000000011b5b0a86, 0x0000000036b6150d, 0x000000006d6c2a1a, + 0x00000000dad85434, 0x00000001b5b0a868, 0x000000016b6150d1, + 0x00000000d6c2a1a3, 0x00000001ad854346, 0x000000015b0a868d, + 0x00000000b6150d1b, 0x000000016c2a1a36, 0x00000000d854346d, + 0x00000001b0a868da, 0x000000016150d1b5, 0x00000000c2a1a36b, + 0x00000001854346d6, 0x000000010a868dad, 0x00000000150d1b5b, + 0x000000002a1a36b6, 0x0000000054346d6c, 0x00000000a868dad8, + 0x0000000150d1b5b0, 0x00000000a1a36b61, 0x000000014346d6c2 +}; + +static const uint64_t CA_group6_31L[31] = { + 0x129d255400000000, 0x253a4aa800000000, 0x4a74955000000000, + 0x94e92aa000000000, 0x29d2554200000000, 0x53a4aa8400000000, + 0xa749550800000000, 0x4e92aa1200000000, 0x9d25542400000000, + 0x3a4aa84a00000000, 0x7495509400000000, 0xe92aa12800000000, + 0xd255425200000000, 0xa4aa84a600000000, 0x4955094e00000000, + 0x92aa129c00000000, 0x2554253a00000000, 0x4aa84a7400000000, + 0x955094e800000000, 0x2aa129d200000000, 0x554253a400000000, + 0xaa84a74800000000, 0x55094e9200000000, 0xaa129d2400000000, + 0x54253a4a00000000, 0xa84a749400000000, 0x5094e92a00000000, + 0xa129d25400000000, 0x4253a4aa00000000, 0x84a7495400000000, + 0x94e92aa00000000 +}; + +// ===== CA_XOR_7_16 ===== +static const uint64_t CA_group7_33R[33] = { + 0x0000000077652a44, 0x00000000eeca5488, 0x00000001dd94a910, + 0x00000001bb295221, 0x000000017652a443, 0x00000000eca54887, + 0x00000001d94a910e, 0x00000001b295221d, 0x00000001652a443b, + 0x00000000ca548877, 0x0000000194a910ee, 0x00000001295221dd, + 0x0000000052a443bb, 0x00000000a5488776, 0x000000014a910eec, + 0x0000000095221dd9, 0x000000012a443bb2, 0x0000000054887765, + 0x00000000a910eeca, 0x000000015221dd94, 0x00000000a443bb29, + 0x0000000148877652, 0x00000000910eeca5, 0x00000001221dd94a, + 0x00000000443bb295, 0x000000008877652a, 0x0000000110eeca54, + 0x0000000021dd94a9, 0x0000000043bb2952, 0x00000000877652a4, + 0x000000010eeca548, 0x000000001dd94a91, 0x000000003bb29522 +}; + +static const uint64_t CA_group7_31L[31] = { + 0x23f706d400000000, 0x47ee0da800000000, 0x8fdc1b5000000000, + 0x1fb836a200000000, 0x3f706d4400000000, 0x7ee0da8800000000, + 0xfdc1b51000000000, 0xfb836a2200000000, 0xf706d44600000000, + 0xee0da88e00000000, 0xdc1b511e00000000, 0xb836a23e00000000, + 0x706d447e00000000, 0xe0da88fc00000000, 0xc1b511fa00000000, + 0x836a23f600000000, 0x6d447ee00000000, 0xda88fdc00000000, + 0x1b511fb800000000, 0x36a23f7000000000, 0x6d447ee000000000, + 0xda88fdc000000000, 0xb511fb8200000000, 0x6a23f70600000000, + 0xd447ee0c00000000, 0xa88fdc1a00000000, 0x511fb83600000000, + 0xa23f706c00000000, 0x447ee0da00000000, 0x88fdc1b400000000, + 0x11fb836a00000000 +}; + +// ===== CA_XOR_8_17 ===== +static const uint64_t CA_group8_33R[33] = { + 0x000000007b9f6d6c, 0x00000000f73edad8, 0x00000001ee7db5b0, + 0x00000001dcfb6b61, 0x00000001b9f6d6c3, 0x0000000173edad87, + 0x00000000e7db5b0f, 0x00000001cfb6b61e, 0x000000019f6d6c3d, + 0x000000013edad87b, 0x000000007db5b0f7, 0x00000000fb6b61ee, + 0x00000001f6d6c3dc, 0x00000001edad87b9, 0x00000001db5b0f73, + 0x00000001b6b61ee7, 0x000000016d6c3dcf, 0x00000000dad87b9f, + 0x00000001b5b0f73e, 0x000000016b61ee7d, 0x00000000d6c3dcfb, + 0x00000001ad87b9f6, 0x000000015b0f73ed, 0x00000000b61ee7db, + 0x000000016c3dcfb6, 0x00000000d87b9f6d, 0x00000001b0f73eda, + 0x0000000161ee7db5, 0x00000000c3dcfb6b, 0x0000000187b9f6d6, + 0x000000010f73edad, 0x000000001ee7db5b, 0x000000003dcfb6b6 +}; + +static const uint64_t CA_group8_31L[31] = { + 0x3fd2c44800000000, 0x7fa5889000000000, 0xff4b112000000000, + 0xfe96224200000000, 0xfd2c448600000000, 0xfa58890e00000000, + 0xf4b1121e00000000, 0xe962243e00000000, 0xd2c4487e00000000, + 0xa58890fe00000000, 0x4b1121fe00000000, 0x962243fc00000000, + 0x2c4487fa00000000, 0x58890ff400000000, 0xb1121fe800000000, + 0x62243fd200000000, 0xc4487fa400000000, 0x8890ff4a00000000, + 0x1121fe9600000000, 0x2243fd2c00000000, 0x4487fa5800000000, + 0x890ff4b000000000, 0x121fe96200000000, 0x243fd2c400000000, + 0x487fa58800000000, 0x90ff4b1000000000, 0x21fe962200000000, + 0x43fd2c4400000000, 0x87fa588800000000, 0xff4b11200000000, + 0x1fe9622400000000 +}; + +// ===== CA_XOR_9_18 ===== +static const uint64_t CA_group9_33R[33] = { + 0x000000011139ceea, 0x0000000022739dd5, 0x0000000044e73baa, + 0x0000000089ce7754, 0x00000001139ceea8, 0x000000002739dd51, + 0x000000004e73baa2, 0x000000009ce77544, 0x0000000139ceea88, + 0x00000000739dd511, 0x00000000e73baa22, 0x00000001ce775444, + 0x000000019ceea889, 0x0000000139dd5113, 0x0000000073baa227, + 0x00000000e775444e, 0x00000001ceea889c, 0x000000019dd51139, + 0x000000013baa2273, 0x00000000775444e7, 0x00000000eea889ce, + 0x00000001dd51139c, 0x00000001baa22739, 0x0000000175444e73, + 0x00000000ea889ce7, 0x00000001d51139ce, 0x00000001aa22739d, + 0x000000015444e73b, 0x00000000a889ce77, 0x0000000151139cee, + 0x00000000a22739dd, 0x00000001444e73ba, 0x00000000889ce775 +}; + +static const uint64_t CA_group9_31L[31] = { + 0xa8a3d2ec00000000, 0x5147a5da00000000, 0xa28f4bb400000000, + 0x451e976a00000000, 0x8a3d2ed400000000, 0x147a5daa00000000, + 0x28f4bb5400000000, 0x51e976a800000000, 0xa3d2ed5000000000, + 0x47a5daa200000000, 0x8f4bb54400000000, 0x1e976a8a00000000, + 0x3d2ed51400000000, 0x7a5daa2800000000, 0xf4bb545000000000, + 0xe976a8a200000000, 0xd2ed514600000000, 0xa5daa28e00000000, + 0x4bb5451e00000000, 0x976a8a3c00000000, 0x2ed5147a00000000, + 0x5daa28f400000000, 0xbb5451e800000000, 0x76a8a3d200000000, + 0xed5147a400000000, 0xdaa28f4a00000000, 0xb5451e9600000000, + 0x6a8a3d2e00000000, 0xd5147a5c00000000, 0xaa28f4ba00000000, + 0x5451e97600000000 +}; + +constexpr const uint64_t* MS_TAB_33R_CC[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CC_group2_33R, CC_group3_33R, CC_group2_33R, CC_group3_33R, + nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CC_group4_33R, CC_group1_33R, CC_group5_33R, CC_group1_33R, + nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CC_group2_33R, CC_group3_33R, CC_group2_33R, CC_group3_33R, + nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CC_group4_33R, CC_group1_33R, CC_group4_33R, CC_group1_33R // 40–44 +}; + +constexpr const uint64_t* MS_TAB_31L_CC[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CC_group2_31L, CC_group3_31L, CC_group2_31L, CC_group3_31L, + nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CC_group4_31L, CC_group1_31L, CC_group5_31L, CC_group1_31L, + nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CC_group2_31L, CC_group3_31L, CC_group2_31L, CC_group3_31L, + nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CC_group4_31L, CC_group1_31L, CC_group4_31L, CC_group1_31L // 40–44 +}; + +// ===== MS_TAB_33R_CG ===== +constexpr const uint64_t* MS_TAB_33R_CG[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CG_group4_33R, CG_group2_33R, CG_group8_33R, CG_group2_33R, + nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CG_group1_33R, CG_group7_33R, CG_group1_33R, CG_group9_33R, + nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CG_group4_33R, CG_group2_33R, CG_group6_33R, CG_group2_33R, + nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CG_group5_33R, CG_group3_33R, CG_group1_33R, CG_group3_33R // 40–44 +}; + +// ===== MS_TAB_31L_CG ===== +constexpr const uint64_t* MS_TAB_31L_CG[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CG_group4_31L, CG_group2_31L, CG_group8_31L, CG_group2_31L, + nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CG_group1_31L, CG_group7_31L, CG_group1_31L, CG_group9_31L, + nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CG_group4_31L, CG_group2_31L, CG_group6_31L, CG_group2_31L, + nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CG_group5_31L, CG_group3_31L, CG_group1_31L, CG_group3_31L // 40–44 +}; + +// ===== MS_TAB_33R_CA ===== +constexpr const uint64_t* MS_TAB_33R_CA[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CA_group3_33R, CA_group2_33R, CA_group7_33R, CA_group2_33R, + nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CA_group1_33R, CA_group6_33R, CA_group9_33R, CA_group8_33R, + nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CA_group3_33R, CA_group2_33R, CA_group5_33R, CA_group2_33R, + nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CA_group1_33R, CA_group4_33R, CA_group1_33R, CA_group4_33R // 40–44 +}; + +// ===== MS_TAB_31L_CA ===== +constexpr const uint64_t* MS_TAB_31L_CA[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CA_group3_31L, CA_group2_31L, CA_group7_31L, CA_group2_31L, + nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CA_group1_31L, CA_group6_31L, CA_group9_31L, CA_group8_31L, + nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CA_group3_31L, CA_group2_31L, CA_group5_31L, CA_group2_31L, + nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CA_group1_31L, CA_group4_31L, CA_group1_31L, CA_group4_31L // 40–44 +}; + +// ===== MS_TAB_33R_CT ===== +constexpr const uint64_t* MS_TAB_33R_CT[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CT_group2_33R, CT_group3_33R, CT_group2_33R, CT_group3_33R, + nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CT_group8_33R, CT_group5_33R, CT_group9_33R, CT_group1_33R, + nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CT_group2_33R, CT_group3_33R, CT_group4_33R, CT_group3_33R, + nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CT_group6_33R, CT_group1_33R, CT_group7_33R, CT_group1_33R // 40–44 +}; + +// ===== MS_TAB_31L_CT ===== +constexpr const uint64_t* MS_TAB_31L_CT[45] = { + nullptr, nullptr, nullptr, nullptr, nullptr, + nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 + nullptr, CT_group2_31L, CT_group3_31L, CT_group2_31L, CT_group3_31L, + nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 + nullptr, CT_group8_31L, CT_group5_31L, CT_group9_31L, CT_group1_31L, + nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 + nullptr, CT_group2_31L, CT_group3_31L, CT_group4_31L, CT_group3_31L, + nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 + nullptr, CT_group6_31L, CT_group1_31L, CT_group7_31L, CT_group1_31L // 40–44 +}; + +inline uint64_t +srol_bs_table(uint8_t out_idx, + unsigned d, + const uint64_t* const* TAB_33R, + const uint64_t* const* TAB_31L) +{ + d = d / 2; + return (TAB_31L[out_idx][d < 31 ? d : d % 31] | /* NOLINT */ + TAB_33R[out_idx][d < 33 ? d : d % 33]); /* NOLINT */ +} + +inline uint64_t +next_forward_bs_hash(uint64_t fh_val, + unsigned k, + unsigned char char_out1, + unsigned char char_out2, + unsigned char char_in1, + unsigned char char_in2, + const uint64_t* DIMER_TAB, + const uint64_t* const* TAB_33R, + const uint64_t* const* TAB_31L) +{ + // Compute indices for outgoing and incoming dimers + // Each dimer is encoded as (10 * first + second) + uint8_t out_idx = 10 * BS_CONVERT_TAB[char_out1] + BS_CONVERT_TAB[char_out2]; + uint8_t in_idx = 10 * BS_CONVERT_TAB[char_in1] + BS_CONVERT_TAB[char_in2]; + + // Rotate the hash left by 2 since we move by two bases + uint64_t h_val = srol(fh_val); + + // XOR out the old dimer and XOR in the new dimer + h_val ^= srol_bs_table(out_idx, k, TAB_33R, TAB_31L); + h_val ^= DIMER_TAB[in_idx]; + + return h_val; +} + +inline uint64_t +prev_forward_bs_hash(uint64_t fh_val, + unsigned k, + unsigned char char_out1, + unsigned char char_out2, + unsigned char char_in1, + unsigned char char_in2, + const uint64_t* DIMER_TAB, + const uint64_t* const* TAB_33R, + const uint64_t* const* TAB_31L) +{ + // Compute indices for outgoing and incoming dimers + // Each dimer is encoded as (10 * first + second) + uint8_t out_idx = 10 * BS_CONVERT_TAB[char_out1] + BS_CONVERT_TAB[char_out2]; + uint8_t in_idx = 10 * BS_CONVERT_TAB[char_in1] + BS_CONVERT_TAB[char_in2]; + + uint64_t h_val = fh_val ^ srol_bs_table(in_idx, k, TAB_33R, TAB_31L); + h_val ^= DIMER_TAB[out_idx]; + h_val = sror(h_val); + return h_val; +} + +// Generic dimer-based reverse hash using pointer table +inline std::vector +base_reverse_bs_hash(const char* seq, + bool has_second, + unsigned k, + const uint64_t* DIMER_TAB) +{ + + uint64_t h0 = 0; + uint64_t h1 = 0; + + // Process pairs from the end for h0 + for (int i = (int)k - 1; i >= 1; i -= 2) { + h0 = srol(h0, 1); + const uint8_t a = BS_RC_CONVERT_TAB[(unsigned char)seq[i]]; + const uint8_t b = BS_RC_CONVERT_TAB[(unsigned char)seq[i - 1]]; + const uint8_t loc = 10 * a + b; // yields 11..44 + h0 ^= DIMER_TAB[loc]; // first element of pointer table + } + + // If length exactly k, return only h0 + if (!has_second) + return { h0, 0 }; + + // Offset by 1 for second hash (mirroring h1 in forward) + for (int i = (int)k; i >= 2; i -= 2) { + h1 = srol(h1, 1); + const uint8_t a = BS_RC_CONVERT_TAB[(unsigned char)seq[i]]; + const uint8_t b = BS_RC_CONVERT_TAB[(unsigned char)seq[i - 1]]; + const uint8_t loc = 10 * a + b; + h1 ^= DIMER_TAB[loc]; + } + + return { h0, h1 }; +} + +inline uint64_t +next_reverse_bs_hash(uint64_t rh_val, + unsigned k, + unsigned char char_out1, + unsigned char char_out2, + unsigned char char_in1, + unsigned char char_in2, + const uint64_t* DIMER_TAB, + const uint64_t* const* TAB_33R, + const uint64_t* const* TAB_31L) +{ + uint8_t out_idx = + 10 * BS_RC_CONVERT_TAB[char_out2] + BS_RC_CONVERT_TAB[char_out1]; + uint8_t in_idx = + 10 * BS_RC_CONVERT_TAB[char_in2] + BS_RC_CONVERT_TAB[char_in1]; + uint64_t h_val = rh_val ^ srol_bs_table(in_idx, k, TAB_33R, TAB_31L); + h_val ^= DIMER_TAB[out_idx]; + h_val = sror(h_val); + return h_val; +} + +inline uint64_t +prev_reverse_bs_hash(uint64_t rh_val, + unsigned k, + unsigned char char_out1, + unsigned char char_out2, + unsigned char char_in1, + unsigned char char_in2, + const uint64_t* DIMER_TAB, + const uint64_t* const* TAB_33R, + const uint64_t* const* TAB_31L) +{ + uint8_t out_idx = + 10 * BS_RC_CONVERT_TAB[char_out2] + BS_RC_CONVERT_TAB[char_out1]; + uint8_t in_idx = + 10 * BS_RC_CONVERT_TAB[char_in2] + BS_RC_CONVERT_TAB[char_in1]; + uint64_t h_val = srol(rh_val); + h_val ^= DIMER_TAB[in_idx]; + h_val ^= srol_bs_table(out_idx, k, TAB_33R, TAB_31L); + return h_val; +} + +} // namespace btllib::hashing_internals \ No newline at end of file diff --git a/include/btllib/bshash_kmer.hpp b/include/btllib/bshash_kmer.hpp index d5dc45c4..fbede73b 100644 --- a/include/btllib/bshash_kmer.hpp +++ b/include/btllib/bshash_kmer.hpp @@ -1,727 +1,50 @@ #pragma once -#include #include +#include #include #include #include #include #include #include -#include #include +#include -#include +#include #include namespace btllib { -using hashing_internals::sror; -using hashing_internals::srol; -constexpr uint8_t CONVERT_TAB_SIMPLE[128] = { - // 0–64 - 0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0, - 1,0,2,0,0, - 0,3,0,0,0,0,0,0,0,0, - 0,0,0,0,4,0,0,0,0,0, - 0,0,0,0,0,0,0, - 1,0,2, - 0,0,0,3,0,0,0,0,0,0, - 0,0,0,0,0,0,4, - 0,0,0, - 0,0,0,0,0,0,0,0 -}; - -constexpr uint8_t CONVERT_TAB_RC[128] = { - - 0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0, - 4,0,3,0,0, - 0,2,0,0,0,0,0,0,0,0, - 0,0,0,0,1,0,0,0,0,0, - 0,0,0,0,0,0,0, - 4,0,3, - 0,0,0,2,0,0,0,0,0,0, - 0,0,0,0,0,0,1, - 0,0,0, - 0,0,0,0,0,0,0,0 -}; - -constexpr uint64_t CC_DIMER_TAB[45] = { - 0,0,0,0,0,0,0,0,0,0, // 0–9 - 0,0x1bc5d5aba6ea6e02ULL,0x8f15314b06b5ed9cULL,0x1bc5d5aba6ea6e02ULL,0x8f15314b06b5ed9cULL,0,0,0,0,0, - 0,0x44fec7e4a74bce00ULL,0x77c24a36c934fdc1ULL,0x44fec7e4a74bce00ULL,0x77c24a36c934fdc1ULL,0,0,0,0,0, - 0,0x1bc5d5aba6ea6e02ULL,0x8f15314b06b5ed9cULL,0x1bc5d5aba6ea6e02ULL,0x8f15314b06b5ed9cULL,0,0,0,0,0, - 0,0x44fec7e4a74bce00ULL,0x77c24a36c934fdc1ULL,0x44fec7e4a74bce00ULL,0x77c24a36c934fdc1ULL -}; - - -constexpr uint64_t CG_DIMER_TAB[45] = { - 0,0,0,0,0,0,0,0,0,0, - 0,0x54f8849020caa27fULL,0xab9766c9a48a5d70ULL,0x7f01ee303c53b1bcULL,0xab9766c9a48a5d70ULL,0,0,0,0,0, - 0,0xd22f7891d06d2f5aULL,0xf1bd2f3bbfb10000ULL,0xd22f7891d06d2f5aULL,0xf1bd2f3bbfb10000ULL,0,0,0,0,0, - 0,0x54f8849020caa27fULL,0xab9766c9a48a5d70ULL,0x7f01ee303c53b1bcULL,0xab9766c9a48a5d70ULL,0,0,0,0,0, - 0,0x414ce1fc78d4d36fULL,0xff3adb2d91cd4803ULL,0xd22f7891d06d2f5aULL,0xff3adb2d91cd4803ULL -}; - -constexpr uint64_t CA_DIMER_TAB[45] = { - 0,0,0,0,0,0,0,0,0,0, - 0,0x7e4a36919c5032afULL,0x95b4b90696718a9fULL,0xf482881f5f3610bfULL,0x95b4b90696718a9fULL,0,0,0,0,0, - 0,0xe12f0e8e678e5d30ULL,0x9ff825331cf8a361ULL,0xf248b960f9159cecULL,0x9ff825331cf8a361ULL,0,0,0,0,0, - 0,0x7e4a36919c5032afULL,0x95b4b90696718a9fULL,0xf482881f5f3610bfULL,0x95b4b90696718a9fULL,0,0,0,0,0, - 0,0xe12f0e8e678e5d30ULL,0x00e055ee083cfcffULL,0xe12f0e8e678e5d30ULL,0x00e055ee083cfcffULL -}; - -constexpr uint64_t CT_DIMER_TAB[45] = { - 0,0,0,0,0,0,0,0,0,0, - 0,0x9a533a6a5e7b14e2ULL,0x9a533a6a5e7b14e2ULL,0x9a533a6a5e7b14e2ULL,0x897da34f6b464570ULL,0,0,0,0,0, - 0,0xeeaf42d6e10d9ac0ULL,0x049bab70128da11fULL,0x68c7d4adebfb5001ULL,0x049bab70128da11fULL,0,0,0,0,0, - 0,0x9a533a6a5e7b14e2ULL,0x9a533a6a5e7b14e2ULL,0x9a533a6a5e7b14e2ULL,0x897da34f6b464570ULL,0,0,0,0,0, - 0,0x0f1bfc76a0e3e700ULL,0x049bab70128da11fULL,0x282450c57fae339fULL,0x049bab70128da11fULL -}; - - - -// Generic dimer-based rolling hash -inline std::vector -base_forward_bs_hash(const char* seq, bool has_second, unsigned k, const uint64_t* DIMER_TAB) -{ - uint64_t h0 = 0; - uint64_t h1 = 0; - - std::cout << "[BASE_HASH] Computing forward hashes, k=" << k - << ", has_second=" << has_second << "\n"; - - for (unsigned i = 0; i < k; i += 2) { - h0 = srol(h0, 1); - const char c1 = seq[i]; - const char c2 = seq[i + 1]; - const uint8_t a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; - const uint8_t b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; - const uint8_t loc = 10 * a + b; // yields 11, 12, ..., 44 - h0 ^= DIMER_TAB[loc]; - - std::cout << "[BASE_HASH] h0 loop " << i << " | dimer='" - << c1 << c2 << "' loc=" << (unsigned)loc - << " | h0=" << h0 << "\n"; - } - - if (!has_second) - return {h0, 0}; - - for (unsigned i = 1; i < k + 1; i += 2) { - h1 = srol(h1, 1); - const char c1 = seq[i]; - const char c2 = seq[i + 1]; - const uint8_t a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; - const uint8_t b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; - const uint8_t loc = 10 * a + b; - h1 ^= DIMER_TAB[loc]; - - std::cout << "[BASE_HASH] h1 loop " << i << " | dimer='" - << c1 << c2 << "' loc=" << (unsigned)loc - << " | h1=" << h1 << "\n"; - } - - return {h0, h1}; -} - - - -// ===== CC_group1 ===== -static const uint64_t CC_group1_33R[33] = { - 0xc934fdc1, 0x19269fb82, 0x124d3f705, 0x49a7ee0b, 0x934fdc16, 0x1269fb82c, 0x4d3f7059, 0x9a7ee0b2, 0x134fdc164, 0x69fb82c9, - 0xd3f70592, 0x1a7ee0b24, 0x14fdc1649, 0x9fb82c93, 0x13f705926, 0x7ee0b24d, 0xfdc1649a, 0x1fb82c934, 0x1f7059269, 0x1ee0b24d3, - 0x1dc1649a7, 0x1b82c934f, 0x17059269f, 0xe0b24d3f, 0x1c1649a7e, 0x182c934fd, 0x1059269fb, 0xb24d3f7, 0x1649a7ee, 0x2c934fdc, - 0x59269fb8, 0xb24d3f70, 0x1649a7ee0 -}; - -static const uint64_t CC_group1_31L[31] = { - 0x77c24a3600000000, 0xef84946c00000000, 0xdf0928da00000000, 0xbe1251b600000000, 0x7c24a36e00000000, 0xf84946dc00000000, 0xf0928dba00000000, 0xe1251b7600000000, 0xc24a36ee00000000, 0x84946dde00000000, - 0x928dbbe00000000, 0x1251b77c00000000, 0x24a36ef800000000, 0x4946ddf000000000, 0x928dbbe000000000, 0x251b77c200000000, 0x4a36ef8400000000, 0x946ddf0800000000, 0x28dbbe1200000000, 0x51b77c2400000000, - 0xa36ef84800000000, 0x46ddf09200000000, 0x8dbbe12400000000, 0x1b77c24a00000000, 0x36ef849400000000, 0x6ddf092800000000, 0xdbbe125000000000, 0xb77c24a200000000, 0x6ef8494600000000, 0xddf0928c00000000, - 0xbbe1251a00000000 -}; - -// ===== CC_group2 ===== -static const uint64_t CC_group2_33R[33] = { - 0x1a6ea6e02, 0x14dd4dc05, 0x9ba9b80b, 0x137537016, 0x6ea6e02d, 0xdd4dc05a, 0x1ba9b80b4, 0x175370169, 0xea6e02d3, 0x1d4dc05a6, - 0x1a9b80b4d, 0x15370169b, 0xa6e02d37, 0x14dc05a6e, 0x9b80b4dd, 0x1370169ba, 0x6e02d375, 0xdc05a6ea, 0x1b80b4dd4, 0x170169ba9, - 0xe02d3753, 0x1c05a6ea6, 0x180b4dd4d, 0x10169ba9b, 0x2d37537, 0x5a6ea6e, 0xb4dd4dc, 0x169ba9b8, 0x2d375370, 0x5a6ea6e0, - 0xb4dd4dc0, 0x169ba9b80, 0xd3753701 -}; - -static const uint64_t CC_group2_31L[31] = { - 0x1bc5d5aa00000000, 0x378bab5400000000, 0x6f1756a800000000, 0xde2ead5000000000, 0xbc5d5aa200000000, 0x78bab54600000000, 0xf1756a8c00000000, 0xe2ead51a00000000, 0xc5d5aa3600000000, 0x8bab546e00000000, - 0x1756a8de00000000, 0x2ead51bc00000000, 0x5d5aa37800000000, 0xbab546f000000000, 0x756a8de200000000, 0xead51bc400000000, 0xd5aa378a00000000, 0xab546f1600000000, 0x56a8de2e00000000, 0xad51bc5c00000000, - 0x5aa378ba00000000, 0xb546f17400000000, 0x6a8de2ea00000000, 0xd51bc5d400000000, 0xaa378baa00000000, 0x546f175600000000, 0xa8de2eac00000000, 0x51bc5d5a00000000, 0xa378bab400000000, 0x46f1756a00000000, - 0x8de2ead400000000 -}; - -// ===== CC_group3 ===== -static const uint64_t CC_group3_33R[33] = { - 0xa74bce00, 0x14e979c00, 0x9d2f3801, 0x13a5e7002, 0x74bce005, 0xe979c00a, 0x1d2f38014, 0x1a5e70029, 0x14bce0053, 0x979c00a7, - 0x12f38014e, 0x5e70029d, 0xbce0053a, 0x179c00a74, 0xf38014e9, 0x1e70029d2, 0x1ce0053a5, 0x19c00a74b, 0x138014e97, 0x70029d2f, - 0xe0053a5e, 0x1c00a74bc, 0x18014e979, 0x10029d2f3, 0x53a5e7, 0xa74bce, 0x14e979c, 0x29d2f38, 0x53a5e70, 0xa74bce0, - 0x14e979c0, 0x29d2f380, 0x53a5e700 -}; - -static const uint64_t CC_group3_31L[31] = { - 0x44fec7e400000000, 0x89fd8fc800000000, 0x13fb1f9200000000, 0x27f63f2400000000, 0x4fec7e4800000000, 0x9fd8fc9000000000, 0x3fb1f92200000000, 0x7f63f24400000000, 0xfec7e48800000000, 0xfd8fc91200000000, - 0xfb1f922600000000, 0xf63f244e00000000, 0xec7e489e00000000, 0xd8fc913e00000000, 0xb1f9227e00000000, 0x63f244fe00000000, 0xc7e489fc00000000, 0x8fc913fa00000000, 0x1f9227f600000000, 0x3f244fec00000000, - 0x7e489fd800000000, 0xfc913fb000000000, 0xf9227f6200000000, 0xf244fec600000000, 0xe489fd8e00000000, 0xc913fb1e00000000, 0x9227f63e00000000, 0x244fec7e00000000, 0x489fd8fc00000000, 0x913fb1f800000000, - 0x227f63f200000000 -}; - -// ===== CC_group4 ===== -static const uint64_t CC_group4_33R[33] = { - 0xb06b5ed9, 0x160d6bdb2, 0xc1ad7b65, 0x1835af6ca, 0x106b5ed95, 0xd6bdb2b, 0x1ad7b656, 0x35af6cac, 0x6b5ed958, 0xd6bdb2b0, - 0x1ad7b6560, 0x15af6cac1, 0xb5ed9583, 0x16bdb2b06, 0xd7b6560d, 0x1af6cac1a, 0x15ed95835, 0xbdb2b06b, 0x17b6560d6, 0xf6cac1ad, - 0x1ed95835a, 0x1db2b06b5, 0x1b6560d6b, 0x16cac1ad7, 0xd95835af, 0x1b2b06b5e, 0x16560d6bd, 0xcac1ad7b, 0x195835af6, 0x12b06b5ed, - 0x560d6bdb, 0xac1ad7b6, 0x15835af6c -}; - -static const uint64_t CC_group4_31L[31] = { - 0x8f1531400000000, 0x11e2a62800000000, 0x23c54c5000000000, 0x478a98a000000000, 0x8f15314000000000, 0x1e2a628200000000, 0x3c54c50400000000, 0x78a98a0800000000, 0xf153141000000000, 0xe2a6282200000000, - 0xc54c504600000000, 0x8a98a08e00000000, 0x1531411e00000000, 0x2a62823c00000000, 0x54c5047800000000, 0xa98a08f000000000, 0x531411e200000000, 0xa62823c400000000, 0x4c50478a00000000, 0x98a08f1400000000, - 0x31411e2a00000000, 0x62823c5400000000, 0xc50478a800000000, 0x8a08f15200000000, 0x1411e2a600000000, 0x2823c54c00000000, 0x50478a9800000000, 0xa08f153000000000, 0x411e2a6200000000, 0x823c54c400000000, - 0x478a98a00000000 -}; - -// ===== CG_group1 ===== -static const uint64_t CG_group1_33R[33] = { - 0x20caa27f, 0x419544fe, 0x832a89fc, 0x1065513f8, 0xcaa27f1, 0x19544fe2, 0x32a89fc4, 0x65513f88, 0xcaa27f10, 0x19544fe20, - 0x12a89fc41, 0x5513f883, 0xaa27f106, 0x1544fe20c, 0xa89fc419, 0x1513f8832, 0xa27f1065, 0x144fe20ca, 0x89fc4195, 0x113f8832a, - 0x27f10655, 0x4fe20caa, 0x9fc41954, 0x13f8832a8, 0x7f106551, 0xfe20caa2, 0x1fc419544, 0x1f8832a89, 0x1f1065513, 0x1e20caa27, - 0x1c419544f, 0x18832a89f, 0x11065513f -}; - -static const uint64_t CG_group1_31L[31] = { - 0x54f8849000000000, 0xa9f1092000000000, 0x53e2124200000000, 0xa7c4248400000000, 0x4f88490a00000000, 0x9f10921400000000, 0x3e21242a00000000, 0x7c42485400000000, 0xf88490a800000000, 0xf109215200000000, - 0xe21242a600000000, 0xc424854e00000000, 0x88490a9e00000000, 0x1092153e00000000, 0x21242a7c00000000, 0x424854f800000000, 0x8490a9f000000000, 0x92153e200000000, 0x1242a7c400000000, 0x24854f8800000000, - 0x490a9f1000000000, 0x92153e2000000000, 0x242a7c4200000000, 0x4854f88400000000, 0x90a9f10800000000, 0x2153e21200000000, 0x42a7c42400000000, 0x854f884800000000, 0xa9f109200000000, 0x153e212400000000, - 0x2a7c424800000000 -}; - -// ===== CG_group2 ===== -static const uint64_t CG_group2_33R[33] = { - 0x1a48a5d70, 0x14914bae1, 0x922975c3, 0x12452eb86, 0x48a5d70d, 0x914bae1a, 0x122975c34, 0x452eb869, 0x8a5d70d2, 0x114bae1a4, - 0x2975c349, 0x52eb8692, 0xa5d70d24, 0x14bae1a48, 0x975c3491, 0x12eb86922, 0x5d70d245, 0xbae1a48a, 0x175c34914, 0xeb869229, - 0x1d70d2452, 0x1ae1a48a5, 0x15c34914b, 0xb8692297, 0x170d2452e, 0xe1a48a5d, 0x1c34914ba, 0x186922975, 0x10d2452eb, 0x1a48a5d7, - 0x34914bae, 0x6922975c, 0xd2452eb8 -}; - -static const uint64_t CG_group2_31L[31] = { - 0xab9766c800000000, 0x572ecd9200000000, 0xae5d9b2400000000, 0x5cbb364a00000000, 0xb9766c9400000000, 0x72ecd92a00000000, 0xe5d9b25400000000, 0xcbb364aa00000000, 0x9766c95600000000, 0x2ecd92ae00000000, - 0x5d9b255c00000000, 0xbb364ab800000000, 0x766c957200000000, 0xecd92ae400000000, 0xd9b255ca00000000, 0xb364ab9600000000, 0x66c9572e00000000, 0xcd92ae5c00000000, 0x9b255cba00000000, 0x364ab97600000000, - 0x6c9572ec00000000, 0xd92ae5d800000000, 0xb255cbb200000000, 0x64ab976600000000, 0xc9572ecc00000000, 0x92ae5d9a00000000, 0x255cbb3600000000, 0x4ab9766c00000000, 0x9572ecd800000000, 0x2ae5d9b200000000, - 0x55cbb36400000000 -}; - -// ===== CG_group3 ===== -static const uint64_t CG_group3_33R[33] = { - 0x3c53b1bc, 0x78a76378, 0xf14ec6f0, 0x1e29d8de0, 0x1c53b1bc1, 0x18a763783, 0x114ec6f07, 0x29d8de0f, 0x53b1bc1e, 0xa763783c, - 0x14ec6f078, 0x9d8de0f1, 0x13b1bc1e2, 0x763783c5, 0xec6f078a, 0x1d8de0f14, 0x1b1bc1e29, 0x163783c53, 0xc6f078a7, 0x18de0f14e, - 0x11bc1e29d, 0x3783c53b, 0x6f078a76, 0xde0f14ec, 0x1bc1e29d8, 0x1783c53b1, 0xf078a763, 0x1e0f14ec6, 0x1c1e29d8d, 0x183c53b1b, - 0x1078a7637, 0xf14ec6f, 0x1e29d8de -}; - -static const uint64_t CG_group3_31L[31] = { - 0x7f01ee3000000000, 0xfe03dc6000000000, 0xfc07b8c200000000, 0xf80f718600000000, 0xf01ee30e00000000, 0xe03dc61e00000000, 0xc07b8c3e00000000, 0x80f7187e00000000, 0x1ee30fe00000000, 0x3dc61fc00000000, - 0x7b8c3f800000000, 0xf7187f000000000, 0x1ee30fe000000000, 0x3dc61fc000000000, 0x7b8c3f8000000000, 0xf7187f0000000000, 0xee30fe0200000000, 0xdc61fc0600000000, 0xb8c3f80e00000000, 0x7187f01e00000000, - 0xe30fe03c00000000, 0xc61fc07a00000000, 0x8c3f80f600000000, 0x187f01ee00000000, 0x30fe03dc00000000, 0x61fc07b800000000, 0xc3f80f7000000000, 0x87f01ee200000000, 0xfe03dc600000000, 0x1fc07b8c00000000, - 0x3f80f71800000000 -}; - -// ===== CG_group4 ===== -static const uint64_t CG_group4_33R[33] = { - 0x1d06d2f5a, 0x1a0da5eb5, 0x141b4bd6b, 0x83697ad7, 0x106d2f5ae, 0xda5eb5d, 0x1b4bd6ba, 0x3697ad74, 0x6d2f5ae8, 0xda5eb5d0, - 0x1b4bd6ba0, 0x1697ad741, 0xd2f5ae83, 0x1a5eb5d06, 0x14bd6ba0d, 0x97ad741b, 0x12f5ae836, 0x5eb5d06d, 0xbd6ba0da, 0x17ad741b4, - 0xf5ae8369, 0x1eb5d06d2, 0x1d6ba0da5, 0x1ad741b4b, 0x15ae83697, 0xb5d06d2f, 0x16ba0da5e, 0xd741b4bd, 0x1ae83697a, 0x15d06d2f5, - 0xba0da5eb, 0x1741b4bd6, 0xe83697ad -}; - -static const uint64_t CG_group4_31L[31] = { - 0xd22f789000000000, 0xa45ef12200000000, 0x48bde24600000000, 0x917bc48c00000000, 0x22f7891a00000000, 0x45ef123400000000, 0x8bde246800000000, 0x17bc48d200000000, 0x2f7891a400000000, 0x5ef1234800000000, - 0xbde2469000000000, 0x7bc48d2200000000, 0xf7891a4400000000, 0xef12348a00000000, 0xde24691600000000, 0xbc48d22e00000000, 0x7891a45e00000000, 0xf12348bc00000000, 0xe246917a00000000, 0xc48d22f600000000, - 0x891a45ee00000000, 0x12348bde00000000, 0x246917bc00000000, 0x48d22f7800000000, 0x91a45ef000000000, 0x2348bde200000000, 0x46917bc400000000, 0x8d22f78800000000, 0x1a45ef1200000000, 0x348bde2400000000, - 0x6917bc4800000000 -}; - -// ===== CG_group5 ===== -static const uint64_t CG_group5_33R[33] = { - 0x1bfb10000, 0x17f620001, 0xfec40003, 0x1fd880006, 0x1fb10000d, 0x1f620001b, 0x1ec400037, 0x1d880006f, 0x1b10000df, 0x1620001bf, - 0xc400037f, 0x1880006fe, 0x110000dfd, 0x20001bfb, 0x400037f6, 0x80006fec, 0x10000dfd8, 0x1bfb1, 0x37f62, 0x6fec4, - 0xdfd88, 0x1bfb10, 0x37f620, 0x6fec40, 0xdfd880, 0x1bfb100, 0x37f6200, 0x6fec400, 0xdfd8800, 0x1bfb1000, - 0x37f62000, 0x6fec4000, 0xdfd88000 -}; - -static const uint64_t CG_group5_31L[31] = { - 0xf1bd2f3a00000000, 0xe37a5e7600000000, 0xc6f4bcee00000000, 0x8de979de00000000, 0x1bd2f3be00000000, 0x37a5e77c00000000, 0x6f4bcef800000000, 0xde979df000000000, 0xbd2f3be200000000, 0x7a5e77c600000000, - 0xf4bcef8c00000000, 0xe979df1a00000000, 0xd2f3be3600000000, 0xa5e77c6e00000000, 0x4bcef8de00000000, 0x979df1bc00000000, 0x2f3be37a00000000, 0x5e77c6f400000000, 0xbcef8de800000000, 0x79df1bd200000000, - 0xf3be37a400000000, 0xe77c6f4a00000000, 0xcef8de9600000000, 0x9df1bd2e00000000, 0x3be37a5e00000000, 0x77c6f4bc00000000, 0xef8de97800000000, 0xdf1bd2f200000000, 0xbe37a5e600000000, 0x7c6f4bce00000000, - 0xf8de979c00000000 -}; - -// ===== CG_group6 ===== -static const uint64_t CG_group6_33R[33] = { - 0x78d4d36f, 0xf1a9a6de, 0x1e3534dbc, 0x1c6a69b79, 0x18d4d36f3, 0x11a9a6de7, 0x3534dbcf, 0x6a69b79e, 0xd4d36f3c, 0x1a9a6de78, - 0x1534dbcf1, 0xa69b79e3, 0x14d36f3c6, 0x9a6de78d, 0x134dbcf1a, 0x69b79e35, 0xd36f3c6a, 0x1a6de78d4, 0x14dbcf1a9, 0x9b79e353, - 0x136f3c6a6, 0x6de78d4d, 0xdbcf1a9a, 0x1b79e3534, 0x16f3c6a69, 0xde78d4d3, 0x1bcf1a9a6, 0x179e3534d, 0xf3c6a69b, 0x1e78d4d36, - 0x1cf1a9a6d, 0x19e3534db, 0x13c6a69b7 -}; - -static const uint64_t CG_group6_31L[31] = { - 0x414ce1fc00000000, 0x8299c3f800000000, 0x53387f200000000, 0xa670fe400000000, 0x14ce1fc800000000, 0x299c3f9000000000, 0x53387f2000000000, 0xa670fe4000000000, 0x4ce1fc8200000000, 0x99c3f90400000000, - 0x3387f20a00000000, 0x670fe41400000000, 0xce1fc82800000000, 0x9c3f905200000000, 0x387f20a600000000, 0x70fe414c00000000, 0xe1fc829800000000, 0xc3f9053200000000, 0x87f20a6600000000, 0xfe414ce00000000, - 0x1fc8299c00000000, 0x3f90533800000000, 0x7f20a67000000000, 0xfe414ce000000000, 0xfc8299c200000000, 0xf905338600000000, 0xf20a670e00000000, 0xe414ce1e00000000, 0xc8299c3e00000000, 0x9053387e00000000, - 0x20a670fe00000000 -}; - -// ===== CG_group7 ===== -static const uint64_t CG_group7_33R[33] = { - 0x191cd4803, 0x1239a9007, 0x4735200f, 0x8e6a401e, 0x11cd4803c, 0x39a90079, 0x735200f2, 0xe6a401e4, 0x1cd4803c8, 0x19a900791, - 0x135200f23, 0x6a401e47, 0xd4803c8e, 0x1a900791c, 0x15200f239, 0xa401e473, 0x14803c8e6, 0x900791cd, 0x1200f239a, 0x401e4735, - 0x803c8e6a, 0x100791cd4, 0xf239a9, 0x1e47352, 0x3c8e6a4, 0x791cd48, 0xf239a90, 0x1e473520, 0x3c8e6a40, 0x791cd480, - 0xf239a900, 0x1e4735200, 0x1c8e6a401 -}; - -static const uint64_t CG_group7_31L[31] = { - 0xff3adb2c00000000, 0xfe75b65a00000000, 0xfceb6cb600000000, 0xf9d6d96e00000000, 0xf3adb2de00000000, 0xe75b65be00000000, 0xceb6cb7e00000000, 0x9d6d96fe00000000, 0x3adb2dfe00000000, 0x75b65bfc00000000, - 0xeb6cb7f800000000, 0xd6d96ff200000000, 0xadb2dfe600000000, 0x5b65bfce00000000, 0xb6cb7f9c00000000, 0x6d96ff3a00000000, 0xdb2dfe7400000000, 0xb65bfcea00000000, 0x6cb7f9d600000000, 0xd96ff3ac00000000, - 0xb2dfe75a00000000, 0x65bfceb600000000, 0xcb7f9d6c00000000, 0x96ff3ada00000000, 0x2dfe75b600000000, 0x5bfceb6c00000000, 0xb7f9d6d800000000, 0x6ff3adb200000000, 0xdfe75b6400000000, 0xbfceb6ca00000000, - 0x7f9d6d9600000000 -}; - -// ===== CT_group1 ===== -static const uint64_t CT_group1_33R[33] = { - 0x128da11f, 0x251b423e, 0x4a36847c, 0x946d08f8, 0x128da11f0, 0x51b423e1, 0xa36847c2, 0x146d08f84, 0x8da11f09, 0x11b423e12, - 0x36847c25, 0x6d08f84a, 0xda11f094, 0x1b423e128, 0x16847c251, 0xd08f84a3, 0x1a11f0946, 0x1423e128d, 0x847c251b, 0x108f84a36, - 0x11f0946d, 0x23e128da, 0x47c251b4, 0x8f84a368, 0x11f0946d0, 0x3e128da1, 0x7c251b42, 0xf84a3684, 0x1f0946d08, 0x1e128da11, - 0x1c251b423, 0x184a36847, 0x10946d08f -}; - -static const uint64_t CT_group1_31L[31] = { - 0x49bab7000000000, 0x93756e000000000, 0x126eadc000000000, 0x24dd5b8000000000, 0x49bab70000000000, 0x93756e0000000000, 0x26eadc0200000000, 0x4dd5b80400000000, 0x9bab700800000000, 0x3756e01200000000, - 0x6eadc02400000000, 0xdd5b804800000000, 0xbab7009200000000, 0x756e012600000000, 0xeadc024c00000000, 0xd5b8049a00000000, 0xab70093600000000, 0x56e0126e00000000, 0xadc024dc00000000, 0x5b8049ba00000000, - 0xb700937400000000, 0x6e0126ea00000000, 0xdc024dd400000000, 0xb8049baa00000000, 0x7009375600000000, 0xe0126eac00000000, 0xc024dd5a00000000, 0x8049bab600000000, 0x93756e00000000, 0x126eadc00000000, - 0x24dd5b800000000 -}; - -// ===== CT_group2 ===== -static const uint64_t CT_group2_33R[33] = { - 0x5e7b14e2, 0xbcf629c4, 0x179ec5388, 0xf3d8a711, 0x1e7b14e22, 0x1cf629c45, 0x19ec5388b, 0x13d8a7117, 0x7b14e22f, 0xf629c45e, - 0x1ec5388bc, 0x1d8a71179, 0x1b14e22f3, 0x1629c45e7, 0xc5388bcf, 0x18a71179e, 0x114e22f3d, 0x29c45e7b, 0x5388bcf6, 0xa71179ec, - 0x14e22f3d8, 0x9c45e7b1, 0x1388bcf62, 0x71179ec5, 0xe22f3d8a, 0x1c45e7b14, 0x188bcf629, 0x11179ec53, 0x22f3d8a7, 0x45e7b14e, - 0x8bcf629c, 0x1179ec538, 0x2f3d8a71 -}; - -static const uint64_t CT_group2_31L[31] = { - 0x9a533a6a00000000, 0x34a674d600000000, 0x694ce9ac00000000, 0xd299d35800000000, 0xa533a6b200000000, 0x4a674d6600000000, 0x94ce9acc00000000, 0x299d359a00000000, 0x533a6b3400000000, 0xa674d66800000000, - 0x4ce9acd200000000, 0x99d359a400000000, 0x33a6b34a00000000, 0x674d669400000000, 0xce9acd2800000000, 0x9d359a5200000000, 0x3a6b34a600000000, 0x74d6694c00000000, 0xe9acd29800000000, 0xd359a53200000000, - 0xa6b34a6600000000, 0x4d6694ce00000000, 0x9acd299c00000000, 0x359a533a00000000, 0x6b34a67400000000, 0xd6694ce800000000, 0xacd299d200000000, 0x59a533a600000000, 0xb34a674c00000000, 0x6694ce9a00000000, - 0xcd299d3400000000 -}; - -// ===== CT_group3 ===== -static const uint64_t CT_group3_33R[33] = { - 0xe10d9ac0, 0x1c21b3580, 0x184366b01, 0x1086cd603, 0x10d9ac07, 0x21b3580e, 0x4366b01c, 0x86cd6038, 0x10d9ac070, 0x1b3580e1, - 0x366b01c2, 0x6cd60384, 0xd9ac0708, 0x1b3580e10, 0x166b01c21, 0xcd603843, 0x19ac07086, 0x13580e10d, 0x6b01c21b, 0xd6038436, - 0x1ac07086c, 0x1580e10d9, 0xb01c21b3, 0x160384366, 0xc07086cd, 0x180e10d9a, 0x101c21b35, 0x384366b, 0x7086cd6, 0xe10d9ac, - 0x1c21b358, 0x384366b0, 0x7086cd60 -}; - -static const uint64_t CT_group3_31L[31] = { - 0xeeaf42d600000000, 0xdd5e85ae00000000, 0xbabd0b5e00000000, 0x757a16be00000000, 0xeaf42d7c00000000, 0xd5e85afa00000000, 0xabd0b5f600000000, 0x57a16bee00000000, 0xaf42d7dc00000000, 0x5e85afba00000000, - 0xbd0b5f7400000000, 0x7a16beea00000000, 0xf42d7dd400000000, 0xe85afbaa00000000, 0xd0b5f75600000000, 0xa16beeae00000000, 0x42d7dd5e00000000, 0x85afbabc00000000, 0xb5f757a00000000, 0x16beeaf400000000, - 0x2d7dd5e800000000, 0x5afbabd000000000, 0xb5f757a000000000, 0x6beeaf4200000000, 0xd7dd5e8400000000, 0xafbabd0a00000000, 0x5f757a1600000000, 0xbeeaf42c00000000, 0x7dd5e85a00000000, 0xfbabd0b400000000, - 0xf757a16a00000000 -}; - -// ===== CT_group4 ===== -static const uint64_t CT_group4_33R[33] = { - 0x1ebfb5001, 0x1d7f6a003, 0x1afed4007, 0x15fda800f, 0xbfb5001f, 0x17f6a003e, 0xfed4007d, 0x1fda800fa, 0x1fb5001f5, 0x1f6a003eb, - 0x1ed4007d7, 0x1da800faf, 0x1b5001f5f, 0x16a003ebf, 0xd4007d7f, 0x1a800fafe, 0x15001f5fd, 0xa003ebfb, 0x14007d7f6, 0x800fafed, - 0x1001f5fda, 0x3ebfb5, 0x7d7f6a, 0xfafed4, 0x1f5fda8, 0x3ebfb50, 0x7d7f6a0, 0xfafed40, 0x1f5fda80, 0x3ebfb500, - 0x7d7f6a00, 0xfafed400, 0x1f5fda800 -}; - -static const uint64_t CT_group4_31L[31] = { - 0x68c7d4ac00000000, 0xd18fa95800000000, 0xa31f52b200000000, 0x463ea56600000000, 0x8c7d4acc00000000, 0x18fa959a00000000, 0x31f52b3400000000, 0x63ea566800000000, 0xc7d4acd000000000, 0x8fa959a200000000, - 0x1f52b34600000000, 0x3ea5668c00000000, 0x7d4acd1800000000, 0xfa959a3000000000, 0xf52b346200000000, 0xea5668c600000000, 0xd4acd18e00000000, 0xa959a31e00000000, 0x52b3463e00000000, 0xa5668c7c00000000, - 0x4acd18fa00000000, 0x959a31f400000000, 0x2b3463ea00000000, 0x5668c7d400000000, 0xacd18fa800000000, 0x59a31f5200000000, 0xb3463ea400000000, 0x668c7d4a00000000, 0xcd18fa9400000000, 0x9a31f52a00000000, - 0x3463ea5600000000 -}; - -// ===== CT_group5 ===== -static const uint64_t CT_group5_33R[33] = { - 0xa0e3e700, 0x141c7ce00, 0x838f9c01, 0x1071f3802, 0xe3e7005, 0x1c7ce00a, 0x38f9c014, 0x71f38028, 0xe3e70050, 0x1c7ce00a0, - 0x18f9c0141, 0x11f380283, 0x3e700507, 0x7ce00a0e, 0xf9c0141c, 0x1f3802838, 0x1e7005071, 0x1ce00a0e3, 0x19c0141c7, 0x13802838f, - 0x7005071f, 0xe00a0e3e, 0x1c0141c7c, 0x1802838f9, 0x1005071f3, 0xa0e3e7, 0x141c7ce, 0x2838f9c, 0x5071f38, 0xa0e3e70, - 0x141c7ce0, 0x2838f9c0, 0x5071f380 -}; - -static const uint64_t CT_group5_31L[31] = { - 0xf1bfc7600000000, 0x1e37f8ec00000000, 0x3c6ff1d800000000, 0x78dfe3b000000000, 0xf1bfc76000000000, 0xe37f8ec200000000, 0xc6ff1d8600000000, 0x8dfe3b0e00000000, 0x1bfc761e00000000, 0x37f8ec3c00000000, - 0x6ff1d87800000000, 0xdfe3b0f000000000, 0xbfc761e200000000, 0x7f8ec3c600000000, 0xff1d878c00000000, 0xfe3b0f1a00000000, 0xfc761e3600000000, 0xf8ec3c6e00000000, 0xf1d878de00000000, 0xe3b0f1be00000000, - 0xc761e37e00000000, 0x8ec3c6fe00000000, 0x1d878dfe00000000, 0x3b0f1bfc00000000, 0x761e37f800000000, 0xec3c6ff000000000, 0xd878dfe200000000, 0xb0f1bfc600000000, 0x61e37f8e00000000, 0xc3c6ff1c00000000, - 0x878dfe3a00000000 -}; - -// ===== CT_group6 ===== -static const uint64_t CT_group6_33R[33] = { - 0x17fae339f, 0xff5c673f, 0x1feb8ce7e, 0x1fd719cfd, 0x1fae339fb, 0x1f5c673f7, 0x1eb8ce7ef, 0x1d719cfdf, 0x1ae339fbf, 0x15c673f7f, - 0xb8ce7eff, 0x1719cfdfe, 0xe339fbfd, 0x1c673f7fa, 0x18ce7eff5, 0x119cfdfeb, 0x339fbfd7, 0x673f7fae, 0xce7eff5c, 0x19cfdfeb8, - 0x139fbfd71, 0x73f7fae3, 0xe7eff5c6, 0x1cfdfeb8c, 0x19fbfd719, 0x13f7fae33, 0x7eff5c67, 0xfdfeb8ce, 0x1fbfd719c, 0x1f7fae339, - 0x1eff5c673, 0x1dfeb8ce7, 0x1bfd719cf -}; - -static const uint64_t CT_group6_31L[31] = { - 0x282450c400000000, 0x5048a18800000000, 0xa091431000000000, 0x4122862200000000, 0x82450c4400000000, 0x48a188a00000000, 0x914311400000000, 0x1228622800000000, 0x2450c45000000000, 0x48a188a000000000, - 0x9143114000000000, 0x2286228200000000, 0x450c450400000000, 0x8a188a0800000000, 0x1431141200000000, 0x2862282400000000, 0x50c4504800000000, 0xa188a09000000000, 0x4311412200000000, 0x8622824400000000, - 0xc45048a00000000, 0x188a091400000000, 0x3114122800000000, 0x6228245000000000, 0xc45048a000000000, 0x88a0914200000000, 0x1141228600000000, 0x2282450c00000000, 0x45048a1800000000, 0x8a09143000000000, - 0x1412286200000000 -}; - -// ===== CT_group7 ===== -static const uint64_t CT_group7_33R[33] = { - 0x16b464570, 0xd68c8ae1, 0x1ad1915c2, 0x15a322b85, 0xb464570b, 0x168c8ae16, 0xd1915c2d, 0x1a322b85a, 0x1464570b5, 0x8c8ae16b, - 0x11915c2d6, 0x322b85ad, 0x64570b5a, 0xc8ae16b4, 0x1915c2d68, 0x122b85ad1, 0x4570b5a3, 0x8ae16b46, 0x115c2d68c, 0x2b85ad19, - 0x570b5a32, 0xae16b464, 0x15c2d68c8, 0xb85ad191, 0x170b5a322, 0xe16b4645, 0x1c2d68c8a, 0x185ad1915, 0x10b5a322b, 0x16b46457, - 0x2d68c8ae, 0x5ad1915c, 0xb5a322b8 -}; - -static const uint64_t CT_group7_31L[31] = { - 0x897da34e00000000, 0x12fb469e00000000, 0x25f68d3c00000000, 0x4bed1a7800000000, 0x97da34f000000000, 0x2fb469e200000000, 0x5f68d3c400000000, 0xbed1a78800000000, 0x7da34f1200000000, 0xfb469e2400000000, - 0xf68d3c4a00000000, 0xed1a789600000000, 0xda34f12e00000000, 0xb469e25e00000000, 0x68d3c4be00000000, 0xd1a7897c00000000, 0xa34f12fa00000000, 0x469e25f600000000, 0x8d3c4bec00000000, 0x1a7897da00000000, - 0x34f12fb400000000, 0x69e25f6800000000, 0xd3c4bed000000000, 0xa7897da200000000, 0x4f12fb4600000000, 0x9e25f68c00000000, 0x3c4bed1a00000000, 0x7897da3400000000, 0xf12fb46800000000, 0xe25f68d200000000, - 0xc4bed1a600000000 -}; - -// ===== CA_group1 ===== -static const uint64_t CA_group1_33R[33] = { - 0x678e5d30, 0xcf1cba60, 0x19e3974c0, 0x13c72e981, 0x78e5d303, 0xf1cba606, 0x1e3974c0c, 0x1c72e9819, 0x18e5d3033, 0x11cba6067, - 0x3974c0cf, 0x72e9819e, 0xe5d3033c, 0x1cba60678, 0x1974c0cf1, 0x12e9819e3, 0x5d3033c7, 0xba60678e, 0x174c0cf1c, 0xe9819e39, - 0x1d3033c72, 0x1a60678e5, 0x14c0cf1cb, 0x9819e397, 0x13033c72e, 0x60678e5d, 0xc0cf1cba, 0x1819e3974, 0x1033c72e9, 0x678e5d3, - 0xcf1cba6, 0x19e3974c, 0x33c72e98 -}; - -static const uint64_t CA_group1_31L[31] = { - 0xe12f0e8e00000000, 0xc25e1d1e00000000, 0x84bc3a3e00000000, 0x978747e00000000, 0x12f0e8fc00000000, 0x25e1d1f800000000, 0x4bc3a3f000000000, 0x978747e000000000, 0x2f0e8fc200000000, 0x5e1d1f8400000000, - 0xbc3a3f0800000000, 0x78747e1200000000, 0xf0e8fc2400000000, 0xe1d1f84a00000000, 0xc3a3f09600000000, 0x8747e12e00000000, 0xe8fc25e00000000, 0x1d1f84bc00000000, 0x3a3f097800000000, 0x747e12f000000000, - 0xe8fc25e000000000, 0xd1f84bc200000000, 0xa3f0978600000000, 0x47e12f0e00000000, 0x8fc25e1c00000000, 0x1f84bc3a00000000, 0x3f09787400000000, 0x7e12f0e800000000, 0xfc25e1d000000000, 0xf84bc3a200000000, - 0xf097874600000000 -}; - -// ===== CA_group2 ===== -static const uint64_t CA_group2_33R[33] = { - 0x96718a9f, 0x12ce3153e, 0x59c62a7d, 0xb38c54fa, 0x16718a9f4, 0xce3153e9, 0x19c62a7d2, 0x138c54fa5, 0x718a9f4b, 0xe3153e96, - 0x1c62a7d2c, 0x18c54fa59, 0x118a9f4b3, 0x3153e967, 0x62a7d2ce, 0xc54fa59c, 0x18a9f4b38, 0x1153e9671, 0x2a7d2ce3, 0x54fa59c6, - 0xa9f4b38c, 0x153e96718, 0xa7d2ce31, 0x14fa59c62, 0x9f4b38c5, 0x13e96718a, 0x7d2ce315, 0xfa59c62a, 0x1f4b38c54, 0x1e96718a9, - 0x1d2ce3153, 0x1a59c62a7, 0x14b38c54f -}; - -static const uint64_t CA_group2_31L[31] = { - 0x95b4b90600000000, 0x2b69720e00000000, 0x56d2e41c00000000, 0xada5c83800000000, 0x5b4b907200000000, 0xb69720e400000000, 0x6d2e41ca00000000, 0xda5c839400000000, 0xb4b9072a00000000, 0x69720e5600000000, - 0xd2e41cac00000000, 0xa5c8395a00000000, 0x4b9072b600000000, 0x9720e56c00000000, 0x2e41cada00000000, 0x5c8395b400000000, 0xb9072b6800000000, 0x720e56d200000000, 0xe41cada400000000, 0xc8395b4a00000000, - 0x9072b69600000000, 0x20e56d2e00000000, 0x41cada5c00000000, 0x8395b4b800000000, 0x72b697200000000, 0xe56d2e400000000, 0x1cada5c800000000, 0x395b4b9000000000, 0x72b6972000000000, 0xe56d2e4000000000, - 0xcada5c8200000000 -}; - -// ===== CA_group3 ===== -static const uint64_t CA_group3_33R[33] = { - 0x15f3610bf, 0xbe6c217f, 0x17cd842fe, 0xf9b085fd, 0x1f3610bfa, 0x1e6c217f5, 0x1cd842feb, 0x19b085fd7, 0x13610bfaf, 0x6c217f5f, - 0xd842febe, 0x1b085fd7c, 0x1610bfaf9, 0xc217f5f3, 0x1842febe6, 0x1085fd7cd, 0x10bfaf9b, 0x217f5f36, 0x42febe6c, 0x85fd7cd8, - 0x10bfaf9b0, 0x17f5f361, 0x2febe6c2, 0x5fd7cd84, 0xbfaf9b08, 0x17f5f3610, 0xfebe6c21, 0x1fd7cd842, 0x1faf9b085, 0x1f5f3610b, - 0x1ebe6c217, 0x1d7cd842f, 0x1af9b085f -}; - -static const uint64_t CA_group3_31L[31] = { - 0xf482881e00000000, 0xe905103e00000000, 0xd20a207e00000000, 0xa41440fe00000000, 0x482881fe00000000, 0x905103fc00000000, 0x20a207fa00000000, 0x41440ff400000000, 0x82881fe800000000, 0x5103fd200000000, - 0xa207fa400000000, 0x1440ff4800000000, 0x2881fe9000000000, 0x5103fd2000000000, 0xa207fa4000000000, 0x440ff48200000000, 0x881fe90400000000, 0x103fd20a00000000, 0x207fa41400000000, 0x40ff482800000000, - 0x81fe905000000000, 0x3fd20a200000000, 0x7fa414400000000, 0xff4828800000000, 0x1fe9051000000000, 0x3fd20a2000000000, 0x7fa4144000000000, 0xff48288000000000, 0xfe90510200000000, 0xfd20a20600000000, - 0xfa41440e00000000 -}; - -// ===== CA_group4 ===== -static const uint64_t CA_group4_33R[33] = { - 0x19c5032af, 0x138a0655f, 0x7140cabf, 0xe281957e, 0x1c5032afc, 0x18a0655f9, 0x1140cabf3, 0x281957e7, 0x5032afce, 0xa0655f9c, - 0x140cabf38, 0x81957e71, 0x1032afce2, 0x655f9c5, 0xcabf38a, 0x1957e714, 0x32afce28, 0x655f9c50, 0xcabf38a0, 0x1957e7140, - 0x12afce281, 0x55f9c503, 0xabf38a06, 0x157e7140c, 0xafce2819, 0x15f9c5032, 0xbf38a065, 0x17e7140ca, 0xfce28195, 0x1f9c5032a, - 0x1f38a0655, 0x1e7140cab, 0x1ce281957 -}; - -static const uint64_t CA_group4_31L[31] = { - 0x7e4a369000000000, 0xfc946d2000000000, 0xf928da4200000000, 0xf251b48600000000, 0xe4a3690e00000000, 0xc946d21e00000000, 0x928da43e00000000, 0x251b487e00000000, 0x4a3690fc00000000, 0x946d21f800000000, - 0x28da43f200000000, 0x51b487e400000000, 0xa3690fc800000000, 0x46d21f9200000000, 0x8da43f2400000000, 0x1b487e4a00000000, 0x3690fc9400000000, 0x6d21f92800000000, 0xda43f25000000000, 0xb487e4a200000000, - 0x690fc94600000000, 0xd21f928c00000000, 0xa43f251a00000000, 0x487e4a3600000000, 0x90fc946c00000000, 0x21f928da00000000, 0x43f251b400000000, 0x87e4a36800000000, 0xfc946d200000000, 0x1f928da400000000, - 0x3f251b4800000000 -}; - -// ===== CA_group5 ===== -static const uint64_t CA_group5_33R[33] = { - 0xf9159cec, 0x1f22b39d8, 0x1e45673b1, 0x1c8ace763, 0x19159cec7, 0x122b39d8f, 0x45673b1f, 0x8ace763e, 0x1159cec7c, 0x2b39d8f9, - 0x5673b1f2, 0xace763e4, 0x159cec7c8, 0xb39d8f91, 0x1673b1f22, 0xce763e45, 0x19cec7c8a, 0x139d8f915, 0x73b1f22b, 0xe763e456, - 0x1cec7c8ac, 0x19d8f9159, 0x13b1f22b3, 0x763e4567, 0xec7c8ace, 0x1d8f9159c, 0x1b1f22b39, 0x163e45673, 0xc7c8ace7, 0x18f9159ce, - 0x11f22b39d, 0x3e45673b, 0x7c8ace76 -}; - -static const uint64_t CA_group5_31L[31] = { - 0xf248b96000000000, 0xe49172c200000000, 0xc922e58600000000, 0x9245cb0e00000000, 0x248b961e00000000, 0x49172c3c00000000, 0x922e587800000000, 0x245cb0f200000000, 0x48b961e400000000, 0x9172c3c800000000, - 0x22e5879200000000, 0x45cb0f2400000000, 0x8b961e4800000000, 0x172c3c9200000000, 0x2e58792400000000, 0x5cb0f24800000000, 0xb961e49000000000, 0x72c3c92200000000, 0xe587924400000000, 0xcb0f248a00000000, - 0x961e491600000000, 0x2c3c922e00000000, 0x5879245c00000000, 0xb0f248b800000000, 0x61e4917200000000, 0xc3c922e400000000, 0x879245ca00000000, 0xf248b9600000000, 0x1e49172c00000000, 0x3c922e5800000000, - 0x79245cb000000000 -}; - -// ===== CA_group6 ===== -static const uint64_t CA_group6_33R[33] = { - 0x11cf8a361, 0x39f146c3, 0x73e28d86, 0xe7c51b0c, 0x1cf8a3618, 0x19f146c31, 0x13e28d863, 0x7c51b0c7, 0xf8a3618e, 0x1f146c31c, - 0x1e28d8639, 0x1c51b0c73, 0x18a3618e7, 0x1146c31cf, 0x28d8639f, 0x51b0c73e, 0xa3618e7c, 0x146c31cf8, 0x8d8639f1, 0x11b0c73e2, - 0x3618e7c5, 0x6c31cf8a, 0xd8639f14, 0x1b0c73e28, 0x1618e7c51, 0xc31cf8a3, 0x18639f146, 0x10c73e28d, 0x18e7c51b, 0x31cf8a36, - 0x639f146c, 0xc73e28d8, 0x18e7c51b0 -}; - -static const uint64_t CA_group6_31L[31] = { - 0x9ff8253200000000, 0x3ff04a6600000000, 0x7fe094cc00000000, 0xffc1299800000000, 0xff82533200000000, 0xff04a66600000000, 0xfe094cce00000000, 0xfc12999e00000000, 0xf825333e00000000, 0xf04a667e00000000, - 0xe094ccfe00000000, 0xc12999fe00000000, 0x825333fe00000000, 0x4a667fe00000000, 0x94ccffc00000000, 0x12999ff800000000, 0x25333ff000000000, 0x4a667fe000000000, 0x94ccffc000000000, 0x2999ff8200000000, - 0x5333ff0400000000, 0xa667fe0800000000, 0x4ccffc1200000000, 0x999ff82400000000, 0x333ff04a00000000, 0x667fe09400000000, 0xccffc12800000000, 0x99ff825200000000, 0x33ff04a600000000, 0x67fe094c00000000, - 0xcffc129800000000 -}; -// ===== CA_group7 ===== -static const uint64_t CA_group7_33R[33] = { - 0x83cfcff, 0x1079f9fe, 0x20f3f3fc, 0x41e7e7f8, 0x83cfcff0, 0x1079f9fe0, 0xf3f3fc1, 0x1e7e7f82, 0x3cfcff04, 0x79f9fe08, - 0xf3f3fc10, 0x1e7e7f820, 0x1cfcff041, 0x19f9fe083, 0x13f3fc107, 0x7e7f820f, 0xfcff041e, 0x1f9fe083c, 0x1f3fc1079, 0x1e7f820f3, - 0x1cff041e7, 0x19fe083cf, 0x13fc1079f, 0x7f820f3f, 0xff041e7e, 0x1fe083cfc, 0x1fc1079f9, 0x1f820f3f3, 0x1f041e7e7, 0x1e083cfcf, - 0x1c1079f9f, 0x1820f3f3f, 0x1041e7e7f -}; - -static const uint64_t CA_group7_31L[31] = { - 0xe055ee00000000, 0x1c0abdc00000000, 0x38157b800000000, 0x702af7000000000, 0xe055ee000000000, 0x1c0abdc000000000, 0x38157b8000000000, 0x702af70000000000, 0xe055ee0000000000, 0xc0abdc0200000000, - 0x8157b80600000000, 0x2af700e00000000, 0x55ee01c00000000, 0xabdc03800000000, 0x157b807000000000, 0x2af700e000000000, 0x55ee01c000000000, 0xabdc038000000000, 0x57b8070200000000, 0xaf700e0400000000, - 0x5ee01c0a00000000, 0xbdc0381400000000, 0x7b80702a00000000, 0xf700e05400000000, 0xee01c0aa00000000, 0xdc03815600000000, 0xb80702ae00000000, 0x700e055e00000000, 0xe01c0abc00000000, 0xc038157a00000000, - 0x80702af600000000 -}; - -constexpr const uint64_t* MS_TAB_33R_CC[45] = { - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 - nullptr, CC_group2_33R, CC_group4_33R, CC_group2_33R, CC_group4_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 - nullptr, CC_group3_33R, CC_group1_33R, CC_group3_33R, CC_group1_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 - nullptr, CC_group2_33R, CC_group4_33R, CC_group2_33R, CC_group4_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 - nullptr, CC_group3_33R, CC_group1_33R, CC_group3_33R, CC_group1_33R // 40–44 -}; - -constexpr const uint64_t* MS_TAB_31L_CC[45] = { - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 - nullptr, CC_group2_31L, CC_group4_31L, CC_group2_31L, CC_group4_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 - nullptr, CC_group3_31L, CC_group1_31L, CC_group3_31L, CC_group1_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 - nullptr, CC_group2_31L, CC_group4_31L, CC_group2_31L, CC_group4_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 - nullptr, CC_group3_31L, CC_group1_31L, CC_group3_31L, CC_group1_31L // 40–44 -}; - -// ===== MS_TAB_33R_CG ===== -constexpr const uint64_t* MS_TAB_33R_CG[45] = { - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 - nullptr, CG_group1_33R, CG_group2_33R, CG_group3_33R, CG_group2_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 - nullptr, CG_group4_33R, CG_group5_33R, CG_group4_33R, CG_group5_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 - nullptr, CG_group1_33R, CG_group2_33R, CG_group3_33R, CG_group2_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 - nullptr, CG_group6_33R, CG_group7_33R, CG_group4_33R, CG_group7_33R // 40–44 -}; - -// ===== MS_TAB_31L_CG ===== -constexpr const uint64_t* MS_TAB_31L_CG[45] = { - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 - nullptr, CG_group1_31L, CG_group2_31L, CG_group3_31L, CG_group2_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 - nullptr, CG_group4_31L, CG_group5_31L, CG_group4_31L, CG_group5_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 - nullptr, CG_group1_31L, CG_group2_31L, CG_group3_31L, CG_group2_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 - nullptr, CG_group6_31L, CG_group7_31L, CG_group4_31L, CG_group7_31L // 40–44 -}; - - -// ===== MS_TAB_33R_CA ===== -constexpr const uint64_t* MS_TAB_33R_CA[45] = { - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 - nullptr, CA_group4_33R, CA_group2_33R, CA_group3_33R, CA_group2_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 - nullptr, CA_group1_33R, CA_group6_33R, CA_group5_33R, CA_group6_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 - nullptr, CA_group4_33R, CA_group2_33R, CA_group3_33R, CA_group2_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 - nullptr, CA_group1_33R, CA_group7_33R, CA_group1_33R, CA_group7_33R // 40–44 -}; - -// ===== MS_TAB_31L_CA ===== -constexpr const uint64_t* MS_TAB_31L_CA[45] = { - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 - nullptr, CA_group4_31L, CA_group2_31L, CA_group3_31L, CA_group2_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 - nullptr, CA_group1_31L, CA_group6_31L, CA_group5_31L, CA_group6_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 - nullptr, CA_group4_31L, CA_group2_31L, CA_group3_31L, CA_group2_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 - nullptr, CA_group1_31L, CA_group7_31L, CA_group1_31L, CA_group7_31L // 40–44 -}; - -// ===== MS_TAB_33R_CT ===== -constexpr const uint64_t* MS_TAB_33R_CT[45] = { - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 - nullptr, CT_group2_33R, CT_group2_33R, CT_group2_33R, CT_group7_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 - nullptr, CT_group3_33R, CT_group1_33R, CT_group4_33R, CT_group1_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 - nullptr, CT_group2_33R, CT_group2_33R, CT_group2_33R, CT_group7_33R, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 - nullptr, CT_group5_33R, CT_group1_33R, CT_group6_33R, CT_group1_33R // 40–44 -}; - -// ===== MS_TAB_31L_CT ===== -constexpr const uint64_t* MS_TAB_31L_CT[45] = { - nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, // 0–9 - nullptr, CT_group2_31L, CT_group2_31L, CT_group2_31L, CT_group7_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 10–19 - nullptr, CT_group3_31L, CT_group1_31L, CT_group4_31L, CT_group1_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 20–29 - nullptr, CT_group2_31L, CT_group2_31L, CT_group2_31L, CT_group7_31L, nullptr, nullptr, nullptr, nullptr, nullptr, // 30–39 - nullptr, CT_group5_31L, CT_group1_31L, CT_group6_31L, CT_group1_31L // 40–44 -}; - - -inline uint64_t -srol_bs_table(uint8_t out_idx, unsigned d, const uint64_t* const* TAB_33R, const uint64_t* const* TAB_31L) -{ - d= d/2; - return (TAB_31L[out_idx][d < 31 ? d : d % 31] | /* NOLINT */ - TAB_33R[out_idx][d < 33 ? d : d % 33]); /* NOLINT */ -} - - - -inline uint64_t -next_forward_bs_hash(uint64_t fh_val, - unsigned k, - unsigned char char_out1, - unsigned char char_out2, - unsigned char char_in1, - unsigned char char_in2, - const uint64_t* DIMER_TAB, - const uint64_t* const* TAB_33R, - const uint64_t* const* TAB_31L) -{ - // Compute indices for outgoing and incoming dimers - // Each dimer is encoded as (10 * first + second) - uint8_t out_idx = 10 * CONVERT_TAB_SIMPLE[char_out1] + CONVERT_TAB_SIMPLE[char_out2]; - uint8_t in_idx = 10 * CONVERT_TAB_SIMPLE[char_in1] + CONVERT_TAB_SIMPLE[char_in2]; - - // Rotate the hash left by 2 since we move by two bases - uint64_t h_val = srol(fh_val); - - // XOR out the old dimer and XOR in the new dimer - h_val ^= srol_bs_table(out_idx, k, TAB_33R, TAB_31L); - h_val ^= DIMER_TAB[in_idx]; - - return h_val; -} - - -inline uint64_t -prev_forward_bs_hash(uint64_t fh_val, - unsigned k, - unsigned char char_out1, - unsigned char char_out2, - unsigned char char_in1, - unsigned char char_in2, - const uint64_t* DIMER_TAB, - const uint64_t* const* TAB_33R, - const uint64_t* const* TAB_31L) -{ - // Compute indices for outgoing and incoming dimers - // Each dimer is encoded as (10 * first + second) - uint8_t out_idx = 10 * CONVERT_TAB_SIMPLE[char_out1] + CONVERT_TAB_SIMPLE[char_out2]; - uint8_t in_idx = 10 * CONVERT_TAB_SIMPLE[char_in1] + CONVERT_TAB_SIMPLE[char_in2]; - - uint64_t h_val = fh_val ^ srol_bs_table(in_idx, k, TAB_33R, TAB_31L); - h_val ^= DIMER_TAB[out_idx]; - h_val = sror(h_val); - return h_val; -} - -// Generic dimer-based reverse hash using pointer table -inline std::vector -base_reverse_bs_hash(const char* seq, bool has_second, unsigned k, const uint64_t* DIMER_TAB) -{ - - uint64_t h0 = 0; - uint64_t h1 = 0; +using hashing_internals::extend_hashes; +using hashing_internals::srol; +using hashing_internals::sror; - // Process pairs from the end for h0 - for (int i = (int)k - 1; i >= 1; i -= 2) { - h0 = srol(h0, 1); - const uint8_t a = CONVERT_TAB_RC[(unsigned char)seq[i]]; - const uint8_t b = CONVERT_TAB_RC[(unsigned char)seq[i - 1]]; - const uint8_t loc = 10 * a + b; // yields 11..44 - h0 ^= DIMER_TAB[loc]; // first element of pointer table - } +using hashing_internals::base_forward_bs_hash; +using hashing_internals::next_forward_bs_hash; +using hashing_internals::prev_forward_bs_hash; - // If length exactly k, return only h0 - if (!has_second) - return {h0, 0}; +using hashing_internals::base_reverse_bs_hash; +using hashing_internals::next_reverse_bs_hash; +using hashing_internals::prev_reverse_bs_hash; - // Offset by 1 for second hash (mirroring h1 in forward) - for (int i = (int)k; i >= 2; i -= 2) { - h1 = srol(h1, 1); - const uint8_t a = CONVERT_TAB_RC[(unsigned char)seq[i]]; - const uint8_t b = CONVERT_TAB_RC[(unsigned char)seq[i - 1]]; - const uint8_t loc = 10 * a + b; - h1 ^= DIMER_TAB[loc]; - } +using hashing_internals::BS_CONVERT_TAB; +using hashing_internals::BS_RC_CONVERT_TAB; - return {h0, h1}; -} - -inline uint64_t -next_reverse_bs_hash(uint64_t rh_val, - unsigned k, - unsigned char char_out1, - unsigned char char_out2, - unsigned char char_in1, - unsigned char char_in2, - const uint64_t* DIMER_TAB, - const uint64_t* const* TAB_33R, - const uint64_t* const* TAB_31L) -{ - uint8_t out_idx = 10 * CONVERT_TAB_RC[char_out2] + CONVERT_TAB_RC[char_out1]; - uint8_t in_idx = 10 * CONVERT_TAB_RC[char_in2] + CONVERT_TAB_RC[char_in1]; - uint64_t h_val = rh_val ^ srol_bs_table(in_idx, k, TAB_33R, TAB_31L); - h_val ^= DIMER_TAB[out_idx]; - h_val = sror(h_val); - return h_val; -} - - -inline uint64_t -prev_reverse_bs_hash(uint64_t rh_val, - unsigned k, - unsigned char char_out1, - unsigned char char_out2, - unsigned char char_in1, - unsigned char char_in2, - const uint64_t* DIMER_TAB, - const uint64_t* const* TAB_33R, - const uint64_t* const* TAB_31L) -{ - uint8_t out_idx = 10 * CONVERT_TAB_RC[char_out2] + CONVERT_TAB_RC[char_out1]; - uint8_t in_idx = 10 * CONVERT_TAB_RC[char_in2] + CONVERT_TAB_RC[char_in1]; - uint64_t h_val = srol(rh_val); - h_val ^= DIMER_TAB[in_idx]; - h_val ^= srol_bs_table(out_idx, k, TAB_33R, TAB_31L); - return h_val; -} -}// namespace btllib::hashing_internals +using hashing_internals::CA_DIMER_TAB; +using hashing_internals::CC_DIMER_TAB; +using hashing_internals::CG_DIMER_TAB; +using hashing_internals::CT_DIMER_TAB; -namespace btllib { +using hashing_internals::MS_TAB_31L_CA; +using hashing_internals::MS_TAB_31L_CC; +using hashing_internals::MS_TAB_31L_CG; +using hashing_internals::MS_TAB_31L_CT; +using hashing_internals::MS_TAB_33R_CA; +using hashing_internals::MS_TAB_33R_CC; +using hashing_internals::MS_TAB_33R_CG; +using hashing_internals::MS_TAB_33R_CT; -using hashing_internals::extend_hashes; -using hashing_internals::sror; -using hashing_internals::srol; class BsHash { @@ -732,6 +55,13 @@ class BsHash * @param seq_len Length of the sequence * @param num_hashes Number of hashes to generate per k-mer * @param k K-mer size + * @param mode Hashing scheme targeting a specific methylation context. + * Supported values are "CG", "CC", "CT", and "CA". Each mode + * applies the corresponding bisulfite-oriented hashing behavior. + * + * Note: BsHash can only handle one methylation context at a + * time. For multi-context bisulfite processing, use BsHashDirectional, which + * performs the additional required handling. * @param pos Position in the sequence to start hashing from */ BsHash(const char* seq, @@ -750,7 +80,7 @@ class BsHash { check_error(k == 0, "BsHash: k must be greater than 0"); check_error(k % 2 == 1, "BsHash: k must be even"); - check_error((k/2) % 2 == 0, "BsHash: k must have odd number of dimers"); + check_error((k / 2) % 2 == 0, "BsHash: k must have odd number of dimers"); check_error(this->seq_len < k, "BsHash: sequence length (" + std::to_string(this->seq_len) + ") is smaller than k (" + std::to_string(k) + ")"); @@ -758,30 +88,33 @@ class BsHash "BsHash: passed position (" + std::to_string(pos) + ") is larger than sequence length (" + std::to_string(this->seq_len) + ")"); - static const std::unordered_set valid_modes = {"CG", "CC", "CT", "CA"}; + static const std::unordered_set valid_modes = { + "CG", "CC", "CT", "CA" + }; check_error(valid_modes.find(mode) == valid_modes.end(), - "BsHash: mode must be one of [CG, CC, CT, CA], but got '" + mode + "'"); + "BsHash: mode must be one of [CG, CC, CT, CA], but got '" + + mode + "'"); if (mode == "CC") { - DIMER_TAB = CC_DIMER_TAB; - TAB_33R = MS_TAB_33R_CC; - TAB_31L = MS_TAB_31L_CC; - meth_signal = {"CC", "GG"}; + DIMER_TAB = CC_DIMER_TAB; + TAB_33R = MS_TAB_33R_CC; + TAB_31L = MS_TAB_31L_CC; + meth_signal = { "CC", "GG" }; } else if (mode == "CG") { - DIMER_TAB = CG_DIMER_TAB; - TAB_33R = MS_TAB_33R_CG; - TAB_31L = MS_TAB_31L_CG; - meth_signal = {"CG"}; + DIMER_TAB = CG_DIMER_TAB; + TAB_33R = MS_TAB_33R_CG; + TAB_31L = MS_TAB_31L_CG; + meth_signal = { "CG" }; } else if (mode == "CT") { - DIMER_TAB = CT_DIMER_TAB; - TAB_33R = MS_TAB_33R_CT; - TAB_31L = MS_TAB_31L_CT; - meth_signal = {"CT", "AG"}; + DIMER_TAB = CT_DIMER_TAB; + TAB_33R = MS_TAB_33R_CT; + TAB_31L = MS_TAB_31L_CT; + meth_signal = { "CT", "AG" }; } else if (mode == "CA") { - DIMER_TAB = CA_DIMER_TAB; - TAB_33R = MS_TAB_33R_CA; - TAB_31L = MS_TAB_31L_CA; - meth_signal = {"CA", "TG"}; + DIMER_TAB = CA_DIMER_TAB; + TAB_33R = MS_TAB_33R_CA; + TAB_31L = MS_TAB_31L_CA; + meth_signal = { "CA", "TG" }; } } @@ -790,6 +123,13 @@ class BsHash * @param seq Sequence string * @param num_hashes Number of hashes to produce per k-mer * @param k K-mer size + * @param mode Hashing scheme targeting a specific methylation context. + * Supported values are "CG", "CC", "CT", and "CA". Each mode + * applies the corresponding bisulfite-oriented hashing behavior. + * + * Note: BsHash can only handle one methylation context at a + * time. For multi-context bisulfite processing, use BsHashDirectional, which + * performs the additional required handling. * @param pos Position in sequence to start hashing from */ BsHash(const std::string& seq, @@ -810,7 +150,7 @@ class BsHash , initialized(obj.initialized) , fwd_hashes(obj.fwd_hashes) , rev_hashes(obj.rev_hashes) - , hash_arr(new uint64_t[obj.num_hashes] ) + , hash_arr(new uint64_t[obj.num_hashes]) { DIMER_TAB = obj.DIMER_TAB; TAB_33R = obj.TAB_33R; @@ -819,9 +159,7 @@ class BsHash hash_arr.get(), obj.hash_arr.get(), num_hashes * sizeof(uint64_t)); } - -// TODO - BsHash(BsHash&&) = default; + BsHash(BsHash&&) = default; /** * Calculate the hash values of current k-mer and advance to the next k-mer. @@ -835,143 +173,131 @@ class BsHash * if there are N's or invalid characters in the hashed sequence. * @return \p true on success and \p false otherwise */ -bool roll() -{ + bool roll() + { if (!initialized) { - std::cout << "[ROLL] Not initialized. Calling init()\n"; - return init(); + return init(); } - std::cout << "[ROLL] Rolling at position " << pos << ", even=" << even << "\n"; + auto dimer_valid = [&](size_t p) -> bool { + if (p + 1 >= seq_len) + return false; + char c1 = seq[p]; + char c2 = seq[p + 1]; + uint8_t a = BS_CONVERT_TAB[(unsigned char)c1]; + uint8_t b = BS_CONVERT_TAB[(unsigned char)c2]; + uint8_t loc = 10 * a + b; + return DIMER_TAB[loc] != 0; + }; if (even) { - if (pos >= seq_len - k) { - std::cout << "[ROLL] Reached end of sequence, cannot roll\n"; - return false; - } - char c1 = seq[pos + k - 1]; - char c2 = seq[pos + k]; - uint8_t a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; - uint8_t b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; - uint8_t loc = 10 * a + b; // yields 11, 12, ..., 44 - if (DIMER_TAB[loc] == 0) { - std::cout << "[ROLL] Invalid dimer at position " << (pos + k) - << ", skipping " << k << " positions and re-initializing\n"; - pos += k; - return init(); - } - std::cout << "[ROLL] Even roll, extending hashes at position " << pos << "\n"; - even = false; - ++pos; - extend_hashes(fwd_hashes[1], rev_hashes[1], k, num_hashes, hash_arr.get()); - return true; - } - - if (pos >= seq_len - k) { - std::cout << "[ROLL] Reached end of sequence, cannot roll\n"; + if (pos >= seq_len - k) return false; - } - char c1 = seq[pos + k - 1]; - char c2 = seq[pos + k]; - uint8_t a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; - uint8_t b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; - uint8_t loc = 10 * a + b; // yields 11, 12, ..., 44 - if (DIMER_TAB[loc] == 0) { - std::cout << "[ROLL] Invalid dimer at position " << (pos + k) - << ", skipping " << k << " positions and re-initializing\n"; + if (!dimer_valid(pos + k - 1)) { pos += k; return init(); + } + + even = false; + ++pos; + + extend_hashes( + fwd_hashes[1], rev_hashes[1], k, num_hashes, hash_arr.get()); + return true; } - bool can_roll_second = true; - c1 = seq[pos + k]; - c2 = seq[pos + k + 1]; - a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; - b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; - loc = 10 * a + b; - if (pos + 1 >= seq_len - k || DIMER_TAB[loc] == 0) { - can_roll_second = false; - std::cout << "[ROLL] Cannot roll second k-mer at position " << (pos + 1) << "\n"; + if (pos >= seq_len - k) + return false; + + if (!dimer_valid(pos + k - 1)) { + pos += k; + return init(); } + bool can_roll_second = dimer_valid(pos + k); + for (int i = 0; i < 2; i++) { - if (i == 0 || can_roll_second) { - char out1 = seq[pos + i - 1]; - char out2 = seq[pos + i + 1 - 1]; - char in1 = seq[pos + k + i - 1]; - char in2 = seq[pos + k + i + 1 - 1]; - - std::cout << "[ROLL] Loop " << i << " at pos=" << pos - << " | OUT: " << out1 << out2 - << " | IN: " << in1 << in2 << "\n"; - - fwd_hashes[i] = next_forward_bs_hash(fwd_hashes[i], k, - out1, out2, in1, in2, - DIMER_TAB, TAB_33R, TAB_31L); - rev_hashes[i] = next_reverse_bs_hash(rev_hashes[i], k, - out1, out2, in1, in2, - DIMER_TAB, TAB_33R, TAB_31L); - } - } + if (i == 0 || can_roll_second) { - std::cout << "[ROLL] Extending hashes at position " << pos << "\n"; - extend_hashes(fwd_hashes[0], rev_hashes[0], k, num_hashes, hash_arr.get()); + char out1 = seq[pos + i - 1]; + char out2 = seq[pos + i + 1 - 1]; - ++pos; - even = true; + char in1 = seq[pos + k + i - 1]; + char in2 = seq[pos + k + i + 1 - 1]; - std::cout << "[ROLL] Roll complete, new position=" << pos << ", even=" << even << "\n"; + fwd_hashes[i] = next_forward_bs_hash( + fwd_hashes[i], k, out1, out2, in1, in2, DIMER_TAB, TAB_33R, TAB_31L); - if (!can_roll_second ) { - fwd_hashes[1] = 0; - rev_hashes[1] = 0; + rev_hashes[i] = next_reverse_bs_hash( + rev_hashes[i], k, out1, out2, in1, in2, DIMER_TAB, TAB_33R, TAB_31L); + } } - return true; -} -std::string center_dimer() { - if (k < 2) return ""; + extend_hashes(fwd_hashes[0], rev_hashes[0], k, num_hashes, hash_arr.get()); - size_t center_idx; - size_t num_dimers = k / 2; + ++pos; + even = true; - if (num_dimers % 2 == 1) { - center_idx = pos + (num_dimers / 2) * 2; - } else { - center_idx = pos + ((num_dimers / 2) - 1) * 2; + if (!can_roll_second) { + fwd_hashes[1] = 0; + rev_hashes[1] = 0; } - if (center_idx + 1 >= seq_len) return ""; + return true; + } - // Return actual dimer as a 2-char string - return std::string{seq[center_idx], seq[center_idx + 1]}; -} + /** + * Extract the central dimer (two-base substring) of the current k-mer window. + * Since the number of dimers in a k-mer is always odd, the center dimer is + * unambiguously defined as the (num_dimers / 2)-th dimer from the start. + * + * If the computed index extends beyond the sequence boundary, an empty string + * is returned. + * + * @return The center dimer of the current k-mer. + */ + std::string center_dimer() + { + size_t num_dimers = k / 2; + size_t center_idx = pos + (num_dimers / 2) * 2; -bool is_methylated() { + return std::string{ seq[center_idx], seq[center_idx + 1] }; + } + /** + * Check if the k-mer center position is methylated based on conversion type. + * For CT conversion, methylation corresponds to a cytosine ('C'/'c'). + * For GA conversion, methylation corresponds to a guanine ('G'/'g'). + * + * @return true if the base at the methylation index indicates methylation, + * false otherwise. + */ + bool is_methylated() + { size_t num_dimers = k / 2; size_t center_idx = pos + (num_dimers / 2) * 2; - // Build center dimer string + std::string dimer; dimer += seq[center_idx]; dimer += seq[center_idx + 1]; - std::transform(dimer.begin(), dimer.end(), dimer.begin(), ::toupper); - // Check membership in meth_signal - return std::find(meth_signal.begin(), meth_signal.end(), dimer) != meth_signal.end(); -} + std::transform(dimer.begin(), dimer.end(), dimer.begin(), ::toupper); + return std::find(meth_signal.begin(), meth_signal.end(), dimer) != + meth_signal.end(); + } -bool is_forward_higher(const std::vector& fwd_hashes, - const std::vector& rev_hashes, - bool even) -{ + /** + * Check if the forward hash is higher than the reverse hash. + * @return true if forward hash > reverse hash, false otherwise + */ + bool is_forward_higher() + { size_t idx = even ? 0 : 1; return fwd_hashes[idx] > rev_hashes[idx]; -} - + } /** * Like the roll() function, but advance backwards. @@ -979,79 +305,82 @@ bool is_forward_higher(const std::vector& fwd_hashes, */ bool roll_back() { - if (!initialized) { - return init(); + return init(); } if (pos == 0) { - return false; + return false; } + auto dimer_valid = [&](size_t p) -> bool { + if (p + 1 >= seq_len) + return false; + uint8_t a = BS_CONVERT_TAB[(unsigned char)seq[p]]; + uint8_t b = BS_CONVERT_TAB[(unsigned char)seq[p + 1]]; + uint8_t loc = 10 * a + b; + return DIMER_TAB[loc] != 0; + }; + + // ---- ODD STATE: only roll back the primary hash ---- // if (!even) { - extend_hashes(fwd_hashes[0], rev_hashes[0], k, num_hashes, hash_arr.get()); - --pos; - even = true; // toggle to indicate next roll_back can attempt both - return true; + extend_hashes( + fwd_hashes[0], rev_hashes[0], k, num_hashes, hash_arr.get()); + --pos; + even = true; + return true; } - - char c1 = seq[pos - 1]; - char c2 = seq[pos]; - uint8_t a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; - uint8_t b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; - uint8_t loc = 10 * a + b; // yields 11, 12, ..., 44 - if (DIMER_TAB[loc] == 0) { - std::cout << "[ROLL] Invalid dimer at position " << (pos + k) - << ", skipping " << k << " positions and re-initializing\n"; - if (pos <= k) { - return false; - } - pos -= k; - return init(); - + // ---- EVEN STATE ---- // + // First dimer (the one at pos-1, pos) + if (!dimer_valid(pos - 1)) { + if (pos <= k) + return false; + pos -= k; + return init(); } - c1 = seq[pos - 2]; - c2 = seq[pos - 1]; - a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; - b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; - loc = 10 * a + b; - // even step: try to roll back both hashes + // Check if the second rollback is possible bool can_roll_second = true; - if (pos <= 1 || DIMER_TAB[loc] == 0) { - can_roll_second = false; + if (pos <= 1 || !dimer_valid(pos - 2)) { + can_roll_second = false; } - if (fwd_hashes[1] != 0) { - for (int i = 0; i < 2; i++) { - if (i == 1 || can_roll_second) { - fwd_hashes[i] = prev_forward_bs_hash(fwd_hashes[i], k, - seq[pos + k - 2 + i], seq[pos + k - 1 + i], - seq[pos - 2 + i], seq[pos - 1 + i], - DIMER_TAB, TAB_33R, TAB_31L); - rev_hashes[i] = prev_reverse_bs_hash(rev_hashes[i], k, - seq[pos + k - 2 + i], seq[pos + k - 1 + i], - seq[pos - 2 + i], seq[pos - 1 + i], - DIMER_TAB, TAB_33R, TAB_31L); - } - } - } else { - std::cerr << "abcdefgh" << std::endl; - pos -=2; - init(); - pos++; - extend_hashes(fwd_hashes[1], rev_hashes[1], k, num_hashes, hash_arr.get()); - even = false; - return true; + + // If the second hash is missing, restore it by reinitializing around pos-2 + if (fwd_hashes[1] == 0) { + pos -= 2; + init(); + pos++; + extend_hashes( + fwd_hashes[1], rev_hashes[1], k, num_hashes, hash_arr.get()); + even = false; + return true; + } + + // ---- Perform 1 or 2 backward hash updates ---- // + for (int i = 0; i < 2; i++) { + if (i == 1 || can_roll_second) { + + char out1 = seq[pos + k - 2 + i]; + char out2 = seq[pos + k - 1 + i]; + char in1 = seq[pos - 2 + i]; + char in2 = seq[pos - 1 + i]; + + fwd_hashes[i] = prev_forward_bs_hash( + fwd_hashes[i], k, out1, out2, in1, in2, DIMER_TAB, TAB_33R, TAB_31L); + + rev_hashes[i] = prev_reverse_bs_hash( + rev_hashes[i], k, out1, out2, in1, in2, DIMER_TAB, TAB_33R, TAB_31L); + } } extend_hashes(fwd_hashes[1], rev_hashes[1], k, num_hashes, hash_arr.get()); + --pos; even = false; return true; } - /** * Get the array of current canonical hash values (length = \p get_hash_num()) * @return Pointer to the hash array @@ -1081,13 +410,27 @@ bool is_forward_higher(const std::vector& fwd_hashes, * Get the hash value of the forward strand. * @return Forward hash value */ - uint64_t get_forward_hash() const { if (even) {return fwd_hashes[0];} else {return fwd_hashes[1];}} + uint64_t get_forward_hash() const + { + if (even) { + return fwd_hashes[0]; + } else { + return fwd_hashes[1]; + } + } /** * Get the hash value of the reverse strand. * @return Reverse-complement hash value */ - uint64_t get_reverse_hash() const { if (even) {return rev_hashes[0];} else {return rev_hashes[1];}} + uint64_t get_reverse_hash() const + { + if (even) { + return rev_hashes[0]; + } else { + return rev_hashes[1]; + } + } private: const char* seq; @@ -1096,8 +439,8 @@ bool is_forward_higher(const std::vector& fwd_hashes, hashing_internals::K_TYPE k; size_t pos; bool initialized; - std::vector fwd_hashes = {0, 0}; - std::vector rev_hashes = {0, 0}; + std::vector fwd_hashes = { 0, 0 }; + std::vector rev_hashes = { 0, 0 }; std::vector meth_signal; bool even = true; std::unique_ptr hash_arr; @@ -1110,61 +453,46 @@ bool is_forward_higher(const std::vector& fwd_hashes, * @return \p true if successful, \p false otherwise */ -bool init() -{ - std::cout << "[INIT] Starting initialization at position " << pos << "\n"; - + bool init() + { bool has_n = true; while (pos <= seq_len - k + 1 && has_n) { - has_n = false; - for (unsigned i = 0; i < k && pos <= seq_len - k + 1; i += 2) { - const char c1 = seq[pos + k - i - 2]; - const char c2 = seq[pos + k - i - 1]; - const uint8_t a = CONVERT_TAB_SIMPLE[(unsigned char)c1]; - const uint8_t b = CONVERT_TAB_SIMPLE[(unsigned char)c2]; - const uint8_t loc = 10 * a + b; // yields 11, 12, ..., 44 - - std::cout << "[INIT] Checking dimer '" << c1 << c2 - << "' -> loc=" << (unsigned)loc << "\n"; - - if (DIMER_TAB[loc] == 0) { - std::cout << "[INIT] Found invalid base at position " << (pos + k - i - 1) - << ", skipping " << (k - i) << " positions\n"; - if (b == 0) { - pos += k - i; - } else { - pos += k - i - 1; - } - - has_n = true; - } + has_n = false; + for (unsigned i = 0; i < k && pos <= seq_len - k + 1; i += 2) { + const char c1 = seq[pos + k - i - 2]; + const char c2 = seq[pos + k - i - 1]; + const uint8_t a = BS_CONVERT_TAB[(unsigned char)c1]; + const uint8_t b = BS_CONVERT_TAB[(unsigned char)c2]; + const uint8_t loc = 10 * a + b; + + if (DIMER_TAB[loc] == 0) { + if (b == 0) { + pos += k - i; + } else { + pos += k - i - 1; + } + + has_n = true; } + } } if (pos > seq_len - k) { - std::cout << "[INIT] Position exceeded sequence length, returning false\n"; - return false; + return false; } bool has_second = true; if (pos == seq_len - k) { - has_second = false; - std::cout << "[INIT] At last k-mer, no second k-mer available\n"; + has_second = false; } - std::cout << "[INIT] Computing forward hashes at position " << pos << "\n"; fwd_hashes = base_forward_bs_hash(seq + pos, has_second, k, DIMER_TAB); - - std::cout << "[INIT] Computing reverse hashes at position " << pos << "\n"; rev_hashes = base_reverse_bs_hash(seq + pos, has_second, k, DIMER_TAB); - std::cout << "[INIT] Extending hashes\n"; extend_hashes(fwd_hashes[0], rev_hashes[0], k, num_hashes, hash_arr.get()); initialized = true; - std::cout << "[INIT] Initialization complete at position " << pos << "\n"; return true; -} - + } }; } \ No newline at end of file diff --git a/include/btllib/bshash_kmer_directional.hpp b/include/btllib/bshash_kmer_directional.hpp index ed991087..21361493 100644 --- a/include/btllib/bshash_kmer_directional.hpp +++ b/include/btllib/bshash_kmer_directional.hpp @@ -13,8 +13,6 @@ #include #include - - namespace btllib { using hashing_internals::base_forward_hash; @@ -44,16 +42,16 @@ using hashing_internals::BS_CT_RC_CONVERT_TAB; using hashing_internals::BS_GA_RC_CONVERT_TAB; using hashing_internals::BS_CT_DIMER_TAB; -using hashing_internals::BS_CT_TRIMER_TAB; using hashing_internals::BS_CT_TETRAMER_TAB; +using hashing_internals::BS_CT_TRIMER_TAB; using hashing_internals::BS_GA_DIMER_TAB; -using hashing_internals::BS_GA_TRIMER_TAB; using hashing_internals::BS_GA_TETRAMER_TAB; +using hashing_internals::BS_GA_TRIMER_TAB; /** * Directional BS-seq k-mer hashing. */ -class BSHashDirectional +class BsHashDirectional { public: @@ -63,17 +61,17 @@ class BSHashDirectional * @param seq_len Length of the sequence * @param num_hashes Number of hashes to generate per k-mer * @param k K-mer size - * @param conversion_type Bisulfite conversion mode ("CT" or "GA"), + * @param conversion_type Bisulfite conversion mode ("CT" or "GA"), * used to determine how bases are interpreted during hashing. * @param pos Position in the sequence to start hashing from */ - BSHashDirectional(const char* seq, - size_t seq_len, - hashing_internals::NUM_HASHES_TYPE num_hashes, - hashing_internals::K_TYPE k, - std::string conversion_type, - size_t pos = 0) - : seq(seq) + BsHashDirectional(const char* seq, + size_t seq_len, + hashing_internals::NUM_HASHES_TYPE num_hashes, + hashing_internals::K_TYPE k, + std::string conversion_type, + size_t pos = 0) + : seq(seq) , seq_len(seq_len) , num_hashes(num_hashes) , k(k) @@ -84,7 +82,7 @@ class BSHashDirectional { check_error(k == 0, "BsHash: k must be greater than 0"); check_error(k % 2 == 1, "BsHash: k must be even"); - check_error((k/2) % 2 == 0, "BsHash: k must have odd number of dimers"); + check_error((k / 2) % 2 == 0, "BsHash: k must have odd number of dimers"); check_error(this->seq_len < k, "BsHash: sequence length (" + std::to_string(this->seq_len) + ") is smaller than k (" + std::to_string(k) + ")"); @@ -101,36 +99,38 @@ class BSHashDirectional primitive_tab = BS_CT_SEED_TAB; right_table = BS_CT_MS_TAB_33R; - left_table = BS_CT_MS_TAB_31L; + left_table = BS_CT_MS_TAB_31L; - convert_tab = BS_CT_CONVERT_TAB; - rc_convert_tab = BS_CT_RC_CONVERT_TAB; + convert_tab = BS_CT_CONVERT_TAB; + rc_convert_tab = BS_CT_RC_CONVERT_TAB; dimer_tab = BS_CT_DIMER_TAB; rc_dimer_tab = BS_GA_DIMER_TAB; trimer_tab = BS_CT_TRIMER_TAB; - rc_trimer_tab = BS_GA_TRIMER_TAB; - tetramer_tab = BS_CT_TETRAMER_TAB;; - rc_tetramer_tab= BS_GA_TETRAMER_TAB; + rc_trimer_tab = BS_GA_TRIMER_TAB; + tetramer_tab = BS_CT_TETRAMER_TAB; + ; + rc_tetramer_tab = BS_GA_TETRAMER_TAB; } else if (conversion_type == "GA") { meth_base_idx = center_dimer_start + 1; - primitive_tab = BS_GA_SEED_TAB; + primitive_tab = BS_GA_SEED_TAB; - right_table = BS_GA_MS_TAB_33R; - left_table = BS_GA_MS_TAB_31L; + right_table = BS_GA_MS_TAB_33R; + left_table = BS_GA_MS_TAB_31L; - convert_tab = BS_GA_CONVERT_TAB; - rc_convert_tab = BS_GA_RC_CONVERT_TAB; + convert_tab = BS_GA_CONVERT_TAB; + rc_convert_tab = BS_GA_RC_CONVERT_TAB; - dimer_tab = BS_GA_DIMER_TAB; - rc_dimer_tab = BS_CT_DIMER_TAB; - trimer_tab = BS_GA_TRIMER_TAB; - rc_trimer_tab = BS_CT_TRIMER_TAB; - tetramer_tab = BS_GA_TETRAMER_TAB; - rc_tetramer_tab = BS_CT_TETRAMER_TAB; + dimer_tab = BS_GA_DIMER_TAB; + rc_dimer_tab = BS_CT_DIMER_TAB; + trimer_tab = BS_GA_TRIMER_TAB; + rc_trimer_tab = BS_CT_TRIMER_TAB; + tetramer_tab = BS_GA_TETRAMER_TAB; + rc_tetramer_tab = BS_CT_TETRAMER_TAB; } else { - check_error(true, "Unsupported conversion_type. Must be CT or GA, as capitalized."); + check_error( + true, "Unsupported conversion_type. Must be CT or GA, as capitalized."); } } @@ -139,20 +139,25 @@ class BSHashDirectional * @param seq Sequence string * @param num_hashes Number of hashes to produce per k-mer * @param k K-mer size - * @param conversion_type Bisulfite conversion mode ("CT" or "GA"), + * @param conversion_type Bisulfite conversion mode ("CT" or "GA"), * used to determine how bases are interpreted during hashing. * @param pos Position in sequence to start hashing from */ - BSHashDirectional(const std::string& seq, - hashing_internals::NUM_HASHES_TYPE num_hashes, - hashing_internals::K_TYPE k, - std::string conversion_type, - size_t pos = 0) - : BSHashDirectional(seq.data(), seq.size(), num_hashes, k, conversion_type, pos) + BsHashDirectional(const std::string& seq, + hashing_internals::NUM_HASHES_TYPE num_hashes, + hashing_internals::K_TYPE k, + std::string conversion_type, + size_t pos = 0) + : BsHashDirectional(seq.data(), + seq.size(), + num_hashes, + k, + conversion_type, + pos) { } - BSHashDirectional(const BSHashDirectional& obj) + BsHashDirectional(const BsHashDirectional& obj) : seq(obj.seq) , seq_len(obj.seq_len) , num_hashes(obj.num_hashes) @@ -174,15 +179,12 @@ class BSHashDirectional , rc_trimer_tab(obj.rc_trimer_tab) , tetramer_tab(obj.tetramer_tab) , rc_tetramer_tab(obj.rc_tetramer_tab) -{ + { std::memcpy( - hash_arr.get(), - obj.hash_arr.get(), - num_hashes * sizeof(uint64_t) - ); -} + hash_arr.get(), obj.hash_arr.get(), num_hashes * sizeof(uint64_t)); + } - BSHashDirectional(BSHashDirectional&&) = default; + BsHashDirectional(BsHashDirectional&&) = default; /** * Calculate the hash values of current k-mer and advance to the next k-mer. * BsHash advances one nucleotide at a time until it finds a k-mer with valid @@ -208,8 +210,20 @@ class BSHashDirectional pos += k; return init(); } - fwd_hash = next_forward_hash(fwd_hash, k, seq[pos], seq[pos + k], primitive_tab, right_table, left_table); - rev_hash = next_reverse_hash(rev_hash, k, seq[pos], seq[pos + k], primitive_tab, right_table, left_table); + fwd_hash = next_forward_hash(fwd_hash, + k, + seq[pos], + seq[pos + k], + primitive_tab, + right_table, + left_table); + rev_hash = next_reverse_hash(rev_hash, + k, + seq[pos], + seq[pos + k], + primitive_tab, + right_table, + left_table); extend_hashes(fwd_hash, rev_hash, k, num_hashes, hash_arr.get()); ++pos; return true; @@ -234,8 +248,20 @@ class BSHashDirectional if (SEED_TAB[(unsigned char)seq[pos - 1]] == SEED_N) { return false; } - fwd_hash = prev_forward_hash(fwd_hash, k, seq[pos + k - 1], seq[pos - 1], primitive_tab, right_table, left_table); - rev_hash = prev_reverse_hash(rev_hash, k, seq[pos + k - 1], seq[pos - 1], primitive_tab, right_table, left_table); + fwd_hash = prev_forward_hash(fwd_hash, + k, + seq[pos + k - 1], + seq[pos - 1], + primitive_tab, + right_table, + left_table); + rev_hash = prev_reverse_hash(rev_hash, + k, + seq[pos + k - 1], + seq[pos - 1], + primitive_tab, + right_table, + left_table); extend_hashes(fwd_hash, rev_hash, k, num_hashes, hash_arr.get()); --pos; return true; @@ -281,8 +307,10 @@ class BSHashDirectional if (SEED_TAB[(unsigned char)char_in] == SEED_N) { return false; } - const uint64_t fwd = next_forward_hash(fwd_hash, k, seq[pos], char_in, primitive_tab, right_table, left_table); - const uint64_t rev = next_reverse_hash(rev_hash, k, seq[pos], char_in, primitive_tab, right_table, left_table); + const uint64_t fwd = next_forward_hash( + fwd_hash, k, seq[pos], char_in, primitive_tab, right_table, left_table); + const uint64_t rev = next_reverse_hash( + rev_hash, k, seq[pos], char_in, primitive_tab, right_table, left_table); extend_hashes(fwd, rev, k, num_hashes, hash_arr.get()); return true; } @@ -300,36 +328,38 @@ class BSHashDirectional return false; } const unsigned char char_out = seq[pos + k - 1]; - const uint64_t fwd = prev_forward_hash(fwd_hash, k, char_out, char_in, primitive_tab, right_table, left_table); - const uint64_t rev = prev_reverse_hash(rev_hash, k, char_out, char_in, primitive_tab, right_table, left_table); + const uint64_t fwd = prev_forward_hash( + fwd_hash, k, char_out, char_in, primitive_tab, right_table, left_table); + const uint64_t rev = prev_reverse_hash( + rev_hash, k, char_out, char_in, primitive_tab, right_table, left_table); extend_hashes(fwd, rev, k, num_hashes, hash_arr.get()); return true; } - /** + /** * Check if the k-mer center position is methylated based on conversion type. * For CT conversion, methylation corresponds to a cytosine ('C'/'c'). * For GA conversion, methylation corresponds to a guanine ('G'/'g'). * - * @return true if the base at the methylation index indicates methylation, false otherwise. + * @return true if the base at the methylation index indicates methylation, + * false otherwise. */ - bool is_methylated() { - char base = seq[pos + meth_base_idx]; + bool is_methylated() + { + char base = seq[pos + meth_base_idx]; - if (conversion_type == "CT") - return base == 'C' || base == 'c'; - else - return base == 'G' || base == 'g'; + if (conversion_type == "CT") + return base == 'C' || base == 'c'; + else + return base == 'G' || base == 'g'; } + /** * Check if the forward hash is higher than the reverse hash. * @return true if forward hash > reverse hash, false otherwise */ - bool is_forward_higher() const { - return fwd_hash > rev_hash; - } - + bool is_forward_higher() const { return fwd_hash > rev_hash; } void sub(const std::vector& positions, const std::vector& new_bases) @@ -401,8 +431,8 @@ class BSHashDirectional const uint64_t* const* right_table; const uint64_t* const* left_table; - const uint8_t* convert_tab; - const uint8_t* rc_convert_tab; + const uint8_t* convert_tab; + const uint8_t* rc_convert_tab; const uint64_t* dimer_tab; const uint64_t* rc_dimer_tab; @@ -430,13 +460,24 @@ class BSHashDirectional if (pos > seq_len - k) { return false; } - fwd_hash = base_forward_hash(seq + pos, k, primitive_tab, dimer_tab, trimer_tab, tetramer_tab, convert_tab); - rev_hash = base_reverse_hash(seq + pos, k, primitive_tab, rc_dimer_tab, rc_trimer_tab, rc_tetramer_tab, rc_convert_tab); + fwd_hash = base_forward_hash(seq + pos, + k, + primitive_tab, + dimer_tab, + trimer_tab, + tetramer_tab, + convert_tab); + rev_hash = base_reverse_hash(seq + pos, + k, + primitive_tab, + rc_dimer_tab, + rc_trimer_tab, + rc_tetramer_tab, + rc_convert_tab); extend_hashes(fwd_hash, rev_hash, k, num_hashes, hash_arr.get()); initialized = true; return true; } - }; } // namespace btllib \ No newline at end of file From 02fd42a53f4f3710440710081e4f1200ed713c7d Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Mon, 8 Dec 2025 22:36:24 -0800 Subject: [PATCH 20/38] mi_bloom_filter.cpp: fix compilation error --- recipes/mi_bloom_filter.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/recipes/mi_bloom_filter.cpp b/recipes/mi_bloom_filter.cpp index 54175803..8bcbf1ea 100644 --- a/recipes/mi_bloom_filter.cpp +++ b/recipes/mi_bloom_filter.cpp @@ -214,6 +214,7 @@ main(int argc, char* argv[]) } if (verbose) { std::cout << "verbose" << std::endl; + std::cout << "Thread count: " << thread_count << std::endl; } const std::vector read_paths(&argv[optind], &argv[argc]); From 908e084b3703572ffaf8c5335b383b94cbbdd187 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Mon, 8 Dec 2025 22:43:33 -0800 Subject: [PATCH 21/38] seq.cpp: add bs conversion tests --- tests/seq.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/seq.cpp b/tests/seq.cpp index 029a6854..1589abc0 100644 --- a/tests/seq.cpp +++ b/tests/seq.cpp @@ -9,5 +9,9 @@ main() std::string seq = "ACGTACACTGGACTGAGTCT"; std::string rc = "AGACTCAGTCCAGTGTACGT"; TEST_ASSERT_EQ(btllib::get_reverse_complement(seq), rc); + std::string ct = "ATGTATATTGGATGTAGTTT"; + TEST_ASSERT_EQ(btllib::get_bs_conversion(seq, "CT"), ct); + std::string ga = "ACATAAACTAAACTAAATCT"; + TEST_ASSERT_EQ(btllib::get_bs_conversion(seq, "GA"), ga); return 0; -} \ No newline at end of file +} From 9cc3b5c334ae96318a533de075b69dcf66854b0b Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Mon, 8 Dec 2025 22:52:30 -0800 Subject: [PATCH 22/38] seq.cpp: fix const bug --- src/btllib/seq.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/btllib/seq.cpp b/src/btllib/seq.cpp index 134a8306..dfec4cae 100644 --- a/src/btllib/seq.cpp +++ b/src/btllib/seq.cpp @@ -186,7 +186,7 @@ get_reverse_complement(const std::string& seq) } void -bs_conversion(std::string& seq, std::string& type) +bs_conversion(std::string& seq, const std::string& type) { const char* table = (type == "CT" || type == "ct") ? CT_CONVERSION : GA_CONVERSION; @@ -197,7 +197,7 @@ bs_conversion(std::string& seq, std::string& type) } std::string -get_bs_conversion(const std::string& seq, std::string& type) +get_bs_conversion(const std::string& seq, const std::string& type) { std::string conversion(seq); bs_conversion(conversion, type); From 205f4fa2c5468554384cb8fe2b8fd1d0d9e4f37a Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Mon, 8 Dec 2025 22:58:07 -0800 Subject: [PATCH 23/38] seq.hpp: fix const bug --- include/btllib/seq.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/btllib/seq.hpp b/include/btllib/seq.hpp index 144a9c95..73b4cc35 100644 --- a/include/btllib/seq.hpp +++ b/include/btllib/seq.hpp @@ -46,7 +46,7 @@ get_reverse_complement(const std::string& seq); * @param type Conversion type ("CT" or "GA"). */ void -bs_conversion(std::string& seq, std::string& type); +bs_conversion(std::string& seq, const std::string& type); /** * Obtain a bisulfite-converted copy of the provided sequence. The argument @@ -58,7 +58,7 @@ bs_conversion(std::string& seq, std::string& type); * @return Bisulfite-converted sequence. */ std::string -get_bs_conversion(const std::string& seq, std::string& type); +get_bs_conversion(const std::string& seq, const std::string& type); } From 52638b5738dd7e2888cb11172b5e6e4cd0e99862 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Mon, 8 Dec 2025 23:14:24 -0800 Subject: [PATCH 24/38] Update mi_bloom_filter.cpp --- recipes/mi_bloom_filter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/mi_bloom_filter.cpp b/recipes/mi_bloom_filter.cpp index 8bcbf1ea..5cbe39a7 100644 --- a/recipes/mi_bloom_filter.cpp +++ b/recipes/mi_bloom_filter.cpp @@ -214,7 +214,7 @@ main(int argc, char* argv[]) } if (verbose) { std::cout << "verbose" << std::endl; - std::cout << "Thread count: " << thread_count << std::endl; + std::cout << "Thread: " << thread_count << std::endl; } const std::vector read_paths(&argv[optind], &argv[argc]); From c28f5e24b100f33582b0e5bf2a5c41073f59a793 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Mon, 8 Dec 2025 23:45:49 -0800 Subject: [PATCH 25/38] Update mi_bloom_filter.cpp --- recipes/mi_bloom_filter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/mi_bloom_filter.cpp b/recipes/mi_bloom_filter.cpp index 5cbe39a7..b09990f6 100644 --- a/recipes/mi_bloom_filter.cpp +++ b/recipes/mi_bloom_filter.cpp @@ -214,7 +214,7 @@ main(int argc, char* argv[]) } if (verbose) { std::cout << "verbose" << std::endl; - std::cout << "Thread: " << thread_count << std::endl; + std::cout << thread_count << std::endl; } const std::vector read_paths(&argv[optind], &argv[argc]); From 2549292b9dcc2f1df4a470a36d1c4348b98db5cb Mon Sep 17 00:00:00 2001 From: jowong04 Date: Tue, 9 Dec 2025 00:21:40 -0800 Subject: [PATCH 26/38] mi_bloom_filter.cpp: clang-format --- recipes/mi_bloom_filter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/mi_bloom_filter.cpp b/recipes/mi_bloom_filter.cpp index b09990f6..95fa4ef6 100644 --- a/recipes/mi_bloom_filter.cpp +++ b/recipes/mi_bloom_filter.cpp @@ -214,7 +214,7 @@ main(int argc, char* argv[]) } if (verbose) { std::cout << "verbose" << std::endl; - std::cout << thread_count << std::endl; + std::cout << "Thread count: " << thread_count << std::endl; } const std::vector read_paths(&argv[optind], &argv[argc]); From fc93c1310e036b8f1808d895c7950d4984d945c9 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Tue, 9 Dec 2025 00:35:47 -0800 Subject: [PATCH 27/38] Update meson.build --- recipes/meson.build | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/recipes/meson.build b/recipes/meson.build index 6b078dc5..20b8eb56 100644 --- a/recipes/meson.build +++ b/recipes/meson.build @@ -12,8 +12,7 @@ executable('randseq', include_directories : btllib_include, dependencies : deps + [ btllib_dep, argparse_dep ], install : true, - install_dir : 'bin', - override_options : ['cpp_std=c++17']) + install_dir : 'bin') executable('mi_bf_generate', meson.project_source_root() + '/recipes/mi_bloom_filter.cpp', From 569c2400e6491b88ba1790d4c8111fc9116f2f8c Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Tue, 9 Dec 2025 00:42:34 -0800 Subject: [PATCH 28/38] seq.cpp: fix test case --- tests/seq.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/seq.cpp b/tests/seq.cpp index 1589abc0..e50df607 100644 --- a/tests/seq.cpp +++ b/tests/seq.cpp @@ -9,9 +9,9 @@ main() std::string seq = "ACGTACACTGGACTGAGTCT"; std::string rc = "AGACTCAGTCCAGTGTACGT"; TEST_ASSERT_EQ(btllib::get_reverse_complement(seq), rc); - std::string ct = "ATGTATATTGGATGTAGTTT"; + std::string ct = "ATGTATATTGGATTGAGTTT"; TEST_ASSERT_EQ(btllib::get_bs_conversion(seq, "CT"), ct); - std::string ga = "ACATAAACTAAACTAAATCT"; + std::string ga = "ACATACACTAAACTAAATCT"; TEST_ASSERT_EQ(btllib::get_bs_conversion(seq, "GA"), ga); return 0; } From 325d46c58a4b01d3cc137952c3075d98d465c2a2 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Tue, 9 Dec 2025 01:07:58 -0800 Subject: [PATCH 29/38] azure-pipelines.yml: unpin cmake --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index ae5d88c6..1caba94e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,7 +20,7 @@ jobs: - script: | source activate btllib_CI conda install --yes -c conda-forge mamba - mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson=1.2.1 ninja=1.11.1 cmake=3.29.3 openmp + mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson=1.2.1 ninja=1.11.1 cmake= openmp pip install gcovr displayName: Install dependencies @@ -93,7 +93,7 @@ jobs: - script: | source activate btllib_CI - mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson=1.2.1 ninja=1.11.1 cmake=3.29.3 openmp gcovr + mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson=1.2.1 ninja=1.11.1 cmake openmp gcovr displayName: 'Install required software' - script: | From 1f4eb17e33b485ce742acfa331b2b150e3d75c5f Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Tue, 9 Dec 2025 09:32:43 -0800 Subject: [PATCH 30/38] azure-pipelines.yml: repin cmake and unpin ninja --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 1caba94e..7d64f21e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,7 +20,7 @@ jobs: - script: | source activate btllib_CI conda install --yes -c conda-forge mamba - mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson=1.2.1 ninja=1.11.1 cmake= openmp + mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson=1.2.1 ninja cmake=3.29.3 openmp pip install gcovr displayName: Install dependencies @@ -93,7 +93,7 @@ jobs: - script: | source activate btllib_CI - mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson=1.2.1 ninja=1.11.1 cmake openmp gcovr + mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson=1.2.1 ninja cmake=3.29.3 openmp gcovr displayName: 'Install required software' - script: | From a27a2a3abaaa9d1972e0c537b4ed3dca96632cfd Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Tue, 9 Dec 2025 10:23:04 -0800 Subject: [PATCH 31/38] azure-pipelines.yml: upgrade meson --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 7d64f21e..2b956314 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,7 +20,7 @@ jobs: - script: | source activate btllib_CI conda install --yes -c conda-forge mamba - mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson=1.2.1 ninja cmake=3.29.3 openmp + mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson=1.9 ninja cmake=3.29.3 openmp pip install gcovr displayName: Install dependencies @@ -93,7 +93,7 @@ jobs: - script: | source activate btllib_CI - mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson=1.2.1 ninja cmake=3.29.3 openmp gcovr + mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson=1.9 ninja cmake=3.29.3 openmp gcovr displayName: 'Install required software' - script: | From 01a6108133ad2ebed23e9dc45574adce4344c603 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Tue, 9 Dec 2025 20:05:00 -0800 Subject: [PATCH 32/38] azure-pipelines.yml: unpin cmake --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 2b956314..81872f6c 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,7 +20,7 @@ jobs: - script: | source activate btllib_CI conda install --yes -c conda-forge mamba - mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson=1.9 ninja cmake=3.29.3 openmp + mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson=1.9 ninja cmake openmp pip install gcovr displayName: Install dependencies @@ -93,7 +93,7 @@ jobs: - script: | source activate btllib_CI - mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson=1.9 ninja cmake=3.29.3 openmp gcovr + mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson=1.9 ninja cmake openmp gcovr displayName: 'Install required software' - script: | From 7ff5c2c665f4ff3c29fd7d81eec4e68d3c2d0f76 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Tue, 9 Dec 2025 21:58:55 -0800 Subject: [PATCH 33/38] azure-pipelines.yml: pin cmake --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 81872f6c..2b956314 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,7 +20,7 @@ jobs: - script: | source activate btllib_CI conda install --yes -c conda-forge mamba - mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson=1.9 ninja cmake openmp + mamba install --yes -c conda-forge -c bioconda libcxx compilers clang llvm clang-format=18 clang-tools boost samtools coreutils xz lrzip meson=1.9 ninja cmake=3.29.3 openmp pip install gcovr displayName: Install dependencies @@ -93,7 +93,7 @@ jobs: - script: | source activate btllib_CI - mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson=1.9 ninja cmake openmp gcovr + mamba install --yes -c conda-forge -c bioconda libcxx compilers llvm clang-format clang-tools boost 'samtools>=1.14' coreutils xz lrzip meson=1.9 ninja cmake=3.29.3 openmp gcovr displayName: 'Install required software' - script: | From 3759177f1c105c35ddb6a8d8349a91c99cce34f0 Mon Sep 17 00:00:00 2001 From: JW <34543031+jwcodee@users.noreply.github.com> Date: Wed, 10 Dec 2025 19:57:21 -0800 Subject: [PATCH 34/38] bshash_hashing_internals.hpp: remove log information --- include/btllib/bshash_hashing_internals.hpp | 26 ++++----------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/include/btllib/bshash_hashing_internals.hpp b/include/btllib/bshash_hashing_internals.hpp index d34e8022..7aa0cc34 100644 --- a/include/btllib/bshash_hashing_internals.hpp +++ b/include/btllib/bshash_hashing_internals.hpp @@ -1540,9 +1540,6 @@ base_forward_bs_hash(const char* seq, uint64_t h0 = 0; uint64_t h1 = 0; - std::cout << "[BASE_HASH] Computing forward hashes, k=" << k - << ", has_second=" << has_second << "\n"; - for (unsigned i = 0; i < k; i += 2) { h0 = srol(h0, 1); const char c1 = seq[i]; @@ -1551,9 +1548,6 @@ base_forward_bs_hash(const char* seq, const uint8_t b = BS_CONVERT_TAB[(unsigned char)c2]; const uint8_t loc = 10 * a + b; // yields 11, 12, ..., 44 h0 ^= DIMER_TAB[loc]; - - std::cout << "[BASE_HASH] h0 loop " << i << " | dimer='" << c1 << c2 - << "' loc=" << (unsigned)loc << " | h0=" << h0 << "\n"; } if (!has_second) @@ -1567,9 +1561,6 @@ base_forward_bs_hash(const char* seq, const uint8_t b = BS_CONVERT_TAB[(unsigned char)c2]; const uint8_t loc = 10 * a + b; h1 ^= DIMER_TAB[loc]; - - std::cout << "[BASE_HASH] h1 loop " << i << " | dimer='" << c1 << c2 - << "' loc=" << (unsigned)loc << " | h1=" << h1 << "\n"; } return { h0, h1 }; @@ -2627,15 +2618,11 @@ next_forward_bs_hash(uint64_t fh_val, const uint64_t* const* TAB_33R, const uint64_t* const* TAB_31L) { - // Compute indices for outgoing and incoming dimers - // Each dimer is encoded as (10 * first + second) uint8_t out_idx = 10 * BS_CONVERT_TAB[char_out1] + BS_CONVERT_TAB[char_out2]; uint8_t in_idx = 10 * BS_CONVERT_TAB[char_in1] + BS_CONVERT_TAB[char_in2]; - // Rotate the hash left by 2 since we move by two bases uint64_t h_val = srol(fh_val); - // XOR out the old dimer and XOR in the new dimer h_val ^= srol_bs_table(out_idx, k, TAB_33R, TAB_31L); h_val ^= DIMER_TAB[in_idx]; @@ -2653,8 +2640,6 @@ prev_forward_bs_hash(uint64_t fh_val, const uint64_t* const* TAB_33R, const uint64_t* const* TAB_31L) { - // Compute indices for outgoing and incoming dimers - // Each dimer is encoded as (10 * first + second) uint8_t out_idx = 10 * BS_CONVERT_TAB[char_out1] + BS_CONVERT_TAB[char_out2]; uint8_t in_idx = 10 * BS_CONVERT_TAB[char_in1] + BS_CONVERT_TAB[char_in2]; @@ -2675,20 +2660,18 @@ base_reverse_bs_hash(const char* seq, uint64_t h0 = 0; uint64_t h1 = 0; - // Process pairs from the end for h0 + for (int i = (int)k - 1; i >= 1; i -= 2) { h0 = srol(h0, 1); const uint8_t a = BS_RC_CONVERT_TAB[(unsigned char)seq[i]]; const uint8_t b = BS_RC_CONVERT_TAB[(unsigned char)seq[i - 1]]; - const uint8_t loc = 10 * a + b; // yields 11..44 - h0 ^= DIMER_TAB[loc]; // first element of pointer table + const uint8_t loc = 10 * a + b; + h0 ^= DIMER_TAB[loc]; } - // If length exactly k, return only h0 if (!has_second) return { h0, 0 }; - // Offset by 1 for second hash (mirroring h1 in forward) for (int i = (int)k; i >= 2; i -= 2) { h1 = srol(h1, 1); const uint8_t a = BS_RC_CONVERT_TAB[(unsigned char)seq[i]]; @@ -2742,4 +2725,5 @@ prev_reverse_bs_hash(uint64_t rh_val, return h_val; } -} // namespace btllib::hashing_internals \ No newline at end of file + +} // namespace btllib::hashing_internals From 65b14105f2c2d3c8d5ef45d773d3a741b4f18177 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Mon, 15 Dec 2025 10:20:42 -0800 Subject: [PATCH 35/38] bshash_hashing_internals.hpp: clang-format --- include/btllib/bshash_hashing_internals.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/btllib/bshash_hashing_internals.hpp b/include/btllib/bshash_hashing_internals.hpp index 7aa0cc34..e6ad5dbe 100644 --- a/include/btllib/bshash_hashing_internals.hpp +++ b/include/btllib/bshash_hashing_internals.hpp @@ -2660,7 +2660,6 @@ base_reverse_bs_hash(const char* seq, uint64_t h0 = 0; uint64_t h1 = 0; - for (int i = (int)k - 1; i >= 1; i -= 2) { h0 = srol(h0, 1); const uint8_t a = BS_RC_CONVERT_TAB[(unsigned char)seq[i]]; @@ -2725,5 +2724,4 @@ prev_reverse_bs_hash(uint64_t rh_val, return h_val; } - } // namespace btllib::hashing_internals From eed8cb3cc253288c70e6ad2e44a50b20ebd786e1 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Tue, 16 Dec 2025 11:45:52 -0800 Subject: [PATCH 36/38] bshash_kmer.hpp: use string_view --- include/btllib/bshash_kmer.hpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/include/btllib/bshash_kmer.hpp b/include/btllib/bshash_kmer.hpp index fbede73b..d0f2a3f7 100644 --- a/include/btllib/bshash_kmer.hpp +++ b/include/btllib/bshash_kmer.hpp @@ -257,13 +257,11 @@ class BsHash * * @return The center dimer of the current k-mer. */ - std::string center_dimer() + std::string_view center_dimer() const { - - size_t num_dimers = k / 2; - size_t center_idx = pos + (num_dimers / 2) * 2; - - return std::string{ seq[center_idx], seq[center_idx + 1] }; + constexpr size_t DIMER_LEN = 2; + const size_t center_idx = pos + (k / 4) * DIMER_LEN; + return std::string_view(seq + center_idx, DIMER_LEN); } /** From afb300033a67cd832e35f1175f6d46c4587705e4 Mon Sep 17 00:00:00 2001 From: jowong04 Date: Wed, 17 Dec 2025 00:40:31 -0800 Subject: [PATCH 37/38] bshash_kmer.hpp: fix condition --- include/btllib/bshash_kmer.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/btllib/bshash_kmer.hpp b/include/btllib/bshash_kmer.hpp index d0f2a3f7..96bdb54d 100644 --- a/include/btllib/bshash_kmer.hpp +++ b/include/btllib/bshash_kmer.hpp @@ -454,9 +454,9 @@ class BsHash bool init() { bool has_n = true; - while (pos <= seq_len - k + 1 && has_n) { + while (pos < seq_len - k + 1 && has_n) { has_n = false; - for (unsigned i = 0; i < k && pos <= seq_len - k + 1; i += 2) { + for (unsigned i = 0; i < k && pos < seq_len - k + 1; i += 2) { const char c1 = seq[pos + k - i - 2]; const char c2 = seq[pos + k - i - 1]; const uint8_t a = BS_CONVERT_TAB[(unsigned char)c1]; @@ -493,4 +493,4 @@ class BsHash return true; } }; -} \ No newline at end of file +} From 3f34e9b3b43c1e85a507af0f18a2f2a257ae8a2a Mon Sep 17 00:00:00 2001 From: jowong04 Date: Wed, 17 Dec 2025 13:34:04 -0800 Subject: [PATCH 38/38] bshash_kmer: fix boundaries and init conditions --- include/btllib/bshash_kmer.hpp | 40 +++++++++++++++++++++++++++------- tests/bshash.cpp | 19 ++++++++++++++++ 2 files changed, 51 insertions(+), 8 deletions(-) diff --git a/include/btllib/bshash_kmer.hpp b/include/btllib/bshash_kmer.hpp index 96bdb54d..b43c9bc8 100644 --- a/include/btllib/bshash_kmer.hpp +++ b/include/btllib/bshash_kmer.hpp @@ -175,12 +175,14 @@ class BsHash */ bool roll() { + //std::cerr << "running roll" << std::endl; + //std::cerr << "pos: " << pos << std::endl; if (!initialized) { return init(); } auto dimer_valid = [&](size_t p) -> bool { - if (p + 1 >= seq_len) + if (p + 1 > seq_len - 1) return false; char c1 = seq[p]; char c2 = seq[p + 1]; @@ -191,31 +193,40 @@ class BsHash }; if (even) { + //std::cerr << "checkpoint 1 even" << pos << std::endl; if (pos >= seq_len - k) return false; + //std::cerr << "checkpoint 2 even" << pos << std::endl; if (!dimer_valid(pos + k - 1)) { - pos += k; + pos += k + 1; + // pos += k; return init(); } + //std::cerr << "checkpoint 3 even" << pos << std::endl; even = false; ++pos; extend_hashes( fwd_hashes[1], rev_hashes[1], k, num_hashes, hash_arr.get()); + //std::cerr << "checkpoint 4 even" << pos << std::endl; return true; } - + //std::cerr << "checkpoint 0 odd" << pos << std::endl; + if (pos >= seq_len - k) return false; + //std::cerr << "checkpoint 1 odd" << pos << std::endl; if (!dimer_valid(pos + k - 1)) { - pos += k; + pos += k + 1; return init(); } + //std::cerr << "checkpoint 2 odd" << pos << std::endl; bool can_roll_second = dimer_valid(pos + k); + //std::cerr << "checkpoint 3 odd" << pos << std::endl; for (int i = 0; i < 2; i++) { if (i == 0 || can_roll_second) { @@ -233,6 +244,7 @@ class BsHash rev_hashes[i], k, out1, out2, in1, in2, DIMER_TAB, TAB_33R, TAB_31L); } } + //std::cerr << "checkpoint 4 odd" << pos << std::endl; extend_hashes(fwd_hashes[0], rev_hashes[0], k, num_hashes, hash_arr.get()); @@ -453,10 +465,12 @@ class BsHash bool init() { + //std::cerr << "running init" << std::endl; + even = true; bool has_n = true; - while (pos < seq_len - k + 1 && has_n) { + while (pos <= seq_len - k + 1 && has_n) { has_n = false; - for (unsigned i = 0; i < k && pos < seq_len - k + 1; i += 2) { + for (unsigned i = 0; i < k && pos <= seq_len - k + 1; i += 2) { const char c1 = seq[pos + k - i - 2]; const char c2 = seq[pos + k - i - 1]; const uint8_t a = BS_CONVERT_TAB[(unsigned char)c1]; @@ -479,8 +493,18 @@ class BsHash return false; } + auto dimer_valid = [&](size_t p) -> bool { + if (p + 1 > seq_len - 1) + return false; + char c1 = seq[p]; + char c2 = seq[p + 1]; + uint8_t a = BS_CONVERT_TAB[(unsigned char)c1]; + uint8_t b = BS_CONVERT_TAB[(unsigned char)c2]; + uint8_t loc = 10 * a + b; + return DIMER_TAB[loc] != 0; + }; bool has_second = true; - if (pos == seq_len - k) { + if (pos == seq_len - k || !dimer_valid(pos + k - 1)) { has_second = false; } @@ -493,4 +517,4 @@ class BsHash return true; } }; -} +} \ No newline at end of file diff --git a/tests/bshash.cpp b/tests/bshash.cpp index 27147c4e..8f1f7be2 100644 --- a/tests/bshash.cpp +++ b/tests/bshash.cpp @@ -10,6 +10,25 @@ int main() { + { + PRINT_TEST_NAME("Real Test Case") + + std::string first34 = "ATCACGCNTAAACTTTTTTTTTTTTTTTTTTTTTTTTTTTNTTCCCCACCAGATCGGAAGAGCACACGTCTTAAAANCAGTCAATGGAGTTTATCGCGTTTGCCGTCGTCGGCTTGNAAAGGNGGGGGGGGGGGGGGGGGGGGGGGGGGGG"; + + std::string seq = first34; + unsigned h = 3; + unsigned k = 34; + + btllib::BsHash bshash(seq, h, k, "CG"); + std::vector hashes; + + while (bshash.roll()) { + uint64_t* h_vals = new uint64_t[h]; + std::copy(bshash.hashes(), bshash.hashes() + h, h_vals); + hashes.push_back(h_vals); + } + } + { PRINT_TEST_NAME("CG k-mer rolling (64-mer base)")