From 38e8a0f4fa445b5794e54f897645bc8c04297f0a Mon Sep 17 00:00:00 2001 From: parham-k Date: Thu, 2 May 2024 20:03:27 -0700 Subject: [PATCH 1/5] Add seq setter to ntHash --- include/btllib/nthash_kmer.hpp | 22 +++++++++++++++++++++- include/btllib/nthash_seed.hpp | 25 ++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/include/btllib/nthash_kmer.hpp b/include/btllib/nthash_kmer.hpp index a12ae59d..c482cb3f 100644 --- a/include/btllib/nthash_kmer.hpp +++ b/include/btllib/nthash_kmer.hpp @@ -300,6 +300,26 @@ class NtHash NtHash(NtHash&&) = default; + /** + * Reset iterator on a new sequence. Useful for re-using NtHash objects. + * @param seq New sequence for hashing + */ + void set_seq(const std::string& seq, size_t pos = 0) + { + this->seq = seq.data(); + this->seq_len = seq.size(); + this->pos = pos; + this->initialized = false; + this->hash_arr.reset(new uint64_t[num_hashes]); + check_error(this->seq_len < k, + "NtHash: sequence length (" + std::to_string(this->seq_len) + + ") is smaller than k (" + std::to_string(k) + ")"); + check_error(pos > this->seq_len - k, + "NtHash: passed position (" + std::to_string(pos) + + ") is larger than sequence length (" + + std::to_string(this->seq_len) + ")"); + } + /** * Calculate the hash values of current k-mer and advance to the next k-mer. * NtHash advances one nucleotide at a time until it finds a k-mer with valid @@ -475,7 +495,7 @@ class NtHash private: const char* seq; - const size_t seq_len; + size_t seq_len; hashing_internals::NUM_HASHES_TYPE num_hashes; hashing_internals::K_TYPE k; size_t pos; diff --git a/include/btllib/nthash_seed.hpp b/include/btllib/nthash_seed.hpp index 721f1525..cd624558 100644 --- a/include/btllib/nthash_seed.hpp +++ b/include/btllib/nthash_seed.hpp @@ -650,6 +650,29 @@ class SeedNtHash SeedNtHash(SeedNtHash&&) = default; + /** + * Reset iterator on a new sequence. Useful for re-using NtHash objects. 
+ * @param seq New sequence for hashing + */ + void set_seq(const std::string& seq, size_t pos = 0) + { + this->seq = seq.data(); + this->seq_len = seq.size(); + this->pos = pos; + this->initialized = false; + this->fwd_hash.reset(new uint64_t[blocks.size()]); + this->rev_hash.reset(new uint64_t[blocks.size()]); + this->hash_arr.reset(new uint64_t[blocks.size() * num_hashes_per_seed]); + check_error(this->seq_len < k, + "SeedNtHash: sequence length (" + + std::to_string(this->seq_len) + ") is smaller than k (" + + std::to_string(k) + ")"); + check_error(pos > this->seq_len - k, + "SeedNtHash: passed position (" + std::to_string(pos) + + ") is larger than sequence length (" + + std::to_string(this->seq_len) + ")"); + } + /** * Calculate the next hash value. Refer to \ref NtHash::roll() for more * information. @@ -869,7 +892,7 @@ class SeedNtHash private: const char* seq; - const size_t seq_len; + size_t seq_len; hashing_internals::NUM_HASHES_TYPE num_hashes_per_seed; hashing_internals::K_TYPE k; size_t pos; From b77413effcad96fe413024b31855befdecacf837 Mon Sep 17 00:00:00 2001 From: parham-k Date: Thu, 2 May 2024 20:03:35 -0700 Subject: [PATCH 2/5] Add seq setting tests --- tests/nthash.cpp | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/tests/nthash.cpp b/tests/nthash.cpp index a2909166..7f157714 100644 --- a/tests/nthash.cpp +++ b/tests/nthash.cpp @@ -449,5 +449,48 @@ main() } } + { + PRINT_TEST_NAME("k-mer reset seq") + const std::string seq1 = "ATGCTAGTAGCTGAC"; + const std::string seq2 = "GTGACTAGCTGACTG"; + const unsigned h = 3, k = 7; + btllib::NtHash h1(seq1, h, k); + h1.roll(); + h1.roll(); + h1.roll(); + h1.set_seq(seq2); + btllib::NtHash h2(seq2, h, k); + bool can_roll = true; + while (can_roll) { + can_roll = h1.roll(); + can_roll |= h2.roll(); + TEST_ASSERT_ARRAY_EQ(h1.hashes(), h2.hashes(), h) + } + TEST_ASSERT(!h1.roll()) + TEST_ASSERT(!h2.roll()) + } + + { + PRINT_TEST_NAME("seed reset seq") + const 
std::string seq1 = "ATGCTAGTAGCTGAC"; + const std::string seq2 = "GTGACTAGCTGACTG"; + const std::vector<std::string> seeds = { "11011" }; + const unsigned h = 3, k = seeds[0].size(); + btllib::SeedNtHash h1(seq1, seeds, h, k); + h1.roll(); + h1.roll(); + h1.roll(); + h1.set_seq(seq2); + btllib::SeedNtHash h2(seq2, seeds, h, k); + bool can_roll = true; + while (can_roll) { + can_roll = h1.roll(); + can_roll |= h2.roll(); + TEST_ASSERT_ARRAY_EQ(h1.hashes(), h2.hashes(), h) + } + TEST_ASSERT(!h1.roll()) + TEST_ASSERT(!h2.roll()) + } + return 0; } From a0a5ab89fa8210927153f488ca65b043298e9f4f Mon Sep 17 00:00:00 2001 From: parham-k Date: Thu, 2 May 2024 20:03:40 -0700 Subject: [PATCH 3/5] Bump version --- meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meson.build b/meson.build index db8dfb77..cf988cfa 100644 --- a/meson.build +++ b/meson.build @@ -1,5 +1,5 @@ project('btllib', 'cpp', - version : '1.7.2', + version : '1.7.3', license : 'GPL3', default_options : [ 'cpp_std=c++17', 'warning_level=3', 'werror=true', 'b_coverage=true' ], meson_version : '>= 0.60.0') From 1024481b03a5d13998fdcd9c0cfee1fb643a52c4 Mon Sep 17 00:00:00 2001 From: parham-k Date: Thu, 2 May 2024 20:18:30 -0700 Subject: [PATCH 4/5] Update wrappers --- wrappers/python/btllib_wrap.cxx | 216 ++++++++++++++++++++++++++++++++ 1 file changed, 216 insertions(+) diff --git a/wrappers/python/btllib_wrap.cxx b/wrappers/python/btllib_wrap.cxx index 8d279b38..759ded5c 100644 --- a/wrappers/python/btllib_wrap.cxx +++ b/wrappers/python/btllib_wrap.cxx @@ -37375,6 +37375,113 @@ SWIGINTERN PyObject *_wrap_parse_seeds(PyObject *self, PyObject *args) { } +SWIGINTERN PyObject *_wrap_SeedNtHash_set_seq__SWIG_0(PyObject *self, Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + btllib::SeedNtHash *arg1 = (btllib::SeedNtHash *) 0 ; + std::string *arg2 = 0 ; + size_t arg3 ; + void *argp1 = 0 ; + int res1 = 0 ; + int res2 = SWIG_OLDOBJ ; + size_t val3 ; + int ecode3 = 0 ; + + if ((nobjs
< 3) || (nobjs > 3)) SWIG_fail; + res1 = SWIG_ConvertPtr(self, &argp1,SWIGTYPE_p_btllib__SeedNtHash, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SeedNtHash_set_seq" "', argument " "1"" of type '" "btllib::SeedNtHash *""'"); + } + arg1 = reinterpret_cast< btllib::SeedNtHash * >(argp1); + { + std::string *ptr = (std::string *)0; + res2 = SWIG_AsPtr_std_string(swig_obj[1], &ptr); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "SeedNtHash_set_seq" "', argument " "2"" of type '" "std::string const &""'"); + } + if (!ptr) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "SeedNtHash_set_seq" "', argument " "2"" of type '" "std::string const &""'"); + } + arg2 = ptr; + } + ecode3 = SWIG_AsVal_size_t(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "SeedNtHash_set_seq" "', argument " "3"" of type '" "size_t""'"); + } + arg3 = static_cast< size_t >(val3); + (arg1)->set_seq((std::string const &)*arg2,arg3); + resultobj = SWIG_Py_Void(); + if (SWIG_IsNewObj(res2)) delete arg2; + return resultobj; +fail: + if (SWIG_IsNewObj(res2)) delete arg2; + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SeedNtHash_set_seq__SWIG_1(PyObject *self, Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + btllib::SeedNtHash *arg1 = (btllib::SeedNtHash *) 0 ; + std::string *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + int res2 = SWIG_OLDOBJ ; + + if ((nobjs < 2) || (nobjs > 2)) SWIG_fail; + res1 = SWIG_ConvertPtr(self, &argp1,SWIGTYPE_p_btllib__SeedNtHash, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "SeedNtHash_set_seq" "', argument " "1"" of type '" "btllib::SeedNtHash *""'"); + } + arg1 = reinterpret_cast< btllib::SeedNtHash * >(argp1); + { + std::string *ptr = (std::string *)0; + res2 = SWIG_AsPtr_std_string(swig_obj[1], &ptr); + if (!SWIG_IsOK(res2)) { 
+ SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "SeedNtHash_set_seq" "', argument " "2"" of type '" "std::string const &""'"); + } + if (!ptr) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "SeedNtHash_set_seq" "', argument " "2"" of type '" "std::string const &""'"); + } + arg2 = ptr; + } + (arg1)->set_seq((std::string const &)*arg2); + resultobj = SWIG_Py_Void(); + if (SWIG_IsNewObj(res2)) delete arg2; + return resultobj; +fail: + if (SWIG_IsNewObj(res2)) delete arg2; + return NULL; +} + + +SWIGINTERN PyObject *_wrap_SeedNtHash_set_seq(PyObject *self, PyObject *args) { + Py_ssize_t argc; + PyObject *argv[4] = { + 0 + }; + + (void)self; + if (!(argc = SWIG_Python_UnpackTuple(args, "SeedNtHash_set_seq", 0, 3, argv+1))) SWIG_fail; + argv[0] = self; + if (argc == 2) { + PyObject *retobj = _wrap_SeedNtHash_set_seq__SWIG_1(self, argc, argv); + if (!SWIG_Python_TypeErrorOccurred(retobj)) return retobj; + SWIG_fail; + } + if (argc == 3) { + PyObject *retobj = _wrap_SeedNtHash_set_seq__SWIG_0(self, argc, argv); + if (!SWIG_Python_TypeErrorOccurred(retobj)) return retobj; + SWIG_fail; + } + +fail: + SWIG_Python_RaiseOrModifyTypeError("Wrong number or type of arguments for overloaded function 'SeedNtHash_set_seq'.\n" + " Possible C/C++ prototypes are:\n" + " btllib::SeedNtHash::set_seq(std::string const &,size_t)\n" + " btllib::SeedNtHash::set_seq(std::string const &)\n"); + return 0; +} + + SWIGINTERN PyObject *_wrap_SeedNtHash_roll(PyObject *self, PyObject *args) { PyObject *resultobj = 0; btllib::SeedNtHash *arg1 = (btllib::SeedNtHash *) 0 ; @@ -41633,6 +41740,113 @@ SWIGINTERN PyObject *_wrap_sub_hash(PyObject *self, PyObject *args) { } +SWIGINTERN PyObject *_wrap_NtHash_set_seq__SWIG_0(PyObject *self, Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + btllib::NtHash *arg1 = (btllib::NtHash *) 0 ; + std::string *arg2 = 0 ; + size_t arg3 ; + void *argp1 = 0 ; + int res1 = 0 ; + int res2 = SWIG_OLDOBJ ; + 
size_t val3 ; + int ecode3 = 0 ; + + if ((nobjs < 3) || (nobjs > 3)) SWIG_fail; + res1 = SWIG_ConvertPtr(self, &argp1,SWIGTYPE_p_btllib__NtHash, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "NtHash_set_seq" "', argument " "1"" of type '" "btllib::NtHash *""'"); + } + arg1 = reinterpret_cast< btllib::NtHash * >(argp1); + { + std::string *ptr = (std::string *)0; + res2 = SWIG_AsPtr_std_string(swig_obj[1], &ptr); + if (!SWIG_IsOK(res2)) { + SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "NtHash_set_seq" "', argument " "2"" of type '" "std::string const &""'"); + } + if (!ptr) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "NtHash_set_seq" "', argument " "2"" of type '" "std::string const &""'"); + } + arg2 = ptr; + } + ecode3 = SWIG_AsVal_size_t(swig_obj[2], &val3); + if (!SWIG_IsOK(ecode3)) { + SWIG_exception_fail(SWIG_ArgError(ecode3), "in method '" "NtHash_set_seq" "', argument " "3"" of type '" "size_t""'"); + } + arg3 = static_cast< size_t >(val3); + (arg1)->set_seq((std::string const &)*arg2,arg3); + resultobj = SWIG_Py_Void(); + if (SWIG_IsNewObj(res2)) delete arg2; + return resultobj; +fail: + if (SWIG_IsNewObj(res2)) delete arg2; + return NULL; +} + + +SWIGINTERN PyObject *_wrap_NtHash_set_seq__SWIG_1(PyObject *self, Py_ssize_t nobjs, PyObject **swig_obj) { + PyObject *resultobj = 0; + btllib::NtHash *arg1 = (btllib::NtHash *) 0 ; + std::string *arg2 = 0 ; + void *argp1 = 0 ; + int res1 = 0 ; + int res2 = SWIG_OLDOBJ ; + + if ((nobjs < 2) || (nobjs > 2)) SWIG_fail; + res1 = SWIG_ConvertPtr(self, &argp1,SWIGTYPE_p_btllib__NtHash, 0 | 0 ); + if (!SWIG_IsOK(res1)) { + SWIG_exception_fail(SWIG_ArgError(res1), "in method '" "NtHash_set_seq" "', argument " "1"" of type '" "btllib::NtHash *""'"); + } + arg1 = reinterpret_cast< btllib::NtHash * >(argp1); + { + std::string *ptr = (std::string *)0; + res2 = SWIG_AsPtr_std_string(swig_obj[1], &ptr); + if (!SWIG_IsOK(res2)) { + 
SWIG_exception_fail(SWIG_ArgError(res2), "in method '" "NtHash_set_seq" "', argument " "2"" of type '" "std::string const &""'"); + } + if (!ptr) { + SWIG_exception_fail(SWIG_ValueError, "invalid null reference " "in method '" "NtHash_set_seq" "', argument " "2"" of type '" "std::string const &""'"); + } + arg2 = ptr; + } + (arg1)->set_seq((std::string const &)*arg2); + resultobj = SWIG_Py_Void(); + if (SWIG_IsNewObj(res2)) delete arg2; + return resultobj; +fail: + if (SWIG_IsNewObj(res2)) delete arg2; + return NULL; +} + + +SWIGINTERN PyObject *_wrap_NtHash_set_seq(PyObject *self, PyObject *args) { + Py_ssize_t argc; + PyObject *argv[4] = { + 0 + }; + + (void)self; + if (!(argc = SWIG_Python_UnpackTuple(args, "NtHash_set_seq", 0, 3, argv+1))) SWIG_fail; + argv[0] = self; + if (argc == 2) { + PyObject *retobj = _wrap_NtHash_set_seq__SWIG_1(self, argc, argv); + if (!SWIG_Python_TypeErrorOccurred(retobj)) return retobj; + SWIG_fail; + } + if (argc == 3) { + PyObject *retobj = _wrap_NtHash_set_seq__SWIG_0(self, argc, argv); + if (!SWIG_Python_TypeErrorOccurred(retobj)) return retobj; + SWIG_fail; + } + +fail: + SWIG_Python_RaiseOrModifyTypeError("Wrong number or type of arguments for overloaded function 'NtHash_set_seq'.\n" + " Possible C/C++ prototypes are:\n" + " btllib::NtHash::set_seq(std::string const &,size_t)\n" + " btllib::NtHash::set_seq(std::string const &)\n"); + return 0; +} + + SWIGINTERN PyObject *_wrap_NtHash_roll(PyObject *self, PyObject *args) { PyObject *resultobj = 0; btllib::NtHash *arg1 = (btllib::NtHash *) 0 ; @@ -67165,6 +67379,7 @@ SwigPyBuiltin__btllib__SeedNtHash_richcompare(PyObject *self, PyObject *other, i } SWIGINTERN PyMethodDef SwigPyBuiltin__btllib__SeedNtHash_methods[] = { + { "set_seq", _wrap_SeedNtHash_set_seq, METH_VARARGS, "" }, { "roll", _wrap_SeedNtHash_roll, METH_NOARGS, "" }, { "roll_back", _wrap_SeedNtHash_roll_back, METH_NOARGS, "" }, { "peek", _wrap_SeedNtHash_peek, METH_VARARGS, "" }, @@ -67653,6 +67868,7 @@ 
SwigPyBuiltin__btllib__NtHash_richcompare(PyObject *self, PyObject *other, int o } SWIGINTERN PyMethodDef SwigPyBuiltin__btllib__NtHash_methods[] = { + { "set_seq", _wrap_NtHash_set_seq, METH_VARARGS, "" }, { "roll", _wrap_NtHash_roll, METH_NOARGS, "" }, { "roll_back", _wrap_NtHash_roll_back, METH_NOARGS, "" }, { "peek", _wrap_NtHash_peek, METH_VARARGS, "" }, From fa07066b3943ace283b3f4337bef498ea1b86f30 Mon Sep 17 00:00:00 2001 From: parham-k Date: Thu, 2 May 2024 20:18:46 -0700 Subject: [PATCH 5/5] Update docs --- docs/classbtllib_1_1NtHash-members.html | 3 +- docs/classbtllib_1_1NtHash.html | 43 + docs/classbtllib_1_1SeedNtHash-members.html | 1 + docs/classbtllib_1_1SeedNtHash.html | 43 + ...unting__bloom__filter-inl_8hpp_source.html | 4 +- docs/functions_0x73.html | 6 +- docs/functions_func_0x73.html | 6 +- docs/nthash__kmer_8hpp_source.html | 535 +++++++------ docs/nthash__seed_8hpp_source.html | 744 +++++++++--------- docs/search/all_73.js | 1 + docs/search/functions_73.js | 1 + 11 files changed, 761 insertions(+), 626 deletions(-) diff --git a/docs/classbtllib_1_1NtHash-members.html b/docs/classbtllib_1_1NtHash-members.html index 72293e12..5d9a9b34 100644 --- a/docs/classbtllib_1_1NtHash-members.html +++ b/docs/classbtllib_1_1NtHash-members.html @@ -109,7 +109,8 @@ peek_back(char char_in)btllib::NtHashinline roll()btllib::NtHashinline roll_back()btllib::NtHashinline - sub(const std::vector< unsigned > &positions, const std::vector< unsigned char > &new_bases) (defined in btllib::NtHash)btllib::NtHashinline + set_seq(const std::string &seq, size_t pos=0)btllib::NtHashinline + sub(const std::vector< unsigned > &positions, const std::vector< unsigned char > &new_bases) (defined in btllib::NtHash)btllib::NtHashinline