8#ifndef BTLLIB_NTHASH_HPP 
    9#define BTLLIB_NTHASH_HPP 
   11#include "btllib/nthash_lowlevel.hpp" 
   12#include "btllib/status.hpp" 
   /**
    * Constant C string holding the literal "ntHash_v2". Both the pointer and
    * the pointed-to characters are const. Judging by the name it labels the
    * hash function/version; where it is consumed is not visible here.
    */
   22static const char* 
const NTHASH_FN_NAME = 
"ntHash_v2";
 
   /** Integer type used to store a hash count: 8-bit unsigned. */
   26using NTHASH_HASH_NUM_TYPE = uint8_t;
 
   /** Largest value NTHASH_HASH_NUM_TYPE can represent (255 for uint8_t). */
   27static const int NTHASH_HASH_NUM_MAX =
 
   28  std::numeric_limits<NTHASH_HASH_NUM_TYPE>::max();
 
   /** Integer type used to store k: 16-bit unsigned. */
   30using NTHASH_K_TYPE = uint16_t;
 
   /** Largest value NTHASH_K_TYPE can represent (65535 for uint16_t). */
   31static const int NTHASH_K_MAX = std::numeric_limits<NTHASH_K_TYPE>::max();
 
   /**
    * Declaration only; the definition is not part of this file section.
    *
    * @param seed_strings Seeds in textual form, read-only.
    * @return A vector of SpacedSeed values (presumably one per input string;
    *         confirm against the definition).
    */
   36std::vector<SpacedSeed>
 
   37parse_seeds(
const std::vector<std::string>& seed_strings);
 
   40parse_seeds(
const std::vector<std::string>& seed_strings,
 
   41            std::vector<SpacedSeedBlocks>& blocks,
 
   42            std::vector<SpacedSeedMonomers>& monomers);
 
   45parsed_seeds_to_blocks(
const std::vector<SpacedSeed>& seeds,
 
   47                       std::vector<SpacedSeedBlocks>& blocks,
 
   48                       std::vector<SpacedSeedMonomers>& monomers);
 
   51check_seeds(
const std::vector<std::string>& seeds, 
unsigned k);
 
   /**
    * Constructor declaration; the body is not part of this file section.
    *
    * @param seq      Input sequence, passed by const reference.
    * @param hash_num Number of hashes, accepted as unsigned.
    * @param k        k value, accepted as unsigned.
    * @param pos      Starting position in seq; defaults to 0.
    *
    * NOTE(review): this file stores hash_num and k in members of type
    * NTHASH_HASH_NUM_TYPE (uint8_t) and NTHASH_K_TYPE (uint16_t). If those
    * members belong to this class, arguments above NTHASH_HASH_NUM_MAX /
    * NTHASH_K_MAX would not fit; whether the constructor validates them is
    * not visible here. Confirm.
    */
   78  NtHash(
const std::string& seq, 
unsigned hash_num, 
unsigned k, 
size_t pos = 0);
 
  /**
   * Declaration only; the definition is not part of this file section.
   *
   * @param positions Positions, read-only.
   * @param new_bases Base characters, read-only. Presumably new_bases[i] is
   *                  the replacement for positions[i]; confirm against the
   *                  definition.
   */
  135  void sub(
const std::vector<unsigned>& positions,
 
  136           const std::vector<unsigned char>& new_bases);
 
  /**
   * @return Read-only raw pointer to the buffer managed by hashes_array.
   *         Ownership stays with hashes_array, so callers must not free it.
   */
  138  const uint64_t* hashes()
 const { 
return hashes_array.get(); }
 
  /**
   * @return true when forward_hash is less than or equal to reverse_hash
   *         (a tie counts as forward), false otherwise.
   */
  145  bool forward()
 const { 
return forward_hash <= reverse_hash; }
 
  /** @return The stored hash_num member, converted to unsigned. */
  146  unsigned get_hash_num()
 const { 
return hash_num; }
 
  /** @return The stored k member, converted to unsigned. */
  147  unsigned get_k()
 const { 
return k; }
 
  /** @return The current value of forward_hash, by value. */
  149  uint64_t get_forward_hash()
 const { 
return forward_hash; }
 
  /** @return The current value of reverse_hash, by value. */
  150  uint64_t get_reverse_hash()
 const { 
return reverse_hash; }
 
  /**
   * Points this object at a different sequence.
   *
   * @param new_seq Sequence to switch to.
   * @param new_pos Position argument, defaults to 0. The statement that
   *                consumes it is not among the lines shown here.
   *
   * NOTE(review): the visible statements store new_seq.data() rather than
   * copying the characters. If seq is a non-owning pointer member (its
   * declaration is not visible here), new_seq must outlive every later use
   * of this object and must not be modified in a way that reallocates its
   * buffer. Confirm.
   */
  152  void change_seq(
const std::string& new_seq, 
size_t new_pos = 0)
 
  // Borrow the caller's character buffer; no copy is made.
  154    seq = new_seq.data();
 
  // Record the length of the new sequence.
  155    seq_len = new_seq.length();
 
  163  friend class SeedNtHash;
 
  // Hash count; const, so it cannot change after construction. Stored as
  // NTHASH_HASH_NUM_TYPE (uint8_t).
  170  const NTHASH_HASH_NUM_TYPE hash_num;
 
  // k value; const, so it cannot change after construction. Stored as
  // NTHASH_K_TYPE (uint16_t).
  171  const NTHASH_K_TYPE k;
 
  // Owning pointer to a uint64_t array; hashes() hands out a read-only view
  // of it. The allocation size is set elsewhere (not visible here).
  175  std::unique_ptr<uint64_t[]> hashes_array;
 
  // Forward and reverse hash values, both starting at 0. forward() compares
  // the two.
  176  uint64_t forward_hash = 0;
 
  177  uint64_t reverse_hash = 0;
 
  /**
   * Declaration only; the definition is not part of this file section.
   *
   * @param positions Positions, read-only.
   * @param new_bases Base characters, read-only. Presumably paired
   *                  element-by-element with positions; confirm against the
   *                  definition.
   */
  250  void sub(
const std::vector<unsigned>& positions,
 
  251           const std::vector<unsigned char>& new_bases);
 
  /**
   * @return Read-only raw pointer into the array owned by hashes_array. The
   *         caller does not take ownership.
   */
  253  const uint64_t* hashes()
 const { 
return hashes_array.get(); }
 
  /**
   * @return true if forward_hash <= reverse_hash (equality included),
   *         otherwise false.
   */
  260  bool forward()
 const { 
return forward_hash <= reverse_hash; }
 
  /** @return hash_num, widened from its storage type to unsigned. */
  261  unsigned get_hash_num()
 const { 
return hash_num; }
 
  /** @return k, widened from its storage type to unsigned. */
  262  unsigned get_k()
 const { 
return k; }
 
  /** @return Copy of the forward_hash member. */
  264  uint64_t get_forward_hash()
 const { 
return forward_hash; }
 
  /** @return Copy of the reverse_hash member. */
  265  uint64_t get_reverse_hash()
 const { 
return reverse_hash; }
 
  /**
   * Replaces the stored sequence with a copy of new_seq.
   *
   * @param new_seq Sequence whose characters are copied into the buffer
   *                behind seq.
   * @param new_pos Position argument, defaults to 0. The statement that
   *                consumes it is not among the lines shown here.
   *
   * NOTE(review): seq_len is overwritten with new_seq.length() and that many
   * bytes are copied into the existing buffer without any visible size
   * check or reallocation. If new_seq is longer than the buffer that seq was
   * allocated with (allocation not visible here), the memcpy writes past
   * the end. Confirm, and if so either reallocate or reject longer input.
   */
  267  void change_seq(
const std::string& new_seq, 
size_t new_pos = 0)
 
  // Adopt the new length first; it is also the byte count for the copy below.
  269    seq_len = new_seq.length();
 
  // Copy the characters into the owned buffer (no terminating NUL is copied).
  270    std::memcpy(seq.get(), new_seq.data(), seq_len);
 
  // Owning pointer to a char array holding the sequence; change_seq() copies
  // new characters into it.
  281  std::unique_ptr<char[]> seq;
 
  // Hash count; const after construction, stored as NTHASH_HASH_NUM_TYPE
  // (uint8_t).
  283  const NTHASH_HASH_NUM_TYPE hash_num;
 
  // k value; const after construction, stored as NTHASH_K_TYPE (uint16_t).
  284  const NTHASH_K_TYPE k;
 
  // Owning pointer to a uint64_t array, exposed read-only through hashes().
  288  std::unique_ptr<uint64_t[]> hashes_array;
 
  // Forward and reverse hash values, both starting at 0; forward() compares
  // them.
  289  uint64_t forward_hash = 0;
 
  290  uint64_t reverse_hash = 0;
 
  299             const std::vector<SpacedSeed>& seeds,
 
  300             unsigned hash_num_per_seed,
 
  304             const std::vector<SpacedSeed>& seeds,
 
  305             unsigned hash_num_per_seed,
 
  310             const std::vector<std::string>& seeds,
 
  311             unsigned hash_num_per_seed,
 
  315             const std::vector<std::string>& seeds,
 
  316             unsigned hash_num_per_seed,
 
  /** @return Whatever nthash.hashes() returns; pure forwarding. */
  367  const uint64_t* hashes()
 const { 
return nthash.hashes(); }
 
  /**
   * Switches to a different sequence by delegating to nthash.change_seq().
   *
   * @param seq Sequence to switch to; passed through unchanged.
   * @param pos Position, defaults to 0; passed through unchanged.
   *
   * Any further statements in the body are not among the lines shown here.
   */
  369  void change_seq(
const std::string& seq, 
size_t pos = 0)
 
  371    nthash.change_seq(seq, pos);
 
  /** @return The position reported by nthash.get_pos(). */
  374  size_t get_pos()
 const { 
return nthash.
get_pos(); }
 
  /** @return The result of nthash.forward(); pure forwarding. */
  375  bool forward()
 const { 
return nthash.forward(); }
 
  /** @return The hash count reported by nthash.get_hash_num(). */
  376  unsigned get_hash_num()
 const { 
return nthash.get_hash_num(); }
 
  /** @return The hash_num_per_seed member. */
  377  unsigned get_hash_num_per_seed()
 const { 
return hash_num_per_seed; }
 
  /** @return The k value reported by nthash.get_k(). */
  378  unsigned get_k()
 const { 
return nthash.get_k(); }
 
  /**
   * @return Raw pointer to the array owned by forward_hash. The caller does
   *         not take ownership.
   *
   * NOTE(review): this const method returns a pointer to non-const
   * uint64_t, so callers can modify the values through it, whereas hashes()
   * returns a pointer to const. Confirm whether mutable access is intended.
   */
  380  uint64_t* get_forward_hash()
 const { 
return forward_hash.get(); }
 
  /**
   * @return Raw pointer to the array owned by reverse_hash. The caller does
   *         not take ownership.
   *
   * NOTE(review): this const method returns a pointer to non-const
   * uint64_t, so callers can modify the values through it. Confirm whether
   * mutable access is intended.
   */
  381  uint64_t* get_reverse_hash()
 const { 
return reverse_hash.get(); }
 
  // Value returned by get_hash_num_per_seed(); const after construction.
  387  const unsigned hash_num_per_seed;
 
  // Per-seed block and monomer descriptions. How they are filled is not
  // visible here.
  389  std::vector<SpacedSeedBlocks> blocks;
 
  390  std::vector<SpacedSeedMonomers> monomers;
 
  // Owning uint64_t arrays passed (via .get()) to the ntmsm64/ntmsm64l calls
  // in this file. Elsewhere in this file, scratch copies of these arrays are
  // sized with blocks.size(), so presumably there is one element per seed.
  // TODO confirm.
  392  std::unique_ptr<uint64_t[]> fh_no_monomers;
 
  393  std::unique_ptr<uint64_t[]> rh_no_monomers;
 
  // Owning uint64_t arrays exposed through get_forward_hash() and
  // get_reverse_hash().
  394  std::unique_ptr<uint64_t[]> forward_hash;
 
  395  std::unique_ptr<uint64_t[]> reverse_hash;
 
  /*
   * Expands to the definition of `inline bool CLASS::init()`.
   *   CLASS         - class the method is generated for.
   *   NTHASH_CALL   - hashing expression supplied by the caller (its use is
   *                   on lines not shown here).
   *   MEMBER_PREFIX - text pasted in front of every member name; may be
   *                   empty or something like `nthash.`.
   * Visible behaviour: when k exceeds seq_len, or when pos ends up beyond
   * seq_len - k, pos is set to the maximum std::size_t value. The position
   * is advanced by posN + 1 inside a loop bounded by
   * pos < seq_len - k + 1, and `initialized` is set to true on the path
   * shown. Return values are on lines not shown here.
   * Only block comments are safe inside or next to this macro because of
   * the line continuations.
   */
  399#define BTLLIB_NTHASH_INIT(CLASS, NTHASH_CALL, MEMBER_PREFIX)                  \ 
  400  inline bool CLASS::init()                                                    \ 
  402    if (MEMBER_PREFIX k > MEMBER_PREFIX seq_len) {                             \ 
  403      MEMBER_PREFIX pos = std::numeric_limits<std::size_t>::max();             \ 
  408      (MEMBER_PREFIX pos < MEMBER_PREFIX seq_len - MEMBER_PREFIX k + 1) &&     \ 
  410      MEMBER_PREFIX pos += posN + 1;                                           \ 
  412    if (MEMBER_PREFIX pos > MEMBER_PREFIX seq_len - MEMBER_PREFIX k) {         \ 
  413      MEMBER_PREFIX pos = std::numeric_limits<std::size_t>::max();             \ 
  416    MEMBER_PREFIX initialized = true;                                          \ 
  /*
   * Expands to the definition of `inline bool CLASS::FN_DECL`, a step in
   * the direction of increasing pos.
   *   FN_DECL       - method name plus parameter list, supplied by the
   *                   caller.
   *   NTHASH_CALL   - hashing code supplied by the caller (its use is on
   *                   lines not shown here).
   *   MEMBER_PREFIX - text pasted in front of every member name.
   * Visible behaviour: there is a separate branch for the not-yet-
   * initialized state and one for pos >= seq_len - k. The character at
   * seq[pos + k] is looked up in SEED_TAB (cast to unsigned char before
   * indexing); on a match pos jumps ahead by k, otherwise pos is
   * incremented by one. What each branch returns is on lines not shown
   * here.
   */
  421#define BTLLIB_NTHASH_ROLL(CLASS, FN_DECL, NTHASH_CALL, MEMBER_PREFIX)         \ 
  422  inline bool CLASS::FN_DECL                                                   \ 
  424    if (!MEMBER_PREFIX initialized) {                                          \ 
  427    if (MEMBER_PREFIX pos >= MEMBER_PREFIX seq_len - MEMBER_PREFIX k) {        \ 
  430    if (SEED_TAB[(unsigned char)(MEMBER_PREFIX seq[MEMBER_PREFIX pos +         \ 
  431                                                   MEMBER_PREFIX k])] ==       \ 
  433      MEMBER_PREFIX pos += MEMBER_PREFIX k;                                    \ 
  437      ++ MEMBER_PREFIX pos;                                                    \ 
  /*
   * Expands to the definition of `inline bool CLASS::FN_DECL`, a step in
   * the direction of decreasing pos.
   *   FN_DECL       - method name plus parameter list, supplied by the
   *                   caller.
   *   NTHASH_CALL   - hashing code supplied by the caller (its use is on
   *                   lines not shown here).
   *   MEMBER_PREFIX - text pasted in front of every member name.
   * Visible behaviour: there is a separate branch for the not-yet-
   * initialized state and one for pos <= 0. The character at seq[pos - 1]
   * is looked up in SEED_TAB; on a match pos is reduced by k, otherwise pos
   * is decremented by one.
   * NOTE(review): pos is assigned std::numeric_limits<std::size_t>::max()
   * in BTLLIB_NTHASH_INIT, so it looks unsigned. If so, `pos <= 0` is the
   * same as `pos == 0`, and `pos -= k` wraps around whenever pos < k. The
   * lines guarding that case are not shown here. Confirm.
   */
  442#define BTLLIB_NTHASH_ROLL_BACK(CLASS, FN_DECL, NTHASH_CALL, MEMBER_PREFIX)    \ 
  443  inline bool CLASS::FN_DECL                                                   \ 
  445    if (!MEMBER_PREFIX initialized) {                                          \ 
  448    if (MEMBER_PREFIX pos <= 0) {                                              \ 
  451    if (SEED_TAB[(unsigned char)(MEMBER_PREFIX seq[MEMBER_PREFIX pos - 1])] == \ 
  453      MEMBER_PREFIX pos -= MEMBER_PREFIX k;                                    \ 
  457      -- MEMBER_PREFIX pos;                                                    \ 
  /*
   * Expands to the definition of `inline bool CLASS::FN_DECL`.
   *   FN_DECL       - method name plus parameter list, supplied by the
   *                   caller.
   *   NTHASH_CALL   - hashing code supplied by the caller (its use is on
   *                   lines not shown here).
   *   MEMBER_PREFIX - text pasted in front of every member name.
   * Visible behaviour: the not-yet-initialized state is handled in its own
   * branch. Everything else is on lines not shown here.
   */
  462#define BTLLIB_NTHASH_PEEK(CLASS, FN_DECL, NTHASH_CALL, MEMBER_PREFIX)         \ 
  463  inline bool CLASS::FN_DECL                                                   \ 
  465    if (!MEMBER_PREFIX initialized) {                                          \ 
  472BTLLIB_NTHASH_INIT(NtHash,
 
  479                          hashes_array.get()), )
 
  480BTLLIB_NTHASH_ROLL(NtHash,
 
  490BTLLIB_NTHASH_ROLL_BACK(NtHash,
 
  492                        ntmc64l(seq[pos + k - 1],
 
  505    uint64_t forward_hash_tmp = forward_hash;
 
  506    uint64_t reverse_hash_tmp = reverse_hash;
 
  520    uint64_t forward_hash_tmp = forward_hash;
 
  521    uint64_t reverse_hash_tmp = reverse_hash;
 
  535    uint64_t forward_hash_tmp = forward_hash;
 
  536    uint64_t reverse_hash_tmp = reverse_hash;
 
  537    ntmc64l(seq[pos + k - 1],
 
  547  peek_back(
char char_in),
 
  550    uint64_t forward_hash_tmp = forward_hash;
 
  551    uint64_t reverse_hash_tmp = reverse_hash;
 
  552    ntmc64l(seq[pos + k - 1],
 
  561BTLLIB_NTHASH_INIT(BlindNtHash,
 
  562                   ntmc64(seq.get() + pos,
 
  568                          hashes_array.get()), )
 
  573    ntmc64(seq[pos % seq_len],
 
  580    seq[pos % seq_len] = char_in;
 
  582BTLLIB_NTHASH_ROLL_BACK(
 
  584  roll_back(
char char_in),
 
  586    ntmc64l(seq[(pos + k - 1) % seq_len],
 
  593    seq[(pos + k - 1) % seq_len] = char_in;
 
  600    uint64_t forward_hash_tmp = forward_hash;
 
  601    uint64_t reverse_hash_tmp = reverse_hash;
 
  602    ntmc64(seq[pos % seq_len],
 
  612  peek_back(
char char_in),
 
  614    uint64_t forward_hash_tmp = forward_hash;
 
  615    uint64_t reverse_hash_tmp = reverse_hash;
 
  616    ntmc64l(seq[(pos + k - 1) % seq_len],
 
  625BTLLIB_NTHASH_INIT(SeedNtHash,
 
  626                   ntmsm64(nthash.seq + nthash.pos,
 
  632                           fh_no_monomers.get(),
 
  633                           rh_no_monomers.get(),
 
  637                           nthash.hashes_array.get()),
 
  639BTLLIB_NTHASH_ROLL(SeedNtHash,
 
  641                   ntmsm64(nthash.seq + nthash.pos,
 
  647                           fh_no_monomers.get(),
 
  648                           rh_no_monomers.get(),
 
  651                           nthash.hashes_array.get());
 
  653BTLLIB_NTHASH_ROLL_BACK(SeedNtHash,
 
  655                        ntmsm64l(nthash.seq + nthash.pos - 1,
 
  661                                 fh_no_monomers.get(),
 
  662                                 rh_no_monomers.get(),
 
  665                                 nthash.hashes_array.get());
 
  672    std::unique_ptr<uint64_t[]> fh_no_monomers_tmp(
new uint64_t[blocks.size()]);
 
  673    std::unique_ptr<uint64_t[]> rh_no_monomers_tmp(
new uint64_t[blocks.size()]);
 
  674    std::unique_ptr<uint64_t[]> forward_hash_tmp(
new uint64_t[blocks.size()]);
 
  675    std::unique_ptr<uint64_t[]> reverse_hash_tmp(
new uint64_t[blocks.size()]);
 
  676    std::memcpy(fh_no_monomers_tmp.get(),
 
  678                blocks.size() * 
sizeof(uint64_t));
 
  679    std::memcpy(rh_no_monomers_tmp.get(),
 
  681                blocks.size() * 
sizeof(uint64_t));
 
  682    std::memcpy(forward_hash_tmp.get(),
 
  684                blocks.size() * 
sizeof(uint64_t));
 
  685    std::memcpy(reverse_hash_tmp.get(),
 
  687                blocks.size() * 
sizeof(uint64_t));
 
  688    ntmsm64(nthash.seq + nthash.pos,
 
  694            fh_no_monomers_tmp.get(),
 
  695            rh_no_monomers_tmp.get(),
 
  696            forward_hash_tmp.get(),
 
  697            reverse_hash_tmp.get(),
 
  698            nthash.hashes_array.get());
 
  706    std::unique_ptr<uint64_t[]> fh_no_monomers_tmp(
new uint64_t[blocks.size()]);
 
  707    std::unique_ptr<uint64_t[]> rh_no_monomers_tmp(
new uint64_t[blocks.size()]);
 
  708    std::unique_ptr<uint64_t[]> forward_hash_tmp(
new uint64_t[blocks.size()]);
 
  709    std::unique_ptr<uint64_t[]> reverse_hash_tmp(
new uint64_t[blocks.size()]);
 
  710    std::memcpy(fh_no_monomers_tmp.get(),
 
  712                blocks.size() * 
sizeof(uint64_t));
 
  713    std::memcpy(rh_no_monomers_tmp.get(),
 
  715                blocks.size() * 
sizeof(uint64_t));
 
  716    std::memcpy(forward_hash_tmp.get(),
 
  718                blocks.size() * 
sizeof(uint64_t));
 
  719    std::memcpy(reverse_hash_tmp.get(),
 
  721                blocks.size() * 
sizeof(uint64_t));
 
  722    ntmsm64(nthash.seq + nthash.pos,
 
  729            fh_no_monomers_tmp.get(),
 
  730            rh_no_monomers_tmp.get(),
 
  731            forward_hash_tmp.get(),
 
  732            reverse_hash_tmp.get(),
 
  733            nthash.hashes_array.get());
 
  741    std::unique_ptr<uint64_t[]> fh_no_monomers_tmp(
new uint64_t[blocks.size()]);
 
  742    std::unique_ptr<uint64_t[]> rh_no_monomers_tmp(
new uint64_t[blocks.size()]);
 
  743    std::unique_ptr<uint64_t[]> forward_hash_tmp(
new uint64_t[blocks.size()]);
 
  744    std::unique_ptr<uint64_t[]> reverse_hash_tmp(
new uint64_t[blocks.size()]);
 
  745    std::memcpy(fh_no_monomers_tmp.get(),
 
  747                blocks.size() * 
sizeof(uint64_t));
 
  748    std::memcpy(rh_no_monomers_tmp.get(),
 
  750                blocks.size() * 
sizeof(uint64_t));
 
  751    std::memcpy(forward_hash_tmp.get(),
 
  753                blocks.size() * 
sizeof(uint64_t));
 
  754    std::memcpy(reverse_hash_tmp.get(),
 
  756                blocks.size() * 
sizeof(uint64_t));
 
  757    ntmsm64l(nthash.seq + nthash.pos - 1,
 
  763             fh_no_monomers_tmp.get(),
 
  764             rh_no_monomers_tmp.get(),
 
  765             forward_hash_tmp.get(),
 
  766             reverse_hash_tmp.get(),
 
  767             nthash.hashes_array.get());
 
  772  peek_back(
char char_in),
 
  775    std::unique_ptr<uint64_t[]> fh_no_monomers_tmp(
new uint64_t[blocks.size()]);
 
  776    std::unique_ptr<uint64_t[]> rh_no_monomers_tmp(
new uint64_t[blocks.size()]);
 
  777    std::unique_ptr<uint64_t[]> forward_hash_tmp(
new uint64_t[blocks.size()]);
 
  778    std::unique_ptr<uint64_t[]> reverse_hash_tmp(
new uint64_t[blocks.size()]);
 
  779    std::memcpy(fh_no_monomers_tmp.get(),
 
  781                blocks.size() * 
sizeof(uint64_t));
 
  782    std::memcpy(rh_no_monomers_tmp.get(),
 
  784                blocks.size() * 
sizeof(uint64_t));
 
  785    std::memcpy(forward_hash_tmp.get(),
 
  787                blocks.size() * 
sizeof(uint64_t));
 
  788    std::memcpy(reverse_hash_tmp.get(),
 
  790                blocks.size() * 
sizeof(uint64_t));
 
  791    ntmsm64l(nthash.seq + nthash.pos - 1,
 
  798             fh_no_monomers_tmp.get(),
 
  799             rh_no_monomers_tmp.get(),
 
  800             forward_hash_tmp.get(),
 
  801             reverse_hash_tmp.get(),
 
  802             nthash.hashes_array.get());
 
  806#undef BTLLIB_NTHASH_INIT 
  807#undef BTLLIB_NTHASH_ROLL 
  808#undef BTLLIB_NTHASH_ROLL_BACK 
  809#undef BTLLIB_NTHASH_PEEK 
Definition: nthash.hpp:187
bool roll_back(char char_in)
BlindNtHash(const char *seq, size_t seq_len, unsigned hash_num, unsigned k, size_t pos=0)
size_t get_pos() const
Definition: nthash.hpp:259
BlindNtHash(const std::string &seq, unsigned hash_num, unsigned k, size_t pos=0)
bool peek_back(char char_in)
Definition: nthash.hpp:54
NtHash(const std::string &seq, unsigned hash_num, unsigned k, size_t pos=0)
NtHash(const char *seq, size_t seq_len, unsigned hash_num, unsigned k, size_t pos=0)
bool peek_back(char char_in)
size_t get_pos() const
Definition: nthash.hpp:144
Definition: nthash.hpp:294
bool peek_back(char char_in)
Definition: bloom_filter.hpp:16