1 #ifndef BTLLIB_ROLLING_HASH_HPP 
    2 #define BTLLIB_ROLLING_HASH_HPP 
   13 class SeedRollingHash;
 
   // Alias for a single spaced seed, stored as a vector of unsigned values.
   // NOTE(review): what each value encodes (e.g. positions within the k-mer)
   // is not visible in this part of the file - confirm against parse_seeds().
   14 using SpacedSeed = std::vector<unsigned>;
 
   // Forward declaration of parse_seeds(); the definition appears later in
   // this file and is used by the SeedRollingHash constructors that take
   // seeds as strings.
   //
   // @param seed_strings  seeds in textual form, one string per seed
   // @return a vector of SpacedSeed built from the input strings
   //         (presumably one SpacedSeed per input string - confirm against
   //         the definition)
   //
   // NOTE(review): declared `static` in an include-guarded header, so every
   // translation unit that includes it gets its own internal-linkage copy;
   // `inline` may be the intent - confirm.
   15 static std::vector<SpacedSeed>
 
   16 parse_seeds(
const std::vector<std::string>& seed_strings);
 
   // Constructs a RollingHash over a raw character buffer.
   //
   // @param seq       pointer to the sequence characters
   // @param seq_len   number of characters in seq (the std::string overload
   //                  passes seq.size() for this argument)
   // @param k         window length: init()/roll() read seq[pos + k - 1] and
   //                  compare pos against seq_len - k
   // @param hash_num  number of hash values; hashes_vector is resized to
   //                  hash_num in a constructor body later in this file
   // NOTE(review): whether seq is copied or only referenced is not visible
   // here - confirm the lifetime requirement on the caller's buffer.
   37   RollingHash(
const char* seq, 
size_t seq_len, 
unsigned k, 
unsigned hash_num);
 
   // Constructs a RollingHash over a std::string.
   // The definition later in this file appears to delegate to the
   // pointer/length overload with seq.c_str() and seq.size().
   //
   // @param seq       sequence to hash
   // @param k         window length (see the pointer/length overload)
   // @param hash_num  number of hash values
   45   RollingHash(
const std::string& seq, 
unsigned k, 
unsigned hash_num);
 
   // Read-only access to the hash values.
   // The definition later in this file returns hashes_vector.data(), i.e. a
   // pointer to the first element of the internal hash vector.
   49   const uint64_t* hashes() 
const;
 
   // Returns the `pos` member.
   // Note: init() assigns std::numeric_limits<std::size_t>::max() to pos on
   // some paths (e.g. when pos > seq_len - k), so callers can observe that
   // value here.
   51   size_t get_pos()
 const { 
return pos; }
 
   // Returns the `k` member, the window length used by init()/roll() when
   // indexing seq[pos + k - 1].
   52   unsigned get_k()
 const { 
return k; }
 
   // Returns the `hash_num` member (a const data member of this class).
   53   unsigned get_hash_num()
 const { 
return hash_num; }
 
   // Number of hash values; const, so it cannot change after construction.
   // Returned by get_hash_num().
   62   const unsigned hash_num;
 
   // Storage for the hash values. Resized to hash_num in a constructor body,
   // exposed through hashes(), and passed as hashes_vector.data() to the
   // hashing calls in the ROLLING_HASH_INIT / ROLLING_HASH_ROLL expansions.
   64   std::vector<uint64_t> hashes_vector;
 
   // Default-initialised to 0.
   // NOTE(review): presumably the running hash of the forward strand - the
   // code that updates it is not visible here; confirm.
   65   uint64_t forward_hash = 0;
 
   // Default-initialised to 0.
   // NOTE(review): presumably the running hash of the reverse-complement
   // strand - the code that updates it is not visible here; confirm.
   66   uint64_t reverse_hash = 0;
 
   76                   const std::vector<SpacedSeed>& seeds,
 
   77                   unsigned hash_num_per_seed);
 
   80                   const std::vector<SpacedSeed>& seeds,
 
   81                   unsigned hash_num_per_seed);
 
   85                   const std::vector<std::string>& seeds,
 
   86                   unsigned hash_num_per_seed);
 
   89                   const std::vector<std::string>& seeds,
 
   90                   unsigned hash_num_per_seed);
 
   // Returns the `hash_num_per_seed` member (a const data member).
   92   unsigned get_hash_num_per_seed()
 const { 
return hash_num_per_seed; }
 
   // Number of hash values per seed; const after construction. Every visible
   // SeedRollingHash constructor passes
   // seeds.size() * hash_num_per_seed (using its `seeds` parameter) to the
   // RollingHash base as hash_num.
   99   const unsigned hash_num_per_seed;
 
   // The spaced seeds used by this hasher. The constructors that take seeds
   // as strings initialise this member with parse_seeds(seeds).
  100   std::vector<SpacedSeed> seeds;
 
  112   hashes_vector.resize(hash_num);
 
  118   : 
RollingHash(seq.c_str(), seq.size(), k, hash_num)
 
  121 inline SeedRollingHash::SeedRollingHash(
const char* seq,
 
  124                                         const std::vector<SpacedSeed>& seeds,
 
  125                                         unsigned hash_num_per_seed)
 
  126   : 
RollingHash(seq, seq_len, k, seeds.size() * hash_num_per_seed)
 
  127   , hash_num_per_seed(hash_num_per_seed)
 
  131 inline SeedRollingHash::SeedRollingHash(
const std::string& seq,
 
  133                                         const std::vector<SpacedSeed>& seeds,
 
  134                                         unsigned hash_num_per_seed)
 
  135   : RollingHash(seq, k, seeds.size() * hash_num_per_seed)
 
  136   , hash_num_per_seed(hash_num_per_seed)
 
  140 inline SeedRollingHash::SeedRollingHash(
const char* seq,
 
  143                                         const std::vector<std::string>& seeds,
 
  144                                         unsigned hash_num_per_seed)
 
  145   : RollingHash(seq, seq_len, k, seeds.size() * hash_num_per_seed)
 
  146   , hash_num_per_seed(hash_num_per_seed)
 
  147   , seeds(parse_seeds(seeds))
 
  150 inline SeedRollingHash::SeedRollingHash(
const std::string& seq,
 
  152                                         const std::vector<std::string>& seeds,
 
  153                                         unsigned hash_num_per_seed)
 
  154   : RollingHash(seq, k, seeds.size() * hash_num_per_seed)
 
  155   , hash_num_per_seed(hash_num_per_seed)
 
  156   , seeds(parse_seeds(seeds))
 
  159 static std::vector<SpacedSeed>
 
  160 parse_seeds(
const std::vector<std::string>& seed_strings)
 
  162   std::vector<SpacedSeed> seed_set;
 
  163   for (
const auto& seed_string : seed_strings) {
 
  166     for (
const auto& c : seed_string) {
 
  172     seed_set.push_back(seed);
 
  178 #define ROLLING_HASH_INIT(CLASS, NTHASH_CALL)                                  \ 
  179   inline bool CLASS::init()                                                    \ 
  182       pos = std::numeric_limits<std::size_t>::max();                           \ 
  186     while ((pos < seq_len - k + 1) && !(NTHASH_CALL)) {                        \ 
  189     if (pos > seq_len - k) {                                                   \ 
  190       pos = std::numeric_limits<std::size_t>::max();                           \ 
  198 #define ROLLING_HASH_ROLL(CLASS, NTHASH_CALL)                                  \ 
  199   inline bool CLASS::roll()                                                    \ 
  204     if (pos > seq_len - k) {                                                   \ 
  207     if (seed_tab[(unsigned char)(seq[pos + k - 1])] == seedN) {                \ 
  216 ROLLING_HASH_INIT(RollingHash,
 
  223                          hashes_vector.data()))
 
  224 ROLLING_HASH_ROLL(RollingHash,
 
  231                          hashes_vector.data()))
 
  233 ROLLING_HASH_INIT(SeedRollingHash,
 
  242                           hashes_vector.data()))
 
  243 ROLLING_HASH_ROLL(SeedRollingHash,
 
  253                           hashes_vector.data()))
 
  255 #undef ROLLING_HASH_INIT 
  256 #undef ROLLING_HASH_ROLL 
  258 inline const uint64_t*
 
  259 RollingHash::hashes()
 const 
  261   return hashes_vector.data();